A subset of the value database in a column or adjacent columns

Question

A subset of the value database in a column or adjacent columns

I would like to find which column contains the largest number of 1s. The number 1 should appear only once per row. Once the column with the largest number of 1s is located, the script should also check the adjacent columns (+1 / -1), and if either of them contains a 1 in some row, that row should be selected as well. All of these rows should be returned as a subset of the data (e.g. with the subset function).

Here is part of the source data:

# Example data (dput output): four replicate rows, 20 numeric intensity columns
# named by position ("10" ... "1039") plus two character columns (EOD, Complex).
# Each row contains exactly one value equal to 1 (column 506 for rows 1-3,
# column 433 for row 4).
structure(list(   `10` = c(0, 0, 0, 0),  `34` = c(0, 0, 0, 0),
                  `59` = c(0, 0, 0, 0),  `84` = c(0, 0, 0, 0),
                 `110` = c(0, 0, 0, 0), `134` = c(0, 0, 0, 0),
                 `165` = c(0, 0, 0, 0), `199` = c(0, 0, 0, 0),
                 `234` = c(0, 0, 0, 0),
                 `257` = c(0.0160178986200301, 0, 0.0409772658686249, 0.0289710439505515),
                 `362` = c(0.0679054515644214, 0.126933274414494, 0.0855598028367368, 0.0596214721268868),
                 `433` = c(0.490914059297718, 0.604765061128296, 0.813348757670254, 1),
                 `506` = c(1, 1, 1, 0.971410482822965),
                 `581` = c(0.198244295668807, 0.234158197083517, 0.269655970224324, 0.195318383259472),
                 `652` = c(0.271177756524115, 0.223018854028576, 0.301352982597324, 0.142584385725234),
                 `733` = c(0.212426561005602, 0.212778023272942, 0.228513228045468, 0),
                 `818` = c(0.213816778248395, 0.168570481661511, 0.264465345538678, 0),
                 `896` = c(0.137102063123377, 0, 0.320234382858867, 0),
                 `972` = c(0.108932231179123, 0, 0.179106729705261, 0),
                `1039` = c(0.101762535865555, 0, 0, 0),
                   EOD = c("Peter", "Peter", "Peter", "Peter"),
               # fixed: the first element previously started with a doubled
               # quote (""FT team"), which made the whole literal unparseable
               Complex = c("FT team", "FT team", "FT team", "FT team")),
          .Names = c("10", "34", "59", "84", "110", "134", "165", "199",
                     "234", "257", "362", "433", "506", "581", "652", "733",
                     "818", "896", "972", "1039", "EOD", "Complex"),
          row.names = c("Peter_1_Rep_1_E", "Peter_1_Rep_2_E",
                        "Peter_1_Rep_3_E", "Peter_1_Rep_4_E"),
          class = "data.frame")

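In this example, column 506 contains the largest number of 1s, so it is the selected column. In addition, the adjacent column (-1, i.e. 433) also contains a 1, so that row should be selected as well.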

Here is a second, more complicated example:

# Second example data set (dput output): eight rows, the same 20 numeric
# columns plus the character columns EOD and Complex. Columns 165 and 199
# each contain four values equal to 1, so the number of 1s is tied between them.
structure(list(    `10` = c(0, 0, 0, 0, 0, 0, 0, 0),
                   `34` = c(0, 0, 0, 0, 0, 0, 0, 0),
                   `59` = c(0, 0, 0, 0, 0, 0, 0, 0),
                   `84` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `110` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `134` = c(0.168783347110543, 0, 0.382618775924215, 0, 0.530638724516877, 0, 0.169526042048202, 0),
                  `165` = c(1, 0.36380544964196, 1, 0.13979454361738, 1, 0.239652477288689, 1, 0.240341578327444),
                  `199` = c(0.355158938904336, 1, 0.646724265971128, 1, 0.582637073151552, 1, 0.20319390520841, 1),
                  `234` = c(0.0963628165627114, 0.575436312346942, 0.229853828180188, 0.433555069046817, 0.247567185011894, 0.508529485059242, 0.138356164383562, 0.389880251276011),
                  `257` = c(0, 0.17393595585728, 0, 0.127787133715056, 0, 0.117147323350173, 0, 0),
                  `362` = c(0, 0, 0, 0.0919333108790839, 0, 0, 0, 0),
                  `433` = c(0, 0, 0, 0.0745570899292691, 0, 0, 0, 0),
                  `506` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `581` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `652` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `733` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `818` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `896` = c(0, 0, 0, 0, 0, 0, 0, 0),
                  `972` = c(0, 0, 0, 0, 0, 0, 0, 0),
                 `1039` = c(0, 0, 0, 0, 0, 0, 0, 0),
                    EOD = c("Paul", "Paul", "Paul", "Paul", "Paul", "Paul", "Paul", "Paul"),
                Complex = c("GG Team", "GG Team", "GG Team", "GG Team", "GG Team", "GG Team", "GG Team", "GG Team")),
          .Names = c("10", "34", "59", "84", "110", "134", "165", "199", "234", "257", "362", "433", "506", "581", "652", "733", "818", "896", "972", "1039", "EOD", "Complex"),
          row.names = c("PaulG_1_Rep_1_E", "Paul_1_Rep_1_E", "PaulN_1_Rep_2_E", "PaulG_1_Rep_2_E", "Paul_1_Rep_3_E", "PaulC_1_Rep_3_E", "PaulC_1_Rep_4_E", "Paul_1_Rep_4_E"),
          class = "data.frame")

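In this case, two columns contain the same number of 1s. When that happens, the tie should be broken by choosing the column with the larger colsum.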

+4

r

Shaxi Liver 06 . '17 14:11

2

Here is a tidyverse approach, written step by step (it assumes the second example data frame is stored as df2):

# Select the numeric column of `df2` holding the most values equal to 1
# (ties broken by the larger column sum), plus whichever of its immediate
# left/right neighbours also contains a 1. `df2` must already exist.
# The selected columns end up in `results`.
library(tidyverse)

# add rownames to the data frame
df2$id <- rownames(df2)

# only numeric columns can hold the 1s we are looking for; restricting to
# them also stops gather() from coercing every value to character
numeric_cols <- map_lgl(df2, is.numeric)

# make a data frame of each column sum
thecolsums <- colSums(df2[, numeric_cols, drop = FALSE], na.rm = TRUE) %>%
  enframe(name = "colname", value = "colsum")

# change the data frame to long format
dflong <- df2[, numeric_cols, drop = FALSE] %>%
  mutate(rowid = row_number()) %>%
  gather(colname, val, -rowid)

# which column has the most 1s; ties are broken by the larger column sum.
# slice(1) guarantees a single name even when both criteria are tied.
# (exact comparison with 1 is intended: the data are scaled so that the
# maximum is exactly 1)
whichcol <- dflong %>%
  filter(val == 1) %>%
  group_by(colname) %>%
  summarize(n_ones = n()) %>%
  left_join(thecolsums, by = "colname") %>%
  arrange(desc(n_ones), desc(colsum)) %>%
  slice(1) %>%
  pluck("colname")

if (length(whichcol) != 1) {
  stop("no numeric column of df2 contains a 1", call. = FALSE)
}

# what the numerical index of the column
whichcolindex <- which(names(df2) == whichcol)

# get previous and next columns if they exist; a missing neighbour is an
# empty index rather than FALSE/0 so it can never select anything
prevcolindex <- if (whichcolindex > 1L) whichcolindex - 1L else integer(0)
nextcolindex <- if (whichcolindex < ncol(df2)) {
  whichcolindex + 1L
} else {
  integer(0)
}

# do the previous and next columns have 1s in them?
# (&& short-circuits, so a missing neighbour is never indexed)
prevcolhasone <- length(prevcolindex) == 1 &&
  any(df2[[prevcolindex]] == 1, na.rm = TRUE)
nextcolhasone <- length(nextcolindex) == 1 &&
  any(df2[[nextcolindex]] == 1, na.rm = TRUE)

# create a vector with 1, 2 or 3 column indexes
finalindex <- c(
  prevcolindex[prevcolhasone],
  whichcolindex,
  nextcolindex[nextcolhasone]
)

# subset the original data frame, only preserving the columns in question;
# drop = FALSE keeps a data frame even when a single column is selected
results <- df2[, finalindex, drop = FALSE]

+1

crazybilly 11 . '17 20:44

Moody_Mudskipper · Accepted Answer · 2017-12-11T12:13:33+0000

With your first example data frame stored as df1:

# Keep the rows of `df1` in which the "peak" column, or one of its immediate
# neighbours, equals 1. The peak column is the numeric column with the most
# 1s; ties are broken by the larger column sum. `df1` must already exist.

# keep only numeric columns to build the filter; vapply() is type-stable and
# drop = FALSE keeps a data frame even if there is a single numeric column
num_cols <- vapply(df1, is.numeric, logical(1))
df_num <- df1[, num_cols, drop = FALSE]
stopifnot(ncol(df_num) > 0)

n1 <- colSums(df_num == 1, na.rm = TRUE)          # number of 1s per column
i <- which(n1 == max(n1))                         # index of cols with max 1s
if (length(i) > 1) {
  # tie on the number of 1s: keep the tied column with the largest colsum
  max_cs <- which.max(colSums(df_num[, i, drop = FALSE], na.rm = TRUE))
  i <- i[max_cs]                                  # our column index
}

# chosen column plus its existing neighbours, clamped to 1..ncol(df_num)
neighbours <- seq(max(i - 1, 1), min(i + 1, ncol(df_num)))

# filter is TRUE if the chosen column is 1 or if any neighbour is 1
filter <- rowSums(df_num[, neighbours, drop = FALSE] == 1, na.rm = TRUE) > 0

df1[filter, ] # your result

A subset of the value database in a column or adjacent columns

More articles: