In R, split the data frame so that the dataframes subset contains the last row of the previous data frame and the first row of the next data frame

Question

In R, split the data frame so that the dataframes subset contains the last row of the previous data frame and the first row of the next data frame

There are many answers to the question of how to split a data frame, for example, How to split a data frame?

However, I would like to split the data frame so that the smaller data frames contain the last row of the previous data frame and the first row of the next frame.

Here is an example

n <- 1:9
group <- rep(c("a","b","c"), each = 3)
data.frame(n = n, group)

  n  group
1 1     a
2 2     a
3 3     a
4 4     b
5 5     b
6 6     b
7 7     c
8 8     c
9 9     c

I would like the result to look like this:

 d1 <- data.frame(n = 1:4, group = c(rep("a",3),"b"))
 d2 <- data.frame(n = 3:7, group = c("a",rep("b",3),"c"))
 d3 <- data.frame(n = 6:9, group = c("b",rep("c",3)))
 d <- list(d1, d2, d3)
 d

[[1]]
  n group
1 1     a
2 2     a
3 3     a
4 4     b

[[2]]
  n group
1 3     a
2 4     b
3 5     b
4 6     b
5 7     c

[[3]]
  n group
1 6     b
2 7     c
3 8     c
4 9     c

What is an effective way to accomplish this task?

+4

r dataframe

Tedward Nov 18 '15 at 20:21

source share

5 answers

good'ol by, "[a] ppl [ies] [ INDICES]".

by(data = df, INDICES = df$group, function(x){
   id <- c(min(x$n) - 1, x$n, max(x$n) + 1)
   na.omit(df[id, ])
   })


# df$group: a
#   n group
# 1 1     a
# 2 2     a
# 3 3     a
# 4 4     b
# -------------------------------------------------------------------------------- 
#   df$group: b
# n group
# 3 3     a
# 4 4     b
# 5 5     b
# 6 6     b
# 7 7     c
# -------------------------------------------------------------------------------- 
#   df$group: c
#   n group
# 6 6     b
# 7 7     c
# 8 8     c
# 9 9     c

print by "" , ( ) list, , ( str names ).

+4

Henrik 18 . '15 20:58

@cdetermans, . , data.table::shift ( dyplr::lag), , lapply , -

library(data.table) # v1.9.6+ 
indx <- setDT(df)[, which(group != shift(group, fill = TRUE))]
lapply(Map(`:`, c(1L, indx - 1L), c(indx, nrow(df))), function(x) df[x,])
# [[1]]
#    n group
# 1: 1     a
# 2: 2     a
# 3: 3     a
# 4: 4     b
# 
# [[2]]
#    n group
# 1: 3     a
# 2: 4     b
# 3: 5     b
# 4: 6     b
# 5: 7     c
# 
# [[3]]
#    n group
# 1: 6     b
# 2: 7     c
# 3: 8     c
# 4: 9     c

+3

David Arenburg 18 . '15 20:52

data.frame, - data.table? parallelism.

library(data.table)
n <- 1:9
group <- rep(c("a","b","c"), each = 3)
df <- data.table(n = n, group)
df[, `:=` (group = factor(df$group))]
df[, `:=` (group_i = seq_len(.N), group_N = .N), by = "group"]

library(doParallel)
groups <- unique(df$group)
foreach(i = seq(groups)) %do% {
  df[group == groups[i] | (as.integer(group) == i + 1 & group_i == 1) | (as.integer(group) == i - 1 & group_i == group_N), c("n", "group"), with = FALSE]  
}
[[1]]
   n group
1: 1     a
2: 2     a
3: 3     a
4: 4     b
[[2]]
   n group
1: 3     a
2: 4     b
3: 5     b
4: 6     b
5: 7     c
[[3]]
   n group
1: 6     b
2: 7     c
3: 8     c
4: 9     c

+1

mlegge 18 . '15 20:47

dplyr:

library(dplyr)

data = 
  data_frame(n = n, group) %>%
  group_by(group)

firsts = 
  data %>%
  slice(1) %>%
  ungroup %>%
  mutate(new_group = lag(group)) %>%
  slice(-1)

lasts = 
  data %>%
  slice(n()) %>%
  ungroup %>%
  mutate(new_group = lead(group)) %>%
  slice(-n())

bind_rows(firsts, data, lasts) %>%
  mutate(final_group = 
           ifelse(is.na(new_group),
                  group,
                  new_group) ) %>%
  arrange(final_group, n) %>%
  group_by(final_group)

0

bramtayl Nov 18 '15 at 10:27

source share

G. Grothendieck · Accepted Answer · 2015-11-18T20:54:50+0000

, DF - data.frame, n group. n - DF. extract, ix, , DF. , extract, 1,..., n extract .

n <- nrow(DF)
extract <- function(ix) DF[seq(max(1, min(ix) - 1), min(n, max(ix) + 1)), ]
lapply(split(seq_len(n), DF$group), extract)

$a
  n group
1 1     a
2 2     a
3 3     a
4 4     b

$b
  n group
3 3     a
4 4     b
5 5     b
6 6     b
7 7     c

$c
  n group
6 6     b
7 7     c
8 8     c
9 9     c

In R, split the data frame so that the dataframes subset contains the last row of the previous data frame and the first row of the next data frame

More articles: