Permute a block of data while keeping rows unique

Let's say I have a data frame:

# Example data with three columns: time, side and id.
d <- data.frame(
  time = c(1, 3, 5, 6, 11, 15, 15, 18, 18, 20),
  side = c("L", "R", "R", "L", "L", "L", "L", "R", "R", "R"),
  id = c(1, 2, 1, 2, 4, 3, 4, 2, 1, 1),
  # Spelled out: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
d

   time side id
1     1    L  1
2     3    R  2
3     5    R  1
4     6    L  2
5    11    L  4
6    15    L  3
7    15    L  4
8    18    R  2
9    18    R  1
10   20    R  1

I want to permute the id variable while keeping the other two columns fixed. Importantly, however, in the resulting permutation I do not want the same id to appear on the same side at the same time. There are two time/side combinations where this can happen: in the original data, time points 15 and 18 each have two distinct ids on one side (left at time 15 and right at time 18). If I permute with sample, there is a chance that the same id will appear twice in the same time/side combination.

For instance,

# Fix the RNG seed, then show a plain shuffle of the ids next to the
# unchanged time and side columns.
set.seed(11)
shuffled_id <- sample(d$id)
data.frame(time = d$time, side = d$side, id = shuffled_id)

   time side id
1     1    L  1
2     3    R  1
3     5    R  4
4     6    L  1
5    11    L  4
6    15    L  2
7    15    L  3
8    18    R  2
9    18    R  2
10   20    R  1

Here id = 2 appears in two rows at time 18 on the "R" side. This is not allowed in the permutation I need.

- , , 100 , 500 , . , sample . , ? , -?

+4
1

:

library(tidyverse)
# Example data with three columns: time, side and id.
d <- data.frame(
  time = c(1, 3, 5, 6, 11, 15, 15, 18, 18, 20),
  side = c("L", "R", "R", "L", "L", "L", "L", "R", "R", "R"),
  id = c(1, 2, 1, 2, 4, 3, 4, 2, 1, 1),
  # Spelled out: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
# Copy the row names into a `rowname` column so that the original row order
# can be restored after the data has been split and recombined.
d <- rownames_to_column(d)

, .

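Next, a helper that draws n unique values from a vector (here, id) and returns both the drawn values and the vector with one copy of each drawn value removed, so that those copies cannot be handed out again (to another time/side group, etc.). It samples "uniquely", hence the name: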

#' Draw `n` distinct values from `vec`, consuming one copy of each
#'
#' Values are drawn one at a time. Each draw picks a random element of the
#' remaining pool, so a value's chance of being drawn is proportional to how
#' often it occurs. Every copy of the drawn value then leaves the pool, which
#' guarantees that the `n` results are all different.
#'
#' @param vec Atomic vector to sample from; may contain repeated values.
#' @param n Number of distinct values to draw (a non-negative whole number).
#' @return A list with two elements: `out`, the `n` drawn values in draw
#'   order, and `vec`, the input with the first occurrence of each drawn
#'   value removed.
samp_uniq_n <- function(vec, n) {
  # Drawing n different values is only possible if that many exist.
  n_distinct_values <- length(unique(vec))
  if (n > n_distinct_values) {
    stop(
      "cannot draw ", n, " unique values from a vector with only ",
      n_distinct_values, " distinct value(s)",
      call. = FALSE
    )
  }

  pool <- vec
  # A typed NA keeps `out` the same type as `vec`, even when n is 0.
  out <- rep(vec[NA_integer_], n)
  # seq_len() performs no draws for n = 0, whereas 1:0 would iterate twice.
  for (i in seq_len(n)) {
    # Index into the pool instead of calling sample(unique(pool), 1, prob =):
    # sample() treats a single number x >= 1 as the range 1:x, and table()
    # reports counts in sorted order rather than unique()'s order of first
    # appearance, so the weights would be attached to the wrong values.
    picked <- pool[sample.int(length(pool), 1L)]
    out[i] <- picked
    # Every copy leaves the pool so the same value cannot be drawn again...
    pool <- pool[!pool %in% picked]
    # ...but only one copy is used up in the vector handed back to the caller.
    vec <- vec[-match(picked, vec)]
  }
  list(out = out, vec = vec)
}

Then split the data by time and side, and order the resulting groups by size, largest first:

# Keep the ids aside; they are handed out again group by group later.
id <- d$id
# One data frame per time/side combination that actually occurs
# (drop = TRUE discards the empty combinations), without the id column.
d_split <- d %>%
  select(-id) %>%
  split(., list(d$time, d$side), drop = TRUE)
# Order the groups by row count, largest first. vapply() always returns an
# integer vector, whereas sapply() returns a list for empty input and would
# make the negation fail.
# NOTE(review): presumably the largest groups go first because they need the
# most distinct ids - confirm.
d_split_desc <- d_split[order(-vapply(d_split, nrow, integer(1)))]

:

# Walk through the groups in order; each group receives as many distinct ids
# as it has rows, and the ids it used are taken out of the remaining pool.
for (k in seq_along(d_split_desc)) {
  n_rows <- nrow(d_split_desc[[k]])
  draw <- samp_uniq_n(id, n_rows)
  d_split_desc[[k]][["id"]] <- draw$out
  id <- draw$vec
}

Finally, bind the groups back together and restore the original row order:

# Stack the per-group data frames into a single data frame.
d_combined <- do.call(rbind, d_split_desc)
# Sort by the saved row number to get back the original row order, then drop
# the helper column.
d_permute <- d_combined %>%
  arrange(as.numeric(rowname)) %>%
  select(-rowname)

- , , .

+1

Source: https://habr.com/ru/post/1692988/


All Articles