Find a unique set of strings in a vector where vector elements can contain multiple strings

Question

Find a unique set of strings in a vector where vector elements can contain multiple strings

I have a series of batch records that are labeled sequentially. Sometimes batches overlap.

x <- c("1","1","1/2","2","3","4","5/4","5")
> data.frame(x)
    x
1   1
2   1
3 1/2
4   2
5   3
6   4
7 5/4
8   5

I want to find the sets of batches that do not overlap and label them as periods. Batch "1/2" includes both "1" and "2", so it is not unique. When batch = "3", which is not contained in any previous batch, a new period starts. I am having difficulty with the combined batches; otherwise this would be easy. The result would be:

    x period
1   1      1
2   1      1
3 1/2      1
4   2      1
5   3      2
6   4      3
7 5/4      3
8   5      3

My experience is with more functional programming paradigms, so I know that the way I did this is very un-R. I am looking for a way to do this in R that is clean and simple. Any help is appreciated.

Here is my un-R code:

# Assign a period number to each batch label.
#
# A label may name several batches separated by "/" (e.g. "1/2"). A label
# belongs to the current period when it shares at least one batch with the
# labels already seen in that period; otherwise it starts a new period.
#
# batches: character vector of labels, in sequence order.
# Returns a numeric vector of period numbers, the same length as `batches`.
label_periods <- function(batches) {
  period <- numeric(length(batches)) # preallocated result
  seen <- character(0) # batches seen so far in the current period
  p <- 0 # current period number

  for (i in seq_along(batches)) {
    parts <- strsplit(batches[i], "/", fixed = TRUE)[[1]]

    # No overlap with the current period: open a new one. The set of seen
    # batches restarts from this label, so a repeated label (e.g. "3", "3")
    # stays in the same period and a combined label can also open a period.
    if (!any(parts %in% seen)) {
      p <- p + 1
      seen <- character(0)
    }

    seen <- union(seen, parts)
    period[i] <- p
  }

  period
}

x <- c("1", "1", "1/2", "2", "3", "4", "5/4", "5")

period <- label_periods(x)

df <- data.frame(x, period)

> df
    x period
1   1      1
2   1      1
3 1/2      1
4   2      1
5   3      2
6   4      3
7 5/4      3
8   5      3

+4

r

Lloyd Christmas 05 . '16 18:52

3

R supports a functional style, so this can be solved with Map and Reduce:

x <- c("1", "1", "1/2", "2", "3", "4", "5/4", "5")

# Split every label into the individual batches it names.
s <- strsplit(x, "/", fixed = TRUE)

# r[[i]] is the set of all batches seen before element i (r[[1]] is empty);
# the accumulated list has one more entry than s.
r <- Reduce(union, s, init = character(0), accumulate = TRUE)

# An element starts a new period when it shares no batch with anything
# seen before it. vapply guarantees a logical vector, so cumsum() does not
# have to coerce a list.
starts_new <- vapply(
  seq_along(s),
  function(i) length(intersect(s[[i]], r[[i]])) == 0,
  logical(1)
)
p <- cumsum(starts_new)

data.frame(x, period = p)

    x period
1   1      1
2   1      1
3 1/2      1
4   2      1
5   3      2
6   4      3
7 5/4      3
8   5      3

, , . , , . ( , , , .) "", , .

, ,

x <- c("1", "1", "1/2", "2", "3", "4", "5/4", "5")

# Split every label into the individual batches it names.
s <- strsplit(x, "/", fixed = TRUE)

# Lowest and highest batch number named by each label. vapply fixes the
# result type, so the code also behaves for empty or length-one input.
lo <- vapply(s, function(v) min(as.numeric(v)), numeric(1))
hi <- vapply(s, function(v) max(as.numeric(v)), numeric(1))

# A new period starts wherever a label's lowest batch lies above the
# previous label's highest batch; the first label always opens period 1.
# Indexing by seq_along(x) trims the leading 1 away when x is empty.
p <- cumsum(c(1, lo[-1] > hi[-length(hi)]))[seq_along(x)]

data.frame(x, period = p)

( ).

+2

A. Webb 05 . '16 20:26

Using tidyr, you can first split the labels into separate columns:

# sample data
x <- c("1","1","1/2","2","3","4","5/4","5")
df <- data.frame(x)

library(tidyr)
# separate x into two columns, with second NA if only one number
df <- separate(df, x, c('x1', 'x2'), sep = '/', remove = FALSE, convert = TRUE)

df now looks like this:

> df
    x x1 x2
1   1  1 NA
2   1  1 NA
3 1/2  1  2
4   2  2 NA
5   3  3 NA
6   4  4 NA
7 5/4  5  4
8   5  5 NA

:

# Assign a period to every row of df, which must hold the original label in
# column x and its split batch numbers in columns x1 and x2 (x2 is NA for
# single-batch rows).
n_rows <- nrow(df)
period <- numeric(n_rows) # preallocated, one entry per row
current <- 1 # the first row always belongs to period 1

for (i in seq_len(n_rows)) {
  # batch numbers named by row i, ignoring the NA padding in x2
  batches <- unlist(df[i, c("x1", "x2")], use.names = FALSE)
  batches <- batches[!is.na(batches)]

  # every batch number that appears in the rows above row i
  # (seq_len(i - 1) is empty for the first row, unlike 1:(i - 1))
  earlier <- unlist(df[seq_len(i - 1), c("x1", "x2")], use.names = FALSE)

  # a row that shares no batch with any earlier row opens a new period
  if (i > 1 && !any(batches %in% earlier)) {
    current <- current + 1
  }
  period[i] <- current
}

# rebuild df with just x and period
df <- data.frame(x = df$x, period = period)

df:

> df
    x period
1   1      1
2   1      1
3 1/2      1
4   2      1
5   3      2
6   4      3
7 5/4      3
8   5      3

0

alistaire 05 . '16 20:51

arturro · Accepted Answer · 2016-02-05T19:52:33+0000

:

# Number the periods of a sequence of batch labels.
#
# Each label names one or more batch numbers separated by "/". A label whose
# lowest batch number is greater than the highest batch number of the previous
# label starts a new period; a label whose lowest batch number is below it
# means the sequence is out of order.
#
# batches: character vector of labels made of numeric batch ids.
# Returns a numeric vector of period numbers, the same length as `batches`.
# Stops with "Error in sequence" when the labels go backwards.
number_periods <- function(batches) {
  batches <- as.character(batches)
  period <- numeric(length(batches))
  current <- 0
  prev_max <- -Inf # nothing seen yet, so the first label opens period 1

  for (i in seq_along(batches)) {
    # Compare batch ids as numbers: on character vectors min()/max() order
    # lexicographically, so "10" would sort before "9".
    ids <- as.numeric(strsplit(batches[i], "/", fixed = TRUE)[[1]])
    lowest <- min(ids)

    if (lowest < prev_max) {
      stop("Error in sequence")
    }
    if (lowest > prev_max) {
      current <- current + 1
    }

    period[i] <- current
    prev_max <- max(ids)
  }

  period
}

x <- c("1", "1", "1/2", "2", "3", "4", "5/4", "5")
x <- data.frame(x = x, period = number_periods(x), stringsAsFactors = FALSE)
x

Find a unique set of strings in a vector where vector elements can contain multiple strings

More articles: