The interval establishes an algebra in R (union, intersection, differences, inclusion, ...)

I am wondering if there is an appropriate structure in R for interval manipulation and comparison.

After some searching, I could only find the following: - the findInterval function in the base package. (but I hardly understand this) - some answers here and there about union and intersection (in particular: http://r.789695.n4.nabble.com/Union-Intersect-two-continuous-sets-td4224545.html )

You know about the initiative to implement a comprehensive set of tools that make it easy to cope with common tasks when processing intervals, for example, include / setdiff / union / intersection / etc. (for example, see the list of functions here)? or do you have any advice in developing this approach?

Below are some drafts on my side for this. this is certainly inconvenient and still has some errors, but it can illustrate what I'm looking for.


preliminary aspects of the accepted options - should deal with intervals or intervals - intervals are represented as 2 columns of data.frames (lower bound, upper bound), on one row - interval sets are presented as 2 columns with several rows - for identification of interval sets you may need third column


COMPOUND

interval_union <- function(df){ # for data frame df <- interval_clean(df) if(is.empty(df)){ return(as.data.frame(NULL)) } else { if(is.POSIXct(df[,1])) { dated <- TRUE df <- colwise(as.numeric)(df) } else { dated <- FALSE } M <- as.matrix(df) o <- order(c(M[, 1], M[, 2])) n <- cumsum( rep(c(1, -1), each=nrow(M))[o]) startPos <- c(TRUE, n[-1]==1 & n[-length(n)]==0) endPos <- c(FALSE, n[-1]==0 & n[-length(n)]==1) M <- M[o] if(dated == TRUE) { df2 <- colwise(mkDateTime)(as.data.frame(cbind(M[startPos], M[endPos])), from.s = TRUE) } else { df2 <- as.data.frame(cbind(M[startPos], M[endPos])) } colnames(df2) <- colnames(df) # print(df2) return(df2) } } union_1_1 <- function(test, ref){ names(ref) <- names(test) tmp <- interval_union(as.data.frame(rbind(test, ref))) return(tmp) } union_1_n <- function(test, ref){ return(union_1_1(test, ref)) } union_n_n <- function(test, ref){ testnn <- adply(.data = test, 1, union_1_n, ref, .expand = FALSE) return(testnn) } ref_interval_union <- function(df, ref){ tmp0 <- adply(df, 1, union_1_1, ref, .expand = FALSE) # set to FALSE to keep ID return(tmp0) } 

Intersection

 interval_intersect <- function(df){ # adapted from : http://r.789695.n4.nabble.com/Union-Intersect-two-continuous-sets-td4224545.html M <- as.matrix(df) L <- max(M[, 1]) R <- min(M[, 2]) Inew <- if (L <= R) c(L, R) else c() if (!is.empty(Inew)){ df2 <- t(as.data.frame(Inew)) colnames(df2) <- colnames(df) rownames(df2) <- NULL } else { df2 <- NULL } return(as.data.frame(df2)) } ref_interval_intersect <- function(df, ref){ tmpfun <- function(a, b){ names(b) <- names(a) tmp <- interval_intersect(as.data.frame(rbind(a, b))) return(tmp) } tmp0 <- adply(df, 1, tmpfun, ref, .expand = FALSE) # [,3:4] #if(!is.empty(tmp0)) colnames(tmp0) <- colnames(df) return(tmp0) } int_1_1 <- function(test, ref){ te <- as.vector(test) re <- as.vector(ref) names(re) <- names(te) tmp0 <- c(max(te[1, 1], re[1, 1]), min(te[1, 2], re[1, 2])) if(tmp0[1]>tmp0[2]) tmp0 <- NULL # inverse of a correct interval --> VOID if(!is.empty(tmp0)){ tmp1 <- colwise(mkDateTime)(as.data.frame(t(as.data.frame(tmp0)))) colnames(tmp1) <- colnames(test) } else { tmp1 <- data.frame(NULL) } return(tmp1) } int_1_n <- function(test, ref){ test1 <- adply(.data = ref, 1, int_1_1, test = test, .expand = FALSE) if(is.empty(test1)){ return(data.frame(NULL)) } else { testn <- interval_union(test1[,2:3]) return(testn) } } int_n_n <- function(test, ref){ testnn <- adply(.data = test, 1, int_1_n, ref, .expand = FALSE) # return(testnn[,2:3]) # return interval set without index (1st column) return(testnn) # return interval set with index (1st column) --> usefull to go with merge to keep metadata going alon g with interval description } int_intersect <- function(df, ref){ mycols <- colnames(df) df$X1 <- 1:nrow(df) test <- df[, 1:2] tmp <- int_n_n(test, ref) intersection <- merge(tmp, df, by = "X1", suffixes = c("", "init")) return(intersection[,mycols]) } 

AN EXCEPTION

 excl_1_1 <- function(test, ref){ te <- as.vector(test) re <- as.vector(ref) names(re) <- names(te) if(te[1] < re[1]){ # Lower Bound if(te[2] > re[1]){ # overlap x <- unlist(c(te[1], re[1])) } else { # no overlap x <- unlist(c(te[1], te[2])) } } else { # test > ref on lower bound side x <- NULL } if(te[2] > re[2]){ # Upper Bound if(te[1] < re[2]){ # overlap y <- unlist(c(re[2], te[2])) } else { # no overlap y <- unlist(c(te[1], te[2])) } } else { # test < ref on upper bound side y <- NULL } if(is.empty(x) & is.empty(y)){ tmp0 <- NULL tmp1 <- tmp0 } else { tmp0 <- as.data.frame(rbind(x, y)) colnames(tmp0) <- colnames(test) tmp1 <- interval_union(tmp0) } return(tmp1) } excl_1_n <- function(test, ref){ testn0 <- adply(.data = ref, 1, excl_1_1, test = test, .expand=FALSE) # boucle pour intersecter successivement les intervalles sets, pour gérer les intervalles disjoints (identifiés par X1, col1) tmp <- range(testn0) names(tmp) <- colnames(testn0)[2:3] tmp <- as.data.frame(t(tmp)) for(i in unique(testn0[,1])){ tmp <- int_n_n(tmp, testn0[testn0[,1]==i, 2:3]) } return(tmp) } 

TURNING ON

 incl_1_1 <- function(test, ref){ te <- as.vector(test) re <- as.vector(ref) if(te[1] >= re[1] & te[2] <= re[2]){ return(TRUE) } else { return(FALSE) } } incl_1_n <- function(test, ref){ testn <- adply(.data = ref, 1, incl_1_1, test = test) return(any(testn[,ncol(testn)])) } incl_n_n <- function(test, ref){ testnn <- aaply(.data = test, 1, incl_1_n, ref, .expand = FALSE) names(testnn) <- NULL return(testnn) } flat_incl_n_n <- function(test, ref){ ref <- interval_union(ref) return(incl_n_n(test, ref)) } # testing for a vector, instead of an interval set incl_x_1 <- function(x, ref){ test <- (x>=ref[1,1] & x<ref[1,2]) return(test) } incl_x_n <- function(x, ref){ test <- any(x>=ref[,1] & x<ref[,2]) return(test) } 
+6
source share
1 answer

I think that you could effectively use many interval related functions in the sets package.

Here is a small example illustrating the support of a package for constructing an interval, intersection, difference, union and complement, as well as checking for inclusion in the interval. These and many other related functions are described on the help page for ?interval .

 library(sets) i1 <- interval(1,6) i2 <- interval(5,10) i3 <- interval(200,400) i4 <- interval(202,402) i5 <- interval_union(interval_intersection(i1,i2), interval_symdiff(i3,i4)) i5 # [5, 6] U [200, 202) U (400, 402] interval_complement(i5) # [-Inf, 5) U (6, 200) U [202, 400] U (402, Inf] interval_contains_element(i5, 5.5) # [1] TRUE interval_contains_element(i5, 201) # [1] TRUE 

If your intervals are currently encoded in a two-column data.frame, you can use something like mapply() to convert them to intervals of the type used by the sets package:

 df <- data.frame(lBound = c(1,5,100), uBound = c(10, 6, 200)) Ints <- with(df, mapply("interval", l=lBound, r=uBound, SIMPLIFY=FALSE)) Ints # [[1]] # [1, 10] # [[2]] # [5, 6] # [[3]] # [100, 200] 
+7
source

Source: https://habr.com/ru/post/908995/


All Articles