There are two costs here: 1) the nrow(myData) * nrow(refData) comparisons and 2) the concatenation of refData$id.
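For the first, assuming myData$pos is sorted (increasing), we can use findInterval to find where each refData$pos falls within myData$pos +/- the tolerance (here 2). That way we need on the order of nrow(refData) * log(nrow(myData)) comparisons, which should be considerably fewer.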
Define:
# Short aliases for the two position vectors used in the steps below.
# NOTE(review): `a` is later passed (shifted) to findInterval() as the break
# points, which must be sorted non-decreasingly -- confirm myData$pos is sorted.
b <- refData$pos
a <- myData$pos
Using the intervals defined by a + 2, find where each element of b falls:
# For each element of b, index of the left-open interval
# (a[i] + 2, a[i + 1] + 2] that contains it. all.inside = TRUE clamps the
# result to 1:(length(a) - 1), so values of b beyond either end map to the
# first or last interval.
i <- findInterval(
  x = b,
  vec = a + 2L,
  left.open = TRUE,
  all.inside = TRUE
)
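Since the intervals are (lower, upper] and the result is restricted to 1:(length(a) - 1), check which of the two bounding elements is within 2 of b and keep the smallest such index of a: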
# Test both end points of the interval found for each b: a[i] and a[i + 1].
# A candidate index is kept only if its element of a is within 2 of b,
# otherwise it becomes NA.
i2 <- ifelse(abs(a[i] - b) <= 2, i, NA)
i1 <- ifelse(abs(a[i + 1L] - b) <= 2, i + 1L, NA)
# Smallest surviving index into a per element of b; NA when neither end point
# is close enough.
ii <- pmin(i2, i1, na.rm = TRUE)
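Likewise, using left-closed intervals ([lower, upper)) on a - 2, find where each b falls and keep the largest index of a lying within [b - 2, b + 2]: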
# For each element of b, index of the left-closed interval
# [a[j] - 2, a[j + 1] - 2) that contains it, clamped to 1:(length(a) - 1)
# by all.inside = TRUE.
j <- findInterval(
  x = b,
  vec = a - 2L,
  left.open = FALSE,
  all.inside = TRUE
)
# Keep an end point's index only when its element of a is within 2 of b.
j2 <- ifelse(abs(a[j] - b) <= 2, j, NA)
j1 <- ifelse(abs(a[j + 1L] - b) <= 2, j + 1L, NA)
# Largest surviving index into a per element of b; NA when neither end point
# is close enough.
jj <- pmax(j2, j1, na.rm = TRUE)
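The first (ii) and last (jj) indices delimit the elements of myData$pos (a) that lie within +/- 2 of each refData$pos (b) (NA where there is no such element).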
, , .
However, to continue representing concatenated refData$ids as matches, we could probably use the IRanges package from here and hope for something efficient:
# Turn the per-refData index bounds [ii, jj] into integer ranges, overlap them
# with one width-1 range per row of myData, and collapse the ids of all
# overlapping refData rows into one comma-separated string per myData row.
library(IRanges)
# seq_len() rather than 1:nrow(): a zero-row myData yields an empty index
# vector instead of c(1, 0), which would create bogus ranges and levels.
nr <- seq_len(nrow(myData))
myrng <- IRanges(nr, nr)
# refData rows without any match (NA bounds) become the range [0, 0]; row
# indices start at 1, so such a range overlaps nothing.
refrng <- IRanges(ifelse(is.na(ii), 0L, ii), ifelse(is.na(jj), 0L, jj))
ovrs <- findOverlaps(myrng, refrng)
# factor(..., nr) keeps a level for every myData row, so rows without hits
# show up as NA in the result.
tapply(refData$id[subjectHits(ovrs)], factor(queryHits(ovrs), nr), toString)
# 1 2 3 4 5
#"c, g, i, j, l" "c, g, i, j" "c, g, i" "g, i" "g"
# 6 7 8 9 10
# NA NA "k" "h, k" "h, k, m"