# Example transcript coordinates.
# The header line names three columns while every data row carries four
# fields, so read.table() uses the first field (the NONHSAT... identifier)
# as the row names and the remaining fields as chr / start / end.
# `text =` replaces an explicit textConnection(): read.table() then creates
# and closes the temporary connection itself instead of leaving it open.
tx0 <- read.table(header = TRUE, text = "chr start end
NONHSAT000001 chr1 11868 14409
NONHSAT000002 chr1 11871 14412
NONHSAT000003 chr1 11873 14409
NONHSAT000004 chr1 12009 13670
NONHSAT000005 chr1 14777 16668
NONHSAT000006 chr1 15602 29370")
# Example gene coordinates.
# The header line names three columns while every data row carries four
# fields, so read.table() uses the first field (the NONHSAG... identifier)
# as the row names and the remaining fields as chr / start / end.
# `text =` replaces an explicit textConnection(): read.table() then creates
# and closes the temporary connection itself instead of leaving it open.
gene0 <- read.table(header = TRUE, text = "chr start end
NONHSAG000001 chr1 11869 14412
NONHSAG000002 chr1 14778 29370
NONHSAG000003 chr1 29554 31109
NONHSAG000004 chr1 34554 36081
NONHSAG000005 chr1 36273 50281
NONHSAG000006 chr1 62948 63887")
The GenomicRanges package from Bioconductor is designed for this kind of range-overlap query (see the package documentation).
library(GenomicRanges)
# Wrap the transcript and gene tables as GRanges objects: `chr` supplies the
# sequence name, `start`/`end` the interval.
tx <- GRanges(
  seqnames = tx0$chr,
  ranges = IRanges(start = tx0$start, end = tx0$end)
)
gene <- GRanges(
  seqnames = gene0$chr,
  ranges = IRanges(start = gene0$start, end = gene0$end)
)
# Widen every gene range by 20 in total while keeping its centre fixed.
gene <- resize(
  gene,
  width = width(gene) + 20,
  fix = "center"
)
# Pair each transcript (query) with the widened gene(s) (subject) it lies
# within.
olaps <- findOverlaps(query = tx, subject = gene, type = "within")
This shows, for example, that "query" (tx) ranges 1, 2, 3 and 4 lie within "subject" (gene) range 1.
> findOverlaps(tx, gene, type="within")
Hits of length 6
queryLength: 6
subjectLength: 6
queryHits subjectHits
<integer> <integer>
1 1 1
2 2 1
3 3 1
4 4 1
5 5 2
6 6 2
Gene 1 contains 4 transcripts, and gene 2 contains 2.
> table(subjectHits(olaps))
1 2
4 2
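The same approach works on the full data set. Build the ranges from the full coordinate tables, this time attaching the identifiers as range names: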
# Build GRanges from the full coordinate tables; V1 is passed as the sequence
# name and V2/V3 as the interval start/end.
# The range names are taken from the same table that supplies the
# coordinates, so names and ranges always have matching lengths and order
# (tx0/gene0 are separate objects and need not line up with these tables).
# NOTE(review): assumes the identifiers are stored as the row names of
# transcriptcoords / genecoords -- confirm against where they are loaded.
tx <- with(
  transcriptcoords,
  GRanges(V1, IRanges(V2, V3, names = rownames(transcriptcoords)))
)
gene <- with(
  genecoords,
  GRanges(V1, IRanges(V2, V3, names = rownames(genecoords)))
)
Timings:
# Time the widening of the gene ranges (20 in total, centre kept fixed).
# `gene` is overwritten with the widened ranges.
system.time({
  gene <- resize(gene, width = width(gene) + 20, fix = "center")
})
# (timing output not preserved in this copy)

# Time the transcript-within-gene overlap search; the hits themselves are
# discarded here, only the elapsed time is reported.
system.time({
  findOverlaps(query = tx, subject = gene, type = "within")
})
# (timing output not preserved in this copy)
For comparison, here is the data.table solution from @danas.zuokos, timed on 1000 rows:
# Time the data.table approach: join, filter by tolerance, pick one gene per
# transcript. `tol` must already be defined by the caller.
system.time({
  # Join genecoords against transcriptcoords, allowing many-to-many matches.
  dt <- genecoords[transcriptcoords, allow.cartesian = TRUE]
  # Keep the joined rows whose start/end span, relaxed by `tol` on each side,
  # covers the start.1/end.1 span, then report for every transcript the gene
  # with the smallest `size`.
  # NOTE(review): presumably start/end are the gene columns and
  # start.1/end.1 the transcript columns produced by the join -- confirm.
  res <- dt[
    end >= end.1 - tol & start <= start.1 + tol,
    list(gene = gene[which.min(size)]),
    by = transcript
  ]
})