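For the sake of completeness, here is a data.table solution.
First, count the number of occurrences of each pair of start_gid and end_gid: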
# Tally how many rows of dt share each directed (start_gid, end_gid) pair;
# the tally is stored in column N.
pairs <- dt[, .(N = .N), by = c("start_gid", "end_gid")]
pairs
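Here, the 3 occurrences of (100, 82) can each be matched with an occurrence of (82, 100), but the 4th occurrence of (82, 100) has no partner. Likewise, (100, 32), (34, 100) and (31, 100) have no partner.
Next, the number of matches, nmatch, is computed. To do so, (100, 82) and (82, 100) are put into the same group, 82_100. If a group contains only one direction, i.e., there is no reverse pair, nmatch is set to 0.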
# Map both directions of a pair onto one direction-agnostic key (smaller gid
# first), then record per key how many rows can be matched: the smaller of the
# directional counts, or 0 when the key holds at most one directed pair.
pairs <- pairs[
  ,
  .(
    start_gid,
    end_gid,
    nmatch = if (.N > 1L) min(N) else 0L
  ),
  by = .(
    grp = paste(
      pmin(start_gid, end_gid),
      pmax(start_gid, end_gid),
      sep = "_"
    )
  )
]
pairs
Now, the result is joined back to the original data, dt:
# Look up every row of dt in pairs by its (start_gid, end_gid) so that each
# original row carries the grp and nmatch of its pair.
out <- pairs[dt, on = c("start_gid", "end_gid")]
out
# grp start_gid end_gid nmatch track_fid
# 1: 82_100 100 82 3 1
# 2: 82_100 82 100 3 2
# 3: 82_100 100 82 3 3
# 4: 32_100 100 32 0 4
# 5: 82_100 82 100 3 5
# 6: 82_100 82 100 3 6
# 7: 82_100 82 100 3 7
# 8: 82_100 100 82 3 8
# 9: 34_100 34 100 0 9
#10: 31_100 31 100 0 10
Finally, within each pair only the first nmatch rows are flagged, and the result is ordered by track_fid:
# Within each directed pair, flag only the first nmatch rows as matched
# (to_from = TRUE); the remaining rows of that pair stay FALSE.
out <- out[
  ,
  .(track_fid, to_from = nmatch >= seq_len(.N)),
  by = c("start_gid", "end_gid")
]
# Show the flags in track_fid order.
out[order(track_fid)]
#     start_gid end_gid track_fid to_from
# 1: 100 82 1 TRUE
# 2: 82 100 2 TRUE
# 3: 100 82 3 TRUE
# 4: 100 32 4 FALSE
# 5: 82 100 5 TRUE
# 6: 82 100 6 TRUE
# 7: 82 100 7 FALSE
# 8: 100 82 8 TRUE
# 9: 34 100 9 FALSE
#10: 31 100 10 FALSE
Edit 1: Benchmark (10 rows)
Comparing the data.table approach with Roland's Rcpp solution on the 10-row sample data:
library(microbenchmark)

# C++ source of the Rcpp routine being benchmarked. For each not-yet-flagged
# position i it searches the later, not-yet-flagged positions j for the first
# one with x[i] == y[j] and x[j] == y[i], flags both, and moves on.
# The string is kept in one place so the up-front compile and the
# `rcpp_source` benchmark use exactly the same code.
myfun_code <- "
// [[Rcpp::export]]
Rcpp::LogicalVector myfun(const Rcpp::IntegerVector x, const Rcpp::IntegerVector y) {
Rcpp::LogicalVector res(x.length());
for (int i=0; i<(x.length()-1); i++) {
if(res(i)) continue;
for (int j=i+1; j<x.length(); j++) {
if (res(j)) continue;
if (x(i) == y(j) && x(j) == y(i)) {
res(i) = true;
res(j) = true;
break;
}
}
}
return res;
}
"

# Compile once before timing: microbenchmark evaluates the expressions in
# random order by default, so `rcpp_func` could otherwise be evaluated before
# `rcpp_source` has ever defined myfun() and fail with "could not find
# function".
Rcpp::sourceCpp(code = myfun_code)

microbenchmark(
  # Complete data.table pipeline: count pairs, derive nmatch per undirected
  # group, join back onto dt, flag the first nmatch rows, order by track_fid.
  dt = {
    dt[, .N, by = .(start_gid, end_gid)][
      , .(start_gid, end_gid, nmatch = if (.N <= 1L) 0L else min(N)),
      by = .(grp = paste(pmin(start_gid, end_gid), pmax(start_gid, end_gid), sep = "_"))][
        dt, on = .(start_gid, end_gid)][
          , .(track_fid, to_from = seq_len(.N) <= nmatch),
          by = .(start_gid, end_gid)][
            order(track_fid)]
  },
  # Rcpp variant including the call to sourceCpp(). The namespace-qualified
  # call works even when Rcpp has not been attached with library().
  rcpp_source = {
    Rcpp::sourceCpp(code = myfun_code)
    dt$from_to <- myfun(dt$start_gid, dt$end_gid)
    dt
  },
  # Rcpp variant timing only the call of the already-compiled function.
  rcpp_func = {
    dt$from_to <- myfun(dt$start_gid, dt$end_gid)
    dt
  }
)
Unit: microseconds
expr min lq mean median uq max neval
dt 2873.017 3233.418 3466.5484 3408.0495 3558.705 6345.633 100
rcpp_source 8112.335 8537.114 8932.8953 8811.2385 9173.150 12093.931 100
rcpp_func 101.192 121.582 142.0769 137.4405 154.620 255.246 100
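So, the Rcpp function is more than 20 times faster than the data.table solution (once it has been compiled). However, if the call to sourceCpp is included in the timing, the data.table solution is faster.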
, data.table data.table .
2:
@Roland, data.table Rcpp :

1000 Rcpp , data.table. data.table , Rcpp. , Rcpp .