A version of the function that selects rows by index (fun1), alongside the ddply-based version (fun2):
# Select the first `counts[i]` rows of the i-th group using plain row indices.
#
# `data` must already be sorted so that equal `group` values are contiguous;
# the i-th run of equal values is paired with `counts[i]`.
#
# Args:
#   data:   data frame with a `group` column (defaults to the global `f`).
#   counts: number of rows to keep per group, one entry per group, in the
#           order the groups appear in `data` (defaults to the global `nf`).
#
# Returns:
#   The selected rows of `data`, in their original order.
fun1 <- function(data = f, counts = nf) {
  n_rows <- nrow(data)
  if (n_rows == 0L) {
    return(data)
  }

  # Row index at which each run of identical group values starts.
  starts <- c(0L, which(diff(data$group) != 0)) + 1L
  stopifnot(length(counts) == length(starts))

  # Never take more rows than a group actually has; otherwise the selection
  # would spill over into the following group (or past the end of `data`).
  sizes <- diff(c(starts, n_rows + 1L))
  take <- pmin(as.integer(counts), sizes)

  # For every group emit starts[i], starts[i] + 1, ..., starts[i] + take[i] - 1.
  keep <- rep.int(starts, take) + sequence(take) - 1L
  data[keep, ]
}

# Reference implementation based on plyr: split `data` by `group` and keep the
# head of every piece.
#
# NOTE(review): `counts` is indexed by the value in the first cell of each
# piece (`x[1, 1]`), i.e. this presumably expects `group` to be the first
# column and the group ids to be usable as positions in `counts` -- confirm
# for data other than the benchmark set.
#
# Args:
#   data:   data frame with a `group` column (defaults to the global `f`).
#   counts: number of rows to keep per group (defaults to the global `nf`).
#
# Returns:
#   Data frame assembled by ddply from the per-group heads.
fun2 <- function(data = f, counts = nf) {
  plyr::ddply(data, plyr::.(group), function(x) head(x, counts[x[1, 1]]))
}
Test data (size suggested by the question author)
# Benchmark data: 50000 rows spread at random over (up to) 1000 groups.
n_rows <- 50000L
n_groups <- 1000L
f <- data.frame(
  group = sample(seq_len(n_groups), n_rows, replace = TRUE),
  y = seq_len(n_rows)
)
# Sort by group so that each group occupies one contiguous run of rows.
f <- f[order(f$group), ]
# Number of rows to keep from each group, drawn from a Poisson(3) distribution.
nf <- rpois(length(unique(f$group)), 3)
# Time both implementations on the same data (fun2 needs the plyr package).
system.time(fun1())
system.time(fun2())
On my system, fun1 is about 60 times faster than fun2.
source share