I am trying to use the derivedFactor function from the mosaic package in R to create a factor variable, but it is surprisingly slow. When I wrote the same logic as a series of if statements and ran it, it was almost twice as fast.
Here's a reproducible example (sorry for the length):
library(microbenchmark) library(mosaic) library(lubridate) library(data.table) library(dplyr) df <- structure( list( study.week = structure( c( 1299369600, 1299974400, 1300579200, 1301184000, 1301788800, 1302393600, 1302998400, 1303603200, 1304208000, 1304812800, 1305417600, 1306022400, 1306627200, 1307232000, 1307836800, 1308441600, 1309046400, 1309651200, 1310256000, 1310860800, 1311465600, 1312070400, 1312675200, 1313280000, 1313884800, 1314489600, 1315094400, 1315699200, 1316304000, 1316908800, 1317513600, 1318118400, 1318723200, 1319328000, 1319932800, 1320537600, 1321142400, 1321747200, 1322352000, 1322956800, 1323561600, 1324166400, 1324771200, 1325376000, 1325980800, 1326585600, 1327190400, 1327795200, 1328400000, 1329004800, 1329609600, 1330214400, 1330819200, 1331424000, 1332028800, 1332633600, 1333238400, 1333843200, 1334448000, 1335052800, 1335657600, 1336262400, 1336867200, 1337472000, 1338076800, 1338681600, 1339286400, 1339891200, 1340496000, 1341100800, 1341705600, 1342310400, 1342915200, 1343520000, 1344124800, 1344729600, 1345334400, 1345939200, 1346544000, 1347148800, 1347753600, 1348358400, 1348963200, 1349568000, 1350172800, 1350777600, 1351382400, 1351987200, 1352592000, 1353196800, 1353801600, 1354406400, 1355011200, 1355616000, 1356220800, 1356825600, 1357430400, 1358035200, 1358640000, 1359244800, 1359849600, 1360454400, 1361059200, 1361664000, 1362268800, 1362873600, 1363478400, 1364083200, 1364688000, 1365292800, 1365897600, 1366502400, 1367107200, 1367712000, 1368316800, 1368921600, 1369526400, 1370131200, 1370736000, 1371340800, 1371945600, 1372550400, 1373155200, 1373760000, 1374364800, 1374969600, 1375574400, 1376179200, 1376784000, 1377388800, 1377993600, 1378598400, 1379203200, 1379808000, 1380412800, 1381017600, 1381622400, 1382227200, 1382832000, 1383436800, 1384041600, 1384646400, 1385251200, 1385856000, 1386460800, 1387065600, 1387670400, 1388275200, 1388880000, 1389484800, 1390089600, 1390694400, 1391299200, 1391904000, 
1392508800, 1393113600, 1393718400, 1394323200, 1394928000, 1395532800, 1396137600, 1396742400, 1397347200 ), class = c("POSIXct", "POSIXt"), tzone = "UTC" ), time.min = structure( cclass = c("POSIXct", "POSIXt"), tzone = "UTC" ), time.max = structure( c( 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 
1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694, 1390345694 ), class = c("POSIXct", "POSIXt"), tzone = "UTC" ), adopt = structure( c( 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L ), .Label = c("experiment", "abandon", "adopt"), class = "factor" ), floor.min = structure( c( 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 
1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000, 1388880000 ), tzone = "UTC", class = c("POSIXct", "POSIXt") ), sup.using = csup.use = structure( c( 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L ), .Label = c("never used", "experimented", "abandoned"), class = "factor" ) ), .Names = c( "SupSID", "sid", "study.week", "event", "n.posts", "cum.posts", "time.min", "time.max", "adopt", "floor.min", "sup.using", "sup.use" ), sorted = "SupSID", class = 
c("tbl_dt", "tbl", "data.table", "data.frame"), row.names = c(NA,-163L) )

# base R function:
#
# Classify a single observation into a usage status, then lift the scalar
# function over whole columns with Vectorize().
#
# Arguments (one value each per call; Vectorize() iterates over the columns):
#   floor.min  - start of the usage window; may be NA
#   study.week - the week being classified
#   time.max   - end of the usage window
#   adopt      - compared against "experiment" and "abandon"
#
# Returns one of the strings "never used", "experimented", "abandoned" or
# "currently using" for each element.
recodeTimes <- Vectorize(function(floor.min, study.week, time.max, adopt) {
  # The condition is scalar here, so use the short-circuit `||`: a missing
  # floor.min is classified before it is ever compared with study.week.
  if (is.na(floor.min) || study.week < floor.min) {
    "never used"
  } else if (study.week > time.max) {
    # Past the end of the window: the label depends on the adoption status.
    if (adopt == "experiment") {
      "experimented"
    } else if (adopt == "abandon") {
      "abandoned"
    } else {
      "currently using"
    }
  } else {
    "currently using"
  }
})

# Benchmark the two ways of deriving `sup.use`. Both expressions recompute
# `floor.min` and `sup.using` identically and differ only in the last column.
microbenchmark(
  {
    # Variant 1: mosaic::derivedFactor()
    df1 <- df %>%
      mutate(
        floor.min = floor_date(time.min, "week"),
        sup.using = study.week %within% interval(floor.min, time.max),
        sup.using = ifelse(is.na(sup.using), FALSE, sup.using),
        sup.use = derivedFactor(
          "never used" = (is.na(floor.min) | study.week < floor.min),
          "experimented" = (study.week > time.max & adopt == "experiment"),
          "abandoned" = (study.week > time.max & adopt == "abandon"),
          .method = "first",
          .default = "currently using"
        )
      )
  },
  {
    # Variant 2: the Vectorize()d if/else chain defined above
    df2 <- df %>%
      mutate(
        floor.min = floor_date(time.min, "week"),
        sup.using = study.week %within% interval(floor.min, time.max),
        sup.using = ifelse(is.na(sup.using), FALSE, sup.using),
        sup.use = recodeTimes(floor.min, study.week, time.max, adopt)
      )
  }
)

# results (rows presumably in the order of the expressions above:
# derivedFactor first, recodeTimes second):
#       min       lq     mean  median        uq      max neval
#  57.41792 62.77737 87.01017 72.6734 104.12907 242.4751   100
#  32.77108 34.84122 50.51734 43.2975  60.34229 122.6671   100
Any guesses on what causes the big time difference?