# Summarise the contact log `df` per (PurchaseID, UserID) pair.
# Columns are selected by position: 1:2 are the grouping keys and columns 3, 4
# and 5 are the values being aggregated (named TimeofContact, Purchase and Age
# in the merged result, judging by the names used below).

# Span between the earliest and latest contact of each group.
dat1 <- aggregate(. ~ PurchaseID + UserID, data = df[, 1:3],
                  FUN = function(v) max(v) - min(v))
# Total of column 4 per group.
dat2 <- aggregate(. ~ PurchaseID + UserID, data = df[, c(1:2, 4)], FUN = sum)
# Mean of column 5 per group.
dat3 <- aggregate(. ~ PurchaseID + UserID, data = df[, c(1:2, 5)], FUN = mean)

dat <- merge(
  merge(dat1, dat2, by = c("PurchaseID", "UserID")),
  dat3,
  by = c("PurchaseID", "UserID")
)

# Drop groups whose span is exactly zero (e.g. a single contact). A logical
# mask is used instead of `-which(...)`: when no row matches, `-which(...)`
# is `-integer(0)`, which selects nothing and would silently empty `dat`.
zero_span <- !is.na(dat$TimeofContact) & dat$TimeofContact == 0
dat <- dat[!zero_span, ]

# Rename by name rather than by position so a change in column order cannot
# relabel the wrong column.
names(dat)[names(dat) == "TimeofContact"] <- "CustomerJourneyLength"

# Round to whole seconds BEFORE splitting into h/m/s; rounding only the
# seconds remainder could yield "60 seconds" (e.g. a remainder of 59.6).
# NOTE(review): the span is assumed to be in seconds -- confirm against how
# TimeofContact is populated.
total_seconds <- round(dat$CustomerJourneyLength)
hours <- total_seconds %/% 3600
minutes <- (total_seconds %% 3600) %/% 60
seconds <- total_seconds %% 60
dat$CustomerJourneyLength <- paste0(
  hours, " hours ", minutes, " minutes ", seconds, " seconds"
)
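# Example output of printing `dat` (values differ between runs because the
# sample data is drawn at random without a fixed seed):
#
#   PurchaseID UserID          CustomerJourneyLength Purchase Age
# 1          1      1 15 hours 28 minutes 49 seconds        1  27
# 2          1      2 15 hours 21 minutes 44 seconds        3  31
# 3          2      1  4 hours 11 minutes 17 seconds        2  27
# 5          3      1  9 hours 39 minutes 45 seconds        1  27
# 6          3      2 14 hours 36 minutes 31 seconds        1  31
#
# Sample data (`df` must exist before the aggregation code is run):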
# Simulated contact log: 20 contacts spread over 2 users and 3 purchases.
# No seed is set, so every run draws different values.
n_contacts <- 20L

# Read the clock once so both bounds of the 20-hour window share one origin;
# as.numeric() makes the POSIXct -> seconds conversion explicit for runif().
window_start <- as.numeric(Sys.time())

df <- data.frame(
  UserID = sample(1:2, n_contacts, replace = TRUE),
  PurchaseID = sample(1:3, n_contacts, replace = TRUE),
  # Contact time as a plain number of seconds, uniform over the next 20 hours.
  TimeofContact = runif(n_contacts, window_start, window_start + 20 * 3600),
  Purchase = sample(0:1, n_contacts, replace = TRUE),
  # Typed NA so the column is integer from the start, not logical.
  Age = rep(NA_integer_, n_contacts)
)

# Age is constant per user: draw one value for each user and assign it to all
# of that user's rows.
df$Age[df$UserID == 1] <- sample(20:40, 1)
df$Age[df$UserID == 2] <- sample(20:40, 1)