This solution is based on functions from tidyverse , lubridate and stringr .
Update
I misunderstood the definition of tid . Now the code should calculate tid , as expected. tid shows the total number of records, but the beginning of tid is the earliest month of the earliest year, and myear is the month and year information combined together.
library(tidyverse) library(lubridate) library(stringr) df2 <- df %>% mutate(start_date = ymd(start_date), end_date = ymd(end_date)) %>% mutate(start_year = year(start_date), end_year = year(end_date), start_month = month(start_date), end_month = month(end_date)) %>% mutate(Year = map2(start_year, end_year, `:`)) %>% unnest() %>% group_by(profile) %>% mutate(first_year = ifelse(Year == min(Year), TRUE, FALSE), last_year = ifelse(Year == max(Year), TRUE, FALSE)) %>% mutate(start_month = ifelse(!first_year, 1, start_month), end_month = ifelse(!last_year, 12, end_month)) %>% mutate(Month = map2(start_month, end_month, `:`)) %>% unnest() %>% mutate(endid = n() + Month - 1) %>% mutate(tid = first(Month):first(endid)) %>% mutate(Multiple_Year = ifelse(length(unique(Year)) > 1, TRUE, FALSE)) %>% ungroup() %>% mutate(tid = ifelse(Year > min(Year) & !Multiple_Year, tid + 12 * (Year - min(Year)), tid)) %>% mutate(tid = str_pad(tid, width = 2, pad = "0")) %>% mutate(Month = str_pad(Month, width = 2, pad = "0")) %>% mutate(myear = paste(Month, Year, sep = "-")) %>% select(profile, start_date, end_date, tid, myear)
Output
Now look at some of the df2 output to see if the code is working as expected.
The first two lines of lehman
df2 %>% filter(profile %in% "lehman") %>% head(2) # A tibble: 2 x 5 profile start_date end_date tid myear <fctr> <date> <date> <chr> <chr> 1 lehman 2008-01-01 2009-12-31 01 01-2008 2 lehman 2008-01-01 2009-12-31 02 02-2008
The last line of lehman
df2 %>% filter(profile %in% "lehman") %>% tail(1) # A tibble: 1 x 5 profile start_date end_date tid myear <fctr> <date> <date> <chr> <chr> 1 lehman 2008-01-01 2009-12-31 24 12-2009
The first two lines of Picasso
df2 %>% filter(profile %in% "picasso") %>% head(2) # A tibble: 2 x 5 profile start_date end_date tid myear <fctr> <date> <date> <chr> <chr> 1 picasso 2009-02-02 2009-12-31 14 02-2009 2 picasso 2009-02-02 2009-12-31 15 03-2009
Data preparation
profile <- c('lehman', 'john','oliver','stephen','picasso') start_date <- c("2008-01-01", "2008-02-02", "2008-04-02", "2008-09-02", "2009-02-02") end_date <- c("2009-12-31", "2009-12-31", "2009-12-31", "2009-12-31", "2009-12-31") df <- data.frame(profile,start_date,end_date)