昨日、サブスクリプションのコホートを分析する方法を探していたのを覚えています。次のようにすればできると思います。
library(tidyverse)
# Build a reproducible synthetic transaction log for the cohort example.
# Each of the `n` rows is one transaction: a user id drawn from 1..20, a
# transaction date drawn from the first day of each month of 2016, and a
# group label "A" or "B". All three columns are sampled with replacement.
set.seed(1)
n <- 100
df <- data.frame(
  user = sample(1:20, n, replace = TRUE),
  transDate = sample(
    seq(as.Date("2016-01-01"), as.Date("2016-12-31"), by = "1 month"),
    n,
    replace = TRUE
  ),
  group = sample(LETTERS[1:2], n, replace = TRUE)
)
diffmonth <- function(d1, d2) {
  # Inclusive number of calendar months from d1 to d2: two dates in the same
  # calendar month give 1, adjacent months give 2, and so on. Day-of-month is
  # ignored. Inputs are passed through as.Date() (numeric values are taken as
  # days since 1900-01-01), and the usual vector recycling applies.
  # Approach from:
  # http://stackoverflow.com/questions/1995933/number-of-months-between-two-dates
  month_index <- function(d) {
    # POSIXlt stores years since 1900 and a zero-based month, so this is a
    # running month counter.
    parts <- as.POSIXlt(as.Date(d, origin = "1900-01-01"))
    parts$year * 12 + parts$mon
  }
  end_index <- month_index(d2)
  start_index <- month_index(d1)
  end_index - start_index + 1L
}
# Cohort table: one row per cohort (a user's first transaction month within a
# group, joined with the group label as "<date>_<group>"), one column per
# month number since that first transaction (1 = the cohort month itself).
# Cells hold the number of transactions; combinations with none are 0.
df %>%
  group_by(user, group) %>%
  mutate(
    cohort = min(transDate),
    month = diffmonth(cohort, transDate)
  ) %>%
  # Drop the per-user grouping before reshaping so it cannot leak downstream.
  ungroup() %>%
  unite(cohort, cohort, group, remove = TRUE) %>%
  # count() returns an ungrouped result sorted by cohort, then month.
  count(cohort, month, name = "n") %>%
  # pivot_wider() replaces the superseded spread(); names_sort keeps the
  # month columns in ascending numeric order.
  pivot_wider(
    names_from = month,
    values_from = n,
    values_fill = 0L,
    names_sort = TRUE
  )
# # A tibble: 16 × 12
# cohort `1` `2` `3` `4` `5` `6` `7` `8` `9` `10` `11`
# * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2016-01-01_A 5 1 0 1 1 1 1 0 2 0 0
# 2 2016-02-01_A 1 0 0 0 0 0 0 0 1 0 1
# 3 2016-02-01_B 4 1 2 1 0 1 2 0 1 1 0
# 4 2016-03-01_A 5 0 3 1 2 2 2 0 1 2 0
# 5 2016-03-01_B 4 0 0 0 2 0 1 0 0 0 0
# 6 2016-04-01_A 4 0 2 1 0 1 0 2 1 0 0
# 7 2016-04-01_B 1 0 0 0 0 0 0 0 0 0 0
# 8 2016-05-01_A 2 0 2 2 0 0 2 0 0 0 0
# 9 2016-05-01_B 1 0 0 1 0 0 2 0 0 0 0
# 10 2016-06-01_A 1 0 2 0 0 1 0 0 0 0 0
# 11 2016-06-01_B 4 0 0 0 0 1 1 0 0 0 0
# 12 2016-07-01_A 1 0 1 0 0 0 0 0 0 0 0
# 13 2016-08-01_B 4 1 1 0 0 0 0 0 0 0 0
# 14 2016-09-01_A 1 0 0 0 0 0 0 0 0 0 0
# 15 2016-10-01_B 1 0 0 0 0 0 0 0 0 0 0
# 16 2016-12-01_A 3 0 0 0 0 0 0 0 0 0 0