Analisis kelompok sangat populer dalam pemasaran . Popularitasnya kemungkinan besar karena kemudahan algoritma dan perhitungannya. Tidak ada konsep matematika yang serius di dasar, matematika dasar dilakukan di excel. Dari sudut pandang untuk memperoleh wawasan, analisis kelangsungan hidup jauh lebih menarik.

Namun demikian, kami percaya bahwa ada tugas seperti itu dan itu harus diselesaikan. Mencari paket dan fungsi siap pakai tidak menarik - matematikanya sederhana, ada banyak pengaturan. Di bawah ini adalah contoh implementasi yang mungkin (tanpa fiksasi khusus pada kecepatan eksekusi), seluruh kode untuk beberapa lusin baris.

Ini adalah kelanjutan dari serangkaian publikasi sebelumnya .

Beberapa kode

Saat membuat set pengujian, kami mungkin tidak terlalu fokus pada zona waktu, sama saja, datanya acak.

Pembuatan kasus uji

#    15 
set.seed(42)

events_dt <- tibble(user_id = 1000:9000) %>%
  mutate(birthday = Sys.Date() + as.integer(rexp(n(), 1/10))) %>%
  rowwise() %>%
  mutate(timestamp = list(as_datetime(birthday) + 24*60*60 * (
     rexp(10^3, rate = 1/runif(1, 2, 25))))) %>%
  ungroup() %>%
  unnest(timestamp) %>%
  #        
  filter(timestamp >= quantile(timestamp, probs = 0.1),
         timestamp <= quantile(timestamp, probs = 0.95)) %>%
  mutate(date = as_date(timestamp)) %>%
  select(user_id, date) %>%
  setDT(key = c("user_id", "date")) %>%
  #      
  unique()

Mari kita lihat distribusi kumulatif yang dihasilkan

ggplot(events_dt, aes(date)) +
  geom_histogram()

Langkah 1. Membentuk panduan pengguna

" ", .. , . data.table

.

users_dict <- events_dt[, .(birthday = head(date, 1)), by = user_id] %>%
  #       
  .[, week_start := floor_date(.BY[[1]], unit = "week"), by = birthday] %>%
    #      
  .[, cohort := stri_c(
        lubridate::isoyear(.BY[[1]]), 
        sprintf("%02d", lubridate::isoweek(.BY[[1]])), 
        sep = "/"), by = week_start]
#    ,      
as_tibble(janitor::tabyl(users_dict, birthday))

2.

. .

data.frame

cohort_dict <- unique(users_dict[, .(cohort, week_start)])

cohort_tbl <- users_dict[events_dt, on = "user_id"] %>%
  #         
  .[, rel_week := floor(as.numeric(difftime(date, birthday, units = "week")))] %>%
  #   10 
  .[rel_week <= 9] %>%
  #    
  unique(by = c("user_id", "cohort", "rel_week")) %>%
  #       
  .[, .N, by = .(cohort, rel_week)] %>%
  .[, rate := N/max(N), by = cohort]

3.

1. `ggplot`

ggplot

#  ggplot
data_tbl <- cohort_tbl %>%
  #      
  left_join(cohort_dict)

data_tbl %>%
  mutate(cohort_group = forcats::fct_reorder(cohort, week_start, .desc = TRUE)) %>%
  ggplot(mapping = aes(x = rel_week, y = cohort_group, fill = rate)) +
  geom_tile()  +
  geom_text(aes(label = N), colour = "darkgray") +
  labs(x = "  ",
       y = "  ",
       fill = "\n",
       title = "graph_title") +
  scale_fill_viridis_c(option = "inferno") +
  scale_x_continuous(breaks = scales::breaks_width(1)) +
  theme_minimal() +
  theme(panel.grid = element_blank())

2. `gt`

, .

#  -
data_tbl <- cohort_tbl %>%
  pivot_longer(cols = c(N, rate)) %>%
  pivot_wider(names_from = rel_week, values_from = value) %>%
  #      
  left_join(cohort_dict) %>%
  arrange(week_start, desc(name))

odd_rows <- seq(1, to = nrow(data_tbl), by = 2)
even_rows <- seq(2, to = nrow(data_tbl), by = 2)

tab <- data_tbl %>%
  mutate(cohort = if_else(rep(c(TRUE, FALSE), length.out = nrow(.)), 
                          cohort, "")) %>%
  select(-name, -week_start) %>%
  gt(rowname_col = "cohort") %>%
  fmt_percent(columns = matches("[0-9]+"), 
              rows = odd_rows, 
              decimals = 0, pattern = "<big>{x}</big>") %>%
  fmt_missing(columns = everything(), 
              missing_text = "---") %>%
  tab_stubhead(label = "  ") %>%
  tab_spanner(label = "  ",
              columns = everything()) %>%
  tab_header(title = "") %>%
  data_color(columns = everything(),
             colors = scales::col_numeric(palette = "inferno",
                                          domain = c(0, 1), 
                                          alpha = 0.6,
                                          na.color = "lightgray")) %>%
  tab_options(
    table.font.size = "smaller",
    data_row.padding = px(1),
    table.width = pct(75)
  ) %>%
  tab_style(
    style = list(
      cell_fill(color = "white"),
      cell_text(style = "italic"),
      cell_borders(sides = "bottom")
    ),
    locations = cells_body(
      columns = everything(),
      rows = even_rows)
  ) %>%
  tab_style(
    style = list(
      cell_borders(sides = "top")
    ),
    locations = cells_body(
      columns = everything(),
      rows = odd_rows)
  )

tab

, .

Publikasi sebelumnya - “R dan bekerja dengan waktu. Ada apa di balik layar? " ...

R di tangan pemasar. Analisis kelompok do-it-yourself

Beberapa kode

Langkah 1. Membentuk panduan pengguna

2.

3.

1. `ggplot`

2. `gt`

More articles:

R di tangan pemasar. Analisis kelompok do-it-yourself

Beberapa kode

Langkah 1. Membentuk panduan pengguna

2.

3.

1. ggplot (adsbygoogle = window.adsbygoogle || []).push({}); (adsbygoogle = window.adsbygoogle || []).push({}); (adsbygoogle = window.adsbygoogle || []).push({});

2. gt (adsbygoogle = window.adsbygoogle || []).push({}); (adsbygoogle = window.adsbygoogle || []).push({}); (adsbygoogle = window.adsbygoogle || []).push({});

More articles:

1. `ggplot`

2. `gt`