R di tangan pemasar. Analisis kelompok do-it-yourself

Analisis kelompok sangat populer dalam pemasaran . Popularitasnya kemungkinan besar karena kemudahan algoritma dan perhitungannya. Tidak ada konsep matematika yang serius di dasar, matematika dasar dilakukan di excel. Dari sudut pandang untuk memperoleh wawasan, analisis kelangsungan hidup jauh lebih menarik.







Namun demikian, kami percaya bahwa ada tugas seperti itu dan itu harus diselesaikan. Mencari paket dan fungsi siap pakai tidak menarik - matematikanya sederhana, ada banyak pengaturan. Di bawah ini adalah contoh implementasi yang mungkin (tanpa fiksasi khusus pada kecepatan eksekusi), seluruh kode untuk beberapa lusin baris.







Ini adalah kelanjutan dari serangkaian publikasi sebelumnya .







Beberapa kode



Saat membuat set pengujian, kami mungkin tidak terlalu fokus pada zona waktu, sama saja, datanya acak.







Pembuatan kasus uji
#    15 
set.seed(42)

events_dt <- tibble(user_id = 1000:9000) %>%
  mutate(birthday = Sys.Date() + as.integer(rexp(n(), 1/10))) %>%
  rowwise() %>%
  mutate(timestamp = list(as_datetime(birthday) + 24*60*60 * (
     rexp(10^3, rate = 1/runif(1, 2, 25))))) %>%
  ungroup() %>%
  unnest(timestamp) %>%
  #        
  filter(timestamp >= quantile(timestamp, probs = 0.1),
         timestamp <= quantile(timestamp, probs = 0.95)) %>%
  mutate(date = as_date(timestamp)) %>%
  select(user_id, date) %>%
  setDT(key = c("user_id", "date")) %>%
  #      
  unique()
      
      





Mari kita lihat distribusi kumulatif yang dihasilkan







ggplot(events_dt, aes(date)) +
  geom_histogram()
      
      











Langkah 1. Membentuk panduan pengguna



" ", .. , . data.table



.







users_dict <- events_dt[, .(birthday = head(date, 1)), by = user_id] %>%
  #       
  .[, week_start := floor_date(.BY[[1]], unit = "week"), by = birthday] %>%
    #      
  .[, cohort := stri_c(
        lubridate::isoyear(.BY[[1]]), 
        sprintf("%02d", lubridate::isoweek(.BY[[1]])), 
        sep = "/"), by = week_start]
#    ,      
as_tibble(janitor::tabyl(users_dict, birthday))
      
      











2.



.







. .







data.frame
cohort_dict <- unique(users_dict[, .(cohort, week_start)])

cohort_tbl <- users_dict[events_dt, on = "user_id"] %>%
  #         
  .[, rel_week := floor(as.numeric(difftime(date, birthday, units = "week")))] %>%
  #   10 
  .[rel_week <= 9] %>%
  #    
  unique(by = c("user_id", "cohort", "rel_week")) %>%
  #       
  .[, .N, by = .(cohort, rel_week)] %>%
  .[, rate := N/max(N), by = cohort]
      
      





3.



1. ggplot





ggplot
#  ggplot
data_tbl <- cohort_tbl %>%
  #      
  left_join(cohort_dict)

data_tbl %>%
  mutate(cohort_group = forcats::fct_reorder(cohort, week_start, .desc = TRUE)) %>%
  ggplot(mapping = aes(x = rel_week, y = cohort_group, fill = rate)) +
  geom_tile()  +
  geom_text(aes(label = N), colour = "darkgray") +
  labs(x = "  ",
       y = "  ",
       fill = "\n",
       title = "graph_title") +
  scale_fill_viridis_c(option = "inferno") +
  scale_x_continuous(breaks = scales::breaks_width(1)) +
  theme_minimal() +
  theme(panel.grid = element_blank())
      
      











2. gt





, .







gt
#  -
data_tbl <- cohort_tbl %>%
  pivot_longer(cols = c(N, rate)) %>%
  pivot_wider(names_from = rel_week, values_from = value) %>%
  #      
  left_join(cohort_dict) %>%
  arrange(week_start, desc(name))

odd_rows <- seq(1, to = nrow(data_tbl), by = 2)
even_rows <- seq(2, to = nrow(data_tbl), by = 2)

tab <- data_tbl %>%
  mutate(cohort = if_else(rep(c(TRUE, FALSE), length.out = nrow(.)), 
                          cohort, "")) %>%
  select(-name, -week_start) %>%
  gt(rowname_col = "cohort") %>%
  fmt_percent(columns = matches("[0-9]+"), 
              rows = odd_rows, 
              decimals = 0, pattern = "<big>{x}</big>") %>%
  fmt_missing(columns = everything(), 
              missing_text = "---") %>%
  tab_stubhead(label = "  ") %>%
  tab_spanner(label = "  ",
              columns = everything()) %>%
  tab_header(title = "") %>%
  data_color(columns = everything(),
             colors = scales::col_numeric(palette = "inferno",
                                          domain = c(0, 1), 
                                          alpha = 0.6,
                                          na.color = "lightgray")) %>%
  tab_options(
    table.font.size = "smaller",
    data_row.padding = px(1),
    table.width = pct(75)
  ) %>%
  tab_style(
    style = list(
      cell_fill(color = "white"),
      cell_text(style = "italic"),
      cell_borders(sides = "bottom")
    ),
    locations = cells_body(
      columns = everything(),
      rows = even_rows)
  ) %>%
  tab_style(
    style = list(
      cell_borders(sides = "top")
    ),
    locations = cells_body(
      columns = everything(),
      rows = odd_rows)
  )

tab
      
      











, .







Publikasi sebelumnya - β€œR dan bekerja dengan waktu. Ada apa di balik layar? " ...








All Articles