First round of fiddling
.gitignore vendored Normal file (46 lines added)
@@ -0,0 +1,46 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
.venv/
venv/
ENV/

# Jupyter
.ipynb_checkpoints/

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Environment
.env
.python-version

# Data (uncomment if you want to ignore data files)
# *.csv
2026-01-02_query-01-booking-people.sql Normal file (146 lines added)
@@ -0,0 +1,146 @@
-- First establish some base parameters for our users
WITH config AS (
    SELECT
        14 AS short_term,
        90 AS medium_term,
        360 AS long_term,
        CAST('2023-01-01' AS timestamp) AS date_cutoff
),

-- Then establish a standard core set of user data
users_with_churn_stats AS (
    SELECT id, active, billing_state,
        DATE_TRUNC('month', created_at) AS month_created,
        created_at, free_trial_ends_at, updated_at,
        updated_at::date - created_at::date AS lifespan
    FROM users, config
    -- We'll only look at newish users.
    WHERE created_at >= config.date_cutoff
),

-- Next use that core data to extract the records we want to analyze
-- and categorize them by date and status
users_to_examine AS (
    -- First the users who exited just after the trial
    SELECT 'quick-exit' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan <= config.short_term

    UNION

    -- Next, users who gave the tools some time but still left
    SELECT 'fair-trial' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan < config.medium_term
        AND lifespan > config.short_term

    UNION

    -- Then some longer-time users who still churned.
    -- We'll cut this off after 6 months since afterwards
    -- company closures, etc. could factor in.
    SELECT 'short-termer' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan >= config.medium_term
        AND lifespan < config.long_term

    UNION

    -- Finally, people who are still kicking.
    -- This cohort is going to be a little wrong for older
    -- dates since it only includes those who are STILL around.
    SELECT 'active-user' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state != 'CHURNED'
        AND lifespan >= config.long_term
),

-- We'll add booking forms created by era
booking_form_counts AS (
    SELECT bf.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT booking_forms.user_id, booking_forms.created_at::date - users.created_at::date AS created_age
        FROM config, booking_forms JOIN users ON booking_forms.user_id = users.id
        WHERE booking_forms.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) bf
    GROUP BY bf.user_id
),

-- Bookings made by era
booking_counts AS (
    SELECT bk.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT booking_forms.user_id, bookings.created_at::date - users.created_at::date AS created_age
        FROM config, booking_forms
        JOIN users ON booking_forms.user_id = users.id
        JOIN bookings ON bookings.booking_form_id = booking_forms.id
        WHERE booking_forms.created_at >= config.date_cutoff
            AND bookings.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) bk
    GROUP BY bk.user_id
),

-- Employees added by era
employee_counts AS (
    SELECT empl.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT employees.user_id, employees.created_at::date - users.created_at::date AS created_age
        FROM config, employees JOIN users ON employees.user_id = users.id
        WHERE employees.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) empl
    GROUP BY empl.user_id
),

-- Contacts created by era
contact_counts AS (
    SELECT cust.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT customers.user_id, customers.created_at::date - users.created_at::date AS created_age
        FROM config, customers JOIN users ON customers.user_id = users.id
        WHERE customers.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) cust
    GROUP BY cust.user_id
)

-- Finally, we'll flatten it all out into a repurposable flat table
SELECT users_to_examine.id,
    users_to_examine.category,
    COALESCE(booking_form_counts.short_term, 0) AS booking_forms_short_term,
    COALESCE(booking_form_counts.medium_term, 0) AS booking_forms_medium_term,
    COALESCE(booking_form_counts.long_term, 0) AS booking_forms_long_term,
    COALESCE(booking_form_counts.total, 0) AS booking_forms,
    COALESCE(booking_counts.short_term, 0) AS bookings_short_term,
    COALESCE(booking_counts.medium_term, 0) AS bookings_medium_term,
    COALESCE(booking_counts.long_term, 0) AS bookings_long_term,
    COALESCE(booking_counts.total, 0) AS bookings,
    COALESCE(employee_counts.short_term, 0) AS employees_short_term,
    COALESCE(employee_counts.medium_term, 0) AS employees_medium_term,
    COALESCE(employee_counts.long_term, 0) AS employees_long_term,
    COALESCE(employee_counts.total, 0) AS employees,
    COALESCE(contact_counts.short_term, 0) AS contacts_short_term,
    COALESCE(contact_counts.medium_term, 0) AS contacts_medium_term,
    COALESCE(contact_counts.long_term, 0) AS contacts_long_term,
    COALESCE(contact_counts.total, 0) AS contacts
FROM users_to_examine
LEFT OUTER JOIN booking_form_counts ON booking_form_counts.user_id = users_to_examine.id
LEFT OUTER JOIN booking_counts ON booking_counts.user_id = users_to_examine.id
LEFT OUTER JOIN employee_counts ON employee_counts.user_id = users_to_examine.id
LEFT OUTER JOIN contact_counts ON contact_counts.user_id = users_to_examine.id
;
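Note: the notebook later in this commit loads a local `churn.csv`, so each of these queries has to be run against the database and exported at some point. Below is a minimal sketch of one way to do that with pandas and SQLAlchemy; the `DATABASE_URL` environment variable and the output filename are assumptions for illustration, not part of this commit.

```python
# Sketch only: run the booking/people query and export it for local analysis.
# Assumes a SQLAlchemy-compatible connection string in DATABASE_URL (hypothetical).
import os

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine(os.environ["DATABASE_URL"])

with open("2026-01-02_query-01-booking-people.sql") as f:
    sql = f.read()

df = pd.read_sql_query(sql, engine)  # one row per examined user
df.to_csv("query-01-booking-people.csv", index=False)
```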
2026-01-02_query-02-email.sql Normal file (83 lines added)
@@ -0,0 +1,83 @@
-- First establish some base parameters for our users
WITH config AS (
    SELECT
        14 AS short_term,
        90 AS medium_term,
        360 AS long_term,
        CAST('2023-01-01' AS timestamp) AS date_cutoff
),

-- Then establish a standard core set of user data
users_with_churn_stats AS (
    SELECT id, active, billing_state,
        DATE_TRUNC('month', created_at) AS month_created,
        created_at, free_trial_ends_at, updated_at,
        updated_at::date - created_at::date AS lifespan
    FROM users, config
    -- We'll only look at newish users.
    WHERE created_at >= config.date_cutoff
),

-- Next use that core data to extract the records we want to analyze
-- and categorize them by date and status
users_to_examine AS (
    -- First the users who exited just after the trial
    SELECT 'quick-exit' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan <= config.short_term

    UNION

    -- Next, users who gave the tools some time but still left
    SELECT 'fair-trial' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan < config.medium_term
        AND lifespan > config.short_term

    UNION

    -- Then some longer-time users who still churned.
    -- We'll cut this off after 6 months since afterwards
    -- company closures, etc. could factor in.
    SELECT 'short-termer' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan >= config.medium_term
        AND lifespan < config.long_term

    UNION

    -- Finally, people who are still kicking.
    -- This cohort is going to be a little wrong for older
    -- dates since it only includes those who are STILL around.
    SELECT 'active-user' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state != 'CHURNED'
        AND lifespan >= config.long_term
),

-- Emails sent by era
email_counts AS (
    SELECT email.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT sent_emails.user_id, sent_emails.created_at::date - users.created_at::date AS created_age
        FROM config, sent_emails JOIN users ON sent_emails.user_id = users.id
        WHERE sent_emails.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) email
    GROUP BY email.user_id
)

-- Finally, we'll flatten it all out into a repurposable flat table
SELECT users_to_examine.id,
    users_to_examine.category,
    COALESCE(email_counts.short_term, 0) AS emails_short_term,
    COALESCE(email_counts.medium_term, 0) AS emails_medium_term,
    COALESCE(email_counts.long_term, 0) AS emails_long_term,
    COALESCE(email_counts.total, 0) AS emails
FROM users_to_examine
LEFT OUTER JOIN email_counts ON email_counts.user_id = users_to_examine.id
;
2026-01-02_query-03-sms-appts-checklist.sql Normal file (151 lines added)
@@ -0,0 +1,151 @@
-- First establish some base parameters for our users
WITH config AS (
    SELECT
        14 AS short_term,
        90 AS medium_term,
        360 AS long_term,
        CAST('2023-01-01' AS timestamp) AS date_cutoff
),

-- Then establish a standard core set of user data
users_with_churn_stats AS (
    SELECT id, active, billing_state,
        DATE_TRUNC('month', created_at) AS month_created,
        created_at, free_trial_ends_at, updated_at,
        updated_at::date - created_at::date AS lifespan
    FROM users, config
    -- We'll only look at newish users.
    WHERE created_at >= config.date_cutoff
),

-- Next use that core data to extract the records we want to analyze
-- and categorize them by date and status
users_to_examine AS (
    -- First the users who exited just after the trial
    SELECT 'quick-exit' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan <= config.short_term

    UNION

    -- Next, users who gave the tools some time but still left
    SELECT 'fair-trial' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan < config.medium_term
        AND lifespan > config.short_term

    UNION

    -- Then some longer-time users who still churned.
    -- We'll cut this off after 6 months since afterwards
    -- company closures, etc. could factor in.
    SELECT 'short-termer' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state = 'CHURNED'
        AND lifespan >= config.medium_term
        AND lifespan < config.long_term

    UNION

    -- Finally, people who are still kicking.
    -- This cohort is going to be a little wrong for older
    -- dates since it only includes those who are STILL around.
    SELECT 'active-user' AS category, uwcs.*
    FROM users_with_churn_stats uwcs, config
    WHERE billing_state != 'CHURNED'
        AND lifespan >= config.long_term
),

-- Messages sent by era
sms_counts AS (
    SELECT texts.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT sent_texts.user_id, sent_texts.created_at::date - users.created_at::date AS created_age
        FROM config, sent_texts JOIN users ON sent_texts.user_id = users.id
        WHERE sent_texts.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) texts
    GROUP BY texts.user_id
),

-- Appointments by era
appointment_counts AS (
    SELECT appts.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT appointments.user_id, appointments.created_at::date - users.created_at::date AS created_age
        FROM config, appointments JOIN users ON appointments.user_id = users.id
        WHERE appointments.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) appts
    GROUP BY appts.user_id
),

-- Checklists created by era
checklist_counts AS (
    SELECT checks.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT checklist_templates.user_id, checklists.created_at::date - users.created_at::date AS created_age
        FROM config, checklist_templates
        JOIN users ON checklist_templates.user_id = users.id
        JOIN checklists ON checklists.checklist_template_id = checklist_templates.id
        WHERE checklist_templates.created_at >= config.date_cutoff
            AND checklists.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) checks
    GROUP BY checks.user_id
),

-- Checklist events created by era
checklist_filled_counts AS (
    SELECT events.user_id,
        SUM(CASE WHEN created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN created_age >= config.short_term AND created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN created_age >= config.medium_term AND created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM config, (
        SELECT employees.user_id, checklist_item_events.created_at::date - users.created_at::date AS created_age
        FROM config, employees
        JOIN users ON employees.user_id = users.id
        JOIN checklist_item_events ON checklist_item_events.employee_id = employees.id
        WHERE checklist_item_events.created_at >= config.date_cutoff
            AND users.created_at >= config.date_cutoff
    ) events
    GROUP BY events.user_id
)

-- Finally, we'll flatten it all out into a repurposable flat table
SELECT users_to_examine.id,
    users_to_examine.category,
    users_to_examine.month_created,
    users_to_examine.lifespan,
    COALESCE(sms_counts.short_term, 0) AS sms_short_term,
    COALESCE(sms_counts.medium_term, 0) AS sms_medium_term,
    COALESCE(sms_counts.long_term, 0) AS sms_long_term,
    COALESCE(sms_counts.total, 0) AS sms,
    COALESCE(appointment_counts.short_term, 0) AS appointments_short_term,
    COALESCE(appointment_counts.medium_term, 0) AS appointments_medium_term,
    COALESCE(appointment_counts.long_term, 0) AS appointments_long_term,
    COALESCE(appointment_counts.total, 0) AS appointments,
    COALESCE(checklist_counts.short_term, 0) AS checklists_short_term,
    COALESCE(checklist_counts.medium_term, 0) AS checklists_medium_term,
    COALESCE(checklist_counts.long_term, 0) AS checklists_long_term,
    COALESCE(checklist_counts.total, 0) AS checklists,
    COALESCE(checklist_filled_counts.short_term, 0) AS checklists_filled_short_term,
    COALESCE(checklist_filled_counts.medium_term, 0) AS checklists_filled_medium_term,
    COALESCE(checklist_filled_counts.long_term, 0) AS checklists_filled_long_term,
    COALESCE(checklist_filled_counts.total, 0) AS checklists_filled
FROM users_to_examine
LEFT OUTER JOIN sms_counts ON sms_counts.user_id = users_to_examine.id
LEFT OUTER JOIN appointment_counts ON appointment_counts.user_id = users_to_examine.id
LEFT OUTER JOIN checklist_counts ON checklist_counts.user_id = users_to_examine.id
LEFT OUTER JOIN checklist_filled_counts ON checklist_filled_counts.user_id = users_to_examine.id
;
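The notebook reads a single `churn.csv` that carries the columns produced by all three queries, so their exports need to be joined on the user id and category somewhere. A hedged sketch of that step, assuming each query was exported to a CSV named after its .sql file (those intermediate filenames are illustrative; only `churn.csv` appears in the notebook):

```python
# Sketch only: combine the three query exports into the churn.csv the notebook expects.
# The per-query filenames are assumptions; churn.csv is the name the notebook reads.
import pandas as pd

booking = pd.read_csv("query-01-booking-people.csv")
email = pd.read_csv("query-02-email.csv")
sms = pd.read_csv("query-03-sms-appts-checklist.csv")

churn = (
    booking
    .merge(email, on=["id", "category"], how="outer")
    .merge(sms, on=["id", "category"], how="outer")  # also brings month_created, lifespan
)
churn.to_csv("churn.csv", index=False)
```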
README.md Normal file (15 lines added)
@@ -0,0 +1,15 @@
# Jupyter Analysis

## Setup

```bash
poetry install
```

## Run

```bash
poetry run jupyter notebook
# or
poetry run jupyter lab
```
churn-analysis.ipynb Normal file (184 lines added)
@@ -0,0 +1,184 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "import numpy as np\n",
    "\n",
    "# Load CSV file\n",
    "df = pd.read_csv('churn.csv')\n",
    "\n",
    "category_labels = {\n",
    "    'quick-exit': 'Free trial only',\n",
    "    'fair-trial': '74 day churn',\n",
    "    'short-termer': '6 month churn',\n",
    "    'active-user': 'No churn'\n",
    "}\n",
    "timeframe_labels = {\n",
    "    'short term': 'During free trial',\n",
    "    'medium term': 'After trial, before 90 days',\n",
    "    'long term': 'After 90 days, first 6 months'\n",
    "}\n",
    "\n",
    "def fix_dataset_label(k):\n",
    "    parts = k.split('_')\n",
    "    if \"term\" in parts:\n",
    "        timeframe = timeframe_labels[\" \".join(parts[-2:])]\n",
    "        name = \" \".join(parts[0:-2]).title()\n",
    "    else:\n",
    "        timeframe = 'Lifetime'\n",
    "        name = \" \".join(parts).title()\n",
    "    return f\"{name}: {timeframe}\"\n",
    "\n",
    "def x_labels(categories):\n",
    "    return [category_labels[cat] for cat in categories]\n",
    "\n",
    "def dataset_labels(columns):\n",
    "    return [fix_dataset_label(col) for col in columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>booking_forms_short_term</th>\n",
       "      <th>booking_forms_medium_term</th>\n",
       "      <th>booking_forms_long_term</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>category</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>active-user</th>\n",
       "      <td>0.046701</td>\n",
       "      <td>0.041571</td>\n",
       "      <td>0.040686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fair-trial</th>\n",
       "      <td>0.268002</td>\n",
       "      <td>0.020303</td>\n",
       "      <td>0.000271</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>quick-exit</th>\n",
       "      <td>0.232673</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>short-termer</th>\n",
       "      <td>0.290954</td>\n",
       "      <td>0.082213</td>\n",
       "      <td>0.029034</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              booking_forms_short_term  booking_forms_medium_term  \\\n",
       "category                                                            \n",
       "active-user                   0.046701                   0.041571   \n",
       "fair-trial                    0.268002                   0.020303   \n",
       "quick-exit                    0.232673                   0.000000   \n",
       "short-termer                  0.290954                   0.082213   \n",
       "\n",
       "              booking_forms_long_term  \n",
       "category                               \n",
       "active-user                  0.040686  \n",
       "fair-trial                   0.000271  \n",
       "quick-exit                   0.000000  \n",
       "short-termer                 0.029034  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "columns = ['booking_forms_short_term', 'booking_forms_medium_term', 'booking_forms_long_term']\n",
    "categories = category_labels.keys() # All\n",
    "\n",
    "# Group by category and make sure they're in the correct order\n",
    "df.groupby('category')[columns].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "# Order categories as defined in category_labels\ncategory_order = [category_labels[k] for k in category_labels.keys()]"
  },
  {
   "cell_type": "code",
"source": "# Generate charts for all metrics\nmetrics = [\n 'booking_forms',\n 'bookings',\n 'employees', \n 'contacts',\n 'emails',\n 'sms',\n 'appointments',\n 'checklists',\n 'checklists_filled'\n]\n\n# Get total users per category for percentage calculations\ncategory_totals = df.groupby('category').size()\n\ndef create_charts_for_metric(metric_name):\n cols = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n title_name = metric_name.replace('_', ' ').title()\n \n # Chart 1: Average usage\n grouped = df.groupby('category')[cols].mean().reset_index()\n melted = grouped.melt(id_vars='category', var_name='timeframe', value_name='value')\n melted['category_label'] = melted['category'].map(category_labels)\n \n fig1 = px.bar(\n melted,\n x='category_label',\n y='value',\n color='timeframe',\n barmode='group',\n title=f'{title_name} Usage by Churn Category and Timeframe',\n category_orders={'category_label': category_order}\n )\n fig1.update_layout(\n xaxis_title='Category',\n yaxis_title='Average Usage',\n legend_title='Timeframe'\n )\n fig1.show()\n \n # Chart 2: Percentage of users with at least one\n counts = df.groupby('category')[cols].apply(lambda x: (x > 0).sum()).reset_index()\n counts_melted = counts.melt(id_vars='category', var_name='timeframe', value_name='count')\n counts_melted['total'] = counts_melted['category'].map(category_totals)\n counts_melted['percentage'] = (counts_melted['count'] / counts_melted['total']) * 100\n counts_melted['category_label'] = counts_melted['category'].map(category_labels)\n \n fig2 = px.bar(\n counts_melted,\n x='category_label',\n y='percentage',\n color='timeframe',\n barmode='group',\n title=f'% of Users with At Least One {title_name} by Churn Category and Timeframe',\n category_orders={'category_label': category_order}\n )\n fig2.update_layout(\n xaxis_title='Category',\n yaxis_title='% of Users',\n legend_title='Timeframe'\n )\n fig2.show()\n\n# Generate all charts\nfor metric in metrics:\n create_charts_for_metric(metric)",
   "metadata": {},
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
"source": "# Generate cumulative charts for all metrics\ndef create_cumulative_charts_for_metric(metric_name):\n cols = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n title_name = metric_name.replace('_', ' ').title()\n \n # Chart 1: Cumulative average usage\n grouped = df.groupby('category')[cols].mean().reset_index()\n # Make cumulative\n grouped['cumulative_short'] = grouped[cols[0]]\n grouped['cumulative_medium'] = grouped[cols[0]] + grouped[cols[1]]\n grouped['cumulative_long'] = grouped[cols[0]] + grouped[cols[1]] + grouped[cols[2]]\n \n cumulative_cols = ['cumulative_short', 'cumulative_medium', 'cumulative_long']\n melted = grouped.melt(id_vars='category', value_vars=cumulative_cols, var_name='timeframe', value_name='value')\n melted['category_label'] = melted['category'].map(category_labels)\n \n timeframe_labels_cumulative = {\n 'cumulative_short': 'Through trial',\n 'cumulative_medium': 'Through 90 days',\n 'cumulative_long': 'Through 6 months'\n }\n melted['timeframe_label'] = melted['timeframe'].map(timeframe_labels_cumulative)\n \n fig1 = px.bar(\n melted,\n x='category_label',\n y='value',\n color='timeframe_label',\n barmode='group',\n title=f'{title_name} Cumulative Usage by Churn Category',\n category_orders={\n 'category_label': category_order,\n 'timeframe_label': ['Through trial', 'Through 90 days', 'Through 6 months']\n }\n )\n fig1.update_layout(\n xaxis_title='Category',\n yaxis_title='Cumulative Average Usage',\n legend_title='Timeframe'\n )\n fig1.show()\n \n # Chart 2: Cumulative percentage of users with at least one\n # Check if user used feature in ANY period up to that point (not sum of counts)\n df['_cumul_short'] = df[cols[0]] > 0\n df['_cumul_medium'] = (df[cols[0]] > 0) | (df[cols[1]] > 0)\n df['_cumul_long'] = (df[cols[0]] > 0) | (df[cols[1]] > 0) | (df[cols[2]] > 0)\n \n counts = df.groupby('category')[['_cumul_short', '_cumul_medium', '_cumul_long']].sum().reset_index()\n counts.columns = ['category', 'cumulative_short', 'cumulative_medium', 'cumulative_long']\n \n counts_melted = counts.melt(id_vars='category', value_vars=cumulative_cols, var_name='timeframe', value_name='count')\n counts_melted['total'] = counts_melted['category'].map(category_totals)\n counts_melted['percentage'] = (counts_melted['count'] / counts_melted['total']) * 100\n counts_melted['category_label'] = counts_melted['category'].map(category_labels)\n counts_melted['timeframe_label'] = counts_melted['timeframe'].map(timeframe_labels_cumulative)\n \n fig2 = px.bar(\n counts_melted,\n x='category_label',\n y='percentage',\n color='timeframe_label',\n barmode='group',\n title=f'Cumulative % of Users with At Least One {title_name} by Churn Category',\n category_orders={\n 'category_label': category_order,\n 'timeframe_label': ['Through trial', 'Through 90 days', 'Through 6 months']\n }\n )\n fig2.update_layout(\n xaxis_title='Category',\n yaxis_title='Cumulative % of Users',\n legend_title='Timeframe'\n )\n fig2.show()\n\n# Generate all cumulative charts\nfor metric in metrics:\n create_cumulative_charts_for_metric(metric)",
   "metadata": {},
   "execution_count": null,
   "outputs": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
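With the join calls in the first cell corrected to the `" ".join(parts)` form (a plain list has no `.join` method), `fix_dataset_label` turns the flattened SQL column names back into readable chart labels. A hypothetical spot-check, not part of the notebook, of what it should return:

```python
# Hypothetical spot-check of the label helper defined in the notebook's first cell.
assert fix_dataset_label("booking_forms_short_term") == "Booking Forms: During free trial"
assert fix_dataset_label("emails_medium_term") == "Emails: After trial, before 90 days"
assert fix_dataset_label("sms") == "Sms: Lifetime"
```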
poetry.lock generated Normal file (2579 lines added)
File diff suppressed because it is too large
pyproject.toml Normal file (16 lines added)
@@ -0,0 +1,16 @@
[tool.poetry]
name = "jupyter-analysis"
version = "0.1.0"
description = "Jupyter notebook project for data analysis and charting"
authors = ["Justin"]
package-mode = false

[tool.poetry.dependencies]
python = "^3.13"
jupyter = "^1.1.1"
pandas = "^3.0.0"
plotly = "^6.0.0"

[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"