First round of fiddling

This commit is contained in:
2026-02-02 16:26:10 -05:00
commit a253955528
9 changed files with 16042 additions and 0 deletions

46
.gitignore vendored Normal file
View File

@ -0,0 +1,46 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
.venv/
venv/
ENV/
# Jupyter
.ipynb_checkpoints/
# IDE
.idea/
.vscode/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Environment
.env
.python-version
# Data (uncomment if you want to ignore data files)
# *.csv

View File

@ -0,0 +1,146 @@
-- Flatten per-user feature usage (booking forms, bookings, employees, contacts)
-- by churn category and by "era" of the user's lifetime, for export/analysis.
-- First establish some base parameters for our users.
WITH config AS (
    SELECT
        14 AS short_term,    -- days; boundary of the first usage era
        90 AS medium_term,   -- days
        360 AS long_term,    -- days; analysis horizon
        CAST('2023-01-01' AS timestamp) AS date_cutoff
),
-- Then establish a standard core set of user data.
users_with_churn_stats AS (
    SELECT
        users.id,
        users.active,
        users.billing_state,
        DATE_TRUNC('month', users.created_at) AS month_created,
        users.created_at,
        users.free_trial_ends_at,
        users.updated_at,
        -- Days between creation and last update, used below as a lifespan proxy.
        users.updated_at::date - users.created_at::date AS lifespan
    FROM users
    CROSS JOIN config
    -- We'll only look at newish users.
    WHERE users.created_at >= config.date_cutoff
),
-- Next use that core data to extract the records we want to analyze
-- and categorize them by date and status. The four branches are mutually
-- exclusive (disjoint lifespan ranges / billing states), so UNION ALL is
-- used instead of UNION to skip a pointless sort-and-dedup pass.
users_to_examine AS (
    -- First the users who exited just after the trial.
    SELECT 'quick-exit' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan <= config.short_term
    UNION ALL
    -- Next, users who gave the tools some time but still left.
    SELECT 'fair-trial' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan > config.short_term
      AND uwcs.lifespan < config.medium_term
    UNION ALL
    -- Then some longer-time users who still churned.
    -- We'll cut this off at long_term days since afterwards
    -- company closures, etc. could factor in.
    SELECT 'short-termer' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan >= config.medium_term
      AND uwcs.lifespan < config.long_term
    UNION ALL
    -- Finally, people who are still kicking.
    -- This cohort is going to be a little wrong for older
    -- dates since it only includes those who are STILL around.
    SELECT 'active-user' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state != 'CHURNED'
      AND uwcs.lifespan >= config.long_term
),
-- We'll add booking forms created by era. created_age is the age of the
-- user's account, in days, when the record was created.
booking_form_counts AS (
    SELECT
        bf.user_id,
        SUM(CASE WHEN bf.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN bf.created_age >= config.short_term AND bf.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN bf.created_age >= config.medium_term AND bf.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT booking_forms.user_id,
               booking_forms.created_at::date - users.created_at::date AS created_age
        FROM booking_forms
        JOIN users ON booking_forms.user_id = users.id
        CROSS JOIN config
        WHERE booking_forms.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) bf
    CROSS JOIN config
    GROUP BY bf.user_id
),
-- Bookings made by era (bookings reach users via their booking form).
booking_counts AS (
    SELECT
        bk.user_id,
        SUM(CASE WHEN bk.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN bk.created_age >= config.short_term AND bk.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN bk.created_age >= config.medium_term AND bk.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT booking_forms.user_id,
               bookings.created_at::date - users.created_at::date AS created_age
        FROM booking_forms
        JOIN users ON booking_forms.user_id = users.id
        JOIN bookings ON bookings.booking_form_id = booking_forms.id
        CROSS JOIN config
        WHERE booking_forms.created_at >= config.date_cutoff
          AND bookings.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) bk
    CROSS JOIN config
    GROUP BY bk.user_id
),
-- Employees added by era.
employee_counts AS (
    SELECT
        empl.user_id,
        SUM(CASE WHEN empl.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN empl.created_age >= config.short_term AND empl.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN empl.created_age >= config.medium_term AND empl.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT employees.user_id,
               employees.created_at::date - users.created_at::date AS created_age
        FROM employees
        JOIN users ON employees.user_id = users.id
        CROSS JOIN config
        WHERE employees.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) empl
    CROSS JOIN config
    GROUP BY empl.user_id
),
-- Contacts (customers) created by era.
contact_counts AS (
    SELECT
        cust.user_id,
        SUM(CASE WHEN cust.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN cust.created_age >= config.short_term AND cust.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN cust.created_age >= config.medium_term AND cust.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT customers.user_id,
               customers.created_at::date - users.created_at::date AS created_age
        FROM customers
        JOIN users ON customers.user_id = users.id
        CROSS JOIN config
        WHERE customers.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) cust
    CROSS JOIN config
    GROUP BY cust.user_id
)
-- Finally, we'll flatten it all out into a repurposable flat table.
-- (The original had a stray comma after the last CTE, which is a syntax
-- error before the final SELECT.)
-- LEFT JOIN + COALESCE keeps users with zero activity as explicit zeros.
SELECT
    users_to_examine.id,
    users_to_examine.category,
    COALESCE(booking_form_counts.short_term, 0) AS booking_forms_short_term,
    COALESCE(booking_form_counts.medium_term, 0) AS booking_forms_medium_term,
    COALESCE(booking_form_counts.long_term, 0) AS booking_forms_long_term,
    COALESCE(booking_form_counts.total, 0) AS booking_forms,
    COALESCE(booking_counts.short_term, 0) AS bookings_short_term,
    COALESCE(booking_counts.medium_term, 0) AS bookings_medium_term,
    COALESCE(booking_counts.long_term, 0) AS bookings_long_term,
    COALESCE(booking_counts.total, 0) AS bookings,
    COALESCE(employee_counts.short_term, 0) AS employees_short_term,
    COALESCE(employee_counts.medium_term, 0) AS employees_medium_term,
    COALESCE(employee_counts.long_term, 0) AS employees_long_term,
    COALESCE(employee_counts.total, 0) AS employees,
    COALESCE(contact_counts.short_term, 0) AS contacts_short_term,
    COALESCE(contact_counts.medium_term, 0) AS contacts_medium_term,
    COALESCE(contact_counts.long_term, 0) AS contacts_long_term,
    COALESCE(contact_counts.total, 0) AS contacts
FROM users_to_examine
LEFT JOIN booking_form_counts ON booking_form_counts.user_id = users_to_examine.id
LEFT JOIN booking_counts ON booking_counts.user_id = users_to_examine.id
LEFT JOIN employee_counts ON employee_counts.user_id = users_to_examine.id
LEFT JOIN contact_counts ON contact_counts.user_id = users_to_examine.id
;

View File

@ -0,0 +1,83 @@
-- Flatten per-user sent-email counts by churn category and by "era" of the
-- user's lifetime, for export/analysis.
-- First establish some base parameters for our users.
WITH config AS (
    SELECT
        14 AS short_term,    -- days; boundary of the first usage era
        90 AS medium_term,   -- days
        360 AS long_term,    -- days; analysis horizon
        CAST('2023-01-01' AS timestamp) AS date_cutoff
),
-- Then establish a standard core set of user data.
users_with_churn_stats AS (
    SELECT
        users.id,
        users.active,
        users.billing_state,
        DATE_TRUNC('month', users.created_at) AS month_created,
        users.created_at,
        users.free_trial_ends_at,
        users.updated_at,
        -- Days between creation and last update, used below as a lifespan proxy.
        users.updated_at::date - users.created_at::date AS lifespan
    FROM users
    CROSS JOIN config
    -- We'll only look at newish users.
    WHERE users.created_at >= config.date_cutoff
),
-- Next use that core data to extract the records we want to analyze
-- and categorize them by date and status. The four branches are mutually
-- exclusive (disjoint lifespan ranges / billing states), so UNION ALL is
-- used instead of UNION to skip a pointless sort-and-dedup pass.
users_to_examine AS (
    -- First the users who exited just after the trial.
    SELECT 'quick-exit' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan <= config.short_term
    UNION ALL
    -- Next, users who gave the tools some time but still left.
    SELECT 'fair-trial' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan > config.short_term
      AND uwcs.lifespan < config.medium_term
    UNION ALL
    -- Then some longer-time users who still churned.
    -- We'll cut this off at long_term days since afterwards
    -- company closures, etc. could factor in.
    SELECT 'short-termer' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan >= config.medium_term
      AND uwcs.lifespan < config.long_term
    UNION ALL
    -- Finally, people who are still kicking.
    -- This cohort is going to be a little wrong for older
    -- dates since it only includes those who are STILL around.
    SELECT 'active-user' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state != 'CHURNED'
      AND uwcs.lifespan >= config.long_term
),
-- Emails sent by era. created_age is the age of the user's account,
-- in days, when the email was sent.
email_counts AS (
    SELECT
        email.user_id,
        SUM(CASE WHEN email.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN email.created_age >= config.short_term AND email.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN email.created_age >= config.medium_term AND email.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT sent_emails.user_id,
               sent_emails.created_at::date - users.created_at::date AS created_age
        FROM sent_emails
        JOIN users ON sent_emails.user_id = users.id
        CROSS JOIN config
        WHERE sent_emails.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) email
    CROSS JOIN config
    GROUP BY email.user_id
)
-- Finally, we'll flatten it all out into a repurposable flat table.
-- LEFT JOIN + COALESCE keeps users with zero activity as explicit zeros.
SELECT
    users_to_examine.id,
    users_to_examine.category,
    COALESCE(email_counts.short_term, 0) AS emails_short_term,
    COALESCE(email_counts.medium_term, 0) AS emails_medium_term,
    COALESCE(email_counts.long_term, 0) AS emails_long_term,
    COALESCE(email_counts.total, 0) AS emails
FROM users_to_examine
LEFT JOIN email_counts ON email_counts.user_id = users_to_examine.id
;

View File

@ -0,0 +1,151 @@
-- Flatten per-user engagement counts (SMS, appointments, checklists,
-- checklist fills) by churn category and by "era" of the user's lifetime.
-- First establish some base parameters for our users.
WITH config AS (
    SELECT
        14 AS short_term,    -- days; boundary of the first usage era
        90 AS medium_term,   -- days
        360 AS long_term,    -- days; analysis horizon
        CAST('2023-01-01' AS timestamp) AS date_cutoff
),
-- Then establish a standard core set of user data.
users_with_churn_stats AS (
    SELECT
        users.id,
        users.active,
        users.billing_state,
        DATE_TRUNC('month', users.created_at) AS month_created,
        users.created_at,
        users.free_trial_ends_at,
        users.updated_at,
        -- Days between creation and last update, used below as a lifespan proxy.
        users.updated_at::date - users.created_at::date AS lifespan
    FROM users
    CROSS JOIN config
    -- We'll only look at newish users.
    WHERE users.created_at >= config.date_cutoff
),
-- Next use that core data to extract the records we want to analyze
-- and categorize them by date and status. The four branches are mutually
-- exclusive (disjoint lifespan ranges / billing states), so UNION ALL is
-- used instead of UNION to skip a pointless sort-and-dedup pass.
users_to_examine AS (
    -- First the users who exited just after the trial.
    SELECT 'quick-exit' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan <= config.short_term
    UNION ALL
    -- Next, users who gave the tools some time but still left.
    SELECT 'fair-trial' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan > config.short_term
      AND uwcs.lifespan < config.medium_term
    UNION ALL
    -- Then some longer-time users who still churned.
    -- We'll cut this off at long_term days since afterwards
    -- company closures, etc. could factor in.
    SELECT 'short-termer' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state = 'CHURNED'
      AND uwcs.lifespan >= config.medium_term
      AND uwcs.lifespan < config.long_term
    UNION ALL
    -- Finally, people who are still kicking.
    -- This cohort is going to be a little wrong for older
    -- dates since it only includes those who are STILL around.
    SELECT 'active-user' AS category, uwcs.*
    FROM users_with_churn_stats uwcs
    CROSS JOIN config
    WHERE uwcs.billing_state != 'CHURNED'
      AND uwcs.lifespan >= config.long_term
),
-- Messages sent by era. created_age is the age of the user's account,
-- in days, when the record was created.
sms_counts AS (
    SELECT
        texts.user_id,
        SUM(CASE WHEN texts.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN texts.created_age >= config.short_term AND texts.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN texts.created_age >= config.medium_term AND texts.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT sent_texts.user_id,
               sent_texts.created_at::date - users.created_at::date AS created_age
        FROM sent_texts
        JOIN users ON sent_texts.user_id = users.id
        CROSS JOIN config
        WHERE sent_texts.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) texts
    CROSS JOIN config
    GROUP BY texts.user_id
),
-- Appointments by era.
appointment_counts AS (
    SELECT
        appts.user_id,
        SUM(CASE WHEN appts.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN appts.created_age >= config.short_term AND appts.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN appts.created_age >= config.medium_term AND appts.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT appointments.user_id,
               appointments.created_at::date - users.created_at::date AS created_age
        FROM appointments
        JOIN users ON appointments.user_id = users.id
        CROSS JOIN config
        WHERE appointments.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) appts
    CROSS JOIN config
    GROUP BY appts.user_id
),
-- Checklists created by era (checklists reach users via their template).
checklist_counts AS (
    SELECT
        checks.user_id,
        SUM(CASE WHEN checks.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN checks.created_age >= config.short_term AND checks.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN checks.created_age >= config.medium_term AND checks.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT checklist_templates.user_id,
               checklists.created_at::date - users.created_at::date AS created_age
        FROM checklist_templates
        JOIN users ON checklist_templates.user_id = users.id
        JOIN checklists ON checklists.checklist_template_id = checklist_templates.id
        CROSS JOIN config
        WHERE checklist_templates.created_at >= config.date_cutoff
          AND checklists.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) checks
    CROSS JOIN config
    GROUP BY checks.user_id
),
-- Checklist events created by era (events reach users via the employee).
checklist_filled_counts AS (
    SELECT
        events.user_id,
        SUM(CASE WHEN events.created_age < config.short_term THEN 1 ELSE 0 END) AS short_term,
        SUM(CASE WHEN events.created_age >= config.short_term AND events.created_age < config.medium_term THEN 1 ELSE 0 END) AS medium_term,
        SUM(CASE WHEN events.created_age >= config.medium_term AND events.created_age < config.long_term THEN 1 ELSE 0 END) AS long_term,
        COUNT(*) AS total
    FROM (
        SELECT employees.user_id,
               checklist_item_events.created_at::date - users.created_at::date AS created_age
        FROM employees
        JOIN users ON employees.user_id = users.id
        JOIN checklist_item_events ON checklist_item_events.employee_id = employees.id
        CROSS JOIN config
        WHERE checklist_item_events.created_at >= config.date_cutoff
          -- NOTE(review): the original duplicated the checklist_item_events
          -- cutoff filter here; filtering employees matches the sibling CTEs,
          -- which apply the cutoff to every joined table. Confirm intent.
          AND employees.created_at >= config.date_cutoff
          AND users.created_at >= config.date_cutoff
    ) events
    CROSS JOIN config
    GROUP BY events.user_id
)
-- Finally, we'll flatten it all out into a repurposable flat table.
-- LEFT JOIN + COALESCE keeps users with zero activity as explicit zeros.
SELECT
    users_to_examine.id,
    users_to_examine.category,
    users_to_examine.month_created,
    users_to_examine.lifespan,
    COALESCE(sms_counts.short_term, 0) AS sms_short_term,
    COALESCE(sms_counts.medium_term, 0) AS sms_medium_term,
    COALESCE(sms_counts.long_term, 0) AS sms_long_term,
    COALESCE(sms_counts.total, 0) AS sms,
    COALESCE(appointment_counts.short_term, 0) AS appointments_short_term,
    COALESCE(appointment_counts.medium_term, 0) AS appointments_medium_term,
    COALESCE(appointment_counts.long_term, 0) AS appointments_long_term,
    COALESCE(appointment_counts.total, 0) AS appointments,
    COALESCE(checklist_counts.short_term, 0) AS checklists_short_term,
    COALESCE(checklist_counts.medium_term, 0) AS checklists_medium_term,
    COALESCE(checklist_counts.long_term, 0) AS checklists_long_term,
    COALESCE(checklist_counts.total, 0) AS checklists,
    COALESCE(checklist_filled_counts.short_term, 0) AS checklists_filled_short_term,
    COALESCE(checklist_filled_counts.medium_term, 0) AS checklists_filled_medium_term,
    COALESCE(checklist_filled_counts.long_term, 0) AS checklists_filled_long_term,
    COALESCE(checklist_filled_counts.total, 0) AS checklists_filled
FROM users_to_examine
LEFT JOIN sms_counts ON sms_counts.user_id = users_to_examine.id
LEFT JOIN appointment_counts ON appointment_counts.user_id = users_to_examine.id
LEFT JOIN checklist_counts ON checklist_counts.user_id = users_to_examine.id
LEFT JOIN checklist_filled_counts ON checklist_filled_counts.user_id = users_to_examine.id
;

15
README.md Normal file
View File

@ -0,0 +1,15 @@
# Jupyter Analysis
## Setup
```bash
poetry install
```
## Run
```bash
poetry run jupyter notebook
# or
poetry run jupyter lab
```

184
churn-analysis.ipynb Normal file
View File

@ -0,0 +1,184 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"import numpy as np\n",
"\n",
"# Load CSV file\n",
"df = pd.read_csv('churn.csv')\n",
"\n",
"category_labels = {\n",
" 'quick-exit': 'Free trial only',\n",
" 'fair-trial': '74 day churn',\n",
" 'short-termer': '6 month churn',\n",
" 'active-user': 'No churn'\n",
"}\n",
"timeframe_labels = {\n",
" 'short term': 'During free trial',\n",
" 'medium term': 'After trial, before 90 days',\n",
" 'long term': 'After 90 days, first 6 months'\n",
"}\n",
"\n",
"def fix_dataset_label(k):\n",
" parts = k.split('_')\n",
" if \"term\" in parts:\n",
"        timeframe = timeframe_labels[\" \".join(parts[-2:])]\n",
"        name = \" \".join(parts[0:-2]).title()\n",
" else:\n",
" timeframe = 'Lifetime'\n",
"        name = \" \".join(parts).title()\n",
" return f\"{name}: {timeframe}\"\n",
"\n",
"def x_labels(categories):\n",
" return [category_labels[cat] for cat in categories]\n",
"\n",
"def dataset_labels(columns):\n",
" return [fix_dataset_label(col) for col in columns]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>booking_forms_short_term</th>\n",
" <th>booking_forms_medium_term</th>\n",
" <th>booking_forms_long_term</th>\n",
" </tr>\n",
" <tr>\n",
" <th>category</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>active-user</th>\n",
" <td>0.046701</td>\n",
" <td>0.041571</td>\n",
" <td>0.040686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fair-trial</th>\n",
" <td>0.268002</td>\n",
" <td>0.020303</td>\n",
" <td>0.000271</td>\n",
" </tr>\n",
" <tr>\n",
" <th>quick-exit</th>\n",
" <td>0.232673</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>short-termer</th>\n",
" <td>0.290954</td>\n",
" <td>0.082213</td>\n",
" <td>0.029034</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" booking_forms_short_term booking_forms_medium_term \\\n",
"category \n",
"active-user 0.046701 0.041571 \n",
"fair-trial 0.268002 0.020303 \n",
"quick-exit 0.232673 0.000000 \n",
"short-termer 0.290954 0.082213 \n",
"\n",
" booking_forms_long_term \n",
"category \n",
"active-user 0.040686 \n",
"fair-trial 0.000271 \n",
"quick-exit 0.000000 \n",
"short-termer 0.029034 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"columns = ['booking_forms_short_term', 'booking_forms_medium_term', 'booking_forms_long_term']\n",
"categories = category_labels.keys() # All\n",
"\n",
"# Group by category and make sure they're in the correct order\n",
"df.groupby('category')[columns].mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Order categories as defined in category_labels\ncategory_order = [category_labels[k] for k in category_labels.keys()]"
},
{
"cell_type": "code",
"source": "# Generate charts for all metrics\nmetrics = [\n    'booking_forms',\n    'bookings',\n    'employees', \n    'contacts',\n    'emails',\n    'sms',\n    'appointments',\n    'checklists',\n    'checklists_filled'\n]\n\n# Get total users per category for percentage calculations\ncategory_totals = df.groupby('category').size()\n\ndef create_charts_for_metric(metric_name):\n    cols = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n    title_name = metric_name.replace('_', ' ').title()\n    \n    # Chart 1: Average usage\n    grouped = df.groupby('category')[cols].mean().reset_index()\n    melted = grouped.melt(id_vars='category', var_name='timeframe', value_name='value')\n    melted['category_label'] = melted['category'].map(category_labels)\n    \n    fig1 = px.bar(\n        melted,\n        x='category_label',\n        y='value',\n        color='timeframe',\n        barmode='group',\n        title=f'{title_name} Usage by Churn Category and Timeframe',\n        category_orders={'category_label': category_order}\n    )\n    fig1.update_layout(\n        xaxis_title='Category',\n        yaxis_title='Average Usage',\n        legend_title='Timeframe'\n    )\n    fig1.show()\n    \n    # Chart 2: Percentage of users with at least one\n    counts = df.groupby('category')[cols].apply(lambda x: (x > 0).sum()).reset_index()\n    counts_melted = counts.melt(id_vars='category', var_name='timeframe', value_name='count')\n    counts_melted['total'] = counts_melted['category'].map(category_totals)\n    counts_melted['percentage'] = (counts_melted['count'] / counts_melted['total']) * 100\n    counts_melted['category_label'] = counts_melted['category'].map(category_labels)\n    \n    fig2 = px.bar(\n        counts_melted,\n        x='category_label',\n        y='percentage',\n        color='timeframe',\n        barmode='group',\n        title=f'% of Users with At Least One {title_name} by Churn Category and Timeframe',\n        category_orders={'category_label': category_order}\n    )\n    fig2.update_layout(\n        xaxis_title='Category',\n        yaxis_title='% of Users',\n        legend_title='Timeframe'\n    )\n    fig2.show()\n\n# Generate all charts\nfor metric in metrics:\n    create_charts_for_metric(metric)",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "# Generate cumulative charts for all metrics\ndef create_cumulative_charts_for_metric(metric_name):\n    cols = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n    title_name = metric_name.replace('_', ' ').title()\n    \n    # Chart 1: Cumulative average usage\n    grouped = df.groupby('category')[cols].mean().reset_index()\n    # Make cumulative\n    grouped['cumulative_short'] = grouped[cols[0]]\n    grouped['cumulative_medium'] = grouped[cols[0]] + grouped[cols[1]]\n    grouped['cumulative_long'] = grouped[cols[0]] + grouped[cols[1]] + grouped[cols[2]]\n    \n    cumulative_cols = ['cumulative_short', 'cumulative_medium', 'cumulative_long']\n    melted = grouped.melt(id_vars='category', value_vars=cumulative_cols, var_name='timeframe', value_name='value')\n    melted['category_label'] = melted['category'].map(category_labels)\n    \n    timeframe_labels_cumulative = {\n        'cumulative_short': 'Through trial',\n        'cumulative_medium': 'Through 90 days',\n        'cumulative_long': 'Through 6 months'\n    }\n    melted['timeframe_label'] = melted['timeframe'].map(timeframe_labels_cumulative)\n    \n    fig1 = px.bar(\n        melted,\n        x='category_label',\n        y='value',\n        color='timeframe_label',\n        barmode='group',\n        title=f'{title_name} Cumulative Usage by Churn Category',\n        category_orders={\n            'category_label': category_order,\n            'timeframe_label': ['Through trial', 'Through 90 days', 'Through 6 months']\n        }\n    )\n    fig1.update_layout(\n        xaxis_title='Category',\n        yaxis_title='Cumulative Average Usage',\n        legend_title='Timeframe'\n    )\n    fig1.show()\n    \n    # Chart 2: Cumulative percentage of users with at least one\n    # Check if user used feature in ANY period up to that point (not sum of counts)\n    df['_cumul_short'] = df[cols[0]] > 0\n    df['_cumul_medium'] = (df[cols[0]] > 0) | (df[cols[1]] > 0)\n    df['_cumul_long'] = (df[cols[0]] > 0) | (df[cols[1]] > 0) | (df[cols[2]] > 0)\n    \n    counts = df.groupby('category')[['_cumul_short', '_cumul_medium', '_cumul_long']].sum().reset_index()\n    counts.columns = ['category', 'cumulative_short', 'cumulative_medium', 'cumulative_long']\n    \n    counts_melted = counts.melt(id_vars='category', value_vars=cumulative_cols, var_name='timeframe', value_name='count')\n    counts_melted['total'] = counts_melted['category'].map(category_totals)\n    counts_melted['percentage'] = (counts_melted['count'] / counts_melted['total']) * 100\n    counts_melted['category_label'] = counts_melted['category'].map(category_labels)\n    counts_melted['timeframe_label'] = counts_melted['timeframe'].map(timeframe_labels_cumulative)\n    \n    fig2 = px.bar(\n        counts_melted,\n        x='category_label',\n        y='percentage',\n        color='timeframe_label',\n        barmode='group',\n        title=f'Cumulative % of Users with At Least One {title_name} by Churn Category',\n        category_orders={\n            'category_label': category_order,\n            'timeframe_label': ['Through trial', 'Through 90 days', 'Through 6 months']\n        }\n    )\n    fig2.update_layout(\n        xaxis_title='Category',\n        yaxis_title='Cumulative % of Users',\n        legend_title='Timeframe'\n    )\n    fig2.show()\n\n# Generate all cumulative charts\nfor metric in metrics:\n    create_cumulative_charts_for_metric(metric)",
"metadata": {},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

12822
churn.csv Normal file

File diff suppressed because it is too large Load Diff

2579
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

16
pyproject.toml Normal file
View File

@ -0,0 +1,16 @@
[tool.poetry]
name = "jupyter-analysis"
version = "0.1.0"
description = "Jupyter notebook project for data analysis and charting"
authors = ["Justin"]
package-mode = false
[tool.poetry.dependencies]
python = "^3.13"
jupyter = "^1.1.1"
pandas = "^3.0.0"
plotly = "^6.0.0"
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"