Files
2026-02-01_churn/churn-analysis.ipynb

500 lines
14 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Setup and initialization\n",
"import json\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"import numpy as np\n",
"\n",
"# Load CSV file into a raw dataframe\n",
"df = pd.read_csv('churn.csv')\n",
"\n",
"# Pull some quick stats\n",
"category_totals = df.groupby('category').size()\n",
"\n",
"user_category_labels = {\n",
" 'quick-exit': 'Free trial only',\n",
" 'fair-trial': '74 day churn',\n",
" 'short-termer': '6 month churn',\n",
" 'long-termer': '> 6 months active'\n",
"}\n",
"metric_timeframe_labels = {\n",
" 'short term': 'During free trial',\n",
" 'medium term': 'After trial, before 90 days',\n",
" 'long term': 'After 90 days, first 6 months'\n",
"}\n",
"\n",
"category_order = {\n",
" 'category': list(user_category_labels.keys())\n",
"}\n",
"\n",
"def metric_label(metric_key):\n",
" parts = metric_key.split('_')\n",
" if \"term\" in parts:\n",
" timeframe = metric_timeframe_labels[\" \".join(parts[-2:])]\n",
" name = \" \".join(parts[0:-2]).title()\n",
" else:\n",
" timeframe = 'Lifetime'\n",
" name = parts.join(\" \").title()\n",
" return f\"{name}: {timeframe}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Weekly data processing\n",
"max_weeks = 30\n",
"\n",
"def weekly_averages(metric):\n",
" records = []\n",
" metric_col = f\"{metric}_weekly_counts\"\n",
" for category in df['category'].unique():\n",
" cat_df = df[df['category'] == category]\n",
" num_users = len(cat_df)\n",
" \n",
" weekly_totals = [0] * max_weeks\n",
" \n",
" for value in cat_df[metric_col]:\n",
" if pd.notna(value) and value != '{}':\n",
" try:\n",
" counts_dict = json.loads(value)\n",
" for key, count in counts_dict.items():\n",
" week_num = int(key.replace('week_', ''))\n",
" if week_num < max_weeks:\n",
" weekly_totals[week_num] += count\n",
" except (json.JSONDecodeError, ValueError):\n",
" pass\n",
" \n",
" # Calculate cumulative averages (cumulative total / number of users)\n",
" cumulative = np.cumsum(weekly_totals)\n",
" cumulative_avg = cumulative / num_users if num_users > 0 else 0\n",
" for week in range(max_weeks):\n",
" records.append({\n",
" 'category': user_category_labels[category],\n",
" 'week': week,\n",
" 'cumulative_avg': cumulative_avg[week]\n",
" })\n",
" return records"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Averages chart renderer\n",
"def averages_chart(metric_name, title):\n",
" user_categories_to_include = ['quick-exit', 'fair-trial', 'short-termer', 'active-user']\n",
" metrics_to_include = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n",
" metric_labels = [metric_label(key) for key in metrics_to_include]\n",
" # Get our averages\n",
" grouped = df.groupby('category')[metrics_to_include].mean().reset_index()\n",
" # Pivot the data to a form Plotly will understand\n",
" melted = grouped.melt(id_vars='category', var_name='timeframe', value_name='value')\n",
" # Set up the basics of the chart\n",
" bar_chart = px.bar(\n",
" melted,\n",
" x='category',\n",
" y='value',\n",
" color='timeframe',\n",
" barmode='group',\n",
" title=title,\n",
" category_orders=category_order,\n",
" labels=metric_labels\n",
" )\n",
" # Make some UX tweaks\n",
" bar_chart.update_layout(\n",
" xaxis_title='User Group',\n",
" yaxis_title='Average Created',\n",
" legend_title='Creation Timeframe'\n",
" )\n",
" bar_chart.update_xaxes(\n",
" ticktext=list(user_category_labels.values()),\n",
" tickvals=list(user_category_labels.keys())\n",
" )\n",
" for idx, name in enumerate(metric_labels):\n",
" bar_chart.data[idx].name = name\n",
" # Go!!!\n",
" \n",
" bar_chart.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative averages chart renderer\n",
"def cumulative_averages_chart(metric_name, title):\n",
" user_categories_to_include = ['quick-exit', 'fair-trial', 'short-termer', 'active-user']\n",
" metrics_to_include = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n",
" metric_labels = [metric_label(key) for key in metrics_to_include]\n",
" # Get our averages\n",
" grouped = df.groupby('category')[metrics_to_include].mean().reset_index()\n",
" # Make each column represent itself plus the total of its predecessors to represet a \"snapshot\" at that point.\n",
" for i in range(1, len(metrics_to_include)):\n",
" grouped[metrics_to_include[i]] += grouped[metrics_to_include[i - 1]]\n",
" # Pivot the data to a form Plotly will understand\n",
" melted = grouped.melt(id_vars='category', var_name='timeframe', value_name='value')\n",
" # Set up the basics of the chart\n",
" bar_chart = px.bar(\n",
" melted,\n",
" x='category',\n",
" y='value',\n",
" color='timeframe',\n",
" barmode='group',\n",
" title=title,\n",
" category_orders=category_order,\n",
" labels=metric_labels\n",
" )\n",
" # Make some UX tweaks\n",
" bar_chart.update_layout(\n",
" xaxis_title='User Group',\n",
" yaxis_title='Average Created',\n",
" legend_title='Creation Timeframe'\n",
" )\n",
" bar_chart.update_xaxes(\n",
" ticktext=list(user_category_labels.values()),\n",
" tickvals=list(user_category_labels.keys())\n",
" )\n",
" for idx, name in enumerate(metric_labels):\n",
" bar_chart.data[idx].name = name\n",
" # Go!!!\n",
" \n",
" bar_chart.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Weekly averages line chart renderer\n",
"def weekly_averages_chart(metric_name, title):\n",
" chart_data = pd.DataFrame(weekly_averages(metric_name))\n",
" \n",
" # Plot\n",
" line_chart = px.line(\n",
" chart_data,\n",
" x='week',\n",
" y='cumulative_avg',\n",
" color='category',\n",
" title=title,\n",
" category_orders=category_order,\n",
" labels={'week': 'Week', 'cumulative_avg': 'Average Created', 'category': 'User Group'},\n",
" markers=True\n",
" )\n",
" line_chart.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"averages_chart('booking_forms', 'Average # of Booking Forms Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('booking_forms', 'Cumulative average # of Booking Forms Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('booking_forms', 'Cumulative average # of Booking Forms Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"averages_chart('bookings', 'Average # of Bookings Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('bookings', 'Cumulative average # of Bookings Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('bookings', 'Cumulative average # of Bookings Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"averages_chart('employees', 'Average # of Employees Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('employees', 'Cumulative average # of Employees Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('employee', 'Cumulative average # of Employeess Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"averages_chart('contacts', 'Average # of Contacts Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('contacts', 'Cumulative average # of Contacts Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('contacts', 'Cumulative average # of Contacts Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"averages_chart('emails', 'Average # of Emails Sent')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('emails', 'Cumulative average # of Emails Sent')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('emails', 'Cumulative average # of Emails Sent')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"averages_chart('sms', 'Average # of SMSs Sent')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('sms', 'Cumulative average # of SMSs Sent')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('sms', 'Cumulative average # of SMSs Sent')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"averages_chart('appointments', 'Average # of Appointments Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('appointments', 'Cumulative average # of Appointments Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('appointments', 'Cumulative average # of Appointments Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"averages_chart('checklists', 'Average # of Checklists Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('checklists', 'Cumulative average # of Checklists Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('checklists', 'Cumulative average # of Checklists Created')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"averages_chart('checklists_filled', 'Average # of Checklists Filled')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cumulative_averages_chart('checklists_filled', 'Cumulative average # of Checklists Filled')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_averages_chart('checklists_filled', 'Cumulative average # of Checklists Filled')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}