{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup and initialization\n",
    "import json\n",
    "import warnings\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "# Load CSV file into a raw dataframe\n",
    "df = pd.read_csv('churn.csv')\n",
    "\n",
    "# Pull some quick stats\n",
    "category_totals = df.groupby('category').size()\n",
    "\n",
    "# Raw category key -> label shown on charts (dict order is the display order)\n",
    "user_category_labels = {\n",
    "    'quick-exit': 'Free trial only',\n",
    "    'fair-trial': '74 day churn',\n",
    "    'short-termer': '6 month churn',\n",
    "    'long-termer': '> 6 months active'\n",
    "}\n",
    "# Timeframe suffix of a metric key (underscores replaced by spaces) -> label\n",
    "metric_timeframe_labels = {\n",
    "    'short term': 'During free trial',\n",
    "    'medium term': 'After trial, before 90 days',\n",
    "    'long term': 'After 90 days, first 6 months'\n",
    "}\n",
    "\n",
    "# Plotly category ordering for charts whose 'category' column holds raw keys\n",
    "category_order = {\n",
    "    'category': list(user_category_labels.keys())\n",
    "}\n",
    "\n",
    "def metric_label(metric_key):\n",
    "    \"\"\"Build a display label of the form '<Name>: <Timeframe>' from a metric key.\n",
    "\n",
    "    Keys containing a 'term' segment (e.g. 'bookings_short_term') take their\n",
    "    timeframe from the last two underscore-separated segments, looked up in\n",
    "    metric_timeframe_labels; any other key is labelled 'Lifetime'.\n",
    "\n",
    "    Raises KeyError if the timeframe suffix is not in metric_timeframe_labels.\n",
    "    \"\"\"\n",
    "    parts = metric_key.split('_')\n",
    "    if 'term' in parts:\n",
    "        timeframe = metric_timeframe_labels[' '.join(parts[-2:])]\n",
    "        name = ' '.join(parts[:-2]).title()\n",
    "    else:\n",
    "        timeframe = 'Lifetime'\n",
    "        # join() is a method of the separator string; lists have no .join()\n",
    "        name = ' '.join(parts).title()\n",
    "    return f'{name}: {timeframe}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weekly data processing\n",
    "max_weeks = 30\n",
    "\n",
    "def weekly_averages(metric):\n",
    "    \"\"\"Build per-category cumulative weekly averages for one metric.\n",
    "\n",
    "    Reads the '<metric>_weekly_counts' column of df. Each non-empty cell is\n",
    "    parsed as a JSON object whose 'week_<n>' keys map to counts; only weeks\n",
    "    with 0 <= n < max_weeks are counted.\n",
    "\n",
    "    Returns a list of {'category', 'week', 'cumulative_avg'} records, one per\n",
    "    category and week, where cumulative_avg is the running total up to that\n",
    "    week divided by the number of users in the category.\n",
    "\n",
    "    Values or keys that cannot be parsed are skipped; a warning reports how\n",
    "    many were dropped.\n",
    "    \"\"\"\n",
    "    records = []\n",
    "    skipped = 0\n",
    "    metric_col = f'{metric}_weekly_counts'\n",
    "    # dropna(): a missing category matches no rows, so there is nothing to average\n",
    "    for category in df['category'].dropna().unique():\n",
    "        cat_df = df[df['category'] == category]\n",
    "        num_users = len(cat_df)\n",
    "\n",
    "        weekly_totals = [0] * max_weeks\n",
    "\n",
    "        for value in cat_df[metric_col]:\n",
    "            if pd.isna(value) or value == '{}':\n",
    "                continue\n",
    "            try:\n",
    "                counts_dict = json.loads(value)\n",
    "            except ValueError:  # json.JSONDecodeError is a ValueError subclass\n",
    "                skipped += 1\n",
    "                continue\n",
    "            if not isinstance(counts_dict, dict):\n",
    "                skipped += 1\n",
    "                continue\n",
    "            for key, count in counts_dict.items():\n",
    "                try:\n",
    "                    week_num = int(key.replace('week_', ''))\n",
    "                except ValueError:\n",
    "                    # Skip only the bad key, not the rest of the row\n",
    "                    skipped += 1\n",
    "                    continue\n",
    "                # The lower bound matters: a negative index would silently\n",
    "                # add the count to a week at the end of the list\n",
    "                if 0 <= week_num < max_weeks:\n",
    "                    weekly_totals[week_num] += count\n",
    "\n",
    "        # Calculate cumulative averages (cumulative total / number of users)\n",
    "        cumulative_avg = np.cumsum(weekly_totals) / num_users\n",
    "        # Fall back to the raw key for categories without a display label\n",
    "        category_label = user_category_labels.get(category, category)\n",
    "        for week in range(max_weeks):\n",
    "            records.append({\n",
    "                'category': category_label,\n",
    "                'week': week,\n",
    "                'cumulative_avg': cumulative_avg[week]\n",
    "            })\n",
    "    if skipped:\n",
    "        warnings.warn(\n",
    "            f'{metric_col}: skipped {skipped} malformed weekly-count value(s)',\n",
    "            stacklevel=2\n",
    "        )\n",
    "    return records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Averages chart renderer\n",
    "def _metric_bar_chart(metric_name, title, cumulative=False):\n",
    "    \"\"\"Show a grouped bar chart of per-category means for one metric.\n",
    "\n",
    "    Plots the mean of the '<metric_name>_short_term', '_medium_term' and\n",
    "    '_long_term' columns of df for each category. With cumulative=True each\n",
    "    timeframe also includes the means of the timeframes before it.\n",
    "    \"\"\"\n",
    "    metrics_to_include = [f'{metric_name}_short_term', f'{metric_name}_medium_term', f'{metric_name}_long_term']\n",
    "    labels_by_metric = {key: metric_label(key) for key in metrics_to_include}\n",
    "    # Get our averages\n",
    "    grouped = df.groupby('category')[metrics_to_include].mean().reset_index()\n",
    "    if cumulative:\n",
    "        # Make each column represent itself plus the total of its predecessors to represent a \"snapshot\" at that point.\n",
    "        for i in range(1, len(metrics_to_include)):\n",
    "            grouped[metrics_to_include[i]] += grouped[metrics_to_include[i - 1]]\n",
    "    # Pivot the data to a form Plotly will understand, swapping the raw metric\n",
    "    # keys for display labels so the legend and hover text are readable\n",
    "    melted = (\n",
    "        grouped\n",
    "        .melt(id_vars='category', var_name='timeframe', value_name='value')\n",
    "        .assign(timeframe=lambda frame: frame['timeframe'].map(labels_by_metric))\n",
    "    )\n",
    "    # Set up the basics of the chart\n",
    "    bar_chart = px.bar(\n",
    "        melted,\n",
    "        x='category',\n",
    "        y='value',\n",
    "        color='timeframe',\n",
    "        barmode='group',\n",
    "        title=title,\n",
    "        category_orders=category_order,\n",
    "        # px expects a dict of column name -> display name here\n",
    "        labels={\n",
    "            'category': 'User Group',\n",
    "            'value': 'Average Created',\n",
    "            'timeframe': 'Creation Timeframe'\n",
    "        }\n",
    "    )\n",
    "    # Make some UX tweaks\n",
    "    bar_chart.update_layout(\n",
    "        xaxis_title='User Group',\n",
    "        yaxis_title='Average Created',\n",
    "        legend_title='Creation Timeframe'\n",
    "    )\n",
    "    # Show the display label instead of the raw category key on the x axis\n",
    "    bar_chart.update_xaxes(\n",
    "        ticktext=list(user_category_labels.values()),\n",
    "        tickvals=list(user_category_labels.keys())\n",
    "    )\n",
    "    # Go!!!\n",
    "    bar_chart.show()\n",
    "\n",
    "def averages_chart(metric_name, title):\n",
    "    \"\"\"Show the average of a metric per user category, one bar per timeframe.\"\"\"\n",
    "    _metric_bar_chart(metric_name, title, cumulative=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cumulative averages chart renderer\n",
    "def cumulative_averages_chart(metric_name, title):\n",
    "    \"\"\"Show running-total averages of a metric per user category.\n",
    "\n",
    "    Same chart as averages_chart, except each timeframe bar also includes\n",
    "    the averages of the earlier timeframes.\n",
    "    \"\"\"\n",
    "    _metric_bar_chart(metric_name, title, cumulative=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weekly averages line chart renderer\n",
    "def weekly_averages_chart(metric_name, title):\n",
    "    \"\"\"Show a line chart of the cumulative weekly average of a metric per user category.\"\"\"\n",
    "    chart_data = pd.DataFrame(weekly_averages(metric_name))\n",
    "\n",
    "    # Plot\n",
    "    line_chart = px.line(\n",
    "        chart_data,\n",
    "        x='week',\n",
    "        y='cumulative_avg',\n",
    "        color='category',\n",
    "        title=title,\n",
    "        # The 'category' column already holds display labels here, so order\n",
    "        # by the labels rather than by the raw keys in category_order\n",
    "        category_orders={'category': list(user_category_labels.values())},\n",
    "        labels={'week': 'Week', 'cumulative_avg': 'Average Created', 'category': 'User Group'},\n",
    "        markers=True\n",
    "    )\n",
    "    line_chart.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "averages_chart('booking_forms', 'Average # of Booking Forms Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('booking_forms', 'Cumulative average # of Booking Forms Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('booking_forms', 'Cumulative average # of Booking Forms Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "averages_chart('bookings', 'Average # of Bookings Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('bookings', 'Cumulative average # of Bookings Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('bookings', 'Cumulative average # of Bookings Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "averages_chart('employees', 'Average # of Employees Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('employees', 'Cumulative average # of Employees Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): every other metric uses the same key for all three charts;\n",
    "# confirm that 'employee' (not 'employees') matches the weekly-counts column.\n",
    "weekly_averages_chart('employee', 'Cumulative average # of Employees Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "averages_chart('contacts', 'Average # of Contacts Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('contacts', 'Cumulative average # of Contacts Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('contacts', 'Cumulative average # of Contacts Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "averages_chart('emails', 'Average # of Emails Sent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('emails', 'Cumulative average # of Emails Sent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('emails', 'Cumulative average # of Emails Sent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "averages_chart('sms', 'Average # of SMSs Sent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('sms', 'Cumulative average # of SMSs Sent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('sms', 'Cumulative average # of SMSs Sent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "averages_chart('appointments', 'Average # of Appointments Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('appointments', 'Cumulative average # of Appointments Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('appointments', 'Cumulative average # of Appointments Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "averages_chart('checklists', 'Average # of Checklists Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('checklists', 'Cumulative average # of Checklists Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('checklists', 'Cumulative average # of Checklists Created')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "averages_chart('checklists_filled', 'Average # of Checklists Filled')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulative_averages_chart('checklists_filled', 'Cumulative average # of Checklists Filled')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weekly_averages_chart('checklists_filled', 'Cumulative average # of Checklists Filled')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}