nhsengland · josephwilson8-nhs · Dec 12, 2024 · Dec 12, 2024 · Dec 12, 2024 · Dec 12, 2024
diff --git a/code_your_own_pandas_pipeline/aggregations.py b/code_your_own_pandas_pipeline/aggregations.py
@@ -1,65 +1,197 @@
 """
-This modules provides function to pivot and summarize the practice level appointment data.
-"""
+This module provides functions to pivot and summarize practice-level appointment data.
 
-import pandas as pd
-from loguru import logger
+Functions:
+    pivot_practice_level_data(practice_level_data: pd.DataFrame, index: Optional[list[str]] = None, columns="APPT_STATUS", values="COUNT_OF_APPOINTMENTS", rename_columns: Optional[dict[str, str]] = None) -> pd.DataFrame:
 
-placeholder_df = pd.DataFrame()
+    summarize_monthly_appointment_status(practice_level_data: pd.DataFrame) -> pd.DataFrame:
+
+    summarize_monthly_aggregate_appointments(practice_level_pivot: pd.DataFrame, agg_cols: Optional[list[str]] = None, add_rate_cols: bool = True) -> pd.DataFrame:
+
+    batch_summarize_monthly_aggregate_appointments(practice_level_pivot: pd.DataFrame, agg_cols: Optional[list[str]] = None, add_rate_cols: bool = True) -> Dict[str, pd.DataFrame]:
+"""
 
+from typing import Dict, Optional
 
-def pivot_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
+import pandas as pd
+from loguru import logger
+from tqdm import tqdm
+
+from code_your_own_pandas_pipeline.calculations import calculate_appointment_columns
+
+AGG_COLS = [
+    "GP_NAME",
+    "SUPPLIER",
+    "PCN_NAME",
+    "SUB_ICB_LOCATION_NAME",
+    "ICB_NAME",
+    "REGION_NAME",
+    "HCP_TYPE",
+    "APPT_MODE",
+    "NATIONAL_CATEGORY",
+    "TIME_BETWEEN_BOOK_AND_APPT",
+]
+
+
+def pivot_practice_level_data(
+    practice_level_data: pd.DataFrame,
+    index: Optional[list[str]] = None,
+    columns="APPT_STATUS",
+    values="COUNT_OF_APPOINTMENTS",
+    rename_columns: Optional[dict[str, str]] = None,
+) -> pd.DataFrame:
     """
     Pivot the practice level data.
 
     Parameters
     ----------
-    practice_data : pd.DataFrame
-        The practice data.
+    practice_level_data : pd.DataFrame
+        The DataFrame containing the practice level data.
+    index : list of str, optional
+        The columns to use as index for the pivot table. If None, defaults to [
+            "APPOINTMENT_MONTH_START_DATE",
+            "GP_NAME",
+            "SUPPLIER",
+            "PCN_NAME",
+            "SUB_ICB_LOCATION_NAME",
+            "ICB_NAME",
+            "REGION_NAME",
+            "HCP_TYPE",
+            "APPT_MODE",
+            "NATIONAL_CATEGORY",
+            "TIME_BETWEEN_BOOK_AND_APPT"
+        ].
+    columns : str, optional
+        The column to use for the pivot table columns, by default "APPT_STATUS".
+    values : str, optional
+        The column to use for the pivot table values, by default "COUNT_OF_APPOINTMENTS".
+    rename_columns : dict of str, str, optional
+        Dictionary to rename columns. If None or empty, defaults to {"DNA": "DID_NOT_ATTEND", "Attended": "ATTENDED", "Unknown": "UNKNOWN"}.
 
     Returns
     -------
     pd.DataFrame
-        The pivoted data.
+        The pivoted DataFrame.
     """
-    logger.info("Pivoting the practice level data.")
+    if not index:
+        index = ["APPOINTMENT_MONTH_START_DATE", *AGG_COLS]
+    if not rename_columns:
+        rename_columns = {"DNA": "DID_NOT_ATTEND", "Attended": "ATTENDED", "Unknown": "UNKNOWN"}
+
+    logger.info("Pivoting practice level data")
+    practice_level_pivot = practice_level_data.pivot(index=index, columns=columns, values=values)
+
+    practice_level_pivot = practice_level_pivot.reset_index()
+
+    practice_level_pivot = practice_level_pivot.rename(columns=rename_columns)
+
+    return practice_level_pivot
 
-    logger.warning("This function is not yet implemented.")
 
+def summarize_monthly_appointment_status(practice_level_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Summarize the monthly appointment status.
+
+    Parameters
+    ----------
+    practice_level_data : pd.DataFrame
+        The DataFrame containing the practice level appointment data. It is
+        grouped by the "APPOINTMENT_MONTH_START_DATE" and "APPT_STATUS"
+        columns and its "COUNT_OF_APPOINTMENTS" column is summed, so all
+        three columns must be present. The status column is returned
+        renamed to "Appointment Status".
 
-def summarize_monthly_gp_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame summarizing the count of each appointment status per month.
+    """
+    logger.info("Summarizing monthly appointment status")
+    month_and_status_appointments = (
+        practice_level_data.groupby(["APPOINTMENT_MONTH_START_DATE", "APPT_STATUS"])
+        .agg({"COUNT_OF_APPOINTMENTS": "sum"})
+        .reset_index()
+        .rename(columns={"APPT_STATUS": "Appointment Status"})
+    )
+
+    return month_and_status_appointments
+
+
+def summarize_monthly_aggregate_appointments(
+    practice_level_pivot: pd.DataFrame,
+    agg_cols: Optional[list[str]] = None,
+    add_rate_cols: bool = True,
+) -> pd.DataFrame:
     """
-    Summarize the monthly appointments by GP and Appointment Status.
+    Summarize the monthly aggregate appointments.
 
     Parameters
     ----------
-    practice_data : pd.DataFrame
-        The practice data.
+    practice_level_pivot : pd.DataFrame
+        The pivoted practice level data. Must contain the columns
+        "APPOINTMENT_MONTH_START_DATE", "ATTENDED", "DID_NOT_ATTEND" and "UNKNOWN".
+    agg_cols : list of str, optional
+        Extra columns to group by alongside the month. Defaults to no extra columns.
+    add_rate_cols : bool, optional
+        Whether to pass the summary through calculate_appointment_columns,
+        by default True.
 
     Returns
     -------
     pd.DataFrame
-        The summarized data.
+        A DataFrame summarizing the aggregated values per month.
     """
-    logger.info("Summarizing the monthly GP appointments.")
+    if not agg_cols:
+        agg_cols = []
+
+    monthly_aggregate_appointments = (
+        practice_level_pivot.groupby(["APPOINTMENT_MONTH_START_DATE", *agg_cols])
+        .agg({"ATTENDED": "sum", "DID_NOT_ATTEND": "sum", "UNKNOWN": "sum"})
+        .reset_index()
+    )
 
-    logger.warning("This function is not yet implemented.")
+    if add_rate_cols:
+        monthly_aggregate_appointments = calculate_appointment_columns(
+            monthly_aggregate_appointments
+        )
 
+    return monthly_aggregate_appointments
 
-def summarize_monthly_region_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
+
+def batch_summarize_monthly_aggregate_appointments(
+    practice_level_pivot: pd.DataFrame,
+    agg_cols: Optional[list[str]] = None,
+    add_rate_cols: bool = True,
+) -> Dict[str, pd.DataFrame]:
     """
-    Summarize the monthly appointments by Region and Appointment Status.
+    Batch summarize monthly aggregate appointments.
 
     Parameters
     ----------
-    practice_data : pd.DataFrame
-        The practice data.
+    practice_level_pivot : pd.DataFrame
+        DataFrame containing practice level pivot data.
+    agg_cols : list of str, optional
+        Columns to summarize by; one summary is produced per column. If None, defaults to AGG_COLS.
+    add_rate_cols : bool, optional
+        Whether to add rate columns to the summary DataFrame, by default True.
 
     Returns
     -------
-    pd.DataFrame
-        The summarized data.
+    Dict[str, pd.DataFrame]
+        Dictionary where keys are the aggregation columns and values are the
+        summarized DataFrames for each aggregation column.
     """
-    logger.info("Summarizing the monthly region appointments.")
-
-    logger.warning("This function is not yet implemented.")
+    if agg_cols is None:
+        agg_cols = AGG_COLS
+
+    monthly_aggregate_appointments = {}
+    logger.info("Batch summarizing monthly aggregate appointments")
+    tqdm_agg_cols = tqdm(agg_cols)
+    for agg_col in tqdm_agg_cols:
+        tqdm_agg_cols.set_description_str(f"Creating monthly appointment summaries for {agg_col}")
+        summary_df = summarize_monthly_aggregate_appointments(
+            practice_level_pivot, [agg_col], add_rate_cols
+        )
+        monthly_aggregate_appointments[agg_col] = summary_df
+
+    return monthly_aggregate_appointments
diff --git a/code_your_own_pandas_pipeline/calculations.py b/code_your_own_pandas_pipeline/calculations.py
@@ -0,0 +1,117 @@
+"""
+This module provides functions to calculate appointment statistics for a given practice level DataFrame.
+
+Functions
+---------
+calculate_total_appointments(practice_level_pivot: pd.DataFrame) -> pd.DataFrame
+
+calculate_did_not_attend_rate(practice_level_pivot: pd.DataFrame) -> pd.DataFrame
+
+calculate_attended_rate(practice_level_pivot: pd.DataFrame) -> pd.DataFrame
+
+calculate_appointment_columns(practice_level_pivot: pd.DataFrame) -> pd.DataFrame
+
+"""
+
+from typing import Optional
+
+import pandas as pd
+from matplotlib import axis
+
+# from loguru import logger
+
+
+def calculate_total_appointments(
+    practice_level_pivot: pd.DataFrame, appointment_cols: Optional[list[str]] = None
+) -> pd.DataFrame:
+    """
+    Calculate the total number of appointments by summing the appointment count columns.
+
+    Parameters
+    ----------
+    practice_level_pivot : pd.DataFrame
+        A DataFrame containing the columns named in appointment_cols (by default "ATTENDED", "DID_NOT_ATTEND" and "UNKNOWN"); missing values in them are filled with 0.
+
+    Returns
+    -------
+    pd.DataFrame
+        The input DataFrame with an additional column "TOTAL_APPOINTMENTS" which is the row-wise sum of the appointment_cols columns.
+    """
+    if not appointment_cols:
+        appointment_cols = ["ATTENDED", "DID_NOT_ATTEND", "UNKNOWN"]
+
+    # logger.info("Calculating total appointments")
+    practice_level_pivot[appointment_cols] = practice_level_pivot[appointment_cols].fillna(
+        0, inplace=False
+    )
+
+    practice_level_pivot["TOTAL_APPOINTMENTS"] = practice_level_pivot[appointment_cols].sum(axis=1)
+
+    return practice_level_pivot
+
+
+def calculate_did_not_attend_rate(practice_level_pivot) -> pd.DataFrame:
+    """
+    Calculate the rate of missed appointments.
+
+    Parameters
+    ----------
+    practice_level_pivot : pd.DataFrame
+        A DataFrame containing the columns "DID_NOT_ATTEND" and "TOTAL_APPOINTMENTS" (the latter as added by calculate_total_appointments).
+
+    Returns
+    -------
+    pd.DataFrame
+        The input DataFrame with an additional column "DID_NOT_ATTEND_RATE" which is the rate of missed appointments.
+    """
+    # logger.info("Calculating did not attend rate")
+    practice_level_pivot["DID_NOT_ATTEND_RATE"] = (
+        practice_level_pivot["DID_NOT_ATTEND"] / practice_level_pivot["TOTAL_APPOINTMENTS"]
+    )
+
+    return practice_level_pivot
+
+
+def calculate_attended_rate(practice_level_pivot) -> pd.DataFrame:
+    """
+    Calculate the rate of attended appointments.
+
+    Parameters
+    ----------
+    practice_level_pivot : pd.DataFrame
+        A DataFrame containing the columns "ATTENDED" and "TOTAL_APPOINTMENTS" (the latter as added by calculate_total_appointments).
+
+    Returns
+    -------
+    pd.DataFrame
+        The input DataFrame with an additional column "ATTENDED_RATE" which is the rate of attended appointments.
+    """
+    # logger.info("Calculating attended rate")
+    practice_level_pivot["ATTENDED_RATE"] = (
+        practice_level_pivot["ATTENDED"] / practice_level_pivot["TOTAL_APPOINTMENTS"]
+    )
+
+    return practice_level_pivot
+
+
+def calculate_appointment_columns(practice_level_pivot) -> pd.DataFrame:
+    """
+    Calculate the total number of appointments, the rate of missed appointments, and the rate of attended appointments.
+
+    Parameters
+    ----------
+    practice_level_pivot : pd.DataFrame
+        A DataFrame containing columns "ATTENDED", "DID_NOT_ATTEND" and "UNKNOWN" holding the appointment counts for each status.
+
+    Returns
+    -------
+    pd.DataFrame
+        The input DataFrame with additional columns "TOTAL_APPOINTMENTS", "DID_NOT_ATTEND_RATE", and "ATTENDED_RATE".
+    """
+    # logger.info("Calculating appointment columns")
+    practice_level_pivot = (
+        practice_level_pivot.pipe(calculate_total_appointments)
+        .pipe(calculate_attended_rate)
+        .pipe(calculate_did_not_attend_rate)
+    )
+    return practice_level_pivot