nhsengland · josephwilson8-nhs · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024
diff --git a/.gitignore b/.gitignore
@@ -170,3 +170,4 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+.vscode/
diff --git a/Makefile b/Makefile
@@ -106,8 +106,9 @@ create_tests:
 .PHONY: get_data
 get_data:
 	mkdir -p data/raw
-	wget -O data/raw/Practice_Level_Crosstab_Sep_24.zip https://files.digital.nhs.uk/A5/B4AB19/Practice_Level_Crosstab_Sep_24.zip
-	unzip -o data/raw/Practice_Level_Crosstab_Sep_24.zip -d data/raw
+	wget -O data/raw/_data.zip https://files.digital.nhs.uk/A5/B4AB19/Practice_Level_Crosstab_Sep_24.zip
+	unzip -o data/raw/_data.zip -d data/raw
+	rm data/raw/_data.zip
 
 
 #################################################################################
@@ -152,4 +153,4 @@ endef
 export PRINT_HELP_PYSCRIPT
 
 help:
-	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
+	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
diff --git a/code_your_own_pandas_pipeline/aggregations.py b/code_your_own_pandas_pipeline/aggregations.py
@@ -0,0 +1,65 @@
+"""
+This module provides functions to pivot and summarize the practice level appointment data.
+"""
+
+import pandas as pd
+from loguru import logger
+
+placeholder_df = pd.DataFrame()
+
+
+def pivot_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Pivot the practice level data.
+
+    Parameters
+    ----------
+    practice_data : pd.DataFrame
+        The practice data. Currently unused.
+
+    Returns
+    -------
+    pd.DataFrame
+        The pivoted data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info("Pivoting the practice level data.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def summarize_monthly_gp_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Summarize the monthly appointments by GP and Appointment Status.
+
+    Parameters
+    ----------
+    pivot_practice_data : pd.DataFrame
+        The practice data. Currently unused.
+
+    Returns
+    -------
+    pd.DataFrame
+        The summarized data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info("Summarizing the monthly GP appointments.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def summarize_monthly_region_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Summarize the monthly appointments by Region and Appointment Status.
+
+    Parameters
+    ----------
+    pivot_practice_data : pd.DataFrame
+        The practice data. Currently unused.
+
+    Returns
+    -------
+    pd.DataFrame
+        The summarized data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info("Summarizing the monthly region appointments.")
+
+    logger.warning("This function is not yet implemented.")
diff --git a/code_your_own_pandas_pipeline/config.py b/code_your_own_pandas_pipeline/config.py
@@ -0,0 +1,36 @@
+"""
+Configuration file for the code_your_own_pandas_pipeline package.
+"""
+
+from pathlib import Path
+
+from dotenv import load_dotenv
+from loguru import logger
+
+# Load environment variables from .env file if it exists
+load_dotenv()
+
+# Paths (PROJ_ROOT is absolute; DATA_DIR and its children are relative to it)
+PROJ_ROOT = Path(__file__).resolve().parents[1]
+logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
+
+DATA_DIR = (PROJ_ROOT / "data").relative_to(PROJ_ROOT)
+RAW_DATA_DIR = DATA_DIR / "raw"
+INTERIM_DATA_DIR = DATA_DIR / "interim"
+PROCESSED_DATA_DIR = DATA_DIR / "processed"
+EXTERNAL_DATA_DIR = DATA_DIR / "external"
+
+MODELS_DIR = PROJ_ROOT / "models"  # absolute, unlike DATA_DIR
+
+REPORTS_DIR = PROJ_ROOT / "reports"
+FIGURES_DIR = REPORTS_DIR / "figures"
+
+# If tqdm is installed, route loguru output through tqdm.write instead of the default handler
+# https://github.com/Delgan/loguru/issues/135
+try:
+    from tqdm import tqdm
+
+    logger.remove(0)  # id 0 is loguru's pre-configured handler
+    logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+except ModuleNotFoundError:
+    pass  # tqdm is optional: keep the default loguru handler
diff --git a/code_your_own_pandas_pipeline/data_in.py b/code_your_own_pandas_pipeline/data_in.py
@@ -0,0 +1,35 @@
+"""
+This module contains the functions to read the mapping and practice crosstab data from the data
+folder.
+"""
+
+import pandas as pd
+from loguru import logger
+
+
+def read_mapping_data() -> pd.DataFrame:
+    """
+    Read the mapping data from the data folder.
+
+    Returns
+    -------
+    pd.DataFrame
+        The mapping data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info(f"Reading mapping data from {''}")  # '' not "": parses on Python < 3.12
+
+    logger.warning("This function is not yet implemented.")
+
+
+def read_practice_crosstab_data() -> pd.DataFrame:
+    """
+    Read the practice crosstab data from the data folder.
+
+    Returns
+    -------
+    pd.DataFrame
+        The practice crosstab data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info(f"Reading practice crosstab data from {''}")  # '' not "": parses on Python < 3.12
+
+    logger.warning("This function is not yet implemented.")
diff --git a/code_your_own_pandas_pipeline/pipeline.py b/code_your_own_pandas_pipeline/pipeline.py
@@ -0,0 +1,41 @@
+"""
+Main pipeline for the code_your_own_pandas_pipeline package.
+"""
+
+import pandas as pd
+from loguru import logger
+
+from code_your_own_pandas_pipeline import aggregations, data_in, plots, processing
+
+placeholder_df = pd.DataFrame()
+
+
+def main() -> None:
+    """
+    Run the GP appointment data pipeline stages in order.
+
+    Reader results are discarded; later stages get an empty placeholder DataFrame.
+    NOTE(review): the "START" level is registered on every call -- confirm loguru
+    accepts re-registration before calling main() more than once per process.
+    """
+    logger.level("START", no=15, color="<green><bold>")
+    logger.log("START", "Starting the GP Appointment Data Pipeline")
+
+    data_in.read_mapping_data()
+    data_in.read_practice_crosstab_data()
+
+    processing.tidy_practice_level_data(placeholder_df)
+    processing.merge_mapping_and_practice_data(placeholder_df, placeholder_df)
+
+    aggregations.pivot_practice_level_data(placeholder_df)
+    aggregations.summarize_monthly_gp_appointments(placeholder_df)
+    aggregations.summarize_monthly_region_appointments(placeholder_df)
+
+    plots.plot_monthly_gp_appointments(placeholder_df, "placeholder_str")
+    plots.plot_monthly_region_appointments(placeholder_df, "placeholder_str")
+
+    logger.success("GP Appointment Data Pipeline Completed")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/code_your_own_pandas_pipeline/plots.py b/code_your_own_pandas_pipeline/plots.py
@@ -0,0 +1,72 @@
+"""
+This module provides functions for generating and saving plots.
+"""
+
+import pandas as pd
+from loguru import logger
+
+
+def save_plot(plot, output_folder: str, plot_name: str) -> None:
+    """
+    Save the plot to the output folder (not yet implemented: only logs for now).
+
+    Parameters
+    ----------
+    plot : matplotlib.pyplot
+        The plot to save. Currently unused.
+    output_folder : str
+        The output folder to save the plot. Currently only written to the log.
+    plot_name : str
+        The plot name. Currently only written to the log.
+
+    Returns
+    -------
+    None
+    """
+    logger.info(f"Saving the plot {plot_name} to {output_folder}.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def plot_monthly_gp_appointments(
+    monthly_gp_appointments: pd.DataFrame, output_folder: str
+) -> None:
+    """
+    Plot the monthly GP appointments (not yet implemented: only logs for now).
+
+    Parameters
+    ----------
+    monthly_gp_appointments : pd.DataFrame
+        The monthly GP appointments data. Currently unused.
+    output_folder : str
+        The output folder to save the plots. Currently unused.
+
+    Returns
+    -------
+    None
+    """
+    logger.info("Plotting the monthly GP appointments.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def plot_monthly_region_appointments(
+    monthly_region_appointments: pd.DataFrame, output_folder: str
+) -> None:
+    """
+    Plot the monthly region appointments (not yet implemented: only logs for now).
+
+    Parameters
+    ----------
+    monthly_region_appointments : pd.DataFrame
+        The monthly region appointments data. Currently unused.
+    output_folder : str
+        The output folder to save the plots. Currently unused.
+
+    Returns
+    -------
+    None
+    """
+    logger.info("Plotting the monthly region appointments.")
+
+    logger.warning("This function is not yet implemented.")
diff --git a/code_your_own_pandas_pipeline/processing.py b/code_your_own_pandas_pipeline/processing.py
@@ -0,0 +1,50 @@
+"""
+This module contains the functions to process the mapping and practice crosstab data and merge them.
+"""
+
+import pandas as pd
+from loguru import logger
+
+placeholder_df = pd.DataFrame()
+
+
+def tidy_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Tidy the practice crosstab data.
+
+    Parameters
+    ----------
+    practice_data : pd.DataFrame
+        The practice crosstab data. Currently unused.
+
+    Returns
+    -------
+    pd.DataFrame
+        The tidy practice crosstab data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info("Tidying the practice crosstab data.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def merge_mapping_and_practice_data(
+    mapping_data: pd.DataFrame, practice_data: pd.DataFrame
+) -> pd.DataFrame:
+    """
+    Merge the mapping and practice data.
+
+    Parameters
+    ----------
+    mapping_data : pd.DataFrame
+        The mapping data. Currently unused.
+    practice_data : pd.DataFrame
+        The practice data. Currently unused.
+
+    Returns
+    -------
+    pd.DataFrame
+        The merged data. Not yet implemented: the stub only logs and returns None.
+    """
+    logger.info("Merging the mapping and practice data.")
+
+    logger.warning("This function is not yet implemented.")