Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds Tests for Template Code #3

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.vscode/
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ create_tests:
.PHONY: get_data
get_data:
mkdir -p data/raw
wget -O data/raw/Practice_Level_Crosstab_Sep_24.zip https://files.digital.nhs.uk/A5/B4AB19/Practice_Level_Crosstab_Sep_24.zip
unzip -o data/raw/Practice_Level_Crosstab_Sep_24.zip -d data/raw
wget -O data/raw/_data.zip https://files.digital.nhs.uk/A5/B4AB19/Practice_Level_Crosstab_Sep_24.zip
unzip -o data/raw/_data.zip -d data/raw
rm data/raw/_data.zip


#################################################################################
Expand Down Expand Up @@ -152,4 +153,4 @@ endef
export PRINT_HELP_PYSCRIPT

help:
@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
65 changes: 65 additions & 0 deletions code_your_own_pandas_pipeline/aggregations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
This module provides functions to pivot and summarize the practice-level appointment data.
"""

import pandas as pd
from loguru import logger

# Empty DataFrame defined at module level. None of the functions visible in this
# module reference it.
# NOTE(review): presumably a template stand-in for real data - confirm before removing.
placeholder_df = pd.DataFrame()


def pivot_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Placeholder for pivoting the practice level data.

    The body only logs an info message followed by a "not yet implemented"
    warning. ``practice_data`` is not read and, because there is no ``return``
    statement, the call currently evaluates to ``None`` despite the annotated
    return type.

    Parameters
    ----------
    practice_data : pd.DataFrame
        The practice level data to pivot.

    Returns
    -------
    pd.DataFrame
        The pivoted data, once the function is implemented.
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Pivoting the practice level data.")
    logger.warning(not_implemented)


def summarize_monthly_gp_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Placeholder for summarizing monthly appointments by GP and Appointment Status.

    The body only logs an info message followed by a "not yet implemented"
    warning. ``pivot_practice_data`` is not read and, because there is no
    ``return`` statement, the call currently evaluates to ``None`` despite the
    annotated return type.

    Parameters
    ----------
    pivot_practice_data : pd.DataFrame
        The pivoted practice data to summarize.

    Returns
    -------
    pd.DataFrame
        The summarized data, once the function is implemented.
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Summarizing the monthly GP appointments.")
    logger.warning(not_implemented)


def summarize_monthly_region_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Placeholder for summarizing monthly appointments by Region and Appointment Status.

    The body only logs an info message followed by a "not yet implemented"
    warning. ``pivot_practice_data`` is not read and, because there is no
    ``return`` statement, the call currently evaluates to ``None`` despite the
    annotated return type.

    Parameters
    ----------
    pivot_practice_data : pd.DataFrame
        The pivoted practice data to summarize.

    Returns
    -------
    pd.DataFrame
        The summarized data, once the function is implemented.
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Summarizing the monthly region appointments.")
    logger.warning(not_implemented)
36 changes: 36 additions & 0 deletions code_your_own_pandas_pipeline/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Configuration file for the code_your_own_pandas_pipeline package.
"""

from pathlib import Path

from dotenv import load_dotenv
from loguru import logger

# Load environment variables from .env file if it exists
load_dotenv()

# Paths
# PROJ_ROOT is the parent of the directory that contains this file.
PROJ_ROOT = Path(__file__).resolve().parents[1]
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")

# relative_to(PROJ_ROOT) strips the root again, so DATA_DIR (and every directory
# derived from it) is the relative path "data", resolved against the current
# working directory. MODELS_DIR and REPORTS_DIR below stay absolute.
# NOTE(review): confirm the relative/absolute split is intentional.
DATA_DIR = (PROJ_ROOT / "data").relative_to(PROJ_ROOT)
RAW_DATA_DIR = DATA_DIR / "raw"
INTERIM_DATA_DIR = DATA_DIR / "interim"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
EXTERNAL_DATA_DIR = DATA_DIR / "external"

MODELS_DIR = PROJ_ROOT / "models"

REPORTS_DIR = PROJ_ROOT / "reports"
FIGURES_DIR = REPORTS_DIR / "figures"

# If tqdm is installed, configure loguru with tqdm.write
# https://github.com/Delgan/loguru/issues/135
try:
    # Only the import is guarded, so the except clause cannot hide an unrelated
    # failure raised while the logger is being reconfigured.
    from tqdm import tqdm
except ModuleNotFoundError:
    pass
else:
    try:
        # Handler id 0 is loguru's default sink.
        logger.remove(0)
    except ValueError:
        # The default sink is already gone: either this module was imported or
        # reloaded before, or the caller configured loguru itself. Leave the
        # existing sinks untouched instead of crashing or adding a duplicate.
        pass
    else:
        logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
35 changes: 35 additions & 0 deletions code_your_own_pandas_pipeline/data_in.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
This module contains the functions to read the mapping and practice crosstab data from the data
folder.
"""

import pandas as pd
from loguru import logger


def read_mapping_data() -> pd.DataFrame:
    """
    Placeholder for reading the mapping data from the data folder.

    The body only logs an info message followed by a "not yet implemented"
    warning. Because there is no ``return`` statement, the call currently
    evaluates to ``None`` despite the annotated return type.

    Returns
    -------
    pd.DataFrame
        The mapping data, once the function is implemented.
    """
    # No source is wired up yet. Interpolating a named empty placeholder keeps the
    # logged text unchanged while avoiding a double quote nested inside a
    # double-quoted f-string, which is a SyntaxError before Python 3.12.
    source_path = ""
    logger.info(f"Reading mapping data from {source_path}")

    logger.warning("This function is not yet implemented.")


def read_practice_crosstab_data() -> pd.DataFrame:
    """
    Placeholder for reading the practice crosstab data from the data folder.

    The body only logs an info message followed by a "not yet implemented"
    warning. Because there is no ``return`` statement, the call currently
    evaluates to ``None`` despite the annotated return type.

    Returns
    -------
    pd.DataFrame
        The practice crosstab data, once the function is implemented.
    """
    # No source is wired up yet. Interpolating a named empty placeholder keeps the
    # logged text unchanged while avoiding a double quote nested inside a
    # double-quoted f-string, which is a SyntaxError before Python 3.12.
    source_path = ""
    logger.info(f"Reading practice crosstab data from {source_path}")

    logger.warning("This function is not yet implemented.")
41 changes: 41 additions & 0 deletions code_your_own_pandas_pipeline/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Main pipeline for the code_your_own_pandas_pipeline package.
"""

import pandas as pd
from loguru import logger

from code_your_own_pandas_pipeline import aggregations, data_in, plots, processing

# Empty DataFrame that main() passes to every processing, aggregation and plotting
# stage in place of real data.
placeholder_df = pd.DataFrame()


def main() -> None:
    """
    Run the GP appointment data pipeline from start to finish.

    The stages are called in a fixed order: read the inputs, tidy and merge
    them, aggregate, then plot. Every stage currently receives
    ``placeholder_df`` (and ``"placeholder_str"`` as the output folder), and the
    return values of the stages are discarded.

    Returns
    -------
    None
    """
    # Register the custom "START" level only when it does not exist yet. loguru
    # documents that the severity of an existing level cannot be modified, so
    # passing ``no`` again on a second call to main() in the same process may
    # raise. Looking a level up by name raises ValueError when it is unknown.
    try:
        logger.level("START")
    except ValueError:
        logger.level("START", no=15, color="<green><bold>")
    logger.log("START", "Starting the GP Appointment Data Pipeline")

    # Stage 1: read the raw inputs.
    data_in.read_mapping_data()
    data_in.read_practice_crosstab_data()

    # Stage 2: tidy the practice data and merge it with the mapping data.
    processing.tidy_practice_level_data(placeholder_df)
    processing.merge_mapping_and_practice_data(placeholder_df, placeholder_df)

    # Stage 3: pivot and summarize.
    aggregations.pivot_practice_level_data(placeholder_df)
    aggregations.summarize_monthly_gp_appointments(placeholder_df)
    aggregations.summarize_monthly_region_appointments(placeholder_df)

    # Stage 4: plot the summaries.
    plots.plot_monthly_gp_appointments(placeholder_df, "placeholder_str")
    plots.plot_monthly_region_appointments(placeholder_df, "placeholder_str")

    logger.success("GP Appointment Data Pipeline Completed")


if __name__ == "__main__":
    main()
72 changes: 72 additions & 0 deletions code_your_own_pandas_pipeline/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
This module provides functions for generating and saving plots.
"""

import pandas as pd
from loguru import logger


def save_plot(plot, output_folder: str, plot_name: str) -> None:
    """
    Placeholder for saving a plot to the output folder.

    The body only logs where the plot would be saved, followed by a "not yet
    implemented" warning. ``plot`` is not read and nothing is written.

    Parameters
    ----------
    plot : object
        The plot to save. NOTE(review): previously documented as
        ``matplotlib.pyplot``; the parameter is unannotated - confirm the
        expected type.
    output_folder : str
        The folder the plot should be saved to.
    plot_name : str
        The name of the plot.

    Returns
    -------
    None
    """
    not_implemented = "This function is not yet implemented."

    logger.info(f"Saving the plot {plot_name} to {output_folder}.")
    logger.warning(not_implemented)


def plot_monthly_gp_appointments(
    monthly_gp_appointments: pd.DataFrame,
    output_folder: str,
) -> None:
    """
    Placeholder for plotting the monthly GP appointments.

    The body only logs an info message followed by a "not yet implemented"
    warning. Neither argument is read and no plot is produced.

    Parameters
    ----------
    monthly_gp_appointments : pd.DataFrame
        The monthly GP appointments data.
    output_folder : str
        The folder the plots should be saved to.

    Returns
    -------
    None
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Plotting the monthly GP appointments.")
    logger.warning(not_implemented)


def plot_monthly_region_appointments(
    monthly_region_appointments: pd.DataFrame,
    output_folder: str,
) -> None:
    """
    Placeholder for plotting the monthly region appointments.

    The body only logs an info message followed by a "not yet implemented"
    warning. Neither argument is read and no plot is produced.

    Parameters
    ----------
    monthly_region_appointments : pd.DataFrame
        The monthly region appointments data.
    output_folder : str
        The folder the plots should be saved to.

    Returns
    -------
    None
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Plotting the monthly region appointments.")
    logger.warning(not_implemented)
50 changes: 50 additions & 0 deletions code_your_own_pandas_pipeline/processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
This module contains the functions to process the mapping and practice crosstab data and merge them.
"""

import pandas as pd
from loguru import logger

# Empty DataFrame defined at module level. None of the functions visible in this
# module reference it.
# NOTE(review): presumably a template stand-in for real data - confirm before removing.
placeholder_df = pd.DataFrame()


def tidy_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Placeholder for tidying the practice crosstab data.

    The body only logs an info message followed by a "not yet implemented"
    warning. ``practice_data`` is not read and, because there is no ``return``
    statement, the call currently evaluates to ``None`` despite the annotated
    return type.

    Parameters
    ----------
    practice_data : pd.DataFrame
        The practice crosstab data to tidy.

    Returns
    -------
    pd.DataFrame
        The tidy practice crosstab data, once the function is implemented.
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Tidying the practice crosstab data.")
    logger.warning(not_implemented)


def merge_mapping_and_practice_data(
    mapping_data: pd.DataFrame,
    practice_data: pd.DataFrame,
) -> pd.DataFrame:
    """
    Placeholder for merging the mapping data with the practice data.

    The body only logs an info message followed by a "not yet implemented"
    warning. Neither argument is read and, because there is no ``return``
    statement, the call currently evaluates to ``None`` despite the annotated
    return type.

    Parameters
    ----------
    mapping_data : pd.DataFrame
        The mapping data.
    practice_data : pd.DataFrame
        The practice data.

    Returns
    -------
    pd.DataFrame
        The merged data, once the function is implemented.
    """
    not_implemented = "This function is not yet implemented."

    logger.info("Merging the mapping and practice data.")
    logger.warning(not_implemented)
Loading