Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Enable public version of app with courser data #111

Merged
merged 15 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
**/renv
**/.Rprofile
**/.Renviron
17 changes: 10 additions & 7 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
ENV=test
DATA_VOLUME_PATH=./data/test_data
DATA_VOLUME_PATH=./data/test_data/internal

SHINY_PORT: 3838

# For preprocessing
PREPROCESS_DB_NAME= # name of the source database
PREPROCESS_HOST= # host address for the source database
PREPROCESS_PORT= # port on which to connect to the source database
PREPROCESS_DB_USERNAME= # username for the source database
PREPROCESS_DB_PASSWORD= # password for the source database
PREPROCESS_DB_CDM_SCHEMA= # Schema name in the database to connect the OMOP CDM to
PREPROCESS_DB_NAME= # name of the source database
PREPROCESS_HOST= # host address for the source database
PREPROCESS_PORT= # port on which to connect to the source database
PREPROCESS_DB_USERNAME= # username for the source database
PREPROCESS_DB_PASSWORD= # password for the source database
PREPROCESS_DB_CDM_SCHEMA= # Schema name in the database to connect the OMOP CDM to
PREPROCESS_SUMMARISE_LEVEL=monthly # Level to summarise record counts at (monthly or quarterly)

# Low-frequency replacement
LOW_FREQUENCY_THRESHOLD=5
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ po/*~
rsconnect/

!dev
.env
*.env
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,18 @@ docker compose up -d omopcat

By default, the app will be hosted at `http://localhost:3838`.

See the [deployment docs](./deploy/README.md) for more details.
### Public version

Copy the `public.env.sample` to `public.env` and fill out the necessary environment variables.

Then run the pre-processing pipeline with

```sh
docker compose --env-file public.env run preprocess
```

and deploy the app with

```sh
docker compose --env-file public.env -p omopcat-public up -d omopcat
```
9 changes: 4 additions & 5 deletions app/R/fct_monthly_count_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ monthly_count_plot <- function(monthly_counts) {
stopifnot(is.data.frame(monthly_counts))
stopifnot(all(c("date_year", "date_month", "person_count") %in% colnames(monthly_counts)))

monthly_counts$date <- .convert_to_date(monthly_counts$date_year, monthly_counts$date_month)
monthly_counts$date <- lubridate::make_date(
year = monthly_counts$date_year,
month = monthly_counts$date_month
)

ggplot(monthly_counts, aes(x = .data$date, y = .data$record_count)) +
geom_bar(aes(fill = .data$concept_name), stat = "identity") +
Expand All @@ -26,7 +29,3 @@ monthly_count_plot <- function(monthly_counts) {
scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1))
}

.convert_to_date <- function(date_year, date_month) {
as.Date(paste0(date_year, "-", date_month, "-01"))
}
10 changes: 10 additions & 0 deletions app/R/utils_get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,19 @@ get_monthly_counts <- function() {
} else {
data <- .read_parquet_table("omopcat_monthly_counts")
}

if ("date_quarter" %in% colnames(data)) {
## Set `date_month` to the first month of the quarter
data$date_month <- .quarter_to_month(data$date_quarter)
}

return(data)
}

.quarter_to_month <- function(quarter) {
return((quarter - 1) * 3 + 1)
}

get_summary_stats <- function() {
if (should_use_dev_data()) {
out <- readr::read_csv(
Expand Down
Binary file added data/test_data/internal/omopcat_concepts.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed data/test_data/omopcat_concepts.parquet
Binary file not shown.
Binary file removed data/test_data/omopcat_monthly_counts.parquet
Binary file not shown.
Binary file removed data/test_data/omopcat_summary_stats.parquet
Binary file not shown.
Binary file added data/test_data/public/omopcat_concepts.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 2 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ services:
EUNOMIA_DATA_FOLDER: /mnt/preprocessing/data-raw/test_db
LOW_FREQUENCY_THRESHOLD: ${LOW_FREQUENCY_THRESHOLD}
LOW_FREQUENCY_REPLACEMENT: ${LOW_FREQUENCY_REPLACEMENT}
SUMMARISE_LEVEL: ${PREPROCESS_SUMMARISE_LEVEL}
command: ["R", "-e", "omopcat.preprocessing::preprocess()"]
volumes:
- ${DATA_VOLUME_PATH}:/mnt/preprocessing/data
Expand Down Expand Up @@ -49,4 +50,4 @@ services:
volumes:
- ${DATA_VOLUME_PATH}:/etc/omopcat/data
ports:
- 3838:3838
- ${SHINY_PORT}:3838
milanmlft marked this conversation as resolved.
Show resolved Hide resolved
97 changes: 69 additions & 28 deletions preprocessing/R/monthly_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,57 @@
#' @param cdm A [`CDMConnector`] object, e.g. from [`CDMConnector::cdm_from_con()`]
#' @param threshold Threshold value below which values will be replaced by `replacement`
#' @param replacement Value with which values below `threshold` will be replaced
#' @param level At which resolution the counts should be summarised.
#' Currently supports `"monthly"` or `"quarterly"`.
#'
#' @return A `data.frame` with the monthly counts
#' @keywords internal
generate_monthly_counts <- function(cdm, threshold, replacement) {
generate_monthly_counts <- function(cdm, threshold, replacement,
level = c("monthly", "quarterly")) {
level <- match.arg(level)
.summarise <- function(...) summarise_counts(..., level = level)

# Combine results for all tables
out <- dplyr::bind_rows(
cdm$condition_occurrence |> calculate_monthly_counts(
.data$condition_concept_id, .data$condition_start_date
arg_list <- list(
list(
omop_table = cdm[["measurement"]],
concept_col = "measurement_concept_id",
date_col = "measurement_date"
),
list(
omop_table = cdm[["observation"]],
concept_col = "observation_concept_id",
date_col = "observation_date"
),
list(
omop_table = cdm[["condition_occurrence"]],
concept_col = "condition_concept_id",
date_col = "condition_start_date"
),
list(
omop_table = cdm[["drug_exposure"]],
concept_col = "drug_concept_id",
date_col = "drug_exposure_start_date"
),
list(
omop_table = cdm[["procedure_occurrence"]],
concept_col = "procedure_concept_id",
date_col = "procedure_date"
),
list(
omop_table = cdm[["device_exposure"]],
concept_col = "device_concept_id",
date_col = "device_exposure_start_date"
),
cdm$drug_exposure |>
calculate_monthly_counts(.data$drug_concept_id, .data$drug_exposure_start_date),
cdm$procedure_occurrence |>
calculate_monthly_counts(.data$procedure_concept_id, .data$procedure_date),
cdm$device_exposure |>
calculate_monthly_counts(.data$device_concept_id, .data$device_exposure_start_date),
cdm$measurement |>
calculate_monthly_counts(.data$measurement_concept_id, .data$measurement_date),
cdm$observation |>
calculate_monthly_counts(.data$observation_concept_id, .data$observation_date),
cdm$specimen |>
calculate_monthly_counts(.data$specimen_concept_id, .data$specimen_date)
list(
omop_table = cdm[["specimen"]],
concept_col = "specimen_concept_id",
date_col = "specimen_date"
)
)

out <- purrr::map_dfr(arg_list, function(args) do.call(.summarise, args))

# Map concept names to the concept IDs
concept_names <- dplyr::select(cdm$concept, "concept_id", "concept_name") |>
dplyr::filter(.data$concept_id %in% out$concept_id) |>
Expand All @@ -39,31 +67,44 @@ generate_monthly_counts <- function(cdm, threshold, replacement) {
)
}


#' Calculate monthly statistics for an OMOP concept
#' Summarise record counts
#'
#' @param omop_table A table from the OMOP CDM
#' @param concept The name of the concept column to calculate statistics for
#' @param date The name of the date column to calculate statistics for
#' @param concept_col The name of the concept column to calculate statistics for
#' @param date_col The name of the date column to calculate statistics for
#' @param level The resolution at which to summarise the record counts.
#' Currently supports `"monthly"` or `"quarterly"`
#'
#' @return A `data.frame` with the following columns:
#' - `concept_id`: The concept ID
#' - `concept_name`: The concept name
#' - `date_year`: The year of the date
#' - `date_month`: The month of the date
#' - `date_month` or `date_quarter`: The month or quarter of the date, depending on `level`
#' - `person_count`: The number of unique patients per concept for each month
#' - `records_per_person`: The average number of records per person per concept for each month
#' @keywords internal
calculate_monthly_counts <- function(omop_table, concept, date) {
# Extract year and month from date column
summarise_counts <- function(omop_table, concept_col, date_col, level) {
group_by_var <- switch(level,
monthly = "date_month",
quarterly = "date_quarter",
stop(sprintf("Summary level `%s` not supported", level))
)

# Extract year, month and quarter from date column
omop_table <- dplyr::mutate(omop_table,
concept_id = {{ concept }},
date_year = as.integer(lubridate::year({{ date }})),
date_month = as.integer(lubridate::month({{ date }}))
concept_id = .data[[concept_col]],
date_year = as.integer(lubridate::year(.data[[date_col]])),
date_month = as.integer(lubridate::month(.data[[date_col]]))
)

if (level == "quarterly") {
# NOTE: lubridate::quarter is not supported for all SQL back-ends
omop_table <- omop_table |>
dplyr::mutate(date_quarter = as.integer(lubridate::quarter(.data[[date_col]])))
}

omop_table |>
dplyr::group_by(.data$date_year, .data$date_month, .data$concept_id) |>
dplyr::group_by(.data$date_year, .data[[group_by_var]], .data$concept_id) |>
dplyr::summarise(
record_count = dplyr::n(),
person_count = dplyr::n_distinct(.data$person_id),
Expand All @@ -79,7 +120,7 @@ calculate_monthly_counts <- function(omop_table, concept, date) {
dplyr::select(
"concept_id",
"date_year",
"date_month",
dplyr::all_of(group_by_var),
"record_count",
"person_count",
"records_per_person"
Expand Down
11 changes: 9 additions & 2 deletions preprocessing/R/preprocess.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,15 @@ preprocess <- function(out_path = Sys.getenv("PREPROCESS_OUT_PATH")) {

threshold <- Sys.getenv("LOW_FREQUENCY_THRESHOLD")
replacement <- Sys.getenv("LOW_FREQUENCY_REPLACEMENT")
summarise_level <- Sys.getenv("SUMMARISE_LEVEL")

cli::cli_alert_info("Summarising record counts at the '{summarise_level}' level")

cli::cli_progress_message("Generating monthly_counts table")
monthly_counts <- generate_monthly_counts(cdm, threshold = threshold, replacement = replacement)
monthly_counts <- generate_monthly_counts(cdm,
threshold = threshold, replacement = replacement,
level = summarise_level
)

cli::cli_progress_message("Generating summary_stats table")
summary_stats <- generate_summary_stats(cdm, threshold = threshold, replacement = replacement)
Expand Down Expand Up @@ -79,7 +85,8 @@ preprocess <- function(out_path = Sys.getenv("PREPROCESS_OUT_PATH")) {
"DB_USERNAME",
"DB_PASSWORD",
"LOW_FREQUENCY_THRESHOLD",
"LOW_FREQUENCY_REPLACEMENT"
"LOW_FREQUENCY_REPLACEMENT",
"SUMMARISE_LEVEL"
)

missing <- Sys.getenv(required_envvars) == ""
Expand Down
10 changes: 9 additions & 1 deletion preprocessing/man/generate_monthly_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading