diff --git a/DESCRIPTION b/DESCRIPTION index 919c6e8..0127813 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Imports: glue, tidyr, withr, + forcats, readr, lubridate, dplyr diff --git a/NAMESPACE b/NAMESPACE index 939f17c..117a674 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,8 +8,10 @@ importFrom(duckdb,duckdb) importFrom(ggplot2,aes) importFrom(ggplot2,geom_bar) importFrom(ggplot2,geom_boxplot) +importFrom(ggplot2,geom_col) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) +importFrom(ggplot2,labs) importFrom(ggplot2,xlab) importFrom(ggplot2,ylab) importFrom(glue,glue) diff --git a/R/app_server.R b/R/app_server.R index d96315f..f259d0e 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -16,10 +16,10 @@ app_server <- function(input, output, session) { selected_row <- mod_datatable_server("totals", selected_data) selected_dates <- mod_date_range_server("date_range") - # TODO: refactor monthly_count and stat_numeric modules into a single module + # TODO: refactor monthly_count and summary_stat modules into a single module?n # https://github.com/UCLH-Foundry/omop-data-catalogue/issues/30 mod_monthly_count_server("monthly_count", monthly_counts, selected_row, selected_dates) - mod_stat_numeric_server("stat_numeric", summary_stats, selected_row) + mod_summary_stat_server("summary_stat", summary_stats, selected_row) mod_export_tab_server("export_tab", selected_data) } diff --git a/R/app_ui.R b/R/app_ui.R index 26ca545..d65e551 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -22,7 +22,7 @@ app_ui <- function(request) { card(mod_datatable_ui("totals")), layout_columns( card(mod_monthly_count_ui("monthly_count")), - card(mod_stat_numeric_ui("stat_numeric")) + card(mod_summary_stat_ui("summary_stat")) ) ), nav_panel( diff --git a/R/fct_stat_numeric_plot.R b/R/fct_stat_numeric_plot.R deleted file mode 100644 index 7d3b2f5..0000000 --- a/R/fct_stat_numeric_plot.R +++ /dev/null @@ -1,49 +0,0 @@ -#' stat_numeric_plot -#' -#' Generates a boxplot of the summary statistics for a numeric concept. -#' Uses pre-calculated `mean` and `sd` to generate the boxplot. -#' -#' Expects the input data to have the following columns: -#' - `concept_id`: The concept ID. -#' - `summary_attribute`: The type of the summary attribute, e.g. `mean` or `sd`. -#' - `value_as_number`: The value of the summary attribute as a numeric value. -#' -#' @param summary_stats A `data.frame` containing the summary statistics. -#' @param plot_title A `character`, to be used as title of the plot. -#' -#' @return A `ggplot2` object. -#' -#' @importFrom ggplot2 ggplot aes geom_boxplot -#' @noRd -stat_numeric_plot <- function(summary_stats, plot_title) { - processed_stats <- .process_summary_stats(summary_stats) - - mean <- sd <- concept_id <- NULL - ggplot(processed_stats, aes(x = factor(concept_id))) + - geom_boxplot( - aes( - lower = mean - sd, - upper = mean + sd, - middle = mean, - ymin = mean - 3 * sd, - ymax = mean + 3 * sd - ), - stat = "identity" - ) + - xlab(NULL) + - ggtitle(plot_title) -} - -.process_summary_stats <- function(summary_stats) { - # We expect only single concept ID at this point - # NOTE: this might change when we support bundles of concepts, in which case we might want to - # display the entire batch in one plot - stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1) - stopifnot(c("concept_id", "summary_attribute", "value_as_number") %in% names(summary_stats)) - - tidyr::pivot_wider(summary_stats, - id_cols = "concept_id", - names_from = "summary_attribute", - values_from = "value_as_number" - ) -} diff --git a/R/fct_summary_stat_plot.R b/R/fct_summary_stat_plot.R new file mode 100644 index 0000000..9a14fe1 --- /dev/null +++ b/R/fct_summary_stat_plot.R @@ -0,0 +1,111 @@ +#' summary_stat_plot +#' +#' Wrapper function to generate a plot for a summary statistic depending on its type +#' (categorical or numeric). +#' +#' @param summary_stats A `data.frame` containing the summary statistics. +#' @param plot_title A `character`, to be used as title of the plot. +#' +#' @return A `ggplot2` object. +#' +#' @noRd +summary_stat_plot <- function(summary_stats, plot_title) { + if (.is_categorical(summary_stats)) { + stat_categorical_plot(summary_stats, plot_title) + } else { + stat_numeric_plot(summary_stats, plot_title) + } +} + +#' stat_numeric_plot +#' +#' Generates a boxplot of the summary statistics for a numeric concept. +#' Uses pre-calculated `mean` and `sd` to generate the boxplot. +#' +#' Expects the input data to have the following columns: +#' - `concept_id`: The concept ID. +#' - `summary_attribute`: The type of the summary attribute, e.g. `mean` or `sd`. +#' - `value_as_number`: The value of the summary attribute as a numeric value. +#' +#' @param summary_stats A `data.frame` containing the summary statistics. +#' @param plot_title A `character`, to be used as title of the plot. +#' +#' @return A `ggplot2` object. +#' +#' @importFrom ggplot2 ggplot aes geom_boxplot +#' @noRd +stat_numeric_plot <- function(summary_stats, plot_title) { + processed_stats <- .process_numeric_stats(summary_stats) + + mean <- sd <- concept_id <- NULL + ggplot(processed_stats, aes(x = factor(concept_id))) + + geom_boxplot( + aes( + lower = mean - sd, + upper = mean + sd, + middle = mean, + ymin = mean - 3 * sd, + ymax = mean + 3 * sd + ), + stat = "identity" + ) + + xlab(NULL) + + ggtitle(plot_title) +} + +#' stat_categorical_plot +#' +#' Generates a bar plot of the category frequencies for a categorical concept. +#' Uses pre-calculated frequencies to generate the plot. +#' +#' Expects the input data to have the following columns: +#' - `concept_id`: The concept ID. +#' - `summary_attribute`: The type of the summary attribute, should be 'frequency'. +#' - `value_as_string`: The name of the category +#' - `value_as_number`: The value of the summary attribute as a numeric value. +#' +#' @param summary_stats A `data.frame` containing the summary statistics. +#' @param plot_title A `character`, to be used as title of the plot. +#' +#' @return A `ggplot2` object. +#' +#' @importFrom ggplot2 ggplot aes geom_col labs +#' @noRd +stat_categorical_plot <- function(summary_stats, plot_title) { + # We expect only single concept ID at this point + # NOTE: this might change when we support bundles of concepts, in which case we might want to + # display the entire batch in one plot + stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1) + stopifnot(c("concept_id", "value_as_string", "value_as_number") %in% names(summary_stats)) + + summary_stats$value_as_string <- as.factor(summary_stats$value_as_string) + # Reorder factor levels by frequency + summary_stats$value_as_string <- forcats::fct_reorder( + summary_stats$value_as_string, summary_stats$value_as_number, + .desc = TRUE + ) + + value_as_string <- value_as_number <- NULL + ggplot(summary_stats, aes(value_as_string, value_as_number)) + + geom_col(aes(fill = value_as_string), show.legend = FALSE) + + labs(x = "Category", y = "Frequency") + + ggtitle(plot_title) +} + +.process_numeric_stats <- function(summary_stats) { + # We expect only single concept ID at this point + # NOTE: this might change when we support bundles of concepts, in which case we might want to + # display the entire batch in one plot + stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1) + stopifnot(c("concept_id", "summary_attribute", "value_as_number") %in% names(summary_stats)) + + tidyr::pivot_wider(summary_stats, + id_cols = "concept_id", + names_from = "summary_attribute", + values_from = "value_as_number" + ) +} + +.is_categorical <- function(summary_stats) { + "frequency" %in% summary_stats$summary_attribute +} diff --git a/R/mod_monthly_count.R b/R/mod_monthly_count.R index f798326..70e66d7 100644 --- a/R/mod_monthly_count.R +++ b/R/mod_monthly_count.R @@ -28,17 +28,17 @@ mod_monthly_count_server <- function(id, data, selected_concept, selected_dates) stopifnot(is.reactive(selected_concept)) stopifnot(is.reactive(selected_dates)) - moduleServer(id, function(input, output, session) { - ## Filter data based on selected concept and datea range + ## Filter data based on selected concept and date range selected_concept_id <- reactive(selected_concept()$concept_id) selected_concept_name <- reactive(selected_concept()$concept_name) filtered_monthly_counts <- reactive({ if (is.null(selected_concept_id())) { return(NULL) } - req(selected_dates()) # we expect always to have dates selected out <- data[data$concept_id == selected_concept_id(), ] + + req(selected_dates()) # we expect always to have dates selected .filter_dates(out, selected_dates()) }) @@ -47,21 +47,20 @@ mod_monthly_count_server <- function(id, data, selected_concept, selected_dates) if (is.null(filtered_monthly_counts()) || nrow(filtered_monthly_counts()) == 0) { return(NULL) } + monthly_count_plot(filtered_monthly_counts(), selected_concept_name()) }) }) } - .filter_dates <- function(monthly_counts, date_range) { - range_years <- lubridate::year(date_range) - range_months <- lubridate::month(date_range) + date_range <- as.Date(date_range) + if (date_range[2] < date_range[1]) { + stop("Invalid date range, end date is before start date") + } - date_year <- date_month <- NULL - dplyr::filter( - monthly_counts, - date_year >= range_years[1] & date_year <= range_years[2], - date_month >= range_months[1] & date_month <= range_months[2] - ) + dates <- lubridate::make_date(year = monthly_counts$date_year, month = monthly_counts$date_month) + keep_dates <- dplyr::between(dates, date_range[1], date_range[2]) + dplyr::filter(monthly_counts, keep_dates) } diff --git a/R/mod_stat_numeric.R b/R/mod_summary_stat.R similarity index 70% rename from R/mod_stat_numeric.R rename to R/mod_summary_stat.R index fb98557..447a29c 100644 --- a/R/mod_stat_numeric.R +++ b/R/mod_summary_stat.R @@ -1,4 +1,4 @@ -#' stat_numeric UI Function +#' summary_stat UI Function #' #' Displays the boxplot of the summary statistics for a numeric concept. #' @@ -7,14 +7,14 @@ #' @noRd #' #' @importFrom shiny NS tagList -mod_stat_numeric_ui <- function(id) { +mod_summary_stat_ui <- function(id) { ns <- NS(id) tagList( - plotOutput(ns("stat_numeric_plot"), height = 250) + plotOutput(ns("summary_stat_plot"), height = 250) ) } -#' stat_numeric Server Functions +#' summary_stat Server Functions #' #' Generates the boxplot of the summary statistics for a numeric concept. #' When no concept was selected, an empty plot is returned. @@ -23,7 +23,7 @@ mod_stat_numeric_ui <- function(id) { #' @param selected_concept Reactive value containing the selected concept, used for filtering #' #' @noRd -mod_stat_numeric_server <- function(id, data, selected_concept) { +mod_summary_stat_server <- function(id, data, selected_concept) { stopifnot(is.data.frame(data)) stopifnot(is.reactive(selected_concept)) @@ -40,10 +40,11 @@ mod_stat_numeric_server <- function(id, data, selected_concept) { data[data$concept_id == selected_concept_id(), ] }) - output$stat_numeric_plot <- renderPlot({ - ## Return empty plot if no data is selected + output$summary_stat_plot <- renderPlot({ + ## Return empty plot if no data is selected or if no data is available for the selected concept if (is.null(filtered_summary_stats())) return(NULL) - stat_numeric_plot(filtered_summary_stats(), selected_concept_name()) + if (nrow(filtered_summary_stats()) == 0) return(NULL) + summary_stat_plot(filtered_summary_stats(), selected_concept_name()) }) }) } diff --git a/tests/testthat/test-mod_monthly_count.R b/tests/testthat/test-mod_monthly_count.R index d69763e..24fa019 100644 --- a/tests/testthat/test-mod_monthly_count.R +++ b/tests/testthat/test-mod_monthly_count.R @@ -9,7 +9,7 @@ mock_monthly_counts <- data.frame( # Application-logic tests --------------------------------------------------------------------- mock_concept_row <- reactiveVal() -mock_date_range <- reactiveVal(c("2000-01-01", "2200-12-31")) +mock_date_range <- reactiveVal(c("2019-04-01", "2024-08-01")) test_that("mod_monthly_count_server reacts to changes in the selected concept", { testServer( @@ -62,6 +62,31 @@ test_that("mod_monthly_count_server reacts to changes in the selected date range ) }) +test_that("Date filtering works as expected", { + testServer( + mod_monthly_count_server, + args = list(data = mock_monthly_counts, selected_concept = mock_concept_row, selected_dates = mock_date_range), + { + # We have data for this concept from 2019-04 to 2020-05 + mock_concept_row(list(concept_id = 40213251, concept_name = "test")) # update reactive value + + # Test boundary dates, we only care up to the month level + selected_dates <- c("2019-04-01", "2020-05-01") + mock_date_range(selected_dates) + session$flushReact() + expect_equal(nrow(filtered_monthly_counts()), 3) + + # This checks a previous bug where a row with date_month larger than the date range months + # would always get removed while it should be kept in case the year is within the range + # e.g. 2019-04 should be kept when the range is 2019-01 to 2020-01 + selected_dates2 <- c("2019-01-01", "2020-01-01") + mock_date_range(selected_dates2) + session$flushReact() + expect_equal(nrow(filtered_monthly_counts()), 1) + } + ) +}) + test_that("mod_monthly_count_server generates an empty plot when no row is selected", { testServer( mod_monthly_count_server, @@ -73,6 +98,17 @@ test_that("mod_monthly_count_server generates an empty plot when no row is selec ) }) +test_that("mod_monthly_count_server generates an empty plot when no data is available for the selected concept", { + testServer( + mod_monthly_count_server, + args = list(data = mock_monthly_counts, selected_concept = mock_concept_row, selected_dates = mock_date_range), + { + mock_concept_row(list(concept_id = 9999999, concept_name = "idontexist")) + expect_length(output$monthly_count_plot$coordmap$panels[[1]]$mapping, 0) + } + ) +}) + test_that("module ui works", { ui <- mod_monthly_count_ui(id = "test") golem::expect_shinytaglist(ui) @@ -99,3 +135,8 @@ test_that("monthly_count_plot correctly parses dates", { expect_false(is.null(p$mapping)) expect_false(is.null(p$layers)) }) + +test_that("Date range filtering fails for invalid date range", { + selected_dates <- c("2020-01-01", "2019-01-01") + expect_error(.filter_dates(monthly_counts, selected_dates), "Invalid date range, end date is before start date") +}) diff --git a/tests/testthat/test-mod_stat_numeric.R b/tests/testthat/test-mod_summary_stat.R similarity index 55% rename from tests/testthat/test-mod_stat_numeric.R rename to tests/testthat/test-mod_summary_stat.R index 85a6591..301c6d2 100644 --- a/tests/testthat/test-mod_stat_numeric.R +++ b/tests/testthat/test-mod_summary_stat.R @@ -9,9 +9,9 @@ mock_stats <- data.frame( # Application-logic tests --------------------------------------------------------------------- mock_concept_row <- reactiveVal() -test_that("mod_stat_numeric_server reacts to changes in the selected concept", { +test_that("mod_summary_stat_server reacts to changes in the selected concept", { testServer( - mod_stat_numeric_server, + mod_summary_stat_server, # Add here your module params args = list(data = mock_stats, selected_concept = mock_concept_row), { @@ -35,22 +35,34 @@ test_that("mod_stat_numeric_server reacts to changes in the selected concept", { ) }) -test_that("mod_stat_numeric_server generates an empty plot when no row is selected", { +test_that("mod_summary_stat_server generates an empty plot when no row is selected", { testServer( - mod_stat_numeric_server, + mod_summary_stat_server, args = list(data = mock_stats, selected_concept = reactiveVal(NULL)), { # When no concept_id is selected, no plot should be rendered - expect_length(output$stat_numeric_plot$coordmap$panels[[1]]$mapping, 0) + expect_length(output$summary_stat_plot$coordmap$panels[[1]]$mapping, 0) + } + ) +}) + +test_that("mod_summary_stat_server generates an empty plot when no data is available for the selected concept", { + testServer( + mod_summary_stat_server, + args = list(data = mock_stats, selected_concept = mock_concept_row), + { + mock_concept_row(list(concept_id = 9999999, concept_name = "idontexist")) + session$flushReact() + expect_length(output$summary_stat_plot$coordmap$panels[[1]]$mapping, 0) } ) }) test_that("module ui works", { - ui <- mod_stat_numeric_ui(id = "test") + ui <- mod_summary_stat_ui(id = "test") golem::expect_shinytaglist(ui) # Check that formals have not been removed - fmls <- formals(mod_stat_numeric_ui) + fmls <- formals(mod_summary_stat_ui) for (i in c("id")) { expect_true(i %in% names(fmls)) } @@ -59,20 +71,22 @@ test_that("module ui works", { # Business-logic tests ------------------------------------------------------------------------ -test_that("stat_numeric_plot correctly processes data", { +test_that("summary_stat_plot correctly processes data", { # GIVEN: a data frame with summary statistics that still needs to be processed before plotting - # WHEN: stat_numeric_plot is called with this data + # WHEN: summary_stat_plot is called with this data # THEN: the data is first processed correctly and a plot is generated without errors mock_stats <- mock_stats[mock_stats$concept_id == 40213251, ] expected_data <- data.frame(concept_id = 40213251, mean = 1.5, sd = 0.5) - p <- stat_numeric_plot(mock_stats, plot_title = "test") + p <- summary_stat_plot(mock_stats, plot_title = "test") expect_identical(as.data.frame(p$data), expected_data) + expect_s3_class(p, "ggplot") + expect_true(inherits(p$layers[[1]]$geom, "GeomBoxplot")) }) -test_that("stat_numeric_plot only works for a single concept", { +test_that("summary_stat_plot only works for a single concept", { # GIVEN: a data frame with summary statistics for multiple concepts - # WHEN: stat_numeric_plot is called with this data + # WHEN: summary_stat_plot is called with this data # THEN: an error is thrown because the function only works for a single concept mock_stats <- data.frame( concept_id = rep(c(40213251, 40213252), each = 2), @@ -81,5 +95,26 @@ test_that("stat_numeric_plot only works for a single concept", { value_as_number = c(1.5, 0.5, 2.5, 0.7) ) - expect_error(stat_numeric_plot(mock_stats, plot_title = "test"), "Expecting a single concept ID") + expect_error(summary_stat_plot(mock_stats, plot_title = "test"), "Expecting a single concept ID") +}) + +test_that("summary_stat_plot works for categorical concepts", { + # GIVEN: a data frame with summary statistics for a categorical concept + # WHEN: summary_stat_plot is called with this data + # THEN: the data is processed correctly and a plot is generated without errors + mock_stats <- data.frame( + concept_id = rep(1234567, 3), + summary_attribute = rep("frequency", 3), + value_as_string = paste0("cat_", seq(3)), + value_as_number = c(42, 23, 68) + ) + expected_plot_data <- mock_stats + expected_plot_data$value_as_string <- factor(expected_plot_data$value_as_string, + levels = c("cat_3", "cat_1", "cat_2") + ) + + p <- summary_stat_plot(mock_stats, plot_title = "test") + expect_identical(as.data.frame(p$data), expected_plot_data) + expect_s3_class(p, "ggplot") + expect_true(inherits(p$layers[[1]]$geom, "GeomBar")) })