From 1f0a16b3ac37c8b5d711897cf2f3f1d63ff4bd17 Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:16:03 +0100 Subject: [PATCH] Properly test the quarterly level summaries --- preprocessing/R/monthly_counts.R | 5 +- .../tests/testthat/test-monthly_counts.R | 62 +++++++++++++++---- 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/preprocessing/R/monthly_counts.R b/preprocessing/R/monthly_counts.R index 988e35d..0062848 100644 --- a/preprocessing/R/monthly_counts.R +++ b/preprocessing/R/monthly_counts.R @@ -98,8 +98,9 @@ summarise_counts <- function(omop_table, concept_col, date_col, level) { ) if (level == "quarterly") { - # NOTE: lubridate::quarter is supported for postgres back-ends, but not sqlite - omop_table$date_quarter <- as.integer(lubridate::quarter(.data[[date_col]])) + # NOTE: lubridate::quarter is not supported for all SQL back-ends + omop_table <- omop_table |> + dplyr::mutate(date_quarter = as.integer(lubridate::quarter(.data[[date_col]]))) } omop_table |> diff --git a/preprocessing/tests/testthat/test-monthly_counts.R b/preprocessing/tests/testthat/test-monthly_counts.R index f7a8d97..9476077 100644 --- a/preprocessing/tests/testthat/test-monthly_counts.R +++ b/preprocessing/tests/testthat/test-monthly_counts.R @@ -8,6 +8,20 @@ test_that("generate_monthly_counts works on a CDM object", { )) }) +test_that("generate_monthly_counts can generate quarterly counts from CDM object", { + quarterly_counts <- generate_monthly_counts(mock_cdm, threshold = 0, replacement = 0, level = "quarterly") + expect_s3_class(quarterly_counts, "data.frame") + expect_true(nrow(quarterly_counts) > 0) + expect_named(quarterly_counts, c( + "concept_id", "concept_name", "date_year", "date_quarter", "record_count", + "person_count", "records_per_person" + )) + + ## Sanity check date_quarter + expect_type(quarterly_counts$date_quarter, "integer") + expect_true(all(quarterly_counts$date_quarter >= 1 & quarterly_counts$date_quarter <= 4)) +}) + test_that("generate_monthly_counts replaces low-frequency values", { threshold <- 5 replacement <- 0.5 @@ -20,26 +34,50 @@ test_that("generate_monthly_counts replaces low-frequency values", { ## Set up a mock measurement OMOP table ## Measurements for 3 different patients on the same day, with 1 patient having 2 measurements -mock_measurement <- data.frame( - measurement_id = 1:4, - person_id = c(1, 1, 2, 3), - measurement_type_concept_id = 12345, - measurement_concept_id = 1, - measurement_date = "2020-01-01", - value_as_number = c(2, 1, 2, 1), - value_as_concept_id = 0 -) +generate_mock_measurements <- function(dates, n_persons) { + grid <- expand.grid(measurement_date = as.Date(dates), person_id = seq_len(n_persons)) + data.frame( + grid, + measurement_id = seq_len(nrow(grid)), + measurement_type_concept_id = 12345, + measurement_concept_id = 1, + value_as_number = 0, + value_as_concept_id = 0 + ) +} +mock_measurement <- generate_mock_measurements("2020-01-01", 3) test_that("summarise_counts produces the expected results at monthly level", { res <- summarise_counts(mock_measurement, "measurement_concept_id", "measurement_date", level = "monthly") + expect_s3_class(res, "data.frame") expect_named(res, c("concept_id", "date_year", "date_month", "record_count", "person_count", "records_per_person")) expect_equal(nrow(res), 1) + + expect_equal(res$record_count, 3) expect_equal(res$person_count, 3) - expect_equal(res$records_per_person, 4 / 3) + expect_equal(res$records_per_person, 1) }) -con <- duckdb::dbConnect(duckdb::duckdb()) +test_that("summarise_counts produces the expected results at quarterly level", { + mock_measurement <- generate_mock_measurements( + dates = c("2012-03-26", "2012-05-04", "2012-09-23", "2012-12-31"), + n_persons = 3 + ) + res <- summarise_counts(mock_measurement, "measurement_concept_id", "measurement_date", level = "quarterly") + + expect_s3_class(res, "data.frame") + expect_named(res, c("concept_id", "date_year", "date_quarter", "record_count", "person_count", "records_per_person")) + expect_equal(nrow(res), 4) + + expect_equal(res$date_quarter, c(1, 2, 3, 4)) + + expect_equal(res$person_count, rep(3, 4)) + expect_equal(res$records_per_person, rep(1, 4)) + expect_equal(res$record_count, rep(3, 4)) +}) + +con <- connect_to_db(duckdb::duckdb()) duckdb::duckdb_register(con, "measurement", mock_measurement) db_measurement <- dplyr::tbl(con, "measurement") test_that("summarise_counts works on Database-stored tables at monthly level", { @@ -59,7 +97,7 @@ test_that("summarise_counts works on Database-stored tables at quarterly level", db_res <- summarise_counts(db_measurement, "measurement_concept_id", "measurement_date", level = "quarterly") expect_s3_class(db_res, "data.frame") - expect_named(db_res, c("concept_id", "date_year", "date_month", "record_count", "person_count", "records_per_person")) + expect_named(db_res, c("concept_id", "date_year", "date_quarter", "record_count", "person_count", "records_per_person")) expect_type(db_res$record_count, "integer") expect_type(db_res$person_count, "integer") expect_type(db_res$records_per_person, "double")