From 21b55c74649eb15912b68ab68b3e9ed17957ef17 Mon Sep 17 00:00:00 2001 From: Sean Kent Date: Sat, 3 Feb 2024 14:22:41 -0600 Subject: [PATCH 1/4] Update `brsmatch()` to handle NA rows * Remove rows with NA that isn't in the `trt_time` column * Minor internal code update to .brsmatch for cleaner code * Add tests to check for this going forward --- R/brsmatch.R | 38 ++++++++++++++++++++------------ tests/testthat/test-brsmatch.R | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/R/brsmatch.R b/R/brsmatch.R index d5b6a54..246e164 100644 --- a/R/brsmatch.R +++ b/R/brsmatch.R @@ -121,6 +121,20 @@ brsmatch <- function( data[[trt_time]] <- data[[trt_time]] - 1 } + id_list <- unique(data[[id]]) # compute before any NA removal + + # Remove NA rows except those in `trt_time` column, with a warning + na_action <- na.omit(data[, setdiff(colnames(data), trt_time)]) + na_rows <- attributes(na_action)$na.action + if (!is.null(na_rows)) { + rlang::warn(c( + "ID, time, and covariates should not have NA entries.", + i = paste("Removed", length(na_rows), "rows.") + )) + data <- data[-na_rows, ] + } + + if (!is.null(exact_match)) { data_split <- split(data, data[, exact_match, drop = FALSE]) covariates <- setdiff(covariates, exact_match) @@ -151,7 +165,7 @@ brsmatch <- function( ) } - .output_pairs(matched_ids, id = id, id_list = unique(data[[id]])) + .output_pairs(matched_ids, id = id, id_list = id_list) } .brsmatch <- function( @@ -168,27 +182,25 @@ brsmatch <- function( optimizer <- options$optimizer verbose <- options$verbose - if (verbose) { - rlang::inform("Computing distance from data...") + .print_if <- function(condition, message, ...) { + if (condition) { + rlang::inform(message, ...) + } } + + .print_if(verbose, "Computing distance from data...") edges <- .compute_distances(data, id, time, trt_time, covariates, options) bal <- NULL if (balance) { - if (verbose) { - rlang::inform("Building balance columns from data...") - } + .print_if(verbose, "Building balance columns from data...") bal <- .balance_columns(data, id, time, trt_time, balance_covariates) } - if (verbose) { - rlang::inform("Constructing optimization model...") - } + .print_if(verbose, "Constructing optimization model...") model <- .rsm_optimization_model(n_pairs, edges, bal, optimizer, verbose, balance) - if (verbose) { - rlang::inform("Preparing to run optimization model") - } + .print_if(verbose, "Preparing to run optimization model") if (optimizer == "gurobi") { res <- gurobi::gurobi(model, params = list(OutputFlag = 1 * verbose)) matches <- res$x[grepl("f", model$varnames)] @@ -202,8 +214,6 @@ brsmatch <- function( max = model$max, control = list(verbose = verbose, presolve = TRUE) ) - # res <- with(model, Rglpk::Rglpk_solve_LP(obj, mat, dir, rhs, types = types, max = max, - # control = list(verbose = verbose, presolve = TRUE))) matches <- res$solution[grepl("f", model$varnames)] } diff --git a/tests/testthat/test-brsmatch.R b/tests/testthat/test-brsmatch.R index c200e75..d00a674 100644 --- a/tests/testthat/test-brsmatch.R +++ b/tests/testthat/test-brsmatch.R @@ -402,3 +402,43 @@ test_that("`brsmatch()` works for different input values.", { options = list(time_lag = TRUE) ) }) + + +test_that("brsmatch works when some input are NA", { + df1 <- data.frame( + id = rep(1:3, each = 3), + time = rep(1:3, 3), + trt_time = rep(c(2, 3, NA), each = 3), + X1 = c(2, 2, 2, 3, 3, 3, 9, 9, 9), + X2 = rep(c("a", "a", "b"), each = 3), + X3 = c(9, 4, 5, 6, 7, 2, 3, 4, 8), + X4 = c(8, 9, 4, 5, 6, 7, 2, 3, 4) + ) + + check_for_glpk() + pairs1 <- brsmatch(n_pairs = 1, data = df1) + + expect_equal(nrow(pairs1), length(unique(df1$id))) + + # Check when trt type "all" is removed" + df2 <- df1 + df2$X3[5:6] <- NA + + pairs2 <- brsmatch(n_pairs = 1, data = df2) %>% + expect_warning("should not have NA") + + expect_equal(nrow(pairs2), length(unique(df1$id))) + + # Check when trt type "trt" is removed" + df3 <- df1 + df3$X1[1:3] <- NA + + pairs3 <- brsmatch(n_pairs = 1, data = df3) %>% + expect_warning("should not have NA") + + expect_equal(nrow(pairs3), length(unique(df1$id))) + + # NOTE: this still isn't graceful if the NA removes too many rows, but we + # can't hold everyone's hand all the time... +}) + From ad9add1a71c2bdac3241f5f694a81663e958e063 Mon Sep 17 00:00:00 2001 From: Sean Kent Date: Sat, 3 Feb 2024 14:34:09 -0600 Subject: [PATCH 2/4] Update `coxpsmatch()` to handle NA rows * Same fix as brsmatch function * Add testing --- R/coxpsmatch.R | 15 +++++++++++- tests/testthat/test-coxpsmatch.R | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/R/coxpsmatch.R b/R/coxpsmatch.R index 151c072..892ea56 100644 --- a/R/coxpsmatch.R +++ b/R/coxpsmatch.R @@ -73,6 +73,19 @@ coxpsmatch <- function( data[[trt_time]] <- as.numeric(data[[trt_time]]) } + id_list <- unique(data[[id]]) # compute before any NA removal + + # Remove NA rows except those in `trt_time` column, with a warning + na_action <- na.omit(data[, setdiff(colnames(data), trt_time)]) + na_rows <- attributes(na_action)$na.action + if (!is.null(na_rows)) { + rlang::warn(c( + "ID, time, and covariates should not have NA entries.", + i = paste("Removed", length(na_rows), "rows.") + )) + data <- data[-na_rows, ] + } + if (!is.null(exact_match)) { balance_split <- split(data, data[, exact_match, drop = FALSE]) matches <- NULL @@ -94,7 +107,7 @@ coxpsmatch <- function( } colnames(matches)[1:2] <- c("trt_id", "all_id") - return(.output_pairs(matches, id = id, id_list = unique(data[[id]]))) + return(.output_pairs(matches, id = id, id_list = id_list)) } #' Propensity Score Matching with Time-Dependent Covariates diff --git a/tests/testthat/test-coxpsmatch.R b/tests/testthat/test-coxpsmatch.R index c9d060e..bd6f8e9 100644 --- a/tests/testthat/test-coxpsmatch.R +++ b/tests/testthat/test-coxpsmatch.R @@ -131,3 +131,44 @@ test_that("`coxpsmatch()` works when there are no never-treated individuals", { "ghost value" ) }) + + +test_that("coxpsmatch works when some input are NA", { + df1 <- data.frame( + id = rep(1:3, each = 3), + time = rep(1:3, 3), + trt_time = rep(c(2, 3, NA), each = 3), + X1 = c(2, 2, 2, 3, 3, 3, 9, 9, 9), + X2 = rep(c("a", "a", "b"), each = 3), + X3 = c(9, 4, 5, 6, 7, 2, 3, 4, 8), + X4 = c(8, 9, 4, 5, 6, 7, 2, 3, 4) + ) + + check_for_coxpsmatch_packages() + + pairs1 <- coxpsmatch(n_pairs = 1, data = df1) %>% + expect_warning() + + expect_equal(nrow(pairs1), length(unique(df1$id))) + + # Check when trt type "all" is removed" + df2 <- df1 + df2$X3[5:6] <- NA + + pairs2 <- coxpsmatch(n_pairs = 1, data = df2) %>% + expect_warning("should not have NA") + + expect_equal(nrow(pairs2), length(unique(df1$id))) + + # Check when trt type "trt" is removed" + df3 <- df1 + df3$X1[1:3] <- NA + + pairs3 <- brsmatch(n_pairs = 1, data = df3) %>% + expect_warning("should not have NA") + + expect_equal(nrow(pairs3), length(unique(df1$id))) + + # NOTE: this still isn't graceful if the NA removes too many rows, but we + # can't hold everyone's hand all the time... +}) From 11ee8b7e9ef1e701b2995b557ce74a060f31cf1d Mon Sep 17 00:00:00 2001 From: Sean Kent Date: Sat, 3 Feb 2024 14:37:04 -0600 Subject: [PATCH 3/4] Increment version and NEWS.md --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 420007d..f4263bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rsmatch Title: Matching Methods for Time-Varying Observational Studies -Version: 0.2.0.9000 +Version: 0.2.0.9001 Authors@R: c( person("Sean", "Kent", , "skent259@gmail.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0001-8697-9069")), diff --git a/NEWS.md b/NEWS.md index fc469b0..f635acc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,5 @@ # rsmatch (development version) +* Update `brsmatch()` and `coxpsmatch()` to handle NA rows via removing them # rsmatch 0.2.0 From 91f0b0f16f30416e06f65828610d548625aeff62 Mon Sep 17 00:00:00 2001 From: Sean Kent Date: Sat, 3 Feb 2024 14:41:43 -0600 Subject: [PATCH 4/4] fix Namespace issue --- R/brsmatch.R | 2 +- R/coxpsmatch.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/brsmatch.R b/R/brsmatch.R index 246e164..b209843 100644 --- a/R/brsmatch.R +++ b/R/brsmatch.R @@ -124,7 +124,7 @@ brsmatch <- function( id_list <- unique(data[[id]]) # compute before any NA removal # Remove NA rows except those in `trt_time` column, with a warning - na_action <- na.omit(data[, setdiff(colnames(data), trt_time)]) + na_action <- stats::na.omit(data[, setdiff(colnames(data), trt_time)]) na_rows <- attributes(na_action)$na.action if (!is.null(na_rows)) { rlang::warn(c( diff --git a/R/coxpsmatch.R b/R/coxpsmatch.R index 892ea56..93e3a08 100644 --- a/R/coxpsmatch.R +++ b/R/coxpsmatch.R @@ -76,7 +76,7 @@ coxpsmatch <- function( id_list <- unique(data[[id]]) # compute before any NA removal # Remove NA rows except those in `trt_time` column, with a warning - na_action <- na.omit(data[, setdiff(colnames(data), trt_time)]) + na_action <- stats::na.omit(data[, setdiff(colnames(data), trt_time)]) na_rows <- attributes(na_action)$na.action if (!is.null(na_rows)) { rlang::warn(c(