Skip to content

Commit

Permalink
Improve functions so that they can save historical datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
truenomad committed Mar 19, 2024
1 parent 60c1249 commit d10c18f
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 7 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,7 @@ Suggests:
jsonlite,
mockery,
withr,
httpcode
httpcode,
cli,
stringr
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ export(get_api_date_suffix)
export(get_polis_api_data)
export(iterative_api_call)
export(process_api_response)
export(save_polis_data)
export(update_polis_api_data)
export(write_log_file_api)
19 changes: 19 additions & 0 deletions R/get_api_date_suffix.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,24 @@ get_api_date_suffix <- function(data_type) {
sub_activ = "SubActivity", lqas = "Lqas"
)

# Define date fields for each data type
# for initial data
date_fields_initial <- c(
cases = "ParalysisOnsetDate",
virus = "VirusDate",
population = "CreatedDate",
env = "CollectionDate",
geo = "CreatedDate",
geo_synonym = "UpdatedDate",
im = "PublishDate",
activity = "ActivityDateFrom",
lab_specimen_virus = "PublishDate",
lab_specimen = "LastUpdateDate",
sub_activ = "DateFrom", lqas = "Start"
)

# Define date fields for each data type
# for updated data
date_fields <- c(
cases = "LastUpdateDate",
virus = "UpdatedDate", population = "UpdatedDate",
Expand All @@ -53,6 +71,7 @@ get_api_date_suffix <- function(data_type) {
# Return endpoint suffix and date field
list(
endpoint_suffix = endpoint_suffixes[[data_type]],
date_fields_initial = date_fields_initial[[data_type]],
date_field = date_fields[[data_type]]
)
}
18 changes: 15 additions & 3 deletions R/get_polis_api_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@
#' (Human Specimen Viruses). Default is 'cases'.
#' @param region Region code for data filtering.
#' Represents the WHO region from which to retrieve the data.
#' Default is 'AFRO' (African Region).
#' Possible values are AFRO; AMRO; EMRO; EURO; SEARO; WPRO Use
#' 'Global' to retrieve global data. Default is 'AFRO'.
#' @param select_vars Vector of variables to select from the API response.
#' If NULL (default), all variables are selected.
#' @param polis_api_key API key for authentication.
#' Default is retrieved from the environment variable
#' 'POLIS_API_KEY'. An explicit API key can be provided
#' if required.
#'
#' @param updated_dates Logical indicating whether to use the 'LastUpdateDate'
#' @return A data frame containing the requested data aggregated from all pages
#' of the API response. Each row represents a record, and columns
#' correspond to the variables in the dataset.
Expand All @@ -49,13 +50,24 @@ get_polis_api_data <- function(min_date,
data_type = "cases",
region = "AFRO",
select_vars = NULL,
updated_dates = TRUE,
polis_api_key) {

# API Endpoint and URL Construction
api_endpoint <- "https://extranet.who.int/polis/api/v2/"
endpoint_suffix <- get_api_date_suffix(data_type)$endpoint_suffix

# set up the dates
date_field <- get_api_date_suffix(data_type)$date_field
if (updated_dates) {
date_field <- get_api_date_suffix(data_type)$date_field
} else {
date_field <- get_api_date_suffix(data_type)$date_fields_initial
}

# set up region field
if (tolower(region) == "global") {
region_field <- NULL
}

# set up region field name
region_field <- if (data_type == "virus") "RegionName" else "WHORegion"
Expand Down
56 changes: 56 additions & 0 deletions R/save_polis_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Save Polis Data to compressed RDS File
#'
#' This function saves a given POLIS data object to an RDS file using a
#' specific naming convention based on the current date. It also manages the
#' retention of only the 5 most recent datasets in the specified directory,
#' removing older datasets if necessary.
#'
#' @param polis_data The POLIS data object to be saved.
#' @param polis_path The directory path where the RDS file will be saved. This
#' function will check this directory for existing datasets and will maintain
#' only the 5 most recent datasets, deleting older ones.
#' @param max_datasets The max number of datasets to retain in the directory.
#'
#' @return Invisible NULL. This function is used for its side effect of
#' saving a file and potentially deleting older files, rather than for
#' returning a value.
#'
#' @examples
#' # Assume `polis_data` is your dataset and `./polis_datasets` is your
#' # target directory
#' # save_polis_data(polis_data, "./polis_datasets")
#'
#' @export
save_polis_data <- function(polis_data, polis_path,
filname, max_datasets = 5) {

cli::cli_process_start("Saving POLIS data into a compressed RDS file.")

# generate the file name based on the current date
suffix_name <- sprintf("_%s.rds", format(Sys.Date(), "%Y_%V"))
full_path <- file.path(polis_path, paste0(filname, suffix_name))

# save polis list
saveRDS(polis_data, full_path, compress = "xz")

cli::cli_process_done( )

# Check existing datasets and keep only the 5 most recent
existing_files <- list.files(polis_path, full.names = TRUE)

if (length(existing_files) > 5) {
# Sort files by date, assuming the naming convention holds the date info
file_dates <- sapply(existing_files, function(x) {
as.Date(stringr::str_extract(x, "\\d{4}_\\d{2}"), "%Y_%V")
})

oldest_files <- existing_files[order(
file_dates)][1:(length(existing_files)-5)]

cli::cli_alert_success(
"Removing {length(oldest_files)} old file(s) to keep top {max_datasets}.")

suppressMessages(file.remove(oldest_files))
}
cli::cli_process_done( )
}
1 change: 1 addition & 0 deletions R/update_polis_api_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ update_polis_api_data <- function(min_date,
save_directly = FALSE,
log_results = FALSE,
polis_api_key = NULL) {

# Construct file names for data and log
data_file_name <- paste0(file_path, "/", data_type, "_polis_data.rds")
log_file_name <- paste0(file_path, "/", "polis_data_update_log.xlsx")
Expand Down
6 changes: 5 additions & 1 deletion man/get_polis_api_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/save_polis_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions tests/testthat/test-get_api_date_suffix.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
testthat::test_that("Get API Date Suffix Functionality", {
# Test for valid data types
expect_equal(get_api_date_suffix("cases"),
list(endpoint_suffix = "Case", date_field = "LastUpdateDate"))
list(endpoint_suffix = "Case",
date_fields_initial = "ParalysisOnsetDate",
date_field = "LastUpdateDate"))
expect_equal(get_api_date_suffix("virus"),
list(endpoint_suffix = "Virus", date_field = "UpdatedDate"))
list(endpoint_suffix = "Virus",
date_fields_initial = "VirusDate",
date_field = "UpdatedDate"))
# Add more tests for other valid data types

# Test for an invalid data type
Expand Down

0 comments on commit d10c18f

Please sign in to comment.