Skip to content

Commit

Permalink
Reduce test data size
Browse files Browse the repository at this point in the history
  • Loading branch information
milanmlft committed Aug 23, 2024
1 parent 9997728 commit 92b91e4
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 24,215 deletions.
36 changes: 27 additions & 9 deletions dev/test_db/produce_test_data.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cli::cli_h1("Producing test data")

library(dplyr)
suppressPackageStartupMessages({
library(dplyr)
})

dir <- Sys.getenv("EUNOMIA_DATA_FOLDER")
name <- Sys.getenv("TEST_DB_NAME")
Expand All @@ -13,21 +15,37 @@ con <- DBI::dbConnect(
withr::defer(DBI::dbDisconnect(con))

# Function to read all rows of a results table from the test database
write_results <- function(con, table) {
read_table <- function(con, table) {
schema <- Sys.getenv("TEST_DB_RESULTS_SCHEMA")
# Get all rows from the table
query <- glue::glue("SELECT * FROM {schema}.{table}")
# Run the query and return the results
path <- here::here(glue::glue("inst/test_data/{table}.csv"))
cli::cli_alert_info("Writing {table} to {path}")
con |>
DBI::dbGetQuery(query) |>
arrange(across(everything())) |>
readr::write_csv(file = path)
DBI::dbGetQuery(query)
}

# Write all results to the test data folder
# Get the relevant tables and filter
table_names <- c("calypso_concepts", "calypso_monthly_counts", "calypso_summary_stats")
purrr::walk(table_names, write_results, con = con)
tables <- purrr::map(table_names, read_table, con = con)
names(tables) <- table_names

# Keep only concepts for which we have summary statistics
keep_concepts <- tables$calypso_summary_stats$concept_id
tables <- purrr::map(
  tables,
  \(tbl) tbl[tbl$concept_id %in% keep_concepts, , drop = FALSE]
)

# Keep only data from 2019 onwards.
# `which()` drops rows whose `date_year` is NA; indexing with the bare logical
# vector would instead turn each of those rows into an all-NA row, and the NA
# concept ids would then leak into the concept filter below.
monthly_counts <- tables$calypso_monthly_counts
keep_rows <- which(monthly_counts$date_year >= 2019)
filtered_monthly <- monthly_counts[keep_rows, , drop = FALSE]
tables$calypso_monthly_counts <- filtered_monthly

# Filter the other tables to match the concepts left over after year filtering
tables <- purrr::map(
  tables,
  \(tbl) tbl[tbl$concept_id %in% filtered_monthly$concept_id, , drop = FALSE]
)

# Write all results to the test data folder.
# Rows are sorted on every column before writing: the tables are read with a
# `SELECT *` that has no ORDER BY, so without the sort the row order of the
# generated CSVs is whatever the database happens to return.
purrr::iwalk(tables, function(tbl, name) {
  path <- here::here(glue::glue("inst/test_data/{name}.csv"))
  cli::cli_alert_info("Writing {name} to {path}")
  tbl |>
    dplyr::arrange(dplyr::across(dplyr::everything())) |>
    readr::write_csv(file = path)
})

cli::cli_alert_success("Test data produced")
56 changes: 4 additions & 52 deletions inst/test_data/calypso_concepts.csv
Original file line number Diff line number Diff line change
@@ -1,58 +1,10 @@
concept_id,concept_name,vocabulary_id,domain_id,concept_class_id,standard_concept,concept_code
313217,Atrial fibrillation,SNOMED,Condition,Clinical Finding,S,49436004
317576,Coronary arteriosclerosis,SNOMED,Condition,Clinical Finding,S,53741008
321042,Cardiac arrest,SNOMED,Condition,Clinical Finding,S,410429000
381316,Cerebrovascular accident,SNOMED,Condition,Clinical Finding,S,230690007
1310034,3 ML amiodarone hydrochloride 50 MG/ML Prefilled Syringe,RxNorm,Drug,Quant Clinical Drug,S,834357
1332419,amlodipine 5 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,197361
1539411,simvastatin 20 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,312961
1545959,atorvastatin 80 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,259255
3001079,Blood group antibody screen [Presence] in Serum or Plasma,LOINC,Measurement,Lab Test,S,890-4
4057420,Catheter ablation of tissue of heart,SNOMED,Procedure,Procedure,S,18286008
4078793,Direct current cardioversion,SNOMED,Procedure,Procedure,S,180325003
4108450,Inspiration/expiration time ratio,SNOMED,Measurement,Observable Entity,S,250822000
4128111,T - Tumor stage,SNOMED,Observation,Attribute,S,260878002
4202451,Percutaneous mechanical thrombectomy of portal vein using fluoroscopic guidance,SNOMED,Procedure,Procedure,S,433112001
4216130,Percutaneous coronary intervention,SNOMED,Procedure,Procedure,S,415070008
4216746,Positive end expiratory pressure setting,SNOMED,Observation,Observable Entity,S,416595007
4217646,Implantable defibrillator,SNOMED,Device,Physical Object,S,72506001
4230911,Echocardiography,SNOMED,Procedure,Procedure,S,40701008
4248525,Lying systolic blood pressure,SNOMED,Measurement,Observable Entity,S,407556006
4257036,Antenatal hepatitis B blood screening test status,SNOMED,Observation,Clinical Finding,S,408823004
4329847,Myocardial infarction,SNOMED,Condition,Clinical Finding,S,22298006
4336464,Coronary artery bypass graft,SNOMED,Procedure,Procedure,S,232717009
4353713,Positive end expiratory pressure,SNOMED,Observation,Observable Entity,S,250854009
4353717,Ventilator delivered minute volume,SNOMED,Observation,Observable Entity,S,250875001
4353843,Invasive systolic arterial pressure,SNOMED,Measurement,Observable Entity,S,251071003
4354252,Non-invasive systolic arterial pressure,SNOMED,Measurement,Observable Entity,S,251070002
19018935,digoxin 0.125 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,197604
19075601,clopidogrel 75 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,309362
19126352,nitroglycerin 0.4 MG/ACTUAT Mucosal Spray,RxNorm,Drug,Clinical Drug,S,705129
19133768,acetaminophen 750 MG / hydrocodone bitartrate 7.5 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,833036
40163554,warfarin sodium 5 MG Oral Tablet,RxNorm,Drug,Clinical Drug,S,855332
40171902,verapamil hydrochloride 40 MG,RxNorm,Drug,Clinical Drug Comp,S,897718
40213154,"Influenza, seasonal, injectable, preservative free",CVX,Drug,CVX,S,140
40213160,"poliovirus vaccine, inactivated",CVX,Drug,CVX,S,10
40213180,"meningococcal polysaccharide (groups A, C, Y and W-135) diphtheria toxoid conjugate vaccine (MCV4P)",CVX,Drug,CVX,S,114
40213183,"measles, mumps and rubella virus vaccine",CVX,Drug,CVX,S,03
40213198,"pneumococcal conjugate vaccine, 13 valent",CVX,Drug,CVX,S,133
40213201,"pneumococcal polysaccharide vaccine, 23 valent",CVX,Drug,CVX,S,33
40213216,"rotavirus, live, monovalent vaccine",CVX,Drug,CVX,S,119
40213227,"tetanus and diphtheria toxoids, adsorbed, preservative free, for adult use (5 Lf of tetanus toxoid and 2 Lf of diphtheria toxoid)",CVX,Drug,CVX,S,113
40213230,"tetanus toxoid, reduced diphtheria toxoid, and acellular pertussis vaccine, adsorbed",CVX,Drug,CVX,S,115
40213251,varicella virus vaccine,CVX,Drug,CVX,S,21
40213260,"zoster vaccine, live",CVX,Drug,CVX,S,121
40213281,"diphtheria, tetanus toxoids and acellular pertussis vaccine",CVX,Drug,CVX,S,20
40213296,"hepatitis A vaccine, adult dosage",CVX,Drug,CVX,S,52
40213299,"hepatitis A vaccine, pediatric/adolescent dosage, 2 dose schedule",CVX,Drug,CVX,S,83
40213304,"hepatitis B vaccine, pediatric or pediatric/adolescent dosage",CVX,Drug,CVX,S,08
40213306,"hepatitis B vaccine, adult dosage",CVX,Drug,CVX,S,43
40213314,"Haemophilus influenzae type b vaccine, PRP-OMP conjugate",CVX,Drug,CVX,S,49
40213320,"human papilloma virus vaccine, quadrivalent",CVX,Drug,CVX,S,62
40220960,alteplase 100 MG Injection,RxNorm,Drug,Clinical Drug,S,1804799
40243436,1 ML atropine sulfate 1 MG/ML Injection,RxNorm,Drug,Quant Clinical Drug,S,1190795
40492359,Insertion of biventricular implantable cardioverter defibrillator,SNOMED,Procedure,Procedure,S,447365002
4353843,Invasive systolic arterial pressure,SNOMED,Measurement,Observable Entity,S,251071003
4128111,T - Tumor stage,SNOMED,Observation,Attribute,S,260878002
3001079,Blood group antibody screen [Presence] in Serum or Plasma,LOINC,Measurement,Lab Test,S,890-4
4248525,Lying systolic blood pressure,SNOMED,Measurement,Observable Entity,S,407556006
45766147,Appearance,SNOMED,Observation,Observable Entity,S,703248002
45767945,Coronary artery stent,SNOMED,Device,Physical Object,S,705643001
45772840,Implantable cardiac pacemaker,SNOMED,Device,Physical Object,S,706004007
46275916,1 ML epinephrine 1 MG/ML Injection,RxNorm,Drug,Quant Clinical Drug,S,1660014
Loading

0 comments on commit 92b91e4

Please sign in to comment.