From 47850964141469d3b878cdcda67de68cfc52e784 Mon Sep 17 00:00:00 2001
From: Dan Chaltiel <15105152+DanChaltiel@users.noreply.github.com>
Date: Tue, 2 Apr 2024 20:09:58 +0200
Subject: [PATCH] clinical utils (WIP)

---
 DESCRIPTION                  |   1 +
 NAMESPACE                    |  14 +++
 R/ae_table_grade.R           | 179 +++++++++++++++++++++++++++++++
 R/ae_table_soc.R             | 197 +++++++++++++++++++++++++++++++++++
 R/data.R                     |  40 +++++++
 R/utils.R                    |   6 ++
 R/waterfall_plot.R           |  66 ++++++++++++
 _pkgdown.yml                 |   8 +-
 man/ae_table_grade_max.Rd    |  77 ++++++++++++++
 man/ae_table_grade_n.Rd      |  55 ++++++++++
 man/ae_table_soc.Rd          |  70 +++++++++++++
 man/data_example.Rd          |  65 ++++++------
 man/waterfall_plot.Rd        |  38 +++++++
 tests/testthat/helper-init.R |   1 +
 14 files changed, 784 insertions(+), 33 deletions(-)
 create mode 100644 R/ae_table_grade.R
 create mode 100644 R/ae_table_soc.R
 create mode 100644 R/waterfall_plot.R
 create mode 100644 man/ae_table_grade_max.Rd
 create mode 100644 man/ae_table_grade_n.Rd
 create mode 100644 man/ae_table_soc.Rd
 create mode 100644 man/waterfall_plot.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index d2fd92d..682ec47 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,6 +32,7 @@ Imports:
     lifecycle
 Suggests: 
     callr,
+    crosstable,
     gtools,
     htmlwidgets,
     janitor,
diff --git a/NAMESPACE b/NAMESPACE
index 8282b81..7ad1daf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,10 +3,16 @@
 S3method(print,tm_database)
 S3method(summary,common_cols)
 export("%>%")
+export(ae_plot_grade_max)
+export(ae_table_grade_max)
+export(ae_table_grade_n)
+export(ae_table_soc)
+export(as_flextable.ae_table_soc)
 export(assert_no_duplicate)
 export(build_lookup)
 export(check_subjid)
 export(edc_example)
+export(edc_example_ae)
 export(edc_example_mixed)
 export(edc_example_plot)
 export(edc_options)
@@ -34,6 +40,7 @@ export(save_plotly)
 export(split_mixed_datasets)
 export(tibble)
 export(unify)
+export(waterfall_plot)
 importFrom(cli,cat_rule)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_bullets)
@@ -50,6 +57,7 @@ importFrom(dplyr,bind_rows)
 importFrom(dplyr,count)
 importFrom(dplyr,desc)
 importFrom(dplyr,filter)
+importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
@@ -58,6 +66,7 @@ importFrom(dplyr,na_if)
 importFrom(dplyr,pull)
 importFrom(dplyr,relocate)
 importFrom(dplyr,rename)
+importFrom(dplyr,rename_with)
 importFrom(dplyr,rowwise)
 importFrom(dplyr,select)
 importFrom(dplyr,slice)
@@ -68,10 +77,14 @@ importFrom(dplyr,ungroup)
 importFrom(forcats,as_factor)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,facet_wrap)
+importFrom(ggplot2,geom_bar)
 importFrom(ggplot2,geom_line)
 importFrom(ggplot2,geom_point)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,labs)
+importFrom(ggplot2,scale_x_continuous)
+importFrom(ggplot2,theme)
+importFrom(ggplot2,waiver)
 importFrom(glue,glue)
 importFrom(haven,read_xpt)
 importFrom(labelled,var_label)
@@ -129,6 +142,7 @@ importFrom(stringr,str_starts)
 importFrom(stringr,str_subset)
 importFrom(stringr,str_trim)
 importFrom(tibble,as_tibble)
+importFrom(tibble,deframe)
 importFrom(tibble,lst)
 importFrom(tibble,tibble)
 importFrom(tidyr,pivot_longer)
diff --git a/R/ae_table_grade.R b/R/ae_table_grade.R
new file mode 100644
index 0000000..8d62d6c
--- /dev/null
+++ b/R/ae_table_grade.R
@@ -0,0 +1,179 @@
+
+
+# Grade maximum -------------------------------------------------------------------------------
+
+
+#' Summary tables for AE by grade max
+#' 
+#' The function `ae_table_grade_max()` creates a summary table of the maximum AE grade experienced per each patient. 
+#' The resulting crosstable can be piped to `as_flextable()` to get a nicely formatted flextable.
+#' 
+#' The function `ae_plot_grade_max()` creates summary plots of maximum AE grades in up to 3 different ways. 
+#'
+#'
+#' @param df input data, one row per event
+#' @param arm name of the treatment column in `df`. Case-insensitive. Can be set to `NULL` to not group.
+#' @param soc name of the SOC column in `df`. Case-insensitive. Grade will be considered 0 is missing.
+#' @param subjid,grade names of the other relevant columns in `df`. Case-insensitive. 
+#' @param total whether to add totals
+#' @param digits number of signigicant digits for percentages
+#'
+#' @return a crosstable (dataframe)
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' 
+#' tm = edc_example_ae()
+#' ae_table_grade_max(df_ae=tm$ae, df_enrol=tm$enrolres)
+#' ae_plot_grade_max(df_ae=tm$ae, df_enrol=tm$enrolres)
+#' 
+#' # 1) Apply table functions
+#' #you can use as_flextable() to get an HTML flextable
+#' #you can use modificators modificators from the flextable package
+#' ae_table_grade_max(df_ae=ae, df_enrol=enrolres, arm=NULL) %>% 
+#'   add_footer_lines("Percentages are given as the proportion of patients presenting at most one AE of given grade")
+#' ae_table_grade_max(df_ae=ae, df_enrol=enrolres) %>%
+#'   as_flextable(by_header="Both arms") %>% 
+#'   highlight(i=~variable=="Grade 5", j=-1)
+#'   
+#' # 2) Apply plot functions
+#' #you can choose the type
+#' #you can use modificators from the patchwork package, like "&"
+#' ae_plot_grade_max(df_ae=ae, df_enrol=enrolres) & labs(fill="Group")
+#' ae_plot_grade_max(df_ae=ae, df_enrol=enrolres, type=c("dodge", "fill"))
+#' ae_plot_grade_max(df_ae=ae, df_enrol=enrolres, arm=NULL) + coord_flip()
+#' }
+ae_table_grade_max = function(
+    df_ae, df_enrol, 
+    arm="ARM", subjid="SUBJID", soc="AESOC", grade="AEGR", total=TRUE, digits=0
+){
+  
+  
+  df_ae = df_ae %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), soc=tolower(soc), grade=tolower(grade))
+  df = df_enrol %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), arm=tolower(arm)) %>%
+    full_join(df_ae, by=tolower(subjid)) %>% 
+    arrange(subjid) %>% 
+    mutate(grade = ifelse(is.na(soc), 0, grade))
+  
+  df %>% 
+    summarise(grade_max = max_narm(grade), .by=c(subjid, arm)) %>% 
+    mutate(grade_max = ifelse(is.na(grade_max), "NA", paste("Grade", grade_max))) %>% 
+    crosstable::apply_labels(grade_max = "Max grade") %>% 
+    crosstable::crosstable(grade_max, by=arm, total=total, percent_digits=digits, margin="col") 
+}
+
+#' @rdname ae_table_grade_max
+#' @return a patchwork of ggplots
+#' @importFrom ggplot2 aes geom_bar ggplot labs scale_x_continuous theme waiver
+#' @export
+ae_plot_grade_max = function(
+    df_ae, df_enrol, type = c("stack", "dodge", "fill"),
+    arm="ARM", subjid="SUBJID", soc="AESOC", grade="AEGR"
+){
+  
+  df_ae = df_ae %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), soc=tolower(soc), grade=tolower(grade)) 
+  x = df_enrol %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), arm=tolower(arm)) %>%
+    full_join(df_ae, by=tolower(subjid)) %>% 
+    arrange(subjid) %>% 
+    mutate(grade = ifelse(is.na(soc), 0, grade)) %>% 
+    summarise(grade_max = max_narm(grade), .by=c(subjid, arm)) %>% 
+    mutate(grade_max = ifelse(is.na(grade_max), "NA", paste("Grade", grade_max)))
+  if(is.null(arm)) type="stack"
+  p_list = type %>% set_names() %>% 
+    map(~{
+      y_lab = if(.x=="fill") "Proportion" else "Count"
+      p = x %>% 
+        ggplot(aes(y=grade_max, fill=arm, by=factor(grade_max))) +
+        geom_bar(position=.x) +
+        scale_x_continuous(labels = if(.x=="fill") scales::percent else waiver()) +
+        labs(y="Max AE grade experienced", x=y_lab, fill="Treatment")
+      # StatProp = ggstats:::StatProp
+      # if(.x=="fill") p = 
+      #   p + geom_text(stat="prop", position = position_fill(.5))
+      p
+    })
+  
+  patchwork::wrap_plots(p_list) + 
+    patchwork::plot_layout(guides="collect") & 
+    theme(legend.position="top")
+  
+}
+# Nb of grades --------------------------------------------------------------------------------
+
+
+#' Title
+#' 
+#' @param df_ae adverse event table, one row per AE, containing subjid, soc, and grade
+#' @param df_enrol enrollment table, one row per patient, containing subjid (and arm if needed)
+#' @inheritParams ae_table_grade_max
+#'
+#' @return a crosstable
+#' @importFrom dplyr arrange count filter full_join rename_with select
+#' @importFrom tibble deframe
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' library(flextable)
+#' tm = edc_example_ae()
+#' load_list(tm)
+#' ae_table_grade_n(df_ae=ae, df_enrol=enrolres) %>% 
+#'   as_flextable() %>% 
+#'   add_footer_lines("¹ Percentages are given as the proportion of patients presenting at least one AE of given grade")
+#' 
+#' ae_table_grade_n(df_ae=ae, df_enrol=enrolres, arm=NULL) %>% 
+#'   af(by_header=F) %>% 
+#'   set_header_labels(values=c("","N (%)")) 
+#'   
+#' #To get SAE only, filter df_ae first
+#' ae %>% filter(AESER=="1-Yes") %>% ae_table_grade_n(df_enrol=enrolres, arm=NULL)
+#' }
+ae_table_grade_n = function(
+    df_ae, df_enrol, 
+    arm="ARM", grade="AEGR", subjid="SUBJID", soc="AESOC",
+    total=FALSE, digits=0
+){
+  df_ae = df_ae %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), soc=tolower(soc), grade=tolower(grade))
+  df = df_enrol %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), arm=tolower(arm)) %>%
+    full_join(df_ae, by=tolower(subjid)) %>% 
+    arrange(subjid) %>% 
+    filter(!is.na(soc)) 
+  
+  default_arm = "All patients" 
+  # `:=` = rlang::`:=`
+  npat = rlang::int(!!default_arm:=nrow(df_enrol)) 
+  if(!is.null(arm)){
+    npat = deframe(count(df_enrol, !!ensym(arm)))
+    npat["Total"] = sum(npat)
+  }
+  total = if(total) "row" else FALSE 
+  
+  if(!any(names(df)=="arm")) df$arm=default_arm %>% set_label("Treatment arm")
+  rtn = df %>% 
+    distinct(subjid, arm, grade) %>% 
+    mutate(arm) %>%
+    mutate(grade = ifelse(is.na(grade), "NA", paste("Grade", grade)) %>% copy_label_from(grade)) %>% 
+    crosstable::crosstable(grade, by=arm, total=total,
+               percent_pattern=crosstable::get_percent_pattern("none")) %>% 
+    mutate(across(-(.id:variable), function(x){
+      x = as.numeric(x)
+      tot = npat[cur_column()]
+      p = format_fixed(x/tot, digits, percent=TRUE)
+      paste0(x, " (", p, ")")
+    }))
+  attr(rtn, "by_table")[] = npat[names(npat)!="Total"]
+  rtn
+}
+
+# ae_table_grade_n(ae, df_enrol=enrolres)
+# ae_table_grade_n(ae, df_enrol=enrolres, total=TRUE)
+# ae_table_grade_n(ae, df_enrol=enrolres, arm=NULL)
+# ae_table_grade_n(ae, df_enrol=enrolres, arm=NULL, total=TRUE)
+# ae %>% filter(AESER=="1-Yes") %>% ae_table_grade_n(df_enrol=enrolres, total=TRUE) %>% af
diff --git a/R/ae_table_soc.R b/R/ae_table_soc.R
new file mode 100644
index 0000000..d9f236b
--- /dev/null
+++ b/R/ae_table_soc.R
@@ -0,0 +1,197 @@
+
+#TODO min_percent=1 -> n minimal for percents ?
+#TODO total by arm OK, total total aussi?
+#TODO vline dans as_flextable ?
+
+#' Summary tables for AE by SOC
+#' 
+#' The function `ae_table_soc()` creates a summary table of maximum AE grades for each patient according to term and SOC CTCAE. 
+#' The resulting dataframe can be piped to `as_flextable()` to get a nicely formatted flextable.
+#' 
+#'
+#' @param df_ae adverse event table, one row per AE, containing subjid, soc, and grade
+#' @param df_enrol enrollment table, one row per patient, containing subjid (and arm if needed)
+#' @param arm,term name of the treatment and term columns in `df`. Case-insensitive. Can be set to `NULL`.
+#' @param subjid,soc,grade names of the other relevant columns in `df`. Case-insensitive. 
+#' @param sort_by_ae should the table be sorted by number or alphabetically
+#' @param total should there be a `total` column for each arm
+#' @param digits significant digits for percentages
+#'
+#' @return a dataframe (`ae_table_soc()`) or a flextable (`as_flextable()`).
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # 1) Create data
+#' #enrolres = enrollment table, with one row per patient
+#' #ae = adverse event table, with one row per AE
+#' 
+#' # 2) Apply functions
+#' #the resulting flextable can be customized using the flextable package
+#' ae_table_soc(ae, enrolres, total=FALSE) %>% 
+#'   as_flextable() %>% flextable::hline(i=~soc=="" & soc!=lead(soc))
+#' ae_table_soc(ae, enrolres, term=NULL, sort_by_ae=FALSE) %>% 
+#'   as_flextable() %>% flextable::hline()
+#' ae_table_soc(ae, enrolres, arm=NULL) %>% 
+#'   as_flextable()
+#' }
+ae_table_soc = function(
+    df_ae, df_enrol, 
+    arm="ARM", term="AETERM", soc="AESOC", grade="AEGR", subjid="SUBJID",
+    sort_by_ae=TRUE, total=TRUE, digits=0, warn_miss=FALSE
+){
+  
+  null_term = is.null(term)
+  null_arm = is.null(arm)
+  
+  not_found1 = lst(term, soc, grade, subjid) %>% discard(is.null) %>% discard(~.x %in% names(df_ae))
+  not_found2 = lst(arm, subjid) %>% discard(is.null) %>% discard(~.x %in% names(df_enrol))
+  not_found = c(not_found1, not_found2)
+  if(length(not_found)>0){
+    a = paste0(names(not_found), "='", not_found, "'")
+    cli_abort("AE columns not found in {.arg df}: {.val {a}}",
+              class="edc_ae_cols_notfound_error")
+  }
+  
+  df_ae = df_ae %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), soc=tolower(soc), term=tolower(term), grade=tolower(grade))
+  df = df_enrol %>% rename_with(tolower) %>%
+    select(subjid=tolower(subjid), arm=tolower(arm)) %>%
+    full_join(df_ae, by=tolower(subjid)) %>% 
+    filter(!is.na(soc))  %>% 
+    arrange(subjid)
+  
+  #check missing data
+  if(warn_miss){
+    miss = names(df) %>% set_names() %>% map(~df %>% filter(is.na(!!ensym(.x))) %>% pull(subjid) %>% unique() %>% sort()) %>% keep(~!is_empty(.x))
+    miss %>% iwalk(~{
+      cli_warn("{.fn ae_table_soc}: Missing values in column {.val {.y}} for patients {.val {.x}}.",
+               class="edc_ae_missing_values_warning")
+    })
+  }
+  
+  max_na = function(x, na.rm=TRUE) if(all(is.na(x))) NA else max(x, na.rm=na.rm)
+  df = df %>% 
+    summarise(grade=max_na(grade), 
+              .by=c(subjid, arm, soc, term))
+  
+  rtn = df %>% count(arm, soc, term, grade=as.character(grade))
+  
+  if(total){
+    rtn = rtn %>% 
+      bind_rows(
+        count(df, arm, soc, term, grade="Tot")
+      )
+  }
+  
+  if(!null_arm){
+    n_patients = count(df, arm, name="n_arm")
+    rtn = rtn %>% left_join(n_patients, by="arm")
+    header = n_patients %>% 
+      transmute(name=janitor::make_clean_names(arm),
+                value=glue("{arm} (N={n_arm})") %>% as.character()) %>% 
+      deframe()
+  } else {
+    n_patients = as.numeric(n_patients)
+    rtn = rtn %>% mutate(arm="all", n_arm=nrow(df_enrol))
+    header = glue("All patients (N={n_patients})") %>% set_names("all")
+  }
+  
+  rtn =
+    rtn %>% 
+    arrange(arm, soc, term, grade) %>% 
+    mutate(
+      n_soc=sum(n[grade!="Tot"], na.rm=TRUE),
+      .by=soc
+    ) %>% 
+    mutate(
+      n_term=sum(n[grade!="Tot"], na.rm=TRUE),
+      .by=term
+    ) %>% 
+    mutate(
+      n2 = glue("{n} ({format_fixed(100*n/n_arm,digits)}%)"),
+      .by=arm,
+    ) %>% 
+    mutate(
+      soc = as.character(soc),
+      grade2 = paste0(fct_relabel(arm, janitor::make_clean_names), "_G", grade),
+      grade2 = grade2 %>% str_replace("_GNA", "_NA") %>% str_replace("_GTot", "_Tot")
+    ) %>% 
+    arrange(grade2) %>%
+    select(-arm, -grade, -n, -n_arm) %>% 
+    pivot_wider(id_cols=c("soc", if(!null_term) c("term", "n_term"), "n_soc"), 
+                names_from="grade2", values_from="n2") %>% 
+    arrange(soc)
+  # browser()
+  
+  if(sort_by_ae){
+    rtn = rtn %>% arrange(desc(n_soc), if(!null_term) desc(n_term))
+  }
+  rtn = rtn %>% 
+    select(-n_soc, -any_of("n_term")) %>% 
+    mutate(
+      soc=if_else(!is.na(lag(soc)) & soc==lag(soc), "", soc),
+    )
+  
+  class(rtn) = c("ae_table_soc", class(rtn))
+  attr(rtn, "header") = header
+  rtn
+}
+
+# https://coolors.co/palette/dbe5f1-b8cce4-f2dcdb-e5b9b7-ebf1dd-d7e3bc-e5e0ec-ccc1d9-dbeef3-b7dde8
+#' Turns an `ae_table_soc` object into a formatted `flextable`
+#'
+#' @param x a dataframe, resulting of `ae_table_soc()`
+#' @param arm_colors colors for the arm groups
+#'
+#' @return a formatted flextable
+#' @rdname ae_table_soc
+#' @export
+as_flextable.ae_table_soc = function(x, arm_colors=c("#f2dcdb", "#dbe5f1", "#ebf1dd", "#e5e0ec")
+){
+  table_ae_header = attr(x, "header")
+  arm_cols = names(table_ae_header) %>% set_names() %>% map_int(~sum(str_starts(names(x), .x)))
+  
+  col1 = min(which(str_detect(names(x), "G1"))) - 1 #moche mais marche...
+  colwidths = c(col1, arm_cols)
+  header_labels = set_names(names(x)) %>% map(~str_replace_all(.x, ".*_", ""))
+  header_labels$soc = "CTCAE SOC"
+  header_labels$term = "CTCAE v4.0 Term"
+  
+  rtn = x %>%
+    flextable() %>%
+    set_header_labels(values=header_labels) %>%
+    add_header_row(values=c(" ", table_ae_header), colwidths = colwidths) %>%
+    align(i=1, part="header", align="center") %>%
+    align(j=seq(col1), part="all", align="right") %>%
+    padding(padding.top=0, padding.bottom=0) %>%
+    autofit() %>% 
+    fontsize(size=8, part="all") %>%
+    bold(part="header")
+  
+  a = cumsum(colwidths)[-1]
+  for(i in seq_along(a)){
+    from = lag(a, default=col1)[i] + 1
+    to = a[i]
+    rtn = rtn %>% bg(j=seq(from, to), bg = arm_colors[i], part="all")
+  }
+  
+  rtn
+}
+
+#TESTS
+if(FALSE){
+  library(testthat)
+  ae_desc %>% 
+    ae_table_soc(n_patients=table(enrolres$ARM), 
+                 arm="ARsM", term="AETEeRM", soc="AEtSOC", grade="AEGeR", subjid="SUBaJID") %>% 
+    expect_error()
+  
+}
+
+
+
+# ae_table_soc(ae, enrolres, arm=NULL) %>% as_flextable()
+# ae_table_soc(ae, enrolres, term=NULL) %>% as_flextable()
+# ae_table_soc(ae, enrolres, arm=NULL) %>% print()
+# ae_table_soc(ae, enrolres, term=NULL) %>% print()
diff --git a/R/data.R b/R/data.R
index a66f414..f0293c4 100644
--- a/R/data.R
+++ b/R/data.R
@@ -78,3 +78,43 @@ edc_example_plot = function(N=50, seed=42){
 #' @rdname data_example
 #' @export
 edc_example = edc_example_plot
+
+
+#' @rdname data_example
+#' @export
+#' @importFrom dplyr bind_rows mutate n select
+#' @importFrom stats rnorm runif
+#' @importFrom tibble lst tibble
+edc_example_ae = function(N=50, seed=42){
+  set.seed(seed)
+  
+  enrolres = tibble(subjid=1:N, arm=sample(c("Trt", "Ctl"), size=N, replace=TRUE))
+  ae = tibble(subjid=1:N, n_ae=rbinom(n=N, size=15, prob=0.2)) %>% 
+    mutate(x = map(n_ae, ~seq_len(.x))) %>% 
+    unnest(x) %>% 
+    mutate(aegr = sample(1:5, size=n(), replace=TRUE, prob=c(0.3,0.25,0.2,0.1,0.05)),
+           aesoc = sample(sample_soc, size=n(), replace=TRUE))
+  
+  
+  
+  # .lookup = tibble(dataset=paste0("db", 0:3))
+  # rtn$.lookup=build_lookup(rtn) %>% extend_lookup()
+  rtn = lst(enrolres, ae) %>% 
+    imap(~.x %>% mutate(crfname=.y %>% set_label("Form name")))
+  # rtn$.lookup = build_lookup(rtn) %>% extend_lookup()
+  rtn$.lookup=build_lookup(rtn)
+  set_lookup(rtn$.lookup)
+  rtn
+}
+
+sample_soc = c("Gastrointestinal disorders", "General disorders and administration site conditions", 
+        "Renal and urinary disorders", "Blood and lymphatic system disorders", 
+        "Reproductive system and breast disorders", "Infections and infestations", 
+        "Investigations", "Metabolism and nutrition disorders", "Skin and subcutaneous tissue disorders", 
+        "Ear and labyrinth disorders", "Nervous system disorders", "Musculoskeletal and connective tissue disorders", 
+        "Vascular disorders", "Endocrine disorders", "Respiratory, thoracic and mediastinal disorders", 
+        "Psychiatric disorders", "Hepatobiliary disorders", "Cardiac disorders", 
+        "Immune system disorders", "Injury, poisoning and procedural complications", 
+        "Eye disorders", "Neoplasms benign, malignant and unspecified (incl cysts and polyps)", 
+        "Surgical and medical procedures")
+
diff --git a/R/utils.R b/R/utils.R
index 1d811a4..3e25e78 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -177,3 +177,9 @@ set_label = function(x, lab){
   attr(x, "label") = lab
   x
 }
+
+
+
+max_narm = function(x, na.rm=TRUE) if(all(is.na(x))) NA else max(x, na.rm=na.rm)
+min_narm = function(x, na.rm=TRUE) if(all(is.na(x))) NA else min(x, na.rm=na.rm)
+
diff --git a/R/waterfall_plot.R b/R/waterfall_plot.R
new file mode 100644
index 0000000..9877582
--- /dev/null
+++ b/R/waterfall_plot.R
@@ -0,0 +1,66 @@
+
+
+
+#' Generate a waterfall plot
+#'
+#' @param data_recist 
+#' @param rc_date 
+#' @param rc_sum 
+#' @param rc_resp 
+#' @param arm 
+#' @param warn_missing 
+#'
+#' @return
+#' @export
+#'
+#' @examples
+#' waterfall_plot(rc, rc_date=RCDT, rc_sum=RCTLSUM, rc_resp=RCRESP)
+#' rc %>% 
+#'   left_join(arm, by="SUBJID") %>%
+#'   waterfall_plot(rc_date=RCDT, rc_sum=RCTLSUM, rc_resp=RCRESP, arm=ARM) +
+#'   NULL
+waterfall_plot = function(data_recist, rc_date, rc_sum, rc_resp, arm=NULL, warn_missing=TRUE) {
+  subjid = get_key_cols()$patient_id
+  armname =  as_label(enquo(arm))
+  
+  data_recist = data_recist %>% 
+    select(subjid=!!sym(subjid), date={{rc_date}}, sum={{rc_sum}}, resp={{rc_resp}}, 
+           arm=any_of(armname))
+  
+  db_wf = data_recist %>%
+    filter(!is.na(sum)) %>%
+    # filter(!is.na(resp)) %>%
+    distinct() %>% 
+    summarise(
+      arm = unify(arm),
+      first_date = min(date),
+      first_sum = first(sum, order_by=date), 
+      last_date = max(date),
+      last_sum = last(sum, order_by=date), 
+      final_resp = last(resp, order_by=date),
+      min_sum = min(sum),
+      diff_first = (last_sum - first_sum)/first_sum,
+      diff_min = (last_sum - min_sum)/min_sum,
+      .by=subjid
+    ) %>% 
+    mutate(subjid = fct_reorder(factor(subjid), diff_first, .desc=TRUE))
+  
+  #TODO gérer les missings selon ce qu'on prend comme data dans la macro
+  
+  # missings = db_wf %>% summarise(across(-subjid, anyNA)) %>% unlist()
+  # missings2 = data_recist %>% summarise(across(-subjid, anyNA)) %>% unlist()
+  # if(any(missings) & warn_missing) {
+  #   cli_warn(c("Missing values, the waterfall plot will be incomplete."))
+  # }
+  
+  p = db_wf %>%
+    ggplot(aes(x=subjid, y=diff_first, group=subjid, fill=final_resp)) +
+    geom_hline(yintercept=c(-.3, .2), linetype="dashed") +
+    geom_col(color='black') +
+    scale_x_discrete(labels = NULL, breaks = NULL) + 
+    scale_y_continuous(labels=label_percent(), limits=c(-1, .5)) +
+    labs(x = "", y="Percent change from baseline", fill="Final global response \n(RECIST v1.1)")
+  
+  if(!missing(arm)) p = p + facet_wrap(~arm, scales="free_x", ncol=1)
+  p
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index def179b..ec98e7f 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -46,9 +46,13 @@ reference:
   - get_lookup
   - extend_lookup
   - build_lookup
-- title: "List Utils"
+- title: "Clinical Utils"
 - contents:
-  - load_list
+  - ae_table_grade_max
+  - ae_plot_grade_max
+  - ae_table_grade_n
+  - ae_table_soc
+  - ae_table_grade_max
 - title: "List Utils"
 - contents:
   - load_list
diff --git a/man/ae_table_grade_max.Rd b/man/ae_table_grade_max.Rd
new file mode 100644
index 0000000..320880d
--- /dev/null
+++ b/man/ae_table_grade_max.Rd
@@ -0,0 +1,77 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ae_table_grade.R
+\name{ae_table_grade_max}
+\alias{ae_table_grade_max}
+\alias{ae_plot_grade_max}
+\title{Summary tables for AE by grade max}
+\usage{
+ae_table_grade_max(
+  df_ae,
+  df_enrol,
+  arm = "ARM",
+  subjid = "SUBJID",
+  soc = "AESOC",
+  grade = "AEGR",
+  total = TRUE,
+  digits = 0
+)
+
+ae_plot_grade_max(
+  df_ae,
+  df_enrol,
+  type = c("stack", "dodge", "fill"),
+  arm = "ARM",
+  subjid = "SUBJID",
+  soc = "AESOC",
+  grade = "AEGR"
+)
+}
+\arguments{
+\item{arm}{name of the treatment column in \code{df}. Case-insensitive. Can be set to \code{NULL} to not group.}
+
+\item{subjid, grade}{names of the other relevant columns in \code{df}. Case-insensitive.}
+
+\item{soc}{name of the SOC column in \code{df}. Case-insensitive. Grade will be considered 0 is missing.}
+
+\item{total}{whether to add totals}
+
+\item{digits}{number of signigicant digits for percentages}
+
+\item{df}{input data, one row per event}
+}
+\value{
+a crosstable (dataframe)
+
+a patchwork of ggplots
+}
+\description{
+The function \code{ae_table_grade_max()} creates a summary table of the maximum AE grade experienced per each patient.
+The resulting crosstable can be piped to \code{as_flextable()} to get a nicely formatted flextable.
+}
+\details{
+The function \code{ae_plot_grade_max()} creates summary plots of maximum AE grades in up to 3 different ways.
+}
+\examples{
+\dontrun{
+
+tm = edc_example_ae()
+ae_table_grade_max(df_ae=tm$ae, df_enrol=tm$enrolres)
+ae_plot_grade_max(df_ae=tm$ae, df_enrol=tm$enrolres)
+
+# 1) Apply table functions
+#you can use as_flextable() to get an HTML flextable
+#you can use modificators modificators from the flextable package
+ae_table_grade_max(df_ae=ae, df_enrol=enrolres, arm=NULL) \%>\% 
+  add_footer_lines("Percentages are given as the proportion of patients presenting at most one AE of given grade")
+ae_table_grade_max(df_ae=ae, df_enrol=enrolres) \%>\%
+  as_flextable(by_header="Both arms") \%>\% 
+  highlight(i=~variable=="Grade 5", j=-1)
+  
+# 2) Apply plot functions
+#you can choose the type
+#you can use modificators from the patchwork package, like "&"
+ae_plot_grade_max(df_ae=ae, df_enrol=enrolres) & labs(fill="Group")
+ae_plot_grade_max(df_ae=ae, df_enrol=enrolres, type=c("dodge", "fill"))
+ae_plot_grade_max(df_ae=ae, df_enrol=enrolres, arm=NULL) + coord_flip()
+}
+}
diff --git a/man/ae_table_grade_n.Rd b/man/ae_table_grade_n.Rd
new file mode 100644
index 0000000..7407cb2
--- /dev/null
+++ b/man/ae_table_grade_n.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ae_table_grade.R
+\name{ae_table_grade_n}
+\alias{ae_table_grade_n}
+\title{Title}
+\usage{
+ae_table_grade_n(
+  df_ae,
+  df_enrol,
+  arm = "ARM",
+  grade = "AEGR",
+  subjid = "SUBJID",
+  soc = "AESOC",
+  total = FALSE,
+  digits = 0
+)
+}
+\arguments{
+\item{df_ae}{adverse event table, one row per AE, containing subjid, soc, and grade}
+
+\item{df_enrol}{enrollment table, one row per patient, containing subjid (and arm if needed)}
+
+\item{arm}{name of the treatment column in \code{df}. Case-insensitive. Can be set to \code{NULL} to not group.}
+
+\item{subjid, grade}{names of the other relevant columns in \code{df}. Case-insensitive.}
+
+\item{soc}{name of the SOC column in \code{df}. Case-insensitive. Grade will be considered 0 is missing.}
+
+\item{total}{whether to add totals}
+
+\item{digits}{number of signigicant digits for percentages}
+}
+\value{
+a crosstable
+}
+\description{
+Title
+}
+\examples{
+\dontrun{
+library(flextable)
+tm = edc_example_ae()
+load_list(tm)
+ae_table_grade_n(df_ae=ae, df_enrol=enrolres) \%>\% 
+  as_flextable() \%>\% 
+  add_footer_lines("¹ Percentages are given as the proportion of patients presenting at least one AE of given grade")
+
+ae_table_grade_n(df_ae=ae, df_enrol=enrolres, arm=NULL) \%>\% 
+  af(by_header=F) \%>\% 
+  set_header_labels(values=c("","N (\%)")) 
+  
+#To get SAE only, filter df_ae first
+ae \%>\% filter(AESER=="1-Yes") \%>\% ae_table_grade_n(df_enrol=enrolres, arm=NULL)
+}
+}
diff --git a/man/ae_table_soc.Rd b/man/ae_table_soc.Rd
new file mode 100644
index 0000000..21ac9bb
--- /dev/null
+++ b/man/ae_table_soc.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ae_table_soc.R
+\name{ae_table_soc}
+\alias{ae_table_soc}
+\alias{as_flextable.ae_table_soc}
+\title{Summary tables for AE by SOC}
+\usage{
+ae_table_soc(
+  df_ae,
+  df_enrol,
+  arm = "ARM",
+  term = "AETERM",
+  soc = "AESOC",
+  grade = "AEGR",
+  subjid = "SUBJID",
+  sort_by_ae = TRUE,
+  total = TRUE,
+  digits = 0,
+  warn_miss = FALSE
+)
+
+as_flextable.ae_table_soc(
+  x,
+  arm_colors = c("#f2dcdb", "#dbe5f1", "#ebf1dd", "#e5e0ec")
+)
+}
+\arguments{
+\item{df_ae}{adverse event table, one row per AE, containing subjid, soc, and grade}
+
+\item{df_enrol}{enrollment table, one row per patient, containing subjid (and arm if needed)}
+
+\item{arm, term}{name of the treatment and term columns in \code{df}. Case-insensitive. Can be set to \code{NULL}.}
+
+\item{subjid, soc, grade}{names of the other relevant columns in \code{df}. Case-insensitive.}
+
+\item{sort_by_ae}{should the table be sorted by number or alphabetically}
+
+\item{total}{should there be a \code{total} column for each arm}
+
+\item{digits}{significant digits for percentages}
+
+\item{x}{a dataframe, resulting of \code{ae_table_soc()}}
+
+\item{arm_colors}{colors for the arm groups}
+}
+\value{
+a dataframe (\code{ae_table_soc()}) or a flextable (\code{as_flextable()}).
+
+a formatted flextable
+}
+\description{
+The function \code{ae_table_soc()} creates a summary table of maximum AE grades for each patient according to term and SOC CTCAE.
+The resulting dataframe can be piped to \code{as_flextable()} to get a nicely formatted flextable.
+}
+\examples{
+\dontrun{
+# 1) Create data
+#enrolres = enrollment table, with one row per patient
+#ae = adverse event table, with one row per AE
+
+# 2) Apply functions
+#the resulting flextable can be customized using the flextable package
+ae_table_soc(ae, enrolres, total=FALSE) \%>\% 
+  as_flextable() \%>\% flextable::hline(i=~soc=="" & soc!=lead(soc))
+ae_table_soc(ae, enrolres, term=NULL, sort_by_ae=FALSE) \%>\% 
+  as_flextable() \%>\% flextable::hline()
+ae_table_soc(ae, enrolres, arm=NULL) \%>\% 
+  as_flextable()
+}
+}
diff --git a/man/data_example.Rd b/man/data_example.Rd
index 78cf861..d9037df 100644
--- a/man/data_example.Rd
+++ b/man/data_example.Rd
@@ -1,31 +1,34 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\name{data_example}
-\alias{data_example}
-\alias{edc_example_mixed}
-\alias{edc_example_plot}
-\alias{edc_example}
-\title{Example databases}
-\usage{
-edc_example_mixed(N = 100, seed = 42)
-
-edc_example_plot(N = 50, seed = 42)
-
-edc_example(N = 50, seed = 42)
-}
-\arguments{
-\item{N}{the number of patients}
-
-\item{seed}{the random seed}
-}
-\value{
-a list of tables
-}
-\description{
-List of tables used in EDCimport examples:
-\itemize{
-\item \code{edc_example()} can be used as the result of \code{\link[=read_trialmaster]{read_trialmaster()}}
-\item \code{edc_example_plot()} can be used to test \code{\link[=edc_swimmerplot]{edc_swimmerplot()}}
-\item \code{edc_example_mixed()} can be used to test \code{\link[=split_mixed_datasets]{split_mixed_datasets()}}
-}
-}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\name{data_example}
+\alias{data_example}
+\alias{edc_example_mixed}
+\alias{edc_example_plot}
+\alias{edc_example}
+\alias{edc_example_ae}
+\title{Example databases}
+\usage{
+edc_example_mixed(N = 100, seed = 42)
+
+edc_example_plot(N = 50, seed = 42)
+
+edc_example(N = 50, seed = 42)
+
+edc_example_ae(N = 50, seed = 42)
+}
+\arguments{
+\item{N}{the number of patients}
+
+\item{seed}{the random seed}
+}
+\value{
+a list of tables
+}
+\description{
+List of tables used in EDCimport examples:
+\itemize{
+\item \code{edc_example()} can be used as the result of \code{\link[=read_trialmaster]{read_trialmaster()}}
+\item \code{edc_example_plot()} can be used to test \code{\link[=edc_swimmerplot]{edc_swimmerplot()}}
+\item \code{edc_example_mixed()} can be used to test \code{\link[=split_mixed_datasets]{split_mixed_datasets()}}
+}
+}
diff --git a/man/waterfall_plot.Rd b/man/waterfall_plot.Rd
new file mode 100644
index 0000000..33cde0f
--- /dev/null
+++ b/man/waterfall_plot.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/waterfall_plot.R
+\name{waterfall_plot}
+\alias{waterfall_plot}
+\title{Generate a waterfall plot}
+\usage{
+waterfall_plot(
+  data_recist,
+  rc_date,
+  rc_sum,
+  rc_resp,
+  arm = NULL,
+  warn_missing = TRUE
+)
+}
+\arguments{
+\item{data_recist}{}
+
+\item{rc_date}{}
+
+\item{rc_sum}{}
+
+\item{rc_resp}{}
+
+\item{arm}{}
+
+\item{warn_missing}{}
+}
+\description{
+Generate a waterfall plot
+}
+\examples{
+waterfall_plot(rc, rc_date=RCDT, rc_sum=RCTLSUM, rc_resp=RCRESP)
+rc \%>\% 
+  left_join(arm, by="SUBJID") \%>\%
+  waterfall_plot(rc_date=RCDT, rc_sum=RCTLSUM, rc_resp=RCRESP, arm=ARM) +
+  NULL
+}
diff --git a/tests/testthat/helper-init.R b/tests/testthat/helper-init.R
index 47a2206..81d00c0 100644
--- a/tests/testthat/helper-init.R
+++ b/tests/testthat/helper-init.R
@@ -1,6 +1,7 @@
 Sys.setenv(LANGUAGE = "en")
 Sys.setenv(TZ="Europe/Paris")
 
+Sys.setenv("TESTTHAT_CPUS" = 5)
 
 options(
   encoding="UTF-8",