Merge pull request #65 from trias-project/add_indicator_native_range_…

…year Add indicator native range year
trias-project · Sep 16, 2020 · ce5832e · ce5832e
2 parents e758217 + 43a5c84
commit ce5832e
Show file tree

Hide file tree

Showing 25 changed files with 1,734 additions and 1,449 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,13 +1,14 @@
 Package: trias
 Title: Process Data for the Project Tracking Invasive Alien Species (TrIAS)
-Version: 1.3.0.9000
+Version: 1.4.0.9000
 Description: TrIAS provides functionality to facilitate the data processing 
     for the project Tracking Invasive Alien Species (TrIAS 
     <http://trias-project.be>).
 Authors@R: c(
     person("Damiano", "Oldoni", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-3445-7562")),
     person("Peter", "Desmet", role = "aut", email = "[email protected]", comment = c(ORCID = "0000-0002-8442-8025")),
-    person("Stijn", "Van Hoey", role = "ctb", email = "[email protected]", comment = c(ORCID = "0000-0001-6413-3185"))
+    person("Stijn", "Van Hoey", role = "ctb", email = "[email protected]", comment = c(ORCID = "0000-0001-6413-3185")),
+    person("Sander", "Devisscher", role = "ctb", email = "[email protected]", comment = c(ORCID = "0000-0003-2015-5731"))
     )
 License: MIT + file LICENSE
 URL: https://github.com/trias-project/trias, https://trias-project.github.io/trias
@@ -17,6 +18,7 @@ Depends:
 Imports:
     assertthat,
     assertable,
+    data.table,
     dplyr,
     egg,
     forcats,
@@ -25,21 +27,22 @@ Imports:
     lazyeval,
     magrittr,
     mgcv,
+    plotly,
     purrr,
     readr,
-    rgbif,
+    rgbif (>= 3.0),
     rlang,
+    reshape2,
+    scales,
     stringr,
     tibble,
     tidyr,
     tidyselect
 Suggests:
     knitr,
     testthat
-Remotes:
-    ropensci/rgbif
 LazyData: true
 Encoding: UTF-8
 VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,6 +7,7 @@ export(gbif_has_distribution)
 export(gbif_verify_keys)
 export(get_table_pathways)
 export(indicator_introduction_year)
+export(indicator_native_range_year)
 export(indicator_total_year)
 export(pathways_cbd)
 export(update_download_list)
@@ -51,7 +52,6 @@ importFrom(dplyr,n_distinct)
 importFrom(dplyr,pull)
 importFrom(dplyr,rename)
 importFrom(dplyr,rename_at)
-importFrom(dplyr,right_join)
 importFrom(dplyr,rowwise)
 importFrom(dplyr,sample_n)
 importFrom(dplyr,select)
@@ -80,6 +80,7 @@ importFrom(ggplot2,ggsave)
 importFrom(ggplot2,ggtitle)
 importFrom(ggplot2,scale_colour_manual)
 importFrom(ggplot2,scale_x_continuous)
+importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,theme)
 importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
@@ -90,6 +91,8 @@ importFrom(lazyeval,interp)
 importFrom(mgcv,gam)
 importFrom(mgcv,nb)
 importFrom(mgcv,summary.gam)
+importFrom(plotly,ggplotly)
+importFrom(plotly,layout)
 importFrom(purrr,cross_df)
 importFrom(purrr,map)
 importFrom(purrr,map2)
@@ -100,6 +103,7 @@ importFrom(purrr,map_dfr)
 importFrom(purrr,pmap_dfr)
 importFrom(purrr,reduce)
 importFrom(readr,read_delim)
+importFrom(reshape2,melt)
 importFrom(rgbif,name_lookup)
 importFrom(rgbif,name_usage)
 importFrom(rgbif,occ_download_meta)
@@ -108,6 +112,7 @@ importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,sym)
+importFrom(scales,percent_format)
 importFrom(stats,formula)
 importFrom(stats,median)
 importFrom(stats,predict)

diff --git a/R/gbif_get_taxa.R b/R/gbif_get_taxa.R
@@ -134,7 +134,7 @@ gbif_get_taxa <- function(
     taxon_keys <- as.integer(taxon_keys[1:maxlimit])
     taxon_keys_df <- as.data.frame(taxon_keys)
     taxon_taxa <- map_dfr(
-      taxon_keys_df$taxon_keys, ~ name_usage(key = ., return = "data")
+      taxon_keys_df$taxon_keys, ~ name_usage(key = .)$data
     )
     taxon_taxa <- taxon_taxa %>%
       ungroup() %>%
@@ -167,17 +167,15 @@ gbif_get_taxa <- function(
           checklist_keys, ~ name_lookup(
             datasetKey = .,
             origin = origins,
-            limit = maxlimit,
-            return = "data"
-          )
+            limit = maxlimit
+          )$data
         )
     } else {
       checklist_taxa <-
         map_dfr(checklist_keys, ~ name_lookup(
           datasetKey = .,
-          limit = maxlimit,
-          return = "data"
-        ))
+          limit = maxlimit
+        )$data)
     }
 
     checklist_taxa <-

diff --git a/R/gbif_has_distribution.R b/R/gbif_has_distribution.R
@@ -97,9 +97,8 @@ gbif_has_distribution <- function(taxon_key, ...) {
   # retrieve distribution properties from GBIF
   distr_properties <- rgbif::name_usage(
     key = as.integer(taxon_key),
-    return = "data",
     data = "distribution"
-  )
+  )$data
 
   # no ditribution properties values specified by user
   if (is.null(names(user_properties))) {

diff --git a/R/gbif_verify_keys.R b/R/gbif_verify_keys.R
@@ -112,7 +112,7 @@ gbif_verify_keys <- function(keys, col_keys = "key") {
   names(keys) <- as.character(keys)
   gbif_info <-
     keys %>%
-    map(~ tryCatch(name_usage(., return = "data")[1, ],
+    map(~ tryCatch(name_usage(.)$data[1, ],
       error = function(e) {
         print(paste("Key", ., "is an invalid GBIF taxon key."))
       }

diff --git a/R/get_table_pathways.R b/R/get_table_pathways.R
@@ -267,15 +267,19 @@ get_table_pathways <- function(df,
     )
   }
   # Join pathways and samples together
-  pathway_data <-
+  if (nrow(pathway_data) == 0) {
+    tibble(
+      pathway_level1 = character(0),
+      pathway_level2 = character(0),
+      n = integer(0),
+      examples = character(0)
+    )
+  } else {
     pathway_data %>%
-    # if pathway_data is empty
-    mutate_if(is.logical, as.character) %>%
-    left_join(samples,
-      by = c("pathway_level1", "pathway_level2")
-    ) %>%
-    select(-.data$size_sample) %>%
-    ungroup()
-
-  pathway_data
+      left_join(samples,
+        by = c("pathway_level1", "pathway_level2")
+      ) %>%
+      select(-.data$size_sample) %>%
+      ungroup()
+  }
 }
diff --git a/R/indicator_introduction_year.R b/R/indicator_introduction_year.R
@@ -201,7 +201,8 @@ indicator_introduction_year <- function(df,
         start_year_plot,
         maxDate,
         x_minor_scale_stepsize
-      )) +
+      )
+    ) +
     coord_cartesian(xlim = c(start_year_plot, maxDate))
 
   if (is.null(facet_column)) {
@@ -232,7 +233,8 @@ indicator_introduction_year <- function(df,
           start_year_plot,
           maxDate,
           x_minor_scale_stepsize
-        )) + 
+        )
+      ) +
       coord_cartesian(xlim = c(start_year_plot, maxDate))
 
     ggarrange(top_graph, facet_graph)

diff --git a/R/indicator_native_range_year.R b/R/indicator_native_range_year.R
@@ -0,0 +1,135 @@
+#' Create an interactive plot for the number of alien species per native region
+#' and year of introduction
+#'
+#' Based on
+#' [countYearProvince](https://github.com/inbo/reporting-rshiny-grofwildjacht/blob/exoten/reporting-grofwild/R/countYearProvince.R)
+#' plot from reporting - rshiny - grofwildjacht
+#' @param data input data.frame.
+#' @param years (numeric) vector years we are interested to. If \code{NULL}
+#'   (default) all years from minimum and maximum of years of first observation
+#'   are taken into account.
+#' @param type character, native_range level of interest should be one of
+#'   \code{c("native_continent", "native_range")}.
+#' @param x_lab character string, label of the x-axis. Default: "year".
+#' @param y_lab character string, label of the y-axis. Default: "number of alien
+#'   species".
+#' @param relative (logical) if TRUE, each bar is standardised before stacking
+#' @param first_observed (character) Name of the column in \code{data} containing temporal information about introduction of the alien species. Expressed as years.
+#' @return list with: \itemize{ \item{'static_plot': }{ggplot object, for a
+#'   given species the observed number per year and per native range is plotted
+#'   in a stacked bar chart} \item{'interactive_plot': }{plotly object, for a
+#'   given species the observed number per year and per native range is plotted
+#'   in a stacked bar chart} \item{'data': }{data displayed in the plot, as
+#'   data.frame with: \itemize{ \item{'year': }{year at which the species were
+#'   introduced} \item{'native_range': }{native range of the introduced species}
+#'   \item{'n': }{number of species introduced from the native range for a given
+#'   year} \item{'total': }{total number of species, from all around the world,
+#'   introduced during a given year} \item{'perc': }{percentage of species
+#'   introduced from the native range for a given year. (n/total)*100} } } }
+#' @export
+#' @importFrom reshape2 melt
+#' @importFrom ggplot2 ggplot geom_bar scale_y_continuous xlab ylab theme element_text
+#' @importFrom plotly ggplotly layout
+#' @importFrom scales percent_format
+#' @importFrom dplyr %>% mutate group_by case_when rename_at
+
+indicator_native_range_year <- function(data, years = NULL,
+                                        type = c("native_continent", "native_range"),
+                                        x_lab = "year",
+                                        y_lab = "alien species",
+                                        relative = FALSE,
+                                        first_observed = "first_observed") {
+  type <- match.arg(type)
+
+  # Rename to default column name
+  data <-
+    data %>%
+    rename_at(vars(first_observed), ~"first_observed")
+
+  if (is.null(years)) {
+    years <- sort(unique(data$first_observed))
+  }
+
+  plotData <- data
+
+  plotData$location <- switch(type,
+    native_range = plotData$native_range,
+    native_continent = plotData$native_continent
+  )
+
+  # Select data
+  plotData <- plotData[plotData$first_observed %in% years, c("first_observed", "location")]
+  plotData <- plotData[!is.na(plotData$first_observed) & !is.na(plotData$location), ]
+
+  # Exclude unused provinces
+  plotData$location <- as.factor(plotData$location)
+  plotData$location <- droplevels(plotData$location)
+
+  # Summarize data per native_range and year
+  plotData$first_observed <- with(plotData, factor(first_observed,
+    levels =
+      min(years):max(years)
+  ))
+
+  summaryData <- melt(table(plotData), id.vars = "first_observed")
+  summaryData <- summaryData %>%
+    group_by(.data$first_observed) %>%
+    mutate(
+      total = sum(.data$value),
+      perc = round((.data$value / .data$total) * 100, 2)
+    )
+
+  # Summarize data per year
+  totalCount <- table(plotData$first_observed)
+
+
+  # For optimal displaying in the plot
+  summaryData$location <- as.factor(summaryData$location)
+  summaryData$location <- factor(summaryData$location, levels = rev(levels(summaryData$location)))
+  summaryData$first_observed <- as.factor(summaryData$first_observed)
+
+
+
+  # Create plot
+
+  if (relative == TRUE) {
+    position <- "fill"
+    text <- paste0(summaryData$location, "<br>", summaryData$perc, "%")
+  } else {
+    position <- "stack"
+    text <- paste0(summaryData$location, "<br>", summaryData$value)
+  }
+
+  pl <- ggplot(data = summaryData, aes(
+    x = .data$first_observed,
+    y = .data$value,
+    fill = .data$location,
+    text = text
+  )) +
+    geom_bar(position = position, stat = "identity") +
+    xlab(x_lab) +
+    ylab(y_lab) +
+    theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
+
+  if (relative == TRUE) {
+    pl <- pl + scale_y_continuous(labels = percent_format())
+  }
+
+  pl_2 <- ggplotly(data = summaryData, pl, tooltip = "text") %>%
+    layout(
+      xaxis = list(title = x_lab, tickangle = "auto"),
+      yaxis = list(title = y_lab, tickformat = ",d"),
+      margin = list(b = 80, t = 100),
+      barmode = ifelse(nlevels(summaryData$first_observed) == 1, "group", "stack")
+    )
+
+  # To prevent warnings in UI
+  pl$elementId <- NULL
+
+  # Change variable name
+  names(summaryData)[names(summaryData) == "value"] <- "n"
+  names(summaryData)[names(summaryData) == "first_observed"] <- "year"
+  names(summaryData)[names(summaryData) == "location"] <- "native_range"
+
+  return(list(static_plot = pl, interactive_plot = pl_2, data = summaryData))
+}
diff --git a/R/indicator_total_year.R b/R/indicator_total_year.R
@@ -195,7 +195,8 @@ indicator_total_year <- function(df, start_year_plot = 1940,
         start_year_plot,
         maxDate,
         x_minor_scale_stepsize
-      )) +
+      )
+    ) +
     coord_cartesian(xlim = c(start_year_plot, maxDate))
 
   if (is.null(facet_column)) {
@@ -220,7 +221,8 @@ indicator_total_year <- function(df, start_year_plot = 1940,
       facet_wrap(facet_column) +
       scale_x_continuous(
         breaks = seq(start_year_plot, maxDate, x_major_scale_stepsize),
-        minor_breaks = seq(start_year_plot, maxDate, x_minor_scale_stepsize)) +
+        minor_breaks = seq(start_year_plot, maxDate, x_minor_scale_stepsize)
+      ) +
       coord_cartesian(xlim = c(start_year_plot, maxDate))
 
     ggarrange(top_graph, facet_graph)