From a82fe2fc00f2d802ec442be6131ffff261103227 Mon Sep 17 00:00:00 2001 From: Laura Brenskelle <160157051+laurabrenskelle@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:57:13 -0400 Subject: [PATCH] Updating OBIS EOV file --- .../2024-09-13-OBIS_EOVs.ipynb | 218 +++++++----------- 1 file changed, 78 insertions(+), 140 deletions(-) diff --git a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2024-09-13-OBIS_EOVs.ipynb b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2024-09-13-OBIS_EOVs.ipynb index 975f28d4..d42ae8e5 100644 --- a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2024-09-13-OBIS_EOVs.ipynb +++ b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2024-09-13-OBIS_EOVs.ipynb @@ -7,38 +7,22 @@ "source": [ "# Using aphiaIDs to download OBIS occurrences for Bio/Eco EOVs\n", "\n", - "Created: 2024-09-13" + "Created: 2024-09-13\n", + "\n", + "The [IOOS Marine Life Data Network](https://ioos.github.io/marine_life_data_network/) has developed a list of aphiaIDs for taxa mentioned in the [GOOS Biology and Ecosystems Variables](https://goosocean.org/what-we-do/framework/essential-ocean-variables/). These lists are meant to assist in querying OBIS to evaluate the abundance and distribution of these taxa. Here is an R notebook of how this might work using mangroves as an example." 
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "390bf2be-3828-4705-b64b-ed68005ff98b", "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "Attaching package: 'dplyr'\n", - "\n", - "\n", - "The following objects are masked from 'package:stats':\n", - "\n", - " filter, lag\n", - "\n", - "\n", - "The following objects are masked from 'package:base':\n", - "\n", - " intersect, setdiff, setequal, union\n", - "\n", - "\n" - ] + "scrolled": true, + "vscode": { + "languageId": "r" } - ], + }, + "outputs": [], "source": [ "library(gh)\n", "library(readr)\n", @@ -47,153 +31,95 @@ "library(htmlwidgets)" ] }, + { + "cell_type": "markdown", + "id": "3089302a", + "metadata": {}, + "source": [ + "First, we will pull the file with the mangrove aphiaIDs from the Marine Life Data Network GitHub repo. **Note**: the acceptedTaxonIds in these files are based on what was up-to-date in the [WoRMS](https://marinespecies.org/) database as of the date this script was written." 
+ ] + }, { "cell_type": "code", "execution_count": 5, "id": "43964864-7fe8-4139-a9d0-2a844161ccd1", - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "r" + } + }, "outputs": [], "source": [ - "# first we will pull the files where the EOV taxonomy are stored from GitHub\n", - "repo_files <- gh(\"GET /repos/:owner/:repo/contents/:path\",\n", - " owner = \"ioos\",\n", - " repo = \"marine_life_data_network\",\n", - " path = \"eov_taxonomy\")\n", - "\n", - "for (file_info in repo_files) {\n", - " if (file_info$type == \"file\") {\n", - " download.file(file_info$download_url, destfile = file_info$name)\n", - " }\n", - "}" + "mangroves <- read.csv(\"https://raw.githubusercontent.com/ioos/marine_life_data_network/main/eov_taxonomy/mangroves.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "45fa26db", + "metadata": {}, + "source": [ + "Now we will do a bit of cleanup to get a list of aphiaIDs for mangroves so we can run our `robis` query using these as taxon identifiers." ] }, { "cell_type": "code", "execution_count": 3, "id": "0bb6ab17-d239-410c-90d0-dcb50359b095", - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "r" + } + }, "outputs": [], "source": [ - "# let's try mangroves as our first example\n", - "# we will open the mangroves taxonomy CSV that we downloaded and grab the taxonIDs\n", - "mangroves <- read.csv(\"mangroves.csv\")\n", "mangroves$ID <- gsub(\"urn:lsid:marinespecies.org:taxname.\", \"\", mangroves$acceptedTaxonId)\n", "mangroves$ID <- as.numeric(mangroves$ID)\n", "mangroveIdentifiers <- paste(mangroves$ID, collapse = \", \")" ] }, + { + "cell_type": "markdown", + "id": "8a6cefa9", + "metadata": {}, + "source": [ + "Using the taxonIDs from the last step, let's query OBIS for occurrence data for mangroves. This step may take a bit of time to run. When this script was written, there were over 130,000 records for mangroves in OBIS." 
+ ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "d9aa2697-4ca0-4631-8d77-b0407e7a73b5", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Retrieved 5000 records of approximately 132901 (3%)\n", - "Retrieved 10000 records of approximately 132901 (7%)\n", - "Retrieved 15000 records of approximately 132901 (11%)\n", - "Retrieved 20000 records of approximately 132901 (15%)\n", - "Retrieved 25000 records of approximately 132901 (18%)\n", - "Retrieved 30000 records of approximately 132901 (22%)\n", - "Retrieved 35000 records of approximately 132901 (26%)\n", - "Retrieved 40000 records of approximately 132901 (30%)\n", - "Retrieved 45000 records of approximately 132901 (33%)\n", - "Retrieved 50000 records of approximately 132901 (37%)\n", - "Retrieved 55000 records of approximately 132901 (41%)\n", - "Retrieved 60000 records of approximately 132901 (45%)\n", - "Retrieved 65000 records of approximately 132901 (48%)\n", - "Retrieved 70000 records of approximately 132901 (52%)\n", - "Retrieved 75000 records of approximately 132901 (56%)\n", - "Retrieved 80000 records of approximately 132901 (60%)\n", - "Retrieved 85000 records of approximately 132901 (63%)\n", - "Retrieved 90000 records of approximately 132901 (67%)\n", - "Retrieved 95000 records of approximately 132901 (71%)\n", - "Retrieved 1e+05 records of approximately 132901 (75%)\n", - "Retrieved 105000 records of approximately 132901 (79%)\n", - "Retrieved 110000 records of approximately 132901 (82%)\n", - "Retrieved 115000 records of approximately 132901 (86%)\n", - "Retrieved 120000 records of approximately 132901 (90%)\n", - "Retrieved 125000 records of approximately 132901 (94%)\n", - "Retrieved 130000 records of approximately 132901 (97%)\n", - "Retrieved 132901 records of approximately 132901 (100%)\n" - ] - }, - { - "data": { - "text/html": [ - "132901" - ], - "text/latex": [ - "132901" - ], - "text/markdown": [ - "132901" 
- ], - "text/plain": [ - "[1] 132901" - ] - }, - "metadata": {}, - "output_type": "display_data" + "metadata": { + "vscode": { + "languageId": "r" } - ], + }, + "outputs": [], "source": [ - "# using the taxonIDs from the last step, let's search OBIS for occurrence data for this EOV\n", - "# this will take a bit to download because there were over 130k records when this script was written in Sept 2024\n", "mangrove_occ <- robis::occurrence(taxonid = mangroveIdentifiers)\n", "# let's check how many occurrences we got from OBIS\n", "nrow(mangrove_occ)" ] }, + { + "cell_type": "markdown", + "id": "c837a3ac", + "metadata": {}, + "source": [ + "Now that we have all of our mangrove records from OBIS, we will map the global distribution of records using the `map_leaflet` function found in the `robis` package. With the leaflet functionality, you can zoom in on records and click them to see the scientific name for that occurrence record." + ] + }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "1ccc9837-4585-4020-852b-cdc6b33ae62d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\t
\n", - "\t\t\n", - "\t\t\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\t\n", - "\t\n", - "\t\t\n", - "\n", - "\t\n", - "\n" - ], - "text/plain": [ - "HTML widgets cannot be represented in plain text (need html)" - ] - }, - "metadata": { - "text/html": { - "isolated": true - } - }, - "output_type": "display_data" + "metadata": { + "vscode": { + "languageId": "r" } - ], + }, + "outputs": [], "source": [ - "# use the built in leaflet capability from robis to map the occurrences\n", "m <- map_leaflet(mangrove_occ,\n", " provider_tiles = \"Esri.WorldGrayCanvas\",\n", " popup = function(x) { x[\"scientificName\"] },\n", @@ -201,14 +127,26 @@ "m" ] }, + { + "cell_type": "markdown", + "id": "202e2512", + "metadata": {}, + "source": [ + "This next step is not required, but if you'd like to save this map to view it outside of R, here's how." + ] + }, { "cell_type": "code", "execution_count": null, "id": "13a8bc6c-5963-4e4e-acab-f0f6de776045", - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "r" + } + }, "outputs": [], "source": [ - "# if you want to save the leaflet map\n", + "\n", "saveWidget(m, \"mangroveMap.html\", selfcontained = TRUE)" ] }