diff --git a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/OBIS_EOVs.ipynb b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/OBIS_EOVs.ipynb new file mode 100644 index 00000000..57918a19 --- /dev/null +++ b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/OBIS_EOVs.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ad98f7a1-d7a1-4a51-a9a2-0da9da7fe042", + "metadata": {}, + "source": [ + "# Using aphiaIDs to download OBIS occurrences for Bio/Eco EOVs\n", + "\n", + "Created: 2024-09-13" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "390bf2be-3828-4705-b64b-ed68005ff98b", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "library(gh)\n", + "library(readr)\n", + "library(robis)\n", + "library(dplyr)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "43964864-7fe8-4139-a9d0-2a844161ccd1", + "metadata": {}, + "outputs": [], + "source": [ + "# first we will pull the files where the EOV taxonomies are stored from GitHub\n", + "repo_files <- gh(\"GET /repos/:owner/:repo/contents/:path\",\n", + " owner = \"ioos\",\n", + " repo = \"marine_life_data_network\",\n", + " path = \"eov_taxonomy\")\n", + "\n", + "for (file_info in repo_files) {\n", + " if (file_info$type == \"file\") {\n", + " download.file(file_info$download_url, destfile = file_info$name)\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0bb6ab17-d239-410c-90d0-dcb50359b095", + "metadata": {}, + "outputs": [], + "source": [ + "# let's try mangroves as our first example\n", + "# we will open the mangroves taxonomy CSV that we downloaded and grab the taxonIDs\n", + "mangroves <- read.csv(\"mangroves.csv\")\n", + "mangroves$ID <- gsub(\"urn:lsid:marinespecies.org:taxname.\", \"\", mangroves$acceptedTaxonId)\n", + "mangroves$ID <- as.numeric(mangroves$ID)\n", + "mangroveIdentifiers <- paste(mangroves$ID, 
collapse = \", \")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d9aa2697-4ca0-4631-8d77-b0407e7a73b5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Retrieved 5000 records of approximately 132901 (3%)\n", + "Retrieved 10000 records of approximately 132901 (7%)\n", + "Retrieved 15000 records of approximately 132901 (11%)\n", + "Retrieved 20000 records of approximately 132901 (15%)\n", + "Retrieved 25000 records of approximately 132901 (18%)\n", + "Retrieved 30000 records of approximately 132901 (22%)\n", + "Retrieved 35000 records of approximately 132901 (26%)\n", + "Retrieved 40000 records of approximately 132901 (30%)\n", + "Retrieved 45000 records of approximately 132901 (33%)\n", + "Retrieved 50000 records of approximately 132901 (37%)\n", + "Retrieved 55000 records of approximately 132901 (41%)\n", + "Retrieved 60000 records of approximately 132901 (45%)\n", + "Retrieved 65000 records of approximately 132901 (48%)\n", + "Retrieved 70000 records of approximately 132901 (52%)\n", + "Retrieved 75000 records of approximately 132901 (56%)\n", + "Retrieved 80000 records of approximately 132901 (60%)\n", + "Retrieved 85000 records of approximately 132901 (63%)\n", + "Retrieved 90000 records of approximately 132901 (67%)\n", + "Retrieved 95000 records of approximately 132901 (71%)\n", + "Retrieved 1e+05 records of approximately 132901 (75%)\n", + "Retrieved 105000 records of approximately 132901 (79%)\n", + "Retrieved 110000 records of approximately 132901 (82%)\n", + "Retrieved 115000 records of approximately 132901 (86%)\n", + "Retrieved 120000 records of approximately 132901 (90%)\n", + "Retrieved 125000 records of approximately 132901 (94%)\n", + "Retrieved 130000 records of approximately 132901 (97%)\n", + "Retrieved 132901 records of approximately 132901 (100%)\n" + ] + }, + { + "data": { + "text/html": [ + "132901" + ], + "text/latex": [ + "132901" + ], + "text/markdown": [ + "132901" 
+ ], + "text/plain": [ + "[1] 132901" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# using the taxonIDs from the last step, let's search OBIS for occurrence data for this EOV\n", + "# this will take a bit to download because there were over 130k records when this script was written in Sept 2024\n", + "mangrove_occ <- robis::occurrence(taxonid = mangroveIdentifiers)\n", + "# let's check how many occurrences we got from OBIS\n", + "nrow(mangrove_occ)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ccc9837-4585-4020-852b-cdc6b33ae62d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1] \"235048, 235033, 234450, 234495, 235086, 235089, 235091, 235106, 235056, 235060, 235045, 235116, 235063, 235072, 235075, 235077, 235068, 234488, 235103\"\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Retrieved 5000 records of approximately 132901 (3%)\n", + "Retrieved 10000 records of approximately 132901 (7%)\n", + "Retrieved 15000 records of approximately 132901 (11%)\n", + "Retrieved 20000 records of approximately 132901 (15%)\n", + "Retrieved 25000 records of approximately 132901 (18%)\n", + "Retrieved 30000 records of approximately 132901 (22%)\n", + "Retrieved 35000 records of approximately 132901 (26%)\n", + "Retrieved 40000 records of approximately 132901 (30%)\n", + "Retrieved 45000 records of approximately 132901 (33%)\n", + "Retrieved 50000 records of approximately 132901 (37%)\n", + "Retrieved 55000 records of approximately 132901 (41%)\n", + "Retrieved 60000 records of approximately 132901 (45%)\n", + "Retrieved 65000 records of approximately 132901 (48%)\n", + "Retrieved 70000 records of approximately 132901 (52%)\n", + "Retrieved 75000 records of approximately 132901 (56%)\n", + "Retrieved 80000 records of approximately 132901 (60%)\n", + "Retrieved 85000 records of approximately 132901 (63%)\n", + "Retrieved 90000 records 
of approximately 132901 (67%)\n", + "Retrieved 95000 records of approximately 132901 (71%)\n", + "Retrieved 1e+05 records of approximately 132901 (75%)\n", + "Retrieved 105000 records of approximately 132901 (79%)\n", + "Retrieved 110000 records of approximately 132901 (82%)\n", + "Retrieved 115000 records of approximately 132901 (86%)\n", + "Retrieved 120000 records of approximately 132901 (90%)\n", + "Retrieved 125000 records of approximately 132901 (94%)\n", + "Retrieved 130000 records of approximately 132901 (97%)\n", + "Retrieved 132901 records of approximately 132901 (100%)\n" + ] + }, + { + "data": { + "text/html": [ + "132901" + ], + "text/latex": [ + "132901" + ], + "text/markdown": [ + "132901" + ], + "text/plain": [ + "[1] 132901" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\t
\n", + "\t\t\n", + "\t\t\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\t\n", + "\n", + "\t\n", + "\n" + ], + "text/plain": [ + "HTML widgets cannot be represented in plain text (need html)" + ] + }, + "metadata": { + "text/html": { + "isolated": true + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# use the built-in leaflet capability from robis to map the occurrences\n", + "map_leaflet(mangrove_occ,\n", + " provider_tiles = \"Esri.WorldGrayCanvas\",\n", + " popup = function(x) { x[\"scientificName\"] },\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.1.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}