{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Searching datasets\n", "\n", "erddapy can wrap the same form-like search capabilities of ERDDAP with the\n", "_search_for_ keyword.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:00.298353Z", "iopub.status.busy": "2023-08-09T14:32:00.297973Z", "iopub.status.idle": "2023-08-09T14:32:00.864307Z", "shell.execute_reply": "2023-08-09T14:32:00.863572Z" } }, "outputs": [], "source": [ "from erddapy import ERDDAP\n", "\n", "\n", "e = ERDDAP(server=\"https://pae-paha.pacioos.hawaii.edu/erddap\", protocol=\"griddap\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Single word search.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:00.867462Z", "iopub.status.busy": "2023-08-09T14:32:00.867223Z", "iopub.status.idle": "2023-08-09T14:32:01.289142Z", "shell.execute_reply": "2023-08-09T14:32:01.288501Z" } }, "outputs": [ { "data": { "text/plain": [ "0 etopo1_bedrock\n", "1 etopo1_bedrock_lon360\n", "2 etopo1_ice\n", "3 etopo1_ice_lon360\n", "4 etopo5\n", "5 etopo5_lon180\n", "Name: Dataset ID, dtype: object" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "search_for = \"etopo\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "pd.read_csv(url)[\"Dataset ID\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filtering the search with extra words.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:01.331511Z", "iopub.status.busy": "2023-08-09T14:32:01.330946Z", "iopub.status.idle": "2023-08-09T14:32:01.733352Z", "shell.execute_reply": "2023-08-09T14:32:01.732579Z" } }, "outputs": [ { "data": { "text/plain": [ "0 etopo5\n", "1 etopo5_lon180\n", "Name: 
Dataset ID, dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = \"etopo5\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "pd.read_csv(url)[\"Dataset ID\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filtering the search with words that should **not** be found.\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:01.736904Z", "iopub.status.busy": "2023-08-09T14:32:01.736302Z", "iopub.status.idle": "2023-08-09T14:32:02.049410Z", "shell.execute_reply": "2023-08-09T14:32:02.048579Z" } }, "outputs": [ { "data": { "text/plain": [ "0 etopo5\n", "1 etopo5_lon180\n", "Name: Dataset ID, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = \"etopo5 -lon360\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "pd.read_csv(url)[\"Dataset ID\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Quoted search or \"phrase search,\" first let us try the unquoted search.\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:02.053593Z", "iopub.status.busy": "2023-08-09T14:32:02.052940Z", "iopub.status.idle": "2023-08-09T14:32:03.589885Z", "shell.execute_reply": "2023-08-09T14:32:03.589145Z" }, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "70" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = \"ocean bathymetry\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "len(pd.read_csv(url)[\"Dataset ID\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Too many datasets because ocean, bathymetry, and ocean bathymetry are matched. 
Now let's use\n", "the quoted search to reduce the number of results to only ocean bathymetry.\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:03.592943Z", "iopub.status.busy": "2023-08-09T14:32:03.592705Z", "iopub.status.idle": "2023-08-09T14:32:04.687787Z", "shell.execute_reply": "2023-08-09T14:32:04.687068Z" } }, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = '\"ocean bathymetry\"'\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "len(pd.read_csv(url)[\"Dataset ID\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Another common search operation would be to search multiple servers instead of\n", "only one. In erddapy we can achieve that with `search_servers`:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:32:04.690532Z", "iopub.status.busy": "2023-08-09T14:32:04.690300Z", "iopub.status.idle": "2023-08-09T14:34:26.131363Z", "shell.execute_reply": "2023-08-09T14:34:26.130314Z" } }, "outputs": [], "source": [ "from erddapy.multiple_server_search import search_servers\n", "\n", "df = search_servers(\n", " query=\"glider\",\n", " servers_list=None,\n", " parallel=True,\n", " protocol=\"tabledap\",\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:34:26.136011Z", "iopub.status.busy": "2023-08-09T14:34:26.134644Z", "iopub.status.idle": "2023-08-09T14:34:26.140397Z", "shell.execute_reply": "2023-08-09T14:34:26.139742Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are 4869 entries in this search!\n" ] } ], "source": [ "print(f\"There are {len(df)} entries in this search!\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "These are the servers that have glider 
data according to our query.\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:34:26.143457Z", "iopub.status.busy": "2023-08-09T14:34:26.142897Z", "iopub.status.idle": "2023-08-09T14:34:26.150064Z", "shell.execute_reply": "2023-08-09T14:34:26.149477Z" }, "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'http://erddap.cencoos.org/erddap/',\n", " 'http://erddap.secoora.org/erddap/',\n", " 'http://erddap.sochic-h2020.eu/erddap/',\n", " 'http://tds.marine.rutgers.edu/erddap/',\n", " 'https://cwcgom.aoml.noaa.gov/erddap/',\n", " 'https://erddap-goldcopy.dataexplorer.oceanobservatories.org/erddap/',\n", " 'https://erddap.axiomdatascience.com/erddap/',\n", " 'https://erddap.bco-dmo.org/erddap/',\n", " 'https://erddap.emodnet-physics.eu/erddap/',\n", " 'https://erddap.griidc.org/erddap/',\n", " 'https://erddap.sensors.ioos.us/erddap/',\n", " 'https://gliders.ioos.us/erddap/',\n", " 'https://pae-paha.pacioos.hawaii.edu/erddap/',\n", " 'https://polarwatch.noaa.gov/erddap/',\n", " 'https://spraydata.ucsd.edu/erddap/',\n", " 'https://upwell.pfeg.noaa.gov/erddap/',\n", " 'https://www.ifremer.fr/erddap/',\n", " 'https://www.smartatlantic.ca/erddap/'}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(df[\"Server url\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One way to reduce the number of results is to search a subset of the servers with the `servers_list`\n", "argument. 
We can also use it to search servers that are not part of the awesome\n", "ERDDAP list (https://github.com/IrishMarineInstitute/awesome-erddap).\n", "\n", "One can also perform an advanced search with ERDDAP constraints using\n", "`advanced_search_servers`.\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2023-08-09T14:34:26.152579Z", "iopub.status.busy": "2023-08-09T14:34:26.152261Z", "iopub.status.idle": "2023-08-09T14:34:27.549087Z", "shell.execute_reply": "2023-08-09T14:34:27.548353Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TitleInstitutionDataset IDServer url
0Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-02-flortd000https://erddap.dataexplorer.oceanobservatories...
1Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-03-ctdbpc000https://erddap.dataexplorer.oceanobservatories...
2Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-03-dostad000https://erddap.dataexplorer.oceanobservatories...
3Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-07-nutnrb000https://erddap.dataexplorer.oceanobservatories...
4Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-06-phsend000https://erddap.dataexplorer.oceanobservatories...
\n", "
" ], "text/plain": [ " Title \\\n", "0 Coastal Endurance: Oregon Inshore Surface Moor... \n", "1 Coastal Endurance: Oregon Inshore Surface Moor... \n", "2 Coastal Endurance: Oregon Inshore Surface Moor... \n", "3 Coastal Endurance: Oregon Inshore Surface Moor... \n", "4 Coastal Endurance: Oregon Inshore Surface Moor... \n", "\n", " Institution Dataset ID \\\n", "0 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-02-flortd000 \n", "1 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-03-ctdbpc000 \n", "2 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-03-dostad000 \n", "3 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-07-nutnrb000 \n", "4 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-06-phsend000 \n", "\n", " Server url \n", "0 https://erddap.dataexplorer.oceanobservatories... \n", "1 https://erddap.dataexplorer.oceanobservatories... \n", "2 https://erddap.dataexplorer.oceanobservatories... \n", "3 https://erddap.dataexplorer.oceanobservatories... \n", "4 https://erddap.dataexplorer.oceanobservatories... 
" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from erddapy.multiple_server_search import advanced_search_servers\n", "\n", "\n", "min_time = \"2017-07-01T00:00:00Z\"\n", "max_time = \"2017-09-01T00:00:00Z\"\n", "min_lon, max_lon = -127, -123.75\n", "min_lat, max_lat = 43, 48\n", "standard_name = \"sea_water_practical_salinity\"\n", "\n", "\n", "kw = {\n", " \"standard_name\": standard_name,\n", " \"min_lon\": min_lon,\n", " \"max_lon\": max_lon,\n", " \"min_lat\": min_lat,\n", " \"max_lat\": max_lat,\n", " \"min_time\": min_time,\n", " \"max_time\": max_time,\n", " \"cdm_data_type\": \"timeseries\", # let's exclude AUV's tracks\n", "}\n", "\n", "\n", "servers = {\n", " \"ooi\": \"https://erddap.dataexplorer.oceanobservatories.org/erddap/\",\n", " \"ioos\": \"https://erddap.sensors.ioos.us/erddap/\",\n", "}\n", "\n", "\n", "df = advanced_search_servers(servers_list=servers.values(), **kw)\n", "\n", "df.head()" ] } ], "metadata": { "_draft": { "nbviewer_url": "https://gist.github.com/8ec655651bf16b6e0328367caff971c1" }, "gist": { "data": { "description": "ERDDAP searchFor", "public": true }, "id": "8ec655651bf16b6e0328367caff971c1" }, "gist_id": "3f0f25b13ade0c64c84607bd92903d1b", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 1 }