{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Searching datasets\n", "\n", "Erddapy can wrap the same form-like search capabilities of ERDDAP with the\n", "_search_for_ keyword.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:37.329291Z", "iopub.status.busy": "2025-04-14T17:43:37.329137Z", "iopub.status.idle": "2025-04-14T17:43:37.759668Z", "shell.execute_reply": "2025-04-14T17:43:37.759104Z" } }, "outputs": [], "source": [ "from erddapy import ERDDAP\n", "\n", "e = ERDDAP(\n", " server=\"https://pae-paha.pacioos.hawaii.edu/erddap\",\n", " protocol=\"griddap\",\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Single word search.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:37.761825Z", "iopub.status.busy": "2025-04-14T17:43:37.761494Z", "iopub.status.idle": "2025-04-14T17:43:38.073325Z", "shell.execute_reply": "2025-04-14T17:43:38.072827Z" } }, "outputs": [ { "data": { "text/plain": [ "0 etopo1_bedrock\n", "1 etopo1_bedrock_lon360\n", "2 etopo1_ice\n", "3 etopo1_ice_lon360\n", "4 etopo5\n", "5 etopo5_lon180\n", "Name: Dataset ID, dtype: object" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "search_for = \"etopo\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "pd.read_csv(url)[\"Dataset ID\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filtering the search with extra words.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:38.102248Z", "iopub.status.busy": "2025-04-14T17:43:38.101865Z", "iopub.status.idle": "2025-04-14T17:43:38.524069Z", "shell.execute_reply": "2025-04-14T17:43:38.523595Z" } }, "outputs": [ { "data": { "text/plain": [ "0 etopo5\n", "1 etopo5_lon180\n", 
"Name: Dataset ID, dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = \"etopo5\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "pd.read_csv(url)[\"Dataset ID\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filtering the search with words that should **not** be found.\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:38.525740Z", "iopub.status.busy": "2025-04-14T17:43:38.525575Z", "iopub.status.idle": "2025-04-14T17:43:38.825470Z", "shell.execute_reply": "2025-04-14T17:43:38.824976Z" } }, "outputs": [ { "data": { "text/plain": [ "0 etopo5\n", "1 etopo5_lon180\n", "Name: Dataset ID, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = \"etopo5 -lon360\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "pd.read_csv(url)[\"Dataset ID\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Quoted search or \"phrase search,\" first let us try the unquoted search.\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:38.827223Z", "iopub.status.busy": "2025-04-14T17:43:38.826943Z", "iopub.status.idle": "2025-04-14T17:43:39.441017Z", "shell.execute_reply": "2025-04-14T17:43:39.440601Z" }, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "70" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = \"ocean bathymetry\"\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "len(pd.read_csv(url)[\"Dataset ID\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Too many datasets because wind, speed, and wind speed are matched. 
Now let's use\n", "the quoted search to reduce the number of results to only wind speed.\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:39.442782Z", "iopub.status.busy": "2025-04-14T17:43:39.442390Z", "iopub.status.idle": "2025-04-14T17:43:39.749605Z", "shell.execute_reply": "2025-04-14T17:43:39.749194Z" } }, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_for = '\"ocean bathymetry\"'\n", "\n", "url = e.get_search_url(search_for=search_for, response=\"csv\")\n", "\n", "len(pd.read_csv(url)[\"Dataset ID\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Another common search operation would be to search multiple servers instead of\n", "only one. In erddapy we can achieve that with `search_servers`:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:43:39.751236Z", "iopub.status.busy": "2025-04-14T17:43:39.750988Z", "iopub.status.idle": "2025-04-14T17:45:57.868077Z", "shell.execute_reply": "2025-04-14T17:45:57.867476Z" } }, "outputs": [], "source": [ "from erddapy.multiple_server_search import search_servers\n", "\n", "df = search_servers(\n", " query=\"glider\",\n", " servers_list=None,\n", " parallel=True,\n", " protocol=\"tabledap\",\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:45:57.869906Z", "iopub.status.busy": "2025-04-14T17:45:57.869728Z", "iopub.status.idle": "2025-04-14T17:45:57.872958Z", "shell.execute_reply": "2025-04-14T17:45:57.872563Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are 4649 entries in this search!\n" ] } ], "source": [ "print(f\"There are {len(df)} entries in this search!\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "These are the servers that have glider 
data according to our query.\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:45:57.874305Z", "iopub.status.busy": "2025-04-14T17:45:57.874162Z", "iopub.status.idle": "2025-04-14T17:45:57.877800Z", "shell.execute_reply": "2025-04-14T17:45:57.877418Z" }, "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'http://erddap.cencoos.org/erddap/',\n", " 'http://erddap.secoora.org/erddap/',\n", " 'http://tds.marine.rutgers.edu/erddap/',\n", " 'https://basin.ceoe.udel.edu/erddap/',\n", " 'https://coastwatch.pfeg.noaa.gov/erddap/',\n", " 'https://cwcgom.aoml.noaa.gov/erddap/',\n", " 'https://erddap-goldcopy.dataexplorer.oceanobservatories.org/erddap/',\n", " 'https://erddap.bco-dmo.org/erddap/',\n", " 'https://erddap.emodnet-physics.eu/erddap/',\n", " 'https://erddap.griidc.org/erddap/',\n", " 'https://erddap.observations.voiceoftheocean.org/erddap/',\n", " 'https://erddap.sensors.ioos.us/erddap/',\n", " 'https://gliders.ioos.us/erddap/',\n", " 'https://pae-paha.pacioos.hawaii.edu/erddap/',\n", " 'https://polarwatch.noaa.gov/erddap/',\n", " 'https://spraydata.ucsd.edu/erddap/',\n", " 'https://upwell.pfeg.noaa.gov/erddap/',\n", " 'https://www.ifremer.fr/erddap/',\n", " 'https://www.smartatlantic.ca/erddap/'}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(df[\"Server url\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One way to reduce the number of results is to search a subset of the servers with the `servers_list`\n", "argument. 
We can also use it to search servers that are not part of the awesome\n", "ERDDAP list (https://github.com/IrishMarineInstitute/awesome-erddap).\n", "\n", "One can also perform an advanced search with ERDDAP constraints using\n", "`advanced_search_servers`.\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-04-14T17:45:57.879304Z", "iopub.status.busy": "2025-04-14T17:45:57.879142Z", "iopub.status.idle": "2025-04-14T17:45:58.913275Z", "shell.execute_reply": "2025-04-14T17:45:58.912791Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TitleInstitutionDataset IDServer url
0Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-02-flortd000https://erddap.dataexplorer.oceanobservatories...
1Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-03-ctdbpc000https://erddap.dataexplorer.oceanobservatories...
2Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-03-dostad000https://erddap.dataexplorer.oceanobservatories...
3Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-07-nutnrb000https://erddap.dataexplorer.oceanobservatories...
4Coastal Endurance: Oregon Inshore Surface Moor...Ocean Observatories Initiative (OOI)ooi-ce01issm-rid16-06-phsend000https://erddap.dataexplorer.oceanobservatories...
\n", "
" ], "text/plain": [ " Title \\\n", "0 Coastal Endurance: Oregon Inshore Surface Moor... \n", "1 Coastal Endurance: Oregon Inshore Surface Moor... \n", "2 Coastal Endurance: Oregon Inshore Surface Moor... \n", "3 Coastal Endurance: Oregon Inshore Surface Moor... \n", "4 Coastal Endurance: Oregon Inshore Surface Moor... \n", "\n", " Institution Dataset ID \\\n", "0 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-02-flortd000 \n", "1 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-03-ctdbpc000 \n", "2 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-03-dostad000 \n", "3 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-07-nutnrb000 \n", "4 Ocean Observatories Initiative (OOI) ooi-ce01issm-rid16-06-phsend000 \n", "\n", " Server url \n", "0 https://erddap.dataexplorer.oceanobservatories... \n", "1 https://erddap.dataexplorer.oceanobservatories... \n", "2 https://erddap.dataexplorer.oceanobservatories... \n", "3 https://erddap.dataexplorer.oceanobservatories... \n", "4 https://erddap.dataexplorer.oceanobservatories... 
" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from erddapy.multiple_server_search import advanced_search_servers\n", "\n", "min_time = \"2017-07-01T00:00:00Z\"\n", "max_time = \"2017-09-01T00:00:00Z\"\n", "min_lon, max_lon = -127, -123.75\n", "min_lat, max_lat = 43, 48\n", "standard_name = \"sea_water_practical_salinity\"\n", "\n", "\n", "kw = {\n", " \"standard_name\": standard_name,\n", " \"min_lon\": min_lon,\n", " \"max_lon\": max_lon,\n", " \"min_lat\": min_lat,\n", " \"max_lat\": max_lat,\n", " \"min_time\": min_time,\n", " \"max_time\": max_time,\n", " \"cdm_data_type\": \"timeseries\", # let's exclude AUV's tracks\n", "}\n", "\n", "\n", "servers = {\n", " \"ooi\": \"https://erddap.dataexplorer.oceanobservatories.org/erddap/\",\n", " \"ioos\": \"https://erddap.sensors.ioos.us/erddap/\",\n", "}\n", "\n", "\n", "df = advanced_search_servers(servers_list=servers.values(), **kw)\n", "\n", "df.head()" ] } ], "metadata": { "_draft": { "nbviewer_url": "https://gist.github.com/8ec655651bf16b6e0328367caff971c1" }, "gist": { "data": { "description": "ERDDAP searchFor", "public": true }, "id": "8ec655651bf16b6e0328367caff971c1" }, "gist_id": "3f0f25b13ade0c64c84607bd92903d1b", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.3" } }, "nbformat": 4, "nbformat_minor": 1 }