Accessing glider data via the Glider DAC API with Python

Accessing glider data via the Glider DAC API with Python

Created: 2016-12-20

Modified: 2024-03-29

IOOS provides an API for getting information on all the glider deployments available in the Glider DAC.

The raw JSON can be accessed at https://gliders.ioos.us/providers/api/deployment and it is quite simple to parse it with Python.

First, let's check how many glider deployments exist in the Glider DAC.

import requests

# Query the documented Glider DAC deployment endpoint over HTTPS.
url = "https://gliders.ioos.us/providers/api/deployment"

# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of trying to decode an error page.
response.raise_for_status()

# Decoded JSON payload; later cells read its "results" list.
res = response.json()

print(f"Found {res['num_results']} deployments!")
Found 1817 deployments!

And here is the JSON of the last deployment found in the list.

# The payload keeps the individual deployment records under "results".
deployments = res["results"]

# A negative index selects the final record of that list.
deployment = deployments[-1]

# Bare expression: displayed as the cell output when run in a notebook.
deployment
{'archive_safe': True,
 'attribution': 'National Science Foundation',
 'checksum': 'c682792afa6fb4bdfa4b2a957bf2d59b',
 'completed': False,
 'created': 1710618007971,
 'dap': 'http://gliders.ioos.us/thredds/dodsC/deployments/rutgers/unit_507-20200109T1549-delayed/unit_507-20200109T1549-delayed.nc3.nc',
 'delayed_mode': True,
 'deployment_date': 1578584940000,
 'deployment_dir': 'rutgers/unit_507-20200109T1549-delayed',
 'erddap': 'http://gliders.ioos.us/erddap/tabledap/unit_507-20200109T1549-delayed.html',
 'estimated_deploy_date': None,
 'estimated_deploy_location': None,
 'glider_name': 'unit_507',
 'id': '65f5f59753a610008b2a5ec2',
 'iso': 'http://gliders.ioos.us/erddap/tabledap/unit_507-20200109T1549-delayed.iso19115',
 'latest_file': 'unit_507_20200221T163816Z_delayed.nc',
 'latest_file_mtime': 1710724069899,
 'name': 'unit_507-20200109T1549-delayed',
 'operator': 'University of Alaska Fairbanks',
 'sos': 'http://gliders.ioos.us/thredds/sos/deployments/rutgers/unit_507-20200109T1549-delayed/unit_507-20200109T1549-delayed.nc3.nc?service=SOS&request=GetCapabilities&AcceptVersions=1.0.0',
 'thredds': 'http://gliders.ioos.us/thredds/catalog/deployments/rutgers/unit_507-20200109T1549-delayed/catalog.html?dataset=deployments/rutgers/unit_507-20200109T1549-delayed/unit_507-20200109T1549-delayed.nc3.nc',
 'updated': 1710724568267,
 'username': 'rutgers',
 'wmo_id': None}

The metadata is very rich and informative. A quick way to get to the data is to read the dap endpoint with xarray.

import cf_xarray  # noqa
import xarray as xr

# Get this specific glider because it looks cool ;-)
glider_name = "sp064-20161214T1913"
for deployment in deployments:
    if deployment["name"] == glider_name:
        url = deployment["dap"]
        # Names identify a single deployment, so stop at the first match.
        break
else:
    # Without this guard ``url`` would still hold whatever it pointed to
    # before the loop and the wrong resource would be opened below.
    raise ValueError(f"Deployment {glider_name!r} not found in the listing")

# Open the deployment's "dap" endpoint with xarray.
ds = xr.open_dataset(url)
ds
<xarray.Dataset> Size: 51MB
Dimensions:                             (trajectory: 1, profile: 931, obs: 367)
Coordinates:
  * trajectory                          (trajectory) object 8B 'sp064-2016121...
    time                                (trajectory, profile) datetime64[ns] 7kB ...
    latitude                            (trajectory, profile) float64 7kB ...
    longitude                           (trajectory, profile) float64 7kB ...
    lat_uv                              (trajectory, profile) float64 7kB ...
    lon_uv                              (trajectory, profile) float64 7kB ...
    time_uv                             (trajectory, profile) datetime64[ns] 7kB ...
    depth                               (trajectory, profile, obs) float32 1MB ...
Dimensions without coordinates: profile, obs
Data variables: (12/37)
    wmo_id                              (trajectory) object 8B ...
    profile_id                          (trajectory, profile) float64 7kB ...
    u                                   (trajectory, profile) float64 7kB ...
    v                                   (trajectory, profile) float64 7kB ...
    conductivity                        (trajectory, profile, obs) float32 1MB ...
    conductivity_qc                     (trajectory, profile, obs) float32 1MB ...
    ...                                  ...
    temperature                         (trajectory, profile, obs) float32 1MB ...
    temperature_qc                      (trajectory, profile, obs) float32 1MB ...
    time_qc                             (trajectory, profile, obs) float32 1MB ...
    time_uv_qc                          (trajectory, profile, obs) float32 1MB ...
    u_qc                                (trajectory, profile, obs) float32 1MB ...
    v_qc                                (trajectory, profile, obs) float32 1MB ...
Attributes: (12/60)
    acknowledgment:                This deployment supported by NOAA.
    cdm_data_type:                 TrajectoryProfile
    cdm_profile_variables:         time_uv,lat_uv,lon_uv,u,v,profile_id,time,...
    cdm_trajectory_variables:      trajectory,wmo_id
    contributor_name:              Bob Simons (bob.simons@noaa.gov)
    contributor_role:              additional data management
    ...                            ...
    time_coverage_end:             2017-03-29T17:31:45Z
    time_coverage_start:           2016-12-14T20:46:45Z
    title:                         sp064-20161214T1913
    Westernmost_Easting:           -122.64472
    DODS.strlen:                   7
    DODS.dimName:                  wmo_id_strlen

In order to plot, for example, sea water temperature data, one must first clean the data of missing values.

import matplotlib.pyplot as plt
import numpy as np
import seawater as sw


def distance(x, y, units="km"):
    """
    Cumulative along-track distance for a sequence of positions.

    :param x: Longitudes of the track points (``plot_glider`` passes the
        X axis here), in decimal degrees as expected by ``seawater.dist``.
    :param y: Latitudes of the track points (the Y axis), same convention.
    :param str units: Distance units, forwarded to ``seawater.dist``.
    :returns: Array starting at 0 followed by the running sum of the
        point-to-point distances.

    """
    # seawater.dist is declared as dist(lat, lon, units), so latitude (y)
    # goes first. The second return value (phase angle) is not needed.
    dist, _ = sw.dist(y, x, units=units)
    return np.r_[0, np.cumsum(dist)]


def plot_glider(x, y, z, t, data, cmap=plt.cm.viridis, figsize=(11, 3.75)):
    """
    Scatter ``data`` against along-track distance and depth.

    :param x: Horizontal positions forwarded to ``distance`` as ``x``.
    :param y: Horizontal positions forwarded to ``distance`` as ``y``.
    :param z: Depth values; their absolute value is plotted.
    :param t: Accepted for call compatibility; not used by the body.
    :param data: Values used to color the scatter points.
    :param cmap: Colormap handed to ``ax.scatter``.
    :param tuple figsize: Figure size handed to ``plt.subplots``.
    :returns: The ``(fig, ax, cbar)`` triple.

    """
    fig, ax = plt.subplots(figsize=figsize)

    # Depth is drawn as a positive number; the y axis is flipped below.
    depth = np.abs(z)
    # Give the distance a trailing axis so it broadcasts against depth.
    along_track = distance(x, y, units="km")[..., np.newaxis]
    along_track, depth = np.broadcast_arrays(along_track, depth)

    points = ax.scatter(along_track, depth, s=5, c=data, cmap=cmap)
    cbar = fig.colorbar(
        points, orientation="vertical", extend="both", shrink=0.65
    )

    ax.invert_yaxis()
    ax.set_xlabel("Distance (km)")
    ax.set_ylabel("Depth (m)")
    return fig, ax, cbar

The functions above compute the cumulative along-track distance and draw the section plot. The cell below uses the actual_range metadata to mask the invalid/bad depth values and prepares the parameters for plotting.

The figure below shows the temperature slice (left), and glider track (right) with start and end points marked with green and red respectively.

Note: This glider was deployed off the west coast of the U.S.

# Temperature plus its four CF axes, each with singleton dimensions dropped.
temp = ds["temperature"]
x, y, z, t = (temp.cf[axis].squeeze() for axis in ("X", "Y", "Z", "T"))

# Mask Z values that fall outside the range advertised in the metadata.
vmin, vmax = z.attrs["actual_range"]
z = np.ma.masked_outside(z.to_numpy(), vmin, vmax)

fig, ax, cbar = plot_glider(x, y, z, t, temp)
ax.set_title("Temperature")
# Put the units label on top of the colorbar.
cbar.ax.xaxis.set_label_position("top")
cbar.ax.set_xlabel(r"($^\circ$C)");
../../../_images/1c4c3681e956a35682da71a7ad9a7a329d2d1b117393b39bc7542212f4784225.png

There are many things the user can do with the API. Here is another example that finds all glider deployments within a bounding box.

# Search area corners as [[min_lon, min_lat], [max_lon, max_lat]]; the
# four numbers are later passed in this order to ``shapely.geometry.box``.
bbox = [[-125.72, 32.60], [-117.57, 36.93]]

The cell below defines two helper functions to parse the geometry from the JSON and convert the trajectory to a shapely LineString to prepare the data for GIS operations later.

from shapely.geometry import LineString


def parse_geometry(geometry):
    """
    Filters out potentially bad coordinate pairs as returned from
    GliderDAC. Returns a safe geometry object.

    Only the first feature whose geometry type is ``LineString`` is read.
    When there is no such feature the result holds an empty coordinate
    list instead of failing on an unbound variable.

    :param dict geometry: A GeoJSON object carrying a ``features`` list
    :returns: ``{"coordinates": [[lon, lat], ...]}`` without any pair in
        which the longitude or the latitude is ``None``
    :rtype: dict

    """
    # First LineString wins; fall back to "no points" when none exists.
    coordinates = next(
        (
            feature["geometry"]["coordinates"]
            for feature in geometry["features"]
            if feature["geometry"]["type"] == "LineString"
        ),
        [],
    )
    return {
        "coordinates": [
            [lon, lat]
            for lon, lat in coordinates
            if lon is not None and lat is not None
        ]
    }


def fetch_trajectory(deployment, timeout=60):
    """
    Downloads the track as GeoJSON from GliderDAC

    :param dict deployment: The deployment object as returned from GliderDAC
    :param timeout: Seconds handed to ``requests.get`` so an unresponsive
        server cannot block the caller indefinitely
    :returns: The track as a shapely ``LineString``
    :raises OSError: If the server answers with a status other than 200.
        Exceptions raised by ``requests`` itself (timeouts included) derive
        from ``OSError`` as well.

    """
    track_url = f"https://gliders.ioos.us/status/api/tracks.php?dataset_id={deployment['name']}&type=full"
    response = requests.get(track_url, timeout=timeout)
    if response.status_code != 200:
        raise OSError(
            f"Failed to get Glider Track for {deployment['deployment_dir']}"
        )
    # Drop unusable coordinate pairs before building the geometry.
    geometry = parse_geometry(response.json())
    return LineString(geometry["coordinates"])

Now it is easy to check which tracks lie inside the box.

# Rebind ``res`` to just the list of deployment records.
res = response.json()["results"]
# Size of the slice holding the last 100 records.
len(res[-100:])
100
from shapely.geometry import box

# Rectangle built from the two bbox corners (minx, miny, maxx, maxy).
search_box = box(*bbox[0], *bbox[1])

# Maps deployment name -> track geometry for tracks that touch the box.
inside = {}
# Getting only the first 20 deployments.
for deployment in response.json()["results"][:20]:
    try:
        coords = fetch_trajectory(deployment)
    except OSError:
        # The track could not be downloaded; skip this deployment.
        continue
    if search_box.intersects(coords):
        inside[deployment["name"]] = coords

Finally, we can create an interactive map displaying the tracks found in the bounding box.

def plot_track(coords, name, color="orange"):
    """
    Draw one track on the module-level map ``m``.

    The first point gets a green circle, the last point a red one, and
    the whole track is added as a semi-transparent line.

    :param coords: Geometry exposing ``.xy`` (x values, then y values)
    :param name: Popup text attached to the line
    :param str color: Line color

    """
    xs, ys = coords.xy
    # Pairs are ordered (y, x) before being handed to folium.
    locations = [*zip(ys.tolist(), xs.tolist())]

    endpoints = ((locations[0], "green"), (locations[-1], "red"))
    for point, marker_color in endpoints:
        folium.CircleMarker(
            point, color=marker_color, fill=True, radius=10, stroke=False
        ).add_to(m)

    track = folium.PolyLine(
        locations=locations, color=color, weight=8, opacity=0.2, popup=name
    )
    track.add_to(m)
import folium

# ESRI World Topo Map tile template. The path has a single "MapServer"
# segment, and HTTPS avoids mixed-content blocking on HTTPS-hosted pages.
tiles = (
    "https://services.arcgisonline.com/arcgis/rest/services/"
    "World_Topo_Map/MapServer/tile/{z}/{y}/{x}"
)

# Center the map on the middle of the search box, ordered [y, x].
location = [search_box.centroid.y, search_box.centroid.x]

m = folium.Map(location=location, zoom_start=5, tiles=tiles, attr="ESRI")


# Add every track that was found inside the search box.
for name, coords in inside.items():
    plot_track(coords, name, color="orange")


m
Make this Notebook Trusted to load map: File -> Trust Notebook