Fetching data from a CSW catalog with Python tools
Fetching data from a CSW catalog with Python tools#
Created: 2017-12-15
This notebook shows a typical workflow to query a Catalog Service for the Web (CSW) and create a request for data endpoints that are suitable for download.
In this queries multiple catalogs for the near real time HF-Radar current data.
The first step is to create the data filter based on the geographical region bounding box, the time span, and the CF variable standard name.
from datetime import datetime, timedelta
# Region: West coast.
min_lon, max_lon = -123, -121
min_lat, max_lat = 36, 40
bbox = [min_lon, min_lat, max_lon, max_lat]
crs = "urn:ogc:def:crs:OGC:1.3:CRS84"
# Temporal range: past week.
now = datetime.utcnow()
start, stop = now - timedelta(days=(14)), now - timedelta(days=(7))
# Surface velocity CF names.
cf_names = [
"surface_northward_sea_water_velocity",
"surface_eastward_sea_water_velocity",
]
msg = """
*standard_names*: {cf_names}
*start and stop dates*: {start} to {stop}
*bounding box*:{bbox}
*crs*: {crs}""".format
print(
msg(
**{
"cf_names": ", ".join(cf_names),
"start": start,
"stop": stop,
"bbox": bbox,
"crs": crs,
}
)
)
*standard_names*: surface_northward_sea_water_velocity, surface_eastward_sea_water_velocity
*start and stop dates*: 2018-09-21 19:42:26.750025 to 2018-09-28 19:42:26.750025
*bounding box*:[-123, 36, -121, 40]
*crs*: urn:ogc:def:crs:OGC:1.3:CRS84
Now it is possible to assemble a OGC Filter Encoding (FE) for the search using owslib.fes
*. Note that the final result is only a list with all the filtering conditions.
* OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models.
from owslib import fes
def fes_date_filter(start, stop, constraint="overlaps"):
start = start.strftime("%Y-%m-%d %H:00")
stop = stop.strftime("%Y-%m-%d %H:00")
if constraint == "overlaps":
propertyname = "apiso:TempExtent_begin"
begin = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname, literal=stop)
propertyname = "apiso:TempExtent_end"
end = fes.PropertyIsGreaterThanOrEqualTo(
propertyname=propertyname, literal=start
)
elif constraint == "within":
propertyname = "apiso:TempExtent_begin"
begin = fes.PropertyIsGreaterThanOrEqualTo(
propertyname=propertyname, literal=start
)
propertyname = "apiso:TempExtent_end"
end = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname, literal=stop)
else:
raise NameError("Unrecognized constraint {}".format(constraint))
return begin, end
kw = dict(wildCard="*", escapeChar="\\", singleChar="?", propertyname="apiso:AnyText")
or_filt = fes.Or([fes.PropertyIsLike(literal=("*%s*" % val), **kw) for val in cf_names])
# Exclude GNOME returns.
not_filt = fes.Not([fes.PropertyIsLike(literal="*GNOME*", **kw)])
begin, end = fes_date_filter(start, stop)
bbox_crs = fes.BBox(bbox, crs=crs)
filter_list = [fes.And([bbox_crs, begin, end, or_filt, not_filt])]
It is possible to use the same filter to search multiple catalogs. The cell below loops over 3 catalogs hoping to find which one is more up-to-date and returns the near real time data.
def get_csw_records(csw, filter_list, pagesize=10, maxrecords=1000):
"""Iterate `maxrecords`/`pagesize` times until the requested value in
`maxrecords` is reached.
"""
from owslib.fes import SortBy, SortProperty
# Iterate over sorted results.
sortby = SortBy([SortProperty("dc:title", "ASC")])
csw_records = {}
startposition = 0
nextrecord = getattr(csw, "results", 1)
while nextrecord != 0:
csw.getrecords2(
constraints=filter_list,
startposition=startposition,
maxrecords=pagesize,
sortby=sortby,
)
csw_records.update(csw.records)
if csw.results["nextrecord"] == 0:
break
startposition += pagesize + 1 # Last one is included.
if startposition >= maxrecords:
break
csw.records.update(csw_records)
from owslib.csw import CatalogueServiceWeb
endpoint = "https://data.ioos.us/csw"
csw = CatalogueServiceWeb(endpoint, timeout=60)
get_csw_records(csw, filter_list, pagesize=10, maxrecords=1000)
records = "\n".join(csw.records.keys())
print("Found {} records.\n".format(len(csw.records.keys())))
for key, value in list(csw.records.items()):
print("[{}]: {}".format(value.title, key))
Found 5 records.
[HYbrid Coordinate Ocean Model (HYCOM): Global]: hycom_global
[Near-Real Time Surface Ocean Velocity, U.S. West Coast,
1 km Resolution]: HFR/USWC/1km/hourly/RTV/HFRADAR_US_West_Coast_1km_Resolution_Hourly_RTV_best.ncd
[Near-Real Time Surface Ocean Velocity, U.S. West Coast,
2 km Resolution]: HFR/USWC/2km/hourly/RTV/HFRADAR_US_West_Coast_2km_Resolution_Hourly_RTV_best.ncd
[Near-Real Time Surface Ocean Velocity, U.S. West Coast,
500 m Resolution]: HFR/USWC/500m/hourly/RTV/HFRADAR_US_West_Coast_500m_Resolution_Hourly_RTV_best.ncd
[Near-Real Time Surface Ocean Velocity, U.S. West Coast,
6 km Resolution]: HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd
Let us check the 6 km resolution metadata record we found above.
value = csw.records[
"HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd"
]
print(value.abstract)
Surface ocean velocities estimated from HF-Radar are
representative of the upper 0.3 - 2.5 meters of the
ocean. The main objective of near-real time
processing is to produce the best product from
available data at the time of processing. Radial
velocity measurements are obtained from individual
radar sites through the U.S. HF-Radar Network.
Hourly radial data are processed by unweighted
least-squares on a 6 km resolution grid of the U.S.
West Coast to produce near real-time surface current
maps.
attrs = [attr for attr in dir(value) if not attr.startswith("_")]
nonzero = [attr for attr in attrs if getattr(value, attr)]
nonzero
['abstract',
'bbox',
'identifier',
'identifiers',
'modified',
'references',
'subjects',
'title',
'type',
'xml']
The xml
has the full dataset metadata from the catalog. Let’s print a few key ones here:
value.subjects # What is in there?
['SIO/UCSD',
'surface_eastward_sea_water_velocity',
'surface_northward_sea_water_velocity',
'latitude',
'longitude',
'forecast_period',
'latitude',
'longitude',
'time',
'forecast_reference_time',
'climatologyMeteorologyAtmosphere']
value.modified # Is it up-to-date?
'2018-10-04'
bbox = value.bbox.minx, value.bbox.miny, value.bbox.maxx, value.bbox.maxy
bbox # The actual bounding box of the data.
('-130.36', '30.25', '-115.81', '49.99')
The next step is to inspect the type services and schemes available for downloading the data.
The easiest way to accomplish that is with by “sniffing” the URLs with geolinks
.
from geolinks import sniff_link
msg = "geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n".format
for ref in value.references:
if ref["scheme"] == "OPeNDAP:OPeNDAP":
url = ref["url"]
print(msg(geolink=sniff_link(ref["url"]), **ref))
geolink: WWW:LINK
scheme: WWW:LINK
URL: http://hfrnet-tds.ucsd.edu/thredds/dodsC/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd.html
geolink: None
scheme: WWW:LINK
URL: http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://hfrnet-tds.ucsd.edu/thredds/dodsC/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd
geolink: None
scheme: OPeNDAP:OPeNDAP
URL: http://hfrnet-tds.ucsd.edu/thredds/dodsC/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd
geolink: OGC:WCS
scheme: OGC:WCS
URL: http://hfrnet-tds.ucsd.edu/thredds/wcs/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd?service=WCS&version=1.0.0&request=GetCapabilities
geolink: OGC:WMS
scheme: OGC:WMS
URL: http://hfrnet-tds.ucsd.edu/thredds/wms/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd?service=WMS&version=1.3.0&request=GetCapabilities
geolink: UNIDATA:NCSS
scheme: UNIDATA:NCSS
URL: http://hfrnet-tds.ucsd.edu/thredds/ncss/grid/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd/dataset.html
For a detailed description of what those geolink
results mean check the lookup table.
There are Web Coverage Service (WCS), Web Map Service (WMS),
direct links, and OPeNDAP services available.
We can use any of those to obtain the data but the easiest one to explore interactively is the open OPeNDAP endpoint.
import xarray as xr
ds = xr.open_dataset(url)
ds
<xarray.Dataset>
Dimensions: (lat: 367, lon: 234, nProcParam: 7, nSites: 54, time: 61425)
Coordinates:
* lat (lat) float32 30.25 30.30394 30.35788 ... 49.9381 49.99204
* lon (lon) float32 -130.36 -130.29753 ... -115.86803 -115.805565
* time (time) datetime64[ns] 2011-10-01 ... 2018-10-05T18:00:00
time_run (time) datetime64[ns] ...
Dimensions without coordinates: nProcParam, nSites
Data variables:
site_lat (nSites) float32 ...
site_lon (nSites) float32 ...
site_code (nSites) |S64 ...
site_netCode (nSites) |S64 ...
procParams (nProcParam) float32 ...
time_offset (time) datetime64[ns] ...
u (time, lat, lon) float32 ...
v (time, lat, lon) float32 ...
DOPx (time, lat, lon) float32 ...
DOPy (time, lat, lon) float32 ...
Attributes:
netcdf_library_version: 4.1.3
format_version: HFRNet_1.0.0
product_version: HFRNet_1.1.05
Conventions: CF-1.4
title: Near-Real Time Surface Ocean Velocity, U.S. West...
institution: Scripps Institution of Oceanography
source: Surface Ocean HF-Radar
history: 05-Oct-2018 08:07:19: NetCDF file created\n05-Oc...
references: Terrill, E. et al., 2006. Data Management and Re...
creator_name: Mark Otero
creator_email: motero@ucsd.edu
creator_url: http://cordc.ucsd.edu/projects/mapping/
summary: Surface ocean velocities estimated from HF-Radar...
geospatial_lat_min: 30.25
geospatial_lat_max: 49.99204
geospatial_lon_min: -130.36
geospatial_lon_max: -115.805565
grid_resolution: 6km
grid_projection: equidistant cylindrical
regional_description: Unites States, West Coast
_CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
cdm_data_type: GRID
featureType: GRID
location: Proto fmrc:HFRADAR_US_West_Coast_6km_Resolution_...
DODS.strlen: 25
DODS.dimName: nSites_maxStrlen
Select “yesterday” data.
from datetime import date, timedelta
yesterday = date.today() - timedelta(days=1)
ds = ds.sel(time=yesterday)
Compute the speed while masking invalid values.
import numpy.ma as ma
u = ds["u"].data
v = ds["v"].data
lon = ds.coords["lon"].data
lat = ds.coords["lat"].data
time = ds.coords["time"].data
u = ma.masked_invalid(u)
v = ma.masked_invalid(v)
This cell is only a trick to show all quiver arrows with the same length, for visualization purposes, and indicate the vector magnitude with colors instead.
import numpy as np
from oceans.ocfis import spdir2uv, uv2spdir
angle, speed = uv2spdir(u, v)
us, vs = spdir2uv(np.ones_like(speed), angle, deg=True)
And now we are ready to create the plot.
%matplotlib inline
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from cartopy import feature
from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
LAND = feature.NaturalEarthFeature(
"physical", "land", "10m", edgecolor="face", facecolor="lightgray"
)
sub = 2
dx = dy = 0.5
center = -122.416667, 37.783333 # San Francisco.
bbox = lon.min(), lon.max(), lat.min(), lat.max()
fig, (ax0, ax1) = plt.subplots(
ncols=2, figsize=(20, 20), subplot_kw=dict(projection=ccrs.PlateCarree())
)
ax0.set_extent(bbox)
cs = ax0.pcolormesh(lon, lat, ma.masked_invalid(speed))
gl = ax0.gridlines(draw_labels=True)
gl.xlabels_top = gl.ylabels_right = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
cbar = fig.colorbar(cs, ax=ax0, shrink=0.45, extend="both")
cbar.ax.set_title(r"speed m s$^{-1}$", loc="left")
ax0.add_feature(LAND, zorder=0, edgecolor="black")
ax0.set_title("{}\n{}".format(value.title, ds["time"].values))
ax1.set_extent([center[0] - dx - dx, center[0] + dx, center[1] - dy, center[1] + dy])
q = ax1.quiver(
lon[::sub],
lat[::sub],
us[::sub, ::sub],
vs[::sub, ::sub],
speed[::sub, ::sub],
scale=30,
)
ax1.quiverkey(q, 0.5, 0.85, 1, r"1 m s$^{-1}$", coordinates="axes")
gl = ax1.gridlines(draw_labels=True)
gl.xlabels_top = gl.ylabels_right = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
ax1.add_feature(LAND, zorder=0, edgecolor="black")
ax1.plot(
ds["site_lon"], ds["site_lat"], marker="o", linestyle="none", color="darkorange"
)
ax1.set_title("San Francisco Bay area")

And here is yesterday’s sea surface currents from the west coast with a zoom in the San Francisco Bay area.