Setup¶
Import the ioos_qc libraries, as well as pooch for data fetching and Bokeh for plotting.
[1]:
import numpy as np
import pandas as pd
import pooch
import xarray as xr
from bokeh.plotting import figure, output_notebook, show
from bokeh.transform import factor_cmap, factor_mark
from ioos_qc.config import Config
from ioos_qc.qartod import aggregate
from ioos_qc.results import CollectedResult, collect_results
from ioos_qc.streams import XarrayStream
# Configure Bokeh so that later show() calls render inline in the notebook.
output_notebook()
Load the dataset¶
Here we use a glider mission from the Baltic as a test dataset.
https://observations.voiceoftheocean.org/SEA067/M37
[2]:
# Coordinates of the sample glider file among the ioos_qc release assets.
url = "https://github.com/ioos/ioos_qc/releases/download"
version = "2.1.0"
fname = "nrt_SEA067_M37.nc"

# Assemble the full asset URL and the checksum pooch verifies the file against.
dataset_url = f"{url}/{version}/{fname}"
expected_hash = "sha256:06e8a79cc17a2d55bb32dbfdc85f9922c1a1cc14726df004ae971125f91b27ac"

# pooch.retrieve returns the local path of the downloaded (or cached) file.
download = pooch.retrieve(url=dataset_url, known_hash=expected_hash)
ds = xr.open_dataset(download)
Generate test configs¶
Make a dictionary of QARTOD test configurations. To generate salinity flags, we run tests on salinity, conductivity and temperature.
[3]:
# QARTOD test parameters, grouped by the dataset variable they are applied to.
temperature_tests = {
    "gross_range_test": {"suspect_span": [0, 30], "fail_span": [-2.5, 40]},
    "spike_test": {"suspect_threshold": 2.0, "fail_threshold": 6.0},
}
conductivity_tests = {
    "gross_range_test": {"suspect_span": [6, 42], "fail_span": [3, 45]},
}
salinity_tests = {
    "gross_range_test": {"suspect_span": [5, 38], "fail_span": [2, 41]},
    "spike_test": {"suspect_threshold": 0.3, "fail_threshold": 0.9},
    "location_test": {"bbox": [10, 50, 25, 60]},
}

# Nest each group under the "qartod" package key, keeping the variable order.
config = {
    variable: {"qartod": tests}
    for variable, tests in (
        ("temperature", temperature_tests),
        ("conductivity", conductivity_tests),
        ("salinity", salinity_tests),
    )
}
Run the QC¶
Create the config and the data stream, then run the configured tests on the dataset.
[4]:
# Build the ioos_qc Config object from the nested test dictionary defined above.
c = Config(config)
# Wrap the xarray dataset in a stream, naming the variables that hold longitude
# and latitude (presumably what the location_test reads -- confirm in ioos_qc docs).
qc = XarrayStream(ds, lon="longitude", lat="latitude")
# Materialise whatever run() yields into a list before handing it on.
runner = list(qc.run(c))
# Gather the individual test outputs; how="list" asks for them as a list.
results = collect_results(runner, how="list")
Aggregate results¶
This makes the plotting a bit simpler, as we roll up the flags into one array
[5]:
# Roll the flags of every collected test result up into one array first ...
rollup_flags = aggregate(results)

# ... then wrap that array in a CollectedResult labelled as the "qc_rollup" test.
agg = CollectedResult(
    stream_id="",
    package="qartod",
    test="qc_rollup",
    function=aggregate,
    results=rollup_flags,
    tinp=qc.time(),
    data=ds,
)

# Numeric roll-up flags consumed by the plotting cells below.
flag_vals = agg.results
Plot results¶
[6]:
# Labels for the numeric flag values handled explicitly; any other value keeps
# the "UNKNOWN" default assigned below.
flag_meanings = {1: "GOOD", 9: "MISSING", 3: "SUSPECT", 4: "FAIL"}

time = ds.time
meaning = np.full(len(time), "UNKNOWN", dtype=object)
for flag_value, label in flag_meanings.items():
    meaning[flag_vals == flag_value] = label

# One row per time step: the measurement, its roll-up flag and the flag label.
df = pd.DataFrame(
    {
        "time": time,
        "salinity": ds["salinity"],
        "flag": flag_vals,
        "depth": ds.depth,
        "quality control": meaning,
    },
)
[7]:
# Category labels in the order used to assign markers and palette colours.
# Kept under its own name so the numeric `flag_vals` array built earlier is not
# overwritten; otherwise re-running the DataFrame cell after this one fails.
flag_labels = ["GOOD", "UNKNOWN", "MISSING", "SUSPECT", "FAIL"]
markers = ["hex", "circle_x", "circle", "triangle", "square"]

p = figure(title="Salinity flags", background_fill_color="#fafafa", x_axis_type="datetime")
p.yaxis.axis_label = "salinity (PSU)"

# Marker shape and colour are both driven by the "quality control" column,
# and legend_group gives one legend entry per label.
p.scatter(
    "time",
    "salinity",
    source=df,
    legend_group="quality control",
    fill_alpha=0.4,
    size=12,
    marker=factor_mark("quality control", markers, flag_labels),
    color=factor_cmap("quality control", "Category10_5", flag_labels),
)

p.legend.location = "top_left"
p.legend.title = "IOOS flags"
show(p)