Source code for ioos_qc.config_creator.get_assets

#!python
"""Download and process source data used for ConfigCreator"""
import logging
import shutil
from pathlib import Path
from urllib import request

import h5netcdf.legacyapi as nc

import xarray as xr
from nco import Nco

# Module logger; the level is forced to INFO so progress messages pass the
# logger-level filter (a handler still has to be configured by the caller).
logger = logging.getLogger('ConfigCreator:get_assets.py')
logger.setLevel(logging.INFO)

# URL templates for the Ocean Atlas files.  Placeholders, as filled in by
# ocean_atlas_download: {0} variable name, {1} variable letter used in the
# file name, {2} zero-padded month number.
_OCEAN_ATLAS_URLS = {
    'ts_url': 'https://data.nodc.noaa.gov/thredds/fileServer/ncei/woa/{0}/A5B7/1.00/woa18_A5B7_{1}{2:02d}_01.nc',
    'other_url': 'https://data.nodc.noaa.gov/thredds/fileServer/ncei/woa/{0}/all/1.00/woa18_all_{1}{2:02d}_01.nc'
}

# URL template for the NARR files.  {0} is the remote file name, as filled in
# by narr_download.
_NARR_URLS = {
    'url': 'ftp://ftp.cdc.noaa.gov/Datasets/ncep.reanalysis.derived/surface/{0}'
}

# Download locations keyed by data provider; used as the default ``sources``
# argument of the download functions.
SOURCES = {
    'OCEAN_ATLAS': _OCEAN_ATLAS_URLS,
    'NARR': _NARR_URLS
}


def ocean_atlas_download(output_dir, month, sources=SOURCES):
    """Download the Ocean Atlas files for a single month.

    One file is fetched for each of temperature, salinity and oxygen and
    written to ``output_dir`` as ``ocean_atlas_<variable>_<MM>.nc``.

    Args:
        output_dir: ``pathlib.Path`` of the directory to write into. It is
            not created here.
        month: Date-like object; only its ``month`` attribute is read.
        sources: Mapping shaped like ``SOURCES``. Its ``'OCEAN_ATLAS'`` entry
            must provide the ``'ts_url'`` and ``'other_url'`` templates.

    Download and file-system errors are not handled here and propagate to
    the caller.
    """
    variable_map = {
        'temperature': 't',
        'salinity': 's',
        'oxygen': 'o'
    }

    for name, name_in_file in variable_map.items():
        # temperature and salinity are served from a different URL template
        # than the remaining variables
        if name in ['temperature', 'salinity']:
            url = sources['OCEAN_ATLAS']['ts_url'].format(name, name_in_file, month.month)
        else:
            url = sources['OCEAN_ATLAS']['other_url'].format(name, name_in_file, month.month)

        # The context manager closes the connection even if read() fails;
        # previously the response object was never closed.
        with request.urlopen(url) as response:
            data = response.read()

        fname = f'ocean_atlas_{name}_{month.month:02}.nc'
        out_file = output_dir / fname
        with open(out_file, 'wb') as f:
            f.write(data)
def ocean_atlas_merge_variables(output_dir, month):
    """Combine one month's per-variable Ocean Atlas files into a single file.

    Files matching ``ocean_atlas_*_<MM>.nc`` in ``output_dir`` are sorted by
    path. The first is copied to ``ocean_atlas_<MM>.nc`` and every remaining
    file is then merged into that copy with ``ncks``.

    Args:
        output_dir: ``pathlib.Path`` of the directory holding the monthly
            per-variable files.
        month: Date-like object; only its ``month`` attribute is read.

    Raises:
        IndexError: If no per-variable file matches the pattern.
    """
    month_tag = f'{month.month:02}'
    per_variable_files = sorted(output_dir.glob(f'ocean_atlas_*_{month_tag}.nc'))
    merged_target = output_dir / f'ocean_atlas_{month_tag}.nc'

    nco = Nco()
    ncks_options = ['-A', '-4', '-L 5']

    # Seed the merged file with the first variable, then fold in the others.
    merged_path = shutil.copy(per_variable_files[0], merged_target)
    for extra_file in per_variable_files[1:]:
        nco.ncks(input=str(extra_file), output=str(merged_path), options=ncks_options)
def ocean_atlas_variable_enhance(output_dir, month):
    """Trim and re-time the merged Ocean Atlas file for one month, in place.

    Operates on ``ocean_atlas_<MM>.nc`` in ``output_dir`` in three steps:
    keep only ``s_an``, ``t_an`` and ``o_an``; overwrite the time values with
    ``month`` expressed in days since 1970-01-01; make ``time`` a record
    dimension.

    Args:
        output_dir: ``pathlib.Path`` of the directory holding the merged file.
        month: Date passed to ``date2num``; its ``month`` attribute also
            selects the file.
    """
    monthly_file = output_dir / f'ocean_atlas_{month.month:02}.nc'
    monthly_file_str = str(monthly_file)
    nco = Nco()

    # only keep variables needed
    keep_list = ','.join(('s_an', 't_an', 'o_an'))
    nco.ncks(
        input=monthly_file_str,
        output=monthly_file_str,
        options=['-h', f'-v {keep_list}'],
    )

    # original time in months from 1900-01-01.
    # - force to days since 1970-01-01 for first of each month
    # - makes it easy to use with xarray
    # NOTE(review): confirm that h5netcdf.legacyapi exposes date2num; this
    # helper is usually provided by netCDF4/cftime.
    epoch_units = 'days since 1970-01-01'
    day_number = nc.date2num(month, epoch_units)
    with nc.Dataset(monthly_file, 'a') as dataset:
        time_var = dataset.variables['time']
        time_var[:] = day_number
        time_var.units = epoch_units
        time_var.calendar = 'standard'

    # make time a record
    nco.ncks(
        input=monthly_file_str,
        output=monthly_file_str,
        options=['-O', '--mk_rec_dmn time'],
    )
def ocean_atlas_merge_time(output_dir):
    """Concatenate the monthly Ocean Atlas files into one file.

    Every ``ocean_atlas_??.nc`` in ``output_dir`` is passed, sorted by path
    string, to ``ncrcat``. The result is written to ``ocean_atlas.nc`` in the
    parent of ``output_dir``.

    Args:
        output_dir: ``pathlib.Path`` of the directory holding the monthly files.
    """
    monthly_paths = sorted(str(path) for path in output_dir.glob('ocean_atlas_??.nc'))
    combined_path = output_dir.parent / 'ocean_atlas.nc'

    Nco().ncrcat(input=monthly_paths, output=str(combined_path), options=['-A'])
def ocean_atlas_enhance(output_dir):
    """Re-pack the combined ``ocean_atlas.nc`` via a temporary file.

    The combined file in the parent of ``output_dir`` is first rewritten to
    ``ocean_atlas_tmp.nc`` with a new ``_FillValue``, then packed back onto
    the original path. The temporary file is left in place.

    Args:
        output_dir: ``pathlib.Path`` whose parent holds ``ocean_atlas.nc``.
    """
    parent_dir = output_dir.parent
    final_path = str(parent_dir / 'ocean_atlas.nc')
    scratch_path = str(parent_dir / 'ocean_atlas_tmp.nc')
    nco = Nco()

    # change value that fits in a packed short var
    nco.ncatted(
        input=final_path,
        output=scratch_path,
        options=['-O', '-a _FillValue,,o,f,-127'],
    )

    # pack to use bytes
    # - requires output file defined with -o option
    nco.ncpdq(
        input=scratch_path,
        output=final_path,
        options=['-O', '-M flt_byt', f"-o {final_path}"],
    )
def get_ocean_atlas(output_dir):
    """Run the full Ocean Atlas pipeline for the twelve months of 2018.

    For each month start the files are downloaded, merged across variables
    and enhanced. Afterwards the monthly files are concatenated in time and
    the combined file is re-packed.

    Args:
        output_dir: ``pathlib.Path`` of the working directory for the monthly
            files.
    """
    per_month_steps = (
        ocean_atlas_download,
        ocean_atlas_merge_variables,
        ocean_atlas_variable_enhance,
    )

    for month in xr.cftime_range(start='2018', end='2018-12-31', freq='MS'):
        logger.info(f'downloading Ocean Atlas for {month}')
        for step in per_month_steps:
            step(output_dir, month)

    # these two operate on the whole set of monthly files, so run once
    ocean_atlas_merge_time(output_dir)
    ocean_atlas_enhance(output_dir)
def narr_download(output_dir, sources=SOURCES):
    """Download the NARR files listed below.

    Each remote file is saved to ``output_dir`` as ``narr_<variable>.nc``,
    where ``<variable>`` is the key in the ``variables`` table.

    Args:
        output_dir: ``pathlib.Path`` of the directory to write into. It is
            not created here.
        sources: Mapping shaped like ``SOURCES``. Its ``'NARR'`` entry must
            provide the ``'url'`` template.

    Download and file-system errors are not handled here and propagate to
    the caller.
    """
    # local variable name -> remote file name
    variables = {
        'air': 'air.sig995.mon.ltm.nc',
        'rhum': 'rhum.sig995.mon.ltm.nc',
        'slp': 'slp.mon.ltm.nc',
        'uwind': 'uwnd.sig995.mon.ltm.nc',
        'vwind': 'vwnd.sig995.mon.ltm.nc'
    }

    for variable_name, variable_file in variables.items():
        url = sources['NARR']['url'].format(variable_file)

        # The context manager closes the connection even if read() fails;
        # previously the response object was never closed.
        with request.urlopen(url) as response:
            data = response.read()

        fname = f'narr_{variable_name}.nc'
        out_file = output_dir / fname
        with open(out_file, 'wb') as f:
            f.write(data)
def narr_merge_variables(output_dir):
    """Combine the per-variable NARR files into a single ``narr.nc``.

    Files matching ``narr_*.nc`` in ``output_dir`` are sorted by path. The
    first is copied to ``narr.nc`` in the parent of ``output_dir`` and every
    remaining file is then merged into that copy with ``ncks``.

    Args:
        output_dir: ``pathlib.Path`` of the directory holding the
            per-variable files.

    Raises:
        IndexError: If no per-variable file matches the pattern.
    """
    per_variable_files = sorted(output_dir.glob('narr_*.nc'))
    merged_target = output_dir.parent / 'narr.nc'

    nco = Nco()
    ncks_options = ['-A', '-4', '-L 5']

    # Seed the merged file with the first variable, then fold in the others.
    merged_path = shutil.copy(per_variable_files[0], merged_target)
    for extra_file in per_variable_files[1:]:
        nco.ncks(input=str(extra_file), output=str(merged_path), options=ncks_options)
def narr_enhance(output_dir):
    """Clean up the merged ``narr.nc`` in the parent of ``output_dir``.

    Three steps, in order: drop ``climatology_bounds`` and ``valid_yr_count``;
    re-order and re-label longitude (via the temporary ``narr_tmp.nc``, which
    is left in place); overwrite the time values with the month starts of
    1996 in days since 1970-01-01.

    Args:
        output_dir: ``pathlib.Path`` whose parent holds ``narr.nc``.
    """
    parent_dir = output_dir.parent
    merged_file = parent_dir / 'narr.nc'
    merged_str = str(merged_file)
    scratch_str = str(parent_dir / 'narr_tmp.nc')
    nco = Nco()

    # remove unnecessary vars
    nco.ncks(
        input=merged_str,
        output=merged_str,
        options=['-O', '-C', '-x', '-v climatology_bounds,valid_yr_count'],
    )

    # change lon from [0, 360) to [-180, 180)
    nco.ncks(
        input=merged_str,
        output=scratch_str,
        options=['-O', '--msa', '-d lon,181.,360.', '-d lon,0.,180.0'],
    )
    nco.ncap2(
        input=scratch_str,
        output=merged_str,
        options=['-O', "-s 'where(lon > 180) lon=lon-360'"],
    )

    # times don't work with xarray, so the year is changed to the middle of climatology
    # climatology: 1981-01-01 - 2010-12-31
    # midyear is 1996
    # NOTE(review): confirm that h5netcdf.legacyapi exposes date2num; this
    # helper is usually provided by netCDF4/cftime.
    epoch_units = 'days since 1970-01-01 00:00:00'
    month_starts = xr.cftime_range(start='1996', end='1996-12-31', freq='MS')
    day_numbers = nc.date2num(month_starts, units=epoch_units)
    with nc.Dataset(merged_file, 'a') as dataset:
        time_var = dataset.variables['time']
        time_var[:] = day_numbers
        time_var.units = epoch_units
def get_narr(output_dir):
    """Run the NARR pipeline: download, merge variables, then enhance.

    Args:
        output_dir: ``pathlib.Path`` of the working directory for the
            per-variable files.
    """
    logger.info('downloading NARR')
    for step in (narr_download, narr_merge_variables, narr_enhance):
        step(output_dir)
def remove_tmp_files(dirs_to_delete):
    """Recursively delete each directory in ``dirs_to_delete``.

    Args:
        dirs_to_delete: Iterable of directory paths. Each one is logged and
            then removed with ``shutil.rmtree``; errors from ``rmtree``
            propagate to the caller.
    """
    logger.info('removing tmp files')
    for tmp_dir in dirs_to_delete:
        logger.info(f'removing {tmp_dir}')
        shutil.rmtree(str(tmp_dir))
def main(output_dir, remove_tmp_files=False):
    """Download and process all source data for QcConfigCreator.

    Creates ``output_dir`` if needed, then runs the Ocean Atlas and NARR
    pipelines in the ``ocean_atlas`` and ``narr`` sub-directories.

    Args:
        output_dir: Path (string or path-like) of the directory that will
            hold the results; it is resolved to an absolute path.
        remove_tmp_files: When true, the two working sub-directories are
            deleted after both pipelines have finished.
    """
    output_dir = Path(output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f'Downloading and saving data for QcConfigCreator to {output_dir}')

    logger.info('Downloading Ocean Atlas')
    ocean_atlas_dir = output_dir / 'ocean_atlas'
    ocean_atlas_dir.mkdir(exist_ok=True)
    get_ocean_atlas(ocean_atlas_dir)

    logger.info('Downloading NARR')
    narr_dir = output_dir / 'narr'
    narr_dir.mkdir(exist_ok=True)
    get_narr(narr_dir)

    if remove_tmp_files:
        # The boolean parameter shadows the module-level function of the same
        # name, so calling ``remove_tmp_files(...)`` here would try to call a
        # bool and raise TypeError.  The parameter name is part of the public
        # signature, so fetch the function from the module namespace instead.
        cleanup = globals()['remove_tmp_files']
        cleanup([ocean_atlas_dir, narr_dir])
if __name__ == '__main__':
    import argparse

    # No handler is configured anywhere else in this module, so without this
    # call the INFO progress messages would be dropped when run as a script.
    logging.basicConfig(level=logging.INFO)

    # The text must be passed as ``description``; the first positional
    # argument of ArgumentParser is ``prog`` and would replace the program
    # name in the usage line.
    parser = argparse.ArgumentParser(
        description='Download source data for ConfigCreator'
    )
    parser.add_argument(
        '-o', '--outdir',
        type=str,
        default='resources',
        help='Output path for downloaded data'
    )
    parser.add_argument(
        '-d', '--delete',
        action='store_true',
        help='Remove temporary files'
    )

    args = parser.parse_args()
    main(args.outdir, args.delete)