diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 6153423..0000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,18 +0,0 @@ -[bumpversion] -commit = False -tag = False -tag_name = {new_version} -tag_message = Bump version to {new_version} -message = Bump version to {new_version} -parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? -serialize = - {major}.{minor}.{patch} - -[bumpversion:file:setup.py] -search = version='0.0.0' -replace = version='{new_version}' - -[bumpversion:file:aodntools/__init__.py] -search = __version__ = '0.0.0' -replace = __version__ = '{new_version}' - diff --git a/.coveragerc b/.coveragerc index 9103da9..aee87c3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,8 @@ branch = True source = aodntools omit = + setup.py + aodntools/__init__.py examples/* test_aodntools/* diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 43364d6..48dada6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.5', '3.8' ] + python-version: [ '3.8' ] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index eef60be..defd38e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Ignore the version file, it's created on install +aodntools/_version.py + # Python files *.pyc *.egg-info diff --git a/Dockerfile b/Dockerfile index 91c246b..0183cc9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV TZ=Australia/Hobart ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ENV PATH /home/builder/.local/bin:$PATH -ENV PYTHON_VERSION 3.5.2 +ENV PYTHON_VERSION 3.8.13 RUN apt-get update && \ apt-get install -y software-properties-common && \ @@ -15,25 +15,16 @@ RUN apt-get update && \ RUN add-apt-repository ppa:rael-gc/rvm && apt-get update -RUN if [ X"$PYTHON_VERSION" = X"3.5.2" ]; \ - then apt-get install -y libssl1.0-dev; \ - else apt-get install -y libssl-dev; \ - fi - RUN apt-get install -y --no-install-recommends \ - build-essential \ ca-certificates \ git \ libmagic1 \ libudunits2-dev \ python3-dev \ - wget \ - libffi-dev \ - # Pyenv pre-requisites - make zlib1g-dev libbz2-dev libreadline-dev \ - libsqlite3-dev wget curl llvm libncurses5-dev \ - libncursesw5-dev xz-utils tk-dev libffi-dev \ - liblzma-dev python-openssl \ + # Pyenv pre-requisites (from https://github.com/pyenv/pyenv/wiki#suggested-build-environment) + make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ && rm -rf /var/lib/apt/lists/* # Set-up necessary Env vars for PyEnv @@ -48,13 +39,13 @@ RUN set -ex \ && pyenv rehash \ && chmod -R a+w $PYENV_ROOT/shims -RUN pip install --upgrade pip==20.3.4 setuptools==50.3.2 +RUN pip install --upgrade pip==22.1.2 setuptools==63.1.0 wheel build RUN pip install \ - Cython==0.29 \ - numpy>=1.13.0 \ - bump2version==0.5.10 \ - wheel + Cython==0.29.30 \ + bump2version==1.0.1 \ + numpy==1.23.0 \ + setuptools-scm==7.0.4 RUN useradd --create-home --no-log-init --shell /bin/bash --uid $BUILDER_UID builder USER builder diff --git a/Jenkinsfile b/Jenkinsfile index f414731..48b6f69 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -34,7 +34,7 @@ pipeline { } stage('package') { steps { - sh 'python setup.py bdist_wheel' + sh 'python -m build -w' } } } diff --git a/aodntools/__init__.py b/aodntools/__init__.py index c57bfd5..368d1d8 100644 --- a/aodntools/__init__.py +++ b/aodntools/__init__.py @@ -1 +1,4 @@ -__version__ = '0.0.0' +try: + from ._version import version as __version__ +except ImportError: + __version__ = "Unknown/Not Installed" diff --git a/aodntools/timeseries_products/aggregated_timeseries.py b/aodntools/timeseries_products/aggregated_timeseries.py index 2b63f6e..87c38ed 100644 --- a/aodntools/timeseries_products/aggregated_timeseries.py +++ b/aodntools/timeseries_products/aggregated_timeseries.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 import argparse +from copy import deepcopy import json import os import shutil import tempfile -from datetime import datetime import numpy as np import xarray as xr @@ -13,7 +13,8 @@ from pkg_resources import resource_filename from aodntools import __version__ -from aodntools.timeseries_products.common import NoInputFilesError, check_file, in_water +from aodntools.timeseries_products.common import (NoInputFilesError, check_file, in_water, current_utc_timestamp, + TIMESTAMP_FORMAT, DATESTAMP_FORMAT) TEMPLATE_JSON = resource_filename(__name__, 'aggregated_timeseries_template.json') @@ -39,7 +40,7 @@ def get_variable_values(nc, variable): Get values of the variable and its QC flags. If variable is not present, nan returned, its QC flags set to 9 If variable present but not its QC flags, QC set to 0 - :param nc: dataset + :param nc: xarray dataset :param variable: name of the variable to get :return: variable values and variable qc flags """ @@ -48,6 +49,8 @@ def get_variable_values(nc, variable): if variable in file_variables: variable_values = nc[variable].values + if any(np.isnan(variable_values)): + variable_values = np.ma.masked_array(variable_values, mask=np.isnan(variable_values)) if variable+'_quality_control' in file_variables: variableQC_values = nc[variable+'_quality_control'].values else: @@ -207,8 +210,9 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di rejected_files = [] # default name for temporary file. It will be renamed at the end - _, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir) - + fd, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir) + os.close(fd) + ## check files and get total number of flattened obs n_obs_total = 0 for file in files_to_agg: @@ -223,6 +227,7 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di rejected_files.append(file) ## remove bad files form the list and sort in chronological order + files_to_agg = deepcopy(files_to_agg) for file in bad_files.keys(): files_to_agg.remove(file) if len(files_to_agg) == 0: @@ -303,13 +308,10 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di ds['source_file'].setncatts(source_file_attributes(download_url_prefix, opendap_url_prefix)) ## set global attrs - timeformat = '%Y-%m-%dT%H:%M:%SZ' - file_timeformat = '%Y%m%d' - - time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(timeformat) - time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(timeformat) - time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(file_timeformat) - time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(file_timeformat) + time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT) + time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT) + time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT) + time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT) add_attribute = { 'title': ("Long Timeseries Velocity Aggregated product: " + var_to_agg + " at " + @@ -317,14 +319,14 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di 'site_code': site_code, 'time_coverage_start': time_start, 'time_coverage_end': time_end, - 'geospatial_vertical_min': np.min(ds['DEPTH']), - 'geospatial_vertical_max': np.max(ds['DEPTH']), - 'geospatial_lat_min': np.min(ds['LATITUDE']), - 'geospatial_lat_max': np.max(ds['LATITUDE']), - 'geospatial_lon_min': np.min(ds['LONGITUDE']), - 'geospatial_lon_max': np.max(ds['LONGITUDE']), - 'date_created': datetime.utcnow().strftime(timeformat), - 'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.', + 'geospatial_vertical_min': np.min(ds['DEPTH'][:]), + 'geospatial_vertical_max': np.max(ds['DEPTH'][:]), + 'geospatial_lat_min': np.min(ds['LATITUDE'][:]), + 'geospatial_lat_max': np.max(ds['LATITUDE'][:]), + 'geospatial_lon_min': np.min(ds['LONGITUDE'][:]), + 'geospatial_lon_max': np.max(ds['LONGITUDE'][:]), + 'date_created': current_utc_timestamp(), + 'history': current_utc_timestamp() + ': Aggregated file created.', 'keywords': ', '.join([var_to_agg, 'AGGREGATED']), 'rejected_files': "\n".join(rejected_files), 'generating_code_version': __version__} @@ -346,7 +348,7 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di file_version = 1 output_name = '_'.join(['IMOS', facility_code, data_code, time_start_filename, site_code, ('FV0'+str(file_version)), (var_to_agg + "-" + product_type), - ('END-'+ time_end_filename), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc' + ('END-'+ time_end_filename), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc' ncout_path = os.path.join(output_dir, output_name) shutil.move(temp_outfile, os.path.join(output_dir, ncout_path)) diff --git a/aodntools/timeseries_products/aggregated_timeseries_template.json b/aodntools/timeseries_products/aggregated_timeseries_template.json index 6c34061..3130f2a 100644 --- a/aodntools/timeseries_products/aggregated_timeseries_template.json +++ b/aodntools/timeseries_products/aggregated_timeseries_template.json @@ -289,8 +289,8 @@ "_global":{ "abstract": "Aggregated Time-series Product: This file contains all measurements of the selected variable from all instruments deployed at the selected site. Timestamps are chronologically ordered, but may not be at uniform intervals. Instrument details are stored as a variable in order to keep a record of the origin of each measurement. The quality control flags of the variable of interest and DEPTH are preserved. Out-of-water measurements have been excluded, but no other filtering has been applied to the input data.", "acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).", - "author": "Klein, Eduardo", - "author_email": "eduardo.kleinsalas@utas.edu.au", + "author": "Australian Ocean Data Network (AODN)", + "author_email": "info@aodn.org.au", "citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".", "Conventions": "CF-1.6,IMOS-1.4", "data_centre": "Australian Ocean Data Network (AODN)", diff --git a/aodntools/timeseries_products/common.py b/aodntools/timeseries_products/common.py index 7a454af..75c75f2 100644 --- a/aodntools/timeseries_products/common.py +++ b/aodntools/timeseries_products/common.py @@ -1,6 +1,12 @@ """Code shared by all timeseries product generating code""" +from datetime import datetime, timezone + import numpy as np +# Common date/time format strings +TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +DATESTAMP_FORMAT = '%Y%m%d' + class NoInputFilesError(Exception): """Exception raised if there are no valid input files to aggregate""" @@ -183,4 +189,8 @@ def in_water(nc): :param nc: xarray dataset :return: xarray dataset """ - return nc.where(in_water_index(nc), drop=True) \ No newline at end of file + return nc.where(in_water_index(nc), drop=True) + + +def current_utc_timestamp(format=TIMESTAMP_FORMAT): + return datetime.now(timezone.utc).strftime(format) diff --git a/aodntools/timeseries_products/gridded_timeseries.py b/aodntools/timeseries_products/gridded_timeseries.py index 12a681a..c19506c 100644 --- a/aodntools/timeseries_products/gridded_timeseries.py +++ b/aodntools/timeseries_products/gridded_timeseries.py @@ -3,7 +3,8 @@ import argparse import os.path import json -from datetime import datetime +from datetime import datetime, timezone +from collections import defaultdict import xarray as xr import pandas as pd @@ -11,6 +12,7 @@ from pkg_resources import resource_filename from aodntools import __version__ +from aodntools.timeseries_products.common import current_utc_timestamp, TIMESTAMP_FORMAT, DATESTAMP_FORMAT import aodntools.timeseries_products.aggregated_timeseries as TStools @@ -90,24 +92,28 @@ def write_netCDF_aggfile(agg_dataset, output_path, encoding): return output_path -def set_variableattr(varlist, variable_attribute_dictionary, add_variable_attribute): +def set_variableattr(varlist, variable_attribute_dictionary): """ - set variables variables atributes + Set variable atributes, separate attributes that should be passed to xarray separately as encoding + parameters - :param varlist: list of variable names + :param varlist: list of variable names to pick out :param variable_attribute_dictionary: dictionary of the variable attributes - :param add_variable_attribute: additional attributes to add - :return: dictionary of attributes + :return: tuple (dictionary of attributes, dictionary of encoding attributes) """ - # with open(templatefile) as json_file: - # variable_metadata = json.load(json_file)['_variables'] - variable_attributes = {key: variable_attribute_dictionary[key] for key in varlist} - if len(add_variable_attribute)>0: - for key in add_variable_attribute.keys(): - variable_attributes[key].update(add_variable_attribute[key]) + encoding_attributes = {'_FillValue'} + time_encoding_attributes = {'units', 'calendar'} + variable_attributes = defaultdict(dict) + variable_encodings = defaultdict(dict) + for var in varlist: + for name, value in variable_attribute_dictionary[var].items(): + if name in encoding_attributes or (var == 'TIME' and name in time_encoding_attributes): + variable_encodings[var][name] = value + else: + variable_attributes[var][name] = value - return variable_attributes + return variable_attributes, variable_encodings def generate_netcdf_output_filename(nc, facility_code, data_code, VoI, site_code, product_type, file_version): """ @@ -122,14 +128,12 @@ def generate_netcdf_output_filename(nc, facility_code, data_code, VoI, site_code :return: name of the output file """ - file_timeformat = '%Y%m%d' - if '_' in VoI: VoI = VoI.replace('_', '-') - t_start = pd.to_datetime(nc.TIME.min().values).strftime(file_timeformat) - t_end = pd.to_datetime(nc.TIME.max().values).strftime(file_timeformat) + t_start = pd.to_datetime(nc.TIME.min().values).strftime(DATESTAMP_FORMAT) + t_end = pd.to_datetime(nc.TIME.max().values).strftime(DATESTAMP_FORMAT) - output_name = '_'.join(['IMOS', facility_code, data_code, t_start, site_code, ('FV0'+str(file_version)), (VoI+"-"+product_type), ('END-'+ t_end), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc' + output_name = '_'.join(['IMOS', facility_code, data_code, t_start, site_code, ('FV0'+str(file_version)), (VoI+"-"+product_type), ('END-'+ t_end), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc' return output_name @@ -238,10 +242,7 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin ## set variable attributes varlist = list(VoI_interpolated.variables) - add_variable_attribute = {} - variable_attributes = set_variableattr(varlist, variable_attribute_dictionary, add_variable_attribute) - time_units = variable_attributes['TIME'].pop('units') - time_calendar = variable_attributes['TIME'].pop('calendar') + variable_attributes, encoding = set_variableattr(varlist, variable_attribute_dictionary) for variable in varlist: VoI_interpolated[variable].attrs = variable_attributes[variable] @@ -250,10 +251,9 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin for attr in ('geospatial_lat_min', 'geospatial_lat_max', 'geospatial_lon_min', 'geospatial_lon_max', 'site_code', 'included_values_flagged_as', 'contributor_name', 'contributor_role', 'contributor_email'): VoI_interpolated.attrs[attr] = input_global_attributes[attr] - timeformat = '%Y-%m-%dT%H:%M:%SZ' - date_start = pd.to_datetime(VoI_interpolated.TIME.values.min()).strftime(timeformat) - date_end = pd.to_datetime(VoI_interpolated.TIME.values.max()).strftime(timeformat) - date_created = datetime.utcnow().strftime(timeformat) + date_start = pd.to_datetime(VoI_interpolated.TIME.values.min()).strftime(TIMESTAMP_FORMAT) + date_end = pd.to_datetime(VoI_interpolated.TIME.values.max()).strftime(TIMESTAMP_FORMAT) + date_created = current_utc_timestamp() VoI_interpolated.attrs.update(global_attribute_dictionary) VoI_interpolated.attrs.update({ 'source_file': input_file, @@ -295,22 +295,12 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin file_version=file_version) ncout_path = os.path.join(output_dir, ncout_filename) - encoding = {'TIME': {'_FillValue': None, - 'units': time_units, - 'calendar': time_calendar, - 'zlib': True, - 'complevel': 5}, - VoI: {'zlib': True, - 'complevel': 5, - 'dtype': np.dtype('float32')}, - VoI+'_count': {'dtype': np.dtype('int16'), - 'zlib': True, - 'complevel': 5}, - 'DEPTH': {'dtype': np.dtype('float32'), - 'zlib': True, - 'complevel': 5}, - 'LONGITUDE': {'_FillValue': False}, - 'LATITUDE': {'_FillValue': False}} + # data types and compression for encoding + for var in {'TIME', VoI, VoI+'_count', 'DEPTH'}: + encoding[var].update({'zlib': True, 'complevel': 5}) + encoding[VoI].update({'dtype': np.dtype('float32')}) + encoding[VoI+'_count'].update({'dtype': np.dtype('int16')}) + encoding['DEPTH'].update({'dtype': np.dtype('float32')}) write_netCDF_aggfile(VoI_interpolated, ncout_path, encoding) diff --git a/aodntools/timeseries_products/gridded_timeseries_template.json b/aodntools/timeseries_products/gridded_timeseries_template.json index 784e226..42601f0 100644 --- a/aodntools/timeseries_products/gridded_timeseries_template.json +++ b/aodntools/timeseries_products/gridded_timeseries_template.json @@ -77,8 +77,8 @@ "title": "Gridded Time Series Product: {VoI} interpolated at {site_code} to fixed target depths at 1-hour time intervals, between {time_min} and {time_max} and {depth_min} and {depth_max} meters.", "abstract": "Gridded Time Series Product: This file contains {VoI} readings from all instruments deployed at the {site_code} mooring site. The source of the values is the Hourly Time Series Product where TIME is fixed to 1-hour interval. The variable values are interpolated to a fixed target depths using a linear interpolation between consecutive existing depths. Only values flagged as 1 or 2 are used in the interpolation.", "acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).", - "author": "Klein, Eduardo", - "author_email": "eduardo.kleinsalas@utas.edu.au", + "author": "Australian Ocean Data Network (AODN)", + "author_email": "info@aodn.org.au", "citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".", "Conventions": "CF-1.6,IMOS-1.4", "data_centre": "Australian Ocean Data Network (AODN)", diff --git a/aodntools/timeseries_products/hourly_timeseries.py b/aodntools/timeseries_products/hourly_timeseries.py index 66e5292..bc07953 100644 --- a/aodntools/timeseries_products/hourly_timeseries.py +++ b/aodntools/timeseries_products/hourly_timeseries.py @@ -4,7 +4,6 @@ import json import os.path from collections import OrderedDict -from datetime import datetime import numpy as np import pandas as pd @@ -14,7 +13,8 @@ from aodntools import __version__ from aodntools.timeseries_products import aggregated_timeseries as utils -from aodntools.timeseries_products.common import NoInputFilesError, check_file, get_qc_variable_names, in_water +from aodntools.timeseries_products.common import (NoInputFilesError, check_file, get_qc_variable_names, in_water, + current_utc_timestamp, TIMESTAMP_FORMAT, DATESTAMP_FORMAT) TEMPLATE_JSON = resource_filename(__name__, 'hourly_timeseries_template.json') BINNING_METHOD_JSON = resource_filename(__name__, 'binning_method.json') @@ -180,8 +180,8 @@ def set_globalattr(nc_aggregated, templatefile, site_code, add_attribute, parame 'geospatial_lat_max': nc_aggregated.LATITUDE.values.max(), 'geospatial_lon_min': nc_aggregated.LONGITUDE.values.min(), 'geospatial_lon_max': nc_aggregated.LONGITUDE.values.max(), - 'date_created': datetime.utcnow().strftime(timeformat), - 'history': datetime.utcnow().strftime(timeformat) + ': Hourly aggregated file created.', + 'date_created': current_utc_timestamp(), + 'history': current_utc_timestamp() + ': Hourly aggregated file created.', 'keywords': ', '.join(parameter_names + ['HOURLY', 'AGGREGATED'])} global_metadata.update(agg_attr) global_metadata.update(add_attribute) @@ -259,14 +259,12 @@ def generate_netcdf_output_filename(nc, facility_code, data_code, site_code, pro :return: name of the output file """ - file_timeformat = '%Y%m%d' - - t_start = pd.to_datetime(nc.TIME.min().values).strftime(file_timeformat) - t_end = pd.to_datetime(nc.TIME.max().values).strftime(file_timeformat) + t_start = pd.to_datetime(nc.TIME.min().values).strftime(DATESTAMP_FORMAT) + t_end = pd.to_datetime(nc.TIME.max().values).strftime(DATESTAMP_FORMAT) output_name = '_'.join( ['IMOS', facility_code, data_code, t_start, site_code, ('FV0' + str(file_version)), product_type, - ('END-' + t_end), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc' + ('END-' + t_end), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc' return output_name @@ -412,8 +410,11 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp # https://stackoverflow.com/questions/55786995/converting-cftime-datetimejulian-to-datetime/55787899#55787899 if isinstance(nc_clean.indexes['TIME'], xr.coding.cftimeindex.CFTimeIndex): nc_clean['TIME'] = nc_clean.indexes['TIME'].to_datetimeindex() - df_temp = nc_clean.to_dataframe() + df_temp = nc_clean[parameter_names].to_dataframe() + ## keep TIME as the only index (for ADCP files it would be a MultiIndex at this point) + df_temp.reset_index(inplace=True) + df_temp.set_index('TIME', inplace=True) df_temp = df_temp[parameter_names] df_temp = PDresample_by_hour(df_temp, function_dict, function_stats) # do the magic @@ -565,4 +566,4 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp qcflags = [int(i) for i in args.qcflags] hourly_aggregator(files_to_aggregate=files_to_aggregate, site_code=args.site_code, qcflags=qcflags, - input_dir=args.input_dir, output_dir=args.output_path) + input_dir=args.input_dir, output_dir=args.output_dir) diff --git a/aodntools/timeseries_products/hourly_timeseries_template.json b/aodntools/timeseries_products/hourly_timeseries_template.json index 2d3aa0f..ae5c99f 100644 --- a/aodntools/timeseries_products/hourly_timeseries_template.json +++ b/aodntools/timeseries_products/hourly_timeseries_template.json @@ -368,8 +368,8 @@ "_global":{ "abstract": "Hourly Time Series Product: This file contains selected variables from all instruments deployed at the {site_code} mooring site. The values are binned to a fixed 1-hour interval. Instrument details are stored as variables in order to keep a record of the origin of each measurement. Out-of-water measurements have been excluded. Only values flagged as {flags} are retained in the aggregation.", "acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).", - "author": "Klein, Eduardo", - "author_email": "eduardo.kleinsalas@utas.edu.au", + "author": "Australian Ocean Data Network (AODN)", + "author_email": "info@aodn.org.au", "citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".", "Conventions": "CF-1.6,IMOS-1.4", "data_centre": "Australian Ocean Data Network (AODN)", diff --git a/aodntools/timeseries_products/velocity_aggregated_timeseries.py b/aodntools/timeseries_products/velocity_aggregated_timeseries.py index e04c58b..b5649ad 100644 --- a/aodntools/timeseries_products/velocity_aggregated_timeseries.py +++ b/aodntools/timeseries_products/velocity_aggregated_timeseries.py @@ -1,10 +1,11 @@ import os import tempfile import shutil +from copy import deepcopy + from netCDF4 import Dataset, num2date, stringtochar import numpy as np import json -from datetime import datetime import argparse from pkg_resources import resource_filename from aodntools import __version__ @@ -12,7 +13,8 @@ import xarray as xr from aodntools.timeseries_products import aggregated_timeseries as utils -from aodntools.timeseries_products.common import NoInputFilesError, check_velocity_file +from aodntools.timeseries_products.common import (NoInputFilesError, check_velocity_file, current_utc_timestamp, + TIMESTAMP_FORMAT, DATESTAMP_FORMAT) TEMPLATE_JSON = resource_filename(__name__, 'velocity_aggregated_timeseries_template.json') @@ -69,8 +71,8 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./', bad_files = {} # default name for temporary file. It will be renamed at the end - _, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir) - + fd, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir) + os.close(fd) ## check files and get total number of flattened obs n_obs_total = 0 for file in files_to_agg: @@ -83,6 +85,7 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./', bad_files.update({file: error_list}) # remove bad files form the list and sort in chronological order + files_to_agg = deepcopy(files_to_agg) for file in bad_files.keys(): files_to_agg.remove(file) if len(files_to_agg) == 0: @@ -144,7 +147,7 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./', WCUR[start:end] = flat_variable(nc, 'WCUR') WCURqc[start:end] = flat_variable(nc, 'WCUR_quality_control') else: - WCUR[start:end] = np.full(n_obs, np.nan) + WCUR[start:end] = np.ma.masked WCURqc[start:end] = np.full(n_obs, 9) ##calculate depth and add CELL_INDEX @@ -188,13 +191,10 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./', ds['source_file'].setncatts(utils.source_file_attributes(download_url_prefix, opendap_url_prefix)) ## set global attrs - timeformat = '%Y-%m-%dT%H:%M:%SZ' - file_timeformat = '%Y%m%d' - - time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(timeformat) - time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(timeformat) - time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(file_timeformat) - time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(file_timeformat) + time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT) + time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT) + time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT) + time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT) add_attribute = { 'title': ("Long Timeseries Velocity Aggregated product: " + ', '.join(varlist) + " at " + @@ -208,8 +208,8 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./', 'geospatial_lat_max': np.max(ds['LATITUDE']), 'geospatial_lon_min': np.min(ds['LONGITUDE']), 'geospatial_lon_max': np.max(ds['LONGITUDE']), - 'date_created': datetime.utcnow().strftime(timeformat), - 'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.', + 'date_created': current_utc_timestamp(), + 'history': current_utc_timestamp() + ': Aggregated file created.', 'keywords': ', '.join(varlist + ['AGGREGATED']), 'rejected_files': "\n".join(bad_files.keys()), 'generating_code_version': __version__ @@ -235,8 +235,9 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./', file_version = 1 output_name = '_'.join(['IMOS', facility_code, data_code, time_start_filename, site_code, ('FV0'+str(file_version)), ("velocity-"+product_type), - ('END-'+ time_end_filename), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc' + ('END-'+ time_end_filename), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc' ncout_path = os.path.join(output_dir, output_name) + shutil.move(temp_outfile, ncout_path) diff --git a/aodntools/timeseries_products/velocity_aggregated_timeseries_template.json b/aodntools/timeseries_products/velocity_aggregated_timeseries_template.json index 7ef5fc5..eb87a20 100644 --- a/aodntools/timeseries_products/velocity_aggregated_timeseries_template.json +++ b/aodntools/timeseries_products/velocity_aggregated_timeseries_template.json @@ -153,8 +153,8 @@ "_global":{ "abstract": "Velocity Aggregated Time-series Product: This file contains all measurements of UCUR, VCUR and WCUR from all instruments deployed at the selected site. Timestamps are chronologically ordered, but may not be at uniform intervals. Measurements are referenced to its absolute DEPTH. Instrument details are stored as a variable in order to keep a record of the origin of each measurement. The quality control flags of the variable of interest and DEPTH are preserved. Out-of-water measurements have been excluded, but no other filtering has been applied to the input data.", "acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).", - "author": "Klein, Eduardo", - "author_email": "eduardo.kleinsalas@utas.edu.au", + "author": "Australian Ocean Data Network (AODN)", + "author_email": "info@aodn.org.au", "citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".", "Conventions": "CF-1.6,IMOS-1.4", "data_centre": "Australian Ocean Data Network (AODN)", diff --git a/aodntools/timeseries_products/velocity_hourly_timeseries.py b/aodntools/timeseries_products/velocity_hourly_timeseries.py index 35cc60f..fd12b49 100644 --- a/aodntools/timeseries_products/velocity_hourly_timeseries.py +++ b/aodntools/timeseries_products/velocity_hourly_timeseries.py @@ -3,7 +3,7 @@ import os import shutil import tempfile -from datetime import datetime +from copy import deepcopy import numpy as np import pandas as pd @@ -13,7 +13,8 @@ import aodntools.timeseries_products.aggregated_timeseries as utils from aodntools import __version__ -from aodntools.timeseries_products.common import NoInputFilesError, check_velocity_file +from aodntools.timeseries_products.common import (NoInputFilesError, check_velocity_file, current_utc_timestamp, + TIMESTAMP_FORMAT, DATESTAMP_FORMAT) TEMPLATE_JSON = resource_filename(__name__, 'velocity_hourly_timeseries_template.json') QC_FLAG_MAX = 2 @@ -32,16 +33,15 @@ def cell_velocity_resample(df, binning_function): :return: binned U, v, W CUR according to the binning function """ df_binned = df.apply(binning_function) - UCUR = np.array(df_binned['UCUR']) - VCUR = np.array(df_binned['VCUR']) - if 'WCUR' in df_binned: - WCUR = np.array(df_binned['WCUR']) - else: - WCUR = np.full(len(df), np.nan) - DEPTH = np.array(df_binned['DEPTH']) - - return UCUR, VCUR, WCUR, DEPTH + binned_vars = [] + for var in ('UCUR', 'VCUR', 'WCUR', 'DEPTH'): + if var in df_binned: + x = np.ma.masked_array(df_binned[var], mask=np.isnan(df_binned[var])) + else: + x = np.ma.masked + binned_vars.append(x) + return tuple(binned_vars) def append_resampled_values(nc_cell, ds, slice_start, binning_functions): """ @@ -54,7 +54,7 @@ def append_resampled_values(nc_cell, ds, slice_start, binning_functions): :param binning_functions: list of numpy function names for binning :return: end index of the slice """ - df_cell = nc_cell.squeeze().to_dataframe() + df_cell = nc_cell.to_dataframe().reset_index().set_index('TIME') # shift the index forward 30min to centre the bins on the hour df_cell.index = df_cell.index + pd.Timedelta(minutes=30) @@ -105,8 +105,8 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir chunk_size = 90 ## size in days ## default name for temporary file. It will be renamed at the end - _, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir) - + fd, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir) + os.close(fd) ## check files and get total number of flattened obs print("CHECKING FILES...") for index, file in enumerate(files_to_agg): @@ -118,6 +118,7 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir print(" ") ## remove bad files form the list + files_to_agg = deepcopy(files_to_agg) for file in bad_files.keys(): files_to_agg.remove(file) if len(files_to_agg) == 0: @@ -186,16 +187,16 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir with xr.open_dataset(os.path.join(input_dir, file)) as nc: is_2D = 'HEIGHT_ABOVE_SENSOR' in list(nc.variables) + varlist_nc = [v for v in varlist if v in nc.variables.keys()] ## mask values with QC flag>2 - for var in varlist: + for var in varlist_nc: nc[var] = nc[var].where(nc[var+'_quality_control'] <= QC_FLAG_MAX) ## process in chunks ## in water only - chunk_start = np.datetime64(nc.attrs['time_deployment_start']) - chunk_end = np.datetime64(nc.attrs['time_deployment_end']) - + chunk_start = max(np.datetime64(nc.attrs['time_deployment_start']), nc.TIME.data.min()) + chunk_end = min(np.datetime64(nc.attrs['time_deployment_end']), nc.TIME.data.max()) time_increment = 60*60*24*chunk_size ## secs x mins x hours x days chunk_increment = np.timedelta64(time_increment, 's') chunk_partial = chunk_start + chunk_increment @@ -210,12 +211,12 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir nc_cell = nc_chunk.sel(HEIGHT_ABOVE_SENSOR=cell_height) ## convert to absolute DEPTH nc_cell['DEPTH'] = nc_cell['DEPTH'] - cell_height - slice_end = append_resampled_values(nc_cell[varlist], ds, slice_start, binning_fun) + slice_end = append_resampled_values(nc_cell[varlist_nc], ds, slice_start, binning_fun) CELL_INDEX[slice_start:slice_end] = np.full(slice_end - slice_start, cell_idx, dtype=np.uint32) slice_start = slice_end else: - slice_end = append_resampled_values(nc_chunk[varlist], ds, slice_start, binning_fun) + slice_end = append_resampled_values(nc_chunk[varlist_nc], ds, slice_start, binning_fun) CELL_INDEX[slice_start:slice_end] = np.full(slice_end - slice_start, 0, dtype=np.uint32) slice_start = slice_end @@ -252,13 +253,10 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir ds['source_file'].setncatts(utils.source_file_attributes(download_url_prefix, opendap_url_prefix)) ## set global attrs - timeformat = '%Y-%m-%dT%H:%M:%SZ' - file_timeformat = '%Y%m%d' - - time_start = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(timeformat) - time_end = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(timeformat) - time_start_filename = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(file_timeformat) - time_end_filename = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(file_timeformat) + time_start = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(TIMESTAMP_FORMAT) + time_end = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(TIMESTAMP_FORMAT) + time_start_filename = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(DATESTAMP_FORMAT) + time_end_filename = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(DATESTAMP_FORMAT) add_attribute = { @@ -273,8 +271,8 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir 'geospatial_lat_max': np.float64(np.max(ds['LATITUDE'])), 'geospatial_lon_min': np.float64(np.min(ds['LONGITUDE'])), 'geospatial_lon_max': np.float64(np.max(ds['LONGITUDE'])), - 'date_created': datetime.utcnow().strftime(timeformat), - 'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.', + 'date_created': current_utc_timestamp(), + 'history': current_utc_timestamp() + ': Aggregated file created.', 'keywords': ', '.join(varlist + ['AGGREGATED']), 'rejected_files': "\n".join(bad_files.keys()), 'generating_code_version': __version__ @@ -306,7 +304,7 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir file_version = 2 output_name = '_'.join(['IMOS', facility_code, data_code, time_start_filename, site_code, ('FV0'+str(file_version)), ("velocity-"+product_type), - ('END-'+ time_end_filename), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc' + ('END-'+ time_end_filename), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc' ncout_path = os.path.join(output_dir, output_name) shutil.move(temp_outfile, ncout_path) diff --git a/aodntools/timeseries_products/velocity_hourly_timeseries_template.json b/aodntools/timeseries_products/velocity_hourly_timeseries_template.json index 37c47e0..700bae2 100644 --- a/aodntools/timeseries_products/velocity_hourly_timeseries_template.json +++ b/aodntools/timeseries_products/velocity_hourly_timeseries_template.json @@ -195,7 +195,7 @@ "_global":{ "abstract": "Hourly Time-series Product: This file contains all measurements of quality-controlled U, V and W sea water velocity variables from all instruments deployed at the selected site, binned into 1-hour time intervals. Out-of-water measurements, and those flagged as bad by IMOS standard automated quality-control procedures, have been excluded. Timestamps in the input files indicate the start of each measurement interval (up to an hour in duration), and these have not been shifted to the centre of the interval before binning. Instrument details are stored as variables in order to keep a record of the origin of each measurement.", "acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).", - "author": "Klein, Eduardo", + "author": "Australian Ocean Data Network (AODN)", "author_email": "info@aodn.org.au", "citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".", "comment": "Timestamps in the input files indicate the start of each measurement interval (instrument-dependent; up to an hour in duration), and these have not been shifted to the centre of the interval before binning. This could lead to an artificial shift of up to half an hour in the output data. The size of this shift, where known, has been recorded in the SECONDS_TO_MIDDLE variable.", diff --git a/bumpversion.sh b/bumpversion.sh index 4559475..282027e 100755 --- a/bumpversion.sh +++ b/bumpversion.sh @@ -4,9 +4,9 @@ set -euxo pipefail main() { git fetch --prune origin "+refs/tags/*:refs/tags/*" - OLD_VERSION=$(git tag -l '*.*.*' --sort=-version:refname | head -n 1) - NEW_VERSION=$(bump2version --current-version $OLD_VERSION --list --tag --commit --allow-dirty patch | grep -oP '^new_version=\K.*$') - git push origin tag $NEW_VERSION + bump2version --current-version $(git describe) \ + --tag --tag-name {new_version} --tag-message 'Bump version to {new_version}' patch + git push --tags exit 0 } diff --git a/docker-compose.yml b/docker-compose.yml index 4ac178b..f487b1f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '2' services: dev: - image: ncwriter-build + image: aodntools-build build: context: . args: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ef3da94 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[build-system] +requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4"] + +[tool.setuptools_scm] +write_to = "aodntools/_version.py" diff --git a/setup.py b/setup.py index be44198..cbe40b9 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,8 @@ ] TESTS_REQUIRE = [ - 'pytest' + 'pytest', + 'setuptools_scm', ] EXTRAS_REQUIRE = { @@ -26,7 +27,8 @@ setup( name=PACKAGE_NAME, - version='0.0.0', + use_scm_version=True, + setup_requires=['setuptools_scm'], packages=find_packages(exclude=PACKAGE_EXCLUDES), package_data=PACKAGE_DATA, url='https://github.com/aodn', diff --git a/test_aodntools/base_test.py b/test_aodntools/base_test.py index f8d2901..8f4548b 100644 --- a/test_aodntools/base_test.py +++ b/test_aodntools/base_test.py @@ -2,8 +2,12 @@ import tempfile import unittest +import numpy as np +from netCDF4 import Dataset + class BaseTestCase(unittest.TestCase): + EXPECTED_OUTPUT_FILE = None @property def temp_dir(self): @@ -22,3 +26,59 @@ def temp_nc_file(self): def tearDown(self): if hasattr(self, '_temp_dir'): shutil.rmtree(self._temp_dir) + + def compare_global_attributes(self, dataset, + attrs = ('geospatial_lat_max', 'geospatial_lat_min', + 'geospatial_lon_max', 'geospatial_lon_min', + 'geospatial_vertical_max', 'geospatial_vertical_min', + 'time_coverage_start', 'time_coverage_end' + ) + ): + "Compare global attributes of the given dataset with those in self.EXPECTED_OUTPUT_FILE" + + not_matching = [] + with Dataset(self.EXPECTED_OUTPUT_FILE) as expected: + for attr in attrs: + if dataset.getncattr(attr) != expected.getncattr(attr): + not_matching.append((attr, + "expected: {exp}; found: {found}".format(exp=dataset.getncattr(attr), + found=dataset.getncattr(attr)) + )) + + self.assertEqual([], not_matching) + + def check_nan_values(self, dataset): + "check that there are no NaN values in any variable (they should be fill values instead)" + nan_vars = [(name, "contains NaN values") + for name, var in dataset.variables.items() + if var.dtype in (np.dtype('float32'), np.dtype('float64')) and np.isnan(var[:]).any() + ] + self.assertEqual([], nan_vars) + + def compare_variables(self, dataset, skip_vars=('source_file', 'instrument_id')): + """Compare dimensions and values of all variables in dataset with those in self.EXPECTED_OUTPUT_FILE, + except for variables listed in skip_vars. + """ + + def _arrays_equal(testvar, expected): + """compare two numpy arrays, handling the case of scalar variables""" + if expected.shape == (): + if np.isclose(testvar, expected): + return True + elif (np.isclose(testvar, expected)).all(): + return True + return False + + differences = [] + with Dataset(self.EXPECTED_OUTPUT_FILE) as expected: + for var in set(expected.variables.keys()) - set(skip_vars): + if not dataset[var].dimensions == expected[var].dimensions: + differences.append((var, "dimensions differ")) + if not dataset[var].shape == expected[var].shape: + differences.append((var, "shapes differ")) + + # compare the raw data arrays (not the masked_array) + if not _arrays_equal(dataset[var][:].data, expected[var][:].data): + differences.append((var, "variable values differ")) + + self.assertEqual([], differences) diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_ADCP_LAT_LON_DIMS.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_ADCP_LAT_LON_DIMS.nc new file mode 100644 index 0000000..0578737 Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_ADCP_LAT_LON_DIMS.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_ADCP_SINGLE_TIMESTAMP.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_ADCP_SINGLE_TIMESTAMP.nc new file mode 100644 index 0000000..62bc5d6 Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_ADCP_SINGLE_TIMESTAMP.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_AETVZ_20191016T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1910-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20191018T100000Z_C-20200430T000000Z.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_AETVZ_20191016T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1910-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20191018T100000Z_C-20200430T000000Z.nc index 30205a2..44b8728 100644 Binary files a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_AETVZ_20191016T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1910-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20191018T100000Z_C-20200430T000000Z.nc and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_AETVZ_20191016T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1910-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20191018T100000Z_C-20200430T000000Z.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20200622.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20220607.nc similarity index 66% rename from test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20200622.nc rename to test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20220607.nc index 5a1d4fa..32060f3 100644 Binary files a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20200622.nc and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20220607.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV02_TEMP-gridded-timeseries_END-20190523_C-20230110.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV02_TEMP-gridded-timeseries_END-20190523_C-20230110.nc new file mode 100644 index 0000000..4c64112 Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV02_TEMP-gridded-timeseries_END-20190523_C-20230110.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV01_velocity-aggregated-timeseries_END-20191018_C-20200623.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV01_velocity-aggregated-timeseries_END-20191018_C-20200623.nc index cba1b91..14c0539 100644 Binary files a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV01_velocity-aggregated-timeseries_END-20191018_C-20200623.nc and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV01_velocity-aggregated-timeseries_END-20191018_C-20200623.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220502.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220608.nc similarity index 69% rename from test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220502.nc rename to test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220608.nc index 48433ee..0677047 100644 Binary files a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220502.nc and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220608.nc differ diff --git a/test_aodntools/timeseries_products/test_aggregated_timeseries.py b/test_aodntools/timeseries_products/test_aggregated_timeseries.py index 2055efd..9f79d6c 100644 --- a/test_aodntools/timeseries_products/test_aggregated_timeseries.py +++ b/test_aodntools/timeseries_products/test_aggregated_timeseries.py @@ -18,16 +18,18 @@ 'IMOS_ANMN-NRS_BCKOSTUZ_20181213T080038Z_NRSROT_FV01_NRSROT-1812-WQM-55_END-20181215T013118Z_C-20190828T000000Z.nc', BAD_FILE ] -EXPECTED_OUTPUT_FILE = os.path.join( - TEST_ROOT, 'IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20200622.nc' -) class TestAggregatedTimeseries(BaseTestCase): + EXPECTED_OUTPUT_FILE = os.path.join( + TEST_ROOT, 'IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20220607.nc' + ) + def test_main_aggregator(self): output_file, bad_files = main_aggregator(INPUT_FILES, 'TEMP', 'NRSROT', input_dir=TEST_ROOT, output_dir='/tmp') + self.assertEqual(4, len(INPUT_FILES)) self.assertEqual(1, len(bad_files)) for file, errors in bad_files.items(): self.assertEqual(BAD_FILE, file) @@ -69,13 +71,11 @@ def test_main_aggregator(self): self.assertIn(__version__, dataset.lineage) self.assertIn(BAD_FILE, dataset.rejected_files) - # check aggregated variable values - expected = Dataset(EXPECTED_OUTPUT_FILE) - compare_vars = ('TIME', 'TEMP', 'TEMP_quality_control', 'NOMINAL_DEPTH', 'instrument_index') - non_match_vars = [var for var in compare_vars - if not all(dataset[var][:] == expected[var][:]) - ] - self.assertEqual(non_match_vars, []) + self.compare_global_attributes(dataset) + + self.check_nan_values(dataset) + + self.compare_variables(dataset) def test_source_file_attributes(self): output_file, bad_files = main_aggregator(INPUT_FILES, 'PSAL', 'NRSROT', input_dir=TEST_ROOT, diff --git a/test_aodntools/timeseries_products/test_gridded_timeseries.py b/test_aodntools/timeseries_products/test_gridded_timeseries.py new file mode 100644 index 0000000..a93919f --- /dev/null +++ b/test_aodntools/timeseries_products/test_gridded_timeseries.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +import os +import unittest + +from netCDF4 import Dataset + +from test_aodntools.base_test import BaseTestCase +from aodntools import __version__ +from aodntools.timeseries_products.gridded_timeseries import grid_variable + + +TEST_ROOT = os.path.dirname(__file__) +INPUT_FILE = 'IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc' + + +class TestGriddedTimeseries(BaseTestCase): + EXPECTED_OUTPUT_FILE = os.path.join( + TEST_ROOT, 'IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV02_TEMP-gridded-timeseries_END-20190523_C-20230110.nc' + ) + + def test_grid_variable(self): + output_file = grid_variable(INPUT_FILE, 'TEMP', input_dir=TEST_ROOT, output_dir='/tmp') + + self.assertRegex(output_file, + r'IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV02_TEMP-gridded-timeseries_END-20190523_C-\d{8}\.nc' + ) + + dataset = Dataset(output_file) + self.assertSetEqual(set(dataset.dimensions), {'TIME', 'DEPTH'}) + self.assertSetEqual(set(dataset.variables.keys()), + {'TIME', 'DEPTH', 'LATITUDE', 'LONGITUDE', 'TEMP', 'TEMP_count'}) + + # check metadata + self.assertEqual(__version__, dataset.generating_code_version) + self.assertIn(__version__, dataset.lineage) + self.assertIn('gridded_timeseries.py', dataset.lineage) + self.assertIn(INPUT_FILE, dataset.source_file) + + self.compare_global_attributes(dataset) + + self.check_nan_values(dataset) + + self.compare_variables(dataset) + + +if __name__ == '__main__': + unittest.main() diff --git a/test_aodntools/timeseries_products/test_hourly_timeseries.py b/test_aodntools/timeseries_products/test_hourly_timeseries.py index 07ad35b..9e6a7d4 100644 --- a/test_aodntools/timeseries_products/test_hourly_timeseries.py +++ b/test_aodntools/timeseries_products/test_hourly_timeseries.py @@ -20,9 +20,6 @@ BAD_FILE ] INPUT_PATHS = [os.path.join(TEST_ROOT, f) for f in INPUT_FILES] -EXPECTED_OUTPUT_FILE = os.path.join( - TEST_ROOT, 'IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc' -) INST_VARIABLES = {'instrument_id', 'source_file', 'LONGITUDE', 'LATITUDE', 'NOMINAL_DEPTH'} OBS_VARIABLES = {'instrument_index', 'TIME'} @@ -49,6 +46,10 @@ class TestHourlyTimeseries(BaseTestCase): + EXPECTED_OUTPUT_FILE = os.path.join( + TEST_ROOT, 'IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc' + ) + def test_hourly_aggregator(self): output_file, bad_files = hourly_aggregator(files_to_aggregate=INPUT_PATHS, site_code='NRSROT', @@ -87,15 +88,11 @@ def test_hourly_aggregator(self): self.assertIn('hourly_timeseries.py', dataset.lineage) self.assertIn(BAD_FILE, dataset.rejected_files) - # check variable values - expected = Dataset(EXPECTED_OUTPUT_FILE) - self.assertEqual(len(expected['TIME']), len(dataset['TIME'])) - compare_vars = ('TIME', 'NOMINAL_DEPTH', 'instrument_index', - 'TEMP', 'TEMP_count', 'TEMP_min', 'TEMP_max') - non_match_vars = [var for var in compare_vars - if not all(dataset[var][:] == expected[var][:]) - ] - self.assertEqual(non_match_vars, []) + self.compare_global_attributes(dataset) + + self.check_nan_values(dataset) + + self.compare_variables(dataset) def test_hourly_aggregator_with_nonqc(self): output_file, bad_files = hourly_aggregator(files_to_aggregate=INPUT_FILES, @@ -117,6 +114,19 @@ def test_hourly_aggregator_with_nonqc(self): for f in chartostring(dataset['source_file'][:]): self.assertIn(f, INPUT_FILES) + def test_with_adcp(self): + # Replace the BAD_FILE with an ADCP file - aggregation should work (only takes TEMP from the ADCP) + input_files = INPUT_FILES[:2] + \ + ['IMOS_ANMN-NRS_AETVZ_20180816T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1808-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20180822T053000Z_C-20200623T000000Z.nc'] + output_file, bad_files = hourly_aggregator(files_to_aggregate=input_files, + site_code='NRSROT', + qcflags=(1, 2), + input_dir=TEST_ROOT, + output_dir='/tmp' + ) + + self.assertEqual(0, len(bad_files)) + def test_all_rejected(self): self.assertRaises(NoInputFilesError, hourly_aggregator, [BAD_FILE], 'NRSROT', (1, 2), input_dir=TEST_ROOT) @@ -131,6 +141,8 @@ def test_some_files_without_good_data(self): for path, errors in bad_files.items(): self.assertEqual(NO_INWATER_DATA_FILE, path) self.assertIn('no in-water data', errors) + with Dataset(output_file) as dataset: + self.check_nan_values(dataset) def test_bad_timestamps(self): output_file, bad_files = hourly_aggregator(files_to_aggregate=SYD100_FILES, diff --git a/test_aodntools/timeseries_products/test_velocity_aggregated_timeseries.py b/test_aodntools/timeseries_products/test_velocity_aggregated_timeseries.py index 8001978..856e17f 100644 --- a/test_aodntools/timeseries_products/test_velocity_aggregated_timeseries.py +++ b/test_aodntools/timeseries_products/test_velocity_aggregated_timeseries.py @@ -18,9 +18,6 @@ 'IMOS_ANMN-NRS_AETVZ_20191016T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1910-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20191018T100000Z_C-20200430T000000Z.nc', BAD_FILE ] -EXPECTED_OUTPUT_FILE = os.path.join( - TEST_ROOT, 'IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV01_velocity-aggregated-timeseries_END-20191018_C-20200623.nc' -) OBS_VARS = {'TIME', 'DEPTH', 'DEPTH_quality_control', 'UCUR', 'UCUR_quality_control', 'VCUR', 'VCUR_quality_control', 'WCUR', 'WCUR_quality_control', 'instrument_index', 'CELL_INDEX'} @@ -29,9 +26,14 @@ class TestVelocityAggregatedTimeseries(BaseTestCase): + EXPECTED_OUTPUT_FILE = os.path.join( + TEST_ROOT, 'IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV01_velocity-aggregated-timeseries_END-20191018_C-20200623.nc' + ) + def test_velocity_aggregated(self): output_file, bad_files = velocity_aggregated(INPUT_FILES, 'NRSROT', input_dir=TEST_ROOT, output_dir='/tmp') + self.assertEqual(4, len(INPUT_FILES)) self.assertEqual(1, len(bad_files)) for file, errors in bad_files.items(): self.assertEqual(BAD_FILE, file) @@ -56,13 +58,11 @@ def test_velocity_aggregated(self): self.assertEqual(__version__, dataset.generating_code_version) self.assertIn(__version__, dataset.lineage) - # check aggregated variable values - expected = Dataset(EXPECTED_OUTPUT_FILE) - compare_vars = set(expected.variables.keys()) - STR_VARS - non_match_vars = [var for var in compare_vars - if not all(dataset[var][:] == expected[var][:]) - ] - self.assertEqual(non_match_vars, []) + self.compare_global_attributes(dataset) + + self.check_nan_values(dataset) + + self.compare_variables(dataset) def test_all_rejected(self): self.assertRaises(NoInputFilesError, velocity_aggregated, [BAD_FILE], 'NRSROT', diff --git a/test_aodntools/timeseries_products/test_velocity_hourly_timeseries.py b/test_aodntools/timeseries_products/test_velocity_hourly_timeseries.py index 5f734b7..d527f93 100644 --- a/test_aodntools/timeseries_products/test_velocity_hourly_timeseries.py +++ b/test_aodntools/timeseries_products/test_velocity_hourly_timeseries.py @@ -19,9 +19,6 @@ 'IMOS_ANMN-NRS_AETVZ_20191016T080000Z_NRSROT-ADCP_FV01_NRSROT-ADCP-1910-Sentinel-or-Monitor-Workhorse-ADCP-44_END-20191018T100000Z_C-20200430T000000Z.nc', BAD_FILE ] -EXPECTED_OUTPUT_FILE = os.path.join( - TEST_ROOT, 'IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220502.nc' -) OBS_VARS = {'TIME', 'instrument_index', 'CELL_INDEX'} INST_VARS = {'LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH', 'SECONDS_TO_MIDDLE'} @@ -33,10 +30,14 @@ class TestVelocityHourlyTimeseries(BaseTestCase): + EXPECTED_OUTPUT_FILE = os.path.join( + TEST_ROOT, 'IMOS_ANMN-NRS_VZ_20180816_NRSROT_FV02_velocity-hourly-timeseries_END-20191018_C-20220608.nc' + ) + def test_velocity_hourly(self): output_file, bad_files = velocity_hourly_aggregated(INPUT_FILES, 'NRSROT', input_dir=TEST_ROOT, output_dir='/tmp') - + self.assertEqual(4, len(INPUT_FILES)) self.assertEqual(1, len(bad_files)) for file, errors in bad_files.items(): self.assertEqual(BAD_FILE, file) @@ -61,20 +62,26 @@ def test_velocity_hourly(self): self.assertEqual(__version__, dataset.generating_code_version) self.assertIn(__version__, dataset.lineage) - # check aggregated variable values - expected = Dataset(EXPECTED_OUTPUT_FILE) - self.assertEqual(len(expected['TIME']), len(dataset['TIME'])) + self.compare_global_attributes(dataset) + + self.check_nan_values(dataset) - non_match_vars = [] - for var in set(expected.variables.keys()) - STR_VARS: - if not all(np.isclose(dataset[var], expected[var], equal_nan=True)): - non_match_vars.append(var) - self.assertEqual(non_match_vars, []) + self.compare_variables(dataset) def test_all_rejected(self): self.assertRaises(NoInputFilesError, velocity_hourly_aggregated, [BAD_FILE], 'NRSROT', input_dir=TEST_ROOT, output_dir='/tmp') + def test_size1_dimensions(self): + input_files = [ + 'IMOS_ANMN-NRS_ADCP_LAT_LON_DIMS.nc', + 'IMOS_ANMN-NRS_ADCP_SINGLE_TIMESTAMP.nc' + ] + output_file, bad_files = velocity_hourly_aggregated(input_files, 'NRSROT', + input_dir=TEST_ROOT, output_dir='/tmp') + + self.assertEqual(0, len(bad_files)) + if __name__ == '__main__': unittest.main()