diff --git a/Dockerfile b/Dockerfile index 74eb71e..91c246b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:latest +FROM ubuntu:20.04 ARG BUILDER_UID=9999 ARG DEBIAN_FRONTEND=noninteractive diff --git a/aodntools/timeseries_products/hourly_timeseries.py b/aodntools/timeseries_products/hourly_timeseries.py index 15c6e03..9904d93 100644 --- a/aodntools/timeseries_products/hourly_timeseries.py +++ b/aodntools/timeseries_products/hourly_timeseries.py @@ -326,6 +326,8 @@ def PDresample_by_hour(df, function_dict, function_stats): df_data = pd.DataFrame() for variable in varnames: ds_var = df[variable] + print(" {type} {name} {n} ({variable})".format(variable=variable, type=type(ds_var.index), + name=ds_var.index.name, n=len(ds_var))) ds_var_mean = ds_var.resample('1H').apply(function_dict[variable]).astype(np.float32) df_data = pd.concat([df_data, ds_var_mean], axis=1, sort=False) for stat_method in function_stats: @@ -398,7 +400,7 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp qc_count_all = {} for file_index, file in enumerate(files_to_aggregate): - print(file_index) + print(file_index, file) with xr.open_dataset(os.path.join(input_dir, file), mask_and_scale=True, decode_times=True) as nc: parameter_names = list(set(list(nc.variables)) & set(parameter_names_accepted)) parameter_names_all += parameter_names @@ -576,4 +578,4 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp qcflags = [int(i) for i in args.qcflags] hourly_aggregator(files_to_aggregate=files_to_aggregate, site_code=args.site_code, qcflags=qcflags, - input_dir=args.input_dir, output_dir=args.output_path) + input_dir=args.input_dir, output_dir=args.output_dir) diff --git a/aodntools/timeseries_products/velocity_hourly_timeseries.py b/aodntools/timeseries_products/velocity_hourly_timeseries.py index e5621e3..f15c797 100644 --- a/aodntools/timeseries_products/velocity_hourly_timeseries.py +++ b/aodntools/timeseries_products/velocity_hourly_timeseries.py @@ -179,7 +179,7 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir print('PROCESSING...') slice_start = 0 for index, file in enumerate(files_to_agg): - print(index, end=",", flush=True) + print(index, file, flush=True) ## this is for filling the slice of variables with INSTRUMENT dim slice_instrument_start = slice_start @@ -203,6 +203,7 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir chunk_index = 0 while chunk_start < chunk_partial and chunk_start <= chunk_end: nc_chunk = nc.where((nc.TIME >= chunk_start) & (nc.TIME < chunk_partial), drop=True) + print(" {i}: {n} samples".format(i=chunk_index, n=len(nc_chunk.TIME))) if is_2D: ## process all cells, one by one heights = nc_chunk.HEIGHT_ABOVE_SENSOR.values diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20080729T130004Z_SYD100_FV01_SYD100-0807-Aqualogger-520PT-97_END-20080826T074504Z_C-20160809T001556Z.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20080729T130004Z_SYD100_FV01_SYD100-0807-Aqualogger-520PT-97_END-20080826T074504Z_C-20160809T001556Z.nc new file mode 100755 index 0000000..b9e9038 Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20080729T130004Z_SYD100_FV01_SYD100-0807-Aqualogger-520PT-97_END-20080826T074504Z_C-20160809T001556Z.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20100807T130000Z_SYD100_FV01_SYD100-1008-Aqualogger-520T-42_END-20101014T021000Z_C-20160809T050432Z.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20100807T130000Z_SYD100_FV01_SYD100-1008-Aqualogger-520T-42_END-20101014T021000Z_C-20160809T050432Z.nc new file mode 100755 index 0000000..412ae17 Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20100807T130000Z_SYD100_FV01_SYD100-1008-Aqualogger-520T-42_END-20101014T021000Z_C-20160809T050432Z.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20200703T001500Z_PH100_FV01_PH100-2007-Aqualogger-520T-96_END-20200907T233000Z_C-20210112T044909Z.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20200703T001500Z_PH100_FV01_PH100-2007-Aqualogger-520T-96_END-20200907T233000Z_C-20210112T044909Z.nc new file mode 100644 index 0000000..1fd003a Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_20200703T001500Z_PH100_FV01_PH100-2007-Aqualogger-520T-96_END-20200907T233000Z_C-20210112T044909Z.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_PH100_ALL_FLAGGED_BAD.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_PH100_ALL_FLAGGED_BAD.nc new file mode 100644 index 0000000..b9015e4 Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_PH100_ALL_FLAGGED_BAD.nc differ diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_PH100_NO_INWATER_DATA.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_PH100_NO_INWATER_DATA.nc new file mode 100644 index 0000000..d215f8e Binary files /dev/null and b/test_aodntools/timeseries_products/IMOS_ANMN-NSW_TZ_PH100_NO_INWATER_DATA.nc differ diff --git a/test_aodntools/timeseries_products/test_hourly_timeseries.py b/test_aodntools/timeseries_products/test_hourly_timeseries.py index d262cdf..25ec1f9 100644 --- a/test_aodntools/timeseries_products/test_hourly_timeseries.py +++ b/test_aodntools/timeseries_products/test_hourly_timeseries.py @@ -33,6 +33,13 @@ for s in function_stats: OBS_VARIABLES.add(v + s) +NO_INWATER_DATA_FILE = 'IMOS_ANMN-NSW_TZ_PH100_NO_INWATER_DATA.nc' +PH100_FILES = [ + 'IMOS_ANMN-NSW_TZ_20200703T001500Z_PH100_FV01_PH100-2007-Aqualogger-520T-96_END-20200907T233000Z_C-20210112T044909Z.nc', + 'IMOS_ANMN-NSW_TZ_PH100_ALL_FLAGGED_BAD.nc', + NO_INWATER_DATA_FILE +] + class TestHourlyTimeseries(BaseTestCase): def test_hourly_aggregator(self): @@ -96,6 +103,38 @@ def test_hourly_aggregator_with_nonqc(self): def test_all_rejected(self): self.assertRaises(NoInputFilesError, hourly_aggregator, [BAD_FILE], 'NRSROT', (1, 2), input_dir=TEST_ROOT) + def test_some_files_without_good_data(self): + output_file, bad_files = hourly_aggregator(files_to_aggregate=PH100_FILES, + site_code='PH100', + qcflags=(1, 2), + input_dir=TEST_ROOT, + output_dir='/tmp' + ) + # The "NO_INWATER_DATA" file should be in the bad_files list + self.assertEqual(1, len(bad_files)) + for path, errors in bad_files.items(): + self.assertEqual(os.path.join(TEST_ROOT, NO_INWATER_DATA_FILE), path) + self.assertIn('no in-water data', errors) + + + +### Temp tests for debugging + +SYD100_LIST = [ + 'IMOS_ANMN-NSW_TZ_20100807T130000Z_SYD100_FV01_SYD100-1008-Aqualogger-520T-42_END-20101014T021000Z_C-20160809T050432Z.nc', + 'IMOS_ANMN-NSW_TZ_20080729T130004Z_SYD100_FV01_SYD100-0807-Aqualogger-520PT-97_END-20080826T074504Z_C-20160809T001556Z.nc', +] + +class TestHourlyTimeseriesDebugging(BaseTestCase): + def test_typeerror_syd100(self): + output_file, bad_files = hourly_aggregator(files_to_aggregate=SYD100_LIST, + site_code='SYD100', + qcflags=(1, 2), + input_dir=TEST_ROOT, + output_dir='/tmp' + ) + self.assertEqual(0, len(bad_files)) + if __name__ == '__main__': unittest.main()