From 8b92e71bed50e72e5f91c65696eadcce0c2591db Mon Sep 17 00:00:00 2001 From: Mats Veldhuizen Date: Wed, 17 Jul 2024 16:27:51 +0200 Subject: [PATCH 01/65] import directly to xarray and rewrite utils_transformation to work with it --- ci/ci_test_env.yml | 1 + pysteps/converters.py | 202 ++++++++++ pysteps/decorators.py | 6 +- pysteps/io/importers.py | 4 +- pysteps/io/readers.py | 63 +-- pysteps/nowcasts/anvil.py | 5 +- pysteps/tests/helpers.py | 39 +- pysteps/tests/test_exporters.py | 14 +- pysteps/tests/test_utils_transformation.py | 424 +++++++++++++++------ pysteps/utils/conversion.py | 93 +++-- pysteps/utils/transformation.py | 220 +++++------ requirements.txt | 1 + requirements_dev.txt | 1 + 13 files changed, 754 insertions(+), 319 deletions(-) create mode 100644 pysteps/converters.py diff --git a/ci/ci_test_env.yml b/ci/ci_test_env.yml index 4c0b8732f..7857de61b 100644 --- a/ci/ci_test_env.yml +++ b/ci/ci_test_env.yml @@ -18,6 +18,7 @@ dependencies: - pillow - pyproj - scipy + - xarray # Optional dependencies - dask - pyfftw diff --git a/pysteps/converters.py b/pysteps/converters.py new file mode 100644 index 000000000..f27e1572e --- /dev/null +++ b/pysteps/converters.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- +""" +pysteps.converters +================== + +Module with data converter functions. + +.. autosummary:: + :toctree: ../generated/ + + convert_to_xarray_dataset +""" + +import numpy as np +import pyproj +import xarray as xr + +from pysteps.utils.conversion import cf_parameters_from_unit + +# TODO(converters): Write methods for converting Proj.4 projection definitions +# into CF grid mapping attributes. Currently this has been implemented for +# the stereographic projection. 
+# The conversions implemented here are taken from: +# https://github.com/cf-convention/cf-convention.github.io/blob/master/wkt-proj-4.md + + +def _convert_proj4_to_grid_mapping(proj4str): + tokens = proj4str.split("+") + + d = {} + for t in tokens[1:]: + t = t.split("=") + if len(t) > 1: + d[t[0]] = t[1].strip() + + params = {} + # TODO(exporters): implement more projection types here + if d["proj"] == "stere": + grid_mapping_var_name = "polar_stereographic" + grid_mapping_name = "polar_stereographic" + v = d["lon_0"] if d["lon_0"][-1] not in ["E", "W"] else d["lon_0"][:-1] + params["straight_vertical_longitude_from_pole"] = float(v) + v = d["lat_0"] if d["lat_0"][-1] not in ["N", "S"] else d["lat_0"][:-1] + params["latitude_of_projection_origin"] = float(v) + if "lat_ts" in list(d.keys()): + params["standard_parallel"] = float(d["lat_ts"]) + elif "k_0" in list(d.keys()): + params["scale_factor_at_projection_origin"] = float(d["k_0"]) + params["false_easting"] = float(d["x_0"]) + params["false_northing"] = float(d["y_0"]) + elif d["proj"] == "aea": # Albers Conical Equal Area + grid_mapping_var_name = "proj" + grid_mapping_name = "albers_conical_equal_area" + params["false_easting"] = float(d["x_0"]) if "x_0" in d else float(0) + params["false_northing"] = float(d["y_0"]) if "y_0" in d else float(0) + v = d["lon_0"] if "lon_0" in d else float(0) + params["longitude_of_central_meridian"] = float(v) + v = d["lat_0"] if "lat_0" in d else float(0) + params["latitude_of_projection_origin"] = float(v) + v1 = d["lat_1"] if "lat_1" in d else float(0) + v2 = d["lat_2"] if "lat_2" in d else float(0) + params["standard_parallel"] = (float(v1), float(v2)) + else: + print("unknown projection", d["proj"]) + return None, None, None + + return grid_mapping_var_name, grid_mapping_name, params + + +def convert_to_xarray_dataset( + precip: np.ndarray, + quality: np.ndarray | None, + metadata: dict[str, str | float | None], +) -> xr.Dataset: + """ + Read a precip, quality, metadata 
tuple as returned by the importers + (:py:mod:`pysteps.io.importers`) and return an xarray dataset containing + this data. + + Parameters + ---------- + precip: array + 2D array containing imported precipitation data. + quality: array, None + 2D array containing the quality values of the imported precipitation + data, can be None. + metadata: dict + Metadata dictionary containing the attributes described in the + documentation of :py:mod:`pysteps.io.importers`. + + Returns + ------- + out: Dataset + A CF compliant xarray dataset, which contains all data and metadata. + + """ + var_name, attrs = cf_parameters_from_unit(metadata["unit"]) + h, w = precip.shape + x_r = np.linspace(metadata["x1"], metadata["x2"], w + 1)[:-1] + x_r += 0.5 * (x_r[1] - x_r[0]) + y_r = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1] + y_r += 0.5 * (y_r[1] - y_r[0]) + + # flip yr vector if yorigin is upper + if metadata["yorigin"] == "upper": + y_r = np.flip(y_r) + + x_2d, y_2d = np.meshgrid(x_r, y_r) + pr = pyproj.Proj(metadata["projection"]) + lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True) + + ( + grid_mapping_var_name, + grid_mapping_name, + grid_mapping_params, + ) = _convert_proj4_to_grid_mapping(metadata["projection"]) + + data_vars = { + var_name: ( + ["y", "x"], + precip, + { + "units": attrs["units"], + "standard_name": attrs["standard_name"], + "long_name": attrs["long_name"], + "grid_mapping": "projection", + "transform": metadata["transform"], + "accutime": metadata["accutime"], + "threshold": metadata["threshold"], + "zerovalue": metadata["zerovalue"], + "zr_a": metadata["zr_a"], + "zr_b": metadata["zr_b"], + }, + ) + } + if quality is not None: + data_vars["quality"] = ( + ["y", "x"], + quality, + { + "units": "1", + "standard_name": "quality_flag", + "grid_mapping": "projection", + }, + ) + coords = { + "y": ( + ["y"], + y_r, + { + "axis": "Y", + "long_name": "y-coordinate in Cartesian system", + "standard_name": "projection_y_coordinate", + "units": 
metadata["cartesian_unit"], + }, + ), + "x": ( + ["x"], + x_r, + { + "axis": "X", + "long_name": "x-coordinate in Cartesian system", + "standard_name": "projection_x_coordinate", + "units": metadata["cartesian_unit"], + }, + ), + "lon": ( + ["y", "x"], + lon.reshape(precip.shape), + { + "long_name": "longitude coordinate", + "standard_name": "longitude", + # TODO(converters): Don't hard-code the unit. + "units": "degrees_east", + }, + ), + "lat": ( + ["y", "x"], + lat.reshape(precip.shape), + { + "long_name": "latitude coordinate", + "standard_name": "latitude", + # TODO(converters): Don't hard-code the unit. + "units": "degrees_north", + }, + ), + } + if grid_mapping_var_name is not None: + coords[grid_mapping_name] = ( + ( + [], + None, + {"grid_mapping_name": grid_mapping_name, **grid_mapping_params}, + ), + ) + attrs = { + "Conventions": "CF-1.7", + "institution": metadata["institution"], + "projection": metadata["projection"], + "precip_var": var_name, + } + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) diff --git a/pysteps/decorators.py b/pysteps/decorators.py index 44fbaebdb..ee421977e 100644 --- a/pysteps/decorators.py +++ b/pysteps/decorators.py @@ -22,6 +22,8 @@ import numpy as np +from pysteps.converters import convert_to_xarray_dataset + def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text): """ @@ -66,7 +68,7 @@ def postprocess_import(fillna=np.nan, dtype="double"): def _postprocess_import(importer): @wraps(importer) def _import_with_postprocessing(*args, **kwargs): - precip, *other_args = importer(*args, **kwargs) + precip, quality, metadata = importer(*args, **kwargs) _dtype = kwargs.get("dtype", dtype) @@ -88,7 +90,7 @@ def _import_with_postprocessing(*args, **kwargs): mask = ~np.isfinite(precip) precip[mask] = _fillna - return (precip.astype(_dtype),) + tuple(other_args) + return convert_to_xarray_dataset(precip.astype(_dtype), quality, metadata) extra_kwargs_doc = """ Other Parameters diff --git 
a/pysteps/io/importers.py b/pysteps/io/importers.py index 1493b6d4b..5c2928203 100644 --- a/pysteps/io/importers.py +++ b/pysteps/io/importers.py @@ -89,12 +89,10 @@ from functools import partial import numpy as np - from matplotlib.pyplot import imread from pysteps.decorators import postprocess_import -from pysteps.exceptions import DataModelError -from pysteps.exceptions import MissingOptionalDependency +from pysteps.exceptions import DataModelError, MissingOptionalDependency from pysteps.utils import aggregate_fields try: diff --git a/pysteps/io/readers.py b/pysteps/io/readers.py index fcc6bda2e..7295b5ff7 100644 --- a/pysteps/io/readers.py +++ b/pysteps/io/readers.py @@ -12,13 +12,14 @@ """ import numpy as np +import xarray as xr -def read_timeseries(inputfns, importer, **kwargs): +def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: """ Read a time series of input files using the methods implemented in the - :py:mod:`pysteps.io.importers` module and stack them into a 3d array of - shape (num_timesteps, height, width). + :py:mod:`pysteps.io.importers` module and stack them into a 3d xarray + dataset of shape (num_timesteps, height, width). Parameters ---------- @@ -32,50 +33,50 @@ def read_timeseries(inputfns, importer, **kwargs): Returns ------- - out: tuple - A three-element tuple containing the read data and quality rasters and + out: Dataset + A dataset containing the read data and quality rasters and associated metadata. If an input file name is None, the corresponding precipitation and quality fields are filled with nan values. If all input file names are None or if the length of the file name list is - zero, a three-element tuple containing None values is returned. + zero, None is returned. 
""" # check for missing data - precip_ref = None + dataset_ref = None if all(ifn is None for ifn in inputfns): - return None, None, None + return None else: if len(inputfns[0]) == 0: - return None, None, None + return None for ifn in inputfns[0]: if ifn is not None: - precip_ref, quality_ref, metadata = importer(ifn, **kwargs) + dataset_ref = importer(ifn, **kwargs) break - if precip_ref is None: - return None, None, None + if dataset_ref is None: + return None - precip = [] - quality = [] - timestamps = [] + startdate = min(inputfns[1]) + + datasets = [] for i, ifn in enumerate(inputfns[0]): if ifn is not None: - precip_, quality_, _ = importer(ifn, **kwargs) - precip.append(precip_) - quality.append(quality_) - timestamps.append(inputfns[1][i]) + dataset_ = importer(ifn, **kwargs) else: - precip.append(precip_ref * np.nan) - if quality_ref is not None: - quality.append(quality_ref * np.nan) - else: - quality.append(None) - timestamps.append(inputfns[1][i]) - - # Replace this with stack? 
- precip = np.concatenate([precip_[None, :, :] for precip_ in precip]) - # TODO: Q should be organized as R, but this is not trivial as Q_ can be also None or a scalar - metadata["timestamps"] = np.array(timestamps) + dataset_ = dataset_ref * np.nan + dataset_ = dataset_.expand_dims(dim="time", axis=0) + dataset_ = dataset_.assign_coords( + time=( + "time", + [inputfns[1][i]], + { + "long_name": "forecast time", + "units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}", + }, + ) + ) + datasets.append(dataset_) - return precip, quality, metadata + dataset = xr.concat(datasets, dim="time") + return dataset diff --git a/pysteps/nowcasts/anvil.py b/pysteps/nowcasts/anvil.py index 9da0fb47e..f5af038bb 100644 --- a/pysteps/nowcasts/anvil.py +++ b/pysteps/nowcasts/anvil.py @@ -19,12 +19,13 @@ """ import time + import numpy as np from scipy.ndimage import gaussian_filter -from pysteps import cascade, extrapolation + +from pysteps import cascade, extrapolation, utils from pysteps.nowcasts.utils import nowcast_main_loop from pysteps.timeseries import autoregression -from pysteps import utils try: import dask diff --git a/pysteps/tests/helpers.py b/pysteps/tests/helpers.py index 85bd861f5..2ae10026b 100644 --- a/pysteps/tests/helpers.py +++ b/pysteps/tests/helpers.py @@ -9,6 +9,7 @@ import numpy as np import pytest +import xarray as xr import pysteps as stp from pysteps import io, rcparams @@ -24,6 +25,32 @@ _reference_dates["mrms"] = datetime(2019, 6, 10, 0, 0) +def assert_dataset_equivalent(dataset1: xr.Dataset, dataset2: xr.Dataset) -> None: + xr.testing.assert_allclose(dataset1, dataset2) + assert np.isclose( + dataset1["precip_intensity"].attrs["threshold"], + dataset2["precip_intensity"].attrs["threshold"], + ) + assert ( + dataset1["precip_intensity"].attrs["units"] + == dataset2["precip_intensity"].attrs["units"] + ) + assert ( + dataset1["precip_intensity"].attrs["transform"] + == dataset2["precip_intensity"].attrs["transform"] + or 
dataset1["precip_intensity"].attrs["transform"] is None + and dataset2["precip_intensity"].attrs["transform"] is None + ) + assert ( + dataset1["precip_intensity"].attrs["accutime"] + == dataset2["precip_intensity"].attrs["accutime"] + ) + assert ( + dataset1["precip_intensity"].attrs["zerovalue"] + == dataset2["precip_intensity"].attrs["zerovalue"] + ) + + def get_precipitation_fields( num_prev_files=0, num_next_files=0, @@ -161,9 +188,7 @@ def get_precipitation_fields( # Read the radar composites importer = io.get_method(importer_name, "importer") - reference_field, __, ref_metadata = io.read_timeseries( - fns, importer, **_importer_kwargs - ) + ref_dataset = io.read_timeseries(fns, importer, **_importer_kwargs) if not return_raw: if (num_prev_files == 0) and (num_next_files == 0): @@ -171,14 +196,10 @@ def get_precipitation_fields( reference_field = np.squeeze(reference_field) # Convert to mm/h - reference_field, ref_metadata = stp.utils.to_rainrate( - reference_field, ref_metadata - ) + ref_dataset = stp.utils.to_rainrate(ref_dataset) # Clip domain - reference_field, ref_metadata = stp.utils.clip_domain( - reference_field, ref_metadata, clip - ) + ref_dataset = stp.utils.clip_domain(ref_dataset, clip) # Upscale data reference_field, ref_metadata = aggregate_fields_space( diff --git a/pysteps/tests/test_exporters.py b/pysteps/tests/test_exporters.py index 10e87d46e..dfe7e8ace 100644 --- a/pysteps/tests/test_exporters.py +++ b/pysteps/tests/test_exporters.py @@ -9,12 +9,14 @@ from numpy.testing import assert_array_almost_equal from pysteps.io import import_netcdf_pysteps -from pysteps.io.exporters import _get_geotiff_filename -from pysteps.io.exporters import close_forecast_files -from pysteps.io.exporters import export_forecast_dataset -from pysteps.io.exporters import initialize_forecast_exporter_netcdf -from pysteps.io.exporters import _convert_proj4_to_grid_mapping -from pysteps.tests.helpers import get_precipitation_fields, get_invalid_mask +from 
pysteps.io.exporters import ( + _convert_proj4_to_grid_mapping, + _get_geotiff_filename, + close_forecast_files, + export_forecast_dataset, + initialize_forecast_exporter_netcdf, +) +from pysteps.tests.helpers import get_invalid_mask, get_precipitation_fields # Test arguments exporter_arg_names = ( diff --git a/pysteps/tests/test_utils_transformation.py b/pysteps/tests/test_utils_transformation.py index 101e6b9d5..29e6e639c 100644 --- a/pysteps/tests/test_utils_transformation.py +++ b/pysteps/tests/test_utils_transformation.py @@ -1,190 +1,392 @@ # -*- coding: utf-8 -*- - import numpy as np import pytest -from numpy.testing import assert_array_almost_equal +import xarray as xr +from pysteps.tests.helpers import assert_dataset_equivalent from pysteps.utils import transformation # boxcox_transform -test_data = [ +test_data_boxcox_transform = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, None, False, - np.array([0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": 1, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "BoxCox", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": 1, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, None, True, - np.array([np.exp(1)]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + 
np.array([np.exp(1.0)]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), 1.0, None, None, False, - np.array([0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([np.e - 2]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": np.e - 1, + "zerovalue": np.e - 2, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "BoxCox", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([np.e - 2]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": np.e - 1, + "zerovalue": np.e - 2, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), 1.0, None, None, True, - np.array([2.0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] @pytest.mark.parametrize( - "R, metadata, Lambda, threshold, zerovalue, inverse, expected", test_data + "dataset, Lambda, threshold, zerovalue, inverse, expected", + test_data_boxcox_transform, ) -def test_boxcox_transform(R, metadata, Lambda, threshold, zerovalue, inverse, expected): +def test_boxcox_transform(dataset, Lambda, threshold, zerovalue, inverse, expected): """Test the boxcox_transform.""" - assert_array_almost_equal( - 
transformation.boxcox_transform( - R, metadata, Lambda, threshold, zerovalue, inverse - )[0], - expected, + actual = transformation.boxcox_transform( + dataset, Lambda, threshold, zerovalue, inverse ) + assert_dataset_equivalent(actual, expected) # dB_transform -test_data = [ +test_data_dB_transform = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1, + "zerovalue": 1, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, False, - np.array([0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 0, + "zerovalue": -5, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 0, + "zerovalue": -5, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, True, - np.array([1.25892541]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] @pytest.mark.parametrize( - "R, metadata, threshold, zerovalue, inverse, expected", test_data + "dataset, threshold, zerovalue, inverse, expected", test_data_dB_transform ) -def test_dB_transform(R, metadata, threshold, zerovalue, inverse, expected): +def test_dB_transform(dataset, threshold, zerovalue, inverse, expected): """Test the dB_transform.""" - assert_array_almost_equal( - 
transformation.dB_transform(R, metadata, threshold, zerovalue, inverse)[0], - expected, - ) + actual = transformation.dB_transform(dataset, threshold, zerovalue, inverse) + assert_dataset_equivalent(actual, expected) # NQ_transform -test_data = [ +test_data_NQ_transform = [ ( - np.array([1, 2]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 2.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 0, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), False, - np.array([-0.4307273, 0.4307273]), - ) + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([-0.4307273, 0.4307273]), + { + "units": "mm/h", + "transform": "NQT", + "accutime": 5, + "threshold": 0.4307273, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + ), ] -@pytest.mark.parametrize("R, metadata, inverse, expected", test_data) -def test_NQ_transform(R, metadata, inverse, expected): +@pytest.mark.parametrize("dataset, inverse, expected", test_data_NQ_transform) +def test_NQ_transform(dataset, inverse, expected): """Test the NQ_transform.""" - assert_array_almost_equal( - transformation.NQ_transform(R, metadata, inverse)[0], expected - ) + actual = transformation.NQ_transform(dataset, inverse) + assert_dataset_equivalent(actual, expected) # sqrt_transform -test_data = [ +test_data_sqrt_transform = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 4.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 4, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), False, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 2.0]), + { + 
"units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 2, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 2.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 2, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), True, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 4.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 4, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, inverse, expected", test_data) -def test_sqrt_transform(R, metadata, inverse, expected): +@pytest.mark.parametrize("dataset, inverse, expected", test_data_sqrt_transform) +def test_sqrt_transform(dataset, inverse, expected): """Test the sqrt_transform.""" - assert_array_almost_equal( - transformation.sqrt_transform(R, metadata, inverse)[0], expected - ) + actual = transformation.sqrt_transform(dataset, inverse) + assert_dataset_equivalent(actual, expected) diff --git a/pysteps/utils/conversion.py b/pysteps/utils/conversion.py index f8dfae23b..527c06b9a 100644 --- a/pysteps/utils/conversion.py +++ b/pysteps/utils/conversion.py @@ -14,6 +14,9 @@ """ import warnings + +import xarray as xr + from . import transformation # TODO: This should not be done. 
Instead fix the code so that it doesn't @@ -22,18 +25,53 @@ warnings.filterwarnings("ignore", category=RuntimeWarning) -def to_rainrate(R, metadata, zr_a=None, zr_b=None): +def cf_parameters_from_unit(unit: str) -> tuple[str, dict[str, str | None]]: + if unit == "mm/h": + var_name = "precip_intensity" + var_standard_name = None + var_long_name = "instantaneous precipitation rate" + var_unit = "mm/h" + elif unit == "mm": + var_name = "precip_accum" + var_standard_name = None + var_long_name = "accumulated precipitation" + var_unit = "mm" + elif unit == "dBZ": + var_name = "reflectivity" + var_long_name = "equivalent reflectivity factor" + var_standard_name = "equivalent_reflectivity_factor" + var_unit = "dBZ" + else: + raise ValueError(f"unknown unit {unit}") + + return var_name, { + "standard_name": var_standard_name, + "long_name": var_long_name, + "units": var_unit, + } + + +def _change_unit(dataset: xr.Dataset, precip_var: str, new_unit: str) -> xr.Dataset: + new_var, new_attrs = cf_parameters_from_unit(new_unit) + dataset = dataset.rename_vars({precip_var: new_var}) + dataset.attrs["precip_var"] = new_var + + dataset[new_var].attrs = { + **dataset[new_var].attrs, + **new_attrs, + } + + return dataset + + +def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): """ Convert to rain rate [mm/h]. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. - metadata: dict - Metadata dictionary containing the accutime, transform, unit, threshold - and zerovalue attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be (back-)transformed. Additionally, in case of conversion to/from reflectivity units, the zr_a and zr_b attributes are also required, @@ -45,46 +83,46 @@ def to_rainrate(R, metadata, zr_a=None, zr_b=None): Returns ------- - R: array-like - Array of any shape containing the converted units. - metadata: dict - The metadata with updated attributes. 
+ dataset: Dataset + Dataset containing the converted units. """ - R = R.copy() - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values if metadata["transform"] is not None: if metadata["transform"] == "dB": - R, metadata = transformation.dB_transform(R, metadata, inverse=True) + dataset = transformation.dB_transform(dataset, inverse=True) elif metadata["transform"] in ["BoxCox", "log"]: - R, metadata = transformation.boxcox_transform(R, metadata, inverse=True) + dataset = transformation.boxcox_transform(dataset, inverse=True) elif metadata["transform"] == "NQT": - R, metadata = transformation.NQ_transform(R, metadata, inverse=True) + dataset = transformation.NQ_transform(dataset, inverse=True) elif metadata["transform"] == "sqrt": - R, metadata = transformation.sqrt_transform(R, metadata, inverse=True) + dataset = transformation.sqrt_transform(dataset, inverse=True) else: - raise ValueError("Unknown transformation %s" % metadata["transform"]) + raise ValueError(f'Unknown transformation {metadata["transform"]}') - if metadata["unit"] == "mm/h": + if metadata["units"] == "mm/h": pass - elif metadata["unit"] == "mm": + elif metadata["units"] == "mm": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too - R = R / float(metadata["accutime"]) * 60.0 + precip_data = precip_data / float(metadata["accutime"]) * 60.0 threshold = threshold / float(metadata["accutime"]) * 60.0 zerovalue = zerovalue / float(metadata["accutime"]) * 60.0 metadata["threshold"] = threshold metadata["zerovalue"] = zerovalue - elif metadata["unit"] == "dBZ": + elif metadata["units"] == "dBZ": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too @@ -93,7 +131,7 @@ def to_rainrate(R, metadata, zr_a=None, zr_b=None): zr_a = 
metadata.get("zr_a", 200.0) # default to Marshall–Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # default to Marshall–Palmer - R = (R / zr_a) ** (1.0 / zr_b) + precip_data = (precip_data / zr_a) ** (1.0 / zr_b) threshold = (threshold / zr_a) ** (1.0 / zr_b) zerovalue = (zerovalue / zr_a) ** (1.0 / zr_b) @@ -104,13 +142,12 @@ def to_rainrate(R, metadata, zr_a=None, zr_b=None): else: raise ValueError( - "Cannot convert unit %s and transform %s to mm/h" - % (metadata["unit"], metadata["transform"]) + f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to mm/h' ) - metadata["unit"] = "mm/h" - - return R, metadata + dataset[precip_var].data[:] = precip_data + dataset = _change_unit(dataset, precip_var, "mm/h") + return dataset def to_raindepth(R, metadata, zr_a=None, zr_b=None): diff --git a/pysteps/utils/transformation.py b/pysteps/utils/transformation.py index 87ac9adc7..1977583c6 100644 --- a/pysteps/utils/transformation.py +++ b/pysteps/utils/transformation.py @@ -14,9 +14,11 @@ sqrt_transform """ +import warnings + import numpy as np import scipy.stats as scipy_stats -import warnings +import xarray as xr from scipy.interpolate import interp1d warnings.filterwarnings( @@ -25,8 +27,8 @@ def boxcox_transform( - R, metadata=None, Lambda=None, threshold=None, zerovalue=None, inverse=False -): + dataset: xr.Dataset, Lambda=None, threshold=None, zerovalue=None, inverse=False +) -> xr.Dataset: """ The one-parameter Box-Cox transformation. @@ -39,12 +41,8 @@ def boxcox_transform( Parameters ---------- - R: array-like - Array of any shape to be transformed. - metadata: dict, optional - Metadata dictionary containing the transform, zerovalue and threshold - attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be transformed. Lambda: float, optional Parameter Lambda of the Box-Cox transformation. It is 0 by default, which produces the log transformation. 
@@ -52,7 +50,7 @@ def boxcox_transform( Choose Lambda < 1 for positively skewed data, Lambda > 1 for negatively skewed data. threshold: float, optional - The value that is used for thresholding with the same units as R. + The value that is used for thresholding with the same units as in the dataset. If None, the threshold contained in metadata is used. If no threshold is found in the metadata, a value of 0.1 is used as default. @@ -64,10 +62,8 @@ def boxcox_transform( Returns ------- - R: array-like - Array of any shape containing the (back-)transformed units. - metadata: dict - The metadata with updated attributes. + dataset: Dataset + Dataset containing the (back-)transformed units. References ---------- @@ -76,20 +72,14 @@ def boxcox_transform( doi:10.1111/j.2517-6161.1964.tb00553.x """ - R = R.copy() - - if metadata is None: - if inverse: - metadata = {"transform": "BoxCox"} - else: - metadata = {"transform": None} - - else: - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values if not inverse: if metadata["transform"] == "BoxCox": - return R, metadata + return dataset if Lambda is None: Lambda = metadata.get("BoxCox_lambda", 0.0) @@ -97,21 +87,21 @@ def boxcox_transform( if threshold is None: threshold = metadata.get("threshold", 0.1) - zeros = R < threshold + zeros = precip_data < threshold # Apply Box-Cox transform if Lambda == 0.0: - R[~zeros] = np.log(R[~zeros]) + precip_data[~zeros] = np.log(precip_data[~zeros]) threshold = np.log(threshold) else: - R[~zeros] = (R[~zeros] ** Lambda - 1) / Lambda + precip_data[~zeros] = (precip_data[~zeros] ** Lambda - 1) / Lambda threshold = (threshold**Lambda - 1) / Lambda # Set value for zeros if zerovalue is None: zerovalue = threshold - 1 # TODO: set to a more meaningful value - R[zeros] = zerovalue + precip_data[zeros] = zerovalue metadata["transform"] = "BoxCox" 
metadata["BoxCox_lambda"] = Lambda @@ -120,7 +110,7 @@ def boxcox_transform( elif inverse: if metadata["transform"] not in ["BoxCox", "log"]: - return R, metadata + return dataset if Lambda is None: Lambda = metadata.pop("BoxCox_lambda", 0.0) @@ -131,35 +121,35 @@ def boxcox_transform( # Apply inverse Box-Cox transform if Lambda == 0.0: - R = np.exp(R) + precip_data = np.exp(precip_data) threshold = np.exp(threshold) else: - R = np.exp(np.log(Lambda * R + 1) / Lambda) + precip_data = np.exp(np.log(Lambda * precip_data + 1) / Lambda) threshold = np.exp(np.log(Lambda * threshold + 1) / Lambda) - R[R < threshold] = zerovalue + precip_data[precip_data < threshold] = zerovalue metadata["transform"] = None metadata["zerovalue"] = zerovalue metadata["threshold"] = threshold - return R, metadata + dataset[precip_var].data[:] = precip_data + + return dataset -def dB_transform(R, metadata=None, threshold=None, zerovalue=None, inverse=False): +def dB_transform( + dataset: xr.Dataset, threshold=None, zerovalue=None, inverse=False +) -> xr.Dataset: """Methods to transform precipitation intensities to/from dB units. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. - metadata: dict, optional - Metadata dictionary containing the transform, zerovalue and threshold - attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be (back-)transformed. threshold: float, optional - Optional value that is used for thresholding with the same units as R. + Optional value that is used for thresholding with the same units as in the dataset. If None, the threshold contained in metadata is used. If no threshold is found in the metadata, a value of 0.1 is used as default. @@ -171,82 +161,70 @@ def dB_transform(R, metadata=None, threshold=None, zerovalue=None, inverse=False Returns ------- - R: array-like - Array of any shape containing the (back-)transformed units.
- metadata: dict - The metadata with updated attributes. + dataset: Dataset + Dataset containing the (back-)transformed units. """ - R = R.copy() - - if metadata is None: - if inverse: - metadata = {"transform": "dB"} - else: - metadata = {"transform": None} - - else: - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values # to dB units if not inverse: if metadata["transform"] == "dB": - return R, metadata + return dataset if threshold is None: threshold = metadata.get("threshold", 0.1) - zeros = R < threshold + zeros = precip_data < threshold # Convert to dB - R[~zeros] = 10.0 * np.log10(R[~zeros]) + precip_data[~zeros] = 10.0 * np.log10(precip_data[~zeros]) threshold = 10.0 * np.log10(threshold) # Set value for zeros if zerovalue is None: zerovalue = threshold - 5 # TODO: set to a more meaningful value - R[zeros] = zerovalue + precip_data[zeros] = zerovalue metadata["transform"] = "dB" metadata["zerovalue"] = zerovalue metadata["threshold"] = threshold - return R, metadata - # from dB units elif inverse: if metadata["transform"] != "dB": - return R, metadata + return dataset if threshold is None: threshold = metadata.get("threshold", -10.0) if zerovalue is None: zerovalue = 0.0 - R = 10.0 ** (R / 10.0) + precip_data = 10.0 ** (precip_data / 10.0) threshold = 10.0 ** (threshold / 10.0) - R[R < threshold] = zerovalue + precip_data[precip_data < threshold] = zerovalue metadata["transform"] = None metadata["threshold"] = threshold metadata["zerovalue"] = zerovalue - return R, metadata + dataset[precip_var].data[:] = precip_data + + return dataset -def NQ_transform(R, metadata=None, inverse=False, **kwargs): +def NQ_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.Dataset: """ The normal quantile transformation as in Bogner et al (2012). Zero rain vales are set to zero in norm space. 
Parameters ---------- - R: array-like - Array of any shape to be transformed. - metadata: dict, optional - Metadata dictionary containing the transform, zerovalue and threshold - attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be transformed. inverse: bool, optional If set to True, it performs the inverse transform. False by default. @@ -260,10 +238,8 @@ def NQ_transform(R, metadata=None, inverse=False, **kwargs): Returns ------- - R: array-like - Array of any shape containing the (back-)transformed units. - metadata: dict - The metadata with updated attributes. + dataset: Dataset + Dataset containing the (back-)transformed units. References ---------- @@ -276,105 +252,95 @@ def NQ_transform(R, metadata=None, inverse=False, **kwargs): # defaults a = kwargs.get("a", 0.0) - R = R.copy() - shape0 = R.shape - R = R.ravel().astype(float) - idxNan = np.isnan(R) - R_ = R[~idxNan] - - if metadata is None: - if inverse: - metadata = {"transform": "NQT"} - else: - metadata = {"transform": None} - metadata["zerovalue"] = np.min(R_) + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values - else: - metadata = metadata.copy() + shape0 = precip_data.shape + precip_data = precip_data.ravel().astype(float) + idxNan = np.isnan(precip_data) + precip_data_ = precip_data[~idxNan] if not inverse: # Plotting positions # https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot#Plotting_position - n = R_.size - Rpp = ((np.arange(n) + 1 - a) / (n + 1 - 2 * a)).reshape(R_.shape) + n = precip_data_.size + Rpp = ((np.arange(n) + 1 - a) / (n + 1 - 2 * a)).reshape(precip_data_.shape) # NQ transform Rqn = scipy_stats.norm.ppf(Rpp) - R__ = np.interp(R_, R_[np.argsort(R_)], Rqn) + precip_data__ = np.interp( + precip_data_, precip_data_[np.argsort(precip_data_)], Rqn + ) # set zero rain to 0 in norm space - R__[R[~idxNan] == 
metadata["zerovalue"]] = 0 + precip_data__[precip_data[~idxNan] == metadata["zerovalue"]] = 0 # build inverse transform metadata["inqt"] = interp1d( - Rqn, R_[np.argsort(R_)], bounds_error=False, fill_value=(R_.min(), R_.max()) + Rqn, + precip_data_[np.argsort(precip_data_)], + bounds_error=False, + fill_value=(precip_data_.min(), precip_data_.max()), ) metadata["transform"] = "NQT" metadata["zerovalue"] = 0 - metadata["threshold"] = R__[R__ > 0].min() + metadata["threshold"] = precip_data__[precip_data__ > 0].min() else: f = metadata.pop("inqt") - R__ = f(R_) + precip_data__ = f(precip_data_) metadata["transform"] = None - metadata["zerovalue"] = R__.min() - metadata["threshold"] = R__[R__ > R__.min()].min() + metadata["zerovalue"] = precip_data__.min() + metadata["threshold"] = precip_data__[precip_data__ > precip_data__.min()].min() - R[~idxNan] = R__ + precip_data[~idxNan] = precip_data__ - return R.reshape(shape0), metadata + dataset[precip_var].data[:] = precip_data.reshape(shape0) + return dataset -def sqrt_transform(R, metadata=None, inverse=False, **kwargs): + +def sqrt_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.Dataset: """ Square-root transform. Parameters ---------- - R: array-like - Array of any shape to be transformed. - metadata: dict, optional - Metadata dictionary containing the transform, zerovalue and threshold - attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be transformed. inverse: bool, optional If set to True, it performs the inverse transform. False by default. Returns ------- - R: array-like - Array of any shape containing the (back-)transformed units. - metadata: dict - The metadata with updated attributes. + dataset: Dataset + Dataset containing the (back-)transformed units. 
""" - R = R.copy() - - if metadata is None: - if inverse: - metadata = {"transform": "sqrt"} - else: - metadata = {"transform": None} - metadata["zerovalue"] = np.nan - metadata["threshold"] = np.nan - else: - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values if not inverse: # sqrt transform - R = np.sqrt(R) + precip_data = np.sqrt(precip_data) metadata["transform"] = "sqrt" metadata["zerovalue"] = np.sqrt(metadata["zerovalue"]) metadata["threshold"] = np.sqrt(metadata["threshold"]) else: # inverse sqrt transform - R = R**2 + precip_data = precip_data**2 metadata["transform"] = None metadata["zerovalue"] = metadata["zerovalue"] ** 2 metadata["threshold"] = metadata["threshold"] ** 2 - return R, metadata + dataset[precip_var].data[:] = precip_data + + return dataset diff --git a/requirements.txt b/requirements.txt index 1804df1d9..b5075ad35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ matplotlib jsmin jsonschema netCDF4 +xarray diff --git a/requirements_dev.txt b/requirements_dev.txt index 84cf372b1..2899e560d 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -9,6 +9,7 @@ matplotlib jsmin jsonschema netCDF4 +xarray # Optional dependencies dask From 23825dd54d8b5c3322b2ba8759459935071bb061 Mon Sep 17 00:00:00 2001 From: Mats Veldhuizen Date: Wed, 17 Jul 2024 18:10:46 +0200 Subject: [PATCH 02/65] convert to_rainrate function also --- pysteps/tests/test_utils_conversion.py | 381 +++++++++++++++++++------ pysteps/utils/conversion.py | 4 + 2 files changed, 299 insertions(+), 86 deletions(-) diff --git a/pysteps/tests/test_utils_conversion.py b/pysteps/tests/test_utils_conversion.py index 169cdb50e..80d052b77 100644 --- a/pysteps/tests/test_utils_conversion.py +++ b/pysteps/tests/test_utils_conversion.py @@ -1,119 +1,328 @@ # -*- coding: utf-8 -*- - import numpy as np import pytest +import xarray as 
xr from numpy.testing import assert_array_almost_equal +from pysteps.tests.helpers import assert_dataset_equivalent from pysteps.utils import conversion # to_rainrate -test_data = [ +test_data_to_rainrate = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([12]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([12.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 12.0, + "zerovalue": 12.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1.25892541]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.25892541]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + 
"threshold": 1.25892541, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([15.10710494]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([15.10710494]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 15.10710494, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "dBZ", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.04210719]), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.04210719]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 0.04210719, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "log", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([2.71828183]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([2.71828183]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 2.71828183, + "zerovalue": 0, + }, + ) + }, + 
attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "log", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([32.61938194]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([32.61938194]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 32.61938194, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1, + "zerovalue": 1, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([12.0]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([12.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 12, + "zerovalue": 12, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, expected", 
test_data) -def test_to_rainrate(R, metadata, expected): +@pytest.mark.parametrize("dataset, expected", test_data_to_rainrate) +def test_to_rainrate(dataset, expected): """Test the to_rainrate.""" - assert_array_almost_equal(conversion.to_rainrate(R, metadata)[0], expected) + actual = conversion.to_rainrate(dataset) + assert_dataset_equivalent(actual, expected) # to_raindepth diff --git a/pysteps/utils/conversion.py b/pysteps/utils/conversion.py index 527c06b9a..87fe22deb 100644 --- a/pysteps/utils/conversion.py +++ b/pysteps/utils/conversion.py @@ -108,6 +108,10 @@ def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): else: raise ValueError(f'Unknown transformation {metadata["transform"]}') + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values + if metadata["units"] == "mm/h": pass From 9952bd3317a277546cbeeafb921327589c5c0bf8 Mon Sep 17 00:00:00 2001 From: Mats Veldhuizen Date: Thu, 18 Jul 2024 10:43:27 +0200 Subject: [PATCH 03/65] Rewrite more code to xarray --- pysteps/tests/helpers.py | 28 +- pysteps/tests/test_utils_conversion.py | 603 +++++++++++++++++-------- pysteps/utils/conversion.py | 121 ++--- 3 files changed, 480 insertions(+), 272 deletions(-) diff --git a/pysteps/tests/helpers.py b/pysteps/tests/helpers.py index 2ae10026b..68f3f7527 100644 --- a/pysteps/tests/helpers.py +++ b/pysteps/tests/helpers.py @@ -27,27 +27,25 @@ def assert_dataset_equivalent(dataset1: xr.Dataset, dataset2: xr.Dataset) -> None: xr.testing.assert_allclose(dataset1, dataset2) + precip_var = dataset1.attrs["precip_var"] + assert precip_var == dataset2.attrs["precip_var"] assert np.isclose( - dataset1["precip_intensity"].attrs["threshold"], - dataset2["precip_intensity"].attrs["threshold"], + dataset1[precip_var].attrs["threshold"], + dataset2[precip_var].attrs["threshold"], ) - assert ( - dataset1["precip_intensity"].attrs["units"] - == dataset2["precip_intensity"].attrs["units"] - ) - assert ( - 
dataset1["precip_intensity"].attrs["transform"] - == dataset2["precip_intensity"].attrs["transform"] - or dataset1["precip_intensity"].attrs["transform"] is None - and dataset2["precip_intensity"].attrs["transform"] is None + assert np.isclose( + dataset1[precip_var].attrs["zerovalue"], + dataset2[precip_var].attrs["zerovalue"], ) + assert dataset1[precip_var].attrs["units"] == dataset2[precip_var].attrs["units"] assert ( - dataset1["precip_intensity"].attrs["accutime"] - == dataset2["precip_intensity"].attrs["accutime"] + dataset1[precip_var].attrs["transform"] + == dataset2[precip_var].attrs["transform"] + or dataset1[precip_var].attrs["transform"] is None + and dataset2[precip_var].attrs["transform"] is None ) assert ( - dataset1["precip_intensity"].attrs["zerovalue"] - == dataset2["precip_intensity"].attrs["zerovalue"] + dataset1[precip_var].attrs["accutime"] == dataset2[precip_var].attrs["accutime"] ) diff --git a/pysteps/tests/test_utils_conversion.py b/pysteps/tests/test_utils_conversion.py index 80d052b77..de48c928d 100644 --- a/pysteps/tests/test_utils_conversion.py +++ b/pysteps/tests/test_utils_conversion.py @@ -104,7 +104,7 @@ "transform": None, "accutime": 5, "threshold": 1.25892541, - "zerovalue": 0, + "zerovalue": 0.0, }, ) }, @@ -138,7 +138,7 @@ "transform": None, "accutime": 5, "threshold": 15.10710494, - "zerovalue": 0, + "zerovalue": 0.0, }, ) }, @@ -172,7 +172,7 @@ "transform": None, "accutime": 5, "threshold": 0.04210719, - "zerovalue": 0, + "zerovalue": 0.0, }, ) }, @@ -206,7 +206,7 @@ "transform": None, "accutime": 5, "threshold": 2.71828183, - "zerovalue": 0, + "zerovalue": 0.0, }, ) }, @@ -240,7 +240,7 @@ "transform": None, "accutime": 5, "threshold": 32.61938194, - "zerovalue": 0, + "zerovalue": 0.0, }, ) }, @@ -273,8 +273,8 @@ "units": "mm/h", "transform": None, "accutime": 5, - "threshold": 1, - "zerovalue": 1, + "threshold": 1.0, + "zerovalue": 1.0, }, ) }, @@ -307,8 +307,8 @@ "units": "mm/h", "transform": None, "accutime": 5, - 
"threshold": 12, - "zerovalue": 12, + "threshold": 12.0, + "zerovalue": 12.0, }, ) }, @@ -326,220 +326,429 @@ def test_to_rainrate(dataset, expected): # to_raindepth -test_data = [ - ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.08333333]), - ), +test_data_to_raindepth = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.08333333]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.08333333, + "zerovalue": 0.08333333, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.10491045]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1.25892541]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, 
+ ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.10491045]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.10491045, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "dBZ", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.00350893]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.25892541]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.25892541, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "log", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.22652349]), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.00350893]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.00350893, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "log", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([2.71828183]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.22652349]), + { + 
"units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.22652349, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.08333333]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.08333333]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.08333333, + "zerovalue": 0.08333333, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1.0]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, expected", test_data) -def test_to_raindepth(R, metadata, expected): +@pytest.mark.parametrize("dataset, expected", test_data_to_raindepth) +def test_to_raindepth(dataset, expected): """Test the to_raindepth.""" - assert_array_almost_equal(conversion.to_raindepth(R, metadata)[0], expected) + actual = conversion.to_raindepth(dataset) + assert_dataset_equivalent(actual, expected) # to_reflectivity -test_data = [ - ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 
0, - }, - np.array([23.01029996]), - ), - ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([40.27719989]), - ), - ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([24.61029996]), - ), - ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([41.87719989]), - ), - ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "dBZ", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), - ), +test_data_to_reflectivity = [ ( - np.array([1]), - { - "accutime": 5, - "transform": "log", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([29.95901167]), - ), - ( - np.array([1.0]), - { - "accutime": 5, - "transform": "log", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([47.2259116]), - ), - ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([23.01029996]), - ), - ( - np.array([1.0]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([40.27719989]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([23.01029996]), + { + "units": "dBZ", + "transform": None, + "accutime": 5, + "threshold": 23.01029996, + "zerovalue": 23.01029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), + # ( + # np.array([1]), + # { + # "accutime": 5, + # "transform": None, + # "unit": "mm/h", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([23.01029996]), + # ), + # ( + # np.array([1]), + # { 
+ # "accutime": 5, + # "transform": None, + # "unit": "mm", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([40.27719989]), + # ), + # ( + # np.array([1]), + # { + # "accutime": 5, + # "transform": "dB", + # "unit": "mm/h", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([24.61029996]), + # ), + # ( + # np.array([1]), + # { + # "accutime": 5, + # "transform": "dB", + # "unit": "mm", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([41.87719989]), + # ), + # ( + # np.array([1]), + # { + # "accutime": 5, + # "transform": "dB", + # "unit": "dBZ", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([1]), + # ), + # ( + # np.array([1]), + # { + # "accutime": 5, + # "transform": "log", + # "unit": "mm/h", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([29.95901167]), + # ), + # ( + # np.array([1.0]), + # { + # "accutime": 5, + # "transform": "log", + # "unit": "mm", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([47.2259116]), + # ), + # ( + # np.array([1]), + # { + # "accutime": 5, + # "transform": "sqrt", + # "unit": "mm/h", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([23.01029996]), + # ), + # ( + # np.array([1.0]), + # { + # "accutime": 5, + # "transform": "sqrt", + # "unit": "mm", + # "threshold": 0, + # "zerovalue": 0, + # }, + # np.array([40.27719989]), + # ), ] -@pytest.mark.parametrize("R, metadata, expected", test_data) -def test_to_reflectivity(R, metadata, expected): +@pytest.mark.parametrize("dataset, expected", test_data_to_reflectivity) +def test_to_reflectivity(dataset, expected): """Test the to_reflectivity.""" - assert_array_almost_equal(conversion.to_reflectivity(R, metadata)[0], expected) + actual = conversion.to_reflectivity(dataset) + assert_dataset_equivalent(actual, expected) diff --git a/pysteps/utils/conversion.py b/pysteps/utils/conversion.py index 87fe22deb..21e32360b 100644 --- a/pysteps/utils/conversion.py +++ b/pysteps/utils/conversion.py @@ -90,7 +90,6 @@ 
def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): dataset = dataset.copy(deep=True) precip_var = dataset.attrs["precip_var"] metadata = dataset[precip_var].attrs - precip_data = dataset[precip_var].values if metadata["transform"] is not None: if metadata["transform"] == "dB": @@ -154,18 +153,14 @@ def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): return dataset -def to_raindepth(R, metadata, zr_a=None, zr_b=None): +def to_raindepth(dataset: xr.Dataset, zr_a=None, zr_b=None): """ Convert to rain depth [mm]. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. - metadata: dict - Metadata dictionary containing the accutime, transform, unit, threshold - and zerovalue attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be (back-)transformed. Additionally, in case of conversion to/from reflectivity units, the zr_a and zr_b attributes are also required, @@ -177,46 +172,49 @@ def to_raindepth(R, metadata, zr_a=None, zr_b=None): Returns ------- - R: array-like - Array of any shape containing the converted units. - metadata: dict - The metadata with updated attributes. + dataset: Dataset + Dataset containing the converted units. 
""" - R = R.copy() - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs if metadata["transform"] is not None: if metadata["transform"] == "dB": - R, metadata = transformation.dB_transform(R, metadata, inverse=True) + dataset = transformation.dB_transform(dataset, inverse=True) elif metadata["transform"] in ["BoxCox", "log"]: - R, metadata = transformation.boxcox_transform(R, metadata, inverse=True) + dataset = transformation.boxcox_transform(dataset, inverse=True) elif metadata["transform"] == "NQT": - R, metadata = transformation.NQ_transform(R, metadata, inverse=True) + dataset = transformation.NQ_transform(dataset, inverse=True) elif metadata["transform"] == "sqrt": - R, metadata = transformation.sqrt_transform(R, metadata, inverse=True) + dataset = transformation.sqrt_transform(dataset, inverse=True) else: - raise ValueError("Unknown transformation %s" % metadata["transform"]) + raise ValueError(f'Unknown transformation {metadata["transform"]}') + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values - if metadata["unit"] == "mm" and metadata["transform"] is None: + if metadata["units"] == "mm" and metadata["transform"] is None: pass - elif metadata["unit"] == "mm/h": + elif metadata["units"] == "mm/h": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too - R = R / 60.0 * metadata["accutime"] + precip_data = precip_data / 60.0 * metadata["accutime"] threshold = threshold / 60.0 * metadata["accutime"] zerovalue = zerovalue / 60.0 * metadata["accutime"] metadata["threshold"] = threshold metadata["zerovalue"] = zerovalue - elif metadata["unit"] == "dBZ": + elif metadata["units"] == "dBZ": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too @@ -225,7 +223,7 
@@ def to_raindepth(R, metadata, zr_a=None, zr_b=None): zr_a = metadata.get("zr_a", 200.0) # Default to Marshall–Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # Default to Marshall–Palmer - R = (R / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] + precip_data = (precip_data / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] threshold = (threshold / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] zerovalue = (zerovalue / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] @@ -236,27 +234,22 @@ def to_raindepth(R, metadata, zr_a=None, zr_b=None): else: raise ValueError( - "Cannot convert unit %s and transform %s to mm" - % (metadata["unit"], metadata["transform"]) + f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to mm' ) - metadata["unit"] = "mm" - - return R, metadata + dataset[precip_var].data[:] = precip_data + dataset = _change_unit(dataset, precip_var, "mm") + return dataset -def to_reflectivity(R, metadata, zr_a=None, zr_b=None): +def to_reflectivity(dataset: xr.Dataset, zr_a=None, zr_b=None): """ Convert to reflectivity [dBZ]. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. - metadata: dict - Metadata dictionary containing the accutime, transform, unit, threshold - and zerovalue attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. + dataset: Dataset + Dataset to be (back-)transformed. Additionally, in case of conversion to/from reflectivity units, the zr_a and zr_b attributes are also required, @@ -268,73 +261,81 @@ def to_reflectivity(R, metadata, zr_a=None, zr_b=None): Returns ------- - R: array-like - Array of any shape containing the converted units. - metadata: dict - The metadata with updated attributes. + dataset: Dataset + Dataset containing the converted units. 
""" - R = R.copy() - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs if metadata["transform"] is not None: if metadata["transform"] == "dB": - R, metadata = transformation.dB_transform(R, metadata, inverse=True) + dataset = transformation.dB_transform(dataset, inverse=True) elif metadata["transform"] in ["BoxCox", "log"]: - R, metadata = transformation.boxcox_transform(R, metadata, inverse=True) + dataset = transformation.boxcox_transform(dataset, inverse=True) elif metadata["transform"] == "NQT": - R, metadata = transformation.NQ_transform(R, metadata, inverse=True) + dataset = transformation.NQ_transform(dataset, inverse=True) elif metadata["transform"] == "sqrt": - R, metadata = transformation.sqrt_transform(R, metadata, inverse=True) + dataset = transformation.sqrt_transform(dataset, inverse=True) else: - raise ValueError("Unknown transformation %s" % metadata["transform"]) + raise ValueError(f'Unknown transformation {metadata["transform"]}') - if metadata["unit"] == "mm/h": + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values + + if metadata["units"] == "mm/h": # Z to R if zr_a is None: zr_a = metadata.get("zr_a", 200.0) # Default to Marshall–Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # Default to Marshall–Palmer - R = zr_a * R**zr_b + precip_data = zr_a * precip_data**zr_b metadata["threshold"] = zr_a * metadata["threshold"] ** zr_b metadata["zerovalue"] = zr_a * metadata["zerovalue"] ** zr_b metadata["zr_a"] = zr_a metadata["zr_b"] = zr_b # Z to dBZ - R, metadata = transformation.dB_transform(R, metadata) + dataset = transformation.dB_transform(dataset) - elif metadata["unit"] == "mm": + elif metadata["units"] == "mm": # depth to rate - R, metadata = to_rainrate(R, metadata) + dataset = to_rainrate(dataset) # Z to R if zr_a is None: zr_a = metadata.get("zr_a", 200.0) # Default 
to Marshall-Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # Default to Marshall-Palmer - R = zr_a * R**zr_b + precip_data = zr_a * precip_data**zr_b metadata["threshold"] = zr_a * metadata["threshold"] ** zr_b metadata["zerovalue"] = zr_a * metadata["zerovalue"] ** zr_b metadata["zr_a"] = zr_a metadata["zr_b"] = zr_b # Z to dBZ - R, metadata = transformation.dB_transform(R, metadata) + dataset = transformation.dB_transform(dataset) - elif metadata["unit"] == "dBZ": + elif metadata["units"] == "dBZ": # Z to dBZ - R, metadata = transformation.dB_transform(R, metadata) + dataset = transformation.dB_transform(dataset) else: raise ValueError( - "Cannot convert unit %s and transform %s to mm/h" - % (metadata["unit"], metadata["transform"]) + f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to dBZ' ) - metadata["unit"] = "dBZ" - return R, metadata + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values + + dataset[precip_var].data[:] = precip_data + dataset = _change_unit(dataset, precip_var, "dBZ") + return dataset From 904bce88255ea2c03875d0b8116d044c98f20fb1 Mon Sep 17 00:00:00 2001 From: Mats Veldhuizen Date: Thu, 18 Jul 2024 12:11:20 +0200 Subject: [PATCH 04/65] conversion.py works on the xarray datamodel --- pysteps/tests/test_utils_conversion.py | 373 ++++++++++++++++++------- pysteps/utils/conversion.py | 17 +- 2 files changed, 282 insertions(+), 108 deletions(-) diff --git a/pysteps/tests/test_utils_conversion.py b/pysteps/tests/test_utils_conversion.py index de48c928d..bdf1fa42f 100644 --- a/pysteps/tests/test_utils_conversion.py +++ b/pysteps/tests/test_utils_conversion.py @@ -635,115 +635,288 @@ def test_to_raindepth(dataset, expected): np.array([23.01029996]), { "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 23.01029996, + "zerovalue": 18.01029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + 
xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", "transform": None, "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([40.27719989]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 40.27719989, + "zerovalue": 35.27719989, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([24.61029996]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 24.61029996, + "zerovalue": 19.61029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([41.87719989]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 41.87719989, + "zerovalue": 36.87719989, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": -4.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + 
xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([29.95901167]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 29.95901167, + "zerovalue": 24.95901167, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([47.2259116]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 47.2259116, + "zerovalue": 42.2259116, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([23.01029996]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, "threshold": 23.01029996, - "zerovalue": 23.01029996, + "zerovalue": 18.01029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + ), + ( + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([40.27719989]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 40.27719989, + "zerovalue": 35.27719989, }, ) }, 
attrs={"precip_var": "reflectivity"}, ), ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": None, - # "unit": "mm/h", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([23.01029996]), - # ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": None, - # "unit": "mm", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([40.27719989]), - # ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": "dB", - # "unit": "mm/h", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([24.61029996]), - # ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": "dB", - # "unit": "mm", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([41.87719989]), - # ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": "dB", - # "unit": "dBZ", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([1]), - # ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": "log", - # "unit": "mm/h", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([29.95901167]), - # ), - # ( - # np.array([1.0]), - # { - # "accutime": 5, - # "transform": "log", - # "unit": "mm", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([47.2259116]), - # ), - # ( - # np.array([1]), - # { - # "accutime": 5, - # "transform": "sqrt", - # "unit": "mm/h", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([23.01029996]), - # ), - # ( - # np.array([1.0]), - # { - # "accutime": 5, - # "transform": "sqrt", - # "unit": "mm", - # "threshold": 0, - # "zerovalue": 0, - # }, - # np.array([40.27719989]), - # ), ] diff --git a/pysteps/utils/conversion.py b/pysteps/utils/conversion.py index 21e32360b..68228e981 100644 --- a/pysteps/utils/conversion.py +++ b/pysteps/utils/conversion.py @@ -302,13 +302,14 @@ def to_reflectivity(dataset: xr.Dataset, zr_a=None, zr_b=None): metadata["zr_a"] = zr_a metadata["zr_b"] = zr_b - # Z to dBZ - dataset = transformation.dB_transform(dataset) - elif 
metadata["units"] == "mm": # depth to rate dataset = to_rainrate(dataset) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values + # Z to R if zr_a is None: zr_a = metadata.get("zr_a", 200.0) # Default to Marshall-Palmer @@ -320,18 +321,18 @@ def to_reflectivity(dataset: xr.Dataset, zr_a=None, zr_b=None): metadata["zr_a"] = zr_a metadata["zr_b"] = zr_b - # Z to dBZ - dataset = transformation.dB_transform(dataset) - elif metadata["units"] == "dBZ": - # Z to dBZ - dataset = transformation.dB_transform(dataset) + pass else: raise ValueError( f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to dBZ' ) + dataset[precip_var].data[:] = precip_data + # Z to dBZ + dataset = transformation.dB_transform(dataset) + precip_var = dataset.attrs["precip_var"] metadata = dataset[precip_var].attrs precip_data = dataset[precip_var].values From b3aa00912ea172443851cbd526fcf633e8d9e880 Mon Sep 17 00:00:00 2001 From: Mats Veldhuizen Date: Thu, 18 Jul 2024 14:02:58 +0200 Subject: [PATCH 05/65] fix converters.py --- pysteps/converters.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pysteps/converters.py b/pysteps/converters.py index f27e1572e..6c576c658 100644 --- a/pysteps/converters.py +++ b/pysteps/converters.py @@ -187,11 +187,9 @@ def convert_to_xarray_dataset( } if grid_mapping_var_name is not None: coords[grid_mapping_name] = ( - ( - [], - None, - {"grid_mapping_name": grid_mapping_name, **grid_mapping_params}, - ), + [], + None, + {"grid_mapping_name": grid_mapping_name, **grid_mapping_params}, ) attrs = { "Conventions": "CF-1.7", @@ -199,4 +197,5 @@ def convert_to_xarray_dataset( "projection": metadata["projection"], "precip_var": var_name, } - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + dataset = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + return dataset.sortby(["y", "x"]) From 
016d28f3219443bf88c5b54015623af543971319 Mon Sep 17 00:00:00 2001 From: mats-knmi <145579783+mats-knmi@users.noreply.github.com> Date: Mon, 12 Aug 2024 08:57:19 +0200 Subject: [PATCH 06/65] make dimension.py xarray compatible (#397) * make dimension.py xarray compatible * convert final method in the dimension module * nanmin in stead of zerovalue in square domain method * make test steps skill run * undo accidental change * remove commented out code * The dataset can contain more than one dataarray * Address pull request comments * Add links to dataset documentation everywhere --- pysteps/converters.py | 18 +- pysteps/io/importers.py | 87 ++++ pysteps/tests/helpers.py | 40 +- pysteps/tests/test_nowcasts_steps.py | 20 +- pysteps/tests/test_utils_dimension.py | 230 ++++++---- pysteps/utils/conversion.py | 21 +- pysteps/utils/dimension.py | 618 ++++++++++---------------- pysteps/utils/transformation.py | 28 +- 8 files changed, 522 insertions(+), 540 deletions(-) diff --git a/pysteps/converters.py b/pysteps/converters.py index 6c576c658..2825af612 100644 --- a/pysteps/converters.py +++ b/pysteps/converters.py @@ -12,6 +12,7 @@ """ import numpy as np +import numpy.typing as npt import pyproj import xarray as xr @@ -67,6 +68,15 @@ def _convert_proj4_to_grid_mapping(proj4str): return grid_mapping_var_name, grid_mapping_name, params +def compute_lat_lon( + x_r: npt.ArrayLike, y_r: npt.ArrayLike, projection: str +) -> tuple[npt.ArrayLike, npt.ArrayLike]: + x_2d, y_2d = np.meshgrid(x_r, y_r) + pr = pyproj.Proj(projection) + lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True) + return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape) + + def convert_to_xarray_dataset( precip: np.ndarray, quality: np.ndarray | None, @@ -105,9 +115,7 @@ def convert_to_xarray_dataset( if metadata["yorigin"] == "upper": y_r = np.flip(y_r) - x_2d, y_2d = np.meshgrid(x_r, y_r) - pr = pyproj.Proj(metadata["projection"]) - lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True) + lat, lon 
= compute_lat_lon(x_r, y_r, metadata["projection"]) ( grid_mapping_var_name, @@ -166,7 +174,7 @@ def convert_to_xarray_dataset( ), "lon": ( ["y", "x"], - lon.reshape(precip.shape), + lon, { "long_name": "longitude coordinate", "standard_name": "longitude", @@ -176,7 +184,7 @@ def convert_to_xarray_dataset( ), "lat": ( ["y", "x"], - lat.reshape(precip.shape), + lat, { "long_name": "latitude coordinate", "standard_name": "latitude", diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py index 5c2928203..f61d4b25b 100644 --- a/pysteps/io/importers.py +++ b/pysteps/io/importers.py @@ -65,6 +65,93 @@ | zr_b | the Z-R exponent b in Z = a*R**b | +------------------+----------------------------------------------------------+ +The data and metadata is then postprocessed into an xarray dataset. This dataset will +always contain an x and y dimension, but can be extended with a time dimension and/or +an ensemble member dimension over the course of the process. + +The dataset can contain the following coordinate variables: + + +.. 
tabularcolumns:: |p{2cm}|L| + ++---------------+-------------------------------------------------------------------------------------------+ +| Coordinate | Description | ++===============+===========================================================================================+ +| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | ++---------------+-------------------------------------------------------------------------------------------+ +| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | ++---------------+-------------------------------------------------------------------------------------------+ +| lat | latitude coordinate in degrees | ++---------------+-------------------------------------------------------------------------------------------+ +| lon | longitude coordinate in degrees | ++---------------+-------------------------------------------------------------------------------------------+ +| time | forecast time in seconds since forecast start time | ++---------------+-------------------------------------------------------------------------------------------+ +| member | ensemble member number (integer) | ++---------------+-------------------------------------------------------------------------------------------+ + + +The dataset can contain the following data variables: + +.. 
tabularcolumns:: |p{2cm}|L| +-------------------+-----------------------------------------------------------------------------------------------------------+ +| Variable | Description | ++===================+===========================================================================================================+ +| precip_intensity, | precipitation data, based on the unit the data has it is stored in one of these 3 possible variables | +| precip_accum | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, | +| or reflectivity | the attributes of this variable contain metadata relevant to this attribute (see below) | ++-------------------+-----------------------------------------------------------------------------------------------------------+ +| quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything | ++-------------------+-----------------------------------------------------------------------------------------------------------+ +Some of the metadata in the metadata dictionary is not explicitly stored in the dataset, +but is still implicitly present. For example ``x1`` can easily be found by taking the first +value from the x coordinate variable. Metadata that is not implicitly present is explicitly +stored either in the dataset's global attributes or as attributes of the precipitation variable. +Data that relates to the entire dataset is stored in the global attributes. The following data +is stored in the global attributes: + +..
tabularcolumns:: |p{2cm}|L| + ++------------------+----------------------------------------------------------+ +| Key | Value | ++==================+==========================================================+ +| projection | PROJ.4-compatible projection definition | ++------------------+----------------------------------------------------------+ +| institution | name of the institution who provides the data | ++------------------+----------------------------------------------------------+ +| precip_var | the name of the precipitation variable in this dataset | ++------------------+----------------------------------------------------------+ + +The following data is stored as attributes of the precipitation variable: + +.. tabularcolumns:: |p{2cm}|L| + ++------------------+----------------------------------------------------------+ +| Key | Value | ++==================+==========================================================+ +| units | the physical unit of the data: 'mm/h', 'mm' or 'dBZ' | ++------------------+----------------------------------------------------------+ +| transform | the transformation of the data: None, 'dB', 'Box-Cox' or | +| | others | ++------------------+----------------------------------------------------------+ +| accutime | the accumulation time in minutes of the data, float | ++------------------+----------------------------------------------------------+ +| threshold | the rain/no rain threshold with the same unit, | +| | transformation and accutime of the data. | ++------------------+----------------------------------------------------------+ +| zerovalue | the value assigned to the no rain pixels with the same | +| | unit, transformation and accutime of the data. 
| ++------------------+----------------------------------------------------------+ +| zr_a | the Z-R constant a in Z = a*R**b | ++------------------+----------------------------------------------------------+ +| zr_b | the Z-R exponent b in Z = a*R**b | ++------------------+----------------------------------------------------------+ + +Furthermore the dataset can contain some additional metadata to make the dataset +CF-compliant. + Available Importers ------------------- diff --git a/pysteps/tests/helpers.py b/pysteps/tests/helpers.py index 68f3f7527..24c58f8d1 100644 --- a/pysteps/tests/helpers.py +++ b/pysteps/tests/helpers.py @@ -14,6 +14,7 @@ import pysteps as stp from pysteps import io, rcparams from pysteps.utils import aggregate_fields_space +from pysteps.utils.dimension import clip_domain _reference_dates = dict() _reference_dates["bom"] = datetime(2018, 6, 16, 10, 0) @@ -53,7 +54,6 @@ def get_precipitation_fields( num_prev_files=0, num_next_files=0, return_raw=False, - metadata=False, upscale=None, source="mch", log_transform=True, @@ -100,9 +100,6 @@ def get_precipitation_fields( The pre-processing steps are: 1) Convert to mm/h, 2) Mask invalid values, 3) Log-transform the data [dBR]. - metadata: bool, optional - If True, also return file metadata. - upscale: float or None, optional Upscale fields in space during the pre-processing steps. If it is None, the precipitation field is not modified. @@ -127,8 +124,8 @@ def get_precipitation_fields( Returns ------- - reference_field : array - metadata : dict + dataset: xarray.Dataset + As described in the documentation of :py:mod:`pysteps.io.importers`. 
""" if source == "bom": @@ -186,41 +183,34 @@ def get_precipitation_fields( # Read the radar composites importer = io.get_method(importer_name, "importer") - ref_dataset = io.read_timeseries(fns, importer, **_importer_kwargs) + dataset = io.read_timeseries(fns, importer, **_importer_kwargs) if not return_raw: - if (num_prev_files == 0) and (num_next_files == 0): - # Remove time dimension - reference_field = np.squeeze(reference_field) + precip_var = dataset.attrs["precip_var"] # Convert to mm/h - ref_dataset = stp.utils.to_rainrate(ref_dataset) + dataset = stp.utils.to_rainrate(dataset) + precip_var = dataset.attrs["precip_var"] # Clip domain - ref_dataset = stp.utils.clip_domain(ref_dataset, clip) + dataset = clip_domain(dataset, clip) # Upscale data - reference_field, ref_metadata = aggregate_fields_space( - reference_field, ref_metadata, upscale - ) + dataset = aggregate_fields_space(dataset, upscale) # Mask invalid values - reference_field = np.ma.masked_invalid(reference_field) + valid_mask = np.isfinite(dataset[precip_var].values) if log_transform: # Log-transform the data [dBR] - reference_field, ref_metadata = stp.utils.dB_transform( - reference_field, ref_metadata, threshold=0.1, zerovalue=-15.0 - ) + dataset = stp.utils.dB_transform(dataset, threshold=0.1, zerovalue=-15.0) # Set missing values with the fill value - np.ma.set_fill_value(reference_field, ref_metadata["zerovalue"]) - reference_field.data[reference_field.mask] = ref_metadata["zerovalue"] - - if metadata: - return reference_field, ref_metadata + metadata = dataset[precip_var].attrs + zerovalue = metadata["zerovalue"] + dataset[precip_var].data[~valid_mask] = zerovalue - return reference_field + return dataset def smart_assert(actual_value, expected, tolerance=None): diff --git a/pysteps/tests/test_nowcasts_steps.py b/pysteps/tests/test_nowcasts_steps.py index 61af86ba5..adb6ea917 100644 --- a/pysteps/tests/test_nowcasts_steps.py +++ b/pysteps/tests/test_nowcasts_steps.py @@ -7,7 +7,6 @@ from 
pysteps import io, motion, nowcasts, verification from pysteps.tests.helpers import get_precipitation_fields - steps_arg_names = ( "n_ens_members", "n_cascade_levels", @@ -44,28 +43,29 @@ def test_steps_skill( ): """Tests STEPS nowcast skill.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs + precip_data = dataset_input[precip_var].values pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + retrieved_motion = oflow_method(precip_data) nowcast_method = nowcasts.get_method("steps") precip_forecast = nowcast_method( - precip_input, + precip_data, retrieved_motion, timesteps=timesteps, precip_thr=metadata["threshold"], @@ -86,7 +86,9 @@ def test_steps_skill( timesteps if isinstance(timesteps, int) else len(timesteps) ) - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" diff --git a/pysteps/tests/test_utils_dimension.py b/pysteps/tests/test_utils_dimension.py index ab753ed7d..2bbb63f58 100644 --- a/pysteps/tests/test_utils_dimension.py +++ b/pysteps/tests/test_utils_dimension.py @@ -4,63 +4,89 @@ import numpy as np import pytest +import xarray as xr from numpy.testing import assert_array_equal from pytest import raises +from pysteps.converters import convert_to_xarray_dataset from pysteps.utils import 
dimension +fillvalues_metadata = { + "x1": 0, + "x2": 4, + "y1": 0, + "y2": 4, + "xpixelsize": 1, + "ypixelsize": 1, + "zerovalue": 0, + "yorigin": "lower", + "unit": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "projection": "+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378.137 +b=6356.752 +x_0=0 +y_0=0", + "zr_a": 200, + "zr_b": 1.6, + "cartesian_unit": "km", + "institution": "KNMI", +} + test_data_not_trim = ( # "data, window_size, axis, method, expected" - (np.arange(6), 2, 0, "mean", np.array([0.5, 2.5, 4.5])), + ( + np.arange(12).reshape(2, 6), + 2, + "x", + "mean", + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + ), ( np.arange(4 * 6).reshape(4, 6), (2, 3), - (0, 1), + ("y", "x"), "sum", np.array([[24, 42], [96, 114]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 2), - (0, 1), + ("y", "x"), "sum", np.array([[14, 22, 30], [62, 70, 78]]), ), ( np.arange(4 * 6).reshape(4, 6), 2, - (0, 1), + ("y", "x"), "sum", np.array([[14, 22, 30], [62, 70, 78]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 3), - (0, 1), + ("y", "x"), "mean", np.array([[4.0, 7.0], [16.0, 19.0]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 2), - (0, 1), + ("y", "x"), "mean", np.array([[3.5, 5.5, 7.5], [15.5, 17.5, 19.5]]), ), ( np.arange(4 * 6).reshape(4, 6), 2, - (0, 1), + ("y", "x"), "mean", np.array([[3.5, 5.5, 7.5], [15.5, 17.5, 19.5]]), ), ) -@pytest.mark.parametrize( - "data, window_size, axis, method, expected", test_data_not_trim -) -def test_aggregate_fields(data, window_size, axis, method, expected): +@pytest.mark.parametrize("data, window_size, dim, method, expected", test_data_not_trim) +def test_aggregate_fields(data, window_size, dim, method, expected): """ Test the aggregate_fields function. The windows size must divide exactly the data dimensions. @@ -68,23 +94,25 @@ def test_aggregate_fields(data, window_size, axis, method, expected): windows size does not divide the data dimensions. The length of each dimension should be larger than 2. 
""" + dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) - assert_array_equal( - dimension.aggregate_fields(data, window_size, axis=axis, method=method), - expected, - ) + actual = dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) + assert_array_equal(actual["precip_intensity"].values, expected) # Test the trimming capabilities. - data = np.pad(data, (0, 1)) - assert_array_equal( - dimension.aggregate_fields( - data, window_size, axis=axis, method=method, trim=True - ), - expected, + if np.ndim(window_size) == 0: + data = np.pad(data, ((0, 0), (0, 1))) + else: + data = np.pad(data, (0, 1)) + dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + + actual = dimension.aggregate_fields( + dataset, window_size, dim=dim, method=method, trim=True ) + assert_array_equal(actual["precip_intensity"].values, expected) with raises(ValueError): - dimension.aggregate_fields(data, window_size, axis=axis, method=method) + dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) def test_aggregate_fields_errors(): @@ -93,80 +121,124 @@ def test_aggregate_fields_errors(): function. 
""" data = np.arange(4 * 6).reshape(4, 6) + dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) with raises(ValueError): - dimension.aggregate_fields(data, -1, axis=0) + dimension.aggregate_fields(dataset, -1, dim="y") with raises(ValueError): - dimension.aggregate_fields(data, 0, axis=0) + dimension.aggregate_fields(dataset, 0, dim="y") with raises(ValueError): - dimension.aggregate_fields(data, 1, method="invalid") + dimension.aggregate_fields(dataset, 1, method="invalid") with raises(TypeError): - dimension.aggregate_fields(data, (1, 1), axis=0) + dimension.aggregate_fields(dataset, (1, 1), dim="y") # aggregate_fields_time -timestamps = [dt.datetime.now() + dt.timedelta(minutes=t) for t in range(10)] -test_data = [ +now = dt.datetime.now() +timestamps = [now + dt.timedelta(minutes=t) for t in range(10)] +test_data_time = [ ( - np.ones((10, 1, 1)), + np.ones((2, 2)), {"unit": "mm/h", "timestamps": timestamps}, 2, False, - np.ones((5, 1, 1)), + np.ones((5, 2, 2)), ), ( - np.ones((10, 1, 1)), + np.ones((2, 2)), {"unit": "mm", "timestamps": timestamps}, 2, False, - 2 * np.ones((5, 1, 1)), + 2 * np.ones((5, 2, 2)), ), ] @pytest.mark.parametrize( - "R, metadata, time_window_min, ignore_nan, expected", test_data + "data, metadata, time_window_min, ignore_nan, expected", test_data_time ) -def test_aggregate_fields_time(R, metadata, time_window_min, ignore_nan, expected): +def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expected): """Test the aggregate_fields_time.""" + dataset_ref = convert_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + datasets = [] + for timestamp in metadata["timestamps"]: + dataset_ = dataset_ref.copy(deep=True) + dataset_ = dataset_.expand_dims(dim="time", axis=0) + dataset_ = dataset_.assign_coords(time=("time", [timestamp])) + datasets.append(dataset_) + + dataset = xr.concat(datasets, dim="time") assert_array_equal( - dimension.aggregate_fields_time(R, metadata, 
time_window_min, ignore_nan)[0], + dimension.aggregate_fields_time(dataset, time_window_min, ignore_nan)[ + "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum" + ].values, expected, ) # aggregate_fields_space -test_data = [ +test_data_space = [ ( - np.ones((1, 10, 10)), - {"unit": "mm/h", "xpixelsize": 1, "ypixelsize": 1}, + np.ones((10, 10)), + { + "unit": "mm/h", + "x1": 0, + "x2": 10, + "y1": 0, + "y2": 10, + "xpixelsize": 1, + "ypixelsize": 1, + }, 2, False, - np.ones((1, 5, 5)), + np.ones((5, 5)), ), ( - np.ones((1, 10, 10)), - {"unit": "mm", "xpixelsize": 1, "ypixelsize": 1}, + np.ones((10, 10)), + { + "unit": "mm", + "x1": 0, + "x2": 10, + "y1": 0, + "y2": 10, + "xpixelsize": 1, + "ypixelsize": 1, + }, 2, False, - np.ones((1, 5, 5)), + np.ones((5, 5)), ), ( - np.ones((1, 10, 10)), - {"unit": "mm/h", "xpixelsize": 1, "ypixelsize": 2}, - (2, 4), + np.ones((10, 10)), + { + "unit": "mm/h", + "x1": 0, + "x2": 10, + "y1": 0, + "y2": 20, + "xpixelsize": 1, + "ypixelsize": 2, + }, + (4, 2), False, - np.ones((1, 5, 5)), + np.ones((5, 5)), ), ] -@pytest.mark.parametrize("R, metadata, space_window, ignore_nan, expected", test_data) -def test_aggregate_fields_space(R, metadata, space_window, ignore_nan, expected): +@pytest.mark.parametrize( + "data, metadata, space_window, ignore_nan, expected", test_data_space +) +def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expected): """Test the aggregate_fields_space.""" + dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata}) assert_array_equal( - dimension.aggregate_fields_space(R, metadata, space_window, ignore_nan)[0], + dimension.aggregate_fields_space(dataset, space_window, ignore_nan)[ + "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum" + ].values, expected, ) @@ -174,64 +246,40 @@ def test_aggregate_fields_space(R, metadata, space_window, ignore_nan, expected) # clip_domain R = np.zeros((4, 4)) R[:2, :] = 1 -test_data = [ 
+test_data_clip_domain = [ ( R, - { - "x1": 0, - "x2": 4, - "y1": 0, - "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, - "zerovalue": 0, - "yorigin": "upper", - }, + {"yorigin": "lower"}, None, R, ), ( R, - { - "x1": 0, - "x2": 4, - "y1": 0, - "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, - "zerovalue": 0, - "yorigin": "lower", - }, + {"yorigin": "lower"}, (2, 4, 2, 4), np.zeros((2, 2)), ), ( R, - { - "x1": 0, - "x2": 4, - "y1": 0, - "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, - "zerovalue": 0, - "yorigin": "upper", - }, + {"yorigin": "upper"}, (2, 4, 2, 4), np.ones((2, 2)), ), ] -@pytest.mark.parametrize("R, metadata, extent, expected", test_data) +@pytest.mark.parametrize("R, metadata, extent, expected", test_data_clip_domain) def test_clip_domain(R, metadata, extent, expected): """Test the clip_domain.""" - assert_array_equal(dimension.clip_domain(R, metadata, extent)[0], expected) + dataset = convert_to_xarray_dataset(R, None, {**fillvalues_metadata, **metadata}) + assert_array_equal( + dimension.clip_domain(dataset, extent)["precip_intensity"].values, expected + ) # square_domain R = np.zeros((4, 2)) -test_data = [ +test_data_square = [ # square by padding ( R, @@ -258,7 +306,7 @@ def test_clip_domain(R, metadata, extent, expected): "y2": 4, "xpixelsize": 1, "ypixelsize": 1, - "orig_domain": (4, 2), + "orig_domain": (np.array([0.5, 1.5, 2.5, 3.5]), np.array([0.5, 1.5])), "square_method": "pad", }, "pad", @@ -275,7 +323,7 @@ def test_clip_domain(R, metadata, extent, expected): "y2": 3, "xpixelsize": 1, "ypixelsize": 1, - "orig_domain": (4, 2), + "orig_domain": (np.array([0.5, 1.5, 2.5, 3.5]), np.array([0.5, 1.5])), "square_method": "crop", }, "crop", @@ -285,9 +333,15 @@ def test_clip_domain(R, metadata, extent, expected): ] -@pytest.mark.parametrize("R, metadata, method, inverse, expected", test_data) -def test_square_domain(R, metadata, method, inverse, expected): +@pytest.mark.parametrize("data, metadata, method, inverse, expected", test_data_square) +def 
test_square_domain(data, metadata, method, inverse, expected): """Test the square_domain.""" + dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata}) + dataset["precip_intensity"].attrs = { + **dataset["precip_intensity"].attrs, + **metadata, + } assert_array_equal( - dimension.square_domain(R, metadata, method, inverse)[0], expected + dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, + expected, ) diff --git a/pysteps/utils/conversion.py b/pysteps/utils/conversion.py index 68228e981..2ea6a3a12 100644 --- a/pysteps/utils/conversion.py +++ b/pysteps/utils/conversion.py @@ -70,8 +70,9 @@ def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): Parameters ---------- - dataset: Dataset - Dataset to be (back-)transformed. + dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. Additionally, in case of conversion to/from reflectivity units, the zr_a and zr_b attributes are also required, @@ -83,7 +84,7 @@ def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the converted units. """ @@ -159,8 +160,9 @@ def to_raindepth(dataset: xr.Dataset, zr_a=None, zr_b=None): Parameters ---------- - dataset: Dataset - Dataset to be (back-)transformed. + dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. Additionally, in case of conversion to/from reflectivity units, the zr_a and zr_b attributes are also required, @@ -172,7 +174,7 @@ def to_raindepth(dataset: xr.Dataset, zr_a=None, zr_b=None): Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the converted units. """ @@ -248,8 +250,9 @@ def to_reflectivity(dataset: xr.Dataset, zr_a=None, zr_b=None): Parameters ---------- - dataset: Dataset - Dataset to be (back-)transformed. 
+ dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. Additionally, in case of conversion to/from reflectivity units, the zr_a and zr_b attributes are also required, @@ -261,7 +264,7 @@ def to_reflectivity(dataset: xr.Dataset, zr_a=None, zr_b=None): Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the converted units. """ diff --git a/pysteps/utils/dimension.py b/pysteps/utils/dimension.py index 43b7e2ca5..efa459610 100644 --- a/pysteps/utils/dimension.py +++ b/pysteps/utils/dimension.py @@ -14,26 +14,34 @@ clip_domain square_domain """ - import numpy as np +import xarray as xr + +from pysteps.converters import compute_lat_lon _aggregation_methods = dict( sum=np.sum, mean=np.mean, nanmean=np.nanmean, nansum=np.nansum ) -def aggregate_fields_time(R, metadata, time_window_min, ignore_nan=False): +def aggregate_fields_time( + dataset: xr.Dataset, time_window_min, ignore_nan=False +) -> xr.Dataset: """Aggregate fields in time. + It attempts to aggregate the given dataset in the time direction in an integer + number of sections of length = ``time_window_min``. + If such a aggregation is not possible, an error is raised. + The data is aggregated by a method chosen based on the unit of the precipitation + data in the dataset. ``mean`` is used when the unit is ``mm/h`` and ``sum`` + is used when the unit is ``mm``. For other units an error is raised. + Parameters ---------- - R: array-like - Array of shape (t,m,n) or (l,t,m,n) containing - a time series of (ensemble) input fields. + dataset: xarray.Dataset + Dataset containing a time series of (ensemble) input fields + as described in the documentation of :py:mod:`pysteps.io.importers`. They must be evenly spaced in time. - metadata: dict - Metadata dictionary containing the timestamps and unit attributes as - described in the documentation of :py:mod:`pysteps.io.importers`. 
time_window_min: float or None The length in minutes of the time window that is used to aggregate the fields. @@ -45,12 +53,8 @@ def aggregate_fields_time(R, metadata, time_window_min, ignore_nan=False): Returns ------- - outputarray: array-like - The new array of aggregated fields of shape (k,m,n) or (l,k,m,n), where - k = t*delta/time_window_min and delta is the time interval between two - successive timestamps. - metadata: dict - The metadata with updated attributes. + dataset: xarray.Dataset + The new dataset. See also -------- @@ -58,40 +62,24 @@ def aggregate_fields_time(R, metadata, time_window_min, ignore_nan=False): pysteps.utils.dimension.aggregate_fields """ - R = R.copy() - metadata = metadata.copy() - if time_window_min is None: - return R, metadata - - unit = metadata["unit"] - timestamps = metadata["timestamps"] - if "leadtimes" in metadata: - leadtimes = metadata["leadtimes"] - - if len(R.shape) < 3: - raise ValueError("The number of dimension must be > 2") - if len(R.shape) == 3: - axis = 0 - if len(R.shape) == 4: - axis = 1 - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - if R.shape[axis] != len(timestamps): - raise ValueError( - "The list of timestamps has length %i, " % len(timestamps) - + "but R contains %i frames" % R.shape[axis] - ) + return dataset + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + + unit = metadata["units"] + + timestamps = dataset["time"].values # assumes that frames are evenly spaced - delta = (timestamps[1] - timestamps[0]).seconds / 60 + delta = (timestamps[1] - timestamps[0]) / np.timedelta64(1, "m") if delta == time_window_min: - return R, metadata - if (R.shape[axis] * delta) % time_window_min: - raise ValueError("time_window_size does not equally split R") + return dataset + if time_window_min % delta: + raise ValueError("time_window_size does not equally split dataset") - nframes = int(time_window_min / delta) + window_size = 
int(time_window_min / delta) # specify the operator to be used to aggregate # the values within the time window @@ -100,55 +88,47 @@ def aggregate_fields_time(R, metadata, time_window_min, ignore_nan=False): elif unit == "mm": method = "sum" else: - raise ValueError( - "can only aggregate units of 'mm/h' or 'mm'" + " not %s" % unit - ) + raise ValueError(f"can only aggregate units of 'mm/h' or 'mm' not {unit}") if ignore_nan: method = "".join(("nan", method)) - R = aggregate_fields(R, nframes, axis=axis, method=method) - - metadata["accutime"] = time_window_min - metadata["timestamps"] = timestamps[nframes - 1 :: nframes] - if "leadtimes" in metadata: - metadata["leadtimes"] = leadtimes[nframes - 1 :: nframes] + return aggregate_fields(dataset, window_size, dim="time", method=method) - return R, metadata - -def aggregate_fields_space(R, metadata, space_window, ignore_nan=False): +def aggregate_fields_space( + dataset: xr.Dataset, space_window, ignore_nan=False +) -> xr.Dataset: """ Upscale fields in space. + It attempts to aggregate the given dataset in y and x direction in an integer + number of sections of length = ``(window_size_y, window_size_x)``. + If such a aggregation is not possible, an error is raised. + The data is aggregated by computing the mean. Only datasets with precipitation + data in the ``mm`` or ``mm/h`` unit are currently supported. + Parameters ---------- - R: array-like - Array of shape (m,n), (t,m,n) or (l,t,m,n) containing a single field or - a time series of (ensemble) input fields. - metadata: dict - Metadata dictionary containing the xpixelsize, ypixelsize and unit - attributes as described in the documentation of + dataset: xarray.Dataset + Dataset containing a single field or + a time series of (ensemble) input fields as described in the documentation of :py:mod:`pysteps.io.importers`. space_window: float, tuple or None The length of the space window that is used to upscale the fields. 
If a float is given, the same window size is used for the x- and y-directions. Separate window sizes are used for x- and y-directions if - a two-element tuple is given. The space_window unit is the same used in - the geographical projection of R and hence the same as for the xpixelsize - and ypixelsize attributes. The space spanned by the n- and m-dimensions - of R must be a multiple of space_window. If set to None, the function - returns a copy of the original R and metadata. + a two-element tuple is given (y, x). The space_window unit is the same + as the unit of x and y in the input dataset. The space spanned by the + n- and m-dimensions of the dataset content must be a multiple of space_window. + If set to None, the function returns a copy of the original dataset. ignore_nan: bool, optional If True, ignore nan values. Returns ------- - outputarray: array-like - The new array of aggregated fields of shape (k,j), (t,k,j) or (l,t,k,j), - where k = m*ypixelsize/space_window[1] and j = n*xpixelsize/space_window[0]. - metadata: dict - The metadata with updated attributes. + dataset: xarray.Dataset + The new dataset. 
See also -------- @@ -156,110 +136,85 @@ def aggregate_fields_space(R, metadata, space_window, ignore_nan=False): pysteps.utils.dimension.aggregate_fields """ - R = R.copy() - metadata = metadata.copy() - if space_window is None: - return R, metadata - - unit = metadata["unit"] - ypixelsize = metadata["ypixelsize"] - xpixelsize = metadata["xpixelsize"] - - if len(R.shape) < 2: - raise ValueError("The number of dimensions must be >= 2") - if len(R.shape) == 2: - axes = [0, 1] - if len(R.shape) == 3: - axes = [1, 2] - if len(R.shape) == 4: - axes = [2, 3] - if len(R.shape) > 4: - raise ValueError("The number of dimensions must be <= 4") + return dataset + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + + unit = metadata["units"] if np.isscalar(space_window): space_window = (space_window, space_window) # assumes that frames are evenly spaced - if ypixelsize == space_window[1] and xpixelsize == space_window[0]: - return R, metadata - - ysize = R.shape[axes[0]] * ypixelsize - xsize = R.shape[axes[1]] * xpixelsize - - if ( - abs(ysize / space_window[1] - round(ysize / space_window[1])) > 1e-10 - or abs(xsize / space_window[0] - round(xsize / space_window[0])) > 1e-10 - ): - raise ValueError("space_window does not equally split R") + ydelta = dataset["y"].values[1] - dataset["y"].values[0] + xdelta = dataset["x"].values[1] - dataset["x"].values[0] - nframes = [int(space_window[1] / ypixelsize), int(space_window[0] / xpixelsize)] + if space_window[0] % ydelta > 1e-10 or space_window[1] % xdelta > 1e-10: + raise ValueError("space_window does not equally split dataset") # specify the operator to be used to aggregate the values # within the space window if unit == "mm/h" or unit == "mm": method = "mean" else: - raise ValueError( - "can only aggregate units of 'mm/h' or 'mm' " + "not %s" % unit - ) + raise ValueError(f"can only aggregate units of 'mm/h' or 'mm' not {unit}") if ignore_nan: method = "".join(("nan", method)) - R = 
aggregate_fields(R, nframes[0], axis=axes[0], method=method) - R = aggregate_fields(R, nframes[1], axis=axes[1], method=method) - - metadata["ypixelsize"] = space_window[1] - metadata["xpixelsize"] = space_window[0] + window_size = (int(space_window[0] / ydelta), int(space_window[1] / xdelta)) - return R, metadata + return aggregate_fields(dataset, window_size, ["y", "x"], method) -def aggregate_fields(data, window_size, axis=0, method="mean", trim=False): +def aggregate_fields( + dataset: xr.Dataset, window_size, dim="x", method="mean", trim=False +) -> xr.Dataset: """Aggregate fields along a given direction. - It attempts to aggregate the given R axis in an integer number of sections + It attempts to aggregate the given dataset dim in an integer number of sections of length = ``window_size``. If such a aggregation is not possible, an error is raised unless ``trim`` - set to True, in which case the axis is trimmed (from the end) + set to True, in which case the dim is trimmed (from the end) to make it perfectly divisible". Parameters ---------- - data: array-like - Array of any shape containing the input fields. - window_size: int or tuple of ints + dataset: xarray.Dataset + Dataset containing the input fields as described in the documentation of + :py:mod:`pysteps.io.importers`. + window_size: int or array-like of ints The length of the window that is used to aggregate the fields. If a single integer value is given, the same window is used for - all the selected axis. + all the selected dim. If ``window_size`` is a 1D array-like, each element indicates the length of the window that is used - to aggregate the fields along each axis. In this case, + to aggregate the fields along each dim. In this case, the number of elements of 'window_size' must be the same as the elements - in the ``axis`` argument. - axis: int or array-like of ints - Axis or axes where to perform the aggregation. 
- If this is a tuple of ints, the aggregation is performed over multiple - axes, instead of a single axis + in the ``dim`` argument. + dim: str or array-like of strs + Dim or dims where to perform the aggregation. + If this is an array-like of strs, the aggregation is performed over multiple + dims, instead of a single dim method: string, optional Optional argument that specifies the operation to use to aggregate the values within the window. Default to mean operator. trim: bool In case that the ``data`` is not perfectly divisible by - ``window_size`` along the selected axis: + ``window_size`` along the selected dim: - trim=True: the data will be trimmed (from the end) along that - axis to make it perfectly divisible. + dim to make it perfectly divisible. - trim=False: a ValueError exception is raised. Returns ------- - new_array: array-like - The new aggregated array with shape[axis] = k, - where k = R.shape[axis] / window_size. + dataset: xarray.Dataset + The new dataset. See also -------- @@ -267,90 +222,60 @@ def aggregate_fields(data, window_size, axis=0, method="mean", trim=False): pysteps.utils.dimension.aggregate_fields_space """ - if np.ndim(axis) > 1: + if np.ndim(dim) > 1: raise TypeError( "Only integers or integer 1D arrays can be used for the " "'axis' argument." ) - if np.ndim(axis) == 1: - axis = np.asarray(axis) - if np.ndim(window_size) == 0: - window_size = (window_size,) * axis.size - - window_size = np.asarray(window_size, dtype="int") - - if window_size.shape != axis.shape: - raise ValueError( - "The 'window_size' and 'axis' shapes are incompatible." 
- f"window_size.shape: {str(window_size.shape)}, " - f"axis.shape: {str(axis.shape)}, " - ) - - new_data = data.copy() - for i in range(axis.size): - # Recursively call the aggregate_fields function - new_data = aggregate_fields( - new_data, window_size[i], axis=axis[i], method=method, trim=trim - ) - - return new_data + if np.ndim(dim) == 0: + dim = [dim] - if np.ndim(window_size) != 0: - raise TypeError( - "A single axis was selected for the aggregation but several" - f"of window_sizes were given: {str(window_size)}." - ) + if np.ndim(window_size) == 0: + window_size = [window_size for _ in dim] - data = np.asarray(data).copy() - orig_shape = data.shape + if len(window_size) != len(dim): + raise TypeError("The length of window size does not to match the length of dim") if method not in _aggregation_methods: raise ValueError( "Aggregation method not recognized. " f"Available methods: {str(list(_aggregation_methods.keys()))}" ) + for ws in window_size: + if ws <= 0: + raise ValueError("'window_size' must be strictly positive") - if window_size <= 0: - raise ValueError("'window_size' must be strictly positive") + for d, ws in zip(dim, window_size): + if (dataset.sizes[d] % ws) and (not trim): + raise ValueError( + f"Since 'trim' argument was set to False," + f"the 'window_size' {ws} must exactly divide" + f"the dimension along the selected axis:" + f"dataset.sizes[dim]={dataset.sizes[d]}" + ) - if (orig_shape[axis] % window_size) and (not trim): - raise ValueError( - f"Since 'trim' argument was set to False," - f"the 'window_size' {window_size} must exactly divide" - f"the dimension along the selected axis:" - f"data.shape[axis]={orig_shape[axis]}" + # FIXME: The aggregation method is applied to all DataArrays in the Dataset + # Fix to allow support for an aggregation method per DataArray + return ( + dataset.rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[method]) + .isel( + { + d: slice(ws - 1, dataset.sizes[d] - dataset.sizes[d] % ws, ws) + 
for d, ws in zip(dim, window_size) + } ) - - new_data = data.swapaxes(axis, 0) - if trim: - trim_size = data.shape[axis] % window_size - if trim_size > 0: - new_data = new_data[:-trim_size] - - new_data_shape = list(new_data.shape) - new_data_shape[0] //= window_size # Final shape - - new_data = new_data.reshape(new_data_shape[0], window_size, -1) - - new_data = _aggregation_methods[method](new_data, axis=1) - - new_data = new_data.reshape(new_data_shape).swapaxes(axis, 0) - - return new_data + ) -def clip_domain(R, metadata, extent=None): +def clip_domain(dataset: xr.Dataset, extent=None): """ Clip the field domain by geographical coordinates. Parameters ---------- - R: array-like - Array of shape (m,n) or (t,m,n) containing the input fields. - metadata: dict - Metadata dictionary containing the x1, x2, y1, y2, - xpixelsize, ypixelsize, - zerovalue and yorigin attributes as described in the documentation of + dataset: xarray.Dataset + Dataset containing the input fields as described in the documentation of :py:mod:`pysteps.io.importers`. extent: scalars (left, right, bottom, top), optional The extent of the bounding box in data coordinates to be used to clip @@ -362,107 +287,48 @@ def clip_domain(R, metadata, extent=None): Returns ------- - R: array-like - the clipped array - metadata: dict - the metadata with updated attributes. 
+ dataset: xarray.Dataset + The clipped dataset """ + if extent is None: + return dataset + return dataset.sel(x=slice(extent[0], extent[1]), y=slice(extent[2], extent[3])) - R = R.copy() - R_shape = np.array(R.shape) - metadata = metadata.copy() - if extent is None: - return R, metadata - - if len(R.shape) < 2: - raise ValueError("The number of dimension must be > 1") - if len(R.shape) == 2: - R = R[None, None, :, :] - if len(R.shape) == 3: - R = R[None, :, :, :] - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - # extract original domain coordinates - left = metadata["x1"] - right = metadata["x2"] - bottom = metadata["y1"] - top = metadata["y2"] - - # extract bounding box coordinates - left_ = extent[0] - right_ = extent[1] - bottom_ = extent[2] - top_ = extent[3] - - # compute its extent in pixels - dim_x_ = int((right_ - left_) / metadata["xpixelsize"]) - dim_y_ = int((top_ - bottom_) / metadata["ypixelsize"]) - R_ = np.ones((R.shape[0], R.shape[1], dim_y_, dim_x_)) * metadata["zerovalue"] - - # build set of coordinates for the original domain - y_coord = ( - np.linspace(bottom, top - metadata["ypixelsize"], R.shape[2]) - + metadata["ypixelsize"] / 2.0 - ) - x_coord = ( - np.linspace(left, right - metadata["xpixelsize"], R.shape[3]) - + metadata["xpixelsize"] / 2.0 +def _pad_domain( + dataset: xr.Dataset, dim_to_pad: str, idx_buffer: int, zerovalue: float +) -> xr.Dataset: + # assumes that frames are evenly spaced + delta = dataset[dim_to_pad].values[1] - dataset[dim_to_pad].values[0] + end_values = ( + dataset[dim_to_pad].values[0] - delta * idx_buffer, + dataset[dim_to_pad].values[-1] + delta * idx_buffer, ) - # build set of coordinates for the new domain - y_coord_ = ( - np.linspace(bottom_, top_ - metadata["ypixelsize"], R_.shape[2]) - + metadata["ypixelsize"] / 2.0 + dataset_ref = dataset + + # FIXME: The same zerovalue is used for all DataArrays in the Dataset + # Fix to allow support for a zerovalue per DataArray + 
dataset = dataset_ref.pad({dim_to_pad: idx_buffer}, constant_values=zerovalue) + dataset[dim_to_pad] = dataset_ref[dim_to_pad].pad( + {dim_to_pad: idx_buffer}, + mode="linear_ramp", + end_values={dim_to_pad: end_values}, ) - x_coord_ = ( - np.linspace(left_, right_ - metadata["xpixelsize"], R_.shape[3]) - + metadata["xpixelsize"] / 2.0 + dataset.lat.data[:], dataset.lon.data[:] = compute_lat_lon( + dataset.x.values, dataset.y.values, dataset.attrs["projection"] ) + return dataset - # origin='upper' reverses the vertical axes direction - if metadata["yorigin"] == "upper": - y_coord = y_coord[::-1] - y_coord_ = y_coord_[::-1] - - # extract original domain - idx_y = np.where(np.logical_and(y_coord < top_, y_coord > bottom_))[0] - idx_x = np.where(np.logical_and(x_coord < right_, x_coord > left_))[0] - - # extract new domain - idx_y_ = np.where(np.logical_and(y_coord_ < top, y_coord_ > bottom))[0] - idx_x_ = np.where(np.logical_and(x_coord_ < right, x_coord_ > left))[0] - - # compose the new array - R_[:, :, idx_y_[0] : (idx_y_[-1] + 1), idx_x_[0] : (idx_x_[-1] + 1)] = R[ - :, :, idx_y[0] : (idx_y[-1] + 1), idx_x[0] : (idx_x[-1] + 1) - ] - - # update coordinates - metadata["y1"] = bottom_ - metadata["y2"] = top_ - metadata["x1"] = left_ - metadata["x2"] = right_ - R_shape[-2] = R_.shape[-2] - R_shape[-1] = R_.shape[-1] - - return R_.reshape(R_shape), metadata - - -def square_domain(R, metadata, method="pad", inverse=False): +def square_domain(dataset: xr.Dataset, method="pad", inverse=False): """ Either pad or crop a field to obtain a square domain. Parameters ---------- - R: array-like - Array of shape (m,n) or (t,m,n) containing the input fields. - metadata: dict - Metadata dictionary containing the x1, x2, y1, y2, - xpixelsize, ypixelsize, - attributes as described in the documentation of + dataset: xarray.Dataset + Dataset containing the input fields as described in the documentation of :py:mod:`pysteps.io.importers`. 
method: {'pad', 'crop'}, optional Either pad or crop. @@ -477,123 +343,91 @@ def square_domain(R, metadata, method="pad", inverse=False): Returns ------- - R: array-like - the reshape dataset - metadata: dict - the metadata with updated attributes. + dataset: xarray.Dataset + the reshaped dataset """ - R = R.copy() - R_shape = np.array(R.shape) - metadata = metadata.copy() - - if not inverse: - if len(R.shape) < 2: - raise ValueError("The number of dimension must be > 1") - if len(R.shape) == 2: - R = R[None, None, :] - if len(R.shape) == 3: - R = R[None, :] - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - if R.shape[2] == R.shape[3]: - return R.squeeze() - - orig_dim = R.shape - orig_dim_n = orig_dim[0] - orig_dim_t = orig_dim[1] - orig_dim_y = orig_dim[2] - orig_dim_x = orig_dim[3] + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip_data = dataset[precip_var].values + + x_len = len(dataset.x.values) + y_len = len(dataset.y.values) + + if inverse: + if "orig_domain" not in dataset.attrs or "square_method" not in dataset.attrs: + raise ValueError("Attempting to inverse a non squared dataset") + method = dataset.attrs.pop("square_method") + orig_domain = dataset.attrs.pop("orig_domain") if method == "pad": - new_dim = np.max(orig_dim[2:]) - R_ = np.ones((orig_dim_n, orig_dim_t, new_dim, new_dim)) * R.min() - - if orig_dim_x < new_dim: - idx_buffer = int((new_dim - orig_dim_x) / 2.0) - R_[:, :, :, idx_buffer : (idx_buffer + orig_dim_x)] = R - metadata["x1"] -= idx_buffer * metadata["xpixelsize"] - metadata["x2"] += idx_buffer * metadata["xpixelsize"] - - elif orig_dim_y < new_dim: - idx_buffer = int((new_dim - orig_dim_y) / 2.0) - R_[:, :, idx_buffer : (idx_buffer + orig_dim_y), :] = R - metadata["y1"] -= idx_buffer * metadata["ypixelsize"] - metadata["y2"] += idx_buffer * metadata["ypixelsize"] - - elif method == "crop": - new_dim = np.min(orig_dim[2:]) - R_ = np.zeros((orig_dim_n, orig_dim_t, 
new_dim, new_dim)) - - if orig_dim_x > new_dim: - idx_buffer = int((orig_dim_x - new_dim) / 2.0) - R_ = R[:, :, :, idx_buffer : (idx_buffer + new_dim)] - metadata["x1"] += idx_buffer * metadata["xpixelsize"] - metadata["x2"] -= idx_buffer * metadata["xpixelsize"] - - elif orig_dim_y > new_dim: - idx_buffer = int((orig_dim_y - new_dim) / 2.0) - R_ = R[:, :, idx_buffer : (idx_buffer + new_dim), :] - metadata["y1"] += idx_buffer * metadata["ypixelsize"] - metadata["y2"] -= idx_buffer * metadata["ypixelsize"] - - else: - raise ValueError("Unknown type") - - metadata["orig_domain"] = (orig_dim_y, orig_dim_x) - metadata["square_method"] = method - - R_shape[-2] = R_.shape[-2] - R_shape[-1] = R_.shape[-1] - - return R_.reshape(R_shape), metadata - - elif inverse: - if len(R.shape) < 2: - raise ValueError("The number of dimension must be > 2") - if len(R.shape) == 2: - R = R[None, None, :] - if len(R.shape) == 3: - R = R[None, :] - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - method = metadata.pop("square_method") - shape = metadata.pop("orig_domain") - - if R.shape[2] == shape[0] and R.shape[3] == shape[1]: - return R.squeeze(), metadata - - R_ = np.zeros((R.shape[0], R.shape[1], shape[0], shape[1])) + if x_len > len(orig_domain[1]): + extent = ( + orig_domain[1].min(), + orig_domain[1].max(), + dataset.y.values.min(), + dataset.y.values.max(), + ) + elif y_len > len(orig_domain[0]): + extent = ( + dataset.x.values.min(), + dataset.x.values.max(), + orig_domain[0].min(), + orig_domain[0].max(), + ) + else: + return dataset + return clip_domain(dataset, extent) + + if method == "crop": + if x_len < len(orig_domain[1]): + dim_to_pad = "x" + idx_buffer = int((len(orig_domain[1]) - x_len) / 2.0) + elif y_len < len(orig_domain[0]): + dim_to_pad = "y" + idx_buffer = int((len(orig_domain[0]) - y_len) / 2.0) + else: + return dataset + return _pad_domain(dataset, dim_to_pad, idx_buffer, np.nanmin(precip_data)) + + raise ValueError(f"Unknown 
square method: {method}") + + else: + if "orig_domain" in dataset.attrs and "square_method" in dataset.attrs: + raise ValueError("Attempting to square an already squared dataset") + dataset.attrs["orig_domain"] = (dataset.y.values, dataset.x.values) + dataset.attrs["square_method"] = method if method == "pad": - if R.shape[2] == shape[0]: - idx_buffer = int((R.shape[3] - shape[1]) / 2.0) - R_ = R[:, :, :, idx_buffer : (idx_buffer + shape[1])] - metadata["x1"] += idx_buffer * metadata["xpixelsize"] - metadata["x2"] -= idx_buffer * metadata["xpixelsize"] - - elif R.shape[3] == shape[1]: - idx_buffer = int((R.shape[2] - shape[0]) / 2.0) - R_ = R[:, :, idx_buffer : (idx_buffer + shape[0]), :] - metadata["y1"] += idx_buffer * metadata["ypixelsize"] - metadata["y2"] -= idx_buffer * metadata["ypixelsize"] - - elif method == "crop": - if R.shape[2] == shape[0]: - idx_buffer = int((shape[1] - R.shape[3]) / 2.0) - R_[:, :, :, idx_buffer : (idx_buffer + R.shape[3])] = R - metadata["x1"] -= idx_buffer * metadata["xpixelsize"] - metadata["x2"] += idx_buffer * metadata["xpixelsize"] - - elif R.shape[3] == shape[1]: - idx_buffer = int((shape[0] - R.shape[2]) / 2.0) - R_[:, :, idx_buffer : (idx_buffer + R.shape[2]), :] = R - metadata["y1"] -= idx_buffer * metadata["ypixelsize"] - metadata["y2"] += idx_buffer * metadata["ypixelsize"] - - R_shape[-2] = R_.shape[-2] - R_shape[-1] = R_.shape[-1] - - return R_.reshape(R_shape), metadata + if x_len > y_len: + dim_to_pad = "y" + idx_buffer = int((x_len - y_len) / 2.0) + elif y_len > x_len: + dim_to_pad = "x" + idx_buffer = int((y_len - x_len) / 2.0) + else: + return dataset + return _pad_domain(dataset, dim_to_pad, idx_buffer, np.nanmin(precip_data)) + + if method == "crop": + if x_len > y_len: + idx_buffer = int((x_len - y_len) / 2.0) + extent = ( + dataset.x.values[idx_buffer], + dataset.x.values[-idx_buffer - 1], + dataset.y.values.min(), + dataset.y.values.max(), + ) + elif y_len > x_len: + idx_buffer = int((y_len - x_len) / 2.0) + 
extent = ( + dataset.x.values.min(), + dataset.x.values.max(), + dataset.y.values[idx_buffer], + dataset.y.values[-idx_buffer - 1], + ) + else: + return dataset + return clip_domain(dataset, extent) + + raise ValueError(f"Unknown square method: {method}") diff --git a/pysteps/utils/transformation.py b/pysteps/utils/transformation.py index 1977583c6..3e48fe0d8 100644 --- a/pysteps/utils/transformation.py +++ b/pysteps/utils/transformation.py @@ -41,8 +41,9 @@ def boxcox_transform( Parameters ---------- - dataset: Dataset - Dataset to be transformed. + dataset: xarray.Dataset + Dataset to be transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. Lambda: float, optional Parameter Lambda of the Box-Cox transformation. It is 0 by default, which produces the log transformation. @@ -62,7 +63,7 @@ def boxcox_transform( Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the (back-)transformed units. References @@ -146,8 +147,9 @@ def dB_transform( Parameters ---------- - dataset: Dataset - Dataset to be (back-)transformed. + dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. threshold: float, optional Optional value that is used for thresholding with the same units as in the dataset. If None, the threshold contained in metadata is used. @@ -161,7 +163,7 @@ def dB_transform( Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the (back-)transformed units. """ @@ -223,8 +225,9 @@ def NQ_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.Dat Parameters ---------- - dataset: Dataset - Dataset to be transformed. + dataset: xarray.Dataset + Dataset to be transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. inverse: bool, optional If set to True, it performs the inverse transform. False by default. 
@@ -238,7 +241,7 @@ def NQ_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.Dat Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the (back-)transformed units. References @@ -309,14 +312,15 @@ def sqrt_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.D Parameters ---------- - dataset: Dataset - Dataset to be transformed. + dataset: xarray.Dataset + Dataset to be transformed as described in the documentation of + :py:mod:`pysteps.io.importers`. inverse: bool, optional If set to True, it performs the inverse transform. False by default. Returns ------- - dataset: Dataset + dataset: xarray.Dataset Dataset containing the (back-)transformed units. """ From e7c081c1d71b3ae6093489b70fe1a7a79b526de1 Mon Sep 17 00:00:00 2001 From: mats-knmi <145579783+mats-knmi@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:51:27 +0200 Subject: [PATCH 07/65] make all nowcast methods xarray compatible (#414) * make test steps skill run * undo accidental change * make steps nowcast xarray compatible * wrap all nowcasts in xarray * fix dimension.py tests * update dimension.py to work with new dataarrays * fix test_nowcast_utils tests * update docs and make xarray usage more explicit in nowcasts * update docs and make xarray usage in motion methods more explicit --- pysteps/decorators.py | 10 +- pysteps/io/importers.py | 43 +++-- pysteps/io/readers.py | 16 +- pysteps/motion/constant.py | 26 ++- pysteps/motion/darts.py | 29 ++-- pysteps/motion/lucaskanade.py | 61 +++---- pysteps/motion/proesmans.py | 44 +++-- pysteps/motion/vet.py | 39 +++-- pysteps/nowcasts/anvil.py | 60 ++++--- pysteps/nowcasts/extrapolation.py | 44 +++-- pysteps/nowcasts/lagrangian_probability.py | 46 ++--- pysteps/nowcasts/linda.py | 77 ++++----- pysteps/nowcasts/sprog.py | 62 +++---- pysteps/nowcasts/sseps.py | 78 +++++---- pysteps/nowcasts/steps.py | 57 +++---- pysteps/nowcasts/utils.py | 9 +- pysteps/tests/test_motion_lk.py | 16 +- 
pysteps/tests/test_nowcasts_anvil.py | 22 ++- .../test_nowcasts_lagrangian_probability.py | 63 +++++-- pysteps/tests/test_nowcasts_linda.py | 91 ++++++---- pysteps/tests/test_nowcasts_sprog.py | 20 +-- pysteps/tests/test_nowcasts_sseps.py | 28 +-- pysteps/tests/test_nowcasts_steps.py | 9 +- pysteps/tests/test_nowcasts_utils.py | 7 +- pysteps/tests/test_utils_dimension.py | 160 ++++++++++++++++-- pysteps/utils/dimension.py | 122 ++++++++++--- pysteps/{converters.py => xarray_helpers.py} | 67 +++++++- 27 files changed, 865 insertions(+), 441 deletions(-) rename pysteps/{converters.py => xarray_helpers.py} (76%) diff --git a/pysteps/decorators.py b/pysteps/decorators.py index ee421977e..69c9945bc 100644 --- a/pysteps/decorators.py +++ b/pysteps/decorators.py @@ -22,7 +22,7 @@ import numpy as np -from pysteps.converters import convert_to_xarray_dataset +from pysteps.xarray_helpers import convert_input_to_xarray_dataset def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text): @@ -90,7 +90,9 @@ def _import_with_postprocessing(*args, **kwargs): mask = ~np.isfinite(precip) precip[mask] = _fillna - return convert_to_xarray_dataset(precip.astype(_dtype), quality, metadata) + return convert_input_to_xarray_dataset( + precip.astype(_dtype), quality, metadata + ) extra_kwargs_doc = """ Other Parameters @@ -126,7 +128,9 @@ def new_function(*args, **kwargs): target motion_method_func function. """ - input_images = args[0] + dataset = args[0] + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if input_images.ndim != 3: raise ValueError( "input_images dimension mismatch.\n" diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py index f61d4b25b..71fe6dc78 100644 --- a/pysteps/io/importers.py +++ b/pysteps/io/importers.py @@ -74,22 +74,27 @@ .. 
tabularcolumns:: |p{2cm}|L| -+---------------+-------------------------------------------------------------------------------------------+ -| Coordinate | Description | -+===============+===========================================================================================+ -| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | -+---------------+-------------------------------------------------------------------------------------------+ -| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | -+---------------+-------------------------------------------------------------------------------------------+ -| lat | latitude coordinate in degrees | -+---------------+-------------------------------------------------------------------------------------------+ -| lon | longitude coordinate in degrees | -+---------------+-------------------------------------------------------------------------------------------+ -| time | forecast time in seconds since forecast start time | -+---------------+-------------------------------------------------------------------------------------------+ -| member | ensemble member number (integer) | -+---------------+-------------------------------------------------------------------------------------------+ - ++--------------------+-------------------------------------------------------------------------------------------+ +| Coordinate | Description | ++====================+===========================================================================================+ +| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | ++--------------------+-------------------------------------------------------------------------------------------+ +| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | 
++--------------------+-------------------------------------------------------------------------------------------+ +| lat | latitude coordinate in degrees | ++--------------------+-------------------------------------------------------------------------------------------+ +| lon | longitude coordinate in degrees | ++--------------------+-------------------------------------------------------------------------------------------+ +| time | forecast time in seconds since forecast start time | ++--------------------+-------------------------------------------------------------------------------------------+ +| ens_number | ensemble member number (integer) | ++--------------------+-------------------------------------------------------------------------------------------+ +| direction | used by proesmans to return the forward and backward advection and consistency fields | ++--------------------+-------------------------------------------------------------------------------------------+ + +The time, x and y dimensions all MUST be regularly spaced, with the stepsize included +in a ``stepsize`` attribute. The stepsize is given in the unit of the dimension (this +is always seconds for the time dimension). 
The dataset can contain the following data variables: @@ -102,8 +107,14 @@ | precip_accum | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, | | or reflectivity | the attributes of this variable contain metadata relevant to this attribute (see below) | +-------------------+-----------------------------------------------------------------------------------------------------------+ +| velocity_x | x-component of the advection field in cartesian_unit per timestep | ++-------------------+-----------------------------------------------------------------------------------------------------------+ +| velocity_y | y-component of the advection field in cartesian_unit per timestep | ++-------------------+-----------------------------------------------------------------------------------------------------------+ | quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything | +-------------------+-----------------------------------------------------------------------------------------------------------+ +| velocity_quality | value between 0 and 1 denoting the quality of the velocity data, currently only returned by proesmans | ++-------------------+-----------------------------------------------------------------------------------------------------------+ Some of the metadata in the metadata dictionary is not explicitely stored in the dataset, but is still implicitly present. 
For example ``x1`` can easily be found by taking the first diff --git a/pysteps/io/readers.py b/pysteps/io/readers.py index 7295b5ff7..30c4d4fc0 100644 --- a/pysteps/io/readers.py +++ b/pysteps/io/readers.py @@ -15,7 +15,7 @@ import xarray as xr -def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: +def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset | None: """ Read a time series of input files using the methods implemented in the :py:mod:`pysteps.io.importers` module and stack them into a 3d xarray @@ -28,6 +28,9 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: :py:mod:`pysteps.io.archive` module. importer: function A function implemented in the :py:mod:`pysteps.io.importers` module. + timestep: int, optional + The timestep in seconds, this value is optional if more than 1 inputfns + are given. kwargs: dict Optional keyword arguments for the importer. @@ -58,6 +61,16 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: return None startdate = min(inputfns[1]) + sorted_dates = sorted(inputfns[1]) + timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds()) + + if timestep is None: + timestep = timestep_dates + if timestep != timestep_dates: + raise ValueError("given timestep does not match inputfns") + for i in range(len(sorted_dates) - 1): + if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep: + raise ValueError("supplied dates are not evenly spaced") datasets = [] for i, ifn in enumerate(inputfns[0]): @@ -73,6 +86,7 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: { "long_name": "forecast time", "units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}", + "stepsize": timestep, }, ) ) diff --git a/pysteps/motion/constant.py b/pysteps/motion/constant.py index a5c153616..a26831ac0 100644 --- a/pysteps/motion/constant.py +++ b/pysteps/motion/constant.py @@ -14,27 +14,32 @@ import numpy as np import 
scipy.optimize as op +import xarray as xr from scipy.ndimage import map_coordinates -def constant(R, **kwargs): +def constant(dataset: xr.Dataset, **kwargs): """ Compute a constant advection field by finding a translation vector that maximizes the correlation between two successive images. Parameters ---------- - R: array_like - Array of shape (T,m,n) containing a sequence of T two-dimensional input - images of shape (m,n). If T > 2, two last elements along axis 0 are used. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. If the size of this dimension + is larger than 2, the last 2 entries of this dimension are used. Returns ------- - out: array_like - The constant advection field having shape (2, m, n), where out[0, :, :] - contains the x-components of the motion vectors and out[1, :, :] - contains the y-components. + out: xarray.Dataset + The input dataset with the constant advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. 
""" + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + R = dataset[precip_var].values m, n = R.shape[1:] X, Y = np.meshgrid(np.arange(n), np.arange(m)) @@ -51,4 +56,7 @@ def f(v): options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))} result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options) - return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) + output = np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) + dataset["velocity_x"] = (["y", "x"], output[0]) + dataset["velocity_y"] = (["y", "x"], output[1]) + return dataset diff --git a/pysteps/motion/darts.py b/pysteps/motion/darts.py index 4e5050d48..4aac80cd3 100644 --- a/pysteps/motion/darts.py +++ b/pysteps/motion/darts.py @@ -11,8 +11,10 @@ DARTS """ -import numpy as np import time + +import numpy as np +import xarray as xr from numpy.linalg import lstsq, svd from pysteps import utils @@ -20,16 +22,17 @@ @check_input_frames(just_ndim=True) -def DARTS(input_images, **kwargs): +def DARTS(dataset: xr.Dataset, **kwargs): """ Compute the advection field from a sequence of input images by using the DARTS method. :cite:`RCW2011` Parameters ---------- - input_images: array-like - Array of shape (T,m,n) containing a sequence of T two-dimensional input - images of shape (m,n). + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. Other Parameters ---------------- @@ -67,13 +70,15 @@ def DARTS(input_images, **kwargs): Returns ------- - out: ndarray - Three-dimensional array (2,m,n) containing the dense x- and y-components - of the motion field in units of pixels / timestep as given by the input - array R. + out: xarray.Dataset + The input dataset with the advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. 
""" + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values N_x = kwargs.get("N_x", 50) N_y = kwargs.get("N_y", 50) N_t = kwargs.get("N_t", 4) @@ -214,10 +219,14 @@ def DARTS(input_images, **kwargs): fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y)) ) + output = np.stack([U, V]) + dataset["velocity_x"] = (["y", "x"], output[0]) + dataset["velocity_y"] = (["y", "x"], output[1]) + if verbose: print("--- %s seconds ---" % (time.time() - t0)) - return np.stack([U, V]) + return dataset def _leastsq(A, B, y): diff --git a/pysteps/motion/lucaskanade.py b/pysteps/motion/lucaskanade.py index 133f860b7..b7a51a26b 100644 --- a/pysteps/motion/lucaskanade.py +++ b/pysteps/motion/lucaskanade.py @@ -22,22 +22,22 @@ dense_lucaskanade """ +import time + import numpy as np +import xarray as xr from numpy.ma.core import MaskedArray +from pysteps import feature, utils from pysteps.decorators import check_input_frames - -from pysteps import utils, feature from pysteps.tracking.lucaskanade import track_features from pysteps.utils.cleansing import decluster, detect_outliers from pysteps.utils.images import morph_opening -import time - @check_input_frames(2) def dense_lucaskanade( - input_images, + dataset: xr.Dataset, lk_kwargs=None, fd_method="shitomasi", fd_kwargs=None, @@ -73,18 +73,14 @@ def dense_lucaskanade( Parameters ---------- - input_images: ndarray_ or MaskedArray_ - Array of shape (T, m, n) containing a sequence of *T* two-dimensional - input images of shape (m, n). The indexing order in **input_images** is - assumed to be (time, latitude, longitude). - - *T* = 2 is the minimum required number of images. - With *T* > 2, all the resulting sparse vectors are pooled together for - the final interpolation on a regular grid. - - In case of ndarray_, invalid values (Nans or infs) are masked, - otherwise the mask of the MaskedArray_ is used. 
Such mask defines a - region where features are not detected for the tracking algorithm. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of the time dimension needs to + be at least 2. If it is larger than 2, all the resulting sparse vectors are pooled + together for the final interpolation on a regular grid. Invalid values (Nans or infs) + are masked. This mask defines a region where features are not detected for the tracking + algorithm. lk_kwargs: dict, optional Optional dictionary containing keyword arguments for the `Lucas-Kanade`_ @@ -151,14 +147,10 @@ def dense_lucaskanade( Returns ------- - out: ndarray_ or tuple - If **dense=True** (the default), return the advection field having shape - (2, m, n), where out[0, :, :] contains the x-components of the motion - vectors and out[1, :, :] contains the y-components. - The velocities are in units of pixels / timestep, where timestep is the - time difference between the two input images. - Return a zero motion field of shape (2, m, n) when no motion is - detected. + out: xarray.Dataset or tuple + If **dense=True** (the default), return the input dataset with the advection + field added in the ``velocity_x`` and ``velocity_y`` data variables. + Return a zero motion field when no motion is detected. If **dense=False**, it returns a tuple containing the 2-dimensional arrays **xy** and **uv**, where x, y define the vector locations, @@ -179,7 +171,9 @@ def dense_lucaskanade( Understanding Workshop, pp. 121–130, 1981. 
""" - input_images = input_images.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if verbose: print("Computing the motion field with the Lucas-Kanade method.") @@ -244,7 +238,10 @@ def dense_lucaskanade( # return zero motion field is no sparse vectors are found if xy.shape[0] == 0: if dense: - return np.zeros((2, domain_size[0], domain_size[1])) + uvgrid = np.zeros((2, domain_size[0], domain_size[1])) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + return dataset else: return xy, uv @@ -266,14 +263,20 @@ def dense_lucaskanade( # return zero motion field if no sparse vectors are left for interpolation if xy.shape[0] == 0: - return np.zeros((2, domain_size[0], domain_size[1])) + uvgrid = np.zeros((2, domain_size[0], domain_size[1])) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + return dataset # interpolation xgrid = np.arange(domain_size[1]) ygrid = np.arange(domain_size[0]) uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + if verbose: print("--- total time: %.2f seconds ---" % (time.time() - t0)) - return uvgrid + return dataset diff --git a/pysteps/motion/proesmans.py b/pysteps/motion/proesmans.py index 8760092ba..4b122a620 100644 --- a/pysteps/motion/proesmans.py +++ b/pysteps/motion/proesmans.py @@ -12,6 +12,7 @@ """ import numpy as np +import xarray as xr from scipy.ndimage import gaussian_filter from pysteps.decorators import check_input_frames @@ -20,7 +21,7 @@ @check_input_frames(2, 2) def proesmans( - input_images, + dataset: xr.Dataset, lam=50.0, num_iter=100, num_levels=6, @@ -34,8 +35,11 @@ def proesmans( Parameters ---------- - input_images: array_like - Array of shape (2, m, n) containing the first and second input image. 
+ dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of this dimension + has to be 2. lam: float Multiplier of the smoothness term. Smaller values give a smoother motion field. @@ -49,22 +53,20 @@ def proesmans( verbose: bool, optional Verbosity enabled if True (default). full_output: bool, optional - If True, the output is a two-element tuple containing the - forward-backward advection and consistency fields. The first element - is shape (2, 2, m, n), where the index along the first dimension refers - to the forward and backward advection fields. The second element is an - array of shape (2, m, n), where the index along the first dimension - refers to the forward and backward consistency fields. - Default: False. + If True, both the forward and backwards advection fields are returned + and the consistency fields are returned as well in the ``velocity_quality`` + data variable. Returns ------- out: ndarray - If full_output=False, the advection field having shape (2, m, n), where - out[0, :, :] contains the x-components of the motion vectors and - out[1, :, :] contains the y-components. The velocities are in units of - pixels / timestep, where timestep is the time difference between the - two input images. + The input dataset with the advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. + + If full_output=True, a ``direction`` dimension + is added to the dataset, so that the velocity data can be returned containing + the forward and backwards advection fields. Also the ``velocity_quality`` data + variable is present containing the forward and backward consistency fields. 
References ---------- @@ -73,6 +75,9 @@ def proesmans( """ del verbose # Not used + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values im1 = input_images[-2, :, :].copy() im2 = input_images[-1, :, :].copy() @@ -89,6 +94,11 @@ def proesmans( advfield, quality = _compute_advection_field(im, lam, num_iter, num_levels) if not full_output: - return advfield[0] + dataset["velocity_x"] = (["y", "x"], advfield[0, 0]) + dataset["velocity_y"] = (["y", "x"], advfield[0, 1]) else: - return advfield, quality + dataset["velocity_x"] = (["direction", "y", "x"], advfield[:, 0]) + dataset["velocity_y"] = (["direction", "y", "x"], advfield[:, 1]) + dataset["velocity_quality"] = (["direction", "y", "x"], quality) + + return dataset diff --git a/pysteps/motion/vet.py b/pysteps/motion/vet.py index 391ebe189..f30703bee 100644 --- a/pysteps/motion/vet.py +++ b/pysteps/motion/vet.py @@ -35,12 +35,13 @@ """ import numpy +import xarray as xr from numpy.ma.core import MaskedArray from scipy.ndimage import zoom from scipy.optimize import minimize from pysteps.decorators import check_input_frames -from pysteps.motion._vet import _warp, _cost_function +from pysteps.motion._vet import _cost_function, _warp def round_int(scalar): @@ -301,7 +302,7 @@ def vet_cost_function( @check_input_frames(2, 3) def vet( - input_images, + dataset: xr.Dataset, sectors=((32, 16, 4, 2), (32, 16, 4, 2)), smooth_gain=1e6, first_guess=None, @@ -366,15 +367,13 @@ def vet( Parameters ---------- - input_images: ndarray_ or MaskedArray - Input images, sequence of 2D arrays, or 3D arrays. - The first dimension represents the images time dimension. - - The template_image (first element in first dimensions) denotes the + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. 
The size of this dimension + has to be 2. The first element in the time dimension denotes the reference image used to obtain the displacement (2D array). The second is the target image. - - The expected dimensions are (2,ni,nj). sectors: list or array, optional Number of sectors on each dimension used in the scaling procedure. If dimension is 1, the same sectors will be used both image dimensions @@ -411,13 +410,11 @@ def vet( Returns ------- - displacement_field: ndarray_ - Displacement Field (2D array representing the transformation) that - warps the template image into the input image. - The dimensions are (2,ni,nj), where the first - dimension indicates the displacement along x (0) or y (1) in units of - pixels / timestep as given by the input_images array. - intermediate_steps: list of ndarray_ + out: xarray.Dataset + The input dataset with the displacement field that + warps the template image into the input image added in the ``velocity_x`` + and ``velocity_y`` data variables. + intermediate_steps: list of ndarray_, optional List with the first guesses obtained during the scaling procedure. References @@ -437,6 +434,9 @@ def vet( Nocedal, J, and S J Wright. 2006. Numerical Optimization. Springer New York. 
""" + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if verbose: def debug_print(*args, **kwargs): @@ -642,7 +642,10 @@ def debug_print(*args, **kwargs): if padding > 0: first_guess = first_guess[:, padding:-padding, padding:-padding] + dataset["velocity_x"] = (["y", "x"], first_guess[0]) + dataset["velocity_y"] = (["y", "x"], first_guess[1]) + if intermediate_steps: - return first_guess, scaling_guesses + return dataset, scaling_guesses - return first_guess + return dataset diff --git a/pysteps/nowcasts/anvil.py b/pysteps/nowcasts/anvil.py index f5af038bb..88ed6b0af 100644 --- a/pysteps/nowcasts/anvil.py +++ b/pysteps/nowcasts/anvil.py @@ -21,11 +21,13 @@ import time import numpy as np +import xarray as xr from scipy.ndimage import gaussian_filter from pysteps import cascade, extrapolation, utils from pysteps.nowcasts.utils import nowcast_main_loop from pysteps.timeseries import autoregression +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -36,10 +38,8 @@ def forecast( - vil, - velocity, + dataset: xr.Dataset, timesteps, - rainrate=None, n_cascade_levels=6, extrap_method="semilagrangian", ar_order=2, @@ -70,22 +70,21 @@ def forecast( Parameters ---------- - vil: array_like - Array of shape (ar_order+2,m,n) containing the input fields ordered by - timestamp from oldest to newest. The inputs are expected to contain VIL - or rain rate. The time steps between the inputs are assumed to be regular. - velocity: array_like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. 
It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as either VIL values in the + ``precip_accum`` data variable or rainrate in the ``precip_intensity`` + data variable. The time dimension of the dataset has to be size + ``ar_order + 2`` and the precipitation variable has to have this dimension. + When VIL values are supplied, optionally ``precip_accum`` can be supplied + as well without a time dimension, containing the most recently observed rain + rate field. If not supplied, no R(VIL) conversion is done and the outputs + are in the same units as the inputs. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. - rainrate: array_like - Array of shape (m,n) containing the most recently observed rain rate - field. If set to None, no R(VIL) conversion is done and the outputs - are in the same units as the inputs. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. @@ -128,18 +127,28 @@ def forecast( Returns ------- - out: ndarray - A three-dimensional array of shape (num_timesteps,m,n) containing a time - series of forecast precipitation fields. The time series starts from - t0+timestep, where timestep is taken from the input VIL/rain rate - fields. If measure_time is True, the return value is a three-element - tuple containing the nowcast array, the initialization time of the - nowcast generator and the time used in the main loop (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. 
If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). References ---------- :cite:`PCLH2020` """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + vil = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) + rainrate = None + if precip_var == "precip_intensity" and "precip_accum" in dataset: + rainrate = dataset["precip_accum"].values + _check_inputs(vil, rainrate, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -292,8 +301,6 @@ def worker(vil, i): print("Starting nowcast computation.") - rainrate_f = [] - extrap_kwargs["return_displacement"] = True state = {"vil_dec": vil_dec} @@ -323,10 +330,11 @@ def worker(vil, i): if measure_time: rainrate_f, mainloop_time = rainrate_f + output_dataset = convert_output_to_xarray_dataset(dataset, timesteps, rainrate_f) if measure_time: - return np.stack(rainrate_f), init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return np.stack(rainrate_f) + return output_dataset def _check_inputs(vil, rainrate, velocity, timesteps, ar_order): diff --git a/pysteps/nowcasts/extrapolation.py b/pysteps/nowcasts/extrapolation.py index 143a39d7c..a70b6985c 100644 --- a/pysteps/nowcasts/extrapolation.py +++ b/pysteps/nowcasts/extrapolation.py @@ -11,14 +11,16 @@ """ import time + import numpy as np +import xarray as xr from pysteps import extrapolation +from pysteps.xarray_helpers import convert_output_to_xarray_dataset def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, extrap_method="semilagrangian", extrap_kwargs=None, @@ -32,13 +34,11 @@ def forecast( Parameters ---------- - precip: array-like - Two-dimensional array of shape (m,n) containing the input precipitation - field. 
- velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + It should contain a time dimension of size 1. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -54,18 +54,25 @@ def forecast( Returns ------- - out: ndarray_ - Three-dimensional array of shape (num_timesteps, m, n) containing a time - series of nowcast precipitation fields. The time series starts from - t0 + timestep, where timestep is taken from the advection field velocity. - If *measure_time* is True, the return value is a two-element tuple - containing this array and the computation time (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a two-element tuple containing the nowcast dataset and + the computation time of the nowcast + (seconds).
See also -------- pysteps.extrapolation.interface """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values[0] + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps) if extrap_kwargs is None: @@ -95,10 +102,13 @@ def forecast( computation_time = time.time() - start_time print(f"{computation_time:.2f} seconds.") + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) if measure_time: - return precip_forecast, computation_time + return output_dataset, computation_time else: - return precip_forecast + return output_dataset def _check_inputs(precip, velocity, timesteps): diff --git a/pysteps/nowcasts/lagrangian_probability.py b/pysteps/nowcasts/lagrangian_probability.py index 727e94806..7bae440cc 100644 --- a/pysteps/nowcasts/lagrangian_probability.py +++ b/pysteps/nowcasts/lagrangian_probability.py @@ -12,20 +12,20 @@ """ import numpy as np +import xarray as xr from scipy.signal import convolve from pysteps.nowcasts import extrapolation def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, threshold, extrap_method="semilagrangian", extrap_kwargs=None, slope=5, -): +) -> xr.Dataset: """ Generate a probability nowcast by a local lagrangian approach. The ouput is the probability of exceeding a given intensity threshold, i.e. @@ -33,13 +33,11 @@ def forecast( Parameters ---------- - precip: array_like - Two-dimensional array of shape (m,n) containing the input precipitation - field. - velocity: array_like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable.
+ It should contain a time dimension of size 1. timesteps: int or list of floats Number of time steps to forecast or a sorted list of time steps for which the forecasts are computed (relative to the input time step). @@ -54,10 +52,15 @@ def forecast( Returns ------- - out: ndarray - Three-dimensional array of shape (num_timesteps, m, n) containing a time - series of nowcast exceedence probabilities. The time series starts from - t0 + timestep, where timestep is taken from the advection field velocity. + out: xarray.Dataset + A dataset as described in the documentation of + :py:mod:`pysteps.io.importers` containing a time series of nowcast + exceedance probabilities. The probabilities are stored in the + precipitation data variable of the dataset + (the variable named by the ``precip_var`` attribute). The time series + starts from t0+timestep, where + timestep is taken from the metadata of the + time coordinate. References ---------- @@ -68,16 +71,14 @@ def forecast( """ # Compute deterministic extrapolation forecast if isinstance(timesteps, int) and timesteps > 0: - timesteps = np.arange(1, timesteps + 1) + timesteps = list(range(1, timesteps + 1)) elif not isinstance(timesteps, list): raise ValueError(f"invalid value for argument 'timesteps': {timesteps}") - precip_forecast = extrapolation.forecast( - precip, - velocity, - timesteps, - extrap_method, - extrap_kwargs, + dataset_forecast = extrapolation.forecast( + dataset, timesteps, extrap_method, extrap_kwargs ) + precip_var = dataset_forecast.attrs["precip_var"] + precip_forecast = dataset_forecast[precip_var].values # Ignore missing values nanmask = np.isnan(precip_forecast) @@ -104,7 +105,8 @@ def forecast( precip_forecast[i, ...]
/= kernel_sum precip_forecast = np.clip(precip_forecast, 0, 1) precip_forecast[nanmask] = np.nan - return precip_forecast + dataset_forecast[precip_var].data[:] = precip_forecast + return dataset_forecast def _get_kernel(size): diff --git a/pysteps/nowcasts/linda.py b/pysteps/nowcasts/linda.py index 7d737eaa9..2fc5dfd70 100644 --- a/pysteps/nowcasts/linda.py +++ b/pysteps/nowcasts/linda.py @@ -40,6 +40,8 @@ import time import warnings +from pysteps.xarray_helpers import convert_output_to_xarray_dataset + try: import dask @@ -47,28 +49,19 @@ except ImportError: DASK_IMPORTED = False import numpy as np +import xarray as xr +from scipy import optimize as opt +from scipy import stats from scipy.integrate import nquad from scipy.interpolate import interp1d -from scipy import optimize as opt from scipy.signal import convolve -from scipy import stats from pysteps import extrapolation, feature, noise -from pysteps.decorators import deprecate_args from pysteps.nowcasts.utils import nowcast_main_loop -@deprecate_args( - { - "precip_fields": "precip", - "advection_field": "velocity", - "num_ens_members": "n_ens_members", - }, - "1.8.0", -) def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, feature_method="blob", max_num_features=25, @@ -100,15 +93,13 @@ def forecast( Parameters ---------- - precip: array_like - Array of shape (ari_order + 2, m, n) containing the input rain rate - or reflectivity fields (in linear scale) ordered by timestamp from - oldest to newest. The time steps between the inputs are assumed to be - regular. - velocity: array_like - Array of shape (2, m, n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. 
It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as either reflectivity values in the + ``reflectivity`` data variable (in linear scale) or rainrate in the ``precip_intensity`` + data variable. The time dimension of the dataset has to be size + ``ari_order + 2`` and the precipitation variable has to have this dimension. timesteps: int Number of time steps to forecast. feature_method: {'blob', 'domain' 'shitomasi'} @@ -202,16 +193,15 @@ def forecast( Returns ------- - out: numpy.ndarray - A four-dimensional array of shape (n_ens_members, timesteps, m, n) - containing a time series of forecast precipitation fields for each - ensemble member. If add_perturbations is False, the first dimension is - dropped. The time series starts from t0 + timestep, where timestep is - taken from the input fields. If measure_time is True, the return value - is a three-element tuple containing the nowcast array, the initialization - time of the nowcast generator and the time used in the main loop - (seconds). If return_output is set to False, a single None value is - returned instead. + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields for each ensemble member. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). Notes ----- @@ -224,6 +214,10 @@ def forecast( variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. 
""" + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ari_order) if feature_kwargs is None: @@ -363,14 +357,21 @@ def forecast( callback, ) - if return_output: - if measure_time: - return precip_forecast[0], init_time, precip_forecast[1] - else: - return precip_forecast - else: + if not return_output: return None + if measure_time: + precip_forecast, mainloop_time = precip_forecast + + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) + + if measure_time: + return output_dataset, init_time, mainloop_time + else: + return output_dataset + def _check_inputs(precip, velocity, timesteps, ari_order): if ari_order not in [1, 2]: diff --git a/pysteps/nowcasts/sprog.py b/pysteps/nowcasts/sprog.py index 86c840dcb..2ebcfde41 100644 --- a/pysteps/nowcasts/sprog.py +++ b/pysteps/nowcasts/sprog.py @@ -10,17 +10,17 @@ forecast """ -import numpy as np import time -from pysteps import cascade -from pysteps import extrapolation -from pysteps import utils -from pysteps.decorators import deprecate_args +import numpy as np +import xarray as xr + +from pysteps import cascade, extrapolation, utils from pysteps.nowcasts import utils as nowcast_utils +from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation -from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -30,10 +30,8 @@ DASK_IMPORTED = False -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, precip_thr=None, n_cascade_levels=6, @@ -55,15 +53,13 @@ def forecast( Parameters 
---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between - the inputs are assumed to be regular. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. - The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -120,13 +116,15 @@ def forecast( Returns ------- - out: ndarray - A three-dimensional array of shape (num_timesteps,m,n) containing a time - series of forecast precipitation fields. The time series starts from - t0+timestep, where timestep is taken from the input precipitation fields - precip. If measure_time is True, the return value is a three-element - tuple containing the nowcast array, the initialization time of the - nowcast generator and the time used in the main loop (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. 
If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). See also -------- @@ -137,6 +135,10 @@ def forecast( :cite:`Seed2003`, :cite:`PCH2019a` """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -327,8 +329,6 @@ def f(precip, i): print("Starting nowcast computation.") - precip_forecast = [] - state = {"precip_cascades": precip_cascades, "precip_decomp": precip_decomp} params = { "domain": domain, @@ -358,12 +358,14 @@ def f(precip, i): if measure_time: precip_forecast, mainloop_time = precip_forecast - precip_forecast = np.stack(precip_forecast) + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) if measure_time: - return precip_forecast, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return precip_forecast + return output_dataset def _check_inputs(precip, velocity, timesteps, ar_order): diff --git a/pysteps/nowcasts/sseps.py b/pysteps/nowcasts/sseps.py index a8848d3e3..1d083c04b 100644 --- a/pysteps/nowcasts/sseps.py +++ b/pysteps/nowcasts/sseps.py @@ -18,18 +18,17 @@ forecast """ -import numpy as np import time -from scipy.ndimage import generate_binary_structure, iterate_structure +import numpy as np +import xarray as xr +from scipy.ndimage import generate_binary_structure, iterate_structure -from pysteps import cascade -from pysteps import extrapolation -from pysteps import noise -from pysteps.decorators import deprecate_args +from pysteps import cascade, extrapolation, noise from pysteps.nowcasts import utils as nowcast_utils from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, 
correlation +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -39,11 +38,8 @@ dask_imported = False -@deprecate_args({"R": "precip", "V": "velocity"}, "1.8.0") def forecast( - precip, - metadata, - velocity, + dataset: xr.Dataset, timesteps, n_ens_members=24, n_cascade_levels=6, @@ -78,18 +74,14 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between the inputs - are assumed to be regular, and the inputs are required to have finite values. - metadata: dict - Metadata dictionary containing the accutime, xpixelsize, threshold and - zerovalue attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. xpixelsize is assumed to be in meters. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the advection - field. The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The units and stepsize of ``y`` and ``x`` have to be the same and the only supported + units are meters and kilometers. The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. win_size: int or two-element sequence of ints Size-length of the localization window. 
overlap: float [0,1[ @@ -181,12 +173,15 @@ def forecast( Returns ------- - out: ndarray - If return_output is True, a four-dimensional array of shape - (n_ens_members,num_timesteps,m,n) containing a time series of forecast + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is - taken from the input precipitation fields. + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). See also -------- @@ -201,7 +196,20 @@ def forecast( ---------- :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`NBSG2017` """ - + timesteps_in = timesteps + x_units = dataset["x"].attrs["units"] + y_units = dataset["y"].attrs["units"] + x_stepsize = dataset["x"].attrs["stepsize"] + y_stepsize = dataset["y"].attrs["stepsize"] + if x_units != y_units or x_stepsize != y_stepsize: + raise ValueError("units and stepsize needs to be the same for x and y") + if x_units not in ["m", "km"]: + raise ValueError("only m and km supported as x and y units") + + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -237,8 +245,10 @@ def forecast( else: win_size = tuple([int(win_size[i]) for i in range(2)]) - timestep = metadata["accutime"] - kmperpixel = metadata["xpixelsize"] / 1000 + timestep = dataset["time"].attrs["stepsize"] / 60 + kmperpixel = x_stepsize + if x_units == "m": + kmperpixel = kmperpixel / 1000 print("Computing SSEPS 
nowcast") print("-----------------------") @@ -292,8 +302,8 @@ def forecast( f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" ) - precip_thr = metadata["threshold"] - precip_min = metadata["zerovalue"] + precip_thr = dataset[precip_var].attrs["threshold"] + precip_min = dataset[precip_var].attrs["zerovalue"] num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers @@ -911,10 +921,12 @@ def worker(j): if return_output: outarr = np.stack([np.stack(precip_forecast[j]) for j in range(n_ens_members)]) + output_dataset = convert_output_to_xarray_dataset(dataset, timesteps_in, outarr) + if measure_time: - return outarr, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return outarr + return output_dataset else: return None diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 50c6a5d22..366f32f6d 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -11,19 +11,18 @@ forecast """ +import time + import numpy as np +import xarray as xr from scipy.ndimage import generate_binary_structure, iterate_structure -import time -from pysteps import cascade -from pysteps import extrapolation -from pysteps import noise -from pysteps import utils -from pysteps.decorators import deprecate_args +from pysteps import cascade, extrapolation, noise, utils from pysteps.nowcasts import utils as nowcast_utils +from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation -from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -33,10 +32,8 @@ DASK_IMPORTED = False -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, n_ens_members=24, 
n_cascade_levels=6, @@ -72,14 +69,13 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between the - inputs are assumed to be regular. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the advection - field. The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -241,13 +237,13 @@ def forecast( Returns ------- - out: ndarray - If return_output is True, a four-dimensional array of shape - (n_ens_members,num_timesteps,m,n) containing a time series of forecast + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is - taken from the input precipitation fields. If measure_time is True, the - return value is a three-element tuple containing the nowcast array, the + taken from the metadata of the time coordinate. 
If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the initialization time of the nowcast generator and the time used in the main loop (seconds). @@ -261,6 +257,11 @@ def forecast( :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ + timesteps_in = timesteps + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -574,8 +575,6 @@ def f(precip, i): else: velocity_perturbators = None - precip_forecast = [[] for _ in range(n_ens_members)] - if probmatching_method == "mean": mu_0 = np.mean(precip[-1, :, :][precip[-1, :, :] >= precip_thr]) else: @@ -680,13 +679,13 @@ def f(precip, i): precip_forecast, mainloop_time = precip_forecast if return_output: - precip_forecast = np.stack( - [np.stack(precip_forecast[j]) for j in range(n_ens_members)] + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps_in, precip_forecast ) if measure_time: - return precip_forecast, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return precip_forecast + return output_dataset else: return None diff --git a/pysteps/nowcasts/utils.py b/pysteps/nowcasts/utils.py index fd111e28d..fed1c2f96 100644 --- a/pysteps/nowcasts/utils.py +++ b/pysteps/nowcasts/utils.py @@ -17,6 +17,7 @@ """ import time + import numpy as np from scipy.ndimage import binary_dilation, generate_binary_structure @@ -412,10 +413,10 @@ def worker2(i): if not ensemble: precip_forecast_out = precip_forecast_out[0, :] - if measure_time: - return precip_forecast_out, time.time() - starttime_total - else: - return precip_forecast_out + if measure_time: + return precip_forecast_out, time.time() - starttime_total + else: + return precip_forecast_out def print_ar_params(phi): diff --git 
a/pysteps/tests/test_motion_lk.py b/pysteps/tests/test_motion_lk.py index 871dcd98b..a8f640533 100644 --- a/pysteps/tests/test_motion_lk.py +++ b/pysteps/tests/test_motion_lk.py @@ -3,8 +3,8 @@ """ """ -import pytest import numpy as np +import pytest from pysteps import motion, verification from pysteps.tests.helpers import get_precipitation_fields @@ -61,19 +61,19 @@ def test_lk( pytest.importorskip("pandas") # inputs - precip, metadata = get_precipitation_fields( + dataset = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip = precip.filled() + precip_var = dataset.attrs["precip_var"] # Retrieve motion field oflow_method = motion.get_method("LK") - output = oflow_method( - precip, + output_dataset = oflow_method( + dataset, lk_kwargs=lk_kwargs, fd_method=fd_method, dense=dense, @@ -86,13 +86,17 @@ def test_lk( # Check format of ouput if dense: + output = np.stack( + [output_dataset["velocity_x"].values, output_dataset["velocity_y"].values] + ) assert isinstance(output, np.ndarray) assert output.ndim == 3 assert output.shape[0] == 2 - assert output.shape[1:] == precip[0].shape + assert output.shape[1:] == dataset[precip_var].values[0].shape if nr_std_outlier == 0: assert output.sum() == 0 else: + output = output_dataset assert isinstance(output, tuple) assert len(output) == 2 assert isinstance(output[0], np.ndarray) diff --git a/pysteps/tests/test_nowcasts_anvil.py b/pysteps/tests/test_nowcasts_anvil.py index 14a130fb1..35d84e0f3 100644 --- a/pysteps/tests/test_nowcasts_anvil.py +++ b/pysteps/tests/test_nowcasts_anvil.py @@ -31,31 +31,28 @@ def test_anvil_rainrate( ): """Tests ANVIL nowcast using rain rate precipitation fields.""" # inputs - precip_input = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=4, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + 
dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("anvil") output = nowcast_method( - precip_input[-(ar_order + 2) :], - retrieved_motion, + dataset_w_motion.isel(time=slice(-(ar_order + 2), None, None)), timesteps=timesteps, - rainrate=None, # no R(VIL) conversion is done n_cascade_levels=n_cascade_levels, ar_order=ar_order, ar_window_radius=ar_window_radius, @@ -63,9 +60,10 @@ def test_anvil_rainrate( measure_time=measure_time, ) if measure_time: - precip_forecast, __, __ = output + dataset_forecast, __, __ = output else: - precip_forecast = output + dataset_forecast = output + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( @@ -73,7 +71,7 @@ def test_anvil_rainrate( ) result = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" + precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.2f}, required > {min_csi:.2f}" diff --git a/pysteps/tests/test_nowcasts_lagrangian_probability.py b/pysteps/tests/test_nowcasts_lagrangian_probability.py index 1ec352b0b..d75b29e87 100644 --- a/pysteps/tests/test_nowcasts_lagrangian_probability.py +++ b/pysteps/tests/test_nowcasts_lagrangian_probability.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- +from datetime import datetime, timezone + import numpy as np import pytest +import xarray as xr +from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.nowcasts.lagrangian_probability import forecast from pysteps.tests.helpers import get_precipitation_fields 
-from pysteps.motion.lucaskanade import dense_lucaskanade def test_numerical_example(): @@ -12,12 +15,23 @@ def test_numerical_example(): precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) + now = datetime.now(tz=timezone.utc).replace(tzinfo=None) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], [precip]), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + coords={"time": (["time"], [now], {"stepsize": 300})}, + attrs={"precip_var": "precip_intensity"}, + ) timesteps = 4 thr = 0.5 slope = 1 # pixels / timestep # compute probability forecast - fct = forecast(precip, velocity, timesteps, thr, slope=slope) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == timesteps @@ -26,7 +40,8 @@ def test_numerical_example(): assert fct.min() >= 0.0 # slope = 0 should return a binary field - fct = forecast(precip, velocity, timesteps, thr, slope=0) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=0) + fct = dataset_forecast["precip_intensity"].values ref = (np.repeat(precip[None, ...], timesteps, axis=0) >= thr).astype(float) assert np.allclose(fct, fct.astype(bool)) assert np.allclose(fct, ref) @@ -37,12 +52,23 @@ def test_numerical_example_with_float_slope_and_float_list_timesteps(): precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) + now = datetime.now(tz=timezone.utc).replace(tzinfo=None) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], [precip]), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + coords={"time": (["time"], [now], {"stepsize": 300})}, + attrs={"precip_var": "precip_intensity"}, + ) timesteps = [1.0, 2.0, 5.0, 12.0] thr = 0.5 slope = 1.0 # pixels / timestep # compute probability forecast - 
fct = forecast(precip, velocity, timesteps, thr, slope=slope) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) @@ -56,16 +82,18 @@ def test_real_case(): pytest.importorskip("cv2") # inputs - precip, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs # motion - motion = dense_lucaskanade(precip) + dataset_w_motion = dense_lucaskanade(dataset_input) # parameters timesteps = [1, 2, 3] @@ -74,13 +102,18 @@ def test_real_case(): # compute probability forecast extrap_kwargs = dict(allow_nonfinite_values=True) - fct = forecast( - precip[-1], motion, timesteps, thr, slope=slope, extrap_kwargs=extrap_kwargs + dataset_forecast = forecast( + dataset_w_motion.isel(time=slice(-1, None, None)), + timesteps, + thr, + slope=slope, + extrap_kwargs=extrap_kwargs, ) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) - assert fct.shape[1:] == precip.shape[1:] + assert fct.shape[1:] == dataset_input[precip_var].values.shape[1:] assert np.nanmax(fct) <= 1.0 assert np.nanmin(fct) >= 0.0 @@ -89,11 +122,19 @@ def test_wrong_inputs(): # dummy inputs precip = np.zeros((3, 3)) velocity = np.zeros((2, *precip.shape)) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["y", "x"], precip), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + attrs={"precip_var": "precip_intensity"}, + ) # timesteps must be > 0 with pytest.raises(ValueError): - forecast(precip, velocity, 0, 1) + forecast(dataset_input, 0, 1) # timesteps must be a sorted list with pytest.raises(ValueError): - forecast(precip, velocity, [2, 1], 1) + forecast(dataset_input, [2, 
1], 1) diff --git a/pysteps/tests/test_nowcasts_linda.py b/pysteps/tests/test_nowcasts_linda.py index 2d5f03b71..a5b60611b 100644 --- a/pysteps/tests/test_nowcasts_linda.py +++ b/pysteps/tests/test_nowcasts_linda.py @@ -1,13 +1,13 @@ -from datetime import timedelta import os +from datetime import timedelta + import numpy as np import pytest +import xarray as xr from pysteps import io, motion, nowcasts, verification -from pysteps.nowcasts.linda import forecast from pysteps.tests.helpers import get_precipitation_fields - linda_arg_names = ( "add_perturbations", "kernel_type", @@ -42,7 +42,7 @@ def test_linda( pytest.importorskip("skimage") # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, metadata=True, @@ -51,20 +51,23 @@ def test_linda( log_transform=False, ) - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, clip=(354000, 866000, -96000, 416000), upscale=4000, log_transform=False, - )[1:, :, :] + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) - precip_forecast = forecast( - precip_input, - retrieved_motion, + nowcast_method = nowcasts.get_method("linda") + + dataset_forecast = nowcast_method( + dataset_w_motion, 3, kernel_type=kernel_type, vel_pert_method=vel_pert_method, @@ -78,68 +81,82 @@ def test_linda( seed=42, ) if measure_time: - assert len(precip_forecast) == 3 - assert isinstance(precip_forecast[1], float) - precip_forecast = precip_forecast[0] + assert len(dataset_forecast) == 3 + assert isinstance(dataset_forecast[1], float) + dataset_forecast = dataset_forecast[0] + + precip_forecast = dataset_forecast[precip_var].values if not add_perturbations: assert precip_forecast.ndim == 3 
assert precip_forecast.shape[0] == 3 - assert precip_forecast.shape[1:] == precip_input.shape[1:] + assert precip_forecast.shape[1:] == dataset_input[precip_var].values.shape[1:] csi = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=1.0, scores="CSI" + precip_forecast[-1], + dataset_obs[precip_var].values[-1], + thr=1.0, + scores="CSI", )["CSI"] assert csi > min_csi, f"CSI={csi:.1f}, required > {min_csi:.1f}" else: assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 5 assert precip_forecast.shape[1] == 3 - assert precip_forecast.shape[2:] == precip_input.shape[1:] + assert precip_forecast.shape[2:] == dataset_input[precip_var].values.shape[1:] - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" def test_linda_wrong_inputs(): # dummy inputs - precip = np.zeros((3, 3, 3)) - velocity = np.zeros((2, 3, 3)) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], np.zeros((3, 3, 3))), + "velocity_x": (["y", "x"], np.zeros((3, 3))), + "velocity_y": (["y", "x"], np.zeros((3, 3))), + }, + attrs={"precip_var": "precip_intensity"}, + ) + dataset_input_4d = xr.Dataset( + data_vars={ + "precip_intensity": ( + ["ens_number", "time", "y", "x"], + np.zeros((3, 3, 3, 3)), + ), + "velocity_x": (["y", "x"], np.zeros((3, 3))), + "velocity_y": (["y", "x"], np.zeros((3, 3))), + }, + attrs={"precip_var": "precip_intensity"}, + ) + + nowcast_method = nowcasts.get_method("linda") # vel_pert_method is set but kmperpixel is None with pytest.raises(ValueError): - forecast(precip, velocity, 1, vel_pert_method="bps", kmperpixel=None) + nowcast_method(dataset_input, 1, vel_pert_method="bps", kmperpixel=None) # vel_pert_method is set but timestep is None with pytest.raises(ValueError): - forecast( - precip, velocity, 1, 
vel_pert_method="bps", kmperpixel=1, timestep=None + nowcast_method( + dataset_input, 1, vel_pert_method="bps", kmperpixel=1, timestep=None ) # fractional time steps not yet implemented # timesteps is not an integer with pytest.raises(ValueError): - forecast(precip, velocity, [1.0, 2.0]) + nowcast_method(dataset_input, [1.0, 2.0]) # ari_order 1 or 2 required with pytest.raises(ValueError): - forecast(precip, velocity, 1, ari_order=3) + nowcast_method(dataset_input, 1, ari_order=3) # precip_fields must be a three-dimensional array with pytest.raises(ValueError): - forecast(np.zeros((3, 3, 3, 3)), velocity, 1) - - # precip_fields.shape[0] < ari_order+2 - with pytest.raises(ValueError): - forecast(np.zeros((2, 3, 3)), velocity, 1, ari_order=1) - - # advection_field must be a three-dimensional array - with pytest.raises(ValueError): - forecast(precip, velocity[0], 1) - - # dimension mismatch between precip_fields and advection_field - with pytest.raises(ValueError): - forecast(np.zeros((3, 2, 3)), velocity, 1) + nowcast_method(dataset_input_4d, 1) def test_linda_callback(tmp_path): diff --git a/pysteps/tests/test_nowcasts_sprog.py b/pysteps/tests/test_nowcasts_sprog.py index 1077c3edd..f64900cd8 100644 --- a/pysteps/tests/test_nowcasts_sprog.py +++ b/pysteps/tests/test_nowcasts_sprog.py @@ -30,29 +30,28 @@ def test_sprog( ): """Tests SPROG nowcast.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - 
retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("sprog") - precip_forecast = nowcast_method( - precip_input, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, timesteps=timesteps, precip_thr=metadata["threshold"], n_cascade_levels=n_cascade_levels, @@ -60,6 +59,7 @@ def test_sprog( probmatching_method=probmatching_method, domain=domain, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( @@ -67,7 +67,7 @@ def test_sprog( ) result = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" + precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.1f}, required > {min_csi:.1f}" diff --git a/pysteps/tests/test_nowcasts_sseps.py b/pysteps/tests/test_nowcasts_sseps.py index 4d89fd33a..6d3a3c9c0 100644 --- a/pysteps/tests/test_nowcasts_sseps.py +++ b/pysteps/tests/test_nowcasts_sseps.py @@ -17,8 +17,8 @@ ) sseps_arg_values = [ - (5, 6, 2, "incremental", "cdf", 200, 3, 0.60), - (5, 6, 2, "incremental", "cdf", 200, [3], 0.60), + (5, 6, 2, "incremental", "cdf", 200, 3, 0.62), + (5, 6, 2, "incremental", "cdf", 200, [3], 0.62), ] @@ -35,32 +35,29 @@ def test_sseps( ): """Tests SSEPS nowcast.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() + precip_var = dataset_input.attrs["precip_var"] - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = 
oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("sseps") - precip_forecast = nowcast_method( - precip_input, - metadata, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, + timesteps, win_size=win_size, - timesteps=timesteps, n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, ar_order=ar_order, @@ -68,6 +65,7 @@ def test_sseps( mask_method=mask_method, probmatching_method=probmatching_method, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members @@ -75,7 +73,9 @@ def test_sseps( timesteps if isinstance(timesteps, int) else len(timesteps) ) - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" diff --git a/pysteps/tests/test_nowcasts_steps.py b/pysteps/tests/test_nowcasts_steps.py index adb6ea917..16d2e4de5 100644 --- a/pysteps/tests/test_nowcasts_steps.py +++ b/pysteps/tests/test_nowcasts_steps.py @@ -56,17 +56,15 @@ def test_steps_skill( ).isel(time=slice(1, None, None)) precip_var = dataset_input.attrs["precip_var"] metadata = dataset_input[precip_var].attrs - precip_data = dataset_input[precip_var].values pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_data) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("steps") - precip_forecast = nowcast_method( - precip_data, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, timesteps=timesteps, precip_thr=metadata["threshold"], kmperpixel=2.0, @@ -79,6 +77,7 @@ def test_steps_skill( probmatching_method=probmatching_method, domain=domain, ) + precip_forecast = dataset_forecast[precip_var].values assert 
precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members diff --git a/pysteps/tests/test_nowcasts_utils.py b/pysteps/tests/test_nowcasts_utils.py index 075225427..1dfeb27a9 100644 --- a/pysteps/tests/test_nowcasts_utils.py +++ b/pysteps/tests/test_nowcasts_utils.py @@ -26,17 +26,18 @@ def test_nowcast_main_loop( timesteps, ensemble, num_ensemble_members, velocity_perturbations ): """Test the nowcast_main_loop function.""" - precip = get_precipitation_fields( + dataset = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) - precip = precip.filled() oflow_method = motion.get_method("LK") - velocity = oflow_method(precip) + dataset = oflow_method(dataset) + precip = dataset["precip_intensity"].values + velocity = np.stack([dataset["velocity_x"].values, dataset["velocity_y"].values]) precip = precip[-1] diff --git a/pysteps/tests/test_utils_dimension.py b/pysteps/tests/test_utils_dimension.py index 2bbb63f58..038b725a0 100644 --- a/pysteps/tests/test_utils_dimension.py +++ b/pysteps/tests/test_utils_dimension.py @@ -5,19 +5,17 @@ import numpy as np import pytest import xarray as xr -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_almost_equal, assert_array_equal from pytest import raises -from pysteps.converters import convert_to_xarray_dataset from pysteps.utils import dimension +from pysteps.xarray_helpers import convert_input_to_xarray_dataset fillvalues_metadata = { "x1": 0, "x2": 4, "y1": 0, "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, "zerovalue": 0, "yorigin": "lower", "unit": "mm/h", @@ -32,7 +30,6 @@ } test_data_not_trim = ( - # "data, window_size, axis, method, expected" ( np.arange(12).reshape(2, 6), 2, @@ -94,7 +91,7 @@ def test_aggregate_fields(data, window_size, dim, method, expected): windows size does not divide the data dimensions. The length of each dimension should be larger than 2. 
""" - dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) actual = dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) assert_array_equal(actual["precip_intensity"].values, expected) @@ -104,7 +101,7 @@ def test_aggregate_fields(data, window_size, dim, method, expected): data = np.pad(data, ((0, 0), (0, 1))) else: data = np.pad(data, (0, 1)) - dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) actual = dimension.aggregate_fields( dataset, window_size, dim=dim, method=method, trim=True @@ -115,13 +112,85 @@ def test_aggregate_fields(data, window_size, dim, method, expected): dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) +test_data_agg_w_velocity = ( + ( + np.arange(12).reshape(2, 6), + np.arange(12).reshape(2, 6), + np.arange(12).reshape(2, 6), + np.arange(0, 1.2, 0.1).reshape(2, 6), + 2, + "x", + "mean", + "mean", + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + np.array([[0, 0.2, 0.4], [0.6, 0.8, 1]]), + ), + ( + np.arange(4 * 6).reshape(4, 6), + np.arange(4 * 6).reshape(4, 6), + np.arange(4 * 6).reshape(4, 6), + np.arange(0, 1.2, 0.05).reshape(4, 6), + (2, 3), + ("y", "x"), + "mean", + "sum", + np.array([[4, 7], [16, 19]]), + np.array([[24, 42], [96, 114]]), + np.array([[0, 0.15], [0.6, 0.75]]), + ), +) + + +@pytest.mark.parametrize( + "data, data_vx, data_vy, data_qual, window_size, dim, method, velocity_method, expected, expected_v, expected_qual", + test_data_agg_w_velocity, +) +def test_aggregate_fields_w_velocity( + data, + data_vx, + data_vy, + data_qual, + window_size, + dim, + method, + velocity_method, + expected, + expected_v, + expected_qual, +): + """ + Test the aggregate_fields function for dataset with velocity information. 
+ The windows size must divide exactly the data dimensions. + Internally, additional test are generated for situations where the + windows size does not divide the data dimensions. + The length of each dimension should be larger than 2. + """ + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = dataset.assign( + { + "velocity_x": (("y", "x"), data_vx), + "velocity_y": (("y", "x"), data_vy), + "quality": (("y", "x"), data_qual), + } + ) + + actual = dimension.aggregate_fields( + dataset, window_size, dim=dim, method=method, velocity_method=velocity_method + ) + assert_array_equal(actual["precip_intensity"].values, expected) + assert_array_equal(actual["velocity_x"].values, expected_v) + assert_array_equal(actual["velocity_y"].values, expected_v) + assert_array_almost_equal(actual["quality"].values, expected_qual) + + def test_aggregate_fields_errors(): """ Test that the errors are correctly captured in the aggregate_fields function. """ data = np.arange(4 * 6).reshape(4, 6) - dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) with raises(ValueError): dimension.aggregate_fields(dataset, -1, dim="y") @@ -160,7 +229,7 @@ def test_aggregate_fields_errors(): ) def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expected): """Test the aggregate_fields_time.""" - dataset_ref = convert_to_xarray_dataset( + dataset_ref = convert_input_to_xarray_dataset( data, None, {**fillvalues_metadata, **metadata} ) datasets = [] @@ -234,7 +303,9 @@ def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expe ) def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expected): """Test the aggregate_fields_space.""" - dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata}) + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) 
assert_array_equal( dimension.aggregate_fields_space(dataset, space_window, ignore_nan)[ "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum" @@ -271,7 +342,9 @@ def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expect @pytest.mark.parametrize("R, metadata, extent, expected", test_data_clip_domain) def test_clip_domain(R, metadata, extent, expected): """Test the clip_domain.""" - dataset = convert_to_xarray_dataset(R, None, {**fillvalues_metadata, **metadata}) + dataset = convert_input_to_xarray_dataset( + R, None, {**fillvalues_metadata, **metadata} + ) assert_array_equal( dimension.clip_domain(dataset, extent)["precip_intensity"].values, expected ) @@ -336,12 +409,67 @@ def test_clip_domain(R, metadata, extent, expected): @pytest.mark.parametrize("data, metadata, method, inverse, expected", test_data_square) def test_square_domain(data, metadata, method, inverse, expected): """Test the square_domain.""" - dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata}) - dataset["precip_intensity"].attrs = { - **dataset["precip_intensity"].attrs, - **metadata, - } + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + if "square_method" in metadata: + dataset.attrs["square_method"] = metadata["square_method"] + if "orig_domain" in metadata: + dataset.attrs["orig_domain"] = metadata["orig_domain"] assert_array_equal( dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, expected, ) + + +# square_domain +R = np.ones((4, 2)) +test_data_square_w_velocity = [ + # square by padding + ( + R, + {"x1": 0, "x2": 2, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1}, + "pad", + False, + np.array([[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]), + np.array([[0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0]]), + ) +] + + +@pytest.mark.parametrize( + "data, metadata, method, inverse, expected, expected_velqual", + 
test_data_square_w_velocity, +) +def test_square_w_velocity(data, metadata, method, inverse, expected, expected_velqual): + """Test the square_domain.""" + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + dataset = dataset.assign( + { + "velocity_x": (("y", "x"), data), + "velocity_y": (("y", "x"), data), + "quality": (("y", "x"), data), + } + ) + if "square_method" in metadata: + dataset.attrs["square_method"] = metadata["square_method"] + if "orig_domain" in metadata: + dataset.attrs["orig_domain"] = metadata["orig_domain"] + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, + expected, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["velocity_x"].values, + expected_velqual, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["velocity_y"].values, + expected_velqual, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["quality"].values, + expected_velqual, + ) diff --git a/pysteps/utils/dimension.py b/pysteps/utils/dimension.py index efa459610..d039d8ef0 100644 --- a/pysteps/utils/dimension.py +++ b/pysteps/utils/dimension.py @@ -14,14 +14,23 @@ clip_domain square_domain """ +from typing import Any, Callable + import numpy as np import xarray as xr -from pysteps.converters import compute_lat_lon +from pysteps.xarray_helpers import compute_lat_lon -_aggregation_methods = dict( - sum=np.sum, mean=np.mean, nanmean=np.nanmean, nansum=np.nansum -) +_aggregation_methods: dict[str, Callable[..., Any]] = { + "sum": np.sum, + "mean": np.mean, + "min": np.min, + "max": np.max, + "nanmean": np.nanmean, + "nansum": np.nansum, + "nanmin": np.nanmin, + "nanmax": np.nanmax, +} def aggregate_fields_time( @@ -93,7 +102,9 @@ def aggregate_fields_time( if ignore_nan: method = "".join(("nan", method)) - return aggregate_fields(dataset, window_size, dim="time", method=method) + return aggregate_fields( + 
dataset, window_size, dim="time", method=method, velocity_method="sum" + ) def aggregate_fields_space( @@ -147,9 +158,8 @@ def aggregate_fields_space( if np.isscalar(space_window): space_window = (space_window, space_window) - # assumes that frames are evenly spaced - ydelta = dataset["y"].values[1] - dataset["y"].values[0] - xdelta = dataset["x"].values[1] - dataset["x"].values[0] + ydelta = dataset["y"].attrs["stepsize"] + xdelta = dataset["x"].attrs["stepsize"] if space_window[0] % ydelta > 1e-10 or space_window[1] % xdelta > 1e-10: raise ValueError("space_window does not equally split dataset") @@ -166,11 +176,16 @@ def aggregate_fields_space( window_size = (int(space_window[0] / ydelta), int(space_window[1] / xdelta)) - return aggregate_fields(dataset, window_size, ["y", "x"], method) + return aggregate_fields(dataset, window_size, ["y", "x"], method, "mean") def aggregate_fields( - dataset: xr.Dataset, window_size, dim="x", method="mean", trim=False + dataset: xr.Dataset, + window_size, + dim="x", + method="mean", + velocity_method="mean", + trim=False, ) -> xr.Dataset: """Aggregate fields along a given direction. @@ -201,7 +216,11 @@ def aggregate_fields( dims, instead of a single dim method: string, optional Optional argument that specifies the operation to use - to aggregate the values within the window. + to aggregate the precipitation values within the window. + Default to mean operator. + velocity_method: string, optional + Optional argument that specifies the operation to use + to aggregate the velocity values within the window. Default to mean operator. 
trim: bool In case that the ``data`` is not perfectly divisible by @@ -254,9 +273,8 @@ def aggregate_fields( f"dataset.sizes[dim]={dataset.sizes[d]}" ) - # FIXME: The aggregation method is applied to all DataArrays in the Dataset - # Fix to allow support for an aggregation method per DataArray - return ( + dataset_ref = dataset + dataset = ( dataset.rolling(dict(zip(dim, window_size))) .reduce(_aggregation_methods[method]) .isel( @@ -266,6 +284,50 @@ def aggregate_fields( } ) ) + if "velocity_x" in dataset_ref: + dataset["velocity_x"] = ( + dataset_ref["velocity_x"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[velocity_method]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + if "velocity_y" in dataset_ref: + dataset["velocity_y"] = ( + dataset_ref["velocity_y"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[velocity_method]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + if "quality" in dataset_ref: + dataset["quality"] = ( + dataset_ref["quality"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods["min"]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + + return dataset def clip_domain(dataset: xr.Dataset, extent=None): @@ -298,8 +360,7 @@ def clip_domain(dataset: xr.Dataset, extent=None): def _pad_domain( dataset: xr.Dataset, dim_to_pad: str, idx_buffer: int, zerovalue: float ) -> xr.Dataset: - # assumes that frames are evenly spaced - delta = dataset[dim_to_pad].values[1] - dataset[dim_to_pad].values[0] + delta = dataset[dim_to_pad].attrs["stepsize"] end_values = ( dataset[dim_to_pad].values[0] - delta * idx_buffer, dataset[dim_to_pad].values[-1] + delta * idx_buffer, @@ -307,8 +368,6 @@ def _pad_domain( 
dataset_ref = dataset - # FIXME: The same zerovalue is used for all DataArrays in the Dataset - # Fix to allow support for a zerovalue per DataArray dataset = dataset_ref.pad({dim_to_pad: idx_buffer}, constant_values=zerovalue) dataset[dim_to_pad] = dataset_ref[dim_to_pad].pad( {dim_to_pad: idx_buffer}, @@ -318,6 +377,24 @@ def _pad_domain( dataset.lat.data[:], dataset.lon.data[:] = compute_lat_lon( dataset.x.values, dataset.y.values, dataset.attrs["projection"] ) + if "velocity_x" in dataset_ref: + dataset["velocity_x"].data = ( + dataset_ref["velocity_x"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + if "velocity_y" in dataset_ref: + dataset["velocity_y"].data = ( + dataset_ref["velocity_y"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + if "quality" in dataset_ref: + dataset["quality"].data = ( + dataset_ref["quality"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) return dataset @@ -332,14 +409,17 @@ def square_domain(dataset: xr.Dataset, method="pad", inverse=False): :py:mod:`pysteps.io.importers`. method: {'pad', 'crop'}, optional Either pad or crop. - If pad, an equal number of zeros is added to both ends of its shortest - side in order to produce a square domain. + If pad, an equal number of pixels + filled with the minimum value of the precipitation + field is added to both ends of the precipitation fields shortest + side in order to produce a square domain. The quality and velocity fields + are always padded with zeros. If crop, an equal number of pixels is removed to both ends of its longest side in order to produce a square domain. Note that the crop method involves an irreversible loss of data. inverse: bool, optional Perform the inverse method to recover the original domain shape. - After a crop, the inverse is performed by padding the field with zeros. + After a crop, the inverse is performed by doing a pad. 
Returns ------- diff --git a/pysteps/converters.py b/pysteps/xarray_helpers.py similarity index 76% rename from pysteps/converters.py rename to pysteps/xarray_helpers.py index 2825af612..a1049f32f 100644 --- a/pysteps/converters.py +++ b/pysteps/xarray_helpers.py @@ -3,7 +3,7 @@ pysteps.converters ================== -Module with data converter functions. +Module with xarray helper functions. .. autosummary:: :toctree: ../generated/ @@ -11,6 +11,8 @@ convert_to_xarray_dataset """ +from datetime import datetime, timedelta + import numpy as np import numpy.typing as npt import pyproj @@ -77,7 +79,7 @@ def compute_lat_lon( return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape) -def convert_to_xarray_dataset( +def convert_input_to_xarray_dataset( precip: np.ndarray, quality: np.ndarray | None, metadata: dict[str, str | float | None], @@ -111,6 +113,21 @@ def convert_to_xarray_dataset( y_r = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1] y_r += 0.5 * (y_r[1] - y_r[0]) + if "xpixelsize" in metadata: + xpixelsize = metadata["xpixelsize"] + else: + xpixelsize = x_r[1] - x_r[0] + + if "ypixelsize" in metadata: + ypixelsize = metadata["ypixelsize"] + else: + ypixelsize = y_r[1] - y_r[0] + + if x_r[1] - x_r[0] != xpixelsize: + raise ValueError("xpixelsize does not match x1, x2 and array shape") + if y_r[1] - y_r[0] != ypixelsize: + raise ValueError("ypixelsize does not match y1, y2 and array shape") + # flip yr vector if yorigin is upper if metadata["yorigin"] == "upper": y_r = np.flip(y_r) @@ -160,6 +177,7 @@ def convert_to_xarray_dataset( "long_name": "y-coordinate in Cartesian system", "standard_name": "projection_y_coordinate", "units": metadata["cartesian_unit"], + "stepsize": ypixelsize, }, ), "x": ( @@ -170,6 +188,7 @@ def convert_to_xarray_dataset( "long_name": "x-coordinate in Cartesian system", "standard_name": "projection_x_coordinate", "units": metadata["cartesian_unit"], + "stepsize": xpixelsize, }, ), "lon": ( @@ -178,7 +197,6 @@ def 
convert_to_xarray_dataset( { "long_name": "longitude coordinate", "standard_name": "longitude", - # TODO(converters): Don't hard-code the unit. "units": "degrees_east", }, ), @@ -188,7 +206,6 @@ def convert_to_xarray_dataset( { "long_name": "latitude coordinate", "standard_name": "latitude", - # TODO(converters): Don't hard-code the unit. "units": "degrees_north", }, ), @@ -207,3 +224,45 @@ def convert_to_xarray_dataset( } dataset = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) return dataset.sortby(["y", "x"]) + + +def convert_output_to_xarray_dataset( + dataset: xr.Dataset, timesteps: int | list[int], output: np.ndarray +) -> xr.Dataset: + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + + last_timestamp = ( + dataset["time"][-1].values.astype("datetime64[us]").astype(datetime) + ) + time_metadata = dataset["time"].attrs + timestep_seconds = dataset["time"].attrs["stepsize"] + dataset = dataset.drop_vars([precip_var]).drop_dims(["time"]) + if isinstance(timesteps, int): + timesteps = list(range(1, timesteps + 1)) + next_timestamps = [ + last_timestamp + timedelta(seconds=timestep_seconds * i) for i in timesteps + ] + dataset = dataset.assign_coords( + {"time": (["time"], next_timestamps, time_metadata)} + ) + + if output.ndim == 4: + dataset = dataset.assign_coords( + { + "ens_number": ( + ["ens_number"], + list(range(1, output.shape[0] + 1)), + { + "long_name": "ensemble member", + "standard_name": "realization", + "units": "", + }, + ) + } + ) + dataset[precip_var] = (["ens_number", "time", "y", "x"], output, metadata) + else: + dataset[precip_var] = (["time", "y", "x"], output, metadata) + + return dataset From c2ae9db5a86c3f41e1ff390a27f75c2ab27ad55a Mon Sep 17 00:00:00 2001 From: gjm174 <56946945+gjm174@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:34:40 +0200 Subject: [PATCH 08/65] Added member and time dimension (#432) --- pysteps/xarray_helpers.py | 75 +++++++++++++++++++++++++++++++++------ 1 file 
changed, 65 insertions(+), 10 deletions(-) diff --git a/pysteps/xarray_helpers.py b/pysteps/xarray_helpers.py index a1049f32f..33ec2f40c 100644 --- a/pysteps/xarray_helpers.py +++ b/pysteps/xarray_helpers.py @@ -83,6 +83,7 @@ def convert_input_to_xarray_dataset( precip: np.ndarray, quality: np.ndarray | None, metadata: dict[str, str | float | None], + startdate: datetime | None = None, ) -> xr.Dataset: """ Read a precip, quality, metadata tuple as returned by the importers @@ -99,6 +100,8 @@ def convert_input_to_xarray_dataset( metadata: dict Metadata dictionary containing the attributes described in the documentation of :py:mod:`pysteps.io.importers`. + startdate: datetime, None + Datetime object containing the start date and time for the nowcast Returns ------- @@ -107,7 +110,31 @@ def convert_input_to_xarray_dataset( """ var_name, attrs = cf_parameters_from_unit(metadata["unit"]) - h, w = precip.shape + + dims = None + timesteps = None + ens_number = None + + if precip.ndim == 4: + ens_number, timesteps, h, w = precip.shape + dims = ["ens_number", "time", "y", "x"] + + if startdate is None: + raise Exception("startdate missing") + + elif precip.ndim == 3: + timesteps, h, w = precip.shape + dims = ["time", "y", "x"] + + if startdate is None: + raise Exception("startdate missing") + + elif precip.ndim == 2: + h, w = precip.shape + dims = ["y", "x"] + else: + raise Exception(f"Precip field shape: {precip.shape} not supported") + x_r = np.linspace(metadata["x1"], metadata["x2"], w + 1)[:-1] x_r += 0.5 * (x_r[1] - x_r[0]) y_r = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1] @@ -142,25 +169,33 @@ def convert_input_to_xarray_dataset( data_vars = { var_name: ( - ["y", "x"], + dims, precip, { "units": attrs["units"], "standard_name": attrs["standard_name"], "long_name": attrs["long_name"], "grid_mapping": "projection", - "transform": metadata["transform"], - "accutime": metadata["accutime"], - "threshold": metadata["threshold"], - "zerovalue": 
metadata["zerovalue"], - "zr_a": metadata["zr_a"], - "zr_b": metadata["zr_b"], }, ) } + + metadata_keys = [ + "transform", + "accutime", + "threshold", + "zerovalue", + "zr_a", + "zr_b", + ] + + for metadata_field in metadata_keys: + if metadata_field in metadata: + data_vars[var_name][2][metadata_field] = metadata[metadata_field] + if quality is not None: data_vars["quality"] = ( - ["y", "x"], + dims, quality, { "units": "1", @@ -210,6 +245,26 @@ def convert_input_to_xarray_dataset( }, ), } + + if ens_number is not None: + coords["ens_number"] = ( + ["ens_number"], + list(range(1, ens_number + 1, 1)), + { + "long_name": "ensemble member", + "standard_name": "realization", + "units": "", + }, + ) + + if timesteps is not None: + startdate_str = datetime.strftime(startdate, "%Y-%m-%d %H:%M:%S") + + coords["time"] = ( + ["time"], + list(range(1, timesteps + 1, 1)), + {"long_name": "forecast time", "units": "seconds since %s" % startdate_str}, + ) if grid_mapping_var_name is not None: coords[grid_mapping_name] = ( [], @@ -223,7 +278,7 @@ def convert_input_to_xarray_dataset( "precip_var": var_name, } dataset = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) - return dataset.sortby(["y", "x"]) + return dataset.sortby(dims) def convert_output_to_xarray_dataset( From 9ea07fc509f99df83e72d7035c873f6c903cb7cf Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 8 Oct 2024 16:09:34 +0200 Subject: [PATCH 09/65] Initial commit to branch --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6ffe446bf..cb599f234 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 24.8.0 + rev: 24.10.0 hooks: - id: black language_version: python3 From 53a98a90ff1e0e980af8ec6473691c90e6bdb2d1 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 8 Oct 2024 18:39:40 +0200 Subject: [PATCH 10/65] Working on 
_apply_noise_and_ar_model method --- pysteps/nowcasts/steps.py | 819 +++++++++++++++++++++++++++----------- 1 file changed, 577 insertions(+), 242 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 50c6a5d22..f72ae51c1 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -33,6 +33,583 @@ DASK_IMPORTED = False +class StepsNowcaster: + def __init__(self, precip, velocity, timesteps, **kwargs): + # Store inputs and optional parameters + self.precip = precip + self.velocity = velocity + self.timesteps = timesteps + self.n_ens_members = kwargs.get("n_ens_members", 24) + self.n_cascade_levels = kwargs.get("n_cascade_levels", 6) + self.precip_thr = kwargs.get("precip_thr", None) + self.kmperpixel = kwargs.get("kmperpixel", None) + self.timestep = kwargs.get("timestep", None) + self.extrap_method = kwargs.get("extrap_method", "semilagrangian") + self.decomp_method = kwargs.get("decomp_method", "fft") + self.bandpass_filter_method = kwargs.get("bandpass_filter_method", "gaussian") + self.noise_method = kwargs.get("noise_method", "nonparametric") + self.noise_stddev_adj = kwargs.get("noise_stddev_adj", None) + self.ar_order = kwargs.get("ar_order", 2) + self.vel_pert_method = kwargs.get("vel_pert_method", "bps") + self.conditional = kwargs.get("conditional", False) + self.probmatching_method = kwargs.get("probmatching_method", "cdf") + self.mask_method = kwargs.get("mask_method", "incremental") + self.seed = kwargs.get("seed", None) + self.num_workers = kwargs.get("num_workers", 1) + self.fft_method = kwargs.get("fft_method", "numpy") + self.domain = kwargs.get("domain", "spatial") + self.extrap_kwargs = kwargs.get("extrap_kwargs", None) + self.filter_kwargs = kwargs.get("filter_kwargs", None) + self.noise_kwargs = kwargs.get("noise_kwargs", None) + self.vel_pert_kwargs = kwargs.get("vel_pert_kwargs", None) + self.mask_kwargs = kwargs.get("mask_kwargs", None) + self.measure_time = kwargs.get("measure_time", False) + 
self.callback = kwargs.get("callback", None) + self.return_output = kwargs.get("return_output", True) + + # Additional variables for internal state management + self.fft = None + self.bp_filter = None + self.extrapolator_method = None + self.domain_mask = None + self.precip_cascades = None + self.gamma = None + self.phi = None + self.pert_gen = None + self.noise_std_coeffs = None + self.randgen_prec = None + self.randgen_motion = None + self.velocity_perturbators = None + self.precip_forecast = None + self.mask_prec = None + self.mask_thr = None + + # Initialize number of ensemble workers + self.num_ensemble_workers = min(self.n_ens_members, self.num_workers) + + def compute_forecast(self): + """ + Main loop for nowcast ensemble generation. This handles extrapolation, + noise application, autoregressive modeling, and recomposition of cascades. + """ + self._check_inputs() + self._print_forecast_info() + # Measure time for initialization + if self.measure_time: + start_time_init = time.time() + + self._initialize_nowcast_components() + # Slice the precipitation field to only use the last ar_order + 1 fields + self.precip = self.precip[-(self.ar_order + 1) :, :, :].copy() + # Measure and print initialization time + if self.measure_time: + self._measure_time("Initialization", start_time_init) + + self._perform_extrapolation() + + self._apply_noise_and_ar_model() + + # Main forecasting loop for each timestep + for t in range(len(self.timesteps)): + # Measure time for each timestep + if self.measure_time: + start_time_loop = time.time() + + # Apply noise and autoregressive model + self._apply_noise_and_ar_model() + + # Recompose the cascades into forecast fields + self._recompose_cascades() + + # Optionally apply a mask if required + if self.mask_method: + self._apply_mask() + + # Measure and print time taken for each timestep + if self.measure_time: + self._measure_time(f"Timestep {t}", start_time_loop) + + print("Forecasting complete.") + + def _check_inputs(self): + 
""" + Validate the inputs to ensure consistency and correct shapes. + """ + + if self.precip.ndim != 3: + raise ValueError("precip must be a three-dimensional array") + if self.precip.shape[0] < self.ar_order + 1: + raise ValueError( + f"precip.shape[0] must be at least ar_order+1, " + f"but found {self.precip.shape[0]}" + ) + if self.velocity.ndim != 3: + raise ValueError("velocity must be a three-dimensional array") + if self.precip.shape[1:3] != self.velocity.shape[1:3]: + raise ValueError( + f"Dimension mismatch between precip and velocity: " + f"shape(precip)={self.precip.shape}, shape(velocity)={self.velocity.shape}" + ) + if ( + isinstance(self.timesteps, list) + and not sorted(self.timesteps) == self.timesteps + ): + raise ValueError("timesteps must be in ascending order") + if np.any(~np.isfinite(self.velocity)): + raise ValueError("velocity contains non-finite values") + if self.mask_method not in ["obs", "sprog", "incremental", None]: + raise ValueError( + f"Unknown mask method '{self.mask_method}'. " + "Must be 'obs', 'sprog', 'incremental', or None." + ) + if self.precip_thr is None: + if self.conditional: + raise ValueError("conditional=True but precip_thr is not specified.") + if self.mask_method is not None: + raise ValueError("mask_method is set but precip_thr is not specified.") + if self.probmatching_method == "mean": + raise ValueError( + "probmatching_method='mean' but precip_thr is not specified." + ) + if self.noise_method is not None and self.noise_stddev_adj == "auto": + raise ValueError( + "noise_stddev_adj='auto' but precip_thr is not specified." + ) + if self.noise_stddev_adj not in ["auto", "fixed", None]: + raise ValueError( + f"Unknown noise_stddev_adj method '{self.noise_stddev_adj}'. " + "Must be 'auto', 'fixed', or None." 
+ ) + if self.kmperpixel is None: + if self.vel_pert_method is not None: + raise ValueError("vel_pert_method is set but kmperpixel=None") + if self.mask_method == "incremental": + raise ValueError("mask_method='incremental' but kmperpixel=None") + if self.timestep is None: + if self.vel_pert_method is not None: + raise ValueError("vel_pert_method is set but timestep=None") + if self.mask_method == "incremental": + raise ValueError("mask_method='incremental' but timestep=None") + + # Handle None values for various kwargs + if self.extrap_kwargs is None: + self.extrap_kwargs = {} + if self.filter_kwargs is None: + self.filter_kwargs = {} + if self.noise_kwargs is None: + self.noise_kwargs = {} + if self.vel_pert_kwargs is None: + self.vel_pert_kwargs = {} + if self.mask_kwargs is None: + self.mask_kwargs = {} + + print("Inputs validated and initialized successfully.") + + def _print_forecast_info(self): + """ + Print information about the forecast setup, including inputs, methods, and parameters. + """ + print("Computing STEPS nowcast") + print("-----------------------") + print("") + + print("Inputs") + print("------") + print(f"input dimensions: {self.precip.shape[1]}x{self.precip.shape[2]}") + if self.kmperpixel is not None: + print(f"km/pixel: {self.kmperpixel}") + if self.timestep is not None: + print(f"time step: {self.timestep} minutes") + print("") + + print("Methods") + print("-------") + print(f"extrapolation: {self.extrap_method}") + print(f"bandpass filter: {self.bandpass_filter_method}") + print(f"decomposition: {self.decomp_method}") + print(f"noise generator: {self.noise_method}") + print( + "noise adjustment: {}".format( + ("yes" if self.noise_stddev_adj else "no") + ) + ) + print(f"velocity perturbator: {self.vel_pert_method}") + print( + "conditional statistics: {}".format(("yes" if self.conditional else "no")) + ) + print(f"precip. 
mask method: {self.mask_method}") + print(f"probability matching: {self.probmatching_method}") + print(f"FFT method: {self.fft_method}") + print(f"domain: {self.domain}") + print("") + + print("Parameters") + print("----------") + if isinstance(self.timesteps, int): + print(f"number of time steps: {self.timesteps}") + else: + print(f"time steps: {self.timesteps}") + print(f"ensemble size: {self.n_ens_members}") + print(f"parallel threads: {self.num_workers}") + print(f"number of cascade levels: {self.n_cascade_levels}") + print(f"order of the AR(p) model: {self.ar_order}") + + if self.vel_pert_method == "bps": + vp_par = self.vel_pert_kwargs.get( + "p_par", noise.motion.get_default_params_bps_par() + ) + vp_perp = self.vel_pert_kwargs.get( + "p_perp", noise.motion.get_default_params_bps_perp() + ) + print( + f"velocity perturbations, parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}" + ) + print( + f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" + ) + + if self.precip_thr is not None: + print(f"precip. intensity threshold: {self.precip_thr}") + + def _initialize_nowcast_components(self): + """ + Initialize the FFT, bandpass filters, decomposition methods, and extrapolation method. 
+ """ + M, N = self.precip.shape[1:] # Extract the spatial dimensions (height, width) + + # Initialize FFT method + self.fft = utils.get_method( + self.fft_method, shape=(M, N), n_threads=self.num_workers + ) + + # Initialize the band-pass filter for the cascade decomposition + filter_method = cascade.get_method(self.bandpass_filter_method) + self.bp_filter = filter_method( + (M, N), self.n_cascade_levels, **(self.filter_kwargs or {}) + ) + + # Get the decomposition method (e.g., FFT) + self.decomp_method, self.recomp_method = cascade.get_method(self.decomp_method) + + # Get the extrapolation method (e.g., semilagrangian) + self.extrapolator_method = extrapolation.get_method(self.extrap_method) + + # Generate the mesh grid for spatial coordinates + x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) + self.xy_coords = np.stack([x_values, y_values]) + + # Determine the domain mask from non-finite values in the precipitation data + self.domain_mask = np.logical_or.reduce( + [~np.isfinite(self.precip[i, :]) for i in range(self.precip.shape[0])] + ) + + print("Nowcast components initialized successfully.") + + def _perform_extrapolation(self): + """ + Extrapolate (advect) precipitation fields based on the velocity field to align + them in time. This prepares the precipitation fields for autoregressive modeling. 
+ """ + # Determine the precipitation threshold mask if conditional is set + if self.conditional: + self.mask_thr = np.logical_and.reduce( + [ + self.precip[i, :, :] >= self.precip_thr + for i in range(self.precip.shape[0]) + ] + ) + else: + self.mask_thr = None + + extrap_kwargs = self.extrap_kwargs.copy() + extrap_kwargs["xy_coords"] = self.xy_coords + extrap_kwargs["allow_nonfinite_values"] = ( + True if np.any(~np.isfinite(self.precip)) else False + ) + + res = [] + + def _extrapolate_single_field(precip, i): + # Extrapolate a single precipitation field using the velocity field + return self.extrapolator_method( + precip[i, :, :], + self.velocity, + self.ar_order - i, + "min", + **extrap_kwargs, + )[-1] + + for i in range(self.ar_order): + if ( + not DASK_IMPORTED + ): # If Dask is not available, perform sequential extrapolation + self.precip[i, :, :] = _extrapolate_single_field(self.precip, i) + else: + # If Dask is available, accumulate delayed computations for parallel execution + res.append(dask.delayed(_extrapolate_single_field)(self.precip, i)) + + # If Dask is available, perform the parallel computation + if DASK_IMPORTED and res: + num_workers_ = min(self.num_ensemble_workers, len(res)) + self.precip = np.stack( + list(dask.compute(*res, num_workers=num_workers_)) + + [self.precip[-1, :, :]] + ) + + print("Extrapolation complete and precipitation fields aligned.") + + def _apply_noise_and_ar_model(self): + """ + Apply noise and autoregressive (AR) models to precipitation cascades. + This method applies the AR model to the decomposed precipitation cascades + and adds noise perturbations if necessary. 
+ """ + # Make a copy of the precipitation data and replace non-finite values + self.precip = self.precip.copy() + for i in range(self.precip.shape[0]): + # Replace non-finite values with the minimum finite value of the precipitation field + self.precip[i, ~np.isfinite(self.precip[i, :])] = np.nanmin( + self.precip[i, :] + ) + + # Initialize the noise generator if the noise_method is provided + if self.noise_method is not None: + np.random.seed(self.seed) # Set the random seed for reproducibility + init_noise, generate_noise = noise.get_method( + self.noise_method + ) # Get noise methods + + # Initialize the perturbation generator for the precipitation field + self.pert_gen = init_noise( + self.precip, fft_method=self.fft, **self.noise_kwargs + ) + + # Handle noise standard deviation adjustments if necessary + if self.noise_stddev_adj == "auto": + print("Computing noise adjustment coefficients... ", end="", flush=True) + if self.measure_time: + starttime = time.time() + + # Compute noise adjustment coefficients + self.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( + self.precip[-1, :, :], + self.precip_thr, + np.min(self.precip), + self.bp_filter, + self.decomp_method, + self.pert_gen, + generate_noise, + 20, + conditional=self.conditional, + num_workers=self.num_workers, + seed=self.seed, + ) + + # Measure and print time taken + if self.measure_time: + self._measure_time( + "Noise adjustment coefficient computation", starttime + ) + else: + print("done.") + + elif self.noise_stddev_adj == "fixed": + # Set fixed noise adjustment coefficients + func = lambda k: 1.0 / (0.75 + 0.09 * k) + self.noise_std_coeffs = [ + func(k) for k in range(1, self.n_cascade_levels + 1) + ] + + else: + # Default to no adjustment + self.noise_std_coeffs = np.ones(self.n_cascade_levels) + + if self.noise_stddev_adj is not None: + # Print noise std deviation coefficients if adjustments were made + print(f"noise std. dev. 
coeffs: {str(self.noise_std_coeffs)}") + + else: + # No noise, so set perturbation generator and noise_std_coeffs to None + self.pert_gen = None + self.noise_std_coeffs = np.ones( + self.n_cascade_levels + ) # Keep default as 1.0 to avoid breaking AR model + # TODO: The following parts of the method are not yet fully checked compared to the original + + # Decompose the input precipitation fields + precip_decomp = [] + for i in range(self.ar_order + 1): + precip_ = self.decomp_method( + self.precip[i, :, :], + self.bp_filter, + mask=self.mask_thr, + fft_method=self.fft, + output_domain=self.domain, + normalize=True, + compute_stats=True, + compact_output=True, + ) + precip_decomp.append(precip_) + + # Normalize the cascades and rearrange them into a 4D array + self.precip_cascades = nowcast_utils.stack_cascades( + precip_decomp, self.n_cascade_levels + ) + precip_decomp = precip_decomp[-1] + precip_decomp = [precip_decomp.copy() for _ in range(self.n_ens_members)] + + # Compute temporal autocorrelation coefficients for each cascade level + self.gamma = np.empty((self.n_cascade_levels, self.ar_order)) + for i in range(self.n_cascade_levels): + self.gamma[i, :] = correlation.temporal_autocorrelation( + self.precip_cascades[i], mask=self.mask_thr + ) + + nowcast_utils.print_corrcoefs(self.gamma) + + # Adjust the lag-2 correlation coefficient if AR(2) model is used + if self.ar_order == 2: + for i in range(self.n_cascade_levels): + self.gamma[i, 1] = autoregression.adjust_lag2_corrcoef2( + self.gamma[i, 0], self.gamma[i, 1] + ) + + # Estimate the parameters of the AR model using autocorrelation coefficients + self.phi = np.empty((self.n_cascade_levels, self.ar_order + 1)) + for i in range(self.n_cascade_levels): + self.phi[i, :] = autoregression.estimate_ar_params_yw(self.gamma[i, :]) + + nowcast_utils.print_ar_params(self.phi) + + # Discard all except the last ar_order cascades for AR model + self.precip_cascades = [ + self.precip_cascades[i][-self.ar_order :] + for i 
in range(self.n_cascade_levels) + ] + + # Stack the cascades into a list containing all ensemble members + self.precip_cascades = [ + [self.precip_cascades[j].copy() for j in range(self.n_cascade_levels)] + for _ in range(self.n_ens_members) + ] + + # Initialize random generators if noise_method is provided + if self.noise_method is not None: + self.randgen_prec = [] + for _ in range(self.n_ens_members): + rs = np.random.RandomState(self.seed) + self.randgen_prec.append(rs) + self.seed = rs.randint(0, high=int(1e9)) + else: + self.randgen_prec = None + + print("AR model and noise applied to precipitation cascades.") + + def _recompose_cascades(self): + """ + Recompose the cascades into the final precipitation forecast fields. + """ + # Logic to recompose cascades back into forecast fields + pass + + def _apply_mask(self): + """ + Apply the precipitation mask (if applicable) to the forecast fields. + """ + # Logic for applying the mask based on the mask method + pass + + def _update_state(self): + """ + Update the internal state of the nowcasting system after each loop iteration. + """ + # Logic to handle updates to internal state (e.g., for velocity, precipitation, etc.) + pass + + def _measure_time(self, label, start_time): + """ + Measure and print the time taken for a specific part of the process. + + Parameters: + - label: A description of the part of the process being measured. + - start_time: The timestamp when the process started (from time.time()). 
+ """ + if self.measure_time: + elapsed_time = time.time() - start_time + print(f"{label} took {elapsed_time:.2f} seconds.") + + +# Wrapper function to preserve backward compatibility +def forecast( + precip, + velocity, + timesteps, + n_ens_members=24, + n_cascade_levels=6, + precip_thr=None, + kmperpixel=None, + timestep=None, + extrap_method="semilagrangian", + decomp_method="fft", + bandpass_filter_method="gaussian", + noise_method="nonparametric", + noise_stddev_adj=None, + ar_order=2, + vel_pert_method="bps", + conditional=False, + probmatching_method="cdf", + mask_method="incremental", + seed=None, + num_workers=1, + fft_method="numpy", + domain="spatial", + extrap_kwargs=None, + filter_kwargs=None, + noise_kwargs=None, + vel_pert_kwargs=None, + mask_kwargs=None, + measure_time=False, + callback=None, + return_output=True, +): + # Create an instance of the new class with all the provided arguments + nowcaster = StepsNowcaster( + precip, + velocity, + timesteps, + n_ens_members=n_ens_members, + n_cascade_levels=n_cascade_levels, + precip_thr=precip_thr, + kmperpixel=kmperpixel, + timestep=timestep, + extrap_method=extrap_method, + decomp_method=decomp_method, + bandpass_filter_method=bandpass_filter_method, + noise_method=noise_method, + noise_stddev_adj=noise_stddev_adj, + ar_order=ar_order, + vel_pert_method=vel_pert_method, + conditional=conditional, + probmatching_method=probmatching_method, + mask_method=mask_method, + seed=seed, + num_workers=num_workers, + fft_method=fft_method, + domain=domain, + extrap_kwargs=extrap_kwargs, + filter_kwargs=filter_kwargs, + noise_kwargs=noise_kwargs, + vel_pert_kwargs=vel_pert_kwargs, + mask_kwargs=mask_kwargs, + measure_time=measure_time, + callback=callback, + return_output=return_output, + ) + + # Call the appropriate methods within the class + return nowcaster.compute_forecast() + + @deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( precip, @@ -261,232 +838,6 @@ def 
forecast( :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ - _check_inputs(precip, velocity, timesteps, ar_order) - - if extrap_kwargs is None: - extrap_kwargs = dict() - - if filter_kwargs is None: - filter_kwargs = dict() - - if noise_kwargs is None: - noise_kwargs = dict() - - if vel_pert_kwargs is None: - vel_pert_kwargs = dict() - - if mask_kwargs is None: - mask_kwargs = dict() - - if np.any(~np.isfinite(velocity)): - raise ValueError("velocity contains non-finite values") - - if mask_method not in ["obs", "sprog", "incremental", None]: - raise ValueError( - "unknown mask method %s: must be 'obs', 'sprog' or 'incremental' or None" - % mask_method - ) - - if precip_thr is None: - if conditional: - raise ValueError("conditional = True but precip_thr not specified") - - if mask_method is not None: - raise ValueError("mask_method is not None but precip_thr not specified") - - if probmatching_method == "mean": - raise ValueError( - "probmatching_method = 'mean' but precip_thr not specified" - ) - - if noise_method is not None and noise_stddev_adj == "auto": - raise ValueError("noise_stddev_adj = 'auto' but precip_thr not specified") - - if noise_stddev_adj not in ["auto", "fixed", None]: - raise ValueError( - "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None" - % noise_stddev_adj - ) - - if kmperpixel is None: - if vel_pert_method is not None: - raise ValueError("vel_pert_method is set but kmperpixel=None") - if mask_method == "incremental": - raise ValueError("mask_method='incremental' but kmperpixel=None") - - if timestep is None: - if vel_pert_method is not None: - raise ValueError("vel_pert_method is set but timestep=None") - if mask_method == "incremental": - raise ValueError("mask_method='incremental' but timestep=None") - - print("Computing STEPS nowcast") - print("-----------------------") - print("") - - print("Inputs") - print("------") - print(f"input dimensions: {precip.shape[1]}x{precip.shape[2]}") - if 
kmperpixel is not None: - print(f"km/pixel: {kmperpixel}") - if timestep is not None: - print(f"time step: {timestep} minutes") - print("") - - print("Methods") - print("-------") - print(f"extrapolation: {extrap_method}") - print(f"bandpass filter: {bandpass_filter_method}") - print(f"decomposition: {decomp_method}") - print(f"noise generator: {noise_method}") - print("noise adjustment: {}".format(("yes" if noise_stddev_adj else "no"))) - print(f"velocity perturbator: {vel_pert_method}") - print("conditional statistics: {}".format(("yes" if conditional else "no"))) - print(f"precip. mask method: {mask_method}") - print(f"probability matching: {probmatching_method}") - print(f"FFT method: {fft_method}") - print(f"domain: {domain}") - print("") - - print("Parameters") - print("----------") - if isinstance(timesteps, int): - print(f"number of time steps: {timesteps}") - else: - print(f"time steps: {timesteps}") - print(f"ensemble size: {n_ens_members}") - print(f"parallel threads: {num_workers}") - print(f"number of cascade levels: {n_cascade_levels}") - print(f"order of the AR(p) model: {ar_order}") - if vel_pert_method == "bps": - vp_par = vel_pert_kwargs.get("p_par", noise.motion.get_default_params_bps_par()) - vp_perp = vel_pert_kwargs.get( - "p_perp", noise.motion.get_default_params_bps_perp() - ) - print( - f"velocity perturbations, parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}" - ) - print( - f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" - ) - - if precip_thr is not None: - print(f"precip. 
intensity threshold: {precip_thr}") - - num_ensemble_workers = min(n_ens_members, num_workers) - - if measure_time: - starttime_init = time.time() - - fft = utils.get_method(fft_method, shape=precip.shape[1:], n_threads=num_workers) - - M, N = precip.shape[1:] - - # initialize the band-pass filter - filter_method = cascade.get_method(bandpass_filter_method) - bp_filter = filter_method((M, N), n_cascade_levels, **filter_kwargs) - - decomp_method, recomp_method = cascade.get_method(decomp_method) - - extrapolator_method = extrapolation.get_method(extrap_method) - - x_values, y_values = np.meshgrid( - np.arange(precip.shape[2]), np.arange(precip.shape[1]) - ) - - xy_coords = np.stack([x_values, y_values]) - - precip = precip[-(ar_order + 1) :, :, :].copy() - - # determine the domain mask from non-finite values - domain_mask = np.logical_or.reduce( - [~np.isfinite(precip[i, :]) for i in range(precip.shape[0])] - ) - - # determine the precipitation threshold mask - if conditional: - mask_thr = np.logical_and.reduce( - [precip[i, :, :] >= precip_thr for i in range(precip.shape[0])] - ) - else: - mask_thr = None - - # advect the previous precipitation fields to the same position with the - # most recent one (i.e. 
transform them into the Lagrangian coordinates) - extrap_kwargs = extrap_kwargs.copy() - extrap_kwargs["xy_coords"] = xy_coords - extrap_kwargs["allow_nonfinite_values"] = ( - True if np.any(~np.isfinite(precip)) else False - ) - - res = list() - - def f(precip, i): - return extrapolator_method( - precip[i, :, :], velocity, ar_order - i, "min", **extrap_kwargs - )[-1] - - for i in range(ar_order): - if not DASK_IMPORTED: - precip[i, :, :] = f(precip, i) - else: - res.append(dask.delayed(f)(precip, i)) - - if DASK_IMPORTED: - num_workers_ = len(res) if num_workers > len(res) else num_workers - precip = np.stack( - list(dask.compute(*res, num_workers=num_workers_)) + [precip[-1, :, :]] - ) - - # replace non-finite values with the minimum value - precip = precip.copy() - for i in range(precip.shape[0]): - precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :]) - - if noise_method is not None: - np.random.seed(seed) - # get methods for perturbations - init_noise, generate_noise = noise.get_method(noise_method) - - # initialize the perturbation generator for the precipitation field - pert_gen = init_noise(precip, fft_method=fft, **noise_kwargs) - - if noise_stddev_adj == "auto": - print("Computing noise adjustment coefficients... ", end="", flush=True) - if measure_time: - starttime = time.time() - - precip_min = np.min(precip) - noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( - precip[-1, :, :], - precip_thr, - precip_min, - bp_filter, - decomp_method, - pert_gen, - generate_noise, - 20, - conditional=True, - num_workers=num_workers, - seed=seed, - ) - - if measure_time: - print(f"{time.time() - starttime:.2f} seconds.") - else: - print("done.") - elif noise_stddev_adj == "fixed": - func = lambda k: 1.0 / (0.75 + 0.09 * k) - noise_std_coeffs = [func(k) for k in range(1, n_cascade_levels + 1)] - else: - noise_std_coeffs = np.ones(n_cascade_levels) - - if noise_stddev_adj is not None: - print(f"noise std. dev. 
coeffs: {str(noise_std_coeffs)}") - else: - pert_gen = None - noise_std_coeffs = None - # compute the cascade decompositions of the input precipitation fields precip_decomp = [] for i in range(ar_order + 1): @@ -691,22 +1042,6 @@ def f(precip, i): return None -def _check_inputs(precip, velocity, timesteps, ar_order): - if precip.ndim != 3: - raise ValueError("precip must be a three-dimensional array") - if precip.shape[0] < ar_order + 1: - raise ValueError("precip.shape[0] < ar_order+1") - if velocity.ndim != 3: - raise ValueError("velocity must be a three-dimensional array") - if precip.shape[1:3] != velocity.shape[1:3]: - raise ValueError( - "dimension mismatch between precip and velocity: shape(precip)=%s, shape(velocity)=%s" - % (str(precip.shape), str(velocity.shape)) - ) - if isinstance(timesteps, list) and not sorted(timesteps) == timesteps: - raise ValueError("timesteps is not in ascending order") - - def _update(state, params): precip_forecast_out = [None] * params["n_ens_members"] From 0c5185f02176ac5b7abf1fbdf6a6cef3ec29e352 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 8 Oct 2024 23:03:39 +0200 Subject: [PATCH 11/65] Only update function needs to be added --- pysteps/nowcasts/steps.py | 557 +++++++++++++++++--------------------- 1 file changed, 246 insertions(+), 311 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index f72ae51c1..a594b6e63 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -83,6 +83,16 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.precip_forecast = None self.mask_prec = None self.mask_thr = None + self.precip_decomp = None + self.vp_par = None + self.vp_perp = None + self.fft_objs = None + self.generate_noise = None + + # Additional variables for time measurement + self.start_time_init = None + self.init_time = None + self.mainloop_time = None # Initialize number of ensemble workers self.num_ensemble_workers = min(self.n_ens_members, self.num_workers) 
@@ -96,40 +106,73 @@ def compute_forecast(self): self._print_forecast_info() # Measure time for initialization if self.measure_time: - start_time_init = time.time() + self.start_time_init = time.time() self._initialize_nowcast_components() # Slice the precipitation field to only use the last ar_order + 1 fields self.precip = self.precip[-(self.ar_order + 1) :, :, :].copy() - # Measure and print initialization time - if self.measure_time: - self._measure_time("Initialization", start_time_init) self._perform_extrapolation() - self._apply_noise_and_ar_model() + self._initialize_velocity_perturbators() + self._initialize_precipitation_mask() + self._initialize_fft_objects() + # Measure and print initialization time + if self.measure_time: + self._measure_time("Initialization", self.start_time_init) - # Main forecasting loop for each timestep - for t in range(len(self.timesteps)): - # Measure time for each timestep - if self.measure_time: - start_time_loop = time.time() - - # Apply noise and autoregressive model - self._apply_noise_and_ar_model() - - # Recompose the cascades into forecast fields - self._recompose_cascades() + # RUn the main nowcast loop + self._nowcast_main() - # Optionally apply a mask if required - if self.mask_method: - self._apply_mask() + if self.measure_time: + self.precip_forecast, self.mainloop_time = self.precip_forecast - # Measure and print time taken for each timestep + # Stack and return the forecast output + if self.return_output: + self.precip_forecast = np.stack( + [np.stack(self.precip_forecast[j]) for j in range(self.n_ens_members)] + ) if self.measure_time: - self._measure_time(f"Timestep {t}", start_time_loop) + return self.precip_forecast, self.init_time, self.mainloop_time + else: + return self.precip_forecast + else: + return None - print("Forecasting complete.") + def _nowcast_main(self): + """ + Main nowcast loop that iterates through the ensemble members and time steps + to generate forecasts. 
+ """ + # Prepare state and params dictionaries + state = self._initialize_state() + params = self._initialize_params(self.precip) + + # Isolate the last time slice of precipitation + self.precip = self.precip[ + -1, :, : + ] # Extract the last available precipitation field + + print("Starting nowcast computation.") + + # Run the nowcast main loop + self.precip_forecast = nowcast_main_loop( + self.precip, + self.velocity, + state, + self.timesteps, + self.extrap_method, + self._update_state, # Reference to the update function + extrap_kwargs=self.extrap_kwargs, + velocity_pert_gen=self.velocity_perturbators, + params=params, + ensemble=True, + num_ensemble_members=self.n_ens_members, + callback=self.callback, + return_output=self.return_output, + num_workers=self.num_ensemble_workers, + measure_time=self.measure_time, + ) def _check_inputs(self): """ @@ -255,17 +298,17 @@ def _print_forecast_info(self): print(f"order of the AR(p) model: {self.ar_order}") if self.vel_pert_method == "bps": - vp_par = self.vel_pert_kwargs.get( + self.vp_par = self.vel_pert_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) - vp_perp = self.vel_pert_kwargs.get( + self.vp_perp = self.vel_pert_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) print( - f"velocity perturbations, parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}" + f"velocity perturbations, parallel: {self.vp_par[0]},{self.vp_par[1]},{self.vp_par[2]}" ) print( - f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" + f"velocity perturbations, perpendicular: {self.vp_perp[0]},{self.vp_perp[1]},{self.vp_perp[2]}" ) if self.precip_thr is not None: @@ -375,7 +418,7 @@ def _apply_noise_and_ar_model(self): # Initialize the noise generator if the noise_method is provided if self.noise_method is not None: np.random.seed(self.seed) # Set the random seed for reproducibility - init_noise, generate_noise = noise.get_method( + init_noise, self.generate_noise = noise.get_method( 
self.noise_method ) # Get noise methods @@ -398,7 +441,7 @@ def _apply_noise_and_ar_model(self): self.bp_filter, self.decomp_method, self.pert_gen, - generate_noise, + self.generate_noise, 20, conditional=self.conditional, num_workers=self.num_workers, @@ -434,10 +477,9 @@ def _apply_noise_and_ar_model(self): self.noise_std_coeffs = np.ones( self.n_cascade_levels ) # Keep default as 1.0 to avoid breaking AR model - # TODO: The following parts of the method are not yet fully checked compared to the original # Decompose the input precipitation fields - precip_decomp = [] + self.precip_decomp = [] for i in range(self.ar_order + 1): precip_ = self.decomp_method( self.precip[i, :, :], @@ -449,14 +491,16 @@ def _apply_noise_and_ar_model(self): compute_stats=True, compact_output=True, ) - precip_decomp.append(precip_) + self.precip_decomp.append(precip_) # Normalize the cascades and rearrange them into a 4D array self.precip_cascades = nowcast_utils.stack_cascades( - precip_decomp, self.n_cascade_levels + self.precip_decomp, self.n_cascade_levels ) - precip_decomp = precip_decomp[-1] - precip_decomp = [precip_decomp.copy() for _ in range(self.n_ens_members)] + self.precip_decomp = self.precip_decomp[-1] + self.precip_decomp = [ + self.precip_decomp.copy() for _ in range(self.n_ens_members) + ] # Compute temporal autocorrelation coefficients for each cascade level self.gamma = np.empty((self.n_cascade_levels, self.ar_order)) @@ -496,34 +540,161 @@ def _apply_noise_and_ar_model(self): # Initialize random generators if noise_method is provided if self.noise_method is not None: self.randgen_prec = [] + self.randgen_motion = [] + for _ in range(self.n_ens_members): + # Create random state for precipitation noise generator rs = np.random.RandomState(self.seed) self.randgen_prec.append(rs) - self.seed = rs.randint(0, high=int(1e9)) + self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating + + # Create random state for motion perturbations generator + rs = 
np.random.RandomState(self.seed) + self.randgen_motion.append(rs) + self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating else: self.randgen_prec = None - + self.randgen_motion = None print("AR model and noise applied to precipitation cascades.") - def _recompose_cascades(self): + def _initialize_velocity_perturbators(self): """ - Recompose the cascades into the final precipitation forecast fields. + Initialize the velocity perturbators for each ensemble member if the velocity + perturbation method is specified. """ - # Logic to recompose cascades back into forecast fields - pass + if self.vel_pert_method is not None: + init_vel_noise, generate_vel_noise = noise.get_method(self.vel_pert_method) + + self.velocity_perturbators = [] + for j in range(self.n_ens_members): + kwargs = { + "randstate": self.randgen_motion[j], + "p_par": self.vel_pert_kwargs.get("p_par", self.vp_par), + "p_perp": self.vel_pert_kwargs.get("p_perp", self.vp_perp), + } + vp = init_vel_noise( + self.velocity, 1.0 / self.kmperpixel, self.timestep, **kwargs + ) + self.velocity_perturbators.append( + lambda t, vp=vp: generate_vel_noise(vp, t * self.timestep) + ) + else: + self.velocity_perturbators = None + print("Velocity perturbations initialized successfully.") - def _apply_mask(self): + def _initialize_precipitation_mask(self): """ - Apply the precipitation mask (if applicable) to the forecast fields. + Initialize the precipitation mask and handle different mask methods (sprog, incremental). 
""" - # Logic for applying the mask based on the mask method - pass + self.precip_forecast = [[] for _ in range(self.n_ens_members)] - def _update_state(self): + if self.probmatching_method == "mean": + self.mu_0 = np.mean( + self.precip[-1, :, :][self.precip[-1, :, :] >= self.precip_thr] + ) + else: + self.mu_0 = None + + self.precip_m = None + self.precip_m_d = None + self.war = None + self.struct = None + self.mask_rim = None + + if self.mask_method is not None: + self.mask_prec = self.precip[-1, :, :] >= self.precip_thr + + if self.mask_method == "sprog": + # Compute the wet area ratio and the precipitation mask + self.war = np.sum(self.mask_prec) / ( + self.precip.shape[1] * self.precip.shape[2] + ) + self.precip_m = [ + self.precip_cascades[0][i].copy() + for i in range(self.n_cascade_levels) + ] + self.precip_m_d = self.precip_decomp[0].copy() + + elif self.mask_method == "incremental": + # Get mask parameters + self.mask_rim = self.mask_kwargs.get("mask_rim", 10) + mask_f = self.mask_kwargs.get("mask_f", 1.0) + # Initialize the structuring element + self.struct = generate_binary_structure(2, 1) + # Expand the structuring element based on mask factor and timestep + n = mask_f * self.timestep / self.kmperpixel + self.struct = iterate_structure(self.struct, int((n - 1) / 2.0)) + # Compute and apply the dilated mask for each ensemble member + self.mask_prec = nowcast_utils.compute_dilated_mask( + self.mask_prec, self.struct, self.mask_rim + ) + self.mask_prec = [ + self.mask_prec.copy() for _ in range(self.n_ens_members) + ] + else: + self.mask_prec = None + + if self.noise_method is None and self.precip_m is None: + self.precip_m = [ + self.precip_cascades[0][i].copy() for i in range(self.n_cascade_levels) + ] + print("Precipitation mask initialized successfully.") + + def _initialize_fft_objects(self): """ - Update the internal state of the nowcasting system after each loop iteration. + Initialize FFT objects for each ensemble member. 
+ """ + self.fft_objs = [] + for _ in range(self.n_ens_members): + fft_obj = utils.get_method(self.fft_method, shape=self.precip.shape[1:]) + self.fft_objs.append(fft_obj) + print("FFT objects initialized successfully.") + + def _initialize_state(self): """ - # Logic to handle updates to internal state (e.g., for velocity, precipitation, etc.) + Initialize the state dictionary used during the nowcast iteration. + """ + return { + "fft_objs": self.fft_objs, + "mask_prec": self.mask_prec, + "precip_cascades": self.precip_cascades, + "precip_decomp": self.precip_decomp, + "precip_m": self.precip_m, + "precip_m_d": self.precip_m_d, + "randgen_prec": self.randgen_prec, + } + + def _initialize_params(self, precip): + """ + Initialize the params dictionary used during the nowcast iteration. + """ + return { + "decomp_method": self.decomp_method, + "domain": self.domain, + "domain_mask": self.domain_mask, + "filter": self.bp_filter, + "fft": self.fft, + "generate_noise": self.generate_noise, + "mask_method": self.mask_method, + "mask_rim": self.mask_rim, + "mu_0": self.mu_0, + "n_cascade_levels": self.n_cascade_levels, + "n_ens_members": self.n_ens_members, + "noise_method": self.noise_method, + "noise_std_coeffs": self.noise_std_coeffs, + "num_ensemble_workers": self.num_ensemble_workers, + "phi": self.phi, + "pert_gen": self.pert_gen, + "probmatching_method": self.probmatching_method, + "precip": precip, + "precip_thr": self.precip_thr, + "recomp_method": self.recomp_method, + "struct": self.struct, + "war": self.war, + } + + def _update_state(self): + # TODO pass def _measure_time(self, label, start_time): @@ -540,76 +711,6 @@ def _measure_time(self, label, start_time): # Wrapper function to preserve backward compatibility -def forecast( - precip, - velocity, - timesteps, - n_ens_members=24, - n_cascade_levels=6, - precip_thr=None, - kmperpixel=None, - timestep=None, - extrap_method="semilagrangian", - decomp_method="fft", - bandpass_filter_method="gaussian", - 
noise_method="nonparametric", - noise_stddev_adj=None, - ar_order=2, - vel_pert_method="bps", - conditional=False, - probmatching_method="cdf", - mask_method="incremental", - seed=None, - num_workers=1, - fft_method="numpy", - domain="spatial", - extrap_kwargs=None, - filter_kwargs=None, - noise_kwargs=None, - vel_pert_kwargs=None, - mask_kwargs=None, - measure_time=False, - callback=None, - return_output=True, -): - # Create an instance of the new class with all the provided arguments - nowcaster = StepsNowcaster( - precip, - velocity, - timesteps, - n_ens_members=n_ens_members, - n_cascade_levels=n_cascade_levels, - precip_thr=precip_thr, - kmperpixel=kmperpixel, - timestep=timestep, - extrap_method=extrap_method, - decomp_method=decomp_method, - bandpass_filter_method=bandpass_filter_method, - noise_method=noise_method, - noise_stddev_adj=noise_stddev_adj, - ar_order=ar_order, - vel_pert_method=vel_pert_method, - conditional=conditional, - probmatching_method=probmatching_method, - mask_method=mask_method, - seed=seed, - num_workers=num_workers, - fft_method=fft_method, - domain=domain, - extrap_kwargs=extrap_kwargs, - filter_kwargs=filter_kwargs, - noise_kwargs=noise_kwargs, - vel_pert_kwargs=vel_pert_kwargs, - mask_kwargs=mask_kwargs, - measure_time=measure_time, - callback=callback, - return_output=return_output, - ) - - # Call the appropriate methods within the class - return nowcaster.compute_forecast() - - @deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( precip, @@ -838,208 +939,42 @@ def forecast( :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ - # compute the cascade decompositions of the input precipitation fields - precip_decomp = [] - for i in range(ar_order + 1): - precip_ = decomp_method( - precip[i, :, :], - bp_filter, - mask=mask_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ) - precip_decomp.append(precip_) - - 
# normalize the cascades and rearrange them into a four-dimensional array - # of shape (n_cascade_levels,ar_order+1,m,n) for the autoregressive model - precip_cascades = nowcast_utils.stack_cascades(precip_decomp, n_cascade_levels) - - precip_decomp = precip_decomp[-1] - precip_decomp = [precip_decomp.copy() for _ in range(n_ens_members)] - - # compute lag-l temporal autocorrelation coefficients for each cascade level - gamma = np.empty((n_cascade_levels, ar_order)) - for i in range(n_cascade_levels): - gamma[i, :] = correlation.temporal_autocorrelation( - precip_cascades[i], mask=mask_thr - ) - - nowcast_utils.print_corrcoefs(gamma) - - if ar_order == 2: - # adjust the lag-2 correlation coefficient to ensure that the AR(p) - # process is stationary - for i in range(n_cascade_levels): - gamma[i, 1] = autoregression.adjust_lag2_corrcoef2(gamma[i, 0], gamma[i, 1]) - - # estimate the parameters of the AR(p) model from the autocorrelation - # coefficients - phi = np.empty((n_cascade_levels, ar_order + 1)) - for i in range(n_cascade_levels): - phi[i, :] = autoregression.estimate_ar_params_yw(gamma[i, :]) - - nowcast_utils.print_ar_params(phi) - - # discard all except the p-1 last cascades because they are not needed for - # the AR(p) model - precip_cascades = [precip_cascades[i][-ar_order:] for i in range(n_cascade_levels)] - - # stack the cascades into a list containing all ensemble members - precip_cascades = [ - [precip_cascades[j].copy() for j in range(n_cascade_levels)] - for _ in range(n_ens_members) - ] - - # initialize the random generators - if noise_method is not None: - randgen_prec = [] - randgen_motion = [] - for _ in range(n_ens_members): - rs = np.random.RandomState(seed) - randgen_prec.append(rs) - seed = rs.randint(0, high=1e9) - rs = np.random.RandomState(seed) - randgen_motion.append(rs) - seed = rs.randint(0, high=1e9) - else: - randgen_prec = None - - if vel_pert_method is not None: - init_vel_noise, generate_vel_noise = 
noise.get_method(vel_pert_method) - - # initialize the perturbation generators for the motion field - velocity_perturbators = [] - for j in range(n_ens_members): - kwargs = { - "randstate": randgen_motion[j], - "p_par": vp_par, - "p_perp": vp_perp, - } - vp = init_vel_noise(velocity, 1.0 / kmperpixel, timestep, **kwargs) - velocity_perturbators.append( - lambda t, vp=vp: generate_vel_noise(vp, t * timestep) - ) - else: - velocity_perturbators = None - - precip_forecast = [[] for _ in range(n_ens_members)] - - if probmatching_method == "mean": - mu_0 = np.mean(precip[-1, :, :][precip[-1, :, :] >= precip_thr]) - else: - mu_0 = None - - precip_m = None - precip_m_d = None - war = None - struct = None - mask_rim = None - - if mask_method is not None: - mask_prec = precip[-1, :, :] >= precip_thr - - if mask_method == "sprog": - # compute the wet area ratio and the precipitation mask - war = 1.0 * np.sum(mask_prec) / (precip.shape[1] * precip.shape[2]) - precip_m = [precip_cascades[0][i].copy() for i in range(n_cascade_levels)] - precip_m_d = precip_decomp[0].copy() - elif mask_method == "incremental": - # get mask parameters - mask_rim = mask_kwargs.get("mask_rim", 10) - mask_f = mask_kwargs.get("mask_f", 1.0) - # initialize the structuring element - struct = generate_binary_structure(2, 1) - # iterate it to expand it nxn - n = mask_f * timestep / kmperpixel - struct = iterate_structure(struct, int((n - 1) / 2.0)) - # initialize precip mask for each member - mask_prec = nowcast_utils.compute_dilated_mask(mask_prec, struct, mask_rim) - mask_prec = [mask_prec.copy() for _ in range(n_ens_members)] - else: - mask_prec = None - - if noise_method is None and precip_m is None: - precip_m = [precip_cascades[0][i].copy() for i in range(n_cascade_levels)] - - fft_objs = [] - for _ in range(n_ens_members): - fft_objs.append(utils.get_method(fft_method, shape=precip.shape[1:])) - - if measure_time: - init_time = time.time() - starttime_init - - precip = precip[-1, :, :] - - 
print("Starting nowcast computation.") - - # the nowcast iteration for each ensemble member - state = { - "fft_objs": fft_objs, - "mask_prec": mask_prec, - "precip_cascades": precip_cascades, - "precip_decomp": precip_decomp, - "precip_m": precip_m, - "precip_m_d": precip_m_d, - "randgen_prec": randgen_prec, - } - params = { - "decomp_method": decomp_method, - "domain": domain, - "domain_mask": domain_mask, - "filter": bp_filter, - "fft": fft, - "generate_noise": generate_noise, - "mask_method": mask_method, - "mask_rim": mask_rim, - "mu_0": mu_0, - "n_cascade_levels": n_cascade_levels, - "n_ens_members": n_ens_members, - "noise_method": noise_method, - "noise_std_coeffs": noise_std_coeffs, - "num_ensemble_workers": num_ensemble_workers, - "phi": phi, - "pert_gen": pert_gen, - "probmatching_method": probmatching_method, - "precip": precip, - "precip_thr": precip_thr, - "recomp_method": recomp_method, - "struct": struct, - "war": war, - } - - precip_forecast = nowcast_main_loop( + # Create an instance of the new class with all the provided arguments + nowcaster = StepsNowcaster( precip, velocity, - state, timesteps, - extrap_method, - _update, + n_ens_members=n_ens_members, + n_cascade_levels=n_cascade_levels, + precip_thr=precip_thr, + kmperpixel=kmperpixel, + timestep=timestep, + extrap_method=extrap_method, + decomp_method=decomp_method, + bandpass_filter_method=bandpass_filter_method, + noise_method=noise_method, + noise_stddev_adj=noise_stddev_adj, + ar_order=ar_order, + vel_pert_method=vel_pert_method, + conditional=conditional, + probmatching_method=probmatching_method, + mask_method=mask_method, + seed=seed, + num_workers=num_workers, + fft_method=fft_method, + domain=domain, extrap_kwargs=extrap_kwargs, - velocity_pert_gen=velocity_perturbators, - params=params, - ensemble=True, - num_ensemble_members=n_ens_members, + filter_kwargs=filter_kwargs, + noise_kwargs=noise_kwargs, + vel_pert_kwargs=vel_pert_kwargs, + mask_kwargs=mask_kwargs, + 
measure_time=measure_time, callback=callback, return_output=return_output, - num_workers=num_ensemble_workers, - measure_time=measure_time, ) - if measure_time: - precip_forecast, mainloop_time = precip_forecast - if return_output: - precip_forecast = np.stack( - [np.stack(precip_forecast[j]) for j in range(n_ens_members)] - ) - if measure_time: - return precip_forecast, init_time, mainloop_time - else: - return precip_forecast - else: - return None + # Call the appropriate methods within the class + return nowcaster.compute_forecast() def _update(state, params): From 46bc44ae1fb65458de0646edace59889844447c2 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Wed, 9 Oct 2024 10:28:08 +0200 Subject: [PATCH 12/65] Fully refactored code --- pysteps/nowcasts/steps.py | 342 +++++++++++++++++++++----------------- 1 file changed, 191 insertions(+), 151 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index a594b6e63..a4dfe73f7 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -79,7 +79,7 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.noise_std_coeffs = None self.randgen_prec = None self.randgen_motion = None - self.velocity_perturbators = None + self.velocity_perturbations = None self.precip_forecast = None self.mask_prec = None self.mask_thr = None @@ -114,7 +114,7 @@ def compute_forecast(self): self._perform_extrapolation() self._apply_noise_and_ar_model() - self._initialize_velocity_perturbators() + self._initialize_velocity_perturbations() self._initialize_precipitation_mask() self._initialize_fft_objects() # Measure and print initialization time @@ -164,7 +164,7 @@ def _nowcast_main(self): self.extrap_method, self._update_state, # Reference to the update function extrap_kwargs=self.extrap_kwargs, - velocity_pert_gen=self.velocity_perturbators, + velocity_pert_gen=self.velocity_perturbations, params=params, ensemble=True, num_ensemble_members=self.n_ens_members, @@ -557,7 +557,7 @@ def 
_apply_noise_and_ar_model(self): self.randgen_motion = None print("AR model and noise applied to precipitation cascades.") - def _initialize_velocity_perturbators(self): + def _initialize_velocity_perturbations(self): """ Initialize the velocity perturbators for each ensemble member if the velocity perturbation method is specified. @@ -565,7 +565,7 @@ def _initialize_velocity_perturbators(self): if self.vel_pert_method is not None: init_vel_noise, generate_vel_noise = noise.get_method(self.vel_pert_method) - self.velocity_perturbators = [] + self.velocity_perturbations = [] for j in range(self.n_ens_members): kwargs = { "randstate": self.randgen_motion[j], @@ -575,11 +575,11 @@ def _initialize_velocity_perturbators(self): vp = init_vel_noise( self.velocity, 1.0 / self.kmperpixel, self.timestep, **kwargs ) - self.velocity_perturbators.append( + self.velocity_perturbations.append( lambda t, vp=vp: generate_vel_noise(vp, t * self.timestep) ) else: - self.velocity_perturbators = None + self.velocity_perturbations = None print("Velocity perturbations initialized successfully.") def _initialize_precipitation_mask(self): @@ -693,9 +693,190 @@ def _initialize_params(self, precip): "war": self.war, } - def _update_state(self): - # TODO - pass + def _update_state(self, state, params): + """ + Update the state during the nowcasting loop. This function handles the AR model iteration, + noise generation, recomposition, and mask application for each ensemble member. 
+ """ + precip_forecast_out = [None] * params["n_ens_members"] + + # Update the deterministic AR(p) model if noise or sprog mask is used + if params["noise_method"] is None or params["mask_method"] == "sprog": + self._update_deterministic_ar_model(state, params) + + # Worker function for each ensemble member + def worker(j): + self._apply_ar_model_to_cascades(j, state, params) + precip_forecast_out[j] = self._recompose_and_apply_mask(j, state, params) + + # Use Dask for parallel execution if available + if ( + DASK_IMPORTED + and params["n_ens_members"] > 1 + and params["num_ensemble_workers"] > 1 + ): + res = [] + for j in range(params["n_ens_members"]): + res.append(dask.delayed(worker)(j)) + dask.compute(*res, num_workers=params["num_ensemble_workers"]) + else: + for j in range(params["n_ens_members"]): + worker(j) + + return np.stack(precip_forecast_out), state + + def _update_deterministic_ar_model(self, state, params): + """ + Update the deterministic AR(p) model for each cascade level if noise is disabled + or if the sprog mask is used. + """ + for i in range(params["n_cascade_levels"]): + state["precip_m"][i] = autoregression.iterate_ar_model( + state["precip_m"][i], params["phi"][i, :] + ) + + state["precip_m_d"]["cascade_levels"] = [ + state["precip_m"][i][-1] for i in range(params["n_cascade_levels"]) + ] + + if params["domain"] == "spatial": + state["precip_m_d"]["cascade_levels"] = np.stack( + state["precip_m_d"]["cascade_levels"] + ) + + precip_m_ = params["recomp_method"](state["precip_m_d"]) + + if params["domain"] == "spectral": + precip_m_ = params["fft"].irfft2(precip_m_) + + if params["mask_method"] == "sprog": + state["mask_prec"] = compute_percentile_mask(precip_m_, params["war"]) + + def _apply_ar_model_to_cascades(self, j, state, params): + """ + Apply the AR(p) model to the cascades for each ensemble member, including + noise generation and normalization. 
+ """ + # Generate noise if enabled + if params["noise_method"] is not None: + eps = self._generate_and_decompose_noise(j, state, params) + else: + eps = None + + # Iterate the AR(p) model for each cascade level + for i in range(params["n_cascade_levels"]): + if eps is not None: + eps_ = eps["cascade_levels"][i] + eps_ *= params["noise_std_coeffs"][i] + else: + eps_ = None + + # Apply the AR(p) model with or without perturbations + if eps is not None or params["vel_pert_method"] is not None: + state["precip_cascades"][j][i] = autoregression.iterate_ar_model( + state["precip_cascades"][j][i], params["phi"][i, :], eps=eps_ + ) + else: + # use the deterministic AR(p) model computed above if + # perturbations are disabled + state["precip_cascades"][j][i] = state["precip_m"][i] + + eps = None + eps_ = None + + def _generate_and_decompose_noise(self, j, state, params): + """ + Generate and decompose the noise field into cascades for a given ensemble member. + """ + eps = params["generate_noise"]( + params["pert_gen"], + randstate=state["randgen_prec"][j], + fft_method=state["fft_objs"][j], + domain=params["domain"], + ) + + eps = params["decomp_method"]( + eps, + params["filter"], + fft_method=state["fft_objs"][j], + input_domain=params["domain"], + output_domain=params["domain"], + compute_stats=True, + normalize=True, + compact_output=True, + ) + + return eps + + def _recompose_and_apply_mask(self, j, state, params): + """ + Recompose the precipitation field from cascades and apply the precipitation mask. 
+ """ + state["precip_decomp"][j]["cascade_levels"] = [ + state["precip_cascades"][j][i][-1, :] + for i in range(params["n_cascade_levels"]) + ] + + if params["domain"] == "spatial": + state["precip_decomp"][j]["cascade_levels"] = np.stack( + state["precip_decomp"][j]["cascade_levels"] + ) + + precip_forecast = params["recomp_method"](state["precip_decomp"][j]) + + if params["domain"] == "spectral": + precip_forecast = state["fft_objs"][j].irfft2(precip_forecast) + + # Apply the precipitation mask + if params["mask_method"] is not None: + precip_forecast = self._apply_precipitation_mask( + precip_forecast, j, state, params + ) + + # Adjust the CDF of the forecast to match the observed precipitation field + if params["probmatching_method"] == "cdf": + precip_forecast = probmatching.nonparam_match_empirical_cdf( + precip_forecast, params["precip"] + ) + # Adjust the mean of the forecast to match the observed mean + elif params["probmatching_method"] == "mean": + mask = precip_forecast >= params["precip_thr"] + mu_fct = np.mean(precip_forecast[mask]) + precip_forecast[mask] = precip_forecast[mask] - mu_fct + params["mu_0"] + + # Update the mask for incremental method + if params["mask_method"] == "incremental": + state["mask_prec"][j] = nowcast_utils.compute_dilated_mask( + precip_forecast >= params["precip_thr"], + params["struct"], + params["mask_rim"], + ) + + # Apply the domain mask (set masked areas to NaN) + precip_forecast[params["domain_mask"]] = np.nan + + return precip_forecast + + def _apply_precipitation_mask(self, precip_forecast, j, state, params): + """ + Apply the precipitation mask to prevent new precipitation from generating + in areas where it was not observed. 
+ """ + precip_forecast_min = precip_forecast.min() + + if params["mask_method"] == "incremental": + precip_forecast = ( + precip_forecast_min + + (precip_forecast - precip_forecast_min) * state["mask_prec"][j] + ) + mask_prec_ = precip_forecast > precip_forecast_min + else: + mask_prec_ = state["mask_prec"] + + # Set to min value outside the mask + precip_forecast[~mask_prec_] = precip_forecast_min + + return precip_forecast def _measure_time(self, label, start_time): """ @@ -975,144 +1156,3 @@ def forecast( # Call the appropriate methods within the class return nowcaster.compute_forecast() - - -def _update(state, params): - precip_forecast_out = [None] * params["n_ens_members"] - - if params["noise_method"] is None or params["mask_method"] == "sprog": - for i in range(params["n_cascade_levels"]): - # use a separate AR(p) model for the non-perturbed forecast, - # from which the mask is obtained - state["precip_m"][i] = autoregression.iterate_ar_model( - state["precip_m"][i], params["phi"][i, :] - ) - - state["precip_m_d"]["cascade_levels"] = [ - state["precip_m"][i][-1] for i in range(params["n_cascade_levels"]) - ] - if params["domain"] == "spatial": - state["precip_m_d"]["cascade_levels"] = np.stack( - state["precip_m_d"]["cascade_levels"] - ) - precip_m_ = params["recomp_method"](state["precip_m_d"]) - if params["domain"] == "spectral": - precip_m_ = params["fft"].irfft2(precip_m_) - - if params["mask_method"] == "sprog": - state["mask_prec"] = compute_percentile_mask(precip_m_, params["war"]) - - def worker(j): - if params["noise_method"] is not None: - # generate noise field - eps = params["generate_noise"]( - params["pert_gen"], - randstate=state["randgen_prec"][j], - fft_method=state["fft_objs"][j], - domain=params["domain"], - ) - - # decompose the noise field into a cascade - eps = params["decomp_method"]( - eps, - params["filter"], - fft_method=state["fft_objs"][j], - input_domain=params["domain"], - output_domain=params["domain"], - compute_stats=True, 
- normalize=True, - compact_output=True, - ) - else: - eps = None - - # iterate the AR(p) model for each cascade level - for i in range(params["n_cascade_levels"]): - # normalize the noise cascade - if eps is not None: - eps_ = eps["cascade_levels"][i] - eps_ *= params["noise_std_coeffs"][i] - else: - eps_ = None - # apply AR(p) process to cascade level - if eps is not None or params["vel_pert_method"] is not None: - state["precip_cascades"][j][i] = autoregression.iterate_ar_model( - state["precip_cascades"][j][i], params["phi"][i, :], eps=eps_ - ) - else: - # use the deterministic AR(p) model computed above if - # perturbations are disabled - state["precip_cascades"][j][i] = state["precip_m"][i] - - eps = None - eps_ = None - - # compute the recomposed precipitation field(s) from the cascades - # obtained from the AR(p) model(s) - state["precip_decomp"][j]["cascade_levels"] = [ - state["precip_cascades"][j][i][-1, :] - for i in range(params["n_cascade_levels"]) - ] - if params["domain"] == "spatial": - state["precip_decomp"][j]["cascade_levels"] = np.stack( - state["precip_decomp"][j]["cascade_levels"] - ) - - precip_forecast = params["recomp_method"](state["precip_decomp"][j]) - - if params["domain"] == "spectral": - precip_forecast = state["fft_objs"][j].irfft2(precip_forecast) - - if params["mask_method"] is not None: - # apply the precipitation mask to prevent generation of new - # precipitation into areas where it was not originally - # observed - precip_forecast_min = precip_forecast.min() - if params["mask_method"] == "incremental": - precip_forecast = ( - precip_forecast_min - + (precip_forecast - precip_forecast_min) * state["mask_prec"][j] - ) - mask_prec_ = precip_forecast > precip_forecast_min - else: - mask_prec_ = state["mask_prec"] - - # set to min value outside mask - precip_forecast[~mask_prec_] = precip_forecast_min - - if params["probmatching_method"] == "cdf": - # adjust the CDF of the forecast to match the most recently - # observed 
precipitation field - precip_forecast = probmatching.nonparam_match_empirical_cdf( - precip_forecast, params["precip"] - ) - elif params["probmatching_method"] == "mean": - mask = precip_forecast >= params["precip_thr"] - mu_fct = np.mean(precip_forecast[mask]) - precip_forecast[mask] = precip_forecast[mask] - mu_fct + params["mu_0"] - - if params["mask_method"] == "incremental": - state["mask_prec"][j] = nowcast_utils.compute_dilated_mask( - precip_forecast >= params["precip_thr"], - params["struct"], - params["mask_rim"], - ) - - precip_forecast[params["domain_mask"]] = np.nan - - precip_forecast_out[j] = precip_forecast - - if ( - DASK_IMPORTED - and params["n_ens_members"] > 1 - and params["num_ensemble_workers"] > 1 - ): - res = [] - for j in range(params["n_ens_members"]): - res.append(dask.delayed(worker)(j)) - dask.compute(*res, num_workers=params["num_ensemble_workers"]) - else: - for j in range(params["n_ens_members"]): - worker(j) - - return np.stack(precip_forecast_out), state From eb33c06da86675e7f9d8dc960e782abde75342b9 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Wed, 9 Oct 2024 11:18:02 +0200 Subject: [PATCH 13/65] Possible solution for errors solved --- pysteps/nowcasts/steps.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index a4dfe73f7..dd0ff1c38 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -146,18 +146,16 @@ def _nowcast_main(self): """ # Prepare state and params dictionaries state = self._initialize_state() - params = self._initialize_params(self.precip) + params = self._initialize_params() # Isolate the last time slice of precipitation - self.precip = self.precip[ - -1, :, : - ] # Extract the last available precipitation field + precip = self.precip[-1, :, :] # Extract the last available precipitation field print("Starting nowcast computation.") # Run the nowcast main loop self.precip_forecast = 
nowcast_main_loop( - self.precip, + precip, self.velocity, state, self.timesteps, @@ -408,12 +406,12 @@ def _apply_noise_and_ar_model(self): and adds noise perturbations if necessary. """ # Make a copy of the precipitation data and replace non-finite values - self.precip = self.precip.copy() + precip = self.precip.copy() for i in range(self.precip.shape[0]): # Replace non-finite values with the minimum finite value of the precipitation field - self.precip[i, ~np.isfinite(self.precip[i, :])] = np.nanmin( - self.precip[i, :] - ) + precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :]) + # Store the precipitation data back in the object + self.precip = precip # Initialize the noise generator if the noise_method is provided if self.noise_method is not None: @@ -664,7 +662,7 @@ def _initialize_state(self): "randgen_prec": self.randgen_prec, } - def _initialize_params(self, precip): + def _initialize_params(self): """ Initialize the params dictionary used during the nowcast iteration. """ @@ -686,7 +684,7 @@ def _initialize_params(self, precip): "phi": self.phi, "pert_gen": self.pert_gen, "probmatching_method": self.probmatching_method, - "precip": precip, + "precip": self.precip, "precip_thr": self.precip_thr, "recomp_method": self.recomp_method, "struct": self.struct, From a1ce4bcfa8adf09fe7b9989ade038913c77180f3 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Wed, 9 Oct 2024 11:23:40 +0200 Subject: [PATCH 14/65] Fixed small bug in _nowcast_main --- pysteps/nowcasts/steps.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index dd0ff1c38..1e7c8d872 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -144,13 +144,13 @@ def _nowcast_main(self): Main nowcast loop that iterates through the ensemble members and time steps to generate forecasts. 
""" - # Prepare state and params dictionaries - state = self._initialize_state() - params = self._initialize_params() - # Isolate the last time slice of precipitation precip = self.precip[-1, :, :] # Extract the last available precipitation field + # Prepare state and params dictionaries + state = self._initialize_state() + params = self._initialize_params(precip) + print("Starting nowcast computation.") # Run the nowcast main loop @@ -662,7 +662,7 @@ def _initialize_state(self): "randgen_prec": self.randgen_prec, } - def _initialize_params(self): + def _initialize_params(self, precip): """ Initialize the params dictionary used during the nowcast iteration. """ @@ -684,7 +684,7 @@ def _initialize_params(self): "phi": self.phi, "pert_gen": self.pert_gen, "probmatching_method": self.probmatching_method, - "precip": self.precip, + "precip": precip, "precip_thr": self.precip_thr, "recomp_method": self.recomp_method, "struct": self.struct, From b002354e1d3a0677657d0991b01dd2527ef48538 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 22 Oct 2024 12:56:29 +0200 Subject: [PATCH 15/65] Name changes from feedback Ruben --- pysteps/nowcasts/steps.py | 120 ++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 56 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 1e7c8d872..34b797910 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -41,16 +41,16 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.timesteps = timesteps self.n_ens_members = kwargs.get("n_ens_members", 24) self.n_cascade_levels = kwargs.get("n_cascade_levels", 6) - self.precip_thr = kwargs.get("precip_thr", None) + self.precip_threshold = kwargs.get("precip_thr", None) self.kmperpixel = kwargs.get("kmperpixel", None) self.timestep = kwargs.get("timestep", None) - self.extrap_method = kwargs.get("extrap_method", "semilagrangian") - self.decomp_method = kwargs.get("decomp_method", "fft") + self.extrapolation_method = 
kwargs.get("extrap_method", "semilagrangian") + self.decomposition_method = kwargs.get("decomp_method", "fft") self.bandpass_filter_method = kwargs.get("bandpass_filter_method", "gaussian") self.noise_method = kwargs.get("noise_method", "nonparametric") self.noise_stddev_adj = kwargs.get("noise_stddev_adj", None) self.ar_order = kwargs.get("ar_order", 2) - self.vel_pert_method = kwargs.get("vel_pert_method", "bps") + self.velocity_perturbation_method = kwargs.get("vel_pert_method", "bps") self.conditional = kwargs.get("conditional", False) self.probmatching_method = kwargs.get("probmatching_method", "cdf") self.mask_method = kwargs.get("mask_method", "incremental") @@ -58,10 +58,10 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.num_workers = kwargs.get("num_workers", 1) self.fft_method = kwargs.get("fft_method", "numpy") self.domain = kwargs.get("domain", "spatial") - self.extrap_kwargs = kwargs.get("extrap_kwargs", None) + self.extrapolation_kwargs = kwargs.get("extrap_kwargs", None) self.filter_kwargs = kwargs.get("filter_kwargs", None) self.noise_kwargs = kwargs.get("noise_kwargs", None) - self.vel_pert_kwargs = kwargs.get("vel_pert_kwargs", None) + self.velocity_pertubation_kwargs = kwargs.get("vel_pert_kwargs", None) self.mask_kwargs = kwargs.get("mask_kwargs", None) self.measure_time = kwargs.get("measure_time", False) self.callback = kwargs.get("callback", None) @@ -69,7 +69,7 @@ def __init__(self, precip, velocity, timesteps, **kwargs): # Additional variables for internal state management self.fft = None - self.bp_filter = None + self.bandpass_filter = None self.extrapolator_method = None self.domain_mask = None self.precip_cascades = None @@ -84,8 +84,8 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.mask_prec = None self.mask_thr = None self.precip_decomp = None - self.vp_par = None - self.vp_perp = None + self.velocity_pertubation_parallel = None + self.velocity_pertubation_perp = None self.fft_objs = None 
self.generate_noise = None @@ -121,7 +121,7 @@ def compute_forecast(self): if self.measure_time: self._measure_time("Initialization", self.start_time_init) - # RUn the main nowcast loop + # Run the main nowcast loop self._nowcast_main() if self.measure_time: @@ -159,9 +159,9 @@ def _nowcast_main(self): self.velocity, state, self.timesteps, - self.extrap_method, + self.extrapolation_method, self._update_state, # Reference to the update function - extrap_kwargs=self.extrap_kwargs, + extrap_kwargs=self.extrapolation_kwargs, velocity_pert_gen=self.velocity_perturbations, params=params, ensemble=True, @@ -203,7 +203,7 @@ def _check_inputs(self): f"Unknown mask method '{self.mask_method}'. " "Must be 'obs', 'sprog', 'incremental', or None." ) - if self.precip_thr is None: + if self.precip_threshold is None: if self.conditional: raise ValueError("conditional=True but precip_thr is not specified.") if self.mask_method is not None: @@ -222,25 +222,25 @@ def _check_inputs(self): "Must be 'auto', 'fixed', or None." 
) if self.kmperpixel is None: - if self.vel_pert_method is not None: + if self.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but kmperpixel=None") if self.mask_method == "incremental": raise ValueError("mask_method='incremental' but kmperpixel=None") if self.timestep is None: - if self.vel_pert_method is not None: + if self.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but timestep=None") if self.mask_method == "incremental": raise ValueError("mask_method='incremental' but timestep=None") # Handle None values for various kwargs - if self.extrap_kwargs is None: - self.extrap_kwargs = {} + if self.extrapolation_kwargs is None: + self.extrapolation_kwargs = {} if self.filter_kwargs is None: self.filter_kwargs = {} if self.noise_kwargs is None: self.noise_kwargs = {} - if self.vel_pert_kwargs is None: - self.vel_pert_kwargs = {} + if self.velocity_pertubation_kwargs is None: + self.velocity_pertubation_kwargs = {} if self.mask_kwargs is None: self.mask_kwargs = {} @@ -265,16 +265,16 @@ def _print_forecast_info(self): print("Methods") print("-------") - print(f"extrapolation: {self.extrap_method}") + print(f"extrapolation: {self.extrapolation_method}") print(f"bandpass filter: {self.bandpass_filter_method}") - print(f"decomposition: {self.decomp_method}") + print(f"decomposition: {self.decomposition_method}") print(f"noise generator: {self.noise_method}") print( "noise adjustment: {}".format( ("yes" if self.noise_stddev_adj else "no") ) ) - print(f"velocity perturbator: {self.vel_pert_method}") + print(f"velocity perturbator: {self.velocity_perturbation_method}") print( "conditional statistics: {}".format(("yes" if self.conditional else "no")) ) @@ -295,22 +295,22 @@ def _print_forecast_info(self): print(f"number of cascade levels: {self.n_cascade_levels}") print(f"order of the AR(p) model: {self.ar_order}") - if self.vel_pert_method == "bps": - self.vp_par = self.vel_pert_kwargs.get( + if 
self.velocity_perturbation_method == "bps": + self.velocity_pertubation_parallel = self.velocity_pertubation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) - self.vp_perp = self.vel_pert_kwargs.get( + self.velocity_pertubation_perp = self.velocity_pertubation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) print( - f"velocity perturbations, parallel: {self.vp_par[0]},{self.vp_par[1]},{self.vp_par[2]}" + f"velocity perturbations, parallel: {self.velocity_pertubation_parallel[0]},{self.velocity_pertubation_parallel[1]},{self.velocity_pertubation_parallel[2]}" ) print( - f"velocity perturbations, perpendicular: {self.vp_perp[0]},{self.vp_perp[1]},{self.vp_perp[2]}" + f"velocity perturbations, perpendicular: {self.velocity_pertubation_perp[0]},{self.velocity_pertubation_perp[1]},{self.velocity_pertubation_perp[2]}" ) - if self.precip_thr is not None: - print(f"precip. intensity threshold: {self.precip_thr}") + if self.precip_threshold is not None: + print(f"precip. 
intensity threshold: {self.precip_threshold}") def _initialize_nowcast_components(self): """ @@ -325,15 +325,17 @@ def _initialize_nowcast_components(self): # Initialize the band-pass filter for the cascade decomposition filter_method = cascade.get_method(self.bandpass_filter_method) - self.bp_filter = filter_method( + self.bandpass_filter = filter_method( (M, N), self.n_cascade_levels, **(self.filter_kwargs or {}) ) # Get the decomposition method (e.g., FFT) - self.decomp_method, self.recomp_method = cascade.get_method(self.decomp_method) + self.decomposition_method, self.recomp_method = cascade.get_method( + self.decomposition_method + ) # Get the extrapolation method (e.g., semilagrangian) - self.extrapolator_method = extrapolation.get_method(self.extrap_method) + self.extrapolator_method = extrapolation.get_method(self.extrapolation_method) # Generate the mesh grid for spatial coordinates x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) @@ -355,14 +357,14 @@ def _perform_extrapolation(self): if self.conditional: self.mask_thr = np.logical_and.reduce( [ - self.precip[i, :, :] >= self.precip_thr + self.precip[i, :, :] >= self.precip_threshold for i in range(self.precip.shape[0]) ] ) else: self.mask_thr = None - extrap_kwargs = self.extrap_kwargs.copy() + extrap_kwargs = self.extrapolation_kwargs.copy() extrap_kwargs["xy_coords"] = self.xy_coords extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(self.precip)) else False @@ -434,10 +436,10 @@ def _apply_noise_and_ar_model(self): # Compute noise adjustment coefficients self.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( self.precip[-1, :, :], - self.precip_thr, + self.precip_threshold, np.min(self.precip), - self.bp_filter, - self.decomp_method, + self.bandpass_filter, + self.decomposition_method, self.pert_gen, self.generate_noise, 20, @@ -479,9 +481,9 @@ def _apply_noise_and_ar_model(self): # Decompose the input precipitation fields self.precip_decomp = [] for i in 
range(self.ar_order + 1): - precip_ = self.decomp_method( + precip_ = self.decomposition_method( self.precip[i, :, :], - self.bp_filter, + self.bandpass_filter, mask=self.mask_thr, fft_method=self.fft, output_domain=self.domain, @@ -560,15 +562,21 @@ def _initialize_velocity_perturbations(self): Initialize the velocity perturbators for each ensemble member if the velocity perturbation method is specified. """ - if self.vel_pert_method is not None: - init_vel_noise, generate_vel_noise = noise.get_method(self.vel_pert_method) + if self.velocity_perturbation_method is not None: + init_vel_noise, generate_vel_noise = noise.get_method( + self.velocity_perturbation_method + ) self.velocity_perturbations = [] for j in range(self.n_ens_members): kwargs = { "randstate": self.randgen_motion[j], - "p_par": self.vel_pert_kwargs.get("p_par", self.vp_par), - "p_perp": self.vel_pert_kwargs.get("p_perp", self.vp_perp), + "p_par": self.velocity_pertubation_kwargs.get( + "p_par", self.velocity_pertubation_parallel + ), + "p_perp": self.velocity_pertubation_kwargs.get( + "p_perp", self.velocity_pertubation_perp + ), } vp = init_vel_noise( self.velocity, 1.0 / self.kmperpixel, self.timestep, **kwargs @@ -588,30 +596,30 @@ def _initialize_precipitation_mask(self): if self.probmatching_method == "mean": self.mu_0 = np.mean( - self.precip[-1, :, :][self.precip[-1, :, :] >= self.precip_thr] + self.precip[-1, :, :][self.precip[-1, :, :] >= self.precip_threshold] ) else: self.mu_0 = None - self.precip_m = None - self.precip_m_d = None + self.precip_mask = None + self.precip_mask_decomposed = None self.war = None self.struct = None self.mask_rim = None if self.mask_method is not None: - self.mask_prec = self.precip[-1, :, :] >= self.precip_thr + self.mask_prec = self.precip[-1, :, :] >= self.precip_threshold if self.mask_method == "sprog": # Compute the wet area ratio and the precipitation mask self.war = np.sum(self.mask_prec) / ( self.precip.shape[1] * self.precip.shape[2] ) - 
self.precip_m = [ + self.precip_mask = [ self.precip_cascades[0][i].copy() for i in range(self.n_cascade_levels) ] - self.precip_m_d = self.precip_decomp[0].copy() + self.precip_mask_decomposed = self.precip_decomp[0].copy() elif self.mask_method == "incremental": # Get mask parameters @@ -632,8 +640,8 @@ def _initialize_precipitation_mask(self): else: self.mask_prec = None - if self.noise_method is None and self.precip_m is None: - self.precip_m = [ + if self.noise_method is None and self.precip_mask is None: + self.precip_mask = [ self.precip_cascades[0][i].copy() for i in range(self.n_cascade_levels) ] print("Precipitation mask initialized successfully.") @@ -657,8 +665,8 @@ def _initialize_state(self): "mask_prec": self.mask_prec, "precip_cascades": self.precip_cascades, "precip_decomp": self.precip_decomp, - "precip_m": self.precip_m, - "precip_m_d": self.precip_m_d, + "precip_m": self.precip_mask, + "precip_m_d": self.precip_mask_decomposed, "randgen_prec": self.randgen_prec, } @@ -667,10 +675,10 @@ def _initialize_params(self, precip): Initialize the params dictionary used during the nowcast iteration. 
""" return { - "decomp_method": self.decomp_method, + "decomp_method": self.decomposition_method, "domain": self.domain, "domain_mask": self.domain_mask, - "filter": self.bp_filter, + "filter": self.bandpass_filter, "fft": self.fft, "generate_noise": self.generate_noise, "mask_method": self.mask_method, @@ -685,7 +693,7 @@ def _initialize_params(self, precip): "pert_gen": self.pert_gen, "probmatching_method": self.probmatching_method, "precip": precip, - "precip_thr": self.precip_thr, + "precip_thr": self.precip_threshold, "recomp_method": self.recomp_method, "struct": self.struct, "war": self.war, From e105a24809fd65630348958ef8f6982457750e22 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 22 Oct 2024 13:07:53 +0200 Subject: [PATCH 16/65] Name changes from feedback Ruben v2 --- pysteps/nowcasts/steps.py | 86 ++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 34b797910..6884c36d0 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -61,7 +61,7 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.extrapolation_kwargs = kwargs.get("extrap_kwargs", None) self.filter_kwargs = kwargs.get("filter_kwargs", None) self.noise_kwargs = kwargs.get("noise_kwargs", None) - self.velocity_pertubation_kwargs = kwargs.get("vel_pert_kwargs", None) + self.velocity_perturbation_kwargs = kwargs.get("vel_pert_kwargs", None) self.mask_kwargs = kwargs.get("mask_kwargs", None) self.measure_time = kwargs.get("measure_time", False) self.callback = kwargs.get("callback", None) @@ -75,17 +75,17 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.precip_cascades = None self.gamma = None self.phi = None - self.pert_gen = None + self.perturbation_generator = None self.noise_std_coeffs = None self.randgen_prec = None self.randgen_motion = None self.velocity_perturbations = None self.precip_forecast = None - self.mask_prec = None - 
self.mask_thr = None - self.precip_decomp = None - self.velocity_pertubation_parallel = None - self.velocity_pertubation_perp = None + self.mask_precip = None + self.mask_threshold = None + self.precip_decomposed = None + self.velocity_perturbation_parallel = None + self.velocity_perturbation_perpendicular = None self.fft_objs = None self.generate_noise = None @@ -239,8 +239,8 @@ def _check_inputs(self): self.filter_kwargs = {} if self.noise_kwargs is None: self.noise_kwargs = {} - if self.velocity_pertubation_kwargs is None: - self.velocity_pertubation_kwargs = {} + if self.velocity_perturbation_kwargs is None: + self.velocity_perturbation_kwargs = {} if self.mask_kwargs is None: self.mask_kwargs = {} @@ -296,17 +296,19 @@ def _print_forecast_info(self): print(f"order of the AR(p) model: {self.ar_order}") if self.velocity_perturbation_method == "bps": - self.velocity_pertubation_parallel = self.velocity_pertubation_kwargs.get( + self.velocity_perturbation_parallel = self.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) - self.velocity_pertubation_perp = self.velocity_pertubation_kwargs.get( - "p_perp", noise.motion.get_default_params_bps_perp() + self.velocity_perturbation_perpendicular = ( + self.velocity_perturbation_kwargs.get( + "p_perp", noise.motion.get_default_params_bps_perp() + ) ) print( - f"velocity perturbations, parallel: {self.velocity_pertubation_parallel[0]},{self.velocity_pertubation_parallel[1]},{self.velocity_pertubation_parallel[2]}" + f"velocity perturbations, parallel: {self.velocity_perturbation_parallel[0]},{self.velocity_perturbation_parallel[1]},{self.velocity_perturbation_parallel[2]}" ) print( - f"velocity perturbations, perpendicular: {self.velocity_pertubation_perp[0]},{self.velocity_pertubation_perp[1]},{self.velocity_pertubation_perp[2]}" + f"velocity perturbations, perpendicular: 
{self.velocity_perturbation_perpendicular[0]},{self.velocity_perturbation_perpendicular[1]},{self.velocity_perturbation_perpendicular[2]}" ) if self.precip_threshold is not None: @@ -355,14 +357,14 @@ def _perform_extrapolation(self): """ # Determine the precipitation threshold mask if conditional is set if self.conditional: - self.mask_thr = np.logical_and.reduce( + self.mask_threshold = np.logical_and.reduce( [ self.precip[i, :, :] >= self.precip_threshold for i in range(self.precip.shape[0]) ] ) else: - self.mask_thr = None + self.mask_threshold = None extrap_kwargs = self.extrapolation_kwargs.copy() extrap_kwargs["xy_coords"] = self.xy_coords @@ -423,7 +425,7 @@ def _apply_noise_and_ar_model(self): ) # Get noise methods # Initialize the perturbation generator for the precipitation field - self.pert_gen = init_noise( + self.perturbation_generator = init_noise( self.precip, fft_method=self.fft, **self.noise_kwargs ) @@ -440,7 +442,7 @@ def _apply_noise_and_ar_model(self): np.min(self.precip), self.bandpass_filter, self.decomposition_method, - self.pert_gen, + self.perturbation_generator, self.generate_noise, 20, conditional=self.conditional, @@ -473,40 +475,40 @@ def _apply_noise_and_ar_model(self): else: # No noise, so set perturbation generator and noise_std_coeffs to None - self.pert_gen = None + self.perturbation_generator = None self.noise_std_coeffs = np.ones( self.n_cascade_levels ) # Keep default as 1.0 to avoid breaking AR model # Decompose the input precipitation fields - self.precip_decomp = [] + self.precip_decomposed = [] for i in range(self.ar_order + 1): precip_ = self.decomposition_method( self.precip[i, :, :], self.bandpass_filter, - mask=self.mask_thr, + mask=self.mask_threshold, fft_method=self.fft, output_domain=self.domain, normalize=True, compute_stats=True, compact_output=True, ) - self.precip_decomp.append(precip_) + self.precip_decomposed.append(precip_) # Normalize the cascades and rearrange them into a 4D array self.precip_cascades = 
nowcast_utils.stack_cascades( - self.precip_decomp, self.n_cascade_levels + self.precip_decomposed, self.n_cascade_levels ) - self.precip_decomp = self.precip_decomp[-1] - self.precip_decomp = [ - self.precip_decomp.copy() for _ in range(self.n_ens_members) + self.precip_decomposed = self.precip_decomposed[-1] + self.precip_decomposed = [ + self.precip_decomposed.copy() for _ in range(self.n_ens_members) ] # Compute temporal autocorrelation coefficients for each cascade level self.gamma = np.empty((self.n_cascade_levels, self.ar_order)) for i in range(self.n_cascade_levels): self.gamma[i, :] = correlation.temporal_autocorrelation( - self.precip_cascades[i], mask=self.mask_thr + self.precip_cascades[i], mask=self.mask_threshold ) nowcast_utils.print_corrcoefs(self.gamma) @@ -571,11 +573,11 @@ def _initialize_velocity_perturbations(self): for j in range(self.n_ens_members): kwargs = { "randstate": self.randgen_motion[j], - "p_par": self.velocity_pertubation_kwargs.get( - "p_par", self.velocity_pertubation_parallel + "p_par": self.velocity_perturbation_kwargs.get( + "p_par", self.velocity_perturbation_parallel ), - "p_perp": self.velocity_pertubation_kwargs.get( - "p_perp", self.velocity_pertubation_perp + "p_perp": self.velocity_perturbation_kwargs.get( + "p_perp", self.velocity_perturbation_perpendicular ), } vp = init_vel_noise( @@ -608,18 +610,18 @@ def _initialize_precipitation_mask(self): self.mask_rim = None if self.mask_method is not None: - self.mask_prec = self.precip[-1, :, :] >= self.precip_threshold + self.mask_precip = self.precip[-1, :, :] >= self.precip_threshold if self.mask_method == "sprog": # Compute the wet area ratio and the precipitation mask - self.war = np.sum(self.mask_prec) / ( + self.war = np.sum(self.mask_precip) / ( self.precip.shape[1] * self.precip.shape[2] ) self.precip_mask = [ self.precip_cascades[0][i].copy() for i in range(self.n_cascade_levels) ] - self.precip_mask_decomposed = self.precip_decomp[0].copy() + 
self.precip_mask_decomposed = self.precip_decomposed[0].copy() elif self.mask_method == "incremental": # Get mask parameters @@ -631,14 +633,14 @@ def _initialize_precipitation_mask(self): n = mask_f * self.timestep / self.kmperpixel self.struct = iterate_structure(self.struct, int((n - 1) / 2.0)) # Compute and apply the dilated mask for each ensemble member - self.mask_prec = nowcast_utils.compute_dilated_mask( - self.mask_prec, self.struct, self.mask_rim + self.mask_precip = nowcast_utils.compute_dilated_mask( + self.mask_precip, self.struct, self.mask_rim ) - self.mask_prec = [ - self.mask_prec.copy() for _ in range(self.n_ens_members) + self.mask_precip = [ + self.mask_precip.copy() for _ in range(self.n_ens_members) ] else: - self.mask_prec = None + self.mask_precip = None if self.noise_method is None and self.precip_mask is None: self.precip_mask = [ @@ -662,9 +664,9 @@ def _initialize_state(self): """ return { "fft_objs": self.fft_objs, - "mask_prec": self.mask_prec, + "mask_prec": self.mask_precip, "precip_cascades": self.precip_cascades, - "precip_decomp": self.precip_decomp, + "precip_decomp": self.precip_decomposed, "precip_m": self.precip_mask, "precip_m_d": self.precip_mask_decomposed, "randgen_prec": self.randgen_prec, @@ -690,7 +692,7 @@ def _initialize_params(self, precip): "noise_std_coeffs": self.noise_std_coeffs, "num_ensemble_workers": self.num_ensemble_workers, "phi": self.phi, - "pert_gen": self.pert_gen, + "pert_gen": self.perturbation_generator, "probmatching_method": self.probmatching_method, "precip": precip, "precip_thr": self.precip_threshold, From b6c47af71f4863860ba7bc1b9c433e4358e84bed Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 22 Oct 2024 15:34:13 +0200 Subject: [PATCH 17/65] Name changes from feedback Ruben v3 --- pysteps/nowcasts/steps.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 6884c36d0..1a09548ab 100644 --- 
a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -77,8 +77,8 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.phi = None self.perturbation_generator = None self.noise_std_coeffs = None - self.randgen_prec = None - self.randgen_motion = None + self.random_generator_precip = None + self.random_generator_motion = None self.velocity_perturbations = None self.precip_forecast = None self.mask_precip = None @@ -541,22 +541,22 @@ def _apply_noise_and_ar_model(self): # Initialize random generators if noise_method is provided if self.noise_method is not None: - self.randgen_prec = [] - self.randgen_motion = [] + self.random_generator_precip = [] + self.random_generator_motion = [] for _ in range(self.n_ens_members): # Create random state for precipitation noise generator rs = np.random.RandomState(self.seed) - self.randgen_prec.append(rs) + self.random_generator_precip.append(rs) self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating # Create random state for motion perturbations generator rs = np.random.RandomState(self.seed) - self.randgen_motion.append(rs) + self.random_generator_motion.append(rs) self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating else: - self.randgen_prec = None - self.randgen_motion = None + self.random_generator_precip = None + self.random_generator_motion = None print("AR model and noise applied to precipitation cascades.") def _initialize_velocity_perturbations(self): @@ -572,7 +572,7 @@ def _initialize_velocity_perturbations(self): self.velocity_perturbations = [] for j in range(self.n_ens_members): kwargs = { - "randstate": self.randgen_motion[j], + "randstate": self.random_generator_motion[j], "p_par": self.velocity_perturbation_kwargs.get( "p_par", self.velocity_perturbation_parallel ), @@ -669,7 +669,7 @@ def _initialize_state(self): "precip_decomp": self.precip_decomposed, "precip_m": self.precip_mask, "precip_m_d": self.precip_mask_decomposed, - "randgen_prec": 
self.randgen_prec, + "randgen_prec": self.random_generator_precip, } def _initialize_params(self, precip): From 5aaf24d2f5598f459a930adf6c2fb99f1fa20803 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 09:45:02 +0100 Subject: [PATCH 18/65] Added config dataclass to steps nowcast --- pysteps/nowcasts/steps.py | 367 +++++++++++++++++++++----------------- 1 file changed, 203 insertions(+), 164 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 1a09548ab..fcdb1831f 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -25,6 +25,9 @@ from pysteps.timeseries import autoregression, correlation from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop +from dataclasses import dataclass, field +from typing import Optional, Dict, Any, Callable + try: import dask @@ -33,39 +36,43 @@ DASK_IMPORTED = False +class StepsNowcasterConfig: + n_ens_members: int = 24 + n_cascade_levels: int = 6 + precip_threshold: Optional[float] = None + kmperpixel: Optional[float] = None + timestep: Optional[float] = None + extrapolation_method: str = "semilagrangian" + decomposition_method: str = "fft" + bandpass_filter_method: str = "gaussian" + noise_method: Optional[str] = "nonparametric" + noise_stddev_adj: Optional[str] = None + ar_order: int = 2 + velocity_perturbation_method: Optional[str] = "bps" + conditional: bool = False + probmatching_method: Optional[str] = "cdf" + mask_method: Optional[str] = "incremental" + seed: Optional[int] = None + num_workers: int = 1 + fft_method: str = "numpy" + domain: str = "spatial" + extrapolation_kwargs: Dict[str, Any] = field(default_factory=dict) + filter_kwargs: Dict[str, Any] = field(default_factory=dict) + noise_kwargs: Dict[str, Any] = field(default_factory=dict) + velocity_perturbation_kwargs: Dict[str, Any] = field(default_factory=dict) + mask_kwargs: Dict[str, Any] = field(default_factory=dict) + measure_time: bool = False + callback: 
Optional[Callable[[Any], None]] = None + return_output: bool = True + + class StepsNowcaster: - def __init__(self, precip, velocity, timesteps, **kwargs): + def __init__(self, precip, velocity, timesteps, steps_config, **kwargs): + self.config = steps_config # Store inputs and optional parameters self.precip = precip self.velocity = velocity self.timesteps = timesteps - self.n_ens_members = kwargs.get("n_ens_members", 24) - self.n_cascade_levels = kwargs.get("n_cascade_levels", 6) - self.precip_threshold = kwargs.get("precip_thr", None) - self.kmperpixel = kwargs.get("kmperpixel", None) - self.timestep = kwargs.get("timestep", None) - self.extrapolation_method = kwargs.get("extrap_method", "semilagrangian") - self.decomposition_method = kwargs.get("decomp_method", "fft") - self.bandpass_filter_method = kwargs.get("bandpass_filter_method", "gaussian") - self.noise_method = kwargs.get("noise_method", "nonparametric") - self.noise_stddev_adj = kwargs.get("noise_stddev_adj", None) - self.ar_order = kwargs.get("ar_order", 2) - self.velocity_perturbation_method = kwargs.get("vel_pert_method", "bps") - self.conditional = kwargs.get("conditional", False) - self.probmatching_method = kwargs.get("probmatching_method", "cdf") - self.mask_method = kwargs.get("mask_method", "incremental") - self.seed = kwargs.get("seed", None) - self.num_workers = kwargs.get("num_workers", 1) - self.fft_method = kwargs.get("fft_method", "numpy") - self.domain = kwargs.get("domain", "spatial") - self.extrapolation_kwargs = kwargs.get("extrap_kwargs", None) - self.filter_kwargs = kwargs.get("filter_kwargs", None) - self.noise_kwargs = kwargs.get("noise_kwargs", None) - self.velocity_perturbation_kwargs = kwargs.get("vel_pert_kwargs", None) - self.mask_kwargs = kwargs.get("mask_kwargs", None) - self.measure_time = kwargs.get("measure_time", False) - self.callback = kwargs.get("callback", None) - self.return_output = kwargs.get("return_output", True) # Additional variables for internal state 
management self.fft = None @@ -88,6 +95,9 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.velocity_perturbation_perpendicular = None self.fft_objs = None self.generate_noise = None + self.decomposition_method = None + self.recomp_method = None + self.xy_coords = None # Additional variables for time measurement self.start_time_init = None @@ -95,7 +105,9 @@ def __init__(self, precip, velocity, timesteps, **kwargs): self.mainloop_time = None # Initialize number of ensemble workers - self.num_ensemble_workers = min(self.n_ens_members, self.num_workers) + self.num_ensemble_workers = min( + self.config.n_ens_members, self.config.num_workers + ) def compute_forecast(self): """ @@ -105,12 +117,12 @@ def compute_forecast(self): self._check_inputs() self._print_forecast_info() # Measure time for initialization - if self.measure_time: + if self.config.measure_time: self.start_time_init = time.time() self._initialize_nowcast_components() # Slice the precipitation field to only use the last ar_order + 1 fields - self.precip = self.precip[-(self.ar_order + 1) :, :, :].copy() + self.precip = self.precip[-(self.config.ar_order + 1) :, :, :].copy() self._perform_extrapolation() self._apply_noise_and_ar_model() @@ -118,21 +130,24 @@ def compute_forecast(self): self._initialize_precipitation_mask() self._initialize_fft_objects() # Measure and print initialization time - if self.measure_time: + if self.config.measure_time: self._measure_time("Initialization", self.start_time_init) # Run the main nowcast loop self._nowcast_main() - if self.measure_time: + if self.config.measure_time: self.precip_forecast, self.mainloop_time = self.precip_forecast # Stack and return the forecast output - if self.return_output: + if self.config.return_output: self.precip_forecast = np.stack( - [np.stack(self.precip_forecast[j]) for j in range(self.n_ens_members)] + [ + np.stack(self.precip_forecast[j]) + for j in range(self.config.n_ens_members) + ] ) - if self.measure_time: + if 
self.config.measure_time: return self.precip_forecast, self.init_time, self.mainloop_time else: return self.precip_forecast @@ -159,17 +174,17 @@ def _nowcast_main(self): self.velocity, state, self.timesteps, - self.extrapolation_method, + self.config.extrapolation_method, self._update_state, # Reference to the update function extrap_kwargs=self.extrapolation_kwargs, velocity_pert_gen=self.velocity_perturbations, params=params, ensemble=True, - num_ensemble_members=self.n_ens_members, - callback=self.callback, - return_output=self.return_output, + num_ensemble_members=self.config.n_ens_members, + callback=self.config.callback, + return_output=self.config.return_output, num_workers=self.num_ensemble_workers, - measure_time=self.measure_time, + measure_time=self.config.measure_time, ) def _check_inputs(self): @@ -179,7 +194,7 @@ def _check_inputs(self): if self.precip.ndim != 3: raise ValueError("precip must be a three-dimensional array") - if self.precip.shape[0] < self.ar_order + 1: + if self.precip.shape[0] < self.config.ar_order + 1: raise ValueError( f"precip.shape[0] must be at least ar_order+1, " f"but found {self.precip.shape[0]}" @@ -198,50 +213,53 @@ def _check_inputs(self): raise ValueError("timesteps must be in ascending order") if np.any(~np.isfinite(self.velocity)): raise ValueError("velocity contains non-finite values") - if self.mask_method not in ["obs", "sprog", "incremental", None]: + if self.config.mask_method not in ["obs", "sprog", "incremental", None]: raise ValueError( - f"Unknown mask method '{self.mask_method}'. " + f"Unknown mask method '{self.config.mask_method}'. " "Must be 'obs', 'sprog', 'incremental', or None." 
) - if self.precip_threshold is None: - if self.conditional: + if self.config.precip_threshold is None: + if self.config.conditional: raise ValueError("conditional=True but precip_thr is not specified.") - if self.mask_method is not None: + if self.config.mask_method is not None: raise ValueError("mask_method is set but precip_thr is not specified.") - if self.probmatching_method == "mean": + if self.config.probmatching_method == "mean": raise ValueError( "probmatching_method='mean' but precip_thr is not specified." ) - if self.noise_method is not None and self.noise_stddev_adj == "auto": + if ( + self.config.noise_method is not None + and self.config.noise_stddev_adj == "auto" + ): raise ValueError( "noise_stddev_adj='auto' but precip_thr is not specified." ) - if self.noise_stddev_adj not in ["auto", "fixed", None]: + if self.config.noise_stddev_adj not in ["auto", "fixed", None]: raise ValueError( - f"Unknown noise_stddev_adj method '{self.noise_stddev_adj}'. " + f"Unknown noise_stddev_adj method '{self.config.noise_stddev_adj}'. " "Must be 'auto', 'fixed', or None." 
) - if self.kmperpixel is None: - if self.velocity_perturbation_method is not None: + if self.config.kmperpixel is None: + if self.config.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but kmperpixel=None") - if self.mask_method == "incremental": + if self.config.mask_method == "incremental": raise ValueError("mask_method='incremental' but kmperpixel=None") - if self.timestep is None: - if self.velocity_perturbation_method is not None: + if self.config.timestep is None: + if self.config.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but timestep=None") - if self.mask_method == "incremental": + if self.config.mask_method == "incremental": raise ValueError("mask_method='incremental' but timestep=None") # Handle None values for various kwargs - if self.extrapolation_kwargs is None: + if self.config.extrapolation_kwargs is None: self.extrapolation_kwargs = {} - if self.filter_kwargs is None: + if self.config.filter_kwargs is None: self.filter_kwargs = {} - if self.noise_kwargs is None: + if self.config.noise_kwargs is None: self.noise_kwargs = {} - if self.velocity_perturbation_kwargs is None: + if self.config.velocity_perturbation_kwargs is None: self.velocity_perturbation_kwargs = {} - if self.mask_kwargs is None: + if self.config.mask_kwargs is None: self.mask_kwargs = {} print("Inputs validated and initialized successfully.") @@ -257,31 +275,33 @@ def _print_forecast_info(self): print("Inputs") print("------") print(f"input dimensions: {self.precip.shape[1]}x{self.precip.shape[2]}") - if self.kmperpixel is not None: - print(f"km/pixel: {self.kmperpixel}") - if self.timestep is not None: - print(f"time step: {self.timestep} minutes") + if self.config.kmperpixel is not None: + print(f"km/pixel: {self.config.kmperpixel}") + if self.config.timestep is not None: + print(f"time step: {self.config.timestep} minutes") print("") print("Methods") print("-------") - print(f"extrapolation: 
{self.extrapolation_method}") - print(f"bandpass filter: {self.bandpass_filter_method}") - print(f"decomposition: {self.decomposition_method}") - print(f"noise generator: {self.noise_method}") + print(f"extrapolation: {self.config.extrapolation_method}") + print(f"bandpass filter: {self.config.bandpass_filter_method}") + print(f"decomposition: {self.config.decomposition_method}") + print(f"noise generator: {self.config.noise_method}") print( "noise adjustment: {}".format( - ("yes" if self.noise_stddev_adj else "no") + ("yes" if self.config.noise_stddev_adj else "no") ) ) - print(f"velocity perturbator: {self.velocity_perturbation_method}") + print(f"velocity perturbator: {self.config.velocity_perturbation_method}") print( - "conditional statistics: {}".format(("yes" if self.conditional else "no")) + "conditional statistics: {}".format( + ("yes" if self.config.conditional else "no") + ) ) - print(f"precip. mask method: {self.mask_method}") - print(f"probability matching: {self.probmatching_method}") - print(f"FFT method: {self.fft_method}") - print(f"domain: {self.domain}") + print(f"precip. 
mask method: {self.config.mask_method}") + print(f"probability matching: {self.config.probmatching_method}") + print(f"FFT method: {self.config.fft_method}") + print(f"domain: {self.config.domain}") print("") print("Parameters") @@ -290,12 +310,12 @@ def _print_forecast_info(self): print(f"number of time steps: {self.timesteps}") else: print(f"time steps: {self.timesteps}") - print(f"ensemble size: {self.n_ens_members}") - print(f"parallel threads: {self.num_workers}") - print(f"number of cascade levels: {self.n_cascade_levels}") - print(f"order of the AR(p) model: {self.ar_order}") + print(f"ensemble size: {self.config.n_ens_members}") + print(f"parallel threads: {self.config.num_workers}") + print(f"number of cascade levels: {self.config.n_cascade_levels}") + print(f"order of the AR(p) model: {self.config.ar_order}") - if self.velocity_perturbation_method == "bps": + if self.config.velocity_perturbation_method == "bps": self.velocity_perturbation_parallel = self.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) @@ -311,8 +331,8 @@ def _print_forecast_info(self): f"velocity perturbations, perpendicular: {self.velocity_perturbation_perpendicular[0]},{self.velocity_perturbation_perpendicular[1]},{self.velocity_perturbation_perpendicular[2]}" ) - if self.precip_threshold is not None: - print(f"precip. intensity threshold: {self.precip_threshold}") + if self.config.precip_threshold is not None: + print(f"precip. 
intensity threshold: {self.config.precip_threshold}") def _initialize_nowcast_components(self): """ @@ -322,22 +342,24 @@ def _initialize_nowcast_components(self): # Initialize FFT method self.fft = utils.get_method( - self.fft_method, shape=(M, N), n_threads=self.num_workers + self.config.fft_method, shape=(M, N), n_threads=self.config.num_workers ) # Initialize the band-pass filter for the cascade decomposition - filter_method = cascade.get_method(self.bandpass_filter_method) + filter_method = cascade.get_method(self.config.bandpass_filter_method) self.bandpass_filter = filter_method( - (M, N), self.n_cascade_levels, **(self.filter_kwargs or {}) + (M, N), self.config.n_cascade_levels, **(self.filter_kwargs or {}) ) # Get the decomposition method (e.g., FFT) self.decomposition_method, self.recomp_method = cascade.get_method( - self.decomposition_method + self.config.decomposition_method ) # Get the extrapolation method (e.g., semilagrangian) - self.extrapolator_method = extrapolation.get_method(self.extrapolation_method) + self.extrapolator_method = extrapolation.get_method( + self.config.extrapolation_method + ) # Generate the mesh grid for spatial coordinates x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) @@ -356,10 +378,10 @@ def _perform_extrapolation(self): them in time. This prepares the precipitation fields for autoregressive modeling. 
""" # Determine the precipitation threshold mask if conditional is set - if self.conditional: + if self.config.conditional: self.mask_threshold = np.logical_and.reduce( [ - self.precip[i, :, :] >= self.precip_threshold + self.precip[i, :, :] >= self.config.precip_threshold for i in range(self.precip.shape[0]) ] ) @@ -379,12 +401,12 @@ def _extrapolate_single_field(precip, i): return self.extrapolator_method( precip[i, :, :], self.velocity, - self.ar_order - i, + self.config.ar_order - i, "min", **extrap_kwargs, )[-1] - for i in range(self.ar_order): + for i in range(self.config.ar_order): if ( not DASK_IMPORTED ): # If Dask is not available, perform sequential extrapolation @@ -418,10 +440,10 @@ def _apply_noise_and_ar_model(self): self.precip = precip # Initialize the noise generator if the noise_method is provided - if self.noise_method is not None: - np.random.seed(self.seed) # Set the random seed for reproducibility + if self.config.noise_method is not None: + np.random.seed(self.config.seed) # Set the random seed for reproducibility init_noise, self.generate_noise = noise.get_method( - self.noise_method + self.config.noise_method ) # Get noise methods # Initialize the perturbation generator for the precipitation field @@ -430,46 +452,46 @@ def _apply_noise_and_ar_model(self): ) # Handle noise standard deviation adjustments if necessary - if self.noise_stddev_adj == "auto": + if self.config.noise_stddev_adj == "auto": print("Computing noise adjustment coefficients... 
", end="", flush=True) - if self.measure_time: + if self.config.measure_time: starttime = time.time() # Compute noise adjustment coefficients self.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( self.precip[-1, :, :], - self.precip_threshold, + self.config.precip_threshold, np.min(self.precip), self.bandpass_filter, self.decomposition_method, self.perturbation_generator, - self.generate_noise, + self.config.generate_noise, 20, - conditional=self.conditional, - num_workers=self.num_workers, - seed=self.seed, + conditional=self.config.conditional, + num_workers=self.config.num_workers, + seed=self.config.seed, ) # Measure and print time taken - if self.measure_time: + if self.config.measure_time: self._measure_time( "Noise adjustment coefficient computation", starttime ) else: print("done.") - elif self.noise_stddev_adj == "fixed": + elif self.config.noise_stddev_adj == "fixed": # Set fixed noise adjustment coefficients func = lambda k: 1.0 / (0.75 + 0.09 * k) self.noise_std_coeffs = [ - func(k) for k in range(1, self.n_cascade_levels + 1) + func(k) for k in range(1, self.config.n_cascade_levels + 1) ] else: # Default to no adjustment - self.noise_std_coeffs = np.ones(self.n_cascade_levels) + self.noise_std_coeffs = np.ones(self.config.n_cascade_levels) - if self.noise_stddev_adj is not None: + if self.config.noise_stddev_adj is not None: # Print noise std deviation coefficients if adjustments were made print(f"noise std. dev. 
coeffs: {str(self.noise_std_coeffs)}") @@ -477,18 +499,18 @@ def _apply_noise_and_ar_model(self): # No noise, so set perturbation generator and noise_std_coeffs to None self.perturbation_generator = None self.noise_std_coeffs = np.ones( - self.n_cascade_levels + self.config.n_cascade_levels ) # Keep default as 1.0 to avoid breaking AR model # Decompose the input precipitation fields self.precip_decomposed = [] - for i in range(self.ar_order + 1): + for i in range(self.config.ar_order + 1): precip_ = self.decomposition_method( self.precip[i, :, :], self.bandpass_filter, mask=self.mask_threshold, fft_method=self.fft, - output_domain=self.domain, + output_domain=self.config.domain, normalize=True, compute_stats=True, compact_output=True, @@ -497,16 +519,16 @@ def _apply_noise_and_ar_model(self): # Normalize the cascades and rearrange them into a 4D array self.precip_cascades = nowcast_utils.stack_cascades( - self.precip_decomposed, self.n_cascade_levels + self.precip_decomposed, self.config.n_cascade_levels ) self.precip_decomposed = self.precip_decomposed[-1] self.precip_decomposed = [ - self.precip_decomposed.copy() for _ in range(self.n_ens_members) + self.precip_decomposed.copy() for _ in range(self.config.n_ens_members) ] # Compute temporal autocorrelation coefficients for each cascade level - self.gamma = np.empty((self.n_cascade_levels, self.ar_order)) - for i in range(self.n_cascade_levels): + self.gamma = np.empty((self.config.n_cascade_levels, self.config.ar_order)) + for i in range(self.config.n_cascade_levels): self.gamma[i, :] = correlation.temporal_autocorrelation( self.precip_cascades[i], mask=self.mask_threshold ) @@ -514,37 +536,40 @@ def _apply_noise_and_ar_model(self): nowcast_utils.print_corrcoefs(self.gamma) # Adjust the lag-2 correlation coefficient if AR(2) model is used - if self.ar_order == 2: - for i in range(self.n_cascade_levels): + if self.config.ar_order == 2: + for i in range(self.config.n_cascade_levels): self.gamma[i, 1] = 
autoregression.adjust_lag2_corrcoef2( self.gamma[i, 0], self.gamma[i, 1] ) # Estimate the parameters of the AR model using autocorrelation coefficients - self.phi = np.empty((self.n_cascade_levels, self.ar_order + 1)) - for i in range(self.n_cascade_levels): + self.phi = np.empty((self.config.n_cascade_levels, self.config.ar_order + 1)) + for i in range(self.config.n_cascade_levels): self.phi[i, :] = autoregression.estimate_ar_params_yw(self.gamma[i, :]) nowcast_utils.print_ar_params(self.phi) # Discard all except the last ar_order cascades for AR model self.precip_cascades = [ - self.precip_cascades[i][-self.ar_order :] - for i in range(self.n_cascade_levels) + self.precip_cascades[i][-self.config.ar_order :] + for i in range(self.config.n_cascade_levels) ] # Stack the cascades into a list containing all ensemble members self.precip_cascades = [ - [self.precip_cascades[j].copy() for j in range(self.n_cascade_levels)] - for _ in range(self.n_ens_members) + [ + self.precip_cascades[j].copy() + for j in range(self.config.n_cascade_levels) + ] + for _ in range(self.config.n_ens_members) ] # Initialize random generators if noise_method is provided - if self.noise_method is not None: + if self.config.noise_method is not None: self.random_generator_precip = [] self.random_generator_motion = [] - for _ in range(self.n_ens_members): + for _ in range(self.config.n_ens_members): # Create random state for precipitation noise generator rs = np.random.RandomState(self.seed) self.random_generator_precip.append(rs) @@ -564,13 +589,13 @@ def _initialize_velocity_perturbations(self): Initialize the velocity perturbators for each ensemble member if the velocity perturbation method is specified. 
""" - if self.velocity_perturbation_method is not None: + if self.config.velocity_perturbation_method is not None: init_vel_noise, generate_vel_noise = noise.get_method( - self.velocity_perturbation_method + self.config.velocity_perturbation_method ) self.velocity_perturbations = [] - for j in range(self.n_ens_members): + for j in range(self.config.n_ens_members): kwargs = { "randstate": self.random_generator_motion[j], "p_par": self.velocity_perturbation_kwargs.get( @@ -581,10 +606,13 @@ def _initialize_velocity_perturbations(self): ), } vp = init_vel_noise( - self.velocity, 1.0 / self.kmperpixel, self.timestep, **kwargs + self.velocity, + 1.0 / self.config.kmperpixel, + self.config.timestep, + **kwargs, ) self.velocity_perturbations.append( - lambda t, vp=vp: generate_vel_noise(vp, t * self.timestep) + lambda t, vp=vp: generate_vel_noise(vp, t * self.config.timestep) ) else: self.velocity_perturbations = None @@ -594,11 +622,13 @@ def _initialize_precipitation_mask(self): """ Initialize the precipitation mask and handle different mask methods (sprog, incremental). 
""" - self.precip_forecast = [[] for _ in range(self.n_ens_members)] + self.precip_forecast = [[] for _ in range(self.config.n_ens_members)] - if self.probmatching_method == "mean": + if self.config.probmatching_method == "mean": self.mu_0 = np.mean( - self.precip[-1, :, :][self.precip[-1, :, :] >= self.precip_threshold] + self.precip[-1, :, :][ + self.precip[-1, :, :] >= self.config.precip_threshold + ] ) else: self.mu_0 = None @@ -609,42 +639,43 @@ def _initialize_precipitation_mask(self): self.struct = None self.mask_rim = None - if self.mask_method is not None: - self.mask_precip = self.precip[-1, :, :] >= self.precip_threshold + if self.config.mask_method is not None: + self.mask_precip = self.precip[-1, :, :] >= self.config.precip_threshold - if self.mask_method == "sprog": + if self.config.mask_method == "sprog": # Compute the wet area ratio and the precipitation mask self.war = np.sum(self.mask_precip) / ( self.precip.shape[1] * self.precip.shape[2] ) self.precip_mask = [ self.precip_cascades[0][i].copy() - for i in range(self.n_cascade_levels) + for i in range(self.config.n_cascade_levels) ] self.precip_mask_decomposed = self.precip_decomposed[0].copy() - elif self.mask_method == "incremental": + elif self.config.mask_method == "incremental": # Get mask parameters self.mask_rim = self.mask_kwargs.get("mask_rim", 10) mask_f = self.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element self.struct = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep - n = mask_f * self.timestep / self.kmperpixel + n = mask_f * self.config.timestep / self.config.kmperpixel self.struct = iterate_structure(self.struct, int((n - 1) / 2.0)) # Compute and apply the dilated mask for each ensemble member self.mask_precip = nowcast_utils.compute_dilated_mask( self.mask_precip, self.struct, self.mask_rim ) self.mask_precip = [ - self.mask_precip.copy() for _ in range(self.n_ens_members) + self.mask_precip.copy() for _ in 
range(self.config.n_ens_members) ] else: self.mask_precip = None - if self.noise_method is None and self.precip_mask is None: + if self.config.noise_method is None and self.precip_mask is None: self.precip_mask = [ - self.precip_cascades[0][i].copy() for i in range(self.n_cascade_levels) + self.precip_cascades[0][i].copy() + for i in range(self.config.n_cascade_levels) ] print("Precipitation mask initialized successfully.") @@ -653,8 +684,10 @@ def _initialize_fft_objects(self): Initialize FFT objects for each ensemble member. """ self.fft_objs = [] - for _ in range(self.n_ens_members): - fft_obj = utils.get_method(self.fft_method, shape=self.precip.shape[1:]) + for _ in range(self.config.n_ens_members): + fft_obj = utils.get_method( + self.config.fft_method, shape=self.precip.shape[1:] + ) self.fft_objs.append(fft_obj) print("FFT objects initialized successfully.") @@ -678,24 +711,24 @@ def _initialize_params(self, precip): """ return { "decomp_method": self.decomposition_method, - "domain": self.domain, + "domain": self.config.domain, "domain_mask": self.domain_mask, "filter": self.bandpass_filter, "fft": self.fft, "generate_noise": self.generate_noise, - "mask_method": self.mask_method, + "mask_method": self.config.mask_method, "mask_rim": self.mask_rim, "mu_0": self.mu_0, - "n_cascade_levels": self.n_cascade_levels, - "n_ens_members": self.n_ens_members, - "noise_method": self.noise_method, + "n_cascade_levels": self.config.n_cascade_levels, + "n_ens_members": self.config.n_ens_members, + "noise_method": self.config.noise_method, "noise_std_coeffs": self.noise_std_coeffs, "num_ensemble_workers": self.num_ensemble_workers, "phi": self.phi, "pert_gen": self.perturbation_generator, - "probmatching_method": self.probmatching_method, + "probmatching_method": self.config.probmatching_method, "precip": precip, - "precip_thr": self.precip_threshold, + "precip_thr": self.config.precip_threshold, "recomp_method": self.recomp_method, "struct": self.struct, "war": 
self.war, @@ -894,10 +927,14 @@ def _measure_time(self, label, start_time): - label: A description of the part of the process being measured. - start_time: The timestamp when the process started (from time.time()). """ - if self.measure_time: + if self.config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") + def reset_states(self): + # TODO: Implement a method to reset the state of the nowcast object to make multiple iterations possible + pass + # Wrapper function to preserve backward compatibility @deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") @@ -1128,23 +1165,19 @@ def forecast( :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ - # Create an instance of the new class with all the provided arguments - nowcaster = StepsNowcaster( - precip, - velocity, - timesteps, + nowcaster_config = StepsNowcasterConfig( n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, - precip_thr=precip_thr, + precip_threshold=precip_thr, kmperpixel=kmperpixel, timestep=timestep, - extrap_method=extrap_method, - decomp_method=decomp_method, + extrapolation_method=extrap_method, + decomposition_method=decomp_method, bandpass_filter_method=bandpass_filter_method, noise_method=noise_method, noise_stddev_adj=noise_stddev_adj, ar_order=ar_order, - vel_pert_method=vel_pert_method, + velocity_perturbation_method=vel_pert_method, conditional=conditional, probmatching_method=probmatching_method, mask_method=mask_method, @@ -1152,15 +1185,21 @@ def forecast( num_workers=num_workers, fft_method=fft_method, domain=domain, - extrap_kwargs=extrap_kwargs, + extrapolation_kwargs=extrap_kwargs, filter_kwargs=filter_kwargs, noise_kwargs=noise_kwargs, - vel_pert_kwargs=vel_pert_kwargs, + velocity_perturbation_kwargs=vel_pert_kwargs, mask_kwargs=mask_kwargs, measure_time=measure_time, callback=callback, return_output=return_output, ) + # Create an instance of the new class with all the 
provided arguments + nowcaster = StepsNowcaster( + precip, velocity, timesteps, steps_config=nowcaster_config + ) + forecast = nowcaster.compute_forecast() + nowcaster.reset_states() # Call the appropriate methods within the class - return nowcaster.compute_forecast() + return forecast From fa9a1efd5872d21733b1b6c8549753ce18749e7e Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 09:53:55 +0100 Subject: [PATCH 19/65] Added config dataclass to steps nowcast, v2 --- pysteps/nowcasts/steps.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index fcdb1831f..24f0187f7 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -25,7 +25,7 @@ from pysteps.timeseries import autoregression, correlation from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop -from dataclasses import dataclass, field +from dataclasses import field from typing import Optional, Dict, Any, Callable try: @@ -99,6 +99,8 @@ def __init__(self, precip, velocity, timesteps, steps_config, **kwargs): self.recomp_method = None self.xy_coords = None + self.mu_0 = None + # Additional variables for time measurement self.start_time_init = None self.init_time = None @@ -542,7 +544,7 @@ def _apply_noise_and_ar_model(self): self.gamma[i, 0], self.gamma[i, 1] ) - # Estimate the parameters of the AR model using autocorrelation coefficients + # Estimate the parameters of the AR model using auto-correlation coefficients self.phi = np.empty((self.config.n_cascade_levels, self.config.ar_order + 1)) for i in range(self.config.n_cascade_levels): self.phi[i, :] = autoregression.estimate_ar_params_yw(self.gamma[i, :]) @@ -571,12 +573,12 @@ def _apply_noise_and_ar_model(self): for _ in range(self.config.n_ens_members): # Create random state for precipitation noise generator - rs = np.random.RandomState(self.seed) + rs = np.random.RandomState(self.config.seed) 
self.random_generator_precip.append(rs) self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating # Create random state for motion perturbations generator - rs = np.random.RandomState(self.seed) + rs = np.random.RandomState(self.config.seed) self.random_generator_motion.append(rs) self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating else: From 3da1696c68ded4dd48de88352e8c345bccb0ab66 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 10:21:45 +0100 Subject: [PATCH 20/65] Added config dataclass to steps nowcast, v3 --- pysteps/nowcasts/steps.py | 51 ++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 24f0187f7..7cdd0d3f1 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -25,8 +25,8 @@ from pysteps.timeseries import autoregression, correlation from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop -from dataclasses import field -from typing import Optional, Dict, Any, Callable +from dataclasses import dataclass, field +from typing import Optional, Dict, Any, Callable, List try: import dask @@ -36,6 +36,7 @@ DASK_IMPORTED = False +@dataclass class StepsNowcasterConfig: n_ens_members: int = 24 n_cascade_levels: int = 6 @@ -66,14 +67,52 @@ class StepsNowcasterConfig: return_output: bool = True +@dataclass +class StepsNowcasterParams: + fft: Any = None + bandpass_filter: Any = None + decomposition_method: Any = None + recomposition_method: Any = None + noise_generator: Optional[callable] = None + perturbation_generator: Optional[callable] = None + noise_std_coeffs: Optional[np.ndarray] = None + ar_model_coefficients: Optional[np.ndarray] = None + domain_mask: Optional[np.ndarray] = None + structuring_element: Optional[np.ndarray] = None + precipitation_mean: Optional[float] = None + wet_area_ratio: Optional[float] = None + num_workers: int = 1 + + +@dataclass +class 
StepsNowcasterState: + precip_cascades: Optional[List[List[np.ndarray]]] = field(default_factory=list) + precip_decomposed: Optional[List[Dict[str, Any]]] = field(default_factory=list) + mask_precip: Optional[np.ndarray] = None + random_generator_precip: Optional[List[np.random.RandomState]] = field( + default_factory=list + ) + random_generator_motion: Optional[List[np.random.RandomState]] = field( + default_factory=list + ) + velocity_perturbations: Optional[List[callable]] = field(default_factory=list) + fft_objects: Optional[List[Any]] = field(default_factory=list) + + class StepsNowcaster: - def __init__(self, precip, velocity, timesteps, steps_config, **kwargs): - self.config = steps_config + def __init__(self, precip, velocity, timesteps, steps_config): # Store inputs and optional parameters self.precip = precip self.velocity = velocity self.timesteps = timesteps + # Store the config data: + self.config = steps_config + + # Store the state and params data: + self.state = StepsNowcasterState() + self.params = StepsNowcasterParams() + # Additional variables for internal state management self.fft = None self.bandpass_filter = None @@ -1201,7 +1240,7 @@ def forecast( nowcaster = StepsNowcaster( precip, velocity, timesteps, steps_config=nowcaster_config ) - forecast = nowcaster.compute_forecast() + forecast_steps_nowcast = nowcaster.compute_forecast() nowcaster.reset_states() # Call the appropriate methods within the class - return forecast + return forecast_steps_nowcast From 8c7982cd3a2f4fda7eb361ed4a073ebe180ed029 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 10:52:54 +0100 Subject: [PATCH 21/65] Added config dataclass to steps nowcast, v4 --- pysteps/nowcasts/steps.py | 59 ++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 7cdd0d3f1..abbc01007 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -71,17 
+71,24 @@ class StepsNowcasterConfig: class StepsNowcasterParams: fft: Any = None bandpass_filter: Any = None + extrapolator_method: Any = None decomposition_method: Any = None recomposition_method: Any = None noise_generator: Optional[callable] = None perturbation_generator: Optional[callable] = None - noise_std_coeffs: Optional[np.ndarray] = None - ar_model_coefficients: Optional[np.ndarray] = None + noise_std_coefficients: Optional[np.ndarray] = None + ar_model_coefficients: Optional[np.ndarray] = None # Corresponds to phi + autocorrelation_coefficients: Optional[np.ndarray] = None # Corresponds to gamma domain_mask: Optional[np.ndarray] = None structuring_element: Optional[np.ndarray] = None precipitation_mean: Optional[float] = None wet_area_ratio: Optional[float] = None num_workers: int = 1 + generate_noise: Optional[callable] = None + xy_coordinates: Optional[np.ndarray] = None + velocity_perturbation_parallel: Optional[List[float]] = None + velocity_perturbation_perpendicular: Optional[List[float]] = None + num_ensemble_workers: int = 1 @dataclass @@ -89,6 +96,7 @@ class StepsNowcasterState: precip_cascades: Optional[List[List[np.ndarray]]] = field(default_factory=list) precip_decomposed: Optional[List[Dict[str, Any]]] = field(default_factory=list) mask_precip: Optional[np.ndarray] = None + mask_threshold: Optional[np.ndarray] = None random_generator_precip: Optional[List[np.random.RandomState]] = field( default_factory=list ) @@ -97,22 +105,19 @@ class StepsNowcasterState: ) velocity_perturbations: Optional[List[callable]] = field(default_factory=list) fft_objects: Optional[List[Any]] = field(default_factory=list) + precip_forecast: Optional[List[Any]] = field(default_factory=list) class StepsNowcaster: - def __init__(self, precip, velocity, timesteps, steps_config): + def __init__(self, precip, velocity, time_steps, steps_config): # Store inputs and optional parameters self.precip = precip self.velocity = velocity - self.timesteps = timesteps + 
self.tim_esteps = time_steps # Store the config data: self.config = steps_config - # Store the state and params data: - self.state = StepsNowcasterState() - self.params = StepsNowcasterParams() - # Additional variables for internal state management self.fft = None self.bandpass_filter = None @@ -138,18 +143,21 @@ def __init__(self, precip, velocity, timesteps, steps_config): self.recomp_method = None self.xy_coords = None - self.mu_0 = None + self.precipitation_mean = None + # Initialize number of ensemble workers + self.num_ensemble_workers = min( + self.config.n_ens_members, self.config.num_workers + ) + + # Store the state and params data: + self.state = StepsNowcasterState() + self.params = StepsNowcasterParams() # Additional variables for time measurement self.start_time_init = None self.init_time = None self.mainloop_time = None - # Initialize number of ensemble workers - self.num_ensemble_workers = min( - self.config.n_ens_members, self.config.num_workers - ) - def compute_forecast(self): """ Main loop for nowcast ensemble generation. 
This handles extrapolation, @@ -214,7 +222,7 @@ def _nowcast_main(self): precip, self.velocity, state, - self.timesteps, + self.tim_esteps, self.config.extrapolation_method, self._update_state, # Reference to the update function extrap_kwargs=self.extrapolation_kwargs, @@ -248,8 +256,8 @@ def _check_inputs(self): f"shape(precip)={self.precip.shape}, shape(velocity)={self.velocity.shape}" ) if ( - isinstance(self.timesteps, list) - and not sorted(self.timesteps) == self.timesteps + isinstance(self.tim_esteps, list) + and not sorted(self.tim_esteps) == self.tim_esteps ): raise ValueError("timesteps must be in ascending order") if np.any(~np.isfinite(self.velocity)): @@ -347,10 +355,10 @@ def _print_forecast_info(self): print("Parameters") print("----------") - if isinstance(self.timesteps, int): - print(f"number of time steps: {self.timesteps}") + if isinstance(self.tim_esteps, int): + print(f"number of time steps: {self.tim_esteps}") else: - print(f"time steps: {self.timesteps}") + print(f"time steps: {self.tim_esteps}") print(f"ensemble size: {self.config.n_ens_members}") print(f"parallel threads: {self.config.num_workers}") print(f"number of cascade levels: {self.config.n_cascade_levels}") @@ -379,6 +387,11 @@ def _initialize_nowcast_components(self): """ Initialize the FFT, bandpass filters, decomposition methods, and extrapolation method. 
""" + # Initialize number of ensemble workers + self.params.num_ensemble_workers = min( + self.config.n_ens_members, self.config.num_workers + ) + M, N = self.precip.shape[1:] # Extract the spatial dimensions (height, width) # Initialize FFT method @@ -666,13 +679,13 @@ def _initialize_precipitation_mask(self): self.precip_forecast = [[] for _ in range(self.config.n_ens_members)] if self.config.probmatching_method == "mean": - self.mu_0 = np.mean( + self.precipitation_mean = np.mean( self.precip[-1, :, :][ self.precip[-1, :, :] >= self.config.precip_threshold ] ) else: - self.mu_0 = None + self.precipitation_mean = None self.precip_mask = None self.precip_mask_decomposed = None @@ -759,7 +772,7 @@ def _initialize_params(self, precip): "generate_noise": self.generate_noise, "mask_method": self.config.mask_method, "mask_rim": self.mask_rim, - "mu_0": self.mu_0, + "mu_0": self.precipitation_mean, "n_cascade_levels": self.config.n_cascade_levels, "n_ens_members": self.config.n_ens_members, "noise_method": self.config.noise_method, From aa26517a711152f257ae3af5df82b1e404f4ffc8 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 11:20:01 +0100 Subject: [PATCH 22/65] Added config dataclass to steps nowcast, fixed some assignment issues and found some first declartions in the code that where not in the init --- pysteps/nowcasts/steps.py | 86 +++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index abbc01007..d16a4e7f1 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -113,7 +113,7 @@ def __init__(self, precip, velocity, time_steps, steps_config): # Store inputs and optional parameters self.precip = precip self.velocity = velocity - self.tim_esteps = time_steps + self.time_steps = time_steps # Store the config data: self.config = steps_config @@ -121,7 +121,7 @@ def __init__(self, precip, velocity, time_steps, steps_config): # 
Additional variables for internal state management self.fft = None self.bandpass_filter = None - self.extrapolator_method = None + self.extrapolation_method = None self.domain_mask = None self.precip_cascades = None self.gamma = None @@ -140,14 +140,14 @@ def __init__(self, precip, velocity, time_steps, steps_config): self.fft_objs = None self.generate_noise = None self.decomposition_method = None - self.recomp_method = None + self.recomposition_method = None self.xy_coords = None - self.precipitation_mean = None - # Initialize number of ensemble workers - self.num_ensemble_workers = min( - self.config.n_ens_members, self.config.num_workers - ) + self.precip_mask = None + self.precip_mask_decomposed = None + self.war = None + self.struct = None + self.mask_rim = None # Store the state and params data: self.state = StepsNowcasterState() @@ -222,17 +222,17 @@ def _nowcast_main(self): precip, self.velocity, state, - self.tim_esteps, + self.time_steps, self.config.extrapolation_method, self._update_state, # Reference to the update function - extrap_kwargs=self.extrapolation_kwargs, + extrap_kwargs=self.config.extrapolation_kwargs, velocity_pert_gen=self.velocity_perturbations, params=params, ensemble=True, num_ensemble_members=self.config.n_ens_members, callback=self.config.callback, return_output=self.config.return_output, - num_workers=self.num_ensemble_workers, + num_workers=self.params.num_ensemble_workers, measure_time=self.config.measure_time, ) @@ -256,8 +256,8 @@ def _check_inputs(self): f"shape(precip)={self.precip.shape}, shape(velocity)={self.velocity.shape}" ) if ( - isinstance(self.tim_esteps, list) - and not sorted(self.tim_esteps) == self.tim_esteps + isinstance(self.time_steps, list) + and not sorted(self.time_steps) == self.time_steps ): raise ValueError("timesteps must be in ascending order") if np.any(~np.isfinite(self.velocity)): @@ -301,15 +301,15 @@ def _check_inputs(self): # Handle None values for various kwargs if 
self.config.extrapolation_kwargs is None: - self.extrapolation_kwargs = {} + self.config.extrapolation_kwargs = {} if self.config.filter_kwargs is None: - self.filter_kwargs = {} + self.config.filter_kwargs = {} if self.config.noise_kwargs is None: - self.noise_kwargs = {} + self.config.noise_kwargs = {} if self.config.velocity_perturbation_kwargs is None: - self.velocity_perturbation_kwargs = {} + self.config.velocity_perturbation_kwargs = {} if self.config.mask_kwargs is None: - self.mask_kwargs = {} + self.config.mask_kwargs = {} print("Inputs validated and initialized successfully.") @@ -355,21 +355,23 @@ def _print_forecast_info(self): print("Parameters") print("----------") - if isinstance(self.tim_esteps, int): - print(f"number of time steps: {self.tim_esteps}") + if isinstance(self.time_steps, int): + print(f"number of time steps: {self.time_steps}") else: - print(f"time steps: {self.tim_esteps}") + print(f"time steps: {self.time_steps}") print(f"ensemble size: {self.config.n_ens_members}") print(f"parallel threads: {self.config.num_workers}") print(f"number of cascade levels: {self.config.n_cascade_levels}") print(f"order of the AR(p) model: {self.config.ar_order}") if self.config.velocity_perturbation_method == "bps": - self.velocity_perturbation_parallel = self.velocity_perturbation_kwargs.get( - "p_par", noise.motion.get_default_params_bps_par() + self.velocity_perturbation_parallel = ( + self.config.velocity_perturbation_kwargs.get( + "p_par", noise.motion.get_default_params_bps_par() + ) ) self.velocity_perturbation_perpendicular = ( - self.velocity_perturbation_kwargs.get( + self.config.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) @@ -402,16 +404,16 @@ def _initialize_nowcast_components(self): # Initialize the band-pass filter for the cascade decomposition filter_method = cascade.get_method(self.config.bandpass_filter_method) self.bandpass_filter = filter_method( - (M, N), self.config.n_cascade_levels, 
**(self.filter_kwargs or {}) + (M, N), self.config.n_cascade_levels, **(self.config.filter_kwargs or {}) ) # Get the decomposition method (e.g., FFT) - self.decomposition_method, self.recomp_method = cascade.get_method( + self.decomposition_method, self.recomposition_method = cascade.get_method( self.config.decomposition_method ) # Get the extrapolation method (e.g., semilagrangian) - self.extrapolator_method = extrapolation.get_method( + self.extrapolation_method = extrapolation.get_method( self.config.extrapolation_method ) @@ -442,7 +444,7 @@ def _perform_extrapolation(self): else: self.mask_threshold = None - extrap_kwargs = self.extrapolation_kwargs.copy() + extrap_kwargs = self.config.extrapolation_kwargs.copy() extrap_kwargs["xy_coords"] = self.xy_coords extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(self.precip)) else False @@ -452,7 +454,7 @@ def _perform_extrapolation(self): def _extrapolate_single_field(precip, i): # Extrapolate a single precipitation field using the velocity field - return self.extrapolator_method( + return self.extrapolation_method( precip[i, :, :], self.velocity, self.config.ar_order - i, @@ -502,7 +504,7 @@ def _apply_noise_and_ar_model(self): # Initialize the perturbation generator for the precipitation field self.perturbation_generator = init_noise( - self.precip, fft_method=self.fft, **self.noise_kwargs + self.precip, fft_method=self.fft, **self.config.noise_kwargs ) # Handle noise standard deviation adjustments if necessary @@ -627,12 +629,16 @@ def _apply_noise_and_ar_model(self): # Create random state for precipitation noise generator rs = np.random.RandomState(self.config.seed) self.random_generator_precip.append(rs) - self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating + self.config.seed = rs.randint( + 0, high=int(1e9) + ) # Update seed after generating # Create random state for motion perturbations generator rs = np.random.RandomState(self.config.seed) 
self.random_generator_motion.append(rs) - self.seed = rs.randint(0, high=int(1e9)) # Update seed after generating + self.config.seed = rs.randint( + 0, high=int(1e9) + ) # Update seed after generating else: self.random_generator_precip = None self.random_generator_motion = None @@ -652,10 +658,10 @@ def _initialize_velocity_perturbations(self): for j in range(self.config.n_ens_members): kwargs = { "randstate": self.random_generator_motion[j], - "p_par": self.velocity_perturbation_kwargs.get( + "p_par": self.config.velocity_perturbation_kwargs.get( "p_par", self.velocity_perturbation_parallel ), - "p_perp": self.velocity_perturbation_kwargs.get( + "p_perp": self.config.velocity_perturbation_kwargs.get( "p_perp", self.velocity_perturbation_perpendicular ), } @@ -687,12 +693,6 @@ def _initialize_precipitation_mask(self): else: self.precipitation_mean = None - self.precip_mask = None - self.precip_mask_decomposed = None - self.war = None - self.struct = None - self.mask_rim = None - if self.config.mask_method is not None: self.mask_precip = self.precip[-1, :, :] >= self.config.precip_threshold @@ -709,8 +709,8 @@ def _initialize_precipitation_mask(self): elif self.config.mask_method == "incremental": # Get mask parameters - self.mask_rim = self.mask_kwargs.get("mask_rim", 10) - mask_f = self.mask_kwargs.get("mask_f", 1.0) + self.mask_rim = self.config.mask_kwargs.get("mask_rim", 10) + mask_f = self.config.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element self.struct = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep @@ -783,7 +783,7 @@ def _initialize_params(self, precip): "probmatching_method": self.config.probmatching_method, "precip": precip, "precip_thr": self.config.precip_threshold, - "recomp_method": self.recomp_method, + "recomp_method": self.recomposition_method, "struct": self.struct, "war": self.war, } From ff2e2bedb6b2329701fd419d183b8eac1df98e21 Mon Sep 17 00:00:00 2001 From: Simon De Kock 
Date: Thu, 31 Oct 2024 13:03:24 +0100 Subject: [PATCH 23/65] Fixed num_ensemble_workers bug --- pysteps/nowcasts/steps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index d16a4e7f1..3bdba522e 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -473,7 +473,7 @@ def _extrapolate_single_field(precip, i): # If Dask is available, perform the parallel computation if DASK_IMPORTED and res: - num_workers_ = min(self.num_ensemble_workers, len(res)) + num_workers_ = min(self.params.num_ensemble_workers, len(res)) self.precip = np.stack( list(dask.compute(*res, num_workers=num_workers_)) + [self.precip[-1, :, :]] @@ -777,7 +777,7 @@ def _initialize_params(self, precip): "n_ens_members": self.config.n_ens_members, "noise_method": self.config.noise_method, "noise_std_coeffs": self.noise_std_coeffs, - "num_ensemble_workers": self.num_ensemble_workers, + "num_ensemble_workers": self.params.num_ensemble_workers, "phi": self.phi, "pert_gen": self.perturbation_generator, "probmatching_method": self.config.probmatching_method, From 2543683c758f0fd174ae2b8dd8f8761977976cb4 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 19:08:20 +0100 Subject: [PATCH 24/65] Added params and state dataclasses --- pysteps/nowcasts/steps.py | 322 +++++++++++++++++++------------------- 1 file changed, 162 insertions(+), 160 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 3bdba522e..78457c4ca 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -71,7 +71,7 @@ class StepsNowcasterConfig: class StepsNowcasterParams: fft: Any = None bandpass_filter: Any = None - extrapolator_method: Any = None + extrapolation_method: Any = None decomposition_method: Any = None recomposition_method: Any = None noise_generator: Optional[callable] = None @@ -83,18 +83,25 @@ class StepsNowcasterParams: structuring_element: Optional[np.ndarray] = None 
precipitation_mean: Optional[float] = None wet_area_ratio: Optional[float] = None - num_workers: int = 1 - generate_noise: Optional[callable] = None + mask_rim: Optional[int] = None + # TODO: remove these comented fields + # num_workers: int = 1 + num_ensemble_workers: int = 1 + # generate_noise: Optional[callable] = None xy_coordinates: Optional[np.ndarray] = None velocity_perturbation_parallel: Optional[List[float]] = None velocity_perturbation_perpendicular: Optional[List[float]] = None - num_ensemble_workers: int = 1 @dataclass class StepsNowcasterState: + precip_forecast: Optional[List[Any]] = field(default_factory=list) precip_cascades: Optional[List[List[np.ndarray]]] = field(default_factory=list) precip_decomposed: Optional[List[Dict[str, Any]]] = field(default_factory=list) + # The observation mask (where the radar can observe the precipitation) + precip_mask: Optional[List[Any]] = field(default_factory=list) + precip_mask_decomposed: Optional[Dict[str, Any]] = field(default_factory=dict) + # The mask around the precipitation fields (to get only non-zero values) mask_precip: Optional[np.ndarray] = None mask_threshold: Optional[np.ndarray] = None random_generator_precip: Optional[List[np.random.RandomState]] = field( @@ -105,7 +112,6 @@ class StepsNowcasterState: ) velocity_perturbations: Optional[List[callable]] = field(default_factory=list) fft_objects: Optional[List[Any]] = field(default_factory=list) - precip_forecast: Optional[List[Any]] = field(default_factory=list) class StepsNowcaster: @@ -118,37 +124,6 @@ def __init__(self, precip, velocity, time_steps, steps_config): # Store the config data: self.config = steps_config - # Additional variables for internal state management - self.fft = None - self.bandpass_filter = None - self.extrapolation_method = None - self.domain_mask = None - self.precip_cascades = None - self.gamma = None - self.phi = None - self.perturbation_generator = None - self.noise_std_coeffs = None - self.random_generator_precip = 
None - self.random_generator_motion = None - self.velocity_perturbations = None - self.precip_forecast = None - self.mask_precip = None - self.mask_threshold = None - self.precip_decomposed = None - self.velocity_perturbation_parallel = None - self.velocity_perturbation_perpendicular = None - self.fft_objs = None - self.generate_noise = None - self.decomposition_method = None - self.recomposition_method = None - self.xy_coords = None - self.precipitation_mean = None - self.precip_mask = None - self.precip_mask_decomposed = None - self.war = None - self.struct = None - self.mask_rim = None - # Store the state and params data: self.state = StepsNowcasterState() self.params = StepsNowcasterParams() @@ -186,20 +161,20 @@ def compute_forecast(self): self._nowcast_main() if self.config.measure_time: - self.precip_forecast, self.mainloop_time = self.precip_forecast + self.state.precip_forecast, self.mainloop_time = self.state.precip_forecast # Stack and return the forecast output if self.config.return_output: - self.precip_forecast = np.stack( + self.state.precip_forecast = np.stack( [ - np.stack(self.precip_forecast[j]) + np.stack(self.state.precip_forecast[j]) for j in range(self.config.n_ens_members) ] ) if self.config.measure_time: - return self.precip_forecast, self.init_time, self.mainloop_time + return self.state.precip_forecast, self.init_time, self.mainloop_time else: - return self.precip_forecast + return self.state.precip_forecast else: return None @@ -211,14 +186,14 @@ def _nowcast_main(self): # Isolate the last time slice of precipitation precip = self.precip[-1, :, :] # Extract the last available precipitation field - # Prepare state and params dictionaries + # Prepare state and params dictionaries, these need to be formatted a specific way for the nowcast_main_loop state = self._initialize_state() params = self._initialize_params(precip) print("Starting nowcast computation.") # Run the nowcast main loop - self.precip_forecast = nowcast_main_loop( + 
self.state.precip_forecast = nowcast_main_loop( precip, self.velocity, state, @@ -226,7 +201,7 @@ def _nowcast_main(self): self.config.extrapolation_method, self._update_state, # Reference to the update function extrap_kwargs=self.config.extrapolation_kwargs, - velocity_pert_gen=self.velocity_perturbations, + velocity_pert_gen=self.state.velocity_perturbations, params=params, ensemble=True, num_ensemble_members=self.config.n_ens_members, @@ -365,21 +340,21 @@ def _print_forecast_info(self): print(f"order of the AR(p) model: {self.config.ar_order}") if self.config.velocity_perturbation_method == "bps": - self.velocity_perturbation_parallel = ( + self.params.velocity_perturbation_parallel = ( self.config.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) - self.velocity_perturbation_perpendicular = ( + self.params.velocity_perturbation_perpendicular = ( self.config.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) print( - f"velocity perturbations, parallel: {self.velocity_perturbation_parallel[0]},{self.velocity_perturbation_parallel[1]},{self.velocity_perturbation_parallel[2]}" + f"velocity perturbations, parallel: {self.params.velocity_perturbation_parallel[0]},{self.params.velocity_perturbation_parallel[1]},{self.params.velocity_perturbation_parallel[2]}" ) print( - f"velocity perturbations, perpendicular: {self.velocity_perturbation_perpendicular[0]},{self.velocity_perturbation_perpendicular[1]},{self.velocity_perturbation_perpendicular[2]}" + f"velocity perturbations, perpendicular: {self.params.velocity_perturbation_perpendicular[0]},{self.params.velocity_perturbation_perpendicular[1]},{self.params.velocity_perturbation_perpendicular[2]}" ) if self.config.precip_threshold is not None: @@ -397,32 +372,32 @@ def _initialize_nowcast_components(self): M, N = self.precip.shape[1:] # Extract the spatial dimensions (height, width) # Initialize FFT method - self.fft = utils.get_method( + 
self.params.fft = utils.get_method( self.config.fft_method, shape=(M, N), n_threads=self.config.num_workers ) # Initialize the band-pass filter for the cascade decomposition filter_method = cascade.get_method(self.config.bandpass_filter_method) - self.bandpass_filter = filter_method( + self.params.bandpass_filter = filter_method( (M, N), self.config.n_cascade_levels, **(self.config.filter_kwargs or {}) ) # Get the decomposition method (e.g., FFT) - self.decomposition_method, self.recomposition_method = cascade.get_method( - self.config.decomposition_method + self.params.decomposition_method, self.params.recomposition_method = ( + cascade.get_method(self.config.decomposition_method) ) # Get the extrapolation method (e.g., semilagrangian) - self.extrapolation_method = extrapolation.get_method( + self.params.extrapolation_method = extrapolation.get_method( self.config.extrapolation_method ) # Generate the mesh grid for spatial coordinates x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) - self.xy_coords = np.stack([x_values, y_values]) + self.params.xy_coordinates = np.stack([x_values, y_values]) # Determine the domain mask from non-finite values in the precipitation data - self.domain_mask = np.logical_or.reduce( + self.params.domain_mask = np.logical_or.reduce( [~np.isfinite(self.precip[i, :]) for i in range(self.precip.shape[0])] ) @@ -435,17 +410,17 @@ def _perform_extrapolation(self): """ # Determine the precipitation threshold mask if conditional is set if self.config.conditional: - self.mask_threshold = np.logical_and.reduce( + self.state.mask_threshold = np.logical_and.reduce( [ self.precip[i, :, :] >= self.config.precip_threshold for i in range(self.precip.shape[0]) ] ) else: - self.mask_threshold = None + self.state.mask_threshold = None extrap_kwargs = self.config.extrapolation_kwargs.copy() - extrap_kwargs["xy_coords"] = self.xy_coords + extrap_kwargs["xy_coords"] = self.params.xy_coordinates extrap_kwargs["allow_nonfinite_values"] = ( True if 
np.any(~np.isfinite(self.precip)) else False ) @@ -454,7 +429,7 @@ def _perform_extrapolation(self): def _extrapolate_single_field(precip, i): # Extrapolate a single precipitation field using the velocity field - return self.extrapolation_method( + return self.params.extrapolation_method( precip[i, :, :], self.velocity, self.config.ar_order - i, @@ -498,13 +473,11 @@ def _apply_noise_and_ar_model(self): # Initialize the noise generator if the noise_method is provided if self.config.noise_method is not None: np.random.seed(self.config.seed) # Set the random seed for reproducibility - init_noise, self.generate_noise = noise.get_method( - self.config.noise_method - ) # Get noise methods + init_noise, generate_noise = noise.get_method(self.config.noise_method) + self.params.noise_generator = generate_noise - # Initialize the perturbation generator for the precipitation field - self.perturbation_generator = init_noise( - self.precip, fft_method=self.fft, **self.config.noise_kwargs + self.params.perturbation_generator = init_noise( + self.precip, fft_method=self.params.fft, **self.config.noise_kwargs ) # Handle noise standard deviation adjustments if necessary @@ -514,18 +487,20 @@ def _apply_noise_and_ar_model(self): starttime = time.time() # Compute noise adjustment coefficients - self.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( - self.precip[-1, :, :], - self.config.precip_threshold, - np.min(self.precip), - self.bandpass_filter, - self.decomposition_method, - self.perturbation_generator, - self.config.generate_noise, - 20, - conditional=self.config.conditional, - num_workers=self.config.num_workers, - seed=self.config.seed, + self.params.noise_std_coefficients = ( + noise.utils.compute_noise_stddev_adjs( + self.precip[-1, :, :], + self.config.precip_threshold, + np.min(self.precip), + self.params.bandpass_filter, + self.params.decomposition_method, + self.params.perturbation_generator, + self.params.noise_generator, + 20, + 
conditional=self.config.conditional, + num_workers=self.config.num_workers, + seed=self.config.seed, + ) ) # Measure and print time taken @@ -539,82 +514,100 @@ def _apply_noise_and_ar_model(self): elif self.config.noise_stddev_adj == "fixed": # Set fixed noise adjustment coefficients func = lambda k: 1.0 / (0.75 + 0.09 * k) - self.noise_std_coeffs = [ + self.params.noise_std_coefficients = [ func(k) for k in range(1, self.config.n_cascade_levels + 1) ] else: # Default to no adjustment - self.noise_std_coeffs = np.ones(self.config.n_cascade_levels) + self.params.noise_std_coefficients = np.ones( + self.config.n_cascade_levels + ) if self.config.noise_stddev_adj is not None: # Print noise std deviation coefficients if adjustments were made - print(f"noise std. dev. coeffs: {str(self.noise_std_coeffs)}") + print( + f"noise std. dev. coeffs: {str(self.params.noise_std_coefficients)}" + ) else: - # No noise, so set perturbation generator and noise_std_coeffs to None - self.perturbation_generator = None - self.noise_std_coeffs = np.ones( + # No noise, so set perturbation generator and noise_std_coefficients to None + self.params.perturbation_generator = None + self.params.noise_std_coefficients = np.ones( self.config.n_cascade_levels ) # Keep default as 1.0 to avoid breaking AR model # Decompose the input precipitation fields - self.precip_decomposed = [] + self.state.precip_decomposed = [] for i in range(self.config.ar_order + 1): - precip_ = self.decomposition_method( + precip_ = self.params.decomposition_method( self.precip[i, :, :], - self.bandpass_filter, - mask=self.mask_threshold, - fft_method=self.fft, + self.params.bandpass_filter, + mask=self.state.mask_threshold, + fft_method=self.params.fft, output_domain=self.config.domain, normalize=True, compute_stats=True, compact_output=True, ) - self.precip_decomposed.append(precip_) + self.state.precip_decomposed.append(precip_) # Normalize the cascades and rearrange them into a 4D array - self.precip_cascades = 
nowcast_utils.stack_cascades( - self.precip_decomposed, self.config.n_cascade_levels + self.state.precip_cascades = nowcast_utils.stack_cascades( + self.state.precip_decomposed, self.config.n_cascade_levels ) - self.precip_decomposed = self.precip_decomposed[-1] - self.precip_decomposed = [ - self.precip_decomposed.copy() for _ in range(self.config.n_ens_members) + self.state.precip_decomposed = self.state.precip_decomposed[-1] + self.state.precip_decomposed = [ + self.state.precip_decomposed.copy() + for _ in range(self.config.n_ens_members) ] # Compute temporal autocorrelation coefficients for each cascade level - self.gamma = np.empty((self.config.n_cascade_levels, self.config.ar_order)) + self.params.autocorrelation_coefficients = np.empty( + (self.config.n_cascade_levels, self.config.ar_order) + ) for i in range(self.config.n_cascade_levels): - self.gamma[i, :] = correlation.temporal_autocorrelation( - self.precip_cascades[i], mask=self.mask_threshold + self.params.autocorrelation_coefficients[i, :] = ( + correlation.temporal_autocorrelation( + self.state.precip_cascades[i], mask=self.state.mask_threshold + ) ) - nowcast_utils.print_corrcoefs(self.gamma) + nowcast_utils.print_corrcoefs(self.params.autocorrelation_coefficients) # Adjust the lag-2 correlation coefficient if AR(2) model is used if self.config.ar_order == 2: for i in range(self.config.n_cascade_levels): - self.gamma[i, 1] = autoregression.adjust_lag2_corrcoef2( - self.gamma[i, 0], self.gamma[i, 1] + self.params.autocorrelation_coefficients[i, 1] = ( + autoregression.adjust_lag2_corrcoef2( + self.params.autocorrelation_coefficients[i, 0], + self.params.autocorrelation_coefficients[i, 1], + ) ) # Estimate the parameters of the AR model using auto-correlation coefficients - self.phi = np.empty((self.config.n_cascade_levels, self.config.ar_order + 1)) + self.params.ar_model_coefficients = np.empty( + (self.config.n_cascade_levels, self.config.ar_order + 1) + ) for i in 
range(self.config.n_cascade_levels): - self.phi[i, :] = autoregression.estimate_ar_params_yw(self.gamma[i, :]) + self.params.ar_model_coefficients[i, :] = ( + autoregression.estimate_ar_params_yw( + self.params.autocorrelation_coefficients[i, :] + ) + ) - nowcast_utils.print_ar_params(self.phi) + nowcast_utils.print_ar_params(self.params.ar_model_coefficients) # Discard all except the last ar_order cascades for AR model - self.precip_cascades = [ - self.precip_cascades[i][-self.config.ar_order :] + self.state.precip_cascades = [ + self.state.precip_cascades[i][-self.config.ar_order :] for i in range(self.config.n_cascade_levels) ] # Stack the cascades into a list containing all ensemble members - self.precip_cascades = [ + self.state.precip_cascades = [ [ - self.precip_cascades[j].copy() + self.state.precip_cascades[j].copy() for j in range(self.config.n_cascade_levels) ] for _ in range(self.config.n_ens_members) @@ -622,26 +615,26 @@ def _apply_noise_and_ar_model(self): # Initialize random generators if noise_method is provided if self.config.noise_method is not None: - self.random_generator_precip = [] - self.random_generator_motion = [] + self.state.random_generator_precip = [] + self.state.random_generator_motion = [] for _ in range(self.config.n_ens_members): # Create random state for precipitation noise generator rs = np.random.RandomState(self.config.seed) - self.random_generator_precip.append(rs) + self.state.random_generator_precip.append(rs) self.config.seed = rs.randint( 0, high=int(1e9) ) # Update seed after generating # Create random state for motion perturbations generator rs = np.random.RandomState(self.config.seed) - self.random_generator_motion.append(rs) + self.state.random_generator_motion.append(rs) self.config.seed = rs.randint( 0, high=int(1e9) ) # Update seed after generating else: - self.random_generator_precip = None - self.random_generator_motion = None + self.state.random_generator_precip = None + self.state.random_generator_motion = None 
print("AR model and noise applied to precipitation cascades.") def _initialize_velocity_perturbations(self): @@ -654,15 +647,15 @@ def _initialize_velocity_perturbations(self): self.config.velocity_perturbation_method ) - self.velocity_perturbations = [] + self.state.velocity_perturbations = [] for j in range(self.config.n_ens_members): kwargs = { - "randstate": self.random_generator_motion[j], + "randstate": self.state.random_generator_motion[j], "p_par": self.config.velocity_perturbation_kwargs.get( - "p_par", self.velocity_perturbation_parallel + "p_par", self.params.velocity_perturbation_parallel ), "p_perp": self.config.velocity_perturbation_kwargs.get( - "p_perp", self.velocity_perturbation_perpendicular + "p_perp", self.params.velocity_perturbation_perpendicular ), } vp = init_vel_noise( @@ -671,64 +664,73 @@ def _initialize_velocity_perturbations(self): self.config.timestep, **kwargs, ) - self.velocity_perturbations.append( + self.state.velocity_perturbations.append( lambda t, vp=vp: generate_vel_noise(vp, t * self.config.timestep) ) else: - self.velocity_perturbations = None + self.state.velocity_perturbations = None print("Velocity perturbations initialized successfully.") def _initialize_precipitation_mask(self): """ Initialize the precipitation mask and handle different mask methods (sprog, incremental). 
""" - self.precip_forecast = [[] for _ in range(self.config.n_ens_members)] + self.state.precip_forecast = [[] for _ in range(self.config.n_ens_members)] if self.config.probmatching_method == "mean": - self.precipitation_mean = np.mean( + self.params.precipitation_mean = np.mean( self.precip[-1, :, :][ self.precip[-1, :, :] >= self.config.precip_threshold ] ) else: - self.precipitation_mean = None + self.params.precipitation_mean = None if self.config.mask_method is not None: - self.mask_precip = self.precip[-1, :, :] >= self.config.precip_threshold + self.state.mask_precip = ( + self.precip[-1, :, :] >= self.config.precip_threshold + ) if self.config.mask_method == "sprog": # Compute the wet area ratio and the precipitation mask - self.war = np.sum(self.mask_precip) / ( + self.params.wet_area_ratio = np.sum(self.state.mask_precip) / ( self.precip.shape[1] * self.precip.shape[2] ) - self.precip_mask = [ - self.precip_cascades[0][i].copy() + self.state.precip_mask = [ + self.state.precip_cascades[0][i].copy() for i in range(self.config.n_cascade_levels) ] - self.precip_mask_decomposed = self.precip_decomposed[0].copy() + self.state.precip_mask_decomposed = self.state.precip_decomposed[ + 0 + ].copy() elif self.config.mask_method == "incremental": # Get mask parameters - self.mask_rim = self.config.mask_kwargs.get("mask_rim", 10) + self.params.mask_rim = self.config.mask_kwargs.get("mask_rim", 10) mask_f = self.config.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element - self.struct = generate_binary_structure(2, 1) + self.params.structuring_element = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep n = mask_f * self.config.timestep / self.config.kmperpixel - self.struct = iterate_structure(self.struct, int((n - 1) / 2.0)) + self.params.structuring_element = iterate_structure( + self.params.structuring_element, int((n - 1) / 2.0) + ) # Compute and apply the dilated mask for each ensemble member - 
self.mask_precip = nowcast_utils.compute_dilated_mask( - self.mask_precip, self.struct, self.mask_rim + self.state.mask_precip = nowcast_utils.compute_dilated_mask( + self.state.mask_precip, + self.params.structuring_element, + self.params.mask_rim, ) - self.mask_precip = [ - self.mask_precip.copy() for _ in range(self.config.n_ens_members) + self.state.mask_precip = [ + self.state.mask_precip.copy() + for _ in range(self.config.n_ens_members) ] else: - self.mask_precip = None + self.state.mask_precip = None - if self.config.noise_method is None and self.precip_mask is None: - self.precip_mask = [ - self.precip_cascades[0][i].copy() + if self.config.noise_method is None and self.state.precip_mask is None: + self.state.precip_mask = [ + self.state.precip_cascades[0][i].copy() for i in range(self.config.n_cascade_levels) ] print("Precipitation mask initialized successfully.") @@ -737,12 +739,12 @@ def _initialize_fft_objects(self): """ Initialize FFT objects for each ensemble member. """ - self.fft_objs = [] + self.state.fft_objs = [] for _ in range(self.config.n_ens_members): fft_obj = utils.get_method( self.config.fft_method, shape=self.precip.shape[1:] ) - self.fft_objs.append(fft_obj) + self.state.fft_objs.append(fft_obj) print("FFT objects initialized successfully.") def _initialize_state(self): @@ -750,13 +752,13 @@ def _initialize_state(self): Initialize the state dictionary used during the nowcast iteration. 
""" return { - "fft_objs": self.fft_objs, - "mask_prec": self.mask_precip, - "precip_cascades": self.precip_cascades, - "precip_decomp": self.precip_decomposed, - "precip_m": self.precip_mask, - "precip_m_d": self.precip_mask_decomposed, - "randgen_prec": self.random_generator_precip, + "fft_objs": self.state.fft_objs, + "mask_prec": self.state.mask_precip, + "precip_cascades": self.state.precip_cascades, + "precip_decomp": self.state.precip_decomposed, + "precip_m": self.state.precip_mask, + "precip_m_d": self.state.precip_mask_decomposed, + "randgen_prec": self.state.random_generator_precip, } def _initialize_params(self, precip): @@ -764,28 +766,28 @@ def _initialize_params(self, precip): Initialize the params dictionary used during the nowcast iteration. """ return { - "decomp_method": self.decomposition_method, + "decomp_method": self.params.decomposition_method, "domain": self.config.domain, - "domain_mask": self.domain_mask, - "filter": self.bandpass_filter, - "fft": self.fft, - "generate_noise": self.generate_noise, + "domain_mask": self.params.domain_mask, + "filter": self.params.bandpass_filter, + "fft": self.params.fft, + "generate_noise": self.params.noise_generator, "mask_method": self.config.mask_method, - "mask_rim": self.mask_rim, - "mu_0": self.precipitation_mean, + "mask_rim": self.params.mask_rim, + "mu_0": self.params.precipitation_mean, "n_cascade_levels": self.config.n_cascade_levels, "n_ens_members": self.config.n_ens_members, "noise_method": self.config.noise_method, - "noise_std_coeffs": self.noise_std_coeffs, + "noise_std_coeffs": self.params.noise_std_coefficients, "num_ensemble_workers": self.params.num_ensemble_workers, - "phi": self.phi, - "pert_gen": self.perturbation_generator, + "phi": self.params.ar_model_coefficients, + "pert_gen": self.params.perturbation_generator, "probmatching_method": self.config.probmatching_method, "precip": precip, "precip_thr": self.config.precip_threshold, - "recomp_method": self.recomposition_method, - 
"struct": self.struct, - "war": self.war, + "recomp_method": self.params.recomposition_method, + "struct": self.params.structuring_element, + "war": self.params.wet_area_ratio, } def _update_state(self, state, params): From 37739de53a14b4f21e340bf1624f0efc60d06ec7 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 19:29:24 +0100 Subject: [PATCH 25/65] Implemented a reset of the states and params to allow for multiple forecast runs in a row --- pysteps/nowcasts/steps.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 78457c4ca..a9ced155f 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -987,9 +987,20 @@ def _measure_time(self, label, start_time): elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") - def reset_states(self): - # TODO: Implement a method to reset the state of the nowcast object to make multiple iterations possible - pass + def reset_states_and_params(self): + """ + Reset the internal state and parameters of the nowcaster to allow multiple forecasts. + This method resets the state and params to their initial conditions without reinitializing + the inputs like precip, velocity, time_steps, or config. 
+ """ + # Re-initialize the state and parameters + self.state = StepsNowcasterState() + self.params = StepsNowcasterParams() + + # Reset time measurement variables + self.start_time_init = None + self.init_time = None + self.mainloop_time = None # Wrapper function to preserve backward compatibility @@ -1256,6 +1267,6 @@ def forecast( precip, velocity, timesteps, steps_config=nowcaster_config ) forecast_steps_nowcast = nowcaster.compute_forecast() - nowcaster.reset_states() + nowcaster.reset_states_and_params() # Call the appropriate methods within the class return forecast_steps_nowcast From f123bdedf8318892e38a3a88f2664eabb6cbbd10 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 31 Oct 2024 19:32:11 +0100 Subject: [PATCH 26/65] Removed some redundant fields --- pysteps/nowcasts/steps.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index a9ced155f..decd42468 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -84,10 +84,7 @@ class StepsNowcasterParams: precipitation_mean: Optional[float] = None wet_area_ratio: Optional[float] = None mask_rim: Optional[int] = None - # TODO: remove these comented fields - # num_workers: int = 1 num_ensemble_workers: int = 1 - # generate_noise: Optional[callable] = None xy_coordinates: Optional[np.ndarray] = None velocity_perturbation_parallel: Optional[List[float]] = None velocity_perturbation_perpendicular: Optional[List[float]] = None From 1a71e61781f87d3e9fa4b54db937fbfed262758c Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 4 Nov 2024 11:33:08 +0100 Subject: [PATCH 27/65] Update pysteps/nowcasts/steps.py Co-authored-by: mats-knmi <145579783+mats-knmi@users.noreply.github.com> --- pysteps/nowcasts/steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index decd42468..ed1cd43c2 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -112,7 
+112,7 @@ class StepsNowcasterState: class StepsNowcaster: - def __init__(self, precip, velocity, time_steps, steps_config): + def __init__(self, precip, velocity, time_steps, steps_config: StepsNowcasterConfig): # Store inputs and optional parameters self.precip = precip self.velocity = velocity From db953fb3b4d931ad95fcd92ebd5a0de07fde5f25 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 4 Nov 2024 11:46:11 +0100 Subject: [PATCH 28/65] Added suggested changed by Mats regarding __ and typing --- pysteps/nowcasts/steps.py | 788 +++++++++++++++++++------------------- 1 file changed, 402 insertions(+), 386 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index ed1cd43c2..dedbb726a 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -26,7 +26,7 @@ from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from dataclasses import dataclass, field -from typing import Optional, Dict, Any, Callable, List +from typing import Any, Callable try: import dask @@ -40,30 +40,30 @@ class StepsNowcasterConfig: n_ens_members: int = 24 n_cascade_levels: int = 6 - precip_threshold: Optional[float] = None - kmperpixel: Optional[float] = None - timestep: Optional[float] = None + precip_threshold: float | None = None + kmperpixel: float | None = None + timestep: float | None = None extrapolation_method: str = "semilagrangian" decomposition_method: str = "fft" bandpass_filter_method: str = "gaussian" - noise_method: Optional[str] = "nonparametric" - noise_stddev_adj: Optional[str] = None + noise_method: str | None = "nonparametric" + noise_stddev_adj: str | None = None ar_order: int = 2 - velocity_perturbation_method: Optional[str] = "bps" + velocity_perturbation_method: str | None = "bps" conditional: bool = False - probmatching_method: Optional[str] = "cdf" - mask_method: Optional[str] = "incremental" - seed: Optional[int] = None + probmatching_method: str | None = "cdf" + mask_method: str | None = 
"incremental" + seed: int | None = None num_workers: int = 1 fft_method: str = "numpy" domain: str = "spatial" - extrapolation_kwargs: Dict[str, Any] = field(default_factory=dict) - filter_kwargs: Dict[str, Any] = field(default_factory=dict) - noise_kwargs: Dict[str, Any] = field(default_factory=dict) - velocity_perturbation_kwargs: Dict[str, Any] = field(default_factory=dict) - mask_kwargs: Dict[str, Any] = field(default_factory=dict) + extrapolation_kwargs: dict[str, Any] = field(default_factory=dict) + filter_kwargs: dict[str, Any] = field(default_factory=dict) + noise_kwargs: dict[str, Any] = field(default_factory=dict) + velocity_perturbation_kwargs: dict[str, Any] = field(default_factory=dict) + mask_kwargs: dict[str, Any] = field(default_factory=dict) measure_time: bool = False - callback: Optional[Callable[[Any], None]] = None + callback: Callable[[Any], None] | None = None return_output: bool = True @@ -74,218 +74,228 @@ class StepsNowcasterParams: extrapolation_method: Any = None decomposition_method: Any = None recomposition_method: Any = None - noise_generator: Optional[callable] = None - perturbation_generator: Optional[callable] = None - noise_std_coefficients: Optional[np.ndarray] = None - ar_model_coefficients: Optional[np.ndarray] = None # Corresponds to phi - autocorrelation_coefficients: Optional[np.ndarray] = None # Corresponds to gamma - domain_mask: Optional[np.ndarray] = None - structuring_element: Optional[np.ndarray] = None - precipitation_mean: Optional[float] = None - wet_area_ratio: Optional[float] = None - mask_rim: Optional[int] = None + noise_generator: Callable | None = None + perturbation_generator: Callable | None = None + noise_std_coefficients: np.ndarray | None = None + ar_model_coefficients: np.ndarray | None = None # Corresponds to phi + autocorrelation_coefficients: np.ndarray | None = None # Corresponds to gamma + domain_mask: np.ndarray | None = None + structuring_element: np.ndarray | None = None + precipitation_mean: 
float | None = None + wet_area_ratio: float | None = None + mask_rim: int | None = None num_ensemble_workers: int = 1 - xy_coordinates: Optional[np.ndarray] = None - velocity_perturbation_parallel: Optional[List[float]] = None - velocity_perturbation_perpendicular: Optional[List[float]] = None + xy_coordinates: np.ndarray | None = None + velocity_perturbation_parallel: list[float] | None = None + velocity_perturbation_perpendicular: list[float] | None = None @dataclass class StepsNowcasterState: - precip_forecast: Optional[List[Any]] = field(default_factory=list) - precip_cascades: Optional[List[List[np.ndarray]]] = field(default_factory=list) - precip_decomposed: Optional[List[Dict[str, Any]]] = field(default_factory=list) + precip_forecast: list[Any] | None = field(default_factory=list) + precip_cascades: list[list[np.ndarray]] | None = field(default_factory=list) + precip_decomposed: list[dict[str, Any]] | None = field(default_factory=list) # The observation mask (where the radar can observe the precipitation) - precip_mask: Optional[List[Any]] = field(default_factory=list) - precip_mask_decomposed: Optional[Dict[str, Any]] = field(default_factory=dict) + precip_mask: list[Any] | None = field(default_factory=list) + precip_mask_decomposed: dict[str, Any] | None = field(default_factory=dict) # The mask around the precipitation fields (to get only non-zero values) - mask_precip: Optional[np.ndarray] = None - mask_threshold: Optional[np.ndarray] = None - random_generator_precip: Optional[List[np.random.RandomState]] = field( + mask_precip: np.ndarray | None = None + mask_threshold: np.ndarray | None = None + random_generator_precip: list[np.random.RandomState] | None = field( default_factory=list ) - random_generator_motion: Optional[List[np.random.RandomState]] = field( + random_generator_motion: list[np.random.RandomState] | None = field( default_factory=list ) - velocity_perturbations: Optional[List[callable]] = field(default_factory=list) - fft_objects: 
Optional[List[Any]] = field(default_factory=list) + velocity_perturbations: list[Callable] | None = field(default_factory=list) + fft_objects: list[Any] | None = field(default_factory=list) class StepsNowcaster: - def __init__(self, precip, velocity, time_steps, steps_config: StepsNowcasterConfig): + def __init__( + self, precip, velocity, time_steps, steps_config: StepsNowcasterConfig + ): # Store inputs and optional parameters - self.precip = precip - self.velocity = velocity - self.time_steps = time_steps + self.__precip = precip + self.__velocity = velocity + self.__time_steps = time_steps # Store the config data: - self.config = steps_config + self.__config = steps_config # Store the state and params data: - self.state = StepsNowcasterState() - self.params = StepsNowcasterParams() + self.__state = StepsNowcasterState() + self.__params = StepsNowcasterParams() # Additional variables for time measurement - self.start_time_init = None - self.init_time = None - self.mainloop_time = None + self.__start_time_init = None + self.__init_time = None + self.__mainloop_time = None def compute_forecast(self): """ Main loop for nowcast ensemble generation. This handles extrapolation, noise application, autoregressive modeling, and recomposition of cascades. 
""" - self._check_inputs() - self._print_forecast_info() + self.__check_inputs() + self.__print_forecast_info() # Measure time for initialization - if self.config.measure_time: - self.start_time_init = time.time() + if self.__config.measure_time: + self.__start_time_init = time.time() - self._initialize_nowcast_components() + self.__initialize_nowcast_components() # Slice the precipitation field to only use the last ar_order + 1 fields - self.precip = self.precip[-(self.config.ar_order + 1) :, :, :].copy() + self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() - self._perform_extrapolation() - self._apply_noise_and_ar_model() - self._initialize_velocity_perturbations() - self._initialize_precipitation_mask() - self._initialize_fft_objects() + self.__perform_extrapolation() + self.__apply_noise_and_ar_model() + self.__initialize_velocity_perturbations() + self.__initialize_precipitation_mask() + self.__initialize_fft_objects() # Measure and print initialization time - if self.config.measure_time: - self._measure_time("Initialization", self.start_time_init) + if self.__config.measure_time: + self.__measure_time("Initialization", self.__start_time_init) # Run the main nowcast loop - self._nowcast_main() + self.__nowcast_main() - if self.config.measure_time: - self.state.precip_forecast, self.mainloop_time = self.state.precip_forecast + if self.__config.measure_time: + self.__state.precip_forecast, self.__mainloop_time = ( + self.__state.precip_forecast + ) # Stack and return the forecast output - if self.config.return_output: - self.state.precip_forecast = np.stack( + if self.__config.return_output: + self.__state.precip_forecast = np.stack( [ - np.stack(self.state.precip_forecast[j]) - for j in range(self.config.n_ens_members) + np.stack(self.__state.precip_forecast[j]) + for j in range(self.__config.n_ens_members) ] ) - if self.config.measure_time: - return self.state.precip_forecast, self.init_time, self.mainloop_time + if 
self.__config.measure_time: + return ( + self.__state.precip_forecast, + self.__init_time, + self.__mainloop_time, + ) else: - return self.state.precip_forecast + return self.__state.precip_forecast else: return None - def _nowcast_main(self): + def __nowcast_main(self): """ Main nowcast loop that iterates through the ensemble members and time steps to generate forecasts. """ # Isolate the last time slice of precipitation - precip = self.precip[-1, :, :] # Extract the last available precipitation field + precip = self.__precip[ + -1, :, : + ] # Extract the last available precipitation field # Prepare state and params dictionaries, these need to be formatted a specific way for the nowcast_main_loop - state = self._initialize_state() - params = self._initialize_params(precip) + state = self.__initialize_state() + params = self.__initialize_params(precip) print("Starting nowcast computation.") # Run the nowcast main loop - self.state.precip_forecast = nowcast_main_loop( + self.__state.precip_forecast = nowcast_main_loop( precip, - self.velocity, + self.__velocity, state, - self.time_steps, - self.config.extrapolation_method, - self._update_state, # Reference to the update function - extrap_kwargs=self.config.extrapolation_kwargs, - velocity_pert_gen=self.state.velocity_perturbations, + self.__time_steps, + self.__config.extrapolation_method, + self.__update_state, # Reference to the update function + extrap_kwargs=self.__config.extrapolation_kwargs, + velocity_pert_gen=self.__state.velocity_perturbations, params=params, ensemble=True, - num_ensemble_members=self.config.n_ens_members, - callback=self.config.callback, - return_output=self.config.return_output, - num_workers=self.params.num_ensemble_workers, - measure_time=self.config.measure_time, + num_ensemble_members=self.__config.n_ens_members, + callback=self.__config.callback, + return_output=self.__config.return_output, + num_workers=self.__params.num_ensemble_workers, + measure_time=self.__config.measure_time, ) 
- def _check_inputs(self): + def __check_inputs(self): """ Validate the inputs to ensure consistency and correct shapes. """ - if self.precip.ndim != 3: + if self.__precip.ndim != 3: raise ValueError("precip must be a three-dimensional array") - if self.precip.shape[0] < self.config.ar_order + 1: + if self.__precip.shape[0] < self.__config.ar_order + 1: raise ValueError( f"precip.shape[0] must be at least ar_order+1, " - f"but found {self.precip.shape[0]}" + f"but found {self.__precip.shape[0]}" ) - if self.velocity.ndim != 3: + if self.__velocity.ndim != 3: raise ValueError("velocity must be a three-dimensional array") - if self.precip.shape[1:3] != self.velocity.shape[1:3]: + if self.__precip.shape[1:3] != self.__velocity.shape[1:3]: raise ValueError( f"Dimension mismatch between precip and velocity: " - f"shape(precip)={self.precip.shape}, shape(velocity)={self.velocity.shape}" + f"shape(precip)={self.__precip.shape}, shape(velocity)={self.__velocity.shape}" ) if ( - isinstance(self.time_steps, list) - and not sorted(self.time_steps) == self.time_steps + isinstance(self.__time_steps, list) + and not sorted(self.__time_steps) == self.__time_steps ): raise ValueError("timesteps must be in ascending order") - if np.any(~np.isfinite(self.velocity)): + if np.any(~np.isfinite(self.__velocity)): raise ValueError("velocity contains non-finite values") - if self.config.mask_method not in ["obs", "sprog", "incremental", None]: + if self.__config.mask_method not in ["obs", "sprog", "incremental", None]: raise ValueError( - f"Unknown mask method '{self.config.mask_method}'. " + f"Unknown mask method '{self.__config.mask_method}'. " "Must be 'obs', 'sprog', 'incremental', or None." 
) - if self.config.precip_threshold is None: - if self.config.conditional: + if self.__config.precip_threshold is None: + if self.__config.conditional: raise ValueError("conditional=True but precip_thr is not specified.") - if self.config.mask_method is not None: + if self.__config.mask_method is not None: raise ValueError("mask_method is set but precip_thr is not specified.") - if self.config.probmatching_method == "mean": + if self.__config.probmatching_method == "mean": raise ValueError( "probmatching_method='mean' but precip_thr is not specified." ) if ( - self.config.noise_method is not None - and self.config.noise_stddev_adj == "auto" + self.__config.noise_method is not None + and self.__config.noise_stddev_adj == "auto" ): raise ValueError( "noise_stddev_adj='auto' but precip_thr is not specified." ) - if self.config.noise_stddev_adj not in ["auto", "fixed", None]: + if self.__config.noise_stddev_adj not in ["auto", "fixed", None]: raise ValueError( - f"Unknown noise_stddev_adj method '{self.config.noise_stddev_adj}'. " + f"Unknown noise_stddev_adj method '{self.__config.noise_stddev_adj}'. " "Must be 'auto', 'fixed', or None." 
) - if self.config.kmperpixel is None: - if self.config.velocity_perturbation_method is not None: + if self.__config.kmperpixel is None: + if self.__config.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but kmperpixel=None") - if self.config.mask_method == "incremental": + if self.__config.mask_method == "incremental": raise ValueError("mask_method='incremental' but kmperpixel=None") - if self.config.timestep is None: - if self.config.velocity_perturbation_method is not None: + if self.__config.timestep is None: + if self.__config.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but timestep=None") - if self.config.mask_method == "incremental": + if self.__config.mask_method == "incremental": raise ValueError("mask_method='incremental' but timestep=None") # Handle None values for various kwargs - if self.config.extrapolation_kwargs is None: - self.config.extrapolation_kwargs = {} - if self.config.filter_kwargs is None: - self.config.filter_kwargs = {} - if self.config.noise_kwargs is None: - self.config.noise_kwargs = {} - if self.config.velocity_perturbation_kwargs is None: - self.config.velocity_perturbation_kwargs = {} - if self.config.mask_kwargs is None: - self.config.mask_kwargs = {} + if self.__config.extrapolation_kwargs is None: + self.__config.extrapolation_kwargs = {} + if self.__config.filter_kwargs is None: + self.__config.filter_kwargs = {} + if self.__config.noise_kwargs is None: + self.__config.noise_kwargs = {} + if self.__config.velocity_perturbation_kwargs is None: + self.__config.velocity_perturbation_kwargs = {} + if self.__config.mask_kwargs is None: + self.__config.mask_kwargs = {} print("Inputs validated and initialized successfully.") - def _print_forecast_info(self): + def __print_forecast_info(self): """ Print information about the forecast setup, including inputs, methods, and parameters. 
""" @@ -295,499 +305,505 @@ def _print_forecast_info(self): print("Inputs") print("------") - print(f"input dimensions: {self.precip.shape[1]}x{self.precip.shape[2]}") - if self.config.kmperpixel is not None: - print(f"km/pixel: {self.config.kmperpixel}") - if self.config.timestep is not None: - print(f"time step: {self.config.timestep} minutes") + print(f"input dimensions: {self.__precip.shape[1]}x{self.__precip.shape[2]}") + if self.__config.kmperpixel is not None: + print(f"km/pixel: {self.__config.kmperpixel}") + if self.__config.timestep is not None: + print(f"time step: {self.__config.timestep} minutes") print("") print("Methods") print("-------") - print(f"extrapolation: {self.config.extrapolation_method}") - print(f"bandpass filter: {self.config.bandpass_filter_method}") - print(f"decomposition: {self.config.decomposition_method}") - print(f"noise generator: {self.config.noise_method}") + print(f"extrapolation: {self.__config.extrapolation_method}") + print(f"bandpass filter: {self.__config.bandpass_filter_method}") + print(f"decomposition: {self.__config.decomposition_method}") + print(f"noise generator: {self.__config.noise_method}") print( "noise adjustment: {}".format( - ("yes" if self.config.noise_stddev_adj else "no") + ("yes" if self.__config.noise_stddev_adj else "no") ) ) - print(f"velocity perturbator: {self.config.velocity_perturbation_method}") + print(f"velocity perturbator: {self.__config.velocity_perturbation_method}") print( "conditional statistics: {}".format( - ("yes" if self.config.conditional else "no") + ("yes" if self.__config.conditional else "no") ) ) - print(f"precip. mask method: {self.config.mask_method}") - print(f"probability matching: {self.config.probmatching_method}") - print(f"FFT method: {self.config.fft_method}") - print(f"domain: {self.config.domain}") + print(f"precip. 
mask method: {self.__config.mask_method}") + print(f"probability matching: {self.__config.probmatching_method}") + print(f"FFT method: {self.__config.fft_method}") + print(f"domain: {self.__config.domain}") print("") print("Parameters") print("----------") - if isinstance(self.time_steps, int): - print(f"number of time steps: {self.time_steps}") + if isinstance(self.__time_steps, int): + print(f"number of time steps: {self.__time_steps}") else: - print(f"time steps: {self.time_steps}") - print(f"ensemble size: {self.config.n_ens_members}") - print(f"parallel threads: {self.config.num_workers}") - print(f"number of cascade levels: {self.config.n_cascade_levels}") - print(f"order of the AR(p) model: {self.config.ar_order}") - - if self.config.velocity_perturbation_method == "bps": - self.params.velocity_perturbation_parallel = ( - self.config.velocity_perturbation_kwargs.get( + print(f"time steps: {self.__time_steps}") + print(f"ensemble size: {self.__config.n_ens_members}") + print(f"parallel threads: {self.__config.num_workers}") + print(f"number of cascade levels: {self.__config.n_cascade_levels}") + print(f"order of the AR(p) model: {self.__config.ar_order}") + + if self.__config.velocity_perturbation_method == "bps": + self.__params.velocity_perturbation_parallel = ( + self.__config.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) - self.params.velocity_perturbation_perpendicular = ( - self.config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_perpendicular = ( + self.__config.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) print( - f"velocity perturbations, parallel: {self.params.velocity_perturbation_parallel[0]},{self.params.velocity_perturbation_parallel[1]},{self.params.velocity_perturbation_parallel[2]}" + f"velocity perturbations, parallel: 
{self.__params.velocity_perturbation_parallel[0]},{self.__params.velocity_perturbation_parallel[1]},{self.__params.velocity_perturbation_parallel[2]}" ) print( - f"velocity perturbations, perpendicular: {self.params.velocity_perturbation_perpendicular[0]},{self.params.velocity_perturbation_perpendicular[1]},{self.params.velocity_perturbation_perpendicular[2]}" + f"velocity perturbations, perpendicular: {self.__params.velocity_perturbation_perpendicular[0]},{self.__params.velocity_perturbation_perpendicular[1]},{self.__params.velocity_perturbation_perpendicular[2]}" ) - if self.config.precip_threshold is not None: - print(f"precip. intensity threshold: {self.config.precip_threshold}") + if self.__config.precip_threshold is not None: + print(f"precip. intensity threshold: {self.__config.precip_threshold}") - def _initialize_nowcast_components(self): + def __initialize_nowcast_components(self): """ Initialize the FFT, bandpass filters, decomposition methods, and extrapolation method. """ # Initialize number of ensemble workers - self.params.num_ensemble_workers = min( - self.config.n_ens_members, self.config.num_workers + self.__params.num_ensemble_workers = min( + self.__config.n_ens_members, self.__config.num_workers ) - M, N = self.precip.shape[1:] # Extract the spatial dimensions (height, width) + M, N = self.__precip.shape[1:] # Extract the spatial dimensions (height, width) # Initialize FFT method - self.params.fft = utils.get_method( - self.config.fft_method, shape=(M, N), n_threads=self.config.num_workers + self.__params.fft = utils.get_method( + self.__config.fft_method, shape=(M, N), n_threads=self.__config.num_workers ) # Initialize the band-pass filter for the cascade decomposition - filter_method = cascade.get_method(self.config.bandpass_filter_method) - self.params.bandpass_filter = filter_method( - (M, N), self.config.n_cascade_levels, **(self.config.filter_kwargs or {}) + filter_method = cascade.get_method(self.__config.bandpass_filter_method) + 
self.__params.bandpass_filter = filter_method( + (M, N), + self.__config.n_cascade_levels, + **(self.__config.filter_kwargs or {}), ) # Get the decomposition method (e.g., FFT) - self.params.decomposition_method, self.params.recomposition_method = ( - cascade.get_method(self.config.decomposition_method) + self.__params.decomposition_method, self.__params.recomposition_method = ( + cascade.get_method(self.__config.decomposition_method) ) # Get the extrapolation method (e.g., semilagrangian) - self.params.extrapolation_method = extrapolation.get_method( - self.config.extrapolation_method + self.__params.extrapolation_method = extrapolation.get_method( + self.__config.extrapolation_method ) # Generate the mesh grid for spatial coordinates x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) - self.params.xy_coordinates = np.stack([x_values, y_values]) + self.__params.xy_coordinates = np.stack([x_values, y_values]) # Determine the domain mask from non-finite values in the precipitation data - self.params.domain_mask = np.logical_or.reduce( - [~np.isfinite(self.precip[i, :]) for i in range(self.precip.shape[0])] + self.__params.domain_mask = np.logical_or.reduce( + [~np.isfinite(self.__precip[i, :]) for i in range(self.__precip.shape[0])] ) print("Nowcast components initialized successfully.") - def _perform_extrapolation(self): + def __perform_extrapolation(self): """ Extrapolate (advect) precipitation fields based on the velocity field to align them in time. This prepares the precipitation fields for autoregressive modeling. 
""" # Determine the precipitation threshold mask if conditional is set - if self.config.conditional: - self.state.mask_threshold = np.logical_and.reduce( + if self.__config.conditional: + self.__state.mask_threshold = np.logical_and.reduce( [ - self.precip[i, :, :] >= self.config.precip_threshold - for i in range(self.precip.shape[0]) + self.__precip[i, :, :] >= self.__config.precip_threshold + for i in range(self.__precip.shape[0]) ] ) else: - self.state.mask_threshold = None + self.__state.mask_threshold = None - extrap_kwargs = self.config.extrapolation_kwargs.copy() - extrap_kwargs["xy_coords"] = self.params.xy_coordinates + extrap_kwargs = self.__config.extrapolation_kwargs.copy() + extrap_kwargs["xy_coords"] = self.__params.xy_coordinates extrap_kwargs["allow_nonfinite_values"] = ( - True if np.any(~np.isfinite(self.precip)) else False + True if np.any(~np.isfinite(self.__precip)) else False ) res = [] - def _extrapolate_single_field(precip, i): + def __extrapolate_single_field(precip, i): # Extrapolate a single precipitation field using the velocity field - return self.params.extrapolation_method( + return self.__params.extrapolation_method( precip[i, :, :], - self.velocity, - self.config.ar_order - i, + self.__velocity, + self.__config.ar_order - i, "min", **extrap_kwargs, )[-1] - for i in range(self.config.ar_order): + for i in range(self.__config.ar_order): if ( not DASK_IMPORTED ): # If Dask is not available, perform sequential extrapolation - self.precip[i, :, :] = _extrapolate_single_field(self.precip, i) + self.__precip[i, :, :] = __extrapolate_single_field(self.__precip, i) else: # If Dask is available, accumulate delayed computations for parallel execution - res.append(dask.delayed(_extrapolate_single_field)(self.precip, i)) + res.append(dask.delayed(__extrapolate_single_field)(self.__precip, i)) # If Dask is available, perform the parallel computation if DASK_IMPORTED and res: - num_workers_ = min(self.params.num_ensemble_workers, len(res)) - 
self.precip = np.stack( + num_workers_ = min(self.__params.num_ensemble_workers, len(res)) + self.__precip = np.stack( list(dask.compute(*res, num_workers=num_workers_)) - + [self.precip[-1, :, :]] + + [self.__precip[-1, :, :]] ) print("Extrapolation complete and precipitation fields aligned.") - def _apply_noise_and_ar_model(self): + def __apply_noise_and_ar_model(self): """ Apply noise and autoregressive (AR) models to precipitation cascades. This method applies the AR model to the decomposed precipitation cascades and adds noise perturbations if necessary. """ # Make a copy of the precipitation data and replace non-finite values - precip = self.precip.copy() - for i in range(self.precip.shape[0]): + precip = self.__precip.copy() + for i in range(self.__precip.shape[0]): # Replace non-finite values with the minimum finite value of the precipitation field precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :]) # Store the precipitation data back in the object - self.precip = precip + self.__precip = precip # Initialize the noise generator if the noise_method is provided - if self.config.noise_method is not None: - np.random.seed(self.config.seed) # Set the random seed for reproducibility - init_noise, generate_noise = noise.get_method(self.config.noise_method) - self.params.noise_generator = generate_noise - - self.params.perturbation_generator = init_noise( - self.precip, fft_method=self.params.fft, **self.config.noise_kwargs + if self.__config.noise_method is not None: + np.random.seed( + self.__config.seed + ) # Set the random seed for reproducibility + init_noise, generate_noise = noise.get_method(self.__config.noise_method) + self.__params.noise_generator = generate_noise + + self.__params.perturbation_generator = init_noise( + self.__precip, + fft_method=self.__params.fft, + **self.__config.noise_kwargs, ) # Handle noise standard deviation adjustments if necessary - if self.config.noise_stddev_adj == "auto": + if self.__config.noise_stddev_adj == 
"auto": print("Computing noise adjustment coefficients... ", end="", flush=True) - if self.config.measure_time: + if self.__config.measure_time: starttime = time.time() # Compute noise adjustment coefficients - self.params.noise_std_coefficients = ( + self.__params.noise_std_coefficients = ( noise.utils.compute_noise_stddev_adjs( - self.precip[-1, :, :], - self.config.precip_threshold, - np.min(self.precip), - self.params.bandpass_filter, - self.params.decomposition_method, - self.params.perturbation_generator, - self.params.noise_generator, + self.__precip[-1, :, :], + self.__config.precip_threshold, + np.min(self.__precip), + self.__params.bandpass_filter, + self.__params.decomposition_method, + self.__params.perturbation_generator, + self.__params.noise_generator, 20, - conditional=self.config.conditional, - num_workers=self.config.num_workers, - seed=self.config.seed, + conditional=self.__config.conditional, + num_workers=self.__config.num_workers, + seed=self.__config.seed, ) ) # Measure and print time taken - if self.config.measure_time: - self._measure_time( + if self.__config.measure_time: + self.__measure_time( "Noise adjustment coefficient computation", starttime ) else: print("done.") - elif self.config.noise_stddev_adj == "fixed": + elif self.__config.noise_stddev_adj == "fixed": # Set fixed noise adjustment coefficients func = lambda k: 1.0 / (0.75 + 0.09 * k) - self.params.noise_std_coefficients = [ - func(k) for k in range(1, self.config.n_cascade_levels + 1) + self.__params.noise_std_coefficients = [ + func(k) for k in range(1, self.__config.n_cascade_levels + 1) ] else: # Default to no adjustment - self.params.noise_std_coefficients = np.ones( - self.config.n_cascade_levels + self.__params.noise_std_coefficients = np.ones( + self.__config.n_cascade_levels ) - if self.config.noise_stddev_adj is not None: + if self.__config.noise_stddev_adj is not None: # Print noise std deviation coefficients if adjustments were made print( - f"noise std. dev. 
coeffs: {str(self.params.noise_std_coefficients)}" + f"noise std. dev. coeffs: {str(self.__params.noise_std_coefficients)}" ) else: # No noise, so set perturbation generator and noise_std_coefficients to None - self.params.perturbation_generator = None - self.params.noise_std_coefficients = np.ones( - self.config.n_cascade_levels + self.__params.perturbation_generator = None + self.__params.noise_std_coefficients = np.ones( + self.__config.n_cascade_levels ) # Keep default as 1.0 to avoid breaking AR model # Decompose the input precipitation fields - self.state.precip_decomposed = [] - for i in range(self.config.ar_order + 1): - precip_ = self.params.decomposition_method( - self.precip[i, :, :], - self.params.bandpass_filter, - mask=self.state.mask_threshold, - fft_method=self.params.fft, - output_domain=self.config.domain, + self.__state.precip_decomposed = [] + for i in range(self.__config.ar_order + 1): + precip_ = self.__params.decomposition_method( + self.__precip[i, :, :], + self.__params.bandpass_filter, + mask=self.__state.mask_threshold, + fft_method=self.__params.fft, + output_domain=self.__config.domain, normalize=True, compute_stats=True, compact_output=True, ) - self.state.precip_decomposed.append(precip_) + self.__state.precip_decomposed.append(precip_) # Normalize the cascades and rearrange them into a 4D array - self.state.precip_cascades = nowcast_utils.stack_cascades( - self.state.precip_decomposed, self.config.n_cascade_levels + self.__state.precip_cascades = nowcast_utils.stack_cascades( + self.__state.precip_decomposed, self.__config.n_cascade_levels ) - self.state.precip_decomposed = self.state.precip_decomposed[-1] - self.state.precip_decomposed = [ - self.state.precip_decomposed.copy() - for _ in range(self.config.n_ens_members) + self.__state.precip_decomposed = self.__state.precip_decomposed[-1] + self.__state.precip_decomposed = [ + self.__state.precip_decomposed.copy() + for _ in range(self.__config.n_ens_members) ] # Compute temporal 
autocorrelation coefficients for each cascade level - self.params.autocorrelation_coefficients = np.empty( - (self.config.n_cascade_levels, self.config.ar_order) + self.__params.autocorrelation_coefficients = np.empty( + (self.__config.n_cascade_levels, self.__config.ar_order) ) - for i in range(self.config.n_cascade_levels): - self.params.autocorrelation_coefficients[i, :] = ( + for i in range(self.__config.n_cascade_levels): + self.__params.autocorrelation_coefficients[i, :] = ( correlation.temporal_autocorrelation( - self.state.precip_cascades[i], mask=self.state.mask_threshold + self.__state.precip_cascades[i], mask=self.__state.mask_threshold ) ) - nowcast_utils.print_corrcoefs(self.params.autocorrelation_coefficients) + nowcast_utils.print_corrcoefs(self.__params.autocorrelation_coefficients) # Adjust the lag-2 correlation coefficient if AR(2) model is used - if self.config.ar_order == 2: - for i in range(self.config.n_cascade_levels): - self.params.autocorrelation_coefficients[i, 1] = ( + if self.__config.ar_order == 2: + for i in range(self.__config.n_cascade_levels): + self.__params.autocorrelation_coefficients[i, 1] = ( autoregression.adjust_lag2_corrcoef2( - self.params.autocorrelation_coefficients[i, 0], - self.params.autocorrelation_coefficients[i, 1], + self.__params.autocorrelation_coefficients[i, 0], + self.__params.autocorrelation_coefficients[i, 1], ) ) # Estimate the parameters of the AR model using auto-correlation coefficients - self.params.ar_model_coefficients = np.empty( - (self.config.n_cascade_levels, self.config.ar_order + 1) + self.__params.ar_model_coefficients = np.empty( + (self.__config.n_cascade_levels, self.__config.ar_order + 1) ) - for i in range(self.config.n_cascade_levels): - self.params.ar_model_coefficients[i, :] = ( + for i in range(self.__config.n_cascade_levels): + self.__params.ar_model_coefficients[i, :] = ( autoregression.estimate_ar_params_yw( - self.params.autocorrelation_coefficients[i, :] + 
self.__params.autocorrelation_coefficients[i, :] ) ) - nowcast_utils.print_ar_params(self.params.ar_model_coefficients) + nowcast_utils.print_ar_params(self.__params.ar_model_coefficients) # Discard all except the last ar_order cascades for AR model - self.state.precip_cascades = [ - self.state.precip_cascades[i][-self.config.ar_order :] - for i in range(self.config.n_cascade_levels) + self.__state.precip_cascades = [ + self.__state.precip_cascades[i][-self.__config.ar_order :] + for i in range(self.__config.n_cascade_levels) ] # Stack the cascades into a list containing all ensemble members - self.state.precip_cascades = [ + self.__state.precip_cascades = [ [ - self.state.precip_cascades[j].copy() - for j in range(self.config.n_cascade_levels) + self.__state.precip_cascades[j].copy() + for j in range(self.__config.n_cascade_levels) ] - for _ in range(self.config.n_ens_members) + for _ in range(self.__config.n_ens_members) ] # Initialize random generators if noise_method is provided - if self.config.noise_method is not None: - self.state.random_generator_precip = [] - self.state.random_generator_motion = [] + if self.__config.noise_method is not None: + self.__state.random_generator_precip = [] + self.__state.random_generator_motion = [] - for _ in range(self.config.n_ens_members): + for _ in range(self.__config.n_ens_members): # Create random state for precipitation noise generator - rs = np.random.RandomState(self.config.seed) - self.state.random_generator_precip.append(rs) - self.config.seed = rs.randint( + rs = np.random.RandomState(self.__config.seed) + self.__state.random_generator_precip.append(rs) + self.__config.seed = rs.randint( 0, high=int(1e9) ) # Update seed after generating # Create random state for motion perturbations generator - rs = np.random.RandomState(self.config.seed) - self.state.random_generator_motion.append(rs) - self.config.seed = rs.randint( + rs = np.random.RandomState(self.__config.seed) + 
self.__state.random_generator_motion.append(rs) + self.__config.seed = rs.randint( 0, high=int(1e9) ) # Update seed after generating else: - self.state.random_generator_precip = None - self.state.random_generator_motion = None + self.__state.random_generator_precip = None + self.__state.random_generator_motion = None print("AR model and noise applied to precipitation cascades.") - def _initialize_velocity_perturbations(self): + def __initialize_velocity_perturbations(self): """ Initialize the velocity perturbators for each ensemble member if the velocity perturbation method is specified. """ - if self.config.velocity_perturbation_method is not None: + if self.__config.velocity_perturbation_method is not None: init_vel_noise, generate_vel_noise = noise.get_method( - self.config.velocity_perturbation_method + self.__config.velocity_perturbation_method ) - self.state.velocity_perturbations = [] - for j in range(self.config.n_ens_members): + self.__state.velocity_perturbations = [] + for j in range(self.__config.n_ens_members): kwargs = { - "randstate": self.state.random_generator_motion[j], - "p_par": self.config.velocity_perturbation_kwargs.get( - "p_par", self.params.velocity_perturbation_parallel + "randstate": self.__state.random_generator_motion[j], + "p_par": self.__config.velocity_perturbation_kwargs.get( + "p_par", self.__params.velocity_perturbation_parallel ), - "p_perp": self.config.velocity_perturbation_kwargs.get( - "p_perp", self.params.velocity_perturbation_perpendicular + "p_perp": self.__config.velocity_perturbation_kwargs.get( + "p_perp", self.__params.velocity_perturbation_perpendicular ), } vp = init_vel_noise( - self.velocity, - 1.0 / self.config.kmperpixel, - self.config.timestep, + self.__velocity, + 1.0 / self.__config.kmperpixel, + self.__config.timestep, **kwargs, ) - self.state.velocity_perturbations.append( - lambda t, vp=vp: generate_vel_noise(vp, t * self.config.timestep) + self.__state.velocity_perturbations.append( + lambda t, vp=vp: 
generate_vel_noise(vp, t * self.__config.timestep) ) else: - self.state.velocity_perturbations = None + self.__state.velocity_perturbations = None print("Velocity perturbations initialized successfully.") - def _initialize_precipitation_mask(self): + def __initialize_precipitation_mask(self): """ Initialize the precipitation mask and handle different mask methods (sprog, incremental). """ - self.state.precip_forecast = [[] for _ in range(self.config.n_ens_members)] + self.__state.precip_forecast = [[] for _ in range(self.__config.n_ens_members)] - if self.config.probmatching_method == "mean": - self.params.precipitation_mean = np.mean( - self.precip[-1, :, :][ - self.precip[-1, :, :] >= self.config.precip_threshold + if self.__config.probmatching_method == "mean": + self.__params.precipitation_mean = np.mean( + self.__precip[-1, :, :][ + self.__precip[-1, :, :] >= self.__config.precip_threshold ] ) else: - self.params.precipitation_mean = None + self.__params.precipitation_mean = None - if self.config.mask_method is not None: - self.state.mask_precip = ( - self.precip[-1, :, :] >= self.config.precip_threshold + if self.__config.mask_method is not None: + self.__state.mask_precip = ( + self.__precip[-1, :, :] >= self.__config.precip_threshold ) - if self.config.mask_method == "sprog": + if self.__config.mask_method == "sprog": # Compute the wet area ratio and the precipitation mask - self.params.wet_area_ratio = np.sum(self.state.mask_precip) / ( - self.precip.shape[1] * self.precip.shape[2] + self.__params.wet_area_ratio = np.sum(self.__state.mask_precip) / ( + self.__precip.shape[1] * self.__precip.shape[2] ) - self.state.precip_mask = [ - self.state.precip_cascades[0][i].copy() - for i in range(self.config.n_cascade_levels) + self.__state.precip_mask = [ + self.__state.precip_cascades[0][i].copy() + for i in range(self.__config.n_cascade_levels) ] - self.state.precip_mask_decomposed = self.state.precip_decomposed[ + self.__state.precip_mask_decomposed = 
self.__state.precip_decomposed[ 0 ].copy() - elif self.config.mask_method == "incremental": + elif self.__config.mask_method == "incremental": # Get mask parameters - self.params.mask_rim = self.config.mask_kwargs.get("mask_rim", 10) - mask_f = self.config.mask_kwargs.get("mask_f", 1.0) + self.__params.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) + mask_f = self.__config.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element - self.params.structuring_element = generate_binary_structure(2, 1) + self.__params.structuring_element = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep - n = mask_f * self.config.timestep / self.config.kmperpixel - self.params.structuring_element = iterate_structure( - self.params.structuring_element, int((n - 1) / 2.0) + n = mask_f * self.__config.timestep / self.__config.kmperpixel + self.__params.structuring_element = iterate_structure( + self.__params.structuring_element, int((n - 1) / 2.0) ) # Compute and apply the dilated mask for each ensemble member - self.state.mask_precip = nowcast_utils.compute_dilated_mask( - self.state.mask_precip, - self.params.structuring_element, - self.params.mask_rim, + self.__state.mask_precip = nowcast_utils.compute_dilated_mask( + self.__state.mask_precip, + self.__params.structuring_element, + self.__params.mask_rim, ) - self.state.mask_precip = [ - self.state.mask_precip.copy() - for _ in range(self.config.n_ens_members) + self.__state.mask_precip = [ + self.__state.mask_precip.copy() + for _ in range(self.__config.n_ens_members) ] else: - self.state.mask_precip = None + self.__state.mask_precip = None - if self.config.noise_method is None and self.state.precip_mask is None: - self.state.precip_mask = [ - self.state.precip_cascades[0][i].copy() - for i in range(self.config.n_cascade_levels) + if self.__config.noise_method is None and self.__state.precip_mask is None: + self.__state.precip_mask = [ + 
self.__state.precip_cascades[0][i].copy() + for i in range(self.__config.n_cascade_levels) ] print("Precipitation mask initialized successfully.") - def _initialize_fft_objects(self): + def __initialize_fft_objects(self): """ Initialize FFT objects for each ensemble member. """ - self.state.fft_objs = [] - for _ in range(self.config.n_ens_members): + self.__state.fft_objs = [] + for _ in range(self.__config.n_ens_members): fft_obj = utils.get_method( - self.config.fft_method, shape=self.precip.shape[1:] + self.__config.fft_method, shape=self.__precip.shape[1:] ) - self.state.fft_objs.append(fft_obj) + self.__state.fft_objs.append(fft_obj) print("FFT objects initialized successfully.") - def _initialize_state(self): + def __initialize_state(self): """ Initialize the state dictionary used during the nowcast iteration. """ return { - "fft_objs": self.state.fft_objs, - "mask_prec": self.state.mask_precip, - "precip_cascades": self.state.precip_cascades, - "precip_decomp": self.state.precip_decomposed, - "precip_m": self.state.precip_mask, - "precip_m_d": self.state.precip_mask_decomposed, - "randgen_prec": self.state.random_generator_precip, + "fft_objs": self.__state.fft_objs, + "mask_prec": self.__state.mask_precip, + "precip_cascades": self.__state.precip_cascades, + "precip_decomp": self.__state.precip_decomposed, + "precip_m": self.__state.precip_mask, + "precip_m_d": self.__state.precip_mask_decomposed, + "randgen_prec": self.__state.random_generator_precip, } - def _initialize_params(self, precip): + def __initialize_params(self, precip): """ Initialize the params dictionary used during the nowcast iteration. 
""" return { - "decomp_method": self.params.decomposition_method, - "domain": self.config.domain, - "domain_mask": self.params.domain_mask, - "filter": self.params.bandpass_filter, - "fft": self.params.fft, - "generate_noise": self.params.noise_generator, - "mask_method": self.config.mask_method, - "mask_rim": self.params.mask_rim, - "mu_0": self.params.precipitation_mean, - "n_cascade_levels": self.config.n_cascade_levels, - "n_ens_members": self.config.n_ens_members, - "noise_method": self.config.noise_method, - "noise_std_coeffs": self.params.noise_std_coefficients, - "num_ensemble_workers": self.params.num_ensemble_workers, - "phi": self.params.ar_model_coefficients, - "pert_gen": self.params.perturbation_generator, - "probmatching_method": self.config.probmatching_method, + "decomp_method": self.__params.decomposition_method, + "domain": self.__config.domain, + "domain_mask": self.__params.domain_mask, + "filter": self.__params.bandpass_filter, + "fft": self.__params.fft, + "generate_noise": self.__params.noise_generator, + "mask_method": self.__config.mask_method, + "mask_rim": self.__params.mask_rim, + "mu_0": self.__params.precipitation_mean, + "n_cascade_levels": self.__config.n_cascade_levels, + "n_ens_members": self.__config.n_ens_members, + "noise_method": self.__config.noise_method, + "noise_std_coeffs": self.__params.noise_std_coefficients, + "num_ensemble_workers": self.__params.num_ensemble_workers, + "phi": self.__params.ar_model_coefficients, + "pert_gen": self.__params.perturbation_generator, + "probmatching_method": self.__config.probmatching_method, "precip": precip, - "precip_thr": self.config.precip_threshold, - "recomp_method": self.params.recomposition_method, - "struct": self.params.structuring_element, - "war": self.params.wet_area_ratio, + "precip_thr": self.__config.precip_threshold, + "recomp_method": self.__params.recomposition_method, + "struct": self.__params.structuring_element, + "war": self.__params.wet_area_ratio, } - def 
_update_state(self, state, params): + def __update_state(self, state, params): """ Update the state during the nowcasting loop. This function handles the AR model iteration, noise generation, recomposition, and mask application for each ensemble member. @@ -796,12 +812,12 @@ def _update_state(self, state, params): # Update the deterministic AR(p) model if noise or sprog mask is used if params["noise_method"] is None or params["mask_method"] == "sprog": - self._update_deterministic_ar_model(state, params) + self.__update_deterministic_ar_model(state, params) # Worker function for each ensemble member def worker(j): - self._apply_ar_model_to_cascades(j, state, params) - precip_forecast_out[j] = self._recompose_and_apply_mask(j, state, params) + self.__apply_ar_model_to_cascades(j, state, params) + precip_forecast_out[j] = self.__recompose_and_apply_mask(j, state, params) # Use Dask for parallel execution if available if ( @@ -819,7 +835,7 @@ def worker(j): return np.stack(precip_forecast_out), state - def _update_deterministic_ar_model(self, state, params): + def __update_deterministic_ar_model(self, state, params): """ Update the deterministic AR(p) model for each cascade level if noise is disabled or if the sprog mask is used. @@ -846,14 +862,14 @@ def _update_deterministic_ar_model(self, state, params): if params["mask_method"] == "sprog": state["mask_prec"] = compute_percentile_mask(precip_m_, params["war"]) - def _apply_ar_model_to_cascades(self, j, state, params): + def __apply_ar_model_to_cascades(self, j, state, params): """ Apply the AR(p) model to the cascades for each ensemble member, including noise generation and normalization. 
""" # Generate noise if enabled if params["noise_method"] is not None: - eps = self._generate_and_decompose_noise(j, state, params) + eps = self.__generate_and_decompose_noise(j, state, params) else: eps = None @@ -878,7 +894,7 @@ def _apply_ar_model_to_cascades(self, j, state, params): eps = None eps_ = None - def _generate_and_decompose_noise(self, j, state, params): + def __generate_and_decompose_noise(self, j, state, params): """ Generate and decompose the noise field into cascades for a given ensemble member. """ @@ -902,7 +918,7 @@ def _generate_and_decompose_noise(self, j, state, params): return eps - def _recompose_and_apply_mask(self, j, state, params): + def __recompose_and_apply_mask(self, j, state, params): """ Recompose the precipitation field from cascades and apply the precipitation mask. """ @@ -923,7 +939,7 @@ def _recompose_and_apply_mask(self, j, state, params): # Apply the precipitation mask if params["mask_method"] is not None: - precip_forecast = self._apply_precipitation_mask( + precip_forecast = self.__apply_precipitation_mask( precip_forecast, j, state, params ) @@ -951,7 +967,7 @@ def _recompose_and_apply_mask(self, j, state, params): return precip_forecast - def _apply_precipitation_mask(self, precip_forecast, j, state, params): + def __apply_precipitation_mask(self, precip_forecast, j, state, params): """ Apply the precipitation mask to prevent new precipitation from generating in areas where it was not observed. @@ -972,7 +988,7 @@ def _apply_precipitation_mask(self, precip_forecast, j, state, params): return precip_forecast - def _measure_time(self, label, start_time): + def __measure_time(self, label, start_time): """ Measure and print the time taken for a specific part of the process. @@ -980,7 +996,7 @@ def _measure_time(self, label, start_time): - label: A description of the part of the process being measured. - start_time: The timestamp when the process started (from time.time()). 
""" - if self.config.measure_time: + if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") @@ -991,13 +1007,13 @@ def reset_states_and_params(self): the inputs like precip, velocity, time_steps, or config. """ # Re-initialize the state and parameters - self.state = StepsNowcasterState() - self.params = StepsNowcasterParams() + self.__state = StepsNowcasterState() + self.__params = StepsNowcasterParams() # Reset time measurement variables - self.start_time_init = None - self.init_time = None - self.mainloop_time = None + self.__start_time_init = None + self.__init_time = None + self.__mainloop_time = None # Wrapper function to preserve backward compatibility From eff9248a40da2979b93fbbb2f0fa07b59679f2fd Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 4 Nov 2024 11:50:01 +0100 Subject: [PATCH 29/65] Possible fix for static code analysis --- pysteps/nowcasts/steps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index dedbb726a..2cd01f790 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -222,7 +222,6 @@ def __check_inputs(self): """ Validate the inputs to ensure consistency and correct shapes. 
""" - if self.__precip.ndim != 3: raise ValueError("precip must be a three-dimensional array") if self.__precip.shape[0] < self.__config.ar_order + 1: From 663a9a2efe8f5bb5394f9a73eceaf307a46439a3 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 4 Nov 2024 12:06:51 +0100 Subject: [PATCH 30/65] Added the needed documentation to the class --- pysteps/nowcasts/steps.py | 202 +++++++++++++++++++++++++++++++++++++- 1 file changed, 200 insertions(+), 2 deletions(-) diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 2cd01f790..806c4082b 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -38,6 +38,166 @@ @dataclass class StepsNowcasterConfig: + """ + Parameters + ---------- + + n_ens_members: int, optional + The number of ensemble members to generate. + n_cascade_levels: int, optional + The number of cascade levels to use. Defaults to 6, see issue #385 + on GitHub. + precip_threshold: float, optional + Specifies the threshold value for minimum observable precipitation + intensity. Required if mask_method is not None or conditional is True. + kmperpixel: float, optional + Spatial resolution of the input data (kilometers/pixel). Required if + vel_pert_method is not None or mask_method is 'incremental'. + timestep: float, optional + Time step of the motion vectors (minutes). Required if vel_pert_method is + not None or mask_method is 'incremental'. + extrapolation_method: str, optional + Name of the extrapolation method to use. See the documentation of + pysteps.extrapolation.interface. + decomposition_method: {'fft'}, optional + Name of the cascade decomposition method to use. See the documentation + of pysteps.cascade.interface. + bandpass_filter_method: {'gaussian', 'uniform'}, optional + Name of the bandpass filter method to use with the cascade decomposition. + See the documentation of pysteps.cascade.interface. 
+ noise_method: {'parametric','nonparametric','ssft','nested',None}, optional + Name of the noise generator to use for perturbing the precipitation + field. See the documentation of pysteps.noise.interface. If set to None, + no noise is generated. + noise_stddev_adj: {'auto','fixed',None}, optional + Optional adjustment for the standard deviations of the noise fields added + to each cascade level. This is done to compensate incorrect std. dev. + estimates of cascade levels due to presence of no-rain areas. 'auto'=use + the method implemented in pysteps.noise.utils.compute_noise_stddev_adjs. + 'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable + noise std. dev adjustment. + ar_order: int, optional + The order of the autoregressive model to use. Must be >= 1. + velocity_perturbation_method: {'bps',None}, optional + Name of the noise generator to use for perturbing the advection field. See + the documentation of pysteps.noise.interface. If set to None, the advection + field is not perturbed. + conditional: bool, optional + If set to True, compute the statistics of the precipitation field + conditionally by excluding pixels where the values are below the + threshold precip_thr. + mask_method: {'obs','sprog','incremental',None}, optional + The method to use for masking no precipitation areas in the forecast + field. The masked pixels are set to the minimum value of the observations. + 'obs' = apply precip_thr to the most recently observed precipitation + intensity field, 'sprog' = use the smoothed forecast field from S-PROG, + where the AR(p) model has been applied, 'incremental' = iteratively + buffer the mask with a certain rate (currently it is 1 km/min), + None=no masking. + probmatching_method: {'cdf','mean',None}, optional + Method for matching the statistics of the forecast field with those of + the most recently observed one.
'cdf'=map the forecast CDF to the observed + one, 'mean'=adjust only the conditional mean value of the forecast field + in precipitation areas, None=no matching applied. Using 'mean' requires + that precip_thr and mask_method are not None. + seed: int, optional + Optional seed number for the random generators. + num_workers: int, optional + The number of workers to use for parallel computation. Applicable if dask + is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it + is advisable to disable OpenMP by setting the environment variable + OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous + threads. + fft_method: str, optional + A string defining the FFT method to use (see utils.fft.get_method). + Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, + the recommended method is 'pyfftw'. + domain: {"spatial", "spectral"} + If "spatial", all computations are done in the spatial domain (the + classical STEPS model). If "spectral", the AR(2) models and stochastic + perturbations are applied directly in the spectral domain to reduce + memory footprint and improve performance :cite:`PCH2019b`. + extrapolation_kwargs: dict, optional + Optional dictionary containing keyword arguments for the extrapolation + method. See the documentation of pysteps.extrapolation. + filter_kwargs: dict, optional + Optional dictionary containing keyword arguments for the filter method. + See the documentation of pysteps.cascade.bandpass_filters.py. + noise_kwargs: dict, optional + Optional dictionary containing keyword arguments for the initializer of + the noise generator. See the documentation of pysteps.noise.fftgenerators. + velocity_perturbation_kwargs: dict, optional + Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for + the initializer of the velocity perturbator. The choice of the optimal + parameters depends on the domain and the used optical flow method. 
+ + Default parameters from :cite:`BPS2006`: + p_par = [10.88, 0.23, -7.68] + p_perp = [5.76, 0.31, -2.72] + + Parameters fitted to the data (optical flow/domain): + + darts/fmi: + p_par = [13.71259667, 0.15658963, -16.24368207] + p_perp = [8.26550355, 0.17820458, -9.54107834] + + darts/mch: + p_par = [24.27562298, 0.11297186, -27.30087471] + p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01] + + darts/fmi+mch: + p_par = [16.55447057, 0.14160448, -19.24613059] + p_perp = [14.75343395, 0.11785398, -16.26151612] + + lucaskanade/fmi: + p_par = [2.20837526, 0.33887032, -2.48995355] + p_perp = [2.21722634, 0.32359621, -2.57402761] + + lucaskanade/mch: + p_par = [2.56338484, 0.3330941, -2.99714349] + p_perp = [1.31204508, 0.3578426, -1.02499891] + + lucaskanade/fmi+mch: + p_par = [2.31970635, 0.33734287, -2.64972861] + p_perp = [1.90769947, 0.33446594, -2.06603662] + + vet/fmi: + p_par = [0.25337388, 0.67542291, 11.04895538] + p_perp = [0.02432118, 0.99613295, 7.40146505] + + vet/mch: + p_par = [0.5075159, 0.53895212, 7.90331791] + p_perp = [0.68025501, 0.41761289, 4.73793581] + + vet/fmi+mch: + p_par = [0.29495222, 0.62429207, 8.6804131 ] + p_perp = [0.23127377, 0.59010281, 5.98180004] + + fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set + + The above parameters have been fitted by using run_vel_pert_analysis.py + and fit_vel_pert_params.py located in the scripts directory. + + See pysteps.noise.motion for additional documentation. + mask_kwargs: dict + Optional dictionary containing mask keyword arguments 'mask_f' and + 'mask_rim', the factor defining the mask increment and the rim size, + respectively. + The mask increment is defined as mask_f*timestep/kmperpixel. + measure_time: bool + If set to True, measure, print and return the computation time. + callback: function, optional + Optional function that is called after computation of each time step of + the nowcast.
The function takes one argument: a three-dimensional array + of shape (n_ens_members,h,w), where h and w are the height and width + of the input precipitation fields, respectively. This can be used, for + instance, writing the outputs into files. + return_output: bool, optional + Set to False to disable returning the outputs as numpy arrays. This can + save memory if the intermediate results are written to output files using + the callback function. + """ + n_ens_members: int = 24 n_cascade_levels: int = 6 precip_threshold: float | None = None @@ -134,8 +294,46 @@ def __init__( def compute_forecast(self): """ - Main loop for nowcast ensemble generation. This handles extrapolation, - noise application, autoregressive modeling, and recomposition of cascades. + Generate a nowcast ensemble by using the Short-Term Ensemble Prediction + System (STEPS) method. + + Parameters + ---------- + precip: array-like + Array of shape (ar_order+1,m,n) containing the input precipitation fields + ordered by timestamp from oldest to newest. The time steps between the + inputs are assumed to be regular. + velocity: array-like + Array of shape (2,m,n) containing the x- and y-components of the advection + field. The velocities are assumed to represent one time step between the + inputs. All values are required to be finite. + timesteps: int or list of floats + Number of time steps to forecast or a list of time steps for which the + forecasts are computed (relative to the input time step). The elements + of the list are required to be in ascending order. + config: StepsNowcasterConfig + Provides a set of configuration parameters for the nowcast ensemble generation. + + Returns + ------- + out: ndarray + If return_output is True, a four-dimensional array of shape + (n_ens_members,num_timesteps,m,n) containing a time series of forecast + precipitation fields for each ensemble member. Otherwise, a None value + is returned. 
The time series starts from t0+timestep, where timestep is + taken from the input precipitation fields. If measure_time is True, the + return value is a three-element tuple containing the nowcast array, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). + + See also + -------- + pysteps.extrapolation.interface, pysteps.cascade.interface, + pysteps.noise.interface, pysteps.noise.utils.compute_noise_stddev_adjs + + References + ---------- + :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ self.__check_inputs() self.__print_forecast_info() From 2066f14bfd64b76b68d39e3d4b448eae05b886b5 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 18 Nov 2024 11:28:55 +0100 Subject: [PATCH 31/65] Refactored all names in the steps blending code from old to new --- pysteps/blending/steps.py | 847 ++++++++++++++++++++++++-------------- pysteps/nowcasts/steps.py | 1 - 2 files changed, 534 insertions(+), 314 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index a6ecc09f0..6069cfcab 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -616,28 +616,34 @@ def forecast( # Create an empty np array with shape [n_ens_members, rows, cols] # and fill it with the minimum value from precip (corresponding to # zero precipitation) - R_f_ = np.full( + precip_forecast = np.full( (n_ens_members, precip_shape[0], precip_shape[1]), np.nanmin(precip) ) if subtimestep_idx: if callback is not None: - if R_f_.shape[1] > 0: - callback(R_f_.squeeze()) + if precip_forecast.shape[1] > 0: + callback(precip_forecast.squeeze()) if return_output: for j in range(n_ens_members): - R_f[j].append(R_f_[j]) + R_f[j].append(precip_forecast[j]) - R_f_ = None + precip_forecast = None if measure_time: zero_precip_time = time.time() - starttime_init if return_output: - outarr = np.stack([np.stack(R_f[j]) for j in range(n_ens_members)]) + precip_forecast_all_members_all_times = np.stack( + 
[np.stack(R_f[j]) for j in range(n_ens_members)] + ) if measure_time: - return outarr, zero_precip_time, zero_precip_time + return ( + precip_forecast_all_members_all_times, + zero_precip_time, + zero_precip_time, + ) else: - return outarr + return precip_forecast_all_members_all_times else: return None @@ -699,7 +705,7 @@ def forecast( else: precip_noise_input = precip.copy() - pp, generate_noise, noise_std_coeffs = _init_noise( + generate_perturb, generate_noise, noise_std_coeffs = _init_noise( precip_noise_input, precip_thr, n_cascade_levels, @@ -734,18 +740,29 @@ def forecast( ) # 6. Initialize all the random generators and prepare for the forecast loop - randgen_prec, vps, generate_vel_noise = _init_random_generators( - velocity, - noise_method, - vel_pert_method, - vp_par, - vp_perp, - seed, - n_ens_members, - kmperpixel, - timestep, + randgen_precip, velocity_perturbations, generate_vel_noise = ( + _init_random_generators( + velocity, + noise_method, + vel_pert_method, + vp_par, + vp_perp, + seed, + n_ens_members, + kmperpixel, + timestep, + ) ) - D, D_Yn, D_pb, R_f, R_m, mask_rim, struct, fft_objs = _prepare_forecast_loop( + ( + previous_displacement, + previous_displacement_noise_cascade, + previous_displacement_prob_matching, + R_f, + precip_forecast_non_perturbed, + mask_rim, + struct, + fft_objs, + ) = _prepare_forecast_loop( precip_cascade, noise_method, fft_method, @@ -765,8 +782,8 @@ def forecast( n_cascade_levels=n_cascade_levels, generate_noise=generate_noise, decompositor=decompositor, - pp=pp, - randgen_prec=randgen_prec, + generate_perturb=generate_perturb, + randgen_precip=randgen_precip, fft_objs=fft_objs, bp_filter=bp_filter, domain=domain, @@ -779,8 +796,10 @@ def forecast( # 7. 
initizalize the current and previous extrapolation forecast scale # for the nowcasting component - rho_extr_prev = np.repeat(1.0, PHI.shape[0]) - rho_extr = PHI[:, 0] / (1.0 - PHI[:, 1]) # phi1 / (1 - phi2), see BPS2004 + rho_extrap_cascade_prev = np.repeat(1.0, PHI.shape[0]) + rho_extrap_cascade = PHI[:, 0] / ( + 1.0 - PHI[:, 1] + ) # phi1 / (1 - phi2), see BPS2004 if measure_time: init_time = time.time() - starttime_init @@ -795,11 +814,11 @@ def forecast( extrap_kwargs["return_displacement"] = True - forecast_prev = deepcopy(precip_cascade) - noise_prev = deepcopy(noise_cascade) + precip_forc_prev_subtimestep = deepcopy(precip_cascade) + noise_prev_subtimestep = deepcopy(noise_cascade) - t_prev = [0.0 for j in range(n_ens_members)] - t_total = [0.0 for j in range(n_ens_members)] + t_prev_timestep = [0.0 for j in range(n_ens_members)] + t_leadtime_since_start_forecast = [0.0 for j in range(n_ens_members)] # iterate each time step for t, subtimestep_idx in enumerate(timesteps): @@ -927,14 +946,16 @@ def forecast( # 8.1.3 Determine the skill of the components for lead time (t0 + t) # First for the extrapolation component. Only calculate it when t > 0. 
( - rho_extr, - rho_extr_prev, + rho_extrap_cascade, + rho_extrap_cascade_prev, ) = blending.skill_scores.lt_dependent_cor_extrapolation( - PHI=PHI, correlations=rho_extr, correlations_prev=rho_extr_prev + PHI=PHI, + correlations=rho_extrap_cascade, + correlations_prev=rho_extrap_cascade_prev, ) # the nowcast iteration for each ensemble member - R_f_ = [None for _ in range(n_ens_members)] + precip_forecast = [None for _ in range(n_ens_members)] def worker(j): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) @@ -951,8 +972,10 @@ def worker(j): for n_model in range(rho_nwp_models.shape[0]) ] rho_nwp_fc = np.stack(rho_nwp_fc) - # Concatenate rho_extr and rho_nwp - rho_fc = np.concatenate((rho_extr[None, :], rho_nwp_fc), axis=0) + # Concatenate rho_extrap_cascade and rho_nwp + rho_fc = np.concatenate( + (rho_extrap_cascade[None, :], rho_nwp_fc), axis=0 + ) else: rho_nwp_fc = blending.skill_scores.lt_dependent_cor_nwp( lt=(t * int(timestep)), @@ -961,9 +984,9 @@ def worker(j): n_model=n_model_indices[j], skill_kwargs=clim_kwargs, ) - # Concatenate rho_extr and rho_nwp + # Concatenate rho_extrap_cascade and rho_nwp rho_fc = np.concatenate( - (rho_extr[None, :], rho_nwp_fc[None, :]), axis=0 + (rho_extrap_cascade[None, :], rho_nwp_fc[None, :]), axis=0 ) # 8.2 Determine the weights per component @@ -991,7 +1014,7 @@ def worker(j): for i in range(n_cascade_levels): # Determine the normalized covariance matrix (containing) # the cross-correlations between the models - cov = np.corrcoef( + covariance_nwp_models = np.corrcoef( np.stack( [ precip_models_cascade_temp[ @@ -1005,7 +1028,8 @@ def worker(j): ) # Determine the weights for this cascade level weights_model_only[:, i] = calculate_weights_spn( - correlations=rho_fc[1:, i], cov=cov + correlations=rho_fc[1:, i], + covariance=covariance_nwp_models, ) else: # Same as correlation and noise is 1 - correlation @@ -1024,16 +1048,16 @@ def worker(j): # but spatially correlated noise if noise_method is not 
None: # generate noise field - EPS = generate_noise( - pp, - randstate=randgen_prec[j], + epsilon = generate_noise( + generate_perturb, + randstate=randgen_precip[j], fft_method=fft_objs[j], domain=domain, ) # decompose the noise field into a cascade - EPS = decompositor( - EPS, + epsilon_decomposed = decompositor( + epsilon, bp_filter, fft_method=fft_objs[j], input_domain=domain, @@ -1043,14 +1067,14 @@ def worker(j): compact_output=True, ) else: - EPS = None + epsilon_decomposed = None # 8.3.2 regress the extrapolation component to the subsequent time # step # iterate the AR(p) model for each cascade level for i in range(n_cascade_levels): # apply AR(p) process to extrapolation cascade level - if EPS is not None or vel_pert_method is not None: + if epsilon_decomposed is not None or vel_pert_method is not None: precip_cascade[j][i] = autoregression.iterate_ar_model( precip_cascade[j][i], PHI[i, :] ) @@ -1059,25 +1083,25 @@ def worker(j): else: # use the deterministic AR(p) model computed above if # perturbations are disabled - precip_cascade[j][i] = R_m[i] + precip_cascade[j][i] = precip_forecast_non_perturbed[i] # 8.3.3 regress the noise component to the subsequent time step # iterate the AR(p) model for each cascade level for i in range(n_cascade_levels): # normalize the noise cascade - if EPS is not None: - EPS_ = EPS["cascade_levels"][i] - EPS_ *= noise_std_coeffs[i] + if epsilon_decomposed is not None: + epsilon_temp = epsilon_decomposed["cascade_levels"][i] + epsilon_temp *= noise_std_coeffs[i] else: - EPS_ = None + epsilon_temp = None # apply AR(p) process to noise cascade level - # (Returns zero noise if EPS is None) + # (Returns zero noise if epsilon_decomposed is None) noise_cascade[j][i] = autoregression.iterate_ar_model( - noise_cascade[j][i], PHI[i, :], eps=EPS_ + noise_cascade[j][i], PHI[i, :], eps=epsilon_temp ) - EPS = None - EPS_ = None + epsilon_decomposed = None + epsilon_temp = None # 8.4 Perturb and blend the advection fields + advect the # 
extrapolation and noise cascade to the current time step @@ -1087,72 +1111,86 @@ def worker(j): extrap_kwargs_ = extrap_kwargs.copy() extrap_kwargs_noise = extrap_kwargs.copy() extrap_kwargs_pb = extrap_kwargs.copy() - velocity_pert = velocity - R_f_ep_out = [] - Yn_ep_out = [] - R_pm_ep = [] + velocity_perturbations_extrapolation = velocity + precip_forecast_extrapolated_decomp_done = [] + noise_extrapolated_decomp_done = [] + precip_forecast_extrapolated_probability_matching = [] # Extrapolate per sub time step for t_sub in subtimesteps: if t_sub > 0: - t_diff_prev_int = t_sub - int(t_sub) - if t_diff_prev_int > 0.0: - R_f_ip = [ - (1.0 - t_diff_prev_int) * forecast_prev[j][i][-1, :] - + t_diff_prev_int * precip_cascade[j][i][-1, :] + t_diff_prev_subtimestep_int = t_sub - int(t_sub) + if t_diff_prev_subtimestep_int > 0.0: + precip_forecast_cascade_subtimestep = [ + (1.0 - t_diff_prev_subtimestep_int) + * precip_forc_prev_subtimestep[j][i][-1, :] + + t_diff_prev_subtimestep_int + * precip_cascade[j][i][-1, :] for i in range(n_cascade_levels) ] - Yn_ip = [ - (1.0 - t_diff_prev_int) * noise_prev[j][i][-1, :] - + t_diff_prev_int * noise_cascade[j][i][-1, :] + noise_cascade_subtimestep = [ + (1.0 - t_diff_prev_subtimestep_int) + * noise_prev_subtimestep[j][i][-1, :] + + t_diff_prev_subtimestep_int + * noise_cascade[j][i][-1, :] for i in range(n_cascade_levels) ] else: - R_f_ip = [ - forecast_prev[j][i][-1, :] + precip_forecast_cascade_subtimestep = [ + precip_forc_prev_subtimestep[j][i][-1, :] for i in range(n_cascade_levels) ] - Yn_ip = [ - noise_prev[j][i][-1, :] for i in range(n_cascade_levels) + noise_cascade_subtimestep = [ + noise_prev_subtimestep[j][i][-1, :] + for i in range(n_cascade_levels) ] - R_f_ip = np.stack(R_f_ip) - Yn_ip = np.stack(Yn_ip) + precip_forecast_cascade_subtimestep = np.stack( + precip_forecast_cascade_subtimestep + ) + noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) - t_diff_prev = t_sub - t_prev[j] - t_total[j] += 
t_diff_prev + t_diff_prev_subtimestep = t_sub - t_prev_timestep[j] + t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights. Note that we only perturb # the extrapolation velocity field, as the NWP velocity # field is present per time step if vel_pert_method is not None: - velocity_pert = velocity + generate_vel_noise( - vps[j], t_total[j] * timestep + velocity_perturbations_extrapolation = ( + velocity + + generate_vel_noise( + velocity_perturbations[j], + t_leadtime_since_start_forecast[j] * timestep, + ) ) # Stack the perturbed extrapolation and the NWP velocities if blend_nwp_members: - V_stack = np.concatenate( + velocity_stack_all = np.concatenate( ( - velocity_pert[None, :, :, :], + velocity_perturbations_extrapolation[None, :, :, :], velocity_models_temp, ), axis=0, ) else: - V_model_ = velocity_models_temp[j] - V_stack = np.concatenate( - (velocity_pert[None, :, :, :], V_model_[None, :, :, :]), + velocity_models = velocity_models_temp[j] + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models[None, :, :, :], + ), axis=0, ) - V_model_ = None + velocity_models = None # Obtain a blended optical flow, using the weights of the # second cascade following eq. 24 in BPS2006 velocity_blended = blending.utils.blend_optical_flows( - flows=V_stack, + flows=velocity_stack_all, weights=weights[ :-1, 1 ], # [(extr_field, n_model_fields), cascade_level=2] @@ -1163,36 +1201,51 @@ def worker(j): # This is needed to remove the interpolation artifacts. # In addition, the number of extrapolations is greatly reduced # A. 
Radar Rain - R_f_ip_recomp = blending.utils.recompose_cascade( - combined_cascade=R_f_ip, - combined_mean=mu_extrapolation, - combined_sigma=sigma_extrapolation, + precip_forecast_recomp_subtimestep = ( + blending.utils.recompose_cascade( + combined_cascade=precip_forecast_cascade_subtimestep, + combined_mean=mu_extrapolation, + combined_sigma=sigma_extrapolation, + ) ) # Make sure we have values outside the mask if zero_precip_radar: - R_f_ip_recomp = np.nan_to_num( - R_f_ip_recomp, + precip_forecast_recomp_subtimestep = np.nan_to_num( + precip_forecast_recomp_subtimestep, copy=True, nan=zerovalue, posinf=zerovalue, neginf=zerovalue, ) # Put back the mask - R_f_ip_recomp[domain_mask] = np.nan - extrap_kwargs["displacement_prev"] = D[j] - R_f_ep_recomp_, D[j] = extrapolator( - R_f_ip_recomp, + precip_forecast_recomp_subtimestep[domain_mask] = np.nan + extrap_kwargs["displacement_prev"] = previous_displacement[j] + ( + precip_forecast_extrapolated_recomp_subtimestep_temp, + previous_displacement[j], + ) = extrapolator( + precip_forecast_recomp_subtimestep, velocity_blended, - [t_diff_prev], + [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs, ) - R_f_ep_recomp = R_f_ep_recomp_[0].copy() - temp_mask = ~np.isfinite(R_f_ep_recomp) + precip_forecast_extrapolated_recomp_subtimestep = ( + precip_forecast_extrapolated_recomp_subtimestep_temp[ + 0 + ].copy() + ) + temp_mask = ~np.isfinite( + precip_forecast_extrapolated_recomp_subtimestep + ) # TODO WHERE DO CAN I FIND THIS -15.0 - R_f_ep_recomp[~np.isfinite(R_f_ep_recomp)] = zerovalue - R_f_ep = decompositor( - R_f_ep_recomp, + precip_forecast_extrapolated_recomp_subtimestep[ + ~np.isfinite( + precip_forecast_extrapolated_recomp_subtimestep + ) + ] = zerovalue + precip_forecast_extrapolated_decomp = decompositor( + precip_forecast_extrapolated_recomp_subtimestep, bp_filter, mask=MASK_thr, fft_method=fft, @@ -1203,33 +1256,42 @@ def worker(j): )["cascade_levels"] # Make sure we have values outside the 
mask if zero_precip_radar: - R_f_ep = np.nan_to_num( - R_f_ep, + precip_forecast_extrapolated_decomp = np.nan_to_num( + precip_forecast_extrapolated_decomp, copy=True, - nan=np.nanmin(R_f_ip), - posinf=np.nanmin(R_f_ip), - neginf=np.nanmin(R_f_ip), + nan=np.nanmin(precip_forecast_cascade_subtimestep), + posinf=np.nanmin(precip_forecast_cascade_subtimestep), + neginf=np.nanmin(precip_forecast_cascade_subtimestep), ) for i in range(n_cascade_levels): - R_f_ep[i][temp_mask] = np.nan + precip_forecast_extrapolated_decomp[i][temp_mask] = np.nan # B. Noise - Yn_ip_recomp = blending.utils.recompose_cascade( - combined_cascade=Yn_ip, - combined_mean=mu_noise[j], - combined_sigma=sigma_noise[j], + noise_cascade_subtimestep_recomp = ( + blending.utils.recompose_cascade( + combined_cascade=noise_cascade_subtimestep, + combined_mean=mu_noise[j], + combined_sigma=sigma_noise[j], + ) + ) + extrap_kwargs_noise["displacement_prev"] = ( + previous_displacement_noise_cascade[j] ) - extrap_kwargs_noise["displacement_prev"] = D_Yn[j] extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - Yn_ep_recomp_, D_Yn[j] = extrapolator( - Yn_ip_recomp, + ( + noise_extrapolated_recomp_temp, + previous_displacement_noise_cascade[j], + ) = extrapolator( + noise_cascade_subtimestep_recomp, velocity_blended, - [t_diff_prev], + [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_noise, ) - Yn_ep_recomp = Yn_ep_recomp_[0].copy() - Yn_ep = decompositor( - Yn_ep_recomp, + noise_extrapolated_recomp = noise_extrapolated_recomp_temp[ + 0 + ].copy() + noise_extrapolated_decomp = decompositor( + noise_extrapolated_recomp, bp_filter, mask=MASK_thr, fft_method=fft, @@ -1239,77 +1301,104 @@ def worker(j): compact_output=True, )["cascade_levels"] for i in range(n_cascade_levels): - Yn_ep[i] *= noise_std_coeffs[i] + noise_extrapolated_decomp[i] *= noise_std_coeffs[i] # Append the results to the output lists - R_f_ep_out.append(R_f_ep.copy()) - Yn_ep_out.append(Yn_ep.copy()) - R_f_ip = None - 
R_f_ip_recomp = None - R_f_ep_recomp_ = None - R_f_ep_recomp = None - R_f_ep = None - Yn_ip = None - Yn_ip_recomp = None - Yn_ep_recomp_ = None - Yn_ep_recomp = None - Yn_ep = None + precip_forecast_extrapolated_decomp_done.append( + precip_forecast_extrapolated_decomp.copy() + ) + noise_extrapolated_decomp_done.append( + noise_extrapolated_decomp.copy() + ) + precip_forecast_cascade_subtimestep = None + precip_forecast_recomp_subtimestep = None + precip_forecast_extrapolated_recomp_subtimestep_temp = None + precip_forecast_extrapolated_recomp_subtimestep = None + precip_forecast_extrapolated_decomp = None + noise_cascade_subtimestep = None + noise_cascade_subtimestep_recomp = None + noise_extrapolated_recomp_temp = None + noise_extrapolated_recomp = None + noise_extrapolated_decomp = None # Finally, also extrapolate the initial radar rainfall # field. This will be blended with the rainfall field(s) # of the (NWP) model(s) for Lagrangian blended prob. matching # min_R = np.min(precip) - extrap_kwargs_pb["displacement_prev"] = D_pb[j] + extrap_kwargs_pb["displacement_prev"] = ( + previous_displacement_prob_matching[j] + ) # Apply the domain mask to the extrapolation component - R_ = precip.copy() - R_[domain_mask] = np.nan - R_pm_ep_, D_pb[j] = extrapolator( - R_, + precip_forecast = precip.copy() + precip_forecast[domain_mask] = np.nan + ( + precip_forecast_extrapolated_probability_matching_temp, + previous_displacement_prob_matching[j], + ) = extrapolator( + precip_forecast, velocity_blended, - [t_diff_prev], + [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_pb, ) - R_pm_ep.append(R_pm_ep_[0]) + precip_forecast_extrapolated_probability_matching.append( + precip_forecast_extrapolated_probability_matching_temp[0] + ) - t_prev[j] = t_sub + t_prev_timestep[j] = t_sub - if len(R_f_ep_out) > 0: - R_f_ep_out = np.stack(R_f_ep_out) - Yn_ep_out = np.stack(Yn_ep_out) - R_pm_ep = np.stack(R_pm_ep) + if len(precip_forecast_extrapolated_decomp_done) > 
0: + precip_forecast_extrapolated_decomp_done = np.stack( + precip_forecast_extrapolated_decomp_done + ) + noise_extrapolated_decomp_done = np.stack( + noise_extrapolated_decomp_done + ) + precip_forecast_extrapolated_probability_matching = np.stack( + precip_forecast_extrapolated_probability_matching + ) # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not subtimesteps: - t_diff_prev = t + 1 - t_prev[j] - t_total[j] += t_diff_prev + t_diff_prev_subtimestep = t + 1 - t_prev_timestep[j] + t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights if vel_pert_method is not None: - velocity_pert = velocity + generate_vel_noise( - vps[j], t_total[j] * timestep + velocity_perturbations_extrapolation = ( + velocity + + generate_vel_noise( + velocity_perturbations[j], + t_leadtime_since_start_forecast[j] * timestep, + ) ) # Stack the perturbed extrapolation and the NWP velocities if blend_nwp_members: - V_stack = np.concatenate( - (velocity_pert[None, :, :, :], velocity_models_temp), + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models_temp, + ), axis=0, ) else: - V_model_ = velocity_models_temp[j] - V_stack = np.concatenate( - (velocity_pert[None, :, :, :], V_model_[None, :, :, :]), + velocity_models = velocity_models_temp[j] + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models[None, :, :, :], + ), axis=0, ) - V_model_ = None + velocity_models = None # Obtain a blended optical flow, using the weights of the # second cascade following eq. 
24 in BPS2006 velocity_blended = blending.utils.blend_optical_flows( - flows=V_stack, + flows=velocity_stack_all, weights=weights[ :-1, 1 ], # [(extr_field, n_model_fields), cascade_level=2] @@ -1317,44 +1406,48 @@ def worker(j): # Extrapolate the extrapolation and noise cascade - extrap_kwargs_["displacement_prev"] = D[j] - extrap_kwargs_noise["displacement_prev"] = D_Yn[j] + extrap_kwargs_["displacement_prev"] = previous_displacement[j] + extrap_kwargs_noise["displacement_prev"] = ( + previous_displacement_noise_cascade[j] + ) extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - _, D[j] = extrapolator( + _, previous_displacement[j] = extrapolator( None, velocity_blended, - [t_diff_prev], + [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_, ) - _, D_Yn[j] = extrapolator( + _, previous_displacement_noise_cascade[j] = extrapolator( None, velocity_blended, - [t_diff_prev], + [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_noise, ) # Also extrapolate the radar observation, used for the probability # matching and post-processing steps - extrap_kwargs_pb["displacement_prev"] = D_pb[j] - _, D_pb[j] = extrapolator( + extrap_kwargs_pb["displacement_prev"] = ( + previous_displacement_prob_matching[j] + ) + _, previous_displacement_prob_matching[j] = extrapolator( None, velocity_blended, - [t_diff_prev], + [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_pb, ) - t_prev[j] = t + 1 + t_prev_timestep[j] = t + 1 - forecast_prev[j] = precip_cascade[j] - noise_prev[j] = noise_cascade[j] + precip_forc_prev_subtimestep[j] = precip_cascade[j] + noise_prev_subtimestep[j] = noise_cascade[j] # 8.5 Blend the cascades - R_f_out = [] + final_blended_forecast = [] for t_sub in subtimesteps: # TODO: does it make sense to use sub time steps - check if it works? 
@@ -1363,11 +1456,13 @@ def worker(j): # First concatenate the cascades and the means and sigmas # precip_models = [n_models,timesteps,n_cascade_levels,m,n] if blend_nwp_members: - cascades_stacked = np.concatenate( + cascade_stack_all_components = np.concatenate( ( - R_f_ep_out[None, t_index], + precip_forecast_extrapolated_decomp_done[ + None, t_index + ], precip_models_cascade_temp, - Yn_ep_out[None, t_index], + noise_extrapolated_decomp_done[None, t_index], ), axis=0, ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] @@ -1379,11 +1474,13 @@ def worker(j): axis=0, ) else: - cascades_stacked = np.concatenate( + cascade_stack_all_components = np.concatenate( ( - R_f_ep_out[None, t_index], + precip_forecast_extrapolated_decomp_done[ + None, t_index + ], precip_models_cascade_temp[None, j], - Yn_ep_out[None, t_index], + noise_extrapolated_decomp_done[None, t_index], ), axis=0, ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] @@ -1403,36 +1500,46 @@ def worker(j): # weights for method bps have already been determined. 
if weights_method == "spn": weights = np.zeros( - (cascades_stacked.shape[0], n_cascade_levels) + ( + cascade_stack_all_components.shape[0], + n_cascade_levels, + ) ) for i in range(n_cascade_levels): # Determine the normalized covariance matrix (containing) # the cross-correlations between the models - cascades_stacked_ = np.stack( + cascade_stack_all_components_temp = np.stack( [ - cascades_stacked[n_model, i, :, :].flatten() + cascade_stack_all_components[ + n_model, i, :, : + ].flatten() for n_model in range( - cascades_stacked.shape[0] - 1 + cascade_stack_all_components.shape[0] - 1 ) ] ) # -1 to exclude the noise component - cov = np.ma.corrcoef( - np.ma.masked_invalid(cascades_stacked_) + covariance_nwp_models = np.ma.corrcoef( + np.ma.masked_invalid( + cascade_stack_all_components_temp + ) ) # Determine the weights for this cascade level weights[:, i] = calculate_weights_spn( - correlations=rho_fc[:, i], cov=cov + correlations=rho_fc[:, i], + covariance=covariance_nwp_models, ) # Blend the extrapolation, (NWP) model(s) and noise cascades - R_f_blended = blending.utils.blend_cascades( - cascades_norm=cascades_stacked, weights=weights + precip_forecast_blended = blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components, weights=weights ) # Also blend the cascade without the extrapolation component - R_f_blended_mod_only = blending.utils.blend_cascades( - cascades_norm=cascades_stacked[1:, :], - weights=weights_model_only, + precip_forecast_blended_mod_only = ( + blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components[1:, :], + weights=weights_model_only, + ) ) # Blend the means and standard deviations @@ -1451,24 +1558,30 @@ def worker(j): ) # 8.6 Recompose the cascade to a precipitation field - # (The function first normalizes the blended cascade, R_f_blended + # (The function first normalizes the blended cascade, precip_forecast_blended # again) - R_f_new = blending.utils.recompose_cascade( - 
combined_cascade=R_f_blended, + precip_forecast_recomposed = blending.utils.recompose_cascade( + combined_cascade=precip_forecast_blended, combined_mean=means_blended, combined_sigma=sigmas_blended, ) # The recomposed cascade without the extrapolation (for NaN filling # outside the radar domain) - R_f_new_mod_only = blending.utils.recompose_cascade( - combined_cascade=R_f_blended_mod_only, - combined_mean=means_blended_mod_only, - combined_sigma=sigmas_blended_mod_only, + precip_forecast_recomposed_mod_only = ( + blending.utils.recompose_cascade( + combined_cascade=precip_forecast_blended_mod_only, + combined_mean=means_blended_mod_only, + combined_sigma=sigmas_blended_mod_only, + ) ) if domain == "spectral": # TODO: Check this! (Only tested with domain == 'spatial') - R_f_new = fft_objs[j].irfft2(R_f_new) - R_f_new_mod_only = fft_objs[j].irfft2(R_f_new_mod_only) + precip_forecast_recomposed = fft_objs[j].irfft2( + precip_forecast_recomposed + ) + precip_forecast_recomposed_mod_only = fft_objs[j].irfft2( + precip_forecast_recomposed_mod_only + ) # 8.7 Post-processing steps - use the mask and fill no data with # the blended NWP forecast. Probability matching following @@ -1480,20 +1593,28 @@ def worker(j): # that is only used for post-processing steps) with the NWP # rainfall forecast for this time step using the weights # at scale level 2. 
- weights_pm = weights[:-1, 1] # Weights without noise, level 2 - weights_pm_normalized = weights_pm / np.sum(weights_pm) + weights_probability_matching = weights[ + :-1, 1 + ] # Weights without noise, level 2 + weights_probability_matching_normalized = ( + weights_probability_matching + / np.sum(weights_probability_matching) + ) # And the weights for outside the radar domain - weights_pm_mod_only = weights_model_only[ + weights_probability_matching_mod_only = weights_model_only[ :-1, 1 ] # Weights without noise, level 2 - weights_pm_normalized_mod_only = weights_pm_mod_only / np.sum( - weights_pm_mod_only + weights_probability_matching_normalized_mod_only = ( + weights_probability_matching_mod_only + / np.sum(weights_probability_matching_mod_only) ) # Stack the fields if blend_nwp_members: R_pm_stacked = np.concatenate( ( - R_pm_ep[None, t_index], + precip_forecast_extrapolated_probability_matching[ + None, t_index + ], precip_models_temp, ), axis=0, @@ -1501,29 +1622,37 @@ def worker(j): else: R_pm_stacked = np.concatenate( ( - R_pm_ep[None, t_index], + precip_forecast_extrapolated_probability_matching[ + None, t_index + ], precip_models_temp[None, j], ), axis=0, ) # Blend it - R_pm_blended = np.sum( - weights_pm_normalized.reshape( - weights_pm_normalized.shape[0], 1, 1 + precip_forecast_probability_matching_blended = np.sum( + weights_probability_matching_normalized.reshape( + weights_probability_matching_normalized.shape[0], 1, 1 ) * R_pm_stacked, axis=0, ) if blend_nwp_members: - R_pm_blended_mod_only = np.sum( - weights_pm_normalized_mod_only.reshape( - weights_pm_normalized_mod_only.shape[0], 1, 1 + precip_forecast_probability_matching_blended_mod_only = np.sum( + weights_probability_matching_normalized_mod_only.reshape( + weights_probability_matching_normalized_mod_only.shape[ + 0 + ], + 1, + 1, ) * precip_models_temp, axis=0, ) else: - R_pm_blended_mod_only = precip_models_temp[j] + precip_forecast_probability_matching_blended_mod_only = ( + 
precip_models_temp[j] + ) # The extrapolation components are NaN outside the advected # radar domain. This results in NaN values in the blended @@ -1531,7 +1660,7 @@ def worker(j): # areas with the "..._mod_only" blended forecasts, consisting # of the NWP and noise components. - nan_indices = np.isnan(R_f_new) + nan_indices = np.isnan(precip_forecast_recomposed) if smooth_radar_mask_range != 0: # Compute the smooth dilated mask new_mask = blending.utils.compute_smooth_dilated_mask( @@ -1543,49 +1672,68 @@ def worker(j): mask_model = np.clip(new_mask, 0, 1) mask_radar = np.clip(1 - new_mask, 0, 1) - # Handle NaNs in R_f_new and R_f_new_mod_only by setting NaNs to 0 in the blending step - R_f_new_mod_only_no_nan = np.nan_to_num( - R_f_new_mod_only, nan=0 + # Handle NaNs in precip_forecast_new and precip_forecast_new_mod_only by setting NaNs to 0 in the blending step + precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num( + precip_forecast_recomposed_mod_only, nan=0 + ) + precip_forecast_recomposed_no_nan = np.nan_to_num( + precip_forecast_recomposed, nan=0 ) - R_f_new_no_nan = np.nan_to_num(R_f_new, nan=0) # Perform the blending of radar and model inside the radar domain using a weighted combination - R_f_new = np.nansum( + precip_forecast_recomposed = np.nansum( [ - mask_model * R_f_new_mod_only_no_nan, - mask_radar * R_f_new_no_nan, + mask_model + * precip_forecast_recomposed_mod_only_no_nan, + mask_radar * precip_forecast_recomposed_no_nan, ], axis=0, ) - nan_indices = np.isnan(R_pm_blended) - R_pm_blended = np.nansum( + nan_indices = np.isnan( + precip_forecast_probability_matching_blended + ) + precip_forecast_probability_matching_blended = np.nansum( [ - R_pm_blended * mask_radar, - R_pm_blended_mod_only * mask_model, + precip_forecast_probability_matching_blended + * mask_radar, + precip_forecast_probability_matching_blended_mod_only + * mask_model, ], axis=0, ) else: - R_f_new[nan_indices] = R_f_new_mod_only[nan_indices] - nan_indices = 
np.isnan(R_pm_blended) - R_pm_blended[nan_indices] = R_pm_blended_mod_only[ + precip_forecast_recomposed[nan_indices] = ( + precip_forecast_recomposed_mod_only[nan_indices] + ) + nan_indices = np.isnan( + precip_forecast_probability_matching_blended + ) + precip_forecast_probability_matching_blended[ + nan_indices + ] = precip_forecast_probability_matching_blended_mod_only[ nan_indices ] # Finally, fill the remaining nan values, if present, with # the minimum value in the forecast - nan_indices = np.isnan(R_f_new) - R_f_new[nan_indices] = np.nanmin(R_f_new) - nan_indices = np.isnan(R_pm_blended) - R_pm_blended[nan_indices] = np.nanmin(R_pm_blended) + nan_indices = np.isnan(precip_forecast_recomposed) + precip_forecast_recomposed[nan_indices] = np.nanmin( + precip_forecast_recomposed + ) + nan_indices = np.isnan( + precip_forecast_probability_matching_blended + ) + precip_forecast_probability_matching_blended[nan_indices] = ( + np.nanmin(precip_forecast_probability_matching_blended) + ) # 8.7.2. Apply the masking and prob. matching if mask_method is not None: # apply the precipitation mask to prevent generation of new # precipitation into areas where it was not originally # observed - R_cmin = R_f_new.min() + precip_forecast_min_value = precip_forecast_recomposed.min() if mask_method == "incremental": # The incremental mask is slightly different from # the implementation in the non-blended steps.py, as @@ -1593,60 +1741,100 @@ def worker(j): # on R_pm_blended. Therefore, the buffer does not # increase over time. 
# Get the mask for this forecast - MASK_prec = R_pm_blended >= precip_thr + precip_field_mask = ( + precip_forecast_probability_matching_blended + >= precip_thr + ) # Buffer the mask - MASK_prec = _compute_incremental_mask( - MASK_prec, struct, mask_rim + precip_field_mask = _compute_incremental_mask( + precip_field_mask, struct, mask_rim ) # Get the final mask - R_f_new = R_cmin + (R_f_new - R_cmin) * MASK_prec - MASK_prec_ = R_f_new > R_cmin + precip_forecast_recomposed = ( + precip_forecast_min_value + + ( + precip_forecast_recomposed + - precip_forecast_min_value + ) + * precip_field_mask + ) + precip_field_mask_temp = ( + precip_forecast_recomposed + > precip_forecast_min_value + ) elif mask_method == "obs": # The mask equals the most recent benchmark # rainfall field - MASK_prec_ = R_pm_blended >= precip_thr + precip_field_mask_temp = ( + precip_forecast_probability_matching_blended + >= precip_thr + ) # Set to min value outside of mask - R_f_new[~MASK_prec_] = R_cmin + precip_forecast_recomposed[~precip_field_mask_temp] = ( + precip_forecast_min_value + ) # If probmatching_method is not None, resample the distribution from # both the extrapolation cascade and the model (NWP) cascade and use # that for the probability matching. if probmatching_method is not None and resample_distribution: - arr1 = R_pm_ep[t_index] + arr1 = precip_forecast_extrapolated_probability_matching[ + t_index + ] arr2 = precip_models_temp[j] # resample weights based on cascade level 2. # Areas where one of the fields is nan are not included. 
R_pm_resampled = probmatching.resample_distributions( first_array=arr1, second_array=arr2, - probability_first_array=weights_pm_normalized[0], + probability_first_array=weights_probability_matching_normalized[ + 0 + ], ) else: - R_pm_resampled = R_pm_blended.copy() + R_pm_resampled = ( + precip_forecast_probability_matching_blended.copy() + ) if probmatching_method == "cdf": # nan indices in the extrapolation nowcast - nan_indices = np.isnan(R_pm_ep[t_index]) + nan_indices = np.isnan( + precip_forecast_extrapolated_probability_matching[ + t_index + ] + ) # Adjust the CDF of the forecast to match the resampled distribution combined from # extrapolation and model fields. # Rainfall outside the pure extrapolation domain is not taken into account. - if np.any(np.isfinite(R_f_new)): - R_f_new = probmatching.nonparam_match_empirical_cdf( - R_f_new, R_pm_resampled, nan_indices + if np.any(np.isfinite(precip_forecast_recomposed)): + precip_forecast_recomposed = ( + probmatching.nonparam_match_empirical_cdf( + precip_forecast_recomposed, + R_pm_resampled, + nan_indices, + ) ) R_pm_resampled = None elif probmatching_method == "mean": # Use R_pm_blended as benchmark field and - mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr]) - MASK = R_f_new >= precip_thr - mu_fct = np.mean(R_f_new[MASK]) - R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0 + mean_probabiltity_matching_forecast = np.mean( + R_pm_resampled[R_pm_resampled >= precip_thr] + ) + no_rain_mask = precip_forecast_recomposed >= precip_thr + mean_precip_forecast = np.mean( + precip_forecast_recomposed[no_rain_mask] + ) + precip_forecast_recomposed[no_rain_mask] = ( + precip_forecast_recomposed[no_rain_mask] + - mean_precip_forecast + + mean_probabiltity_matching_forecast + ) R_pm_resampled = None - R_f_out.append(R_f_new) + final_blended_forecast.append(precip_forecast_recomposed) - R_f_[j] = R_f_out + precip_forecast[j] = final_blended_forecast res = [] @@ -1667,25 +1855,27 @@ def worker(j): print("done.") if 
callback is not None: - R_f_stacked = np.stack(R_f_) - if R_f_stacked.shape[1] > 0: - callback(R_f_stacked.squeeze()) + precip_forecast_final = np.stack(precip_forecast) + if precip_forecast_final.shape[1] > 0: + callback(precip_forecast_final.squeeze()) if return_output: for j in range(n_ens_members): - R_f[j].extend(R_f_[j]) + R_f[j].extend(precip_forecast[j]) - R_f_ = None + precip_forecast = None if measure_time: mainloop_time = time.time() - starttime_mainloop if return_output: - outarr = np.stack([np.stack(R_f[j]) for j in range(n_ens_members)]) + precip_forecast_all_members_all_times = np.stack( + [np.stack(R_f[j]) for j in range(n_ens_members)] + ) if measure_time: - return outarr, init_time, mainloop_time + return precip_forecast_all_members_all_times, init_time, mainloop_time else: - return outarr + return precip_forecast_all_members_all_times else: return None @@ -1773,7 +1963,7 @@ def calculate_weights_bps(correlations): return weights -def calculate_weights_spn(correlations, cov): +def calculate_weights_spn(correlations, covariance): """Calculate SPN blending weights for STEPS blending from correlation. Parameters @@ -1781,7 +1971,7 @@ def calculate_weights_spn(correlations, cov): correlations : array-like Array of shape [n_components] containing correlation (skills) for each component (NWP models and nowcast). - cov : array-like + covariance : array-like Array of shape [n_components, n_components] containing the covariance matrix of the models that will be blended. 
If cov is set to None and correlations only contains one model, the weight equals the correlation @@ -1801,21 +1991,21 @@ def calculate_weights_spn(correlations, cov): # Check if the correlations are positive, otherwise rho = 10e-5 correlations = np.where(correlations < 10e-5, 10e-5, correlations) - if correlations.shape[0] > 1 and len(cov) > 1: - if isinstance(cov, type(None)): + if correlations.shape[0] > 1 and len(covariance) > 1: + if isinstance(covariance, type(None)): raise ValueError("cov must contain a covariance matrix") else: # Make a numpy array out of cov and get the inverse - cov = np.where(cov == 0.0, 10e-5, cov) + covariance = np.where(covariance == 0.0, 10e-5, covariance) # Make sure the determinant of the matrix is not zero, otherwise # subtract 10e-5 from the cross-correlations between the models - if np.linalg.det(cov) == 0.0: - cov = cov - 10e-5 + if np.linalg.det(covariance) == 0.0: + covariance = covariance - 10e-5 # Ensure the correlation of the model with itself is always 1.0 - for i, _ in enumerate(cov): - cov[i][i] = 1.0 + for i, _ in enumerate(covariance): + covariance[i][i] = 1.0 # Use a numpy array instead of a matrix - cov_matrix = np.array(cov) + cov_matrix = np.array(covariance) # Get the inverse of the matrix using scipy's inv function cov_matrix_inv = inv(cov_matrix) # The component weights are the dot product between cov_matrix_inv and cor_vec @@ -2041,21 +2231,21 @@ def _init_noise( init_noise, generate_noise = noise.get_method(noise_method) # initialize the perturbation generator for the precipitation field - pp = init_noise(precip, fft_method=fft, **noise_kwargs) + generate_perturb = init_noise(precip, fft_method=fft, **noise_kwargs) if noise_stddev_adj == "auto": print("Computing noise adjustment coefficients... 
", end="", flush=True) if measure_time: starttime = time.time() - R_min = np.min(precip) + precip_forecast_min = np.min(precip) noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( precip[-1, :, :], precip_thr, - R_min, + precip_forecast_min, bp_filter, decompositor, - pp, + generate_perturb, generate_noise, 20, conditional=True, @@ -2076,7 +2266,7 @@ def _init_noise( if noise_stddev_adj is not None: print(f"noise std. dev. coeffs: {noise_std_coeffs}") - return pp, generate_noise, noise_std_coeffs + return generate_perturb, generate_noise, noise_std_coeffs def _compute_cascade_decomposition_radar( @@ -2091,9 +2281,9 @@ def _compute_cascade_decomposition_radar( fft, ): """Compute the cascade decompositions of the input precipitation fields.""" - R_d = [] + precip_forecast_decomp = [] for i in range(ar_order + 1): - R_ = decompositor( + precip_forecast = decompositor( precip[i, :, :], bp_filter, mask=MASK_thr, @@ -2103,17 +2293,21 @@ def _compute_cascade_decomposition_radar( compute_stats=True, compact_output=True, ) - R_d.append(R_) + precip_forecast_decomp.append(precip_forecast) # Rearrange the cascaded into a four-dimensional array of shape # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model - R_c = nowcast_utils.stack_cascades(R_d, n_cascade_levels) + precip_forecast_cascades = nowcast_utils.stack_cascades( + precip_forecast_decomp, n_cascade_levels + ) - R_d = R_d[-1] - mu_extrapolation = np.array(R_d["means"]) - sigma_extrapolation = np.array(R_d["stds"]) - R_d = [R_d.copy() for j in range(n_ens_members)] - return R_c, mu_extrapolation, sigma_extrapolation + precip_forecast_decomp = precip_forecast_decomp[-1] + mu_extrapolation = np.array(precip_forecast_decomp["means"]) + sigma_extrapolation = np.array(precip_forecast_decomp["stds"]) + precip_forecast_decomp = [ + precip_forecast_decomp.copy() for j in range(n_ens_members) + ] + return precip_forecast_cascades, mu_extrapolation, sigma_extrapolation def 
_compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): @@ -2137,7 +2331,7 @@ def _compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): def _estimate_ar_parameters_radar( - R_c, ar_order, n_cascade_levels, MASK_thr, zero_precip_radar + precip_forecast_cascades, ar_order, n_cascade_levels, MASK_thr, zero_precip_radar ): """Estimate AR parameters for the radar rainfall field.""" # If there are values in the radar fields, compute the autocorrelations @@ -2145,7 +2339,9 @@ def _estimate_ar_parameters_radar( if not zero_precip_radar: # compute lag-l temporal autocorrelation coefficients for each cascade level for i in range(n_cascade_levels): - GAMMA[i, :] = correlation.temporal_autocorrelation(R_c[i], mask=MASK_thr) + GAMMA[i, :] = correlation.temporal_autocorrelation( + precip_forecast_cascades[i], mask=MASK_thr + ) # Else, use standard values for the autocorrelations else: @@ -2204,7 +2400,7 @@ def _estimate_ar_parameters_radar( def _find_nwp_combination( precip_models, - R_models_pm, + precip_forecast_probability_matching, velocity_models, mu_models, sigma_models, @@ -2258,7 +2454,9 @@ def _find_nwp_combination( sigma_models = np.repeat(sigma_models, n_ens_members_max, axis=0) velocity_models = np.repeat(velocity_models, n_ens_members_max, axis=0) # For the prob. matching - R_models_pm = np.repeat(R_models_pm, n_ens_members_max, axis=0) + precip_forecast_probability_matching = np.repeat( + precip_forecast_probability_matching, n_ens_members_max, axis=0 + ) # Finally, for the model indices n_model_indices = np.repeat(n_model_indices, n_ens_members_max, axis=0) @@ -2273,13 +2471,15 @@ def _find_nwp_combination( sigma_models = np.repeat(sigma_models, repeats, axis=0) velocity_models = np.repeat(velocity_models, repeats, axis=0) # For the prob. 
matching - R_models_pm = np.repeat(R_models_pm, repeats, axis=0) + precip_forecast_probability_matching = np.repeat( + precip_forecast_probability_matching, repeats, axis=0 + ) # Finally, for the model indices n_model_indices = np.repeat(n_model_indices, repeats, axis=0) return ( precip_models, - R_models_pm, + precip_forecast_probability_matching, velocity_models, mu_models, sigma_models, @@ -2300,11 +2500,11 @@ def _init_random_generators( ): """Initialize all the random generators.""" if noise_method is not None: - randgen_prec = [] + randgen_precip = [] randgen_motion = [] for j in range(n_ens_members): rs = np.random.RandomState(seed) - randgen_prec.append(rs) + randgen_precip.append(rs) seed = rs.randint(0, high=1e9) rs = np.random.RandomState(seed) randgen_motion.append(rs) @@ -2314,7 +2514,7 @@ def _init_random_generators( init_vel_noise, generate_vel_noise = noise.get_method(vel_pert_method) # initialize the perturbation generators for the motion field - vps = [] + velocity_perturbations = [] for j in range(n_ens_members): kwargs = { "randstate": randgen_motion[j], @@ -2322,15 +2522,15 @@ def _init_random_generators( "p_perp": vp_perp, } vp_ = init_vel_noise(velocity, 1.0 / kmperpixel, timestep, **kwargs) - vps.append(vp_) + velocity_perturbations.append(vp_) else: - vps, generate_vel_noise = None, None + velocity_perturbations, generate_vel_noise = None, None - return randgen_prec, vps, generate_vel_noise + return randgen_precip, velocity_perturbations, generate_vel_noise def _prepare_forecast_loop( - R_c, + precip_forecast_cascades, noise_method, fft_method, n_cascade_levels, @@ -2342,9 +2542,9 @@ def _prepare_forecast_loop( ): """Prepare for the forecast loop.""" # Empty arrays for the previous displacements and the forecast cascade - D = np.stack([None for j in range(n_ens_members)]) - D_Yn = np.stack([None for j in range(n_ens_members)]) - D_pb = np.stack([None for j in range(n_ens_members)]) + previous_displacement = np.stack([None for j in 
range(n_ens_members)]) + previous_displacement_noise_cascade = np.stack([None for j in range(n_ens_members)]) + previous_displacement_prob_matching = np.stack([None for j in range(n_ens_members)]) R_f = [[] for j in range(n_ens_members)] if mask_method == "incremental": @@ -2360,24 +2560,42 @@ def _prepare_forecast_loop( mask_rim, struct = None, None if noise_method is None: - R_m = [R_c[0][i].copy() for i in range(n_cascade_levels)] + precip_forecast_non_perturbed = [ + precip_forecast_cascades[0][i].copy() for i in range(n_cascade_levels) + ] else: - R_m = None + precip_forecast_non_perturbed = None fft_objs = [] for i in range(n_ens_members): - fft_objs.append(utils.get_method(fft_method, shape=R_c.shape[-2:])) + fft_objs.append( + utils.get_method(fft_method, shape=precip_forecast_cascades.shape[-2:]) + ) - return D, D_Yn, D_pb, R_f, R_m, mask_rim, struct, fft_objs + return ( + previous_displacement, + previous_displacement_noise_cascade, + previous_displacement_prob_matching, + R_f, + precip_forecast_non_perturbed, + mask_rim, + struct, + fft_objs, + ) def _compute_initial_nwp_skill( - R_c, precip_models, domain_mask, issuetime, outdir_path_skill, clim_kwargs + precip_forecast_cascades, + precip_models, + domain_mask, + issuetime, + outdir_path_skill, + clim_kwargs, ): """Calculate the initial skill of the (NWP) model forecasts at t=0.""" rho_nwp_models = [ blending.skill_scores.spatial_correlation( - obs=R_c[0, :, -1, :, :].copy(), + obs=precip_forecast_cascades[0, :, -1, :, :].copy(), mod=precip_models[n_model, :, :, :].copy(), domain_mask=domain_mask, ) @@ -2408,8 +2626,8 @@ def _init_noise_cascade( n_cascade_levels, generate_noise, decompositor, - pp, - randgen_prec, + generate_perturb, + randgen_precip, fft_objs, bp_filter, domain, @@ -2426,11 +2644,14 @@ def _init_noise_cascade( sigma_noise = np.zeros((n_ens_members, n_cascade_levels)) if noise_method: for j in range(n_ens_members): - EPS = generate_noise( - pp, randstate=randgen_prec[j], 
fft_method=fft_objs[j], domain=domain + epsilon = generate_noise( + generate_perturb, + randstate=randgen_precip[j], + fft_method=fft_objs[j], + domain=domain, ) - EPS = decompositor( - EPS, + epsilon_decomposed = decompositor( + epsilon, bp_filter, fft_method=fft_objs[j], input_domain=domain, @@ -2439,15 +2660,15 @@ def _init_noise_cascade( normalize=True, compact_output=True, ) - mu_noise[j] = EPS["means"] - sigma_noise[j] = EPS["stds"] + mu_noise[j] = epsilon_decomposed["means"] + sigma_noise[j] = epsilon_decomposed["stds"] for i in range(n_cascade_levels): - EPS_ = EPS["cascade_levels"][i] - EPS_ *= noise_std_coeffs[i] + epsilon_temp = epsilon_decomposed["cascade_levels"][i] + epsilon_temp *= noise_std_coeffs[i] for n in range(ar_order): - noise_cascade[j][i][n] = EPS_ - EPS = None - EPS_ = None + noise_cascade[j][i][n] = epsilon_temp + epsilon_decomposed = None + epsilon_temp = None return noise_cascade, mu_noise, sigma_noise diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 806c4082b..0f0b27b25 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -1214,7 +1214,6 @@ def reset_states_and_params(self): # Wrapper function to preserve backward compatibility -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( precip, velocity, From 72d0fbc102a1492f09e0be71f1b697961235dc6d Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 18 Nov 2024 14:32:43 +0100 Subject: [PATCH 32/65] Made some name changes but test still do not pass --- pysteps/blending/steps.py | 59 ++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 6069cfcab..f873d463d 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -606,7 +606,7 @@ def forecast( ) print("The resulting forecast will contain only zeros") # Create the output list - R_f = [[] for j in range(n_ens_members)] + 
precip_forecast = [[] for j in range(n_ens_members)] # Save per time step to ensure the array does not become too large if # no return_output is requested and callback is not None. @@ -616,25 +616,25 @@ def forecast( # Create an empty np array with shape [n_ens_members, rows, cols] # and fill it with the minimum value from precip (corresponding to # zero precipitation) - precip_forecast = np.full( + precip_forecast_temp = np.full( (n_ens_members, precip_shape[0], precip_shape[1]), np.nanmin(precip) ) if subtimestep_idx: if callback is not None: - if precip_forecast.shape[1] > 0: - callback(precip_forecast.squeeze()) + if precip_forecast_temp.shape[1] > 0: + callback(precip_forecast_temp.squeeze()) if return_output: for j in range(n_ens_members): - R_f[j].append(precip_forecast[j]) + precip_forecast[j].append(precip_forecast_temp[j]) - precip_forecast = None + precip_forecast_temp = None if measure_time: zero_precip_time = time.time() - starttime_init if return_output: precip_forecast_all_members_all_times = np.stack( - [np.stack(R_f[j]) for j in range(n_ens_members)] + [np.stack(precip_forecast[j]) for j in range(n_ens_members)] ) if measure_time: return ( @@ -757,7 +757,7 @@ def forecast( previous_displacement, previous_displacement_noise_cascade, previous_displacement_prob_matching, - R_f, + precip_forecast, precip_forecast_non_perturbed, mask_rim, struct, @@ -955,7 +955,7 @@ def forecast( ) # the nowcast iteration for each ensemble member - precip_forecast = [None for _ in range(n_ens_members)] + precip_forecast_temp = [None for _ in range(n_ens_members)] def worker(j): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) @@ -1329,13 +1329,13 @@ def worker(j): previous_displacement_prob_matching[j] ) # Apply the domain mask to the extrapolation component - precip_forecast = precip.copy() - precip_forecast[domain_mask] = np.nan + precip_forecast_temp = precip.copy() + precip_forecast_temp[domain_mask] = np.nan ( 
precip_forecast_extrapolated_probability_matching_temp, previous_displacement_prob_matching[j], ) = extrapolator( - precip_forecast, + precip_forecast_temp, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, @@ -1610,7 +1610,7 @@ def worker(j): ) # Stack the fields if blend_nwp_members: - R_pm_stacked = np.concatenate( + precip_forecast_probability_matching_final = np.concatenate( ( precip_forecast_extrapolated_probability_matching[ None, t_index @@ -1620,7 +1620,7 @@ def worker(j): axis=0, ) else: - R_pm_stacked = np.concatenate( + precip_forecast_probability_matching_final = np.concatenate( ( precip_forecast_extrapolated_probability_matching[ None, t_index @@ -1634,7 +1634,7 @@ def worker(j): weights_probability_matching_normalized.reshape( weights_probability_matching_normalized.shape[0], 1, 1 ) - * R_pm_stacked, + * precip_forecast_probability_matching_final, axis=0, ) if blend_nwp_members: @@ -1785,7 +1785,7 @@ def worker(j): arr2 = precip_models_temp[j] # resample weights based on cascade level 2. # Areas where one of the fields is nan are not included. 
- R_pm_resampled = probmatching.resample_distributions( + precip_forecast_probability_matching_resampled = probmatching.resample_distributions( first_array=arr1, second_array=arr2, probability_first_array=weights_probability_matching_normalized[ @@ -1793,7 +1793,7 @@ def worker(j): ], ) else: - R_pm_resampled = ( + precip_forecast_probability_matching_resampled = ( precip_forecast_probability_matching_blended.copy() ) @@ -1811,15 +1811,18 @@ def worker(j): precip_forecast_recomposed = ( probmatching.nonparam_match_empirical_cdf( precip_forecast_recomposed, - R_pm_resampled, + precip_forecast_probability_matching_resampled, nan_indices, ) ) - R_pm_resampled = None + precip_forecast_probability_matching_resampled = None elif probmatching_method == "mean": # Use R_pm_blended as benchmark field and mean_probabiltity_matching_forecast = np.mean( - R_pm_resampled[R_pm_resampled >= precip_thr] + precip_forecast_probability_matching_resampled[ + precip_forecast_probability_matching_resampled + >= precip_thr + ] ) no_rain_mask = precip_forecast_recomposed >= precip_thr mean_precip_forecast = np.mean( @@ -1830,11 +1833,11 @@ def worker(j): - mean_precip_forecast + mean_probabiltity_matching_forecast ) - R_pm_resampled = None + precip_forecast_probability_matching_resampled = None final_blended_forecast.append(precip_forecast_recomposed) - precip_forecast[j] = final_blended_forecast + precip_forecast_temp[j] = final_blended_forecast res = [] @@ -1855,22 +1858,22 @@ def worker(j): print("done.") if callback is not None: - precip_forecast_final = np.stack(precip_forecast) + precip_forecast_final = np.stack(precip_forecast_temp) if precip_forecast_final.shape[1] > 0: callback(precip_forecast_final.squeeze()) if return_output: for j in range(n_ens_members): - R_f[j].extend(precip_forecast[j]) + precip_forecast[j].extend(precip_forecast_temp[j]) - precip_forecast = None + precip_forecast_temp = None if measure_time: mainloop_time = time.time() - starttime_mainloop if 
return_output: precip_forecast_all_members_all_times = np.stack( - [np.stack(R_f[j]) for j in range(n_ens_members)] + [np.stack(precip_forecast[j]) for j in range(n_ens_members)] ) if measure_time: return precip_forecast_all_members_all_times, init_time, mainloop_time @@ -2545,7 +2548,7 @@ def _prepare_forecast_loop( previous_displacement = np.stack([None for j in range(n_ens_members)]) previous_displacement_noise_cascade = np.stack([None for j in range(n_ens_members)]) previous_displacement_prob_matching = np.stack([None for j in range(n_ens_members)]) - R_f = [[] for j in range(n_ens_members)] + precip_forecast = [[] for j in range(n_ens_members)] if mask_method == "incremental": # get mask parameters @@ -2576,7 +2579,7 @@ def _prepare_forecast_loop( previous_displacement, previous_displacement_noise_cascade, previous_displacement_prob_matching, - R_f, + precip_forecast, precip_forecast_non_perturbed, mask_rim, struct, From 1ce563e4275ef687bcb3da9e90f5db7224ba6185 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 18 Nov 2024 15:30:36 +0100 Subject: [PATCH 33/65] Fixed naming changes, now the tests pass --- .gitignore | 3 +++ pysteps/blending/steps.py | 28 +++++++++++++++------------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index e12dc8bf8..4588187d7 100644 --- a/.gitignore +++ b/.gitignore @@ -91,3 +91,6 @@ venv.bak/ # Mac OS Stuff .DS_Store + +# Running lcoal tests +/tmp diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index f873d463d..fc4ffa345 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -616,18 +616,18 @@ def forecast( # Create an empty np array with shape [n_ens_members, rows, cols] # and fill it with the minimum value from precip (corresponding to # zero precipitation) - precip_forecast_temp = np.full( + precip_forecast_workers = np.full( (n_ens_members, precip_shape[0], precip_shape[1]), np.nanmin(precip) ) if subtimestep_idx: if callback is not None: - if 
precip_forecast_temp.shape[1] > 0: - callback(precip_forecast_temp.squeeze()) + if precip_forecast_workers.shape[1] > 0: + callback(precip_forecast_workers.squeeze()) if return_output: for j in range(n_ens_members): - precip_forecast[j].append(precip_forecast_temp[j]) + precip_forecast[j].append(precip_forecast_workers[j]) - precip_forecast_temp = None + precip_forecast_workers = None if measure_time: zero_precip_time = time.time() - starttime_init @@ -955,7 +955,7 @@ def forecast( ) # the nowcast iteration for each ensemble member - precip_forecast_temp = [None for _ in range(n_ens_members)] + precip_forecast_workers = [None for _ in range(n_ens_members)] def worker(j): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) @@ -1329,13 +1329,15 @@ def worker(j): previous_displacement_prob_matching[j] ) # Apply the domain mask to the extrapolation component - precip_forecast_temp = precip.copy() - precip_forecast_temp[domain_mask] = np.nan + precip_forecast_temp_for_probability_matching = precip.copy() + precip_forecast_temp_for_probability_matching[domain_mask] = ( + np.nan + ) ( precip_forecast_extrapolated_probability_matching_temp, previous_displacement_prob_matching[j], ) = extrapolator( - precip_forecast_temp, + precip_forecast_temp_for_probability_matching, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, @@ -1837,7 +1839,7 @@ def worker(j): final_blended_forecast.append(precip_forecast_recomposed) - precip_forecast_temp[j] = final_blended_forecast + precip_forecast_workers[j] = final_blended_forecast res = [] @@ -1858,15 +1860,15 @@ def worker(j): print("done.") if callback is not None: - precip_forecast_final = np.stack(precip_forecast_temp) + precip_forecast_final = np.stack(precip_forecast_workers) if precip_forecast_final.shape[1] > 0: callback(precip_forecast_final.squeeze()) if return_output: for j in range(n_ens_members): - precip_forecast[j].extend(precip_forecast_temp[j]) + 
precip_forecast[j].extend(precip_forecast_workers[j]) - precip_forecast_temp = None + precip_forecast_workers = None if measure_time: mainloop_time = time.time() - starttime_mainloop From fbe551b5f511900e64ca775e81337fccbb3de0bc Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 18 Nov 2024 16:33:01 +0100 Subject: [PATCH 34/65] Built the rough scaffolding for the blending class --- pysteps/blending/steps.py | 218 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index fc4ffa345..2fd9e3b3f 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -66,6 +66,224 @@ DASK_IMPORTED = False +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class StepsBlendingConfig: + # Configuration parameters + n_ens_members: int + n_cascade_levels: int + ar_order: int + timestep: float + blend_nwp_members: bool + precip_thr: float + norain_thr: float + kmperpixel: float + seed: Optional[int] + num_workers: int + measure_time: bool + domain: str = "spatial" + fft_method: str = "numpy" + extrap_method: str = "semilagrangian" + extrap_kwargs: Dict[str, Any] = field(default_factory=dict) + decomp_method: str = "fft" + bandpass_filter_method: str = "gaussian" + filter_kwargs: Dict[str, Any] = field(default_factory=dict) + noise_method: Optional[str] = "nonparametric" + noise_stddev_adj: Optional[str] = "auto" + noise_kwargs: Dict[str, Any] = field(default_factory=dict) + vel_pert_method: Optional[str] = "bps" + vel_pert_kwargs: Dict[str, Any] = field(default_factory=dict) + weights_method: str = "bps" + mask_method: Optional[str] = "incremental" + mask_kwargs: Dict[str, Any] = field(default_factory=dict) + probmatching_method: Optional[str] = "cdf" + resample_distribution: bool = True + smooth_radar_mask_range: int = 0 + outdir_path_skill: str = "./tmp/" + clim_kwargs: Dict[str, Any] = field(default_factory=dict) + callback: 
Optional[Any] = None + return_output: bool = True + # Additional configuration parameters as needed + + +@dataclass +class StepsBlendingState: + issuetime: Any # Replace with appropriate type, e.g., datetime.datetime + # Precomputed or intermediate data + domain_mask: Optional[np.ndarray] = None + MASK_thr: Optional[np.ndarray] = None + forecast_output: Optional[np.ndarray] = None + # Additional state variables as needed + + +@dataclass +class StepsBlendingParams: + # Parameters and variables calculated during initialization or processing + fft_method: Any = None + bandpass_filter: Any = None + decomposer: Any = None + recomposer: Any = None + generate_perturb: Any = None + generate_noise: Any = None + noise_std_coeffs: Optional[np.ndarray] = None + PHI: Optional[np.ndarray] = None + randgen_precip: Optional[List[np.random.RandomState]] = None + velocity_perturbations: Optional[List[Any]] = None + generate_vel_noise: Any = None + previous_displacement: Optional[np.ndarray] = None + previous_displacement_noise_cascade: Optional[np.ndarray] = None + previous_displacement_prob_matching: Optional[np.ndarray] = None + precip_cascade: Optional[np.ndarray] = None + mu: Optional[np.ndarray] = None + sigma: Optional[np.ndarray] = None + noise_cascade: Optional[np.ndarray] = None + mask_rim: Optional[int] = None + struct: Any = None + fft_objs: Optional[List[Any]] = None + # Additional parameters and variables as needed + + +class BlendingEngine: + def __init__( + self, + precip, + precip_models, + velocity, + velocity_models, + time_steps, + steps_blending_config: StepsBlendingConfig, + ): + # Store inputs and optional parameters + self.__precip = precip + self.__precip_models = precip_models + self.__velocity = velocity + self.__velocity_models = velocity_models + self.__time_steps = time_steps + + # Store the config data: + self.__config = steps_blending_config + + # Store the state and params data: + self.__state = StepsBlendingState() + self.__params = 
StepsBlendingParams() + + def forecast(self): + """Main method to perform the forecast.""" + self._check_inputs() + self._initialize() + self._prepare_data() + self._initialize_noise() + self._estimate_ar_parameters() + self._init_random_generators() + self._prepare_forecast_loop() + self._compute_forecast() + return self.state.forecast_output + + # Private methods for internal processing + def _initialize_methods(self): + """Set up methods for extrapolation, decomposition, etc.""" + pass + + def _initialize_bandpass_filter(self): + """Initialize the bandpass filter.""" + pass + + def _prepare_data(self): + """Transform data into Lagrangian coordinates and perform initial decomposition.""" + pass + + def _initialize_noise(self): + """Set up the noise generation mechanism.""" + pass + + def _estimate_ar_parameters(self): + """Estimate autoregressive model parameters.""" + self._estimate_ar_parameters_radar() + + def _init_random_generators(self): + """Initialize random number generators.""" + self._init_random_generators_for_noise() + if self.config.vel_pert_method: + self._init_velocity_perturbations() + + def _prepare_forecast_loop(self): + """Set up variables and structures for the forecasting loop.""" + self._initialize_forecast_variables() + + def _compute_forecast(self): + """Main loop to compute the forecast over the specified time steps.""" + self._run_forecast_loop() + + # Methods for specific functionalities + def _check_inputs(self): + """Validate input data and configurations.""" + # Implement input checks as needed + pass + + def _initialize(self): + """Perform any additional initialization steps.""" + # Initialize variables in self.params as needed + pass + + def _transform_to_lagrangian(self): + """Transform precipitation data to Lagrangian coordinates.""" + # Use self.state and self.params as needed + pass + + def _compute_cascade_decomposition_radar(self): + """Compute the cascade decomposition for radar data.""" + # Update self.params with computed 
cascades + pass + + def _estimate_ar_parameters_radar(self): + """Estimate AR parameters for radar data.""" + # Update self.params.PHI + pass + + def _init_random_generators_for_noise(self): + """Initialize random generators for noise.""" + # Update self.params.randgen_precip + pass + + def _init_velocity_perturbations(self): + """Initialize velocity perturbations if required.""" + # Update self.params.velocity_perturbations, generate_vel_noise + pass + + def _initialize_forecast_variables(self): + """Set up variables needed for the forecast loop.""" + # Initialize variables in self.params + pass + + def _run_forecast_loop(self): + """Run the main forecast loop over time steps.""" + # Use self.state and self.params as needed + pass + + # Additional helper methods as needed + + +class SkillScoreManager: + def __init__(self, config: StepsBlendingConfig): + self.outdir_path_skill = config.outdir_path_skill + self.clim_kwargs = config.clim_kwargs + + def compute_initial_skill(self, observed_cascades, model_cascades, domain_mask): + """Calculate the initial skill of NWP models at t=0.""" + pass # Implement as needed + + def update_skill(self, lead_time, correlations, model_indices): + """Update the skill scores based on lead time.""" + pass # Implement as needed + + def save_skill(self, current_skill, validtime): + """Save the skill scores to disk.""" + pass # Implement as needed + + def forecast( precip, precip_models, From 46a93e53084fd4506540a0b14823e072166ef151 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Wed, 27 Nov 2024 16:31:02 +0100 Subject: [PATCH 35/65] Refactored untill no rain case --- pysteps/blending/steps.py | 861 +++++++++++++++++++++++++++----------- pysteps/nowcasts/steps.py | 24 +- 2 files changed, 640 insertions(+), 245 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 2fd9e3b3f..be3eee067 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -65,88 +65,111 @@ except ImportError: 
DASK_IMPORTED = False - from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional +from typing import Optional, List, Dict, Any, Callable @dataclass class StepsBlendingConfig: - # Configuration parameters + precip_threshold: Optional[float] + norain_threshold: float + kmperpixel: float + timestep: float n_ens_members: int n_cascade_levels: int - ar_order: int - timestep: float blend_nwp_members: bool - precip_thr: float - norain_thr: float - kmperpixel: float + extrapolation_method: str + decomposition_method: str + bandpass_filter_method: str + noise_method: Optional[str] + noise_stddev_adj: Optional[str] + ar_order: int + vel_pert_method: Optional[str] + weights_method: str + conditional: bool + probmatching_method: Optional[str] + mask_method: Optional[str] + resample_distribution: bool + smooth_radar_mask_range: int seed: Optional[int] num_workers: int - measure_time: bool - domain: str = "spatial" - fft_method: str = "numpy" - extrap_method: str = "semilagrangian" + fft_method: str + domain: str + outdir_path_skill: str extrap_kwargs: Dict[str, Any] = field(default_factory=dict) - decomp_method: str = "fft" - bandpass_filter_method: str = "gaussian" filter_kwargs: Dict[str, Any] = field(default_factory=dict) - noise_method: Optional[str] = "nonparametric" - noise_stddev_adj: Optional[str] = "auto" noise_kwargs: Dict[str, Any] = field(default_factory=dict) - vel_pert_method: Optional[str] = "bps" vel_pert_kwargs: Dict[str, Any] = field(default_factory=dict) - weights_method: str = "bps" - mask_method: Optional[str] = "incremental" - mask_kwargs: Dict[str, Any] = field(default_factory=dict) - probmatching_method: Optional[str] = "cdf" - resample_distribution: bool = True - smooth_radar_mask_range: int = 0 - outdir_path_skill: str = "./tmp/" clim_kwargs: Dict[str, Any] = field(default_factory=dict) + mask_kwargs: Dict[str, Any] = field(default_factory=dict) + measure_time: bool = False callback: Optional[Any] = None return_output: 
bool = True - # Additional configuration parameters as needed @dataclass -class StepsBlendingState: - issuetime: Any # Replace with appropriate type, e.g., datetime.datetime - # Precomputed or intermediate data - domain_mask: Optional[np.ndarray] = None - MASK_thr: Optional[np.ndarray] = None - forecast_output: Optional[np.ndarray] = None - # Additional state variables as needed +class StepsBlendingParams: + PHI: np.ndarray # AR(p) model parameters + noise_std_coeffs: np.ndarray # Noise standard deviation coefficients + mu_extrapolation: np.ndarray # Means of extrapolated cascades + sigma_extrapolation: np.ndarray # Std devs of extrapolated cascades + bandpass_filter: Any # Band-pass filter object + fft: Any # FFT method object + generate_perturb: Callable # Perturbation generator + generate_noise: Callable # Noise generator + generate_vel_noise: Optional[Callable] # Velocity noise generator + extrapolation_method: Any = None + decomposition_method: Any = None + recomposition_method: Any = None + vp_par: Optional[np.ndarray] = None # Velocity perturbation parameters (parallel) + vp_perp: Optional[np.ndarray] = ( + None # Velocity perturbation parameters (perpendicular) + ) + fft_objs: List[Any] = field( + default_factory=list + ) # FFT objects for ensemble members + mask_rim: Optional[int] = None # Rim size for masking + struct: Optional[np.ndarray] = None # Structuring element for mask + n_model_indices: Optional[np.ndarray] = None # NWP model indices + noise_method: Optional[str] = None # Noise method used + ar_order: int = 2 # Order of the AR model + seed: Optional[int] = None # Random seed for reproducibility + time_steps_is_list: bool = False # Time steps is a list + precip_models_provided_is_cascade: bool = False # Precip models are decomposed + xy_coordinates: np.ndarray | None = None + precip_zerovalue: Any = None + mask_threshold: Any = None + zero_precip_radar: bool = False + zero_precip_model_fields: bool = False @dataclass -class StepsBlendingParams: - 
# Parameters and variables calculated during initialization or processing - fft_method: Any = None - bandpass_filter: Any = None - decomposer: Any = None - recomposer: Any = None +class StepsBlendingState: + precip_cascade: Any = None + mu_extrapolation: Any = None + sigma_extrapolation: Any = None + precip_models_cascade: Any = None generate_perturb: Any = None generate_noise: Any = None - noise_std_coeffs: Optional[np.ndarray] = None - PHI: Optional[np.ndarray] = None - randgen_precip: Optional[List[np.random.RandomState]] = None - velocity_perturbations: Optional[List[Any]] = None + noise_std_coeffs: Any = None + PHI: Any = None + randgen_precip: Any = None + velocity_perturbations: Any = None generate_vel_noise: Any = None - previous_displacement: Optional[np.ndarray] = None - previous_displacement_noise_cascade: Optional[np.ndarray] = None - previous_displacement_prob_matching: Optional[np.ndarray] = None - precip_cascade: Optional[np.ndarray] = None - mu: Optional[np.ndarray] = None - sigma: Optional[np.ndarray] = None - noise_cascade: Optional[np.ndarray] = None - mask_rim: Optional[int] = None + previous_displacement: Any = None + previous_displacement_noise_cascade: Any = None + previous_displacement_prob_matching: Any = None + precip_forecast: Any = None + precip_forecast_non_perturbed: Any = None + mask_rim: Any = None struct: Any = None - fft_objs: Optional[List[Any]] = None - # Additional parameters and variables as needed + fft_objs: Any = None + t_prev_timestep: Any = None + t_leadtime_since_start_forecast: Any = None + # Add more state variables as needed -class BlendingEngine: +class StepsBlendingNowcaster: def __init__( self, precip, @@ -154,134 +177,586 @@ def __init__( velocity, velocity_models, time_steps, + issue_time, steps_blending_config: StepsBlendingConfig, ): - # Store inputs and optional parameters + """Initializes the StepsBlendingNowcaster with inputs and configurations.""" + # Store inputs self.__precip = precip self.__precip_models 
= precip_models self.__velocity = velocity self.__velocity_models = velocity_models - self.__time_steps = time_steps + self.__timesteps = time_steps + self.__issuetime = issue_time - # Store the config data: self.__config = steps_blending_config - # Store the state and params data: - self.__state = StepsBlendingState() + # Initialize Params and State self.__params = StepsBlendingParams() + self.__state = StepsBlendingState() - def forecast(self): - """Main method to perform the forecast.""" - self._check_inputs() - self._initialize() - self._prepare_data() - self._initialize_noise() - self._estimate_ar_parameters() - self._init_random_generators() - self._prepare_forecast_loop() - self._compute_forecast() - return self.state.forecast_output - - # Private methods for internal processing - def _initialize_methods(self): - """Set up methods for extrapolation, decomposition, etc.""" - pass + # Perform input validation + self.__check_inputs() - def _initialize_bandpass_filter(self): - """Initialize the bandpass filter.""" - pass + # Initialize nowcast components and parameters + self.__initialize_nowcast_components() - def _prepare_data(self): - """Transform data into Lagrangian coordinates and perform initial decomposition.""" - pass + # Additional variables for time measurement + self.__start_time_init = None + self.__init_time = None + self.__mainloop_time = None - def _initialize_noise(self): - """Set up the noise generation mechanism.""" + def compute_forecast(self): pass - def _estimate_ar_parameters(self): - """Estimate autoregressive model parameters.""" - self._estimate_ar_parameters_radar() + def __nowcast_main(self): + self.__check_inputs() + self.__print_forecast_info() + # Measure time for initialization + if self.__config.measure_time: + self.__start_time_init = time.time() + + # Slice the precipitation field to only use the last ar_order + 1 fields + self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() + 
self.__initialize_nowcast_components() + self.__prepare_radar_and_NWP_fields() + if self.__params.zero_precip_radar and self.__params.zero_precip_model_fields: + self.__zero_precipitation_forecast() + else: + pass + + def __check_inputs(self): + """Validates the inputs and determines if the user provided raw forecasts or decomposed forecasts.""" + # Check dimensions of precip + if self.__precip.ndim != 3: + raise ValueError( + "precip must be a three-dimensional array of shape (ar_order + 1, m, n)" + ) + if self.__precip.shape[0] < self.__config.ar_order + 1: + raise ValueError( + f"precip must have at least {self.__config.ar_order + 1} time steps in the first dimension " + f"to match the autoregressive order (ar_order={self.__config.ar_order})" + ) + + # Check dimensions of velocity + if self.__velocity.ndim != 3: + raise ValueError( + "velocity must be a three-dimensional array of shape (2, m, n)" + ) + if self.__velocity_models.ndim != 5: + raise ValueError( + "velocity_models must be a five-dimensional array of shape (n_models, timestep, 2, m, n)" + ) + if self.__velocity.shape[0] != 2 or self.__velocity_models.shape[2] != 2: + raise ValueError( + "velocity and velocity_models must have an x- and y-component, check the shape" + ) + + # Check that spatial dimensions match between precip and velocity + if self.__precip.shape[1:3] != self.__velocity.shape[1:3]: + raise ValueError( + f"Spatial dimensions of precip and velocity do not match: " + f"{self.__precip.shape[1:3]} vs {self.__velocity.shape[1:3]}" + ) + # Check if the number of members in the precipitation models and velocity models match + if self.__precip_models.shape[0] != self.__velocity_models.shape[0]: + raise ValueError( + "The number of members in the precipitation models and velocity models must match" + ) + + if isinstance(self.__timesteps, list): + self.__params.time_steps_is_list = True + original_timesteps = [0] + list(self.__timesteps) + self.__timesteps = 
nowcast_utils.binned_timesteps(original_timesteps) + if not sorted(self.__timesteps) == self.__timesteps: + raise ValueError("timesteps is not in ascending order") + if self.__precip_models.shape[1] != math.ceil(self.__timesteps[-1]) + 1: + raise ValueError( + "precip_models does not contain sufficient lead times for this forecast" + ) + else: + self.__params.time_steps_is_list = False + self.__timesteps = list(range(self.__timesteps + 1)) + if self.__precip_models.shape[1] != self.__timesteps + 1: + raise ValueError( + "precip_models does not contain sufficient lead times for this forecast" + ) + + precip_nwp_dim = self.__precip_models.ndim + if precip_nwp_dim == 2: + if isinstance(self.__precip_models[0], dict): + # It's a 2D array of dictionaries with decomposed cascades + self.__params.precip_models_provided_is_cascade = True + else: + raise ValueError( + "When precip_models has ndim == 2, it must contain dictionaries with decomposed cascades." + ) + elif precip_nwp_dim == 4: + self.__params.precip_models_provided_is_cascade = False + else: + raise ValueError( + "precip_models must be either a two-dimensional array containing dictionaries with decomposed model fields" + "or a four-dimensional array containing the original (NWP) model forecasts" + ) + + if self.__config.extrap_kwargs is None: + self.__config.extrap_kwargs = dict() + + if self.__config.filter_kwargs is None: + self.__config.filter_kwargs = dict() + + if self.__config.noise_kwargs is None: + self.__config.noise_kwargs = dict() + + if self.__config.vel_pert_kwargs is None: + self.__config.vel_pert_kwargs = dict() + + if not self.__params.precip_models_provided_is_cascade: + if self.__config.clim_kwargs is None: + # Make sure clim_kwargs at least contains the number of models + self.__config.clim_kwargs = dict( + {"n_models": self.__precip_models.shape[0]} + ) + + if self.__config.mask_kwargs is None: + mask_kwargs = dict() + + if np.any(~np.isfinite(self.__velocity)): + raise ValueError("velocity 
contains non-finite values") + + if self.__config.mask_method not in ["obs", "incremental", None]: + raise ValueError( + "unknown mask method %s: must be 'obs', 'incremental' or None" + % self.__config.mask_method + ) + + if self.__config.conditional and self.__config.precip_threshold is None: + raise ValueError("conditional=True but precip_thr is not set") + + if ( + self.__config.mask_method is not None + and self.__config.precip_threshold is None + ): + raise ValueError("mask_method!=None but precip_thr=None") + + if self.__config.noise_stddev_adj not in ["auto", "fixed", None]: + raise ValueError( + "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None" + % self.__config.noise_stddev_adj + ) + + if self.__config.kmperpixel is None: + if self.__config.vel_pert_method is not None: + raise ValueError("vel_pert_method is set but kmperpixel=None") + if self.__config.mask_method == "incremental": + raise ValueError("mask_method='incremental' but kmperpixel=None") + + if self.__config.timestep is None: + if self.__config.vel_pert_method is not None: + raise ValueError("vel_pert_method is set but timestep=None") + if self.__config.mask_method == "incremental": + raise ValueError("mask_method='incremental' but timestep=None") + + def __print_forecast_info(self): + print("STEPS blending") + print("==============") + print("") + + print("Inputs") + print("------") + print(f"forecast issue time: {self.__issuetime.isoformat()}") + print( + f"input dimensions: {self.__precip.shape[1]}x{self.__precip.shape[2]}" + ) + if self.__config.kmperpixel is not None: + print(f"km/pixel: {self.__config.kmperpixel}") + if self.__config.timestep is not None: + print(f"time step: {self.__config.timestep} minutes") + print("") + + print("NWP and blending inputs") + print("-----------------------") + print(f"number of (NWP) models: {self.__precip_models.shape[0]}") + print(f"blend (NWP) model members: {self.__config.blend_nwp_members}") + print( + f"decompose (NWP) models: 
{'yes' if self.__precip_models.ndim == 4 else 'no'}" + ) + print("") + + print("Methods") + print("-------") + print(f"extrapolation: {self.__config.extrapolation_method}") + print(f"bandpass filter: {self.__config.bandpass_filter_method}") + print(f"decomposition: {self.__config.decomposition_method}") + print(f"noise generator: {self.__config.noise_method}") + print( + f"noise adjustment: {'yes' if self.__config.noise_stddev_adj else 'no'}" + ) + print(f"velocity perturbator: {self.__config.vel_pert_method}") + print(f"blending weights method: {self.__config.weights_method}") + print( + f"conditional statistics: {'yes' if self.__config.conditional else 'no'}" + ) + print(f"precip. mask method: {self.__config.mask_method}") + print(f"probability matching: {self.__config.probmatching_method}") + print(f"FFT method: {self.__config.fft_method}") + print(f"domain: {self.__config.domain}") + print("") + + print("Parameters") + print("----------") + if isinstance(self.__timesteps, int): + print(f"number of time steps: {self.__timesteps}") + else: + print(f"time steps: {self.__timesteps}") + print(f"ensemble size: {self.__config.n_ens_members}") + print(f"parallel threads: {self.__config.num_workers}") + print(f"number of cascade levels: {self.__config.n_cascade_levels}") + print(f"order of the AR(p) model: {self.__config.ar_order}") + if self.__config.vel_pert_method == "bps": + vp_par = self.__config.vel_pert_kwargs.get( + "p_par", noise.motion.get_default_params_bps_par() + ) + vp_perp = self.__config.vel_pert_kwargs.get( + "p_perp", noise.motion.get_default_params_bps_perp() + ) + print(f"vel. pert., parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}") + print( + f"vel. pert., perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" + ) + else: + vp_par, vp_perp = None, None + + if self.__config.conditional or self.__config.mask_method is not None: + print(f"precip. 
intensity threshold: {self.__config.precip_threshold}") + print(f"no-rain fraction threshold for radar: {self.__config.norain_threshold}") + print("") + + def __initialize_nowcast_components(self): + """ + Initialize the FFT, bandpass filters, decomposition methods, and extrapolation method. + """ + # Initialize number of ensemble workers + self.__params.num_ensemble_workers = min( + self.__config.n_ens_members, self.__config.num_workers + ) + + M, N = self.__precip.shape[1:] # Extract the spatial dimensions (height, width) + + # Initialize FFT method + self.__params.fft = utils.get_method( + self.__config.fft_method, shape=(M, N), n_threads=self.__config.num_workers + ) + + # Initialize the band-pass filter for the cascade decomposition + filter_method = cascade.get_method(self.__config.bandpass_filter_method) + self.__params.bandpass_filter = filter_method( + (M, N), + self.__config.n_cascade_levels, + **(self.__config.filter_kwargs or {}), + ) - def _init_random_generators(self): - """Initialize random number generators.""" - self._init_random_generators_for_noise() - if self.config.vel_pert_method: - self._init_velocity_perturbations() + # Get the decomposition method (e.g., FFT) + ( + self.__params.decomposition_method, + self.__params.recomposition_method, + ) = cascade.get_method(self.__config.decomposition_method) + + # Get the extrapolation method (e.g., semilagrangian) + self.__params.extrapolation_method = extrapolation.get_method( + self.__config.extrapolation_method + ) - def _prepare_forecast_loop(self): - """Set up variables and structures for the forecasting loop.""" - self._initialize_forecast_variables() + # Generate the mesh grid for spatial coordinates + x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) + self.__params.xy_coordinates = np.stack([x_values, y_values]) + + precip_copy = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() + # Determine the domain mask from non-finite values in the precipitation data + 
self.__params.domain_mask = np.logical_or.reduce( + [~np.isfinite(precip_copy[i, :]) for i in range(precip_copy.shape[0])] + ) + + print("Blended nowcast components initialized successfully.") + + def __prepare_radar_and_NWP_fields(self): + # determine the precipitation threshold mask + if self.__config.conditional: + self.__params.mask_threshold = np.logical_and.reduce( + [ + self.__precip[i, :, :] >= self.__config.precip_threshold + for i in range(self.__precip.shape[0]) + ] + ) + else: + self.__params.mask_threshold = None + + # we need to know the zerovalue of precip to replace the mask when decomposing after + # extrapolation + self.__params.precip_zerovalue = np.nanmin(self.__precip) + + # 1. Start with the radar rainfall fields. We want the fields in a + # Lagrangian space + self.__precip = _transform_to_lagrangian( + self.__precip, + self.__velocity, + self.__config.ar_order, + self.__params.xy_coordinates, + self.__params.extrapolation_method, + self.__config.extrap_kwargs, + self.__config.num_workers, + ) + + # 2. 
Perform the cascade decomposition for the input precip fields and + # and, if necessary, for the (NWP) model fields + # 2.1 Compute the cascade decompositions of the input precipitation fields + """Compute the cascade decompositions of the input precipitation fields.""" + precip_forecast_decomp = [] + for i in range(self.__config.ar_order + 1): + precip_forecast = self.__params.extrapolation_method( + self.__precip[i, :, :], + self.__params.bandpass_filter, + mask=self.__params.mask_threshold, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + ) + precip_forecast_decomp.append(precip_forecast) + + # Rearrange the cascaded into a four-dimensional array of shape + # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model + self.__state.precip_cascade = nowcast_utils.stack_cascades( + precip_forecast_decomp, self.__config.n_cascade_levels + ) - def _compute_forecast(self): - """Main loop to compute the forecast over the specified time steps.""" - self._run_forecast_loop() + precip_forecast_decomp = precip_forecast_decomp[-1] + self.__state.mu_extrapolation = np.array(precip_forecast_decomp["means"]) + self.__state.sigma_extrapolation = np.array(precip_forecast_decomp["stds"]) - # Methods for specific functionalities - def _check_inputs(self): - """Validate input data and configurations.""" - # Implement input checks as needed + # 2.2 If necessary, recompose (NWP) model forecasts + self.__state.precip_models_cascade = None + + if self.__precip_models.ndim != 4: + self.__state.precip_models_cascade = self.__precip_models + self.__precip_models = _compute_cascade_recomposition_nwp( + self.__precip_models, self.__params.recomposition_method + ) + + # 2.3 Check for zero input fields in the radar and NWP data. 
+ self.__params.zero_precip_radar = blending.utils.check_norain( + self.__precip, + self.__config.precip_threshold, + self.__config.norain_threshold, + ) + # The norain fraction threshold used for nwp is the default value of 0.0, + # since nwp does not suffer from clutter. + self.__params.zero_precip_model_fields = blending.utils.check_norain( + self.__precip_models, + self.__config.precip_threshold, + self.__config.norain_threshold, + ) + + def __zero_precipitation_forecast(self): + print( + "No precipitation above the threshold found in both the radar and NWP fields" + ) + print("The resulting forecast will contain only zeros") + # Create the output list + precip_forecast = [[] for j in range(self.__config.n_ens_members)] + + # Save per time step to ensure the array does not become too large if + # no return_output is requested and callback is not None. + for t, subtimestep_idx in enumerate(self.__timesteps): + # If the timestep is not the first one, we need to provide the zero forecast + if t > 0: + # Create an empty np array with shape [n_ens_members, rows, cols] + # and fill it with the minimum value from precip (corresponding to + # zero precipitation) + N, M = self.__precip.shape + precip_forecast_workers = np.full( + (self.__config.n_ens_members, N, M), self.__params.precip_zerovalue + ) + if subtimestep_idx: + if self.__config.callback is not None: + if precip_forecast_workers.shape[1] > 0: + self.__config.callback(precip_forecast_workers.squeeze()) + if self.__config.return_output: + for j in range(self.__config.n_ens_members): + precip_forecast[j].append(precip_forecast_workers[j]) + + precip_forecast_workers = None + + if self.__config.measure_time: + zero_precip_time = time.time() - self.__start_time_init + + if self.__config.return_output: + precip_forecast_all_members_all_times = np.stack( + [ + np.stack(precip_forecast[j]) + for j in range(self.__config.n_ens_members) + ] + ) + if self.__config.measure_time: + return ( + 
precip_forecast_all_members_all_times, + zero_precip_time, + zero_precip_time, + ) + else: + return precip_forecast_all_members_all_times + else: + return None + + def __perform_extrapolation(self): + pass + + def __apply_noise_and_ar_model(self): + pass + + def __initialize_velocity_perturbations(self): pass - def _initialize(self): - """Perform any additional initialization steps.""" - # Initialize variables in self.params as needed + def __initialize_precipitation_mask(self): pass - def _transform_to_lagrangian(self): - """Transform precipitation data to Lagrangian coordinates.""" - # Use self.state and self.params as needed + def __initialize_fft_objects(self): pass - def _compute_cascade_decomposition_radar(self): - """Compute the cascade decomposition for radar data.""" - # Update self.params with computed cascades + def __return_state_dict(self): pass - def _estimate_ar_parameters_radar(self): - """Estimate AR parameters for radar data.""" - # Update self.params.PHI + def __return_params_dict(self): pass - def _init_random_generators_for_noise(self): - """Initialize random generators for noise.""" - # Update self.params.randgen_precip + def __update_state(self, state, params): pass - def _init_velocity_perturbations(self): - """Initialize velocity perturbations if required.""" - # Update self.params.velocity_perturbations, generate_vel_noise + def __update_deterministic_ar_model(self, state, params): pass - def _initialize_forecast_variables(self): - """Set up variables needed for the forecast loop.""" - # Initialize variables in self.params + def __apply_ar_model_to_cascades(self, j, state, params): pass - def _run_forecast_loop(self): - """Run the main forecast loop over time steps.""" - # Use self.state and self.params as needed + def __generate_and_decompose_noise(self, j, state, params): pass - # Additional helper methods as needed + def __recompose_and_apply_mask(self, j, state, params): + pass + + def __apply_precipitation_mask(self, precip_forecast, 
j, state, params): + pass + + def __measure_time(self, label, start_time): + """ + Measure and print the time taken for a specific part of the process. + + Parameters: + - label: A description of the part of the process being measured. + - start_time: The timestamp when the process started (from time.time()). + """ + if self.__config.measure_time: + elapsed_time = time.time() - start_time + print(f"{label} took {elapsed_time:.2f} seconds.") + + def reset_states_and_params(self): + """ + Reset the internal state and parameters of the nowcaster to allow multiple forecasts. + This method resets the state and params to their initial conditions without reinitializing + the inputs like precip, velocity, time_steps, or config. + """ + # Re-initialize the state and parameters + self.__state = StepsBlendingState() + self.__params = StepsBlendingParams() + + # Reset time measurement variables + self.__start_time_init = None + self.__init_time = None + self.__mainloop_time = None + + +def calculate_ratios(correlations): + """Calculate explained variance ratios from correlation. + + Parameters + ---------- + Array of shape [component, scale_level, ...] + containing correlation (skills) for each component (NWP and nowcast), + scale level, and optionally along [y, x] dimensions. + + Returns + ------- + out : numpy array + An array containing the ratios of explain variance for each + component, scale level, ... + """ + # correlations: [component, scale, ...] + square_corrs = np.square(correlations) + # Calculate the ratio of the explained variance to the unexplained + # variance of the nowcast and NWP model components + out = square_corrs / (1 - square_corrs) + # out: [component, scale, ...] 
+ return out -class SkillScoreManager: - def __init__(self, config: StepsBlendingConfig): - self.outdir_path_skill = config.outdir_path_skill - self.clim_kwargs = config.clim_kwargs +def calculate_weights_bps(correlations): + """Calculate BPS blending weights for STEPS blending from correlation. - def compute_initial_skill(self, observed_cascades, model_cascades, domain_mask): - """Calculate the initial skill of NWP models at t=0.""" - pass # Implement as needed + Parameters + ---------- + correlations : array-like + Array of shape [component, scale_level, ...] + containing correlation (skills) for each component (NWP and nowcast), + scale level, and optionally along [y, x] dimensions. - def update_skill(self, lead_time, correlations, model_indices): - """Update the skill scores based on lead time.""" - pass # Implement as needed + Returns + ------- + weights : array-like + Array of shape [component+1, scale_level, ...] + containing the weights to be used in STEPS blending for + each original component plus an addtional noise component, scale level, + and optionally along [y, x] dimensions. - def save_skill(self, current_skill, validtime): - """Save the skill scores to disk.""" - pass # Implement as needed + References + ---------- + :cite:`BPS2006` + + Notes + ----- + The weights in the BPS method can sum op to more than 1.0. + """ + # correlations: [component, scale, ...] + # Check if the correlations are positive, otherwise rho = 10e-5 + correlations = np.where(correlations < 10e-5, 10e-5, correlations) + + # If we merge more than one component with the noise cascade, we follow + # the weights impolementation in either :cite:`BPS2006` or :cite:`SPN2013`. + if correlations.shape[0] > 1: + # Calculate weights for each source + ratios = calculate_ratios(correlations) + # ratios: [component, scale, ...] + total_ratios = np.sum(ratios, axis=0) + # total_ratios: [scale, ...] - the denominator of eq. 
11 & 12 in BPS2006 + weights = correlations * np.sqrt(ratios / total_ratios) + # weights: [component, scale, ...] + # Calculate the weight of the noise component. + # Original BPS2006 method in the following two lines (eq. 13) + total_square_weights = np.sum(np.square(weights), axis=0) + noise_weight = np.sqrt(1.0 - total_square_weights) + # Finally, add the noise_weights to the weights variable. + weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) + + # Otherwise, the weight equals the correlation on that scale level and + # the noise component weight equals 1 - this weight. This only occurs for + # the weights calculation outside the radar domain where in the case of 1 + # NWP model or ensemble member, no blending of multiple models has to take + # place + else: + noise_weight = 1.0 - correlations + weights = np.concatenate((correlations, noise_weight), axis=0) + + return weights def forecast( @@ -380,7 +855,6 @@ def forecast( Time step of the motion vectors (minutes). Required if vel_pert_method is not None or mask_method is 'incremental'. issuetime: datetime - Datetime object containing the date and time for which the forecast is issued. n_ens_members: int The number of ensemble members to generate. This number should always be @@ -958,18 +1432,20 @@ def forecast( ) # 6. Initialize all the random generators and prepare for the forecast loop - randgen_precip, velocity_perturbations, generate_vel_noise = ( - _init_random_generators( - velocity, - noise_method, - vel_pert_method, - vp_par, - vp_perp, - seed, - n_ens_members, - kmperpixel, - timestep, - ) + ( + randgen_precip, + velocity_perturbations, + generate_vel_noise, + ) = _init_random_generators( + velocity, + noise_method, + vel_pert_method, + vp_par, + vp_perp, + seed, + n_ens_members, + kmperpixel, + timestep, ) ( previous_displacement, @@ -2103,89 +2579,6 @@ def worker(j): return None -def calculate_ratios(correlations): - """Calculate explained variance ratios from correlation. 
- - Parameters - ---------- - Array of shape [component, scale_level, ...] - containing correlation (skills) for each component (NWP and nowcast), - scale level, and optionally along [y, x] dimensions. - - Returns - ------- - out : numpy array - An array containing the ratios of explain variance for each - component, scale level, ... - """ - # correlations: [component, scale, ...] - square_corrs = np.square(correlations) - # Calculate the ratio of the explained variance to the unexplained - # variance of the nowcast and NWP model components - out = square_corrs / (1 - square_corrs) - # out: [component, scale, ...] - return out - - -def calculate_weights_bps(correlations): - """Calculate BPS blending weights for STEPS blending from correlation. - - Parameters - ---------- - correlations : array-like - Array of shape [component, scale_level, ...] - containing correlation (skills) for each component (NWP and nowcast), - scale level, and optionally along [y, x] dimensions. - - Returns - ------- - weights : array-like - Array of shape [component+1, scale_level, ...] - containing the weights to be used in STEPS blending for - each original component plus an addtional noise component, scale level, - and optionally along [y, x] dimensions. - - References - ---------- - :cite:`BPS2006` - - Notes - ----- - The weights in the BPS method can sum op to more than 1.0. - """ - # correlations: [component, scale, ...] - # Check if the correlations are positive, otherwise rho = 10e-5 - correlations = np.where(correlations < 10e-5, 10e-5, correlations) - - # If we merge more than one component with the noise cascade, we follow - # the weights impolementation in either :cite:`BPS2006` or :cite:`SPN2013`. - if correlations.shape[0] > 1: - # Calculate weights for each source - ratios = calculate_ratios(correlations) - # ratios: [component, scale, ...] - total_ratios = np.sum(ratios, axis=0) - # total_ratios: [scale, ...] - the denominator of eq. 
11 & 12 in BPS2006 - weights = correlations * np.sqrt(ratios / total_ratios) - # weights: [component, scale, ...] - # Calculate the weight of the noise component. - # Original BPS2006 method in the following two lines (eq. 13) - total_square_weights = np.sum(np.square(weights), axis=0) - noise_weight = np.sqrt(1.0 - total_square_weights) - # Finally, add the noise_weights to the weights variable. - weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) - - # Otherwise, the weight equals the correlation on that scale level and - # the noise component weight equals 1 - this weight. This only occurs for - # the weights calculation outside the radar domain where in the case of 1 - # NWP model or ensemble member, no blending of multiple models has to take - # place - else: - noise_weight = 1.0 - correlations - weights = np.concatenate((correlations, noise_weight), axis=0) - - return weights - - def calculate_weights_spn(correlations, covariance): """Calculate SPN blending weights for STEPS blending from correlation. 
diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 0f0b27b25..b04e82756 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -341,9 +341,9 @@ def compute_forecast(self): if self.__config.measure_time: self.__start_time_init = time.time() - self.__initialize_nowcast_components() # Slice the precipitation field to only use the last ar_order + 1 fields self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() + self.__initialize_nowcast_components() self.__perform_extrapolation() self.__apply_noise_and_ar_model() @@ -358,9 +358,10 @@ def compute_forecast(self): self.__nowcast_main() if self.__config.measure_time: - self.__state.precip_forecast, self.__mainloop_time = ( - self.__state.precip_forecast - ) + ( + self.__state.precip_forecast, + self.__mainloop_time, + ) = self.__state.precip_forecast # Stack and return the forecast output if self.__config.return_output: @@ -392,8 +393,8 @@ def __nowcast_main(self): ] # Extract the last available precipitation field # Prepare state and params dictionaries, these need to be formatted a specific way for the nowcast_main_loop - state = self.__initialize_state() - params = self.__initialize_params(precip) + state = self.__return_state_dict() + params = self.__return_params_dict(precip) print("Starting nowcast computation.") @@ -589,9 +590,10 @@ def __initialize_nowcast_components(self): ) # Get the decomposition method (e.g., FFT) - self.__params.decomposition_method, self.__params.recomposition_method = ( - cascade.get_method(self.__config.decomposition_method) - ) + ( + self.__params.decomposition_method, + self.__params.recomposition_method, + ) = cascade.get_method(self.__config.decomposition_method) # Get the extrapolation method (e.g., semilagrangian) self.__params.extrapolation_method = extrapolation.get_method( @@ -957,7 +959,7 @@ def __initialize_fft_objects(self): self.__state.fft_objs.append(fft_obj) print("FFT objects initialized successfully.") - def 
__initialize_state(self): + def __return_state_dict(self): """ Initialize the state dictionary used during the nowcast iteration. """ @@ -971,7 +973,7 @@ def __initialize_state(self): "randgen_prec": self.__state.random_generator_precip, } - def __initialize_params(self, precip): + def __return_params_dict(self, precip): """ Initialize the params dictionary used during the nowcast iteration. """ From 1eede39494d9d8ca6f265bc3c564ae3123cadbd2 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 28 Nov 2024 17:58:44 +0100 Subject: [PATCH 36/65] Added code to estimation of ar parameters of radar --- pysteps/blending/steps.py | 168 +++++++++++++++++++++++++++++++++++++- 1 file changed, 165 insertions(+), 3 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index be3eee067..302a2ea96 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -141,6 +141,7 @@ class StepsBlendingParams: mask_threshold: Any = None zero_precip_radar: bool = False zero_precip_model_fields: bool = False + PHI: Any = None @dataclass @@ -166,6 +167,7 @@ class StepsBlendingState: fft_objs: Any = None t_prev_timestep: Any = None t_leadtime_since_start_forecast: Any = None + precip_noise_input: Any = None # Add more state variables as needed @@ -220,10 +222,17 @@ def __nowcast_main(self): self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() self.__initialize_nowcast_components() self.__prepare_radar_and_NWP_fields() + + # Determine if rain is present in both radar and NWP fields if self.__params.zero_precip_radar and self.__params.zero_precip_model_fields: self.__zero_precipitation_forecast() else: - pass + # Prepare the data for the zero precipitation radar case and initialize the noise correctly + if self.__params.zero_precip_radar: + self.__prepare_nowcast_for_zero_radar() + else: + self.__state.precip_noise_input = self.__precip.copy() + self.__estimate_ar_parameters_radar() def __check_inputs(self): """Validates the inputs 
and determines if the user provided raw forecasts or decomposed forecasts.""" @@ -482,6 +491,7 @@ def __initialize_nowcast_components(self): def __prepare_radar_and_NWP_fields(self): # determine the precipitation threshold mask if self.__config.conditional: + # TODO: is this logical_and correct here? Now only those places where precip is in all images is saved? self.__params.mask_threshold = np.logical_and.reduce( [ self.__precip[i, :, :] >= self.__config.precip_threshold @@ -507,8 +517,8 @@ def __prepare_radar_and_NWP_fields(self): self.__config.num_workers, ) - # 2. Perform the cascade decomposition for the input precip fields and - # and, if necessary, for the (NWP) model fields + # 2. Perform the cascade decomposition for the input precip fields and, + # if necessary, for the (NWP) model fields # 2.1 Compute the cascade decompositions of the input precipitation fields """Compute the cascade decompositions of the input precipitation fields.""" precip_forecast_decomp = [] @@ -609,6 +619,158 @@ def __zero_precipitation_forecast(self): else: return None + def __prepare_nowcast_for_zero_radar(self): + # 2.3.3 If zero_precip_radar, make sure that precip_cascade does not contain + # only nans or infs. If so, fill it with the zero value. 
+ + # Look for a timestep and member with rain so that we have a sensible decomposition + done = False + for t in self.__timesteps: + if done: + break + for j in range(self.__precip_models.shape[0]): + if not blending.utils.check_norain( + self.__precip_models[j, t], + self.__config.precip_threshold, + self.__config.norain_threshold, + ): + if self.__state.precip_models_cascade is not None: + self.__state.precip_cascade[ + ~np.isfinite(self.__state.precip_cascade) + ] = np.nanmin( + self.__state.precip_models_cascade[j, t]["cascade_levels"] + ) + continue + precip_models_cascade_temp = self.__params.decomposition_method( + self.__precip_models[j, t, :, :], + bp_filter=self.__params.bandpass_filter, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + )["cascade_levels"] + self.__state.precip_cascade[ + ~np.isfinite(self.__state.precip_cascade) + ] = np.nanmin(precip_models_cascade_temp) + done = True + break + + # 2.3.5 If zero_precip_radar is True, only use the velocity field of the NWP + # forecast. I.e., velocity (radar) equals velocity_model at the first time + # step. + # Use the velocity from velocity_models at time step 0 + self.__velocity = self.__velocity_models[:, 0, :, :, :].astype( + np.float64, copy=False + ) + # Take the average over the first axis, which corresponds to n_models + # (hence, the model average) + self.__velocity = np.mean(self.__velocity, axis=0) + + # 3. Initialize the noise method. + # If zero_precip_radar is True, initialize noise based on the NWP field time + # step where the fraction of rainy cells is highest (because other lead times + # might be zero as well). 
Else, initialize the noise with the radar + # rainfall data + """Initialize noise based on the NWP field time step where the fraction of rainy cells is highest""" + if self.__config.precip_threshold is None: + self.__config.precip_threshold = np.nanmin(self.__precip_models) + + max_rain_pixels = -1 + max_rain_pixels_j = -1 + max_rain_pixels_t = -1 + for j in range(self.__precip_models.shape[0]): + for t in self.__timesteps: + rain_pixels = self.__precip_models[j][t][ + self.__precip_models[j][t] > self.__config.precip_threshold + ].size + if rain_pixels > max_rain_pixels: + max_rain_pixels = rain_pixels + max_rain_pixels_j = j + max_rain_pixels_t = t + self.__state.precip_noise_input = self.__precip_models[max_rain_pixels_j][ + max_rain_pixels_t + ] + + # Make sure precip_noise_input is three-dimensional + if len(self.__state.precip_noise_input.shape) != 3: + self.__state.precip_noise_input = self.__state.precip_noise_input[ + np.newaxis, :, : + ] + + def __estimate_ar_parameters_radar(self): + # 4. Estimate AR parameters for the radar rainfall field + """Estimate AR parameters for the radar rainfall field.""" + # If there are values in the radar fields, compute the auto-correlations + GAMMA = np.empty((self.__config.n_cascade_levels, self.__config.ar_order)) + if not self.__params.zero_precip_radar: + # compute lag-l temporal auto-correlation coefficients for each cascade level + for i in range(self.__config.n_cascade_levels): + GAMMA[i, :] = correlation.temporal_autocorrelation( + self.__state.precip_cascade[i], mask=self.__params.mask_threshold + ) + + # Else, use standard values for the auto-correlations + else: + # Get the climatological lag-1 and lag-2 auto-correlation values from Table 2 + # in `BPS2004`. + # Hard coded, change to own (climatological) values when present. + # TODO: add user warning here so users can be aware of this without reading the code? 
+ GAMMA = np.array( + [ + [0.99805, 0.9925, 0.9776, 0.9297, 0.796, 0.482, 0.079, 0.0006], + [0.9933, 0.9752, 0.923, 0.750, 0.367, 0.069, 0.0018, 0.0014], + ] + ) + + # Check whether the number of cascade_levels is correct + if GAMMA.shape[1] > self.__config.n_cascade_levels: + GAMMA = GAMMA[:, 0 : self.__config.n_cascade_levels] + elif GAMMA.shape[1] < self.__config.n_cascade_levels: + # Get the number of cascade levels that is missing + n_extra_lev = self.__config.n_cascade_levels - GAMMA.shape[1] + # Append the array with correlation values of 10e-4 + GAMMA = np.append( + GAMMA, + [np.repeat(0.0006, n_extra_lev), np.repeat(0.0014, n_extra_lev)], + axis=1, + ) + + # Finally base GAMMA.shape[0] on the AR-level + if self.__config.ar_order == 1: + GAMMA = GAMMA[0, :] + if self.__config.ar_order > 2: + for repeat_index in range(self.__config.ar_order - 2): + GAMMA = np.vstack((GAMMA, GAMMA[1, :])) + + # Finally, transpose GAMMA to ensure that the shape is the same as np.empty((n_cascade_levels, ar_order)) + GAMMA = GAMMA.transpose() + assert GAMMA.shape == ( + self.__config.n_cascade_levels, + self.__config.ar_order, + ) + + # Print the GAMMA value + nowcast_utils.print_corrcoefs(GAMMA) + + if self.__config.ar_order == 2: + # adjust the lag-2 correlation coefficient to ensure that the AR(p) + # process is stationary + for i in range(self.__config.n_cascade_levels): + GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2( + GAMMA[i, 0], GAMMA[i, 1] + ) + + # estimate the parameters of the AR(p) model from the auto-correlation + # coefficients + self.__params.PHI = np.empty( + (self.__config.n_cascade_levels, self.__config.ar_order + 1) + ) + for i in range(self.__config.n_cascade_levels): + self.__params.PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :]) + + nowcast_utils.print_ar_params(self.__params.PHI) + def __perform_extrapolation(self): pass From a18f1f62d374b65838efd30268107fcb4c4a58d7 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 29 Nov 2024 
16:23:20 +0100 Subject: [PATCH 37/65] Next go, start with forecast loop #7 --- pysteps/blending/steps.py | 286 +++++++++++++++++++++++++++++++++----- 1 file changed, 252 insertions(+), 34 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 302a2ea96..6209bb6b6 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -84,7 +84,7 @@ class StepsBlendingConfig: noise_method: Optional[str] noise_stddev_adj: Optional[str] ar_order: int - vel_pert_method: Optional[str] + velocity_perturbation_method: Optional[str] weights_method: str conditional: bool probmatching_method: Optional[str] @@ -115,14 +115,16 @@ class StepsBlendingParams: sigma_extrapolation: np.ndarray # Std devs of extrapolated cascades bandpass_filter: Any # Band-pass filter object fft: Any # FFT method object - generate_perturb: Callable # Perturbation generator - generate_noise: Callable # Noise generator + perturbation_generator: Callable # Perturbation generator + noise_generator: Callable # Noise generator generate_vel_noise: Optional[Callable] # Velocity noise generator extrapolation_method: Any = None decomposition_method: Any = None recomposition_method: Any = None - vp_par: Optional[np.ndarray] = None # Velocity perturbation parameters (parallel) - vp_perp: Optional[np.ndarray] = ( + velocity_perturbations_parallel: Optional[np.ndarray] = ( + None # Velocity perturbation parameters (parallel) + ) + velocity_perturbations_perpendicular: Optional[np.ndarray] = ( None # Velocity perturbation parameters (perpendicular) ) fft_objs: List[Any] = field( @@ -146,17 +148,14 @@ class StepsBlendingParams: @dataclass class StepsBlendingState: - precip_cascade: Any = None + precip_cascades: Any = None mu_extrapolation: Any = None sigma_extrapolation: Any = None - precip_models_cascade: Any = None - generate_perturb: Any = None - generate_noise: Any = None - noise_std_coeffs: Any = None + precip_models_cascades: Any = None PHI: Any = None randgen_precip: Any = 
None velocity_perturbations: Any = None - generate_vel_noise: Any = None + generate_velocity_noise: Any = None previous_displacement: Any = None previous_displacement_noise_cascade: Any = None previous_displacement_prob_matching: Any = None @@ -168,6 +167,9 @@ class StepsBlendingState: t_prev_timestep: Any = None t_leadtime_since_start_forecast: Any = None precip_noise_input: Any = None + precip_noise_cascade: Any = None + precip_mean_noise: Any = None + precip_std_noise: Any = None # Add more state variables as needed @@ -232,7 +234,12 @@ def __nowcast_main(self): self.__prepare_nowcast_for_zero_radar() else: self.__state.precip_noise_input = self.__precip.copy() + self.__initialize_noise() self.__estimate_ar_parameters_radar() + self.__multiply_precip_cascade_to_match_ensemble_members() + self.__initialize_random_generators() + self.__prepare_forecast_loop() + self.__initialize_noise_cascade() def __check_inputs(self): """Validates the inputs and determines if the user provided raw forecasts or decomposed forecasts.""" @@ -355,14 +362,18 @@ def __check_inputs(self): ) if self.__config.kmperpixel is None: - if self.__config.vel_pert_method is not None: - raise ValueError("vel_pert_method is set but kmperpixel=None") + if self.__config.velocity_perturbation_method is not None: + raise ValueError( + "velocity_perturbation_method is set but kmperpixel=None" + ) if self.__config.mask_method == "incremental": raise ValueError("mask_method='incremental' but kmperpixel=None") if self.__config.timestep is None: - if self.__config.vel_pert_method is not None: - raise ValueError("vel_pert_method is set but timestep=None") + if self.__config.velocity_perturbation_method is not None: + raise ValueError( + "velocity_perturbation_method is set but timestep=None" + ) if self.__config.mask_method == "incremental": raise ValueError("mask_method='incremental' but timestep=None") @@ -401,7 +412,9 @@ def __print_forecast_info(self): print( f"noise adjustment: {'yes' if 
self.__config.noise_stddev_adj else 'no'}" ) - print(f"velocity perturbator: {self.__config.vel_pert_method}") + print( + f"velocity perturbator: {self.__config.velocity_perturbation_method}" + ) print(f"blending weights method: {self.__config.weights_method}") print( f"conditional statistics: {'yes' if self.__config.conditional else 'no'}" @@ -422,19 +435,28 @@ def __print_forecast_info(self): print(f"parallel threads: {self.__config.num_workers}") print(f"number of cascade levels: {self.__config.n_cascade_levels}") print(f"order of the AR(p) model: {self.__config.ar_order}") - if self.__config.vel_pert_method == "bps": - vp_par = self.__config.vel_pert_kwargs.get( - "p_par", noise.motion.get_default_params_bps_par() + if self.__config.velocity_perturbation_method == "bps": + self.__params.velocity_perturbations_parallel = ( + self.__config.vel_pert_kwargs.get( + "p_par", noise.motion.get_default_params_bps_par() + ) ) - vp_perp = self.__config.vel_pert_kwargs.get( - "p_perp", noise.motion.get_default_params_bps_perp() + self.__params.velocity_perturbations_perpendicular = ( + self.__config.vel_pert_kwargs.get( + "p_perp", noise.motion.get_default_params_bps_perp() + ) + ) + print( + f"vel. pert., parallel: {self.__params.velocity_perturbations_parallel[0]},{self.__params.velocity_perturbations_parallel[1]},{self.__params.velocity_perturbations_parallel[2]}" ) - print(f"vel. pert., parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}") print( - f"vel. pert., perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" + f"vel. pert., perpendicular: {self.__params.velocity_perturbations_perpendicular[0]},{self.__params.velocity_perturbations_perpendicular[1]},{self.__params.velocity_perturbations_perpendicular[2]}" ) else: - vp_par, vp_perp = None, None + ( + self.__params.velocity_perturbations_parallel, + self.__params.velocity_perturbations_perpendicular, + ) = (None, None) if self.__config.conditional or self.__config.mask_method is not None: print(f"precip. 
intensity threshold: {self.__config.precip_threshold}") @@ -537,7 +559,7 @@ def __prepare_radar_and_NWP_fields(self): # Rearrange the cascaded into a four-dimensional array of shape # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model - self.__state.precip_cascade = nowcast_utils.stack_cascades( + self.__state.precip_cascades = nowcast_utils.stack_cascades( precip_forecast_decomp, self.__config.n_cascade_levels ) @@ -546,10 +568,10 @@ def __prepare_radar_and_NWP_fields(self): self.__state.sigma_extrapolation = np.array(precip_forecast_decomp["stds"]) # 2.2 If necessary, recompose (NWP) model forecasts - self.__state.precip_models_cascade = None + self.__state.precip_models_cascades = None if self.__precip_models.ndim != 4: - self.__state.precip_models_cascade = self.__precip_models + self.__state.precip_models_cascades = self.__precip_models self.__precip_models = _compute_cascade_recomposition_nwp( self.__precip_models, self.__params.recomposition_method ) @@ -634,11 +656,11 @@ def __prepare_nowcast_for_zero_radar(self): self.__config.precip_threshold, self.__config.norain_threshold, ): - if self.__state.precip_models_cascade is not None: - self.__state.precip_cascade[ - ~np.isfinite(self.__state.precip_cascade) + if self.__state.precip_models_cascades is not None: + self.__state.precip_cascades[ + ~np.isfinite(self.__state.precip_cascades) ] = np.nanmin( - self.__state.precip_models_cascade[j, t]["cascade_levels"] + self.__state.precip_models_cascades[j, t]["cascade_levels"] ) continue precip_models_cascade_temp = self.__params.decomposition_method( @@ -650,8 +672,8 @@ def __prepare_nowcast_for_zero_radar(self): compute_stats=True, compact_output=True, )["cascade_levels"] - self.__state.precip_cascade[ - ~np.isfinite(self.__state.precip_cascade) + self.__state.precip_cascades[ + ~np.isfinite(self.__state.precip_cascades) ] = np.nanmin(precip_models_cascade_temp) done = True break @@ -698,6 +720,61 @@ def __prepare_nowcast_for_zero_radar(self): 
np.newaxis, :, : ] + def __initialize_noise(self): + """Initialize the noise method.""" + if self.__config.noise_method is not None: + # get methods for perturbations + init_noise, self.__params.noise_generator = noise.get_method( + self.__config.noise_method + ) + + # initialize the perturbation generator for the precipitation field + self.__params.perturbation_generator = init_noise( + self.__precip, + fft_method=self.__params.fft, + **self.__config.noise_kwargs, + ) + + if self.__config.noise_stddev_adj == "auto": + print("Computing noise adjustment coefficients... ", end="", flush=True) + if self.__config.measure_time: + starttime = time.time() + + precip_forecast_min = np.min(self.__precip) + self.__params.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( + self.__precip[-1, :, :], + self.__config.precip_threshold, + precip_forecast_min, + self.__params.bandpass_filter, + self.__params.decomposition_method, + self.__params.perturbation_generator, + self.__params.noise_generator, + 20, + conditional=True, + num_workers=self.__config.num_workers, + seed=self.__config.seed, + ) + + if self.__config.measure_time: + print(f"{time.time() - starttime:.2f} seconds.") + else: + print("done.") + elif self.__config.noise_stddev_adj == "fixed": + f = lambda k: 1.0 / (0.75 + 0.09 * k) + self.__params.noise_std_coeffs = [ + f(k) for k in range(1, self.__config.n_cascade_levels + 1) + ] + else: + self.__params.noise_std_coeffs = np.ones(self.__config.n_cascade_levels) + + if self.__params.noise_stddev_adj is not None: + print(f"noise std. dev. coeffs: {self.__params.noise_std_coeffs}") + + else: + self.__params.perturbation_generator = None + self.__params.noise_generator = None + self.__params.noise_std_coeffs = None + def __estimate_ar_parameters_radar(self): # 4. 
Estimate AR parameters for the radar rainfall field """Estimate AR parameters for the radar rainfall field.""" @@ -707,7 +784,7 @@ def __estimate_ar_parameters_radar(self): # compute lag-l temporal auto-correlation coefficients for each cascade level for i in range(self.__config.n_cascade_levels): GAMMA[i, :] = correlation.temporal_autocorrelation( - self.__state.precip_cascade[i], mask=self.__params.mask_threshold + self.__state.precip_cascades[i], mask=self.__params.mask_threshold ) # Else, use standard values for the auto-correlations @@ -771,6 +848,147 @@ def __estimate_ar_parameters_radar(self): nowcast_utils.print_ar_params(self.__params.PHI) + def __multiply_precip_cascade_to_match_ensemble_members(self): + # 5. Repeat precip_cascade for n ensemble members + # First, discard all except the p-1 last cascades because they are not needed + # for the AR(p) model + + self.__state.precip_cascades = np.stack( + [ + [ + self.__state.precip_cascades[i][-self.__config.ar_order :].copy() + for i in range(self.__config.n_cascade_levels) + ] + ] + * self.__config.n_ens_members + ) + + def __initialize_random_generators(self): + # 6. Initialize all the random generators and prepare for the forecast loop + """Initialize all the random generators.""" + # TODO: randgen_motion and randgen_precip are not defined if no noise method is given? Should we end the program in that case? 
+ if self.__config.noise_method is not None: + self.__state.randgen_precip = [] + randgen_motion = [] + for j in range(self.__config.n_ens_members): + rs = np.random.RandomState(self.__config.seed) + self.__state.randgen_precip.append(rs) + seed = rs.randint(0, high=1e9) + rs = np.random.RandomState(seed) + randgen_motion.append(rs) + seed = rs.randint(0, high=1e9) + + if self.__config.velocity_perturbation_method is not None: + ( + init_velocity_noise, + self.__state.generate_velocity_noise, + ) = noise.get_method(self.__config.velocity_perturbation_method) + + # initialize the perturbation generators for the motion field + self.__state.velocity_perturbations = [] + for j in range(self.__config.n_ens_members): + kwargs = { + "randstate": randgen_motion[j], + "p_par": self.__params.velocity_perturbations_parallel, + "p_perp": self.__params.velocity_perturbations_perpendicular, + } + vp_ = init_velocity_noise( + self.__velocity, + 1.0 / self.__config.kmperpixel, + self.__config.timestep, + **kwargs, + ) + self.__state.velocity_perturbations.append(vp_) + else: + ( + self.__state.velocity_perturbations, + self.__state.generate_velocity_noise, + ) = (None, None) + + def __prepare_forecast_loop(self): + """Prepare for the forecast loop.""" + # Empty arrays for the previous displacements and the forecast cascade + self.__state.previous_displacement = np.stack( + [None for j in range(self.__config.n_ens_members)] + ) + self.__state.previous_displacement_noise_cascade = np.stack( + [None for j in range(self.__config.n_ens_members)] + ) + self.__state.previous_displacement_prob_matching = np.stack( + [None for j in range(self.__config.n_ens_members)] + ) + self.__state.precip_forecast = [[] for j in range(self.__config.n_ens_members)] + + if self.__config.mask_method == "incremental": + # get mask parameters + self.__state.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) + mask_f = self.__config.mask_kwargs.get("mask_f", 1.0) + # initialize the structuring element 
+ struct = generate_binary_structure(2, 1) + # iterate it to expand it nxn + n = mask_f * self.__config.timestep / self.__config.kmperpixel + self.__state.struct = iterate_structure(struct, int((n - 1) / 2.0)) + else: + self.__state.mask_rim, self.__state.struct = None, None + + if self.__config.noise_method is None: + self.__state.precip_forecast_non_perturbed = [ + self.__state.precip_cascades[0][i].copy() + for i in range(self.__config.n_cascade_levels) + ] + else: + self.__state.precip_forecast_non_perturbed = None + + self.__state.fft_objs = [] + for i in range(self.__config.n_ens_members): + self.__state.fft_objs.append( + utils.get_method( + self.__config.fft_method, + shape=self.__state.precip_cascades.shape[-2:], + ) + ) + + def __initialize_noise_cascade(self): + """Initialize the noise cascade with identical noise for all AR(n) steps + We also need to return the mean and standard deviations of the noise + for the recombination of the noise before advecting it. + """ + self.__state.precip_noise_cascade = np.zeros(self.__state.precip_cascades.shape) + self.__state.precip_mean_noise = np.zeros( + (self.__config.n_ens_members, self.__config.n_cascade_levels) + ) + self.__state.precip_std_noise = np.zeros( + (self.__config.n_ens_members, self.__config.n_cascade_levels) + ) + if self.__config.noise_method: + for j in range(self.__config.n_ens_members): + # TODO: check rest later, starts at #3 so should look above what these terms match to + epsilon = self.__params.noise_generator( + self.__params.perturbation_generator, + randstate=self.__state.randgen_precip[j], + fft_method=self.__state.fft_objs[j], + domain=self.__config.domain, + ) + epsilon_decomposed = self.__params.decomposition_method( + epsilon, + self.__params.bandpass_filter, + fft_method=self.__state.fft_objs[j], + input_domain=self.__config.domain, + output_domain=self.__config.domain, + compute_stats=True, + normalize=True, + compact_output=True, + ) + self.__state.precip_mean_noise[j] = 
epsilon_decomposed["means"] + self.__state.precip_std_noise[j] = epsilon_decomposed["stds"] + for i in range(self.__config.n_cascade_levels): + epsilon_temp = epsilon_decomposed["cascade_levels"][i] + epsilon_temp *= self.__params.noise_std_coeffs[i] + for n in range(self.__config.ar_order): + self.__state.precip_noise_cascade[j][i][n] = epsilon_temp + epsilon_decomposed = None + epsilon_temp = None + def __perform_extrapolation(self): pass From 8d16c11043e224c8e22126f5d0c45d32c676dc17 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 2 Dec 2024 13:47:17 +0100 Subject: [PATCH 38/65] Added some uniformity between nowcast and blending steps. Now at # 8.4 for the refactoring --- pysteps/blending/steps.py | 552 +++++++++++++++++++++++++++++++++++--- pysteps/nowcasts/steps.py | 11 +- 2 files changed, 518 insertions(+), 45 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 6209bb6b6..1f314a757 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -107,6 +107,7 @@ class StepsBlendingConfig: return_output: bool = True +# TODO: typing could be improved here @dataclass class StepsBlendingParams: PHI: np.ndarray # AR(p) model parameters @@ -132,7 +133,6 @@ class StepsBlendingParams: ) # FFT objects for ensemble members mask_rim: Optional[int] = None # Rim size for masking struct: Optional[np.ndarray] = None # Structuring element for mask - n_model_indices: Optional[np.ndarray] = None # NWP model indices noise_method: Optional[str] = None # Noise method used ar_order: int = 2 # Order of the AR model seed: Optional[int] = None # Random seed for reproducibility @@ -144,13 +144,18 @@ class StepsBlendingParams: zero_precip_radar: bool = False zero_precip_model_fields: bool = False PHI: Any = None + original_timesteps: Any = None + num_ensemble_workers: int = None + rho_nwp_models: Any = None + domain_mask: Any = None +# TODO: typing could be improved here @dataclass class StepsBlendingState: precip_cascades: Any = None - 
mu_extrapolation: Any = None - sigma_extrapolation: Any = None + mean_extrapolation: Any = None + std_extrapolation: Any = None precip_models_cascades: Any = None PHI: Any = None randgen_precip: Any = None @@ -167,10 +172,23 @@ class StepsBlendingState: t_prev_timestep: Any = None t_leadtime_since_start_forecast: Any = None precip_noise_input: Any = None - precip_noise_cascade: Any = None + precip_noise_cascades: Any = None precip_mean_noise: Any = None precip_std_noise: Any = None - # Add more state variables as needed + rho_extrap_cascade_prev: Any = None + rho_extrap_cascade: Any = None + subtimesteps: Any = None + is_nowcast_time_step: bool = None + # Variables to save data over (sub)time steps + precip_models_cascades_temp: Any = None + precip_models_temp: Any = None + mean_models_temp: Any = None + std_models_temp: Any = None + velocity_models_temp: Any = None + n_model_indices: Optional[np.ndarray] = None # NWP model indices + rho_forecast: Any = None + weights: Any = None + weights_model_only: Any = None class StepsBlendingNowcaster: @@ -207,13 +225,11 @@ def __init__( # Additional variables for time measurement self.__start_time_init = None + self.__zero_precip_time = None self.__init_time = None self.__mainloop_time = None def compute_forecast(self): - pass - - def __nowcast_main(self): self.__check_inputs() self.__print_forecast_info() # Measure time for initialization @@ -240,6 +256,76 @@ def __nowcast_main(self): self.__initialize_random_generators() self.__prepare_forecast_loop() self.__initialize_noise_cascade() + if self.__config.measure_time: + self.__init_time = self.__measure_time( + "initialization", self.__start_time_init + ) + + self.__blended_nowcast_main() + # Stack and return the forecast output + if self.__config.return_output: + self.__state.precip_forecast = np.stack( + [ + np.stack(self.__state.precip_forecast[j]) + for j in range(self.__config.n_ens_members) + ] + ) + if self.__config.measure_time: + return ( + 
self.__state.precip_forecast, + self.__init_time, + self.__mainloop_time, + ) + else: + return self.__state.precip_forecast + else: + return None + + def __blended_nowcast_main(self): + """ + Main nowcast loop that iterates through the ensemble members and time steps + to generate forecasts. + """ + ### + # 8. Start the forecasting loop + ### + # Isolate the last time slice of observed precipitation + precip = self.__precip[-1, :, :] + print("Starting blended nowcast computation.") + + if self.__config.measure_time: + starttime_mainloop = time.time() + + self.__config.extrap_kwargs["return_displacement"] = True + + precip_forc_prev_subtimestep = deepcopy(self.__state.precip_cascades) + noise_prev_subtimestep = deepcopy(self.__state.precip_noise_cascades) + + t_prev_timestep = [0.0 for j in range(self.__config.n_ens_members)] + t_leadtime_since_start_forecast = [ + 0.0 for j in range(self.__config.n_ens_members) + ] + + # iterate each time step + for t, subtimestep_idx in enumerate(self.__timesteps): + self.__determine_subtimesteps_and_nowcast_time_step(t, subtimestep_idx) + if self.__config.measure_time: + starttime = time.time() + self.__decompose_nwp_if_needed_and_fill_nans_in_nwp(t) + self.__find_nowcast_NWP_combination(t) + self.__determine_skill_for_current_timestep(t) + # the nowcast iteration for each ensemble member + precip_forecast_workers = [None for _ in range(self.__config.n_ens_members)] + + def worker(j): + self.__determine_skill_for_next_timestep(t, j) + self.__determine_weights_per_component() + self.__regress_extrapolation_and_noise_cascades(j) + + # Perturb and blend the advection fields + advect the extrapolation and noise cascade to the current time step + # Blend the cascades + + pass def __check_inputs(self): """Validates the inputs and determines if the user provided raw forecasts or decomposed forecasts.""" @@ -282,8 +368,10 @@ def __check_inputs(self): if isinstance(self.__timesteps, list): self.__params.time_steps_is_list = True - 
original_timesteps = [0] + list(self.__timesteps) - self.__timesteps = nowcast_utils.binned_timesteps(original_timesteps) + self.__params.original_timesteps = [0] + list(self.__timesteps) + self.__timesteps = nowcast_utils.binned_timesteps( + self.__params.original_timesteps + ) if not sorted(self.__timesteps) == self.__timesteps: raise ValueError("timesteps is not in ascending order") if self.__precip_models.shape[1] != math.ceil(self.__timesteps[-1]) + 1: @@ -564,8 +652,8 @@ def __prepare_radar_and_NWP_fields(self): ) precip_forecast_decomp = precip_forecast_decomp[-1] - self.__state.mu_extrapolation = np.array(precip_forecast_decomp["means"]) - self.__state.sigma_extrapolation = np.array(precip_forecast_decomp["stds"]) + self.__state.mean_extrapolation = np.array(precip_forecast_decomp["means"]) + self.__state.std_extrapolation = np.array(precip_forecast_decomp["stds"]) # 2.2 If necessary, recompose (NWP) model forecasts self.__state.precip_models_cascades = None @@ -756,7 +844,7 @@ def __initialize_noise(self): ) if self.__config.measure_time: - print(f"{time.time() - starttime:.2f} seconds.") + __ = self.__measure_time("Initialize noise", starttime) else: print("done.") elif self.__config.noise_stddev_adj == "fixed": @@ -947,13 +1035,24 @@ def __prepare_forecast_loop(self): shape=self.__state.precip_cascades.shape[-2:], ) ) + # TODO: moved this from # 7 to here as it seems to fit better here. 
The only parameter used and needed is PHI, this is its last use untill # 7 + # initizalize the current and previous extrapolation forecast scale for the nowcasting component + # phi1 / (1 - phi2), see BPS2004 + self.__state.rho_extrap_cascade_prev = np.repeat( + 1.0, self.__params.PHI.shape[0] + ) + self.__state.rho_extrap_cascade = self.__params.PHI[:, 0] / ( + 1.0 - self.__params.PHI[:, 1] + ) def __initialize_noise_cascade(self): """Initialize the noise cascade with identical noise for all AR(n) steps We also need to return the mean and standard deviations of the noise for the recombination of the noise before advecting it. """ - self.__state.precip_noise_cascade = np.zeros(self.__state.precip_cascades.shape) + self.__state.precip_noise_cascades = np.zeros( + self.__state.precip_cascades.shape + ) self.__state.precip_mean_noise = np.zeros( (self.__config.n_ens_members, self.__config.n_cascade_levels) ) @@ -962,7 +1061,6 @@ def __initialize_noise_cascade(self): ) if self.__config.noise_method: for j in range(self.__config.n_ens_members): - # TODO: check rest later, starts at #3 so should look above what these terms match to epsilon = self.__params.noise_generator( self.__params.perturbation_generator, randstate=self.__state.randgen_precip[j], @@ -985,48 +1083,416 @@ def __initialize_noise_cascade(self): epsilon_temp = epsilon_decomposed["cascade_levels"][i] epsilon_temp *= self.__params.noise_std_coeffs[i] for n in range(self.__config.ar_order): - self.__state.precip_noise_cascade[j][i][n] = epsilon_temp + self.__state.precip_noise_cascades[j][i][n] = epsilon_temp epsilon_decomposed = None epsilon_temp = None - def __perform_extrapolation(self): - pass + def __determine_subtimesteps_and_nowcast_time_step(self, t, subtimestep_idx): + if self.__params.time_steps_is_list: + self.__state.subtimesteps = [ + self.__params.original_timesteps[t_] for t_ in subtimestep_idx + ] + else: + self.__state.subtimesteps = [t] - def __apply_noise_and_ar_model(self): - pass + if 
(self.__params.time_steps_is_list and self.__state.subtimesteps) or ( + not self.__params.time_steps_is_list and t > 0 + ): + self.__state.is_nowcast_time_step = True + else: + self.__state.is_nowcast_time_step = False - def __initialize_velocity_perturbations(self): - pass + if self.__state.is_nowcast_time_step: + print( + f"Computing nowcast for time step {t}... ", + end="", + flush=True, + ) - def __initialize_precipitation_mask(self): - pass + def __decompose_nwp_if_needed_and_fill_nans_in_nwp(self, t): + if self.__state.precip_models_cascades is not None: + decomp_precip_models = list(self.__state.precip_models_cascades[:, t]) - def __initialize_fft_objects(self): - pass + else: + if self.__precip_models.shape[0] == 1: + decomp_precip_models = [ + self.__params.decomposition_method( + self.__precip_models[0, t, :, :], + bp_filter=self.__params.bandpass_filter, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + ) + ] + else: + with ThreadPool(self.__config.num_workers) as pool: + decomp_precip_models = pool.map( + partial( + self.__params.decomposition_method, + bp_filter=self.__params.bandpass_filter, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + ), + list(self.__precip_models[:, t, :, :]), + ) - def __return_state_dict(self): - pass + self.__state.precip_models_cascades_temp = np.array( + [decomp["cascade_levels"] for decomp in decomp_precip_models] + ) + self.__state.mean_models_temp = np.array( + [decomp["means"] for decomp in decomp_precip_models] + ) + self.__state.std_models_temp = np.array( + [decomp["stds"] for decomp in decomp_precip_models] + ) - def __return_params_dict(self): - pass + # 2.3.4 Check if the NWP fields contain nans or infinite numbers. 
If so, + # fill these with the minimum value present in precip (corresponding to + # zero rainfall in the radar observations) - def __update_state(self, state, params): - pass + """Ensure that the NWP cascade and fields do no contain any nans or infinite number""" + # Fill nans and infinite numbers with the minimum value present in precip + self.__state.precip_models_temp = self.__precip_models[:, t, :, :].astype( + np.float64, copy=False + ) # (corresponding to zero rainfall in the radar observations) + min_cascade = np.nanmin(self.__state.precip_cascades) + min_precip = np.nanmin(self.__precip) + self.__state.precip_models_cascades_temp[ + ~np.isfinite(self.__state.precip_models_cascades_temp) + ] = min_cascade + self.__state.precip_models_temp[ + ~np.isfinite(self.__state.precip_models_temp) + ] = min_precip + # Also set any nans or infs in the mean and sigma of the cascade to + # respectively 0.0 and 1.0 + self.__state.mean_models_temp[~np.isfinite(self.__state.mean_models_temp)] = 0.0 + self.__state.std_models_temp[~np.isfinite(self.__state.std_models_temp)] = 0.0 + + def __find_nowcast_NWP_combination(self, t): + # 8.1.1 Before calling the worker for the forecast loop, determine which (NWP) + # models will be combined with which nowcast ensemble members. With the + # way it is implemented at this moment: n_ens_members of the output equals + # the maximum number of (ensemble) members in the input (either the nowcasts or NWP). + + """Determine which (NWP) models will be combined with which nowcast ensemble members. + With the way it is implemented at this moment: n_ens_members of the output equals + the maximum number of (ensemble) members in the input (either the nowcasts or NWP). 
+ """ + self.__state.velocity_models_temp = self.__velocity_models[ + :, t, :, :, : + ].astype(np.float64, copy=False) + # Make sure the number of model members is not larger than or equal to n_ens_members + n_model_members = self.__state.precip_models_cascades_temp.shape[0] + if n_model_members > self.__config.n_ens_members: + raise ValueError( + "The number of NWP model members is larger than the given number of ensemble members. n_model_members <= n_ens_members." + ) - def __update_deterministic_ar_model(self, state, params): - pass + # Check if NWP models/members should be used individually, or if all of + # them are blended together per nowcast ensemble member. + if self.__config.blend_nwp_members: + self.__state.n_model_indices = None - def __apply_ar_model_to_cascades(self, j, state, params): - pass + else: + # Start with determining the maximum and mimimum number of members/models + # in both input products + n_ens_members_max = max(self.__config.n_ens_members, n_model_members) + n_ens_members_min = min(self.__config.n_ens_members, n_model_members) + # Also make a list of the model index numbers. These indices are needed + # for indexing the right climatological skill file when pysteps calculates + # the blended forecast in parallel. + if n_model_members > 1: + self.__state.n_model_indices = np.arange(n_model_members) + else: + self.__state.n_model_indices = [0] + + # Now, repeat the nowcast ensemble members or the nwp models/members until + # it has the same amount of members as n_ens_members_max. For instance, if + # you have 10 ensemble nowcasts members and 3 NWP members, the output will + # be an ensemble of 10 members. Hence, the three NWP members are blended + # with the first three members of the nowcast (member one with member one, + # two with two, etc.), subsequently, the same NWP members are blended with + # the next three members (NWP member one with member 4, NWP member 2 with + # member 5, etc.), until 10 is reached. 
+ if n_ens_members_min != n_ens_members_max: + if n_model_members == 1: + self.__state.precip_models_cascades_temp = np.repeat( + self.__state.precip_models_cascades_temp, + n_ens_members_max, + axis=0, + ) + self.__state.mean_models_temp = np.repeat( + self.__state.mean_models_temp, n_ens_members_max, axis=0 + ) + self.__state.std_models_temp = np.repeat( + self.__state.std_models_temp, n_ens_members_max, axis=0 + ) + self.__state.velocity_models_temp = np.repeat( + self.__state.velocity_models_temp, n_ens_members_max, axis=0 + ) + # For the prob. matching + self.__state.precip_models_temp = np.repeat( + self.__state.precip_models_temp, n_ens_members_max, axis=0 + ) + # Finally, for the model indices + self.__state.n_model_indices = np.repeat( + self.__state.n_model_indices, n_ens_members_max, axis=0 + ) + + elif n_model_members == n_ens_members_min: + repeats = [ + (n_ens_members_max + i) // n_ens_members_min + for i in range(n_ens_members_min) + ] + if n_model_members == n_ens_members_min: + self.__state.precip_models_cascades_temp = np.repeat( + self.__state.precip_models_cascades_temp, repeats, axis=0 + ) + self.__state.mean_models_temp = np.repeat( + self.__state.mean_models_temp, repeats, axis=0 + ) + self.__state.std_models_temp = np.repeat( + self.__state.std_models_temp, repeats, axis=0 + ) + self.__state.velocity_models_temp = np.repeat( + self.__state.velocity_models_temp, repeats, axis=0 + ) + # For the prob. matching + self.__state.precip_models_temp = np.repeat( + self.__state.precip_models_temp, repeats, axis=0 + ) + # Finally, for the model indices + self.__state.n_model_indices = np.repeat( + self.__state.n_model_indices, repeats, axis=0 + ) + + # TODO: is this not duplicate from part 2.3.5? + # If zero_precip_radar is True, set the velocity field equal to the NWP + # velocity field for the current time step (velocity_models_temp). 
+ if self.__params.zero_precip_radar: + # Use the velocity from velocity_models and take the average over + # n_models (axis=0) + self.__velocity = np.mean(self.__state.velocity_models_temp, axis=0) + + def __determine_skill_for_current_timestep(self, t): + if t == 0: + """Calculate the initial skill of the (NWP) model forecasts at t=0.""" + # TODO: n_model is not defined here, how does this work? + self.__params.rho_nwp_models = [ + blending.skill_scores.spatial_correlation( + obs=self.__state.precip_cascades[0, :, -1, :, :].copy(), + mod=self.__state.precip_models_cascades_temp[ + n_model, :, :, : + ].copy(), + domain_mask=self.__params.domain_mask, + ) + for n_model in range(self.__state.precip_models_cascades_temp.shape[0]) + ] + self.__params.rho_nwp_models = np.stack(self.__params.rho_nwp_models) + + # Ensure that the model skill decreases with increasing scale level. + for n_model in range(self.__state.precip_models_cascades_temp.shape[0]): + for i in range(1, self.__state.precip_models_cascades_temp.shape[1]): + if ( + self.__params.rho_nwp_models[n_model, i] + > self.__params.rho_nwp_models[n_model, i - 1] + ): + # Set it equal to the previous scale level + self.__params.rho_nwp_models[n_model, i] = ( + self.__params.rho_nwp_models[n_model, i - 1] + ) + + # Save this in the climatological skill file + blending.clim.save_skill( + current_skill=self.__params.rho_nwp_models, + validtime=self.__issuetime, + outdir_path=self.__config.outdir_path_skill, + **self.__config.clim_kwargs, + ) + if t > 0: + # 8.1.3 Determine the skill of the components for lead time (t0 + t) + # First for the extrapolation component. Only calculate it when t > 0. 
+ ( + self.__state.rho_extrap_cascade, + self.__state.rho_extrap_cascade_prev, + ) = blending.skill_scores.lt_dependent_cor_extrapolation( + PHI=self.__params.PHI, + correlations=self.__state.rho_extrap_cascade, + correlations_prev=self.__state.rho_extrap_cascade_prev, + ) - def __generate_and_decompose_noise(self, j, state, params): - pass + def __determine_skill_for_next_timestep(self, t, j): + # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) + # Then for the model components + if self.__config.blend_nwp_members: + rho_nwp_forecast = [ + blending.skill_scores.lt_dependent_cor_nwp( + lt=(t * int(self.__config.timestep)), + correlations=self.__params.rho_nwp_models[n_model], + outdir_path=self.__config.outdir_path_skill, + n_model=n_model, + skill_kwargs=self.__config.clim_kwargs, + ) + for n_model in range(self.__params.rho_nwp_models.shape[0]) + ] + rho_nwp_forecast = np.stack(rho_nwp_forecast) + # Concatenate rho_extrap_cascade and rho_nwp + self.__state.rho_forecast = np.concatenate( + (self.__state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 + ) + else: + rho_nwp_forecast = blending.skill_scores.lt_dependent_cor_nwp( + lt=(t * int(self.__config.timestep)), + correlations=self.__params.rho_nwp_models[j], + outdir_path=self.__config.outdir_path_skill, + n_model=self.__params.n_model_indices[j], + skill_kwargs=self.__config.clim_kwargs, + ) + # Concatenate rho_extrap_cascade and rho_nwp + self.__state.rho_forecast = np.concatenate( + (self.__state.rho_extrap_cascade[None, :], rho_nwp_forecast[None, :]), + axis=0, + ) - def __recompose_and_apply_mask(self, j, state, params): - pass + def __determine_weights_per_component(self): + # 8.2 Determine the weights per component + + # Weights following the bps method. These are needed for the velocity + # weights prior to the advection step. If weights method spn is + # selected, weights will be overwritten with those weights prior to + # blending step. 
+ # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] + self.__state.weights = calculate_weights_bps(self.__state.rho_forecast) + + # The model only weights + if self.__config.weights_method == "bps": + # Determine the weights of the components without the extrapolation + # cascade, in case this is no data or outside the mask. + self.__state.weights_model_only = calculate_weights_bps( + self.__state.rho_forecast[1:, :] + ) + elif self.__config.weights_method == "spn": + # Only the weights of the components without the extrapolation + # cascade will be determined here. The full set of weights are + # determined after the extrapolation step in this method. + if ( + self.__config.blend_nwp_members + and self.__state.precip_models_cascades_temp.shape[0] > 1 + ): + self.__state.weights_model_only = np.zeros( + ( + self.__state.precip_models_cascades_temp.shape[0] + 1, + self.__config.n_cascade_levels, + ) + ) + for i in range(self.__config.n_cascade_levels): + # Determine the normalized covariance matrix (containing) + # the cross-correlations between the models + covariance_nwp_models = np.corrcoef( + np.stack( + [ + self.__state.precip_models_cascades_temp[ + n_model, i, :, : + ].flatten() + for n_model in range( + self.__state.precip_models_cascades_temp.shape[0] + ) + ] + ) + ) + # Determine the weights for this cascade level + self.__state.weights_model_only[:, i] = calculate_weights_spn( + correlations=self.__state.rho_forecast[1:, i], + covariance=covariance_nwp_models, + ) + else: + # Same as correlation and noise is 1 - correlation + self.__state.weights_model_only = calculate_weights_bps( + self.__state.rho_forecast[1:, :] + ) + else: + raise ValueError( + "Unknown weights method %s: must be 'bps' or 'spn'" + % self.__config.weights_method + ) + + def __regress_extrapolation_and_noise_cascades(self, j): + # 8.3 Determine the noise cascade and regress this to the subsequent + # time step + regress the extrapolation component to the 
subsequent + # time step + + # 8.3.1 Determine the epsilon, a cascade of temporally independent + # but spatially correlated noise + if self.__config.noise_method is not None: + # generate noise field + epsilon = self.__params.noise_generator( + self.__params.perturbation_generator, + randstate=self.__state.randgen_precip[j], + fft_method=self.__state.fft_objs[j], + domain=self.__config.domain, + ) + + # decompose the noise field into a cascade + epsilon_decomposed = self.__params.decomposition_method( + epsilon, + self.__params.bandpass_filter, + fft_method=self.__state.fft_objs[j], + input_domain=self.__config.domain, + output_domain=self.__config.domain, + compute_stats=True, + normalize=True, + compact_output=True, + ) + else: + epsilon_decomposed = None + + # 8.3.2 regress the extrapolation component to the subsequent time + # step + # iterate the AR(p) model for each cascade level + for i in range(self.__config.n_cascade_levels): + # apply AR(p) process to extrapolation cascade level + if ( + epsilon_decomposed is not None + or self.__config.velocity_perturbation_method is not None + ): + self.__state.precip_cascade[j][i] = autoregression.iterate_ar_model( + self.__state.precip_cascade[j][i], self.__params.PHI[i, :] + ) + # Renormalize the cascade + self.__state.precip_cascade[j][i][1] /= np.std( + self.__state.precip_cascade[j][i][1] + ) + else: + # use the deterministic AR(p) model computed above if + # perturbations are disabled + self.__state.precip_cascade[j][i] = ( + self.__state.precip_forecast_non_perturbed[i] + ) + + # 8.3.3 regress the noise component to the subsequent time step + # iterate the AR(p) model for each cascade level + for i in range(self.__config.n_cascade_levels): + # normalize the noise cascade + if epsilon_decomposed is not None: + epsilon_temp = epsilon_decomposed["cascade_levels"][i] + epsilon_temp *= self.__paramsnoise_std_coeffs[i] + else: + epsilon_temp = None + # apply AR(p) process to noise cascade level + # (Returns zero 
noise if epsilon_decomposed is None) + self.__state.precip_noise_cascades[j][i] = autoregression.iterate_ar_model( + self.__state.precip_noise_cascades[j][i], + self.__params.PHI[i, :], + eps=epsilon_temp, + ) - def __apply_precipitation_mask(self, precip_forecast, j, state, params): - pass + epsilon_decomposed = None + epsilon_temp = None def __measure_time(self, label, start_time): """ @@ -1039,6 +1505,8 @@ def __measure_time(self, label, start_time): if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") + return elapsed_time + return None def reset_states_and_params(self): """ diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index b04e82756..6efd97586 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -352,11 +352,14 @@ def compute_forecast(self): self.__initialize_fft_objects() # Measure and print initialization time if self.__config.measure_time: - self.__measure_time("Initialization", self.__start_time_init) + self.__init_time = self.__measure_time( + "Initialization", self.__start_time_init + ) # Run the main nowcast loop self.__nowcast_main() + # Unstack nowcast output if return_output is True if self.__config.measure_time: ( self.__state.precip_forecast, @@ -387,7 +390,7 @@ def __nowcast_main(self): Main nowcast loop that iterates through the ensemble members and time steps to generate forecasts. 
""" - # Isolate the last time slice of precipitation + # Isolate the last time slice of observed precipitation precip = self.__precip[ -1, :, : ] # Extract the last available precipitation field @@ -717,7 +720,7 @@ def __apply_noise_and_ar_model(self): # Measure and print time taken if self.__config.measure_time: - self.__measure_time( + __ = self.__measure_time( "Noise adjustment coefficient computation", starttime ) else: @@ -1198,6 +1201,8 @@ def __measure_time(self, label, start_time): if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") + return elapsed_time + return None def reset_states_and_params(self): """ From 88df97dbfd9a8bdfbe43626e01b1939dc94929f1 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 2 Dec 2024 14:03:16 +0100 Subject: [PATCH 39/65] Small changes since prev commit --- pysteps/blending/steps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 1f314a757..5e3cb43ee 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -255,7 +255,7 @@ def compute_forecast(self): self.__multiply_precip_cascade_to_match_ensemble_members() self.__initialize_random_generators() self.__prepare_forecast_loop() - self.__initialize_noise_cascade() + self.__initialize_noise_cascades() if self.__config.measure_time: self.__init_time = self.__measure_time( "initialization", self.__start_time_init @@ -1045,7 +1045,7 @@ def __prepare_forecast_loop(self): 1.0 - self.__params.PHI[:, 1] ) - def __initialize_noise_cascade(self): + def __initialize_noise_cascades(self): """Initialize the noise cascade with identical noise for all AR(n) steps We also need to return the mean and standard deviations of the noise for the recombination of the noise before advecting it. 
From 7ee0020d5e5863e04557d7d4b57f1578501f9551 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 2 Dec 2024 16:57:33 +0100 Subject: [PATCH 40/65] All code is tranfered. Last part of the main loop needs to be refactored --- pysteps/blending/steps.py | 778 +++++++++++++++++++++++++++++++++++++- 1 file changed, 759 insertions(+), 19 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 5e3cb43ee..15ac25b9e 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -148,6 +148,8 @@ class StepsBlendingParams: num_ensemble_workers: int = None rho_nwp_models: Any = None domain_mask: Any = None + velocity_perturbations: Any = None + generate_velocity_noise: Any = None # TODO: typing could be improved here @@ -159,8 +161,6 @@ class StepsBlendingState: precip_models_cascades: Any = None PHI: Any = None randgen_precip: Any = None - velocity_perturbations: Any = None - generate_velocity_noise: Any = None previous_displacement: Any = None previous_displacement_noise_cascade: Any = None previous_displacement_prob_matching: Any = None @@ -189,6 +189,11 @@ class StepsBlendingState: rho_forecast: Any = None weights: Any = None weights_model_only: Any = None + precip_forecast_extrapolated_decomp_done: Any = None + noise_extrapolated_decomp_done: Any = None + precip_forecast_extrapolated_probability_matching: Any = None + precip_forecast_prev_subtimestep: Any = None + noise_prev_subtimestep: Any = None class StepsBlendingNowcaster: @@ -295,14 +300,19 @@ def __blended_nowcast_main(self): if self.__config.measure_time: starttime_mainloop = time.time() - + # TODO: problem with the config here! This variable changes over time... + # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! 
self.__config.extrap_kwargs["return_displacement"] = True - precip_forc_prev_subtimestep = deepcopy(self.__state.precip_cascades) - noise_prev_subtimestep = deepcopy(self.__state.precip_noise_cascades) + self.__state.precip_forecast_prev_subtimestep = deepcopy( + self.__state.precip_cascades + ) + self.__state.noise_prev_subtimestep = deepcopy( + self.__state.precip_noise_cascades + ) - t_prev_timestep = [0.0 for j in range(self.__config.n_ens_members)] - t_leadtime_since_start_forecast = [ + self.__state.t_prev_timestep = [0.0 for j in range(self.__config.n_ens_members)] + self.__state.t_leadtime_since_start_forecast = [ 0.0 for j in range(self.__config.n_ens_members) ] @@ -321,11 +331,49 @@ def worker(j): self.__determine_skill_for_next_timestep(t, j) self.__determine_weights_per_component() self.__regress_extrapolation_and_noise_cascades(j) + self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( + t, j + ) + # 8.5 Blend the cascades + final_blended_forecast_single_member = [] + for t_sub in self.__state.subtimesteps: + # TODO: does it make sense to use sub time steps - check if it works? 
+ if t_sub > 0: + self.__blend_cascades() + self.__recompose_cascade_to_rainfall_field() + self.__post_process_output(final_blended_forecast_single_member) + precip_forecast_workers[j] = final_blended_forecast_single_member + + result = [] + + if DASK_IMPORTED and self.__config.n_ens_members > 1: + for j in range(self.__config.n_ens_members): + result.append(dask.delayed(worker)(j)) + dask.compute(*result, num_workers=self.__params.num_ensemble_workers) + else: + for j in range(self.__config.n_ens_members): + worker(j) - # Perturb and blend the advection fields + advect the extrapolation and noise cascade to the current time step - # Blend the cascades + result = None - pass + if self.__state.is_nowcast_time_step: + if self.__config.measure_time: + __ = self.__measure_time("subtimestep", starttime) + else: + print("done.") + + if self.__config.callback is not None: + precip_forecast_final = np.stack(precip_forecast_workers) + if precip_forecast_final.shape[1] > 0: + self.__config.callback(precip_forecast_final.squeeze()) + + if self.__config.return_output: + for j in range(self.__config.n_ens_members): + self.__state.precip_forecast[j].extend(precip_forecast_workers[j]) + + precip_forecast_workers = None + if self.__config.measure_time: + self.__mainloop_time = time.time() - starttime_mainloop def __check_inputs(self): """Validates the inputs and determines if the user provided raw forecasts or decomposed forecasts.""" @@ -969,7 +1017,7 @@ def __initialize_random_generators(self): if self.__config.velocity_perturbation_method is not None: ( init_velocity_noise, - self.__state.generate_velocity_noise, + self.__params.generate_velocity_noise, ) = noise.get_method(self.__config.velocity_perturbation_method) # initialize the perturbation generators for the motion field @@ -989,8 +1037,8 @@ def __initialize_random_generators(self): self.__state.velocity_perturbations.append(vp_) else: ( - self.__state.velocity_perturbations, - self.__state.generate_velocity_noise, + 
self.__params.velocity_perturbations, + self.__params.generate_velocity_noise, ) = (None, None) def __prepare_forecast_loop(self): @@ -1460,17 +1508,17 @@ def __regress_extrapolation_and_noise_cascades(self, j): epsilon_decomposed is not None or self.__config.velocity_perturbation_method is not None ): - self.__state.precip_cascade[j][i] = autoregression.iterate_ar_model( - self.__state.precip_cascade[j][i], self.__params.PHI[i, :] + self.__state.precip_cascades[j][i] = autoregression.iterate_ar_model( + self.__state.precip_cascades[j][i], self.__params.PHI[i, :] ) # Renormalize the cascade - self.__state.precip_cascade[j][i][1] /= np.std( - self.__state.precip_cascade[j][i][1] + self.__state.precip_cascades[j][i][1] /= np.std( + self.__state.precip_cascades[j][i][1] ) else: # use the deterministic AR(p) model computed above if # perturbations are disabled - self.__state.precip_cascade[j][i] = ( + self.__state.precip_cascades[j][i] = ( self.__state.precip_forecast_non_perturbed[i] ) @@ -1480,7 +1528,7 @@ def __regress_extrapolation_and_noise_cascades(self, j): # normalize the noise cascade if epsilon_decomposed is not None: epsilon_temp = epsilon_decomposed["cascade_levels"][i] - epsilon_temp *= self.__paramsnoise_std_coeffs[i] + epsilon_temp *= self.__params.noise_std_coeffs[i] else: epsilon_temp = None # apply AR(p) process to noise cascade level @@ -1494,6 +1542,698 @@ def __regress_extrapolation_and_noise_cascades(self, j): epsilon_decomposed = None epsilon_temp = None + def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( + self, t, j + ): + # 8.4 Perturb and blend the advection fields + advect the + # extrapolation and noise cascade to the current time step + # (or subtimesteps if non-integer time steps are given) + + # Settings and initialize the output + extrap_kwargs_ = self.__config.extrap_kwargs.copy() + extrap_kwargs_noise = self.__config.extrap_kwargs.copy() + extrap_kwargs_pb = self.__config.extrap_kwargs.copy() + 
velocity_perturbations_extrapolation = self.__velocity + # The following should be accesseble after this function + self.__state.precip_forecast_extrapolated_decomp_done = [] + self.__state.noise_extrapolated_decomp_done = [] + self.__state.precip_forecast_extrapolated_probability_matching = [] + + # Extrapolate per sub time step + for t_sub in self.__state.subtimesteps: + if t_sub > 0: + t_diff_prev_subtimestep_int = t_sub - int(t_sub) + if t_diff_prev_subtimestep_int > 0.0: + precip_forecast_cascade_subtimestep = [ + (1.0 - t_diff_prev_subtimestep_int) + * self.__state.precip_forecast_prev_subtimestep[j][i][-1, :] + + t_diff_prev_subtimestep_int + * self.__state.precip_cascades[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + noise_cascade_subtimestep = [ + (1.0 - t_diff_prev_subtimestep_int) + * self.__state.noise_prev_subtimestep[j][i][-1, :] + + t_diff_prev_subtimestep_int + * self.__state.precip_noise_cascades[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + + else: + precip_forecast_cascade_subtimestep = [ + self.__state.precip_forecast_prev_subtimestep[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + noise_cascade_subtimestep = [ + self.__state.noise_prev_subtimestep[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + + precip_forecast_cascade_subtimestep = np.stack( + precip_forecast_cascade_subtimestep + ) + noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) + + t_diff_prev_subtimestep = t_sub - self.__state.t_prev_timestep[j] + self.__state.t_leadtime_since_start_forecast[ + j + ] += t_diff_prev_subtimestep + + # compute the perturbed motion field - include the NWP + # velocities and the weights. 
Note that we only perturb + # the extrapolation velocity field, as the NWP velocity + # field is present per time step + if self.__config.velocity_perturbation_method is not None: + velocity_perturbations_extrapolation = ( + self.__velocity + + self.__params.generate_velocity_noise( + self.__params.velocity_perturbations[j], + self.__state.t_leadtime_since_start_forecast[j] + * self.__config.timestep, + ) + ) + + # Stack the perturbed extrapolation and the NWP velocities + if self.__config.blend_nwp_members: + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + self.__state.velocity_models_temp, + ), + axis=0, + ) + else: + velocity_models = self.__state.velocity_models_temp[j] + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models[None, :, :, :], + ), + axis=0, + ) + velocity_models = None + + # Obtain a blended optical flow, using the weights of the + # second cascade following eq. 24 in BPS2006 + velocity_blended = blending.utils.blend_optical_flows( + flows=velocity_stack_all, + weights=self.__state.weights[ + :-1, 1 + ], # [(extr_field, n_model_fields), cascade_level=2] + ) + + # Extrapolate both cascades to the next time step + # First recompose the cascade, advect it and decompose it again + # This is needed to remove the interpolation artifacts. + # In addition, the number of extrapolations is greatly reduced + # A. 
Radar Rain + precip_forecast_recomp_subtimestep = blending.utils.recompose_cascade( + combined_cascade=precip_forecast_cascade_subtimestep, + combined_mean=self.__state.mean_extrapolation, + combined_sigma=self.__state.std_extrapolation, + ) + # Make sure we have values outside the mask + if self.__params.zero_precip_radar: + precip_forecast_recomp_subtimestep = np.nan_to_num( + precip_forecast_recomp_subtimestep, + copy=True, + nan=self.__params.precip_zerovalue, + posinf=self.__params.precip_zerovalue, + neginf=self.__params.precip_zerovalue, + ) + # Put back the mask + precip_forecast_recomp_subtimestep[self.__params.domain_mask] = np.nan + # TODO: problem with the config here! This variable changes over time... + # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! + self.__config.extrap_kwargs["displacement_prev"] = ( + self.__state.previous_displacement[j] + ) + ( + precip_forecast_extrapolated_recomp_subtimestep_temp, + self.__state.previous_displacement[j], + ) = self.__params.extrapolation_method( + precip_forecast_recomp_subtimestep, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **self.__config.extrap_kwargs, + ) + precip_forecast_extrapolated_recomp_subtimestep = ( + precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy() + ) + temp_mask = ~np.isfinite( + precip_forecast_extrapolated_recomp_subtimestep + ) + # TODO: WHERE DO CAN I FIND THIS -15.0 + precip_forecast_extrapolated_recomp_subtimestep[ + ~np.isfinite(precip_forecast_extrapolated_recomp_subtimestep) + ] = self.__params.precip_zerovalue + precip_forecast_extrapolated_decomp = ( + self.__params.decomposition_method( + precip_forecast_extrapolated_recomp_subtimestep, + self.__params.bandpass_filter, + mask=self.__params.mask_threshold, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + )["cascade_levels"] + ) + # Make 
sure we have values outside the mask + if self.__params.zero_precip_radar: + precip_forecast_extrapolated_decomp = np.nan_to_num( + precip_forecast_extrapolated_decomp, + copy=True, + nan=np.nanmin(precip_forecast_cascade_subtimestep), + posinf=np.nanmin(precip_forecast_cascade_subtimestep), + neginf=np.nanmin(precip_forecast_cascade_subtimestep), + ) + for i in range(self.__config.n_cascade_levels): + precip_forecast_extrapolated_decomp[i][temp_mask] = np.nan + # B. Noise + noise_cascade_subtimestep_recomp = blending.utils.recompose_cascade( + combined_cascade=noise_cascade_subtimestep, + combined_mean=self.__state.precip_mean_noise[j], + combined_sigma=self.__state.precip_std_noise[j], + ) + extrap_kwargs_noise["displacement_prev"] = ( + self.__state.previous_displacement_noise_cascade[j] + ) + extrap_kwargs_noise["map_coordinates_mode"] = "wrap" + ( + noise_extrapolated_recomp_temp, + self.__state.previous_displacement_noise_cascade[j], + ) = self.__params.extrapolation_method( + noise_cascade_subtimestep_recomp, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_noise, + ) + noise_extrapolated_recomp = noise_extrapolated_recomp_temp[0].copy() + noise_extrapolated_decomp = self.__params.decomposition_method( + noise_extrapolated_recomp, + self.__params.bandpass_filter, + mask=self.__params.mask_threshold, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + )["cascade_levels"] + for i in range(self.__config.n_cascade_levels): + noise_extrapolated_decomp[i] *= self.__params.noise_std_coeffs[i] + + # Append the results to the output lists + self.__state.precip_forecast_extrapolated_decomp_done.append( + precip_forecast_extrapolated_decomp.copy() + ) + self.__state.noise_extrapolated_decomp_done.append( + noise_extrapolated_decomp.copy() + ) + precip_forecast_cascade_subtimestep = None + precip_forecast_recomp_subtimestep = None + 
precip_forecast_extrapolated_recomp_subtimestep_temp = None + precip_forecast_extrapolated_recomp_subtimestep = None + precip_forecast_extrapolated_decomp = None + noise_cascade_subtimestep = None + noise_cascade_subtimestep_recomp = None + noise_extrapolated_recomp_temp = None + noise_extrapolated_recomp = None + noise_extrapolated_decomp = None + + # Finally, also extrapolate the initial radar rainfall + # field. This will be blended with the rainfall field(s) + # of the (NWP) model(s) for Lagrangian blended prob. matching + # min_R = np.min(precip) + extrap_kwargs_pb["displacement_prev"] = ( + self.__state.previous_displacement_prob_matching[j] + ) + # Apply the domain mask to the extrapolation component + precip_forecast_temp_for_probability_matching = self.__precip.copy() + precip_forecast_temp_for_probability_matching[ + self.__params.domain_mask + ] = np.nan + ( + precip_forecast_extrapolated_probability_matching_temp, + self.__state.previous_displacement_prob_matching[j], + ) = self.__params.extrapolation_method( + precip_forecast_temp_for_probability_matching, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_pb, + ) + self.__state.precip_forecast_extrapolated_probability_matching.append( + precip_forecast_extrapolated_probability_matching_temp[0] + ) + + self.__state.t_prev_timestep[j] = t_sub + + if len(self.__state.precip_forecast_extrapolated_decomp_done) > 0: + self.__state.precip_forecast_extrapolated_decomp_done = np.stack( + self.__state.precip_forecast_extrapolated_decomp_done + ) + self.__state.noise_extrapolated_decomp_done = np.stack( + self.__state.noise_extrapolated_decomp_done + ) + self.__state.precip_forecast_extrapolated_probability_matching = np.stack( + self.__state.precip_forecast_extrapolated_probability_matching + ) + + # advect the forecast field by one time step if no subtimesteps in the + # current interval were found + if not self.__state.subtimesteps: + t_diff_prev_subtimestep = t 
+ 1 - self.__state.t_prev_timestep[j] + self.__state.t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep + + # compute the perturbed motion field - include the NWP + # velocities and the weights + if self.__config.velocity_perturbation_method is not None: + velocity_perturbations_extrapolation = ( + self.__velocity + + self.__params.generate_velocity_noise( + self.__params.velocity_perturbations[j], + self.__state.t_leadtime_since_start_forecast[j] + * self.__config.timestep, + ) + ) + + # Stack the perturbed extrapolation and the NWP velocities + if self.__config.blend_nwp_members: + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + self.__state.velocity_models_temp, + ), + axis=0, + ) + else: + velocity_models = self.__state.velocity_models_temp[j] + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models[None, :, :, :], + ), + axis=0, + ) + velocity_models = None + + # Obtain a blended optical flow, using the weights of the + # second cascade following eq. 
24 in BPS2006 + velocity_blended = blending.utils.blend_optical_flows( + flows=velocity_stack_all, + weights=self.__state.weights[ + :-1, 1 + ], # [(extr_field, n_model_fields), cascade_level=2] + ) + + # Extrapolate the extrapolation and noise cascade + + extrap_kwargs_["displacement_prev"] = self.__state.previous_displacement[j] + extrap_kwargs_noise["displacement_prev"] = ( + self.__state.previous_displacement_noise_cascade[j] + ) + extrap_kwargs_noise["map_coordinates_mode"] = "wrap" + + _, self.__state.previous_displacement[j] = ( + self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_, + ) + ) + + _, self.__state.previous_displacement_noise_cascade[j] = ( + self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_noise, + ) + ) + + # Also extrapolate the radar observation, used for the probability + # matching and post-processing steps + extrap_kwargs_pb["displacement_prev"] = ( + self.__state.previous_displacement_prob_matching[j] + ) + _, self.__state.previous_displacement_prob_matching[j] = ( + self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_pb, + ) + ) + + self.__state.t_prev_timestep[j] = t + 1 + + self.__state.precip_forecast_prev_subtimestep[j] = self.__state.precip_cascades[ + j + ] + self.__state.noise_prev_subtimestep[j] = self.__state.precip_noise_cascades[j] + + def __blend_cascades(self): + t_index = np.where(np.array(subtimesteps) == t_sub)[0][0] + # First concatenate the cascades and the means and sigmas + # precip_models = [n_models,timesteps,n_cascade_levels,m,n] + if blend_nwp_members: + cascade_stack_all_components = np.concatenate( + ( + precip_forecast_extrapolated_decomp_done[None, t_index], + precip_models_cascade_temp, + noise_extrapolated_decomp_done[None, t_index], + ), 
+ axis=0, + ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] + means_stacked = np.concatenate( + (mu_extrapolation[None, :], mu_models_temp), axis=0 + ) + sigmas_stacked = np.concatenate( + (sigma_extrapolation[None, :], sigma_models_temp), + axis=0, + ) + else: + cascade_stack_all_components = np.concatenate( + ( + precip_forecast_extrapolated_decomp_done[None, t_index], + precip_models_cascade_temp[None, j], + noise_extrapolated_decomp_done[None, t_index], + ), + axis=0, + ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] + means_stacked = np.concatenate( + (mu_extrapolation[None, :], mu_models_temp[None, j]), + axis=0, + ) + sigmas_stacked = np.concatenate( + ( + sigma_extrapolation[None, :], + sigma_models_temp[None, j], + ), + axis=0, + ) + + # First determine the blending weights if method is spn. The + # weights for method bps have already been determined. + if weights_method == "spn": + weights = np.zeros( + ( + cascade_stack_all_components.shape[0], + n_cascade_levels, + ) + ) + for i in range(n_cascade_levels): + # Determine the normalized covariance matrix (containing) + # the cross-correlations between the models + cascade_stack_all_components_temp = np.stack( + [ + cascade_stack_all_components[n_model, i, :, :].flatten() + for n_model in range(cascade_stack_all_components.shape[0] - 1) + ] + ) # -1 to exclude the noise component + covariance_nwp_models = np.ma.corrcoef( + np.ma.masked_invalid(cascade_stack_all_components_temp) + ) + # Determine the weights for this cascade level + weights[:, i] = calculate_weights_spn( + correlations=rho_fc[:, i], + covariance=covariance_nwp_models, + ) + + # Blend the extrapolation, (NWP) model(s) and noise cascades + precip_forecast_blended = blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components, weights=weights + ) + + # Also blend the cascade without the extrapolation component + precip_forecast_blended_mod_only = blending.utils.blend_cascades( + 
cascades_norm=cascade_stack_all_components[1:, :], + weights=weights_model_only, + ) + + # Blend the means and standard deviations + # Input is array of shape [number_components, scale_level, ...] + means_blended, sigmas_blended = blend_means_sigmas( + means=means_stacked, sigmas=sigmas_stacked, weights=weights + ) + # Also blend the means and sigmas for the cascade without extrapolation + ( + means_blended_mod_only, + sigmas_blended_mod_only, + ) = blend_means_sigmas( + means=means_stacked[1:, :], + sigmas=sigmas_stacked[1:, :], + weights=weights_model_only, + ) + + def __recompose_cascade_to_rainfall_field(self): + # 8.6 Recompose the cascade to a precipitation field + # (The function first normalizes the blended cascade, precip_forecast_blended + # again) + precip_forecast_recomposed = blending.utils.recompose_cascade( + combined_cascade=precip_forecast_blended, + combined_mean=means_blended, + combined_sigma=sigmas_blended, + ) + # The recomposed cascade without the extrapolation (for NaN filling + # outside the radar domain) + precip_forecast_recomposed_mod_only = blending.utils.recompose_cascade( + combined_cascade=precip_forecast_blended_mod_only, + combined_mean=means_blended_mod_only, + combined_sigma=sigmas_blended_mod_only, + ) + if domain == "spectral": + # TODO: Check this! (Only tested with domain == 'spatial') + precip_forecast_recomposed = fft_objs[j].irfft2(precip_forecast_recomposed) + precip_forecast_recomposed_mod_only = fft_objs[j].irfft2( + precip_forecast_recomposed_mod_only + ) + + def __post_process_output(self, final_blended_forecast_single_member): + # 8.7 Post-processing steps - use the mask and fill no data with + # the blended NWP forecast. Probability matching following + # Lagrangian blended probability matching which uses the + # latest extrapolated radar rainfall field blended with the + # nwp model(s) rainfall forecast fields as 'benchmark'. 
+ + # 8.7.1 first blend the extrapolated rainfall field (the field + # that is only used for post-processing steps) with the NWP + # rainfall forecast for this time step using the weights + # at scale level 2. + weights_probability_matching = weights[:-1, 1] # Weights without noise, level 2 + weights_probability_matching_normalized = weights_probability_matching / np.sum( + weights_probability_matching + ) + # And the weights for outside the radar domain + weights_probability_matching_mod_only = weights_model_only[ + :-1, 1 + ] # Weights without noise, level 2 + weights_probability_matching_normalized_mod_only = ( + weights_probability_matching_mod_only + / np.sum(weights_probability_matching_mod_only) + ) + # Stack the fields + if blend_nwp_members: + precip_forecast_probability_matching_final = np.concatenate( + ( + precip_forecast_extrapolated_probability_matching[None, t_index], + precip_models_temp, + ), + axis=0, + ) + else: + precip_forecast_probability_matching_final = np.concatenate( + ( + precip_forecast_extrapolated_probability_matching[None, t_index], + precip_models_temp[None, j], + ), + axis=0, + ) + # Blend it + precip_forecast_probability_matching_blended = np.sum( + weights_probability_matching_normalized.reshape( + weights_probability_matching_normalized.shape[0], 1, 1 + ) + * precip_forecast_probability_matching_final, + axis=0, + ) + if blend_nwp_members: + precip_forecast_probability_matching_blended_mod_only = np.sum( + weights_probability_matching_normalized_mod_only.reshape( + weights_probability_matching_normalized_mod_only.shape[0], + 1, + 1, + ) + * precip_models_temp, + axis=0, + ) + else: + precip_forecast_probability_matching_blended_mod_only = precip_models_temp[ + j + ] + + # The extrapolation components are NaN outside the advected + # radar domain. This results in NaN values in the blended + # forecast outside the radar domain. 
Therefore, fill these + # areas with the "..._mod_only" blended forecasts, consisting + # of the NWP and noise components. + + nan_indices = np.isnan(precip_forecast_recomposed) + if smooth_radar_mask_range != 0: + # Compute the smooth dilated mask + new_mask = blending.utils.compute_smooth_dilated_mask( + nan_indices, + max_padding_size_in_px=smooth_radar_mask_range, + ) + + # Ensure mask values are between 0 and 1 + mask_model = np.clip(new_mask, 0, 1) + mask_radar = np.clip(1 - new_mask, 0, 1) + + # Handle NaNs in precip_forecast_new and precip_forecast_new_mod_only by setting NaNs to 0 in the blending step + precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num( + precip_forecast_recomposed_mod_only, nan=0 + ) + precip_forecast_recomposed_no_nan = np.nan_to_num( + precip_forecast_recomposed, nan=0 + ) + + # Perform the blending of radar and model inside the radar domain using a weighted combination + precip_forecast_recomposed = np.nansum( + [ + mask_model * precip_forecast_recomposed_mod_only_no_nan, + mask_radar * precip_forecast_recomposed_no_nan, + ], + axis=0, + ) + + nan_indices = np.isnan(precip_forecast_probability_matching_blended) + precip_forecast_probability_matching_blended = np.nansum( + [ + precip_forecast_probability_matching_blended * mask_radar, + precip_forecast_probability_matching_blended_mod_only * mask_model, + ], + axis=0, + ) + else: + precip_forecast_recomposed[nan_indices] = ( + precip_forecast_recomposed_mod_only[nan_indices] + ) + nan_indices = np.isnan(precip_forecast_probability_matching_blended) + precip_forecast_probability_matching_blended[nan_indices] = ( + precip_forecast_probability_matching_blended_mod_only[nan_indices] + ) + + # Finally, fill the remaining nan values, if present, with + # the minimum value in the forecast + nan_indices = np.isnan(precip_forecast_recomposed) + precip_forecast_recomposed[nan_indices] = np.nanmin(precip_forecast_recomposed) + nan_indices = 
np.isnan(precip_forecast_probability_matching_blended) + precip_forecast_probability_matching_blended[nan_indices] = np.nanmin( + precip_forecast_probability_matching_blended + ) + + # 8.7.2. Apply the masking and prob. matching + if mask_method is not None: + # apply the precipitation mask to prevent generation of new + # precipitation into areas where it was not originally + # observed + precip_forecast_min_value = precip_forecast_recomposed.min() + if mask_method == "incremental": + # The incremental mask is slightly different from + # the implementation in the non-blended steps.py, as + # it is not based on the last forecast, but instead + # on R_pm_blended. Therefore, the buffer does not + # increase over time. + # Get the mask for this forecast + precip_field_mask = ( + precip_forecast_probability_matching_blended >= precip_thr + ) + # Buffer the mask + precip_field_mask = _compute_incremental_mask( + precip_field_mask, struct, mask_rim + ) + # Get the final mask + precip_forecast_recomposed = ( + precip_forecast_min_value + + (precip_forecast_recomposed - precip_forecast_min_value) + * precip_field_mask + ) + precip_field_mask_temp = ( + precip_forecast_recomposed > precip_forecast_min_value + ) + elif mask_method == "obs": + # The mask equals the most recent benchmark + # rainfall field + precip_field_mask_temp = ( + precip_forecast_probability_matching_blended >= precip_thr + ) + + # Set to min value outside of mask + precip_forecast_recomposed[~precip_field_mask_temp] = ( + precip_forecast_min_value + ) + + # If probmatching_method is not None, resample the distribution from + # both the extrapolation cascade and the model (NWP) cascade and use + # that for the probability matching. + if probmatching_method is not None and resample_distribution: + arr1 = precip_forecast_extrapolated_probability_matching[t_index] + arr2 = precip_models_temp[j] + # resample weights based on cascade level 2. + # Areas where one of the fields is nan are not included. 
+ precip_forecast_probability_matching_resampled = ( + probmatching.resample_distributions( + first_array=arr1, + second_array=arr2, + probability_first_array=weights_probability_matching_normalized[0], + ) + ) + else: + precip_forecast_probability_matching_resampled = ( + precip_forecast_probability_matching_blended.copy() + ) + + if probmatching_method == "cdf": + # nan indices in the extrapolation nowcast + nan_indices = np.isnan( + precip_forecast_extrapolated_probability_matching[t_index] + ) + # Adjust the CDF of the forecast to match the resampled distribution combined from + # extrapolation and model fields. + # Rainfall outside the pure extrapolation domain is not taken into account. + if np.any(np.isfinite(precip_forecast_recomposed)): + precip_forecast_recomposed = probmatching.nonparam_match_empirical_cdf( + precip_forecast_recomposed, + precip_forecast_probability_matching_resampled, + nan_indices, + ) + precip_forecast_probability_matching_resampled = None + elif probmatching_method == "mean": + # Use R_pm_blended as benchmark field and + mean_probabiltity_matching_forecast = np.mean( + precip_forecast_probability_matching_resampled[ + precip_forecast_probability_matching_resampled >= precip_thr + ] + ) + no_rain_mask = precip_forecast_recomposed >= precip_thr + mean_precip_forecast = np.mean(precip_forecast_recomposed[no_rain_mask]) + precip_forecast_recomposed[no_rain_mask] = ( + precip_forecast_recomposed[no_rain_mask] + - mean_precip_forecast + + mean_probabiltity_matching_forecast + ) + precip_forecast_probability_matching_resampled = None + + final_blended_forecast_single_member.append(precip_forecast_recomposed) + def __measure_time(self, label, start_time): """ Measure and print the time taken for a specific part of the process. 
From f3879812de7a79c7c93ecd52f12b5215af038d76 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 5 Dec 2024 10:37:16 +0100 Subject: [PATCH 41/65] Everything is refactored, no test ran as of yet --- pysteps/blending/steps.py | 395 +++++++++++++++++++++++--------------- pysteps/nowcasts/steps.py | 1 - 2 files changed, 242 insertions(+), 154 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 15ac25b9e..e866d45c9 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -180,11 +180,11 @@ class StepsBlendingState: subtimesteps: Any = None is_nowcast_time_step: bool = None # Variables to save data over (sub)time steps - precip_models_cascades_temp: Any = None - precip_models_temp: Any = None - mean_models_temp: Any = None - std_models_temp: Any = None - velocity_models_temp: Any = None + precip_models_cascades_timestep: Any = None + precip_models_timestep: Any = None + mean_models_timestep: Any = None + std_models_timestep: Any = None + velocity_models_timestep: Any = None n_model_indices: Optional[np.ndarray] = None # NWP model indices rho_forecast: Any = None weights: Any = None @@ -194,6 +194,16 @@ class StepsBlendingState: precip_forecast_extrapolated_probability_matching: Any = None precip_forecast_prev_subtimestep: Any = None noise_prev_subtimestep: Any = None + final_blended_forecast_single_member: Any = None + means_blended: Any = None + sigmas_blended: Any = None + means_blended_mod_only: Any = None + sigmas_blended_mod_only: Any = None + precip_forecast_blended: Any = None + precip_forecast_blended_mod_only: Any = None + precip_forecast_recomposed: Any = None + precip_forecast_recomposed_mod_only: Any = None + t_index: Any = None class StepsBlendingNowcaster: @@ -295,7 +305,8 @@ def __blended_nowcast_main(self): # 8. 
Start the forecasting loop ### # Isolate the last time slice of observed precipitation - precip = self.__precip[-1, :, :] + # TODO: This precip was "precip = self.__precip[-1, :, :]", changed to self.__precip = self.__precip[-1, :, :]. Might need to chage again and user local variable precip in all following functions + self.__precip = self.__precip[-1, :, :] print("Starting blended nowcast computation.") if self.__config.measure_time: @@ -335,14 +346,17 @@ def worker(j): t, j ) # 8.5 Blend the cascades - final_blended_forecast_single_member = [] + self.__state.final_blended_forecast_single_member = [] for t_sub in self.__state.subtimesteps: # TODO: does it make sense to use sub time steps - check if it works? if t_sub > 0: - self.__blend_cascades() - self.__recompose_cascade_to_rainfall_field() - self.__post_process_output(final_blended_forecast_single_member) - precip_forecast_workers[j] = final_blended_forecast_single_member + self.__blend_cascades(t_sub, j) + self.__recompose_cascade_to_rainfall_field(j) + # TODO: could be I need to return and ave final_blended_forecast_single_member + self.__post_process_output(j) + precip_forecast_workers[j] = ( + self.__state.final_blended_forecast_single_member + ) result = [] @@ -706,6 +720,7 @@ def __prepare_radar_and_NWP_fields(self): # 2.2 If necessary, recompose (NWP) model forecasts self.__state.precip_models_cascades = None + # TODO: This type of check needs to be changed when going to xarray if self.__precip_models.ndim != 4: self.__state.precip_models_cascades = self.__precip_models self.__precip_models = _compute_cascade_recomposition_nwp( @@ -799,7 +814,7 @@ def __prepare_nowcast_for_zero_radar(self): self.__state.precip_models_cascades[j, t]["cascade_levels"] ) continue - precip_models_cascade_temp = self.__params.decomposition_method( + precip_models_cascade_timestep = self.__params.decomposition_method( self.__precip_models[j, t, :, :], bp_filter=self.__params.bandpass_filter, fft_method=self.__params.fft, @@ 
-810,7 +825,7 @@ def __prepare_nowcast_for_zero_radar(self): )["cascade_levels"] self.__state.precip_cascades[ ~np.isfinite(self.__state.precip_cascades) - ] = np.nanmin(precip_models_cascade_temp) + ] = np.nanmin(precip_models_cascade_timestep) done = True break @@ -903,7 +918,7 @@ def __initialize_noise(self): else: self.__params.noise_std_coeffs = np.ones(self.__config.n_cascade_levels) - if self.__params.noise_stddev_adj is not None: + if self.__config.noise_stddev_adj is not None: print(f"noise std. dev. coeffs: {self.__params.noise_std_coeffs}") else: @@ -1189,13 +1204,13 @@ def __decompose_nwp_if_needed_and_fill_nans_in_nwp(self, t): list(self.__precip_models[:, t, :, :]), ) - self.__state.precip_models_cascades_temp = np.array( + self.__state.precip_models_cascades_timestep = np.array( [decomp["cascade_levels"] for decomp in decomp_precip_models] ) - self.__state.mean_models_temp = np.array( + self.__state.mean_models_timestep = np.array( [decomp["means"] for decomp in decomp_precip_models] ) - self.__state.std_models_temp = np.array( + self.__state.std_models_timestep = np.array( [decomp["stds"] for decomp in decomp_precip_models] ) @@ -1205,21 +1220,25 @@ def __decompose_nwp_if_needed_and_fill_nans_in_nwp(self, t): """Ensure that the NWP cascade and fields do no contain any nans or infinite number""" # Fill nans and infinite numbers with the minimum value present in precip - self.__state.precip_models_temp = self.__precip_models[:, t, :, :].astype( + self.__state.precip_models_timestep = self.__precip_models[:, t, :, :].astype( np.float64, copy=False ) # (corresponding to zero rainfall in the radar observations) min_cascade = np.nanmin(self.__state.precip_cascades) min_precip = np.nanmin(self.__precip) - self.__state.precip_models_cascades_temp[ - ~np.isfinite(self.__state.precip_models_cascades_temp) + self.__state.precip_models_cascades_timestep[ + ~np.isfinite(self.__state.precip_models_cascades_timestep) ] = min_cascade - 
self.__state.precip_models_temp[ - ~np.isfinite(self.__state.precip_models_temp) + self.__state.precip_models_timestep[ + ~np.isfinite(self.__state.precip_models_timestep) ] = min_precip # Also set any nans or infs in the mean and sigma of the cascade to # respectively 0.0 and 1.0 - self.__state.mean_models_temp[~np.isfinite(self.__state.mean_models_temp)] = 0.0 - self.__state.std_models_temp[~np.isfinite(self.__state.std_models_temp)] = 0.0 + self.__state.mean_models_timestep[ + ~np.isfinite(self.__state.mean_models_timestep) + ] = 0.0 + self.__state.std_models_timestep[ + ~np.isfinite(self.__state.std_models_timestep) + ] = 0.0 def __find_nowcast_NWP_combination(self, t): # 8.1.1 Before calling the worker for the forecast loop, determine which (NWP) @@ -1231,11 +1250,11 @@ def __find_nowcast_NWP_combination(self, t): With the way it is implemented at this moment: n_ens_members of the output equals the maximum number of (ensemble) members in the input (either the nowcasts or NWP). """ - self.__state.velocity_models_temp = self.__velocity_models[ + self.__state.velocity_models_timestep = self.__velocity_models[ :, t, :, :, : ].astype(np.float64, copy=False) # Make sure the number of model members is not larger than or equal to n_ens_members - n_model_members = self.__state.precip_models_cascades_temp.shape[0] + n_model_members = self.__state.precip_models_cascades_timestep.shape[0] if n_model_members > self.__config.n_ens_members: raise ValueError( "The number of NWP model members is larger than the given number of ensemble members. n_model_members <= n_ens_members." @@ -1269,23 +1288,23 @@ def __find_nowcast_NWP_combination(self, t): # member 5, etc.), until 10 is reached. 
if n_ens_members_min != n_ens_members_max: if n_model_members == 1: - self.__state.precip_models_cascades_temp = np.repeat( - self.__state.precip_models_cascades_temp, + self.__state.precip_models_cascades_timestep = np.repeat( + self.__state.precip_models_cascades_timestep, n_ens_members_max, axis=0, ) - self.__state.mean_models_temp = np.repeat( - self.__state.mean_models_temp, n_ens_members_max, axis=0 + self.__state.mean_models_timestep = np.repeat( + self.__state.mean_models_timestep, n_ens_members_max, axis=0 ) - self.__state.std_models_temp = np.repeat( - self.__state.std_models_temp, n_ens_members_max, axis=0 + self.__state.std_models_timestep = np.repeat( + self.__state.std_models_timestep, n_ens_members_max, axis=0 ) - self.__state.velocity_models_temp = np.repeat( - self.__state.velocity_models_temp, n_ens_members_max, axis=0 + self.__state.velocity_models_timestep = np.repeat( + self.__state.velocity_models_timestep, n_ens_members_max, axis=0 ) # For the prob. matching - self.__state.precip_models_temp = np.repeat( - self.__state.precip_models_temp, n_ens_members_max, axis=0 + self.__state.precip_models_timestep = np.repeat( + self.__state.precip_models_timestep, n_ens_members_max, axis=0 ) # Finally, for the model indices self.__state.n_model_indices = np.repeat( @@ -1298,21 +1317,23 @@ def __find_nowcast_NWP_combination(self, t): for i in range(n_ens_members_min) ] if n_model_members == n_ens_members_min: - self.__state.precip_models_cascades_temp = np.repeat( - self.__state.precip_models_cascades_temp, repeats, axis=0 + self.__state.precip_models_cascades_timestep = np.repeat( + self.__state.precip_models_cascades_timestep, + repeats, + axis=0, ) - self.__state.mean_models_temp = np.repeat( - self.__state.mean_models_temp, repeats, axis=0 + self.__state.mean_models_timestep = np.repeat( + self.__state.mean_models_timestep, repeats, axis=0 ) - self.__state.std_models_temp = np.repeat( - self.__state.std_models_temp, repeats, axis=0 + 
self.__state.std_models_timestep = np.repeat( + self.__state.std_models_timestep, repeats, axis=0 ) - self.__state.velocity_models_temp = np.repeat( - self.__state.velocity_models_temp, repeats, axis=0 + self.__state.velocity_models_timestep = np.repeat( + self.__state.velocity_models_timestep, repeats, axis=0 ) # For the prob. matching - self.__state.precip_models_temp = np.repeat( - self.__state.precip_models_temp, repeats, axis=0 + self.__state.precip_models_timestep = np.repeat( + self.__state.precip_models_timestep, repeats, axis=0 ) # Finally, for the model indices self.__state.n_model_indices = np.repeat( @@ -1325,7 +1346,7 @@ def __find_nowcast_NWP_combination(self, t): if self.__params.zero_precip_radar: # Use the velocity from velocity_models and take the average over # n_models (axis=0) - self.__velocity = np.mean(self.__state.velocity_models_temp, axis=0) + self.__velocity = np.mean(self.__state.velocity_models_timestep, axis=0) def __determine_skill_for_current_timestep(self, t): if t == 0: @@ -1334,18 +1355,22 @@ def __determine_skill_for_current_timestep(self, t): self.__params.rho_nwp_models = [ blending.skill_scores.spatial_correlation( obs=self.__state.precip_cascades[0, :, -1, :, :].copy(), - mod=self.__state.precip_models_cascades_temp[ + mod=self.__state.precip_models_cascades_timestep[ n_model, :, :, : ].copy(), domain_mask=self.__params.domain_mask, ) - for n_model in range(self.__state.precip_models_cascades_temp.shape[0]) + for n_model in range( + self.__state.precip_models_cascades_timestep.shape[0] + ) ] self.__params.rho_nwp_models = np.stack(self.__params.rho_nwp_models) # Ensure that the model skill decreases with increasing scale level. 
- for n_model in range(self.__state.precip_models_cascades_temp.shape[0]): - for i in range(1, self.__state.precip_models_cascades_temp.shape[1]): + for n_model in range(self.__state.precip_models_cascades_timestep.shape[0]): + for i in range( + 1, self.__state.precip_models_cascades_timestep.shape[1] + ): if ( self.__params.rho_nwp_models[n_model, i] > self.__params.rho_nwp_models[n_model, i - 1] @@ -1398,7 +1423,7 @@ def __determine_skill_for_next_timestep(self, t, j): lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[j], outdir_path=self.__config.outdir_path_skill, - n_model=self.__params.n_model_indices[j], + n_model=self.__state.n_model_indices[j], skill_kwargs=self.__config.clim_kwargs, ) # Concatenate rho_extrap_cascade and rho_nwp @@ -1430,11 +1455,11 @@ def __determine_weights_per_component(self): # determined after the extrapolation step in this method. if ( self.__config.blend_nwp_members - and self.__state.precip_models_cascades_temp.shape[0] > 1 + and self.__state.precip_models_cascades_timestep.shape[0] > 1 ): self.__state.weights_model_only = np.zeros( ( - self.__state.precip_models_cascades_temp.shape[0] + 1, + self.__state.precip_models_cascades_timestep.shape[0] + 1, self.__config.n_cascade_levels, ) ) @@ -1444,11 +1469,13 @@ def __determine_weights_per_component(self): covariance_nwp_models = np.corrcoef( np.stack( [ - self.__state.precip_models_cascades_temp[ + self.__state.precip_models_cascades_timestep[ n_model, i, :, : ].flatten() for n_model in range( - self.__state.precip_models_cascades_temp.shape[0] + self.__state.precip_models_cascades_timestep.shape[ + 0 + ] ) ] ) @@ -1618,12 +1645,12 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], - self.__state.velocity_models_temp, + self.__state.velocity_models_timestep, ), axis=0, ) else: - velocity_models = self.__state.velocity_models_temp[j] + 
velocity_models = self.__state.velocity_models_timestep[j] velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], @@ -1825,12 +1852,12 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], - self.__state.velocity_models_temp, + self.__state.velocity_models_timestep, ), axis=0, ) else: - velocity_models = self.__state.velocity_models_temp[j] + velocity_models = self.__state.velocity_models_timestep[j] velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], @@ -1899,57 +1926,82 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ] self.__state.noise_prev_subtimestep[j] = self.__state.precip_noise_cascades[j] - def __blend_cascades(self): - t_index = np.where(np.array(subtimesteps) == t_sub)[0][0] + def __blend_cascades(self, t_sub, j): + self.__state.t_index = np.where(np.array(self.__state.subtimesteps) == t_sub)[ + 0 + ][0] # First concatenate the cascades and the means and sigmas # precip_models = [n_models,timesteps,n_cascade_levels,m,n] - if blend_nwp_members: + if self.__config.blend_nwp_members: cascade_stack_all_components = np.concatenate( ( - precip_forecast_extrapolated_decomp_done[None, t_index], - precip_models_cascade_temp, - noise_extrapolated_decomp_done[None, t_index], + self.__state.precip_forecast_extrapolated_decomp_done[ + None, self.__state.t_index + ], + self.__state.precip_models_cascades_timestep, + self.__state.noise_extrapolated_decomp_done[ + None, self.__state.t_index + ], ), axis=0, ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] 
means_stacked = np.concatenate( - (mu_extrapolation[None, :], mu_models_temp), axis=0 + ( + self.__state.mean_extrapolation[None, :], + self.__state.mean_models_timestep, + ), + axis=0, ) sigmas_stacked = np.concatenate( - (sigma_extrapolation[None, :], sigma_models_temp), + ( + self.__state.std_extrapolation[None, :], + self.__state.std_models_timestep, + ), axis=0, ) else: cascade_stack_all_components = np.concatenate( ( - precip_forecast_extrapolated_decomp_done[None, t_index], - precip_models_cascade_temp[None, j], - noise_extrapolated_decomp_done[None, t_index], + self.__state.precip_forecast_extrapolated_decomp_done[ + None, self.__state.t_index + ], + self.__state.precip_models_cascades_timestep[None, j], + self.__state.noise_extrapolated_decomp_done[ + None, self.__state.t_index + ], ), axis=0, ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] means_stacked = np.concatenate( - (mu_extrapolation[None, :], mu_models_temp[None, j]), + ( + self.__state.mean_extrapolation[None, :], + self.__state.mean_models_timestep[None, j], + ), axis=0, ) sigmas_stacked = np.concatenate( ( - sigma_extrapolation[None, :], - sigma_models_temp[None, j], + self.__state.std_extrapolation[None, :], + self.__state.std_models_timestep[None, j], ), axis=0, ) # First determine the blending weights if method is spn. The # weights for method bps have already been determined. - if weights_method == "spn": - weights = np.zeros( + # TODO: no other weight method is possible, should we not al least give a user warning if a different weight + # method is given? Or does this mean that in all other circumstances the weights + # have been calculated in a different way? 
+ + # TODO: changed weights to self.__state.weights + if self.__config.weights_method == "spn": + self.__state.weights = np.zeros( ( cascade_stack_all_components.shape[0], - n_cascade_levels, + self.__config.n_cascade_levels, ) ) - for i in range(n_cascade_levels): + for i in range(self.__config.n_cascade_levels): # Determine the normalized covariance matrix (containing) # the cross-correlations between the models cascade_stack_all_components_temp = np.stack( @@ -1962,61 +2014,68 @@ def __blend_cascades(self): np.ma.masked_invalid(cascade_stack_all_components_temp) ) # Determine the weights for this cascade level - weights[:, i] = calculate_weights_spn( - correlations=rho_fc[:, i], + self.__state.weights[:, i] = calculate_weights_spn( + correlations=self.__state.rho_forecast[:, i], covariance=covariance_nwp_models, ) # Blend the extrapolation, (NWP) model(s) and noise cascades - precip_forecast_blended = blending.utils.blend_cascades( - cascades_norm=cascade_stack_all_components, weights=weights + self.__state.precip_forecast_blended = blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components, weights=self.__state.weights ) # Also blend the cascade without the extrapolation component - precip_forecast_blended_mod_only = blending.utils.blend_cascades( + self.__state.precip_forecast_blended_mod_only = blending.utils.blend_cascades( cascades_norm=cascade_stack_all_components[1:, :], - weights=weights_model_only, + weights=self.__state.weights_model_only, ) # Blend the means and standard deviations # Input is array of shape [number_components, scale_level, ...] 
- means_blended, sigmas_blended = blend_means_sigmas( - means=means_stacked, sigmas=sigmas_stacked, weights=weights + self.__state.means_blended, self.__state.sigmas_blended = blend_means_sigmas( + means=means_stacked, sigmas=sigmas_stacked, weights=self.__state.weights ) # Also blend the means and sigmas for the cascade without extrapolation + ( - means_blended_mod_only, - sigmas_blended_mod_only, + self.__state.means_blended_mod_only, + self.__state.sigmas_blended_mod_only, ) = blend_means_sigmas( means=means_stacked[1:, :], sigmas=sigmas_stacked[1:, :], - weights=weights_model_only, + weights=self.__state.weights_model_only, ) - def __recompose_cascade_to_rainfall_field(self): + def __recompose_cascade_to_rainfall_field(self, j): # 8.6 Recompose the cascade to a precipitation field # (The function first normalizes the blended cascade, precip_forecast_blended # again) - precip_forecast_recomposed = blending.utils.recompose_cascade( - combined_cascade=precip_forecast_blended, - combined_mean=means_blended, - combined_sigma=sigmas_blended, + self.__state.precip_forecast_recomposed = blending.utils.recompose_cascade( + combined_cascade=self.__state.precip_forecast_blended, + combined_mean=self.__state.means_blended, + combined_sigma=self.__state.sigmas_blended, ) # The recomposed cascade without the extrapolation (for NaN filling # outside the radar domain) - precip_forecast_recomposed_mod_only = blending.utils.recompose_cascade( - combined_cascade=precip_forecast_blended_mod_only, - combined_mean=means_blended_mod_only, - combined_sigma=sigmas_blended_mod_only, + self.__state.precip_forecast_recomposed_mod_only = ( + blending.utils.recompose_cascade( + combined_cascade=self.__state.precip_forecast_blended_mod_only, + combined_mean=self.__state.means_blended_mod_only, + combined_sigma=self.__state.sigmas_blended_mod_only, + ) ) - if domain == "spectral": + if self.__config.domain == "spectral": # TODO: Check this! 
(Only tested with domain == 'spatial') - precip_forecast_recomposed = fft_objs[j].irfft2(precip_forecast_recomposed) - precip_forecast_recomposed_mod_only = fft_objs[j].irfft2( - precip_forecast_recomposed_mod_only + + # TODO: what needs to happen with above TODO? + self.__state.precip_forecast_recomposed = self.__state.fft_objs[j].irfft2( + self.__state.precip_forecast_recomposed ) + self.__state.precip_forecast_recomposed_mod_only = self.__state.fft_objs[ + j + ].irfft2(self.__state.precip_forecast_recomposed_mod_only) - def __post_process_output(self, final_blended_forecast_single_member): + def __post_process_output(self, j): # 8.7 Post-processing steps - use the mask and fill no data with # the blended NWP forecast. Probability matching following # Lagrangian blended probability matching which uses the @@ -2027,12 +2086,14 @@ def __post_process_output(self, final_blended_forecast_single_member): # that is only used for post-processing steps) with the NWP # rainfall forecast for this time step using the weights # at scale level 2. 
- weights_probability_matching = weights[:-1, 1] # Weights without noise, level 2 + weights_probability_matching = self.__state.weights[ + :-1, 1 + ] # Weights without noise, level 2 weights_probability_matching_normalized = weights_probability_matching / np.sum( weights_probability_matching ) # And the weights for outside the radar domain - weights_probability_matching_mod_only = weights_model_only[ + weights_probability_matching_mod_only = self.__state.weights_model_only[ :-1, 1 ] # Weights without noise, level 2 weights_probability_matching_normalized_mod_only = ( @@ -2040,19 +2101,23 @@ def __post_process_output(self, final_blended_forecast_single_member): / np.sum(weights_probability_matching_mod_only) ) # Stack the fields - if blend_nwp_members: + if self.__config.blend_nwp_members: precip_forecast_probability_matching_final = np.concatenate( ( - precip_forecast_extrapolated_probability_matching[None, t_index], - precip_models_temp, + self.__state.precip_forecast_extrapolated_probability_matching[ + None, self.__state.t_index + ], + self.__state.precip_models_timestep, ), axis=0, ) else: precip_forecast_probability_matching_final = np.concatenate( ( - precip_forecast_extrapolated_probability_matching[None, t_index], - precip_models_temp[None, j], + self.__state.precip_forecast_extrapolated_probability_matching[ + None, self.__state.t_index + ], + self.__state.precip_models_timestep[None, j], ), axis=0, ) @@ -2064,20 +2129,20 @@ def __post_process_output(self, final_blended_forecast_single_member): * precip_forecast_probability_matching_final, axis=0, ) - if blend_nwp_members: + if self.__config.blend_nwp_members: precip_forecast_probability_matching_blended_mod_only = np.sum( weights_probability_matching_normalized_mod_only.reshape( weights_probability_matching_normalized_mod_only.shape[0], 1, 1, ) - * precip_models_temp, + * self.__state.precip_models_timestep, axis=0, ) else: - precip_forecast_probability_matching_blended_mod_only = precip_models_temp[ - j 
- ] + precip_forecast_probability_matching_blended_mod_only = ( + self.__state.precip_models_timestep[j] + ) # The extrapolation components are NaN outside the advected # radar domain. This results in NaN values in the blended @@ -2085,12 +2150,12 @@ def __post_process_output(self, final_blended_forecast_single_member): # areas with the "..._mod_only" blended forecasts, consisting # of the NWP and noise components. - nan_indices = np.isnan(precip_forecast_recomposed) - if smooth_radar_mask_range != 0: + nan_indices = np.isnan(self.__state.precip_forecast_recomposed) + if self.__config.smooth_radar_mask_range != 0: # Compute the smooth dilated mask new_mask = blending.utils.compute_smooth_dilated_mask( nan_indices, - max_padding_size_in_px=smooth_radar_mask_range, + max_padding_size_in_px=self.__config.smooth_radar_mask_range, ) # Ensure mask values are between 0 and 1 @@ -2099,10 +2164,10 @@ def __post_process_output(self, final_blended_forecast_single_member): # Handle NaNs in precip_forecast_new and precip_forecast_new_mod_only by setting NaNs to 0 in the blending step precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num( - precip_forecast_recomposed_mod_only, nan=0 + self.__state.precip_forecast_recomposed_mod_only, nan=0 ) precip_forecast_recomposed_no_nan = np.nan_to_num( - precip_forecast_recomposed, nan=0 + self.__state.precip_forecast_recomposed, nan=0 ) # Perform the blending of radar and model inside the radar domain using a weighted combination @@ -2114,7 +2179,6 @@ def __post_process_output(self, final_blended_forecast_single_member): axis=0, ) - nan_indices = np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended = np.nansum( [ precip_forecast_probability_matching_blended * mask_radar, @@ -2123,8 +2187,8 @@ def __post_process_output(self, final_blended_forecast_single_member): axis=0, ) else: - precip_forecast_recomposed[nan_indices] = ( - precip_forecast_recomposed_mod_only[nan_indices] + 
self.__state.precip_forecast_recomposed[nan_indices] = ( + self.__state.precip_forecast_recomposed_mod_only[nan_indices] ) nan_indices = np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended[nan_indices] = ( @@ -2133,20 +2197,23 @@ def __post_process_output(self, final_blended_forecast_single_member): # Finally, fill the remaining nan values, if present, with # the minimum value in the forecast - nan_indices = np.isnan(precip_forecast_recomposed) - precip_forecast_recomposed[nan_indices] = np.nanmin(precip_forecast_recomposed) + nan_indices = np.isnan(self.__state.precip_forecast_recomposed) + self.__state.precip_forecast_recomposed[nan_indices] = np.nanmin( + self.__state.precip_forecast_recomposed + ) nan_indices = np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended[nan_indices] = np.nanmin( precip_forecast_probability_matching_blended ) # 8.7.2. Apply the masking and prob. matching - if mask_method is not None: + precip_field_mask_temp = None + if self.__config.mask_method is not None: # apply the precipitation mask to prevent generation of new # precipitation into areas where it was not originally # observed - precip_forecast_min_value = precip_forecast_recomposed.min() - if mask_method == "incremental": + precip_forecast_min_value = self.__state.precip_forecast_recomposed.min() + if self.__config.mask_method == "incremental": # The incremental mask is slightly different from # the implementation in the non-blended steps.py, as # it is not based on the last forecast, but instead @@ -2154,39 +2221,49 @@ def __post_process_output(self, final_blended_forecast_single_member): # increase over time. 
# Get the mask for this forecast precip_field_mask = ( - precip_forecast_probability_matching_blended >= precip_thr + precip_forecast_probability_matching_blended + >= self.__config.precip_threshold ) # Buffer the mask precip_field_mask = _compute_incremental_mask( - precip_field_mask, struct, mask_rim + precip_field_mask, self.__params.struct, self.__params.mask_rim ) # Get the final mask - precip_forecast_recomposed = ( + self.__state.precip_forecast_recomposed = ( precip_forecast_min_value - + (precip_forecast_recomposed - precip_forecast_min_value) + + ( + self.__state.precip_forecast_recomposed + - precip_forecast_min_value + ) * precip_field_mask ) precip_field_mask_temp = ( - precip_forecast_recomposed > precip_forecast_min_value + self.__state.precip_forecast_recomposed > precip_forecast_min_value ) - elif mask_method == "obs": + elif self.__config.mask_method == "obs": # The mask equals the most recent benchmark # rainfall field precip_field_mask_temp = ( - precip_forecast_probability_matching_blended >= precip_thr + precip_forecast_probability_matching_blended + >= self.__config.precip_threshold ) # Set to min value outside of mask - precip_forecast_recomposed[~precip_field_mask_temp] = ( + self.__state.precip_forecast_recomposed[~precip_field_mask_temp] = ( precip_forecast_min_value ) # If probmatching_method is not None, resample the distribution from # both the extrapolation cascade and the model (NWP) cascade and use # that for the probability matching. - if probmatching_method is not None and resample_distribution: - arr1 = precip_forecast_extrapolated_probability_matching[t_index] - arr2 = precip_models_temp[j] + if ( + self.__config.probmatching_method is not None + and self.__config.resample_distribution + ): + arr1 = self.__state.precip_forecast_extrapolated_probability_matching[ + self.__state.t_index + ] + arr2 = self.__state.precip_models_timestep[j] # resample weights based on cascade level 2. 
# Areas where one of the fields is nan are not included. precip_forecast_probability_matching_resampled = ( @@ -2201,38 +2278,50 @@ def __post_process_output(self, final_blended_forecast_single_member): precip_forecast_probability_matching_blended.copy() ) - if probmatching_method == "cdf": + if self.__config.probmatching_method == "cdf": # nan indices in the extrapolation nowcast nan_indices = np.isnan( - precip_forecast_extrapolated_probability_matching[t_index] + self.__state.precip_forecast_extrapolated_probability_matching[ + self.__state.t_index + ] ) # Adjust the CDF of the forecast to match the resampled distribution combined from # extrapolation and model fields. # Rainfall outside the pure extrapolation domain is not taken into account. - if np.any(np.isfinite(precip_forecast_recomposed)): - precip_forecast_recomposed = probmatching.nonparam_match_empirical_cdf( - precip_forecast_recomposed, - precip_forecast_probability_matching_resampled, - nan_indices, + if np.any(np.isfinite(self.__state.precip_forecast_recomposed)): + self.__state.precip_forecast_recomposed = ( + probmatching.nonparam_match_empirical_cdf( + self.__state.precip_forecast_recomposed, + precip_forecast_probability_matching_resampled, + nan_indices, + ) ) precip_forecast_probability_matching_resampled = None - elif probmatching_method == "mean": + elif self.__config.probmatching_method == "mean": # Use R_pm_blended as benchmark field and mean_probabiltity_matching_forecast = np.mean( precip_forecast_probability_matching_resampled[ - precip_forecast_probability_matching_resampled >= precip_thr + precip_forecast_probability_matching_resampled + >= self.__config.precip_threshold ] ) - no_rain_mask = precip_forecast_recomposed >= precip_thr - mean_precip_forecast = np.mean(precip_forecast_recomposed[no_rain_mask]) - precip_forecast_recomposed[no_rain_mask] = ( - precip_forecast_recomposed[no_rain_mask] + no_rain_mask = ( + self.__state.precip_forecast_recomposed + >= 
self.__config.precip_threshold + ) + mean_precip_forecast = np.mean( + self.__state.precip_forecast_recomposed[no_rain_mask] + ) + self.__state.precip_forecast_recomposed[no_rain_mask] = ( + self.__state.precip_forecast_recomposed[no_rain_mask] - mean_precip_forecast + mean_probabiltity_matching_forecast ) precip_forecast_probability_matching_resampled = None - final_blended_forecast_single_member.append(precip_forecast_recomposed) + self.__state.final_blended_forecast_single_member.append( + self.__state.precip_forecast_recomposed + ) def __measure_time(self, label, start_time): """ diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 6efd97586..b61ee8e7f 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -19,7 +19,6 @@ from pysteps import extrapolation from pysteps import noise from pysteps import utils -from pysteps.decorators import deprecate_args from pysteps.nowcasts import utils as nowcast_utils from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation From 760c1850474c983b7d206585a479170461dc43a7 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 5 Dec 2024 10:46:27 +0100 Subject: [PATCH 42/65] Old forecast function is updated to fit newly refactored code --- pysteps/blending/steps.py | 1562 ++----------------------------------- 1 file changed, 55 insertions(+), 1507 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index e866d45c9..0e229a432 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -96,10 +96,10 @@ class StepsBlendingConfig: fft_method: str domain: str outdir_path_skill: str - extrap_kwargs: Dict[str, Any] = field(default_factory=dict) + extrapolation_kwargs: Dict[str, Any] = field(default_factory=dict) filter_kwargs: Dict[str, Any] = field(default_factory=dict) noise_kwargs: Dict[str, Any] = field(default_factory=dict) - vel_pert_kwargs: Dict[str, Any] = field(default_factory=dict) + 
velocity_perturbation_kwargs: Dict[str, Any] = field(default_factory=dict) clim_kwargs: Dict[str, Any] = field(default_factory=dict) mask_kwargs: Dict[str, Any] = field(default_factory=dict) measure_time: bool = False @@ -313,7 +313,7 @@ def __blended_nowcast_main(self): starttime_mainloop = time.time() # TODO: problem with the config here! This variable changes over time... # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! - self.__config.extrap_kwargs["return_displacement"] = True + self.__config.extrapolation_kwargs["return_displacement"] = True self.__state.precip_forecast_prev_subtimestep = deepcopy( self.__state.precip_cascades @@ -465,8 +465,8 @@ def __check_inputs(self): "or a four-dimensional array containing the original (NWP) model forecasts" ) - if self.__config.extrap_kwargs is None: - self.__config.extrap_kwargs = dict() + if self.__config.extrapolation_kwargs is None: + self.__config.extrapolation_kwargs = dict() if self.__config.filter_kwargs is None: self.__config.filter_kwargs = dict() @@ -474,8 +474,8 @@ def __check_inputs(self): if self.__config.noise_kwargs is None: self.__config.noise_kwargs = dict() - if self.__config.vel_pert_kwargs is None: - self.__config.vel_pert_kwargs = dict() + if self.__config.velocity_perturbation_kwargs is None: + self.__config.velocity_perturbation_kwargs = dict() if not self.__params.precip_models_provided_is_cascade: if self.__config.clim_kwargs is None: @@ -587,12 +587,12 @@ def __print_forecast_info(self): print(f"order of the AR(p) model: {self.__config.ar_order}") if self.__config.velocity_perturbation_method == "bps": self.__params.velocity_perturbations_parallel = ( - self.__config.vel_pert_kwargs.get( + self.__config.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) self.__params.velocity_perturbations_perpendicular = ( - self.__config.vel_pert_kwargs.get( + self.__config.velocity_perturbation_kwargs.get( 
"p_perp", noise.motion.get_default_params_bps_perp() ) ) @@ -685,7 +685,7 @@ def __prepare_radar_and_NWP_fields(self): self.__config.ar_order, self.__params.xy_coordinates, self.__params.extrapolation_method, - self.__config.extrap_kwargs, + self.__config.extrapolation_kwargs, self.__config.num_workers, ) @@ -1577,9 +1577,9 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # (or subtimesteps if non-integer time steps are given) # Settings and initialize the output - extrap_kwargs_ = self.__config.extrap_kwargs.copy() - extrap_kwargs_noise = self.__config.extrap_kwargs.copy() - extrap_kwargs_pb = self.__config.extrap_kwargs.copy() + extrap_kwargs_ = self.__config.extrapolation_kwargs.copy() + extrap_kwargs_noise = self.__config.extrapolation_kwargs.copy() + extrap_kwargs_pb = self.__config.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity # The following should be accesseble after this function self.__state.precip_forecast_extrapolated_decomp_done = [] @@ -1692,7 +1692,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( precip_forecast_recomp_subtimestep[self.__params.domain_mask] = np.nan # TODO: problem with the config here! This variable changes over time... # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! - self.__config.extrap_kwargs["displacement_prev"] = ( + self.__config.extrapolation_kwargs["displacement_prev"] = ( self.__state.previous_displacement[j] ) ( @@ -1703,7 +1703,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, - **self.__config.extrap_kwargs, + **self.__config.extrapolation_kwargs, ) precip_forecast_extrapolated_recomp_subtimestep = ( precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy() @@ -2757,1503 +2757,51 @@ def forecast( turns out to be a warranted functionality. 
""" - # 0.1 Start with some checks - _check_inputs(precip, precip_models, velocity, velocity_models, timesteps, ar_order) - - if extrap_kwargs is None: - extrap_kwargs = dict() - - if filter_kwargs is None: - filter_kwargs = dict() - - if noise_kwargs is None: - noise_kwargs = dict() - - if vel_pert_kwargs is None: - vel_pert_kwargs = dict() - - if clim_kwargs is None: - # Make sure clim_kwargs at least contains the number of models - clim_kwargs = dict({"n_models": precip_models.shape[0]}) - - if mask_kwargs is None: - mask_kwargs = dict() - - if np.any(~np.isfinite(velocity)): - raise ValueError("velocity contains non-finite values") - - if mask_method not in ["obs", "incremental", None]: - raise ValueError( - "unknown mask method %s: must be 'obs', 'incremental' or None" % mask_method - ) - - if conditional and precip_thr is None: - raise ValueError("conditional=True but precip_thr is not set") - - if mask_method is not None and precip_thr is None: - raise ValueError("mask_method!=None but precip_thr=None") - - if noise_stddev_adj not in ["auto", "fixed", None]: - raise ValueError( - "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None" - % noise_stddev_adj - ) - - if kmperpixel is None: - if vel_pert_method is not None: - raise ValueError("vel_pert_method is set but kmperpixel=None") - if mask_method == "incremental": - raise ValueError("mask_method='incremental' but kmperpixel=None") - - if timestep is None: - if vel_pert_method is not None: - raise ValueError("vel_pert_method is set but timestep=None") - if mask_method == "incremental": - raise ValueError("mask_method='incremental' but timestep=None") - - # 0.2 Log some settings - print("STEPS blending") - print("==============") - print("") - - print("Inputs") - print("------") - print(f"forecast issue time: {issuetime.isoformat()}") - print(f"input dimensions: {precip.shape[1]}x{precip.shape[2]}") - if kmperpixel is not None: - print(f"km/pixel: {kmperpixel}") - if timestep is not None: - 
print(f"time step: {timestep} minutes") - print("") - - print("NWP and blending inputs") - print("-----------------------") - print(f"number of (NWP) models: {precip_models.shape[0]}") - print(f"blend (NWP) model members: {blend_nwp_members}") - print(f"decompose (NWP) models: {'yes' if precip_models.ndim == 4 else 'no'}") - print("") - - print("Methods") - print("-------") - print(f"extrapolation: {extrap_method}") - print(f"bandpass filter: {bandpass_filter_method}") - print(f"decomposition: {decomp_method}") - print(f"noise generator: {noise_method}") - print(f"noise adjustment: {'yes' if noise_stddev_adj else 'no'}") - print(f"velocity perturbator: {vel_pert_method}") - print(f"blending weights method: {weights_method}") - print(f"conditional statistics: {'yes' if conditional else 'no'}") - print(f"precip. mask method: {mask_method}") - print(f"probability matching: {probmatching_method}") - print(f"FFT method: {fft_method}") - print(f"domain: {domain}") - print("") - - print("Parameters") - print("----------") - if isinstance(timesteps, int): - print(f"number of time steps: {timesteps}") - else: - print(f"time steps: {timesteps}") - print(f"ensemble size: {n_ens_members}") - print(f"parallel threads: {num_workers}") - print(f"number of cascade levels: {n_cascade_levels}") - print(f"order of the AR(p) model: {ar_order}") - if vel_pert_method == "bps": - vp_par = vel_pert_kwargs.get("p_par", noise.motion.get_default_params_bps_par()) - vp_perp = vel_pert_kwargs.get( - "p_perp", noise.motion.get_default_params_bps_perp() - ) - print(f"vel. pert., parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}") - print(f"vel. pert., perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}") - else: - vp_par, vp_perp = None, None - - if conditional or mask_method is not None: - print(f"precip. 
intensity threshold: {precip_thr}") - print(f"no-rain fraction threshold for radar: {norain_thr}") - print("") - - # 0.3 Get the methods that will be used - num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers - - if measure_time: - starttime_init = time.time() - - fft = utils.get_method(fft_method, shape=precip.shape[1:], n_threads=num_workers) - - precip_shape = precip.shape[1:] - - # initialize the band-pass filter - filter_method = cascade.get_method(bandpass_filter_method) - bp_filter = filter_method(precip_shape, n_cascade_levels, **filter_kwargs) - - decompositor, recompositor = cascade.get_method(decomp_method) - - extrapolator = extrapolation.get_method(extrap_method) - - x_values, y_values = np.meshgrid( - np.arange(precip.shape[2]), np.arange(precip.shape[1]) - ) - - xy_coords = np.stack([x_values, y_values]) - - precip = precip[-(ar_order + 1) :, :, :].copy() - - # determine the domain mask from non-finite values - domain_mask = np.logical_or.reduce( - [~np.isfinite(precip[i, :]) for i in range(precip.shape[0])] - ) - - # determine the precipitation threshold mask - if conditional: - MASK_thr = np.logical_and.reduce( - [precip[i, :, :] >= precip_thr for i in range(precip.shape[0])] - ) - else: - MASK_thr = None - - # we need to know the zerovalue of precip to replace the mask when decomposing after extrapolation - zerovalue = np.nanmin(precip) - - # 1. Start with the radar rainfall fields. 
We want the fields in a - # Lagrangian space - precip = _transform_to_lagrangian( - precip, velocity, ar_order, xy_coords, extrapolator, extrap_kwargs, num_workers + blending_config = StepsBlendingConfig( + n_ens_members=n_ens_members, + n_cascade_levels=n_cascade_levels, + precip_threshold=precip_thr, + kmperpixel=kmperpixel, + timestep=timestep, + extrapolation_method=extrap_method, + decomposition_method=decomp_method, + bandpass_filter_method=bandpass_filter_method, + noise_method=noise_method, + noise_stddev_adj=noise_stddev_adj, + ar_order=ar_order, + velocity_perturbation_method=vel_pert_method, + conditional=conditional, + probmatching_method=probmatching_method, + mask_method=mask_method, + seed=seed, + num_workers=num_workers, + fft_method=fft_method, + domain=domain, + extrapolation_kwargs=extrap_kwargs, + filter_kwargs=filter_kwargs, + noise_kwargs=noise_kwargs, + velocity_perturbation_kwargs=vel_pert_kwargs, + mask_kwargs=mask_kwargs, + measure_time=measure_time, + callback=callback, + return_output=return_output, ) - # 2. Perform the cascade decomposition for the input precip fields and - # and, if necessary, for the (NWP) model fields - # 2.1 Compute the cascade decompositions of the input precipitation fields - ( - precip_cascade, - mu_extrapolation, - sigma_extrapolation, - ) = _compute_cascade_decomposition_radar( + # Create an instance of the new class with all the provided arguments + blended_nowcaster = StepsBlendingNowcaster( precip, - ar_order, - n_cascade_levels, - n_ens_members, - MASK_thr, - domain, - bp_filter, - decompositor, - fft, - ) - - # 2.2 If necessary, recompose (NWP) model forecasts - precip_models_cascade = None - - if precip_models.ndim != 4: - precip_models_cascade = precip_models - precip_models = _compute_cascade_recomposition_nwp(precip_models, recompositor) - - # 2.3 Check for zero input fields in the radar and NWP data. 
- zero_precip_radar = blending.utils.check_norain(precip, precip_thr, norain_thr) - # The norain fraction threshold used for nwp is the default value of 0.0, - # since nwp does not suffer from clutter. - zero_model_fields = blending.utils.check_norain( - precip_models, precip_thr, norain_thr + precip_models, + velocity, + velocity_models, + timesteps, + issuetime, + blending_config, ) - if isinstance(timesteps, int): - timesteps = list(range(timesteps + 1)) - timestep_type = "int" - else: - original_timesteps = [0] + list(timesteps) - timesteps = nowcast_utils.binned_timesteps(original_timesteps) - timestep_type = "list" - - # 2.3.1 If precip is below the norain threshold and precip_models is zero, - # we consider it as no rain in the domain. - # The forecast will directly return an array filled with the minimum - # value present in precip (which equals zero rainfall in the used - # transformation) - if zero_precip_radar and zero_model_fields: - print( - "No precipitation above the threshold found in both the radar and NWP fields" - ) - print("The resulting forecast will contain only zeros") - # Create the output list - precip_forecast = [[] for j in range(n_ens_members)] - - # Save per time step to ensure the array does not become too large if - # no return_output is requested and callback is not None. 
- for t, subtimestep_idx in enumerate(timesteps): - # If the timestep is not the first one, we need to provide the zero forecast - if t > 0: - # Create an empty np array with shape [n_ens_members, rows, cols] - # and fill it with the minimum value from precip (corresponding to - # zero precipitation) - precip_forecast_workers = np.full( - (n_ens_members, precip_shape[0], precip_shape[1]), np.nanmin(precip) - ) - if subtimestep_idx: - if callback is not None: - if precip_forecast_workers.shape[1] > 0: - callback(precip_forecast_workers.squeeze()) - if return_output: - for j in range(n_ens_members): - precip_forecast[j].append(precip_forecast_workers[j]) - - precip_forecast_workers = None - - if measure_time: - zero_precip_time = time.time() - starttime_init - - if return_output: - precip_forecast_all_members_all_times = np.stack( - [np.stack(precip_forecast[j]) for j in range(n_ens_members)] - ) - if measure_time: - return ( - precip_forecast_all_members_all_times, - zero_precip_time, - zero_precip_time, - ) - else: - return precip_forecast_all_members_all_times - else: - return None - - else: - # 2.3.3 If zero_precip_radar, make sure that precip_cascade does not contain - # only nans or infs. If so, fill it with the zero value. 
- if zero_precip_radar: - # Look for a timestep and member with rain so that we have a sensible decomposition - done = False - for t in timesteps: - if done: - break - for j in range(precip_models.shape[0]): - if not blending.utils.check_norain( - precip_models[j, t], precip_thr, norain_thr - ): - if precip_models_cascade is not None: - precip_cascade[~np.isfinite(precip_cascade)] = np.nanmin( - precip_models_cascade[j, t]["cascade_levels"] - ) - continue - precip_models_cascade_temp = decompositor( - precip_models[j, t, :, :], - bp_filter=bp_filter, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - )["cascade_levels"] - precip_cascade[~np.isfinite(precip_cascade)] = np.nanmin( - precip_models_cascade_temp - ) - done = True - break - - # 2.3.5 If zero_precip_radar is True, only use the velocity field of the NWP - # forecast. I.e., velocity (radar) equals velocity_model at the first time - # step. - if zero_precip_radar: - # Use the velocity from velocity_models at time step 0 - velocity = velocity_models[:, 0, :, :, :].astype(np.float64, copy=False) - # Take the average over the first axis, which corresponds to n_models - # (hence, the model average) - velocity = np.mean(velocity, axis=0) - - # 3. Initialize the noise method. - # If zero_precip_radar is True, initialize noise based on the NWP field time - # step where the fraction of rainy cells is highest (because other lead times - # might be zero as well). 
Else, initialize the noise with the radar - # rainfall data - if zero_precip_radar: - precip_noise_input = _determine_max_nr_rainy_cells_nwp( - precip_models, precip_thr, precip_models.shape[0], timesteps - ) - # Make sure precip_noise_input is three dimensional - if len(precip_noise_input.shape) != 3: - precip_noise_input = precip_noise_input[np.newaxis, :, :] - else: - precip_noise_input = precip.copy() - - generate_perturb, generate_noise, noise_std_coeffs = _init_noise( - precip_noise_input, - precip_thr, - n_cascade_levels, - bp_filter, - decompositor, - fft, - noise_method, - noise_kwargs, - noise_stddev_adj, - measure_time, - num_workers, - seed, - ) - precip_noise_input = None - - # 4. Estimate AR parameters for the radar rainfall field - PHI = _estimate_ar_parameters_radar( - precip_cascade, - ar_order, - n_cascade_levels, - MASK_thr, - zero_precip_radar, - ) - - # 5. Repeat precip_cascade for n ensemble members - # First, discard all except the p-1 last cascades because they are not needed - # for the AR(p) model - - precip_cascade = np.stack( - [[precip_cascade[i][-ar_order:].copy() for i in range(n_cascade_levels)]] - * n_ens_members - ) - - # 6. Initialize all the random generators and prepare for the forecast loop - ( - randgen_precip, - velocity_perturbations, - generate_vel_noise, - ) = _init_random_generators( - velocity, - noise_method, - vel_pert_method, - vp_par, - vp_perp, - seed, - n_ens_members, - kmperpixel, - timestep, - ) - ( - previous_displacement, - previous_displacement_noise_cascade, - previous_displacement_prob_matching, - precip_forecast, - precip_forecast_non_perturbed, - mask_rim, - struct, - fft_objs, - ) = _prepare_forecast_loop( - precip_cascade, - noise_method, - fft_method, - n_cascade_levels, - n_ens_members, - mask_method, - mask_kwargs, - timestep, - kmperpixel, - ) - - # Also initialize the cascade of temporally correlated noise, which has the - # same shape as precip_cascade, but starts random noise. 
- noise_cascade, mu_noise, sigma_noise = _init_noise_cascade( - shape=precip_cascade.shape, - n_ens_members=n_ens_members, - n_cascade_levels=n_cascade_levels, - generate_noise=generate_noise, - decompositor=decompositor, - generate_perturb=generate_perturb, - randgen_precip=randgen_precip, - fft_objs=fft_objs, - bp_filter=bp_filter, - domain=domain, - noise_method=noise_method, - noise_std_coeffs=noise_std_coeffs, - ar_order=ar_order, - ) - - precip = precip[-1, :, :] - - # 7. initizalize the current and previous extrapolation forecast scale - # for the nowcasting component - rho_extrap_cascade_prev = np.repeat(1.0, PHI.shape[0]) - rho_extrap_cascade = PHI[:, 0] / ( - 1.0 - PHI[:, 1] - ) # phi1 / (1 - phi2), see BPS2004 - - if measure_time: - init_time = time.time() - starttime_init - - ### - # 8. Start the forecasting loop - ### - print("Starting blended nowcast computation.") - - if measure_time: - starttime_mainloop = time.time() - - extrap_kwargs["return_displacement"] = True - - precip_forc_prev_subtimestep = deepcopy(precip_cascade) - noise_prev_subtimestep = deepcopy(noise_cascade) - - t_prev_timestep = [0.0 for j in range(n_ens_members)] - t_leadtime_since_start_forecast = [0.0 for j in range(n_ens_members)] - - # iterate each time step - for t, subtimestep_idx in enumerate(timesteps): - if timestep_type == "list": - subtimesteps = [original_timesteps[t_] for t_ in subtimestep_idx] - else: - subtimesteps = [t] - - if (timestep_type == "list" and subtimesteps) or ( - timestep_type == "int" and t > 0 - ): - is_nowcast_time_step = True - else: - is_nowcast_time_step = False - - if is_nowcast_time_step: - print( - f"Computing nowcast for time step {t}... 
", - end="", - flush=True, - ) - - if measure_time: - starttime = time.time() - - if precip_models_cascade is not None: - decomp_precip_models = list(precip_models_cascade[:, t]) - else: - if precip_models.shape[0] == 1: - decomp_precip_models = [ - decompositor( - precip_models[0, t, :, :], - bp_filter=bp_filter, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ) - ] - else: - with ThreadPool(num_workers) as pool: - decomp_precip_models = pool.map( - partial( - decompositor, - bp_filter=bp_filter, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ), - list(precip_models[:, t, :, :]), - ) - - precip_models_cascade_temp = np.array( - [decomp["cascade_levels"] for decomp in decomp_precip_models] - ) - mu_models_temp = np.array( - [decomp["means"] for decomp in decomp_precip_models] - ) - sigma_models_temp = np.array( - [decomp["stds"] for decomp in decomp_precip_models] - ) - - # 2.3.4 Check if the NWP fields contain nans or infinite numbers. If so, - # fill these with the minimum value present in precip (corresponding to - # zero rainfall in the radar observations) - ( - precip_models_cascade_temp, - precip_models_temp, - mu_models_temp, - sigma_models_temp, - ) = _fill_nans_infs_nwp_cascade( - precip_models_cascade_temp, - precip_models[:, t, :, :].astype(np.float64, copy=False), - precip_cascade, - precip, - mu_models_temp, - sigma_models_temp, - ) - - # 8.1.1 Before calling the worker for the forecast loop, determine which (NWP) - # models will be combined with which nowcast ensemble members. With the - # way it is implemented at this moment: n_ens_members of the output equals - # the maximum number of (ensemble) members in the input (either the nowcasts or NWP). 
- ( - precip_models_cascade_temp, - precip_models_temp, - velocity_models_temp, - mu_models_temp, - sigma_models_temp, - n_model_indices, - ) = _find_nwp_combination( - precip_models_cascade_temp, - precip_models_temp, - velocity_models[:, t, :, :, :].astype(np.float64, copy=False), - mu_models_temp, - sigma_models_temp, - n_ens_members, - ar_order, - n_cascade_levels, - blend_nwp_members, - ) - - # If zero_precip_radar is True, set the velocity field equal to the NWP - # velocity field for the current time step (velocity_models_temp). - if zero_precip_radar: - # Use the velocity from velocity_models and take the average over - # n_models (axis=0) - velocity = np.mean(velocity_models_temp, axis=0) - - if t == 0: - # 8.1.2 Calculate the initial skill of the (NWP) model forecasts at t=0 - rho_nwp_models = _compute_initial_nwp_skill( - precip_cascade, - precip_models_cascade_temp, - domain_mask, - issuetime, - outdir_path_skill, - clim_kwargs, - ) - - if t > 0: - # 8.1.3 Determine the skill of the components for lead time (t0 + t) - # First for the extrapolation component. Only calculate it when t > 0. 
- ( - rho_extrap_cascade, - rho_extrap_cascade_prev, - ) = blending.skill_scores.lt_dependent_cor_extrapolation( - PHI=PHI, - correlations=rho_extrap_cascade, - correlations_prev=rho_extrap_cascade_prev, - ) - - # the nowcast iteration for each ensemble member - precip_forecast_workers = [None for _ in range(n_ens_members)] - - def worker(j): - # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) - # Then for the model components - if blend_nwp_members: - rho_nwp_fc = [ - blending.skill_scores.lt_dependent_cor_nwp( - lt=(t * int(timestep)), - correlations=rho_nwp_models[n_model], - outdir_path=outdir_path_skill, - n_model=n_model, - skill_kwargs=clim_kwargs, - ) - for n_model in range(rho_nwp_models.shape[0]) - ] - rho_nwp_fc = np.stack(rho_nwp_fc) - # Concatenate rho_extrap_cascade and rho_nwp - rho_fc = np.concatenate( - (rho_extrap_cascade[None, :], rho_nwp_fc), axis=0 - ) - else: - rho_nwp_fc = blending.skill_scores.lt_dependent_cor_nwp( - lt=(t * int(timestep)), - correlations=rho_nwp_models[j], - outdir_path=outdir_path_skill, - n_model=n_model_indices[j], - skill_kwargs=clim_kwargs, - ) - # Concatenate rho_extrap_cascade and rho_nwp - rho_fc = np.concatenate( - (rho_extrap_cascade[None, :], rho_nwp_fc[None, :]), axis=0 - ) - - # 8.2 Determine the weights per component - - # Weights following the bps method. These are needed for the velocity - # weights prior to the advection step. If weights method spn is - # selected, weights will be overwritten with those weights prior to - # blending step. - # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] - weights = calculate_weights_bps(rho_fc) - - # The model only weights - if weights_method == "bps": - # Determine the weights of the components without the extrapolation - # cascade, in case this is no data or outside the mask. 
- weights_model_only = calculate_weights_bps(rho_fc[1:, :]) - elif weights_method == "spn": - # Only the weights of the components without the extrapolation - # cascade will be determined here. The full set of weights are - # determined after the extrapolation step in this method. - if blend_nwp_members and precip_models_cascade_temp.shape[0] > 1: - weights_model_only = np.zeros( - (precip_models_cascade_temp.shape[0] + 1, n_cascade_levels) - ) - for i in range(n_cascade_levels): - # Determine the normalized covariance matrix (containing) - # the cross-correlations between the models - covariance_nwp_models = np.corrcoef( - np.stack( - [ - precip_models_cascade_temp[ - n_model, i, :, : - ].flatten() - for n_model in range( - precip_models_cascade_temp.shape[0] - ) - ] - ) - ) - # Determine the weights for this cascade level - weights_model_only[:, i] = calculate_weights_spn( - correlations=rho_fc[1:, i], - covariance=covariance_nwp_models, - ) - else: - # Same as correlation and noise is 1 - correlation - weights_model_only = calculate_weights_bps(rho_fc[1:, :]) - else: - raise ValueError( - "Unknown weights method %s: must be 'bps' or 'spn'" - % weights_method - ) - - # 8.3 Determine the noise cascade and regress this to the subsequent - # time step + regress the extrapolation component to the subsequent - # time step - - # 8.3.1 Determine the epsilon, a cascade of temporally independent - # but spatially correlated noise - if noise_method is not None: - # generate noise field - epsilon = generate_noise( - generate_perturb, - randstate=randgen_precip[j], - fft_method=fft_objs[j], - domain=domain, - ) - - # decompose the noise field into a cascade - epsilon_decomposed = decompositor( - epsilon, - bp_filter, - fft_method=fft_objs[j], - input_domain=domain, - output_domain=domain, - compute_stats=True, - normalize=True, - compact_output=True, - ) - else: - epsilon_decomposed = None - - # 8.3.2 regress the extrapolation component to the subsequent time - # step - # 
iterate the AR(p) model for each cascade level - for i in range(n_cascade_levels): - # apply AR(p) process to extrapolation cascade level - if epsilon_decomposed is not None or vel_pert_method is not None: - precip_cascade[j][i] = autoregression.iterate_ar_model( - precip_cascade[j][i], PHI[i, :] - ) - # Renormalize the cascade - precip_cascade[j][i][1] /= np.std(precip_cascade[j][i][1]) - else: - # use the deterministic AR(p) model computed above if - # perturbations are disabled - precip_cascade[j][i] = precip_forecast_non_perturbed[i] - - # 8.3.3 regress the noise component to the subsequent time step - # iterate the AR(p) model for each cascade level - for i in range(n_cascade_levels): - # normalize the noise cascade - if epsilon_decomposed is not None: - epsilon_temp = epsilon_decomposed["cascade_levels"][i] - epsilon_temp *= noise_std_coeffs[i] - else: - epsilon_temp = None - # apply AR(p) process to noise cascade level - # (Returns zero noise if epsilon_decomposed is None) - noise_cascade[j][i] = autoregression.iterate_ar_model( - noise_cascade[j][i], PHI[i, :], eps=epsilon_temp - ) - - epsilon_decomposed = None - epsilon_temp = None - - # 8.4 Perturb and blend the advection fields + advect the - # extrapolation and noise cascade to the current time step - # (or subtimesteps if non-integer time steps are given) - - # Settings and initialize the output - extrap_kwargs_ = extrap_kwargs.copy() - extrap_kwargs_noise = extrap_kwargs.copy() - extrap_kwargs_pb = extrap_kwargs.copy() - velocity_perturbations_extrapolation = velocity - precip_forecast_extrapolated_decomp_done = [] - noise_extrapolated_decomp_done = [] - precip_forecast_extrapolated_probability_matching = [] - - # Extrapolate per sub time step - for t_sub in subtimesteps: - if t_sub > 0: - t_diff_prev_subtimestep_int = t_sub - int(t_sub) - if t_diff_prev_subtimestep_int > 0.0: - precip_forecast_cascade_subtimestep = [ - (1.0 - t_diff_prev_subtimestep_int) - * precip_forc_prev_subtimestep[j][i][-1, :] 
- + t_diff_prev_subtimestep_int - * precip_cascade[j][i][-1, :] - for i in range(n_cascade_levels) - ] - noise_cascade_subtimestep = [ - (1.0 - t_diff_prev_subtimestep_int) - * noise_prev_subtimestep[j][i][-1, :] - + t_diff_prev_subtimestep_int - * noise_cascade[j][i][-1, :] - for i in range(n_cascade_levels) - ] - - else: - precip_forecast_cascade_subtimestep = [ - precip_forc_prev_subtimestep[j][i][-1, :] - for i in range(n_cascade_levels) - ] - noise_cascade_subtimestep = [ - noise_prev_subtimestep[j][i][-1, :] - for i in range(n_cascade_levels) - ] - - precip_forecast_cascade_subtimestep = np.stack( - precip_forecast_cascade_subtimestep - ) - noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) - - t_diff_prev_subtimestep = t_sub - t_prev_timestep[j] - t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep - - # compute the perturbed motion field - include the NWP - # velocities and the weights. Note that we only perturb - # the extrapolation velocity field, as the NWP velocity - # field is present per time step - if vel_pert_method is not None: - velocity_perturbations_extrapolation = ( - velocity - + generate_vel_noise( - velocity_perturbations[j], - t_leadtime_since_start_forecast[j] * timestep, - ) - ) - - # Stack the perturbed extrapolation and the NWP velocities - if blend_nwp_members: - velocity_stack_all = np.concatenate( - ( - velocity_perturbations_extrapolation[None, :, :, :], - velocity_models_temp, - ), - axis=0, - ) - else: - velocity_models = velocity_models_temp[j] - velocity_stack_all = np.concatenate( - ( - velocity_perturbations_extrapolation[None, :, :, :], - velocity_models[None, :, :, :], - ), - axis=0, - ) - velocity_models = None - - # Obtain a blended optical flow, using the weights of the - # second cascade following eq. 
24 in BPS2006 - velocity_blended = blending.utils.blend_optical_flows( - flows=velocity_stack_all, - weights=weights[ - :-1, 1 - ], # [(extr_field, n_model_fields), cascade_level=2] - ) - - # Extrapolate both cascades to the next time step - # First recompose the cascade, advect it and decompose it again - # This is needed to remove the interpolation artifacts. - # In addition, the number of extrapolations is greatly reduced - # A. Radar Rain - precip_forecast_recomp_subtimestep = ( - blending.utils.recompose_cascade( - combined_cascade=precip_forecast_cascade_subtimestep, - combined_mean=mu_extrapolation, - combined_sigma=sigma_extrapolation, - ) - ) - # Make sure we have values outside the mask - if zero_precip_radar: - precip_forecast_recomp_subtimestep = np.nan_to_num( - precip_forecast_recomp_subtimestep, - copy=True, - nan=zerovalue, - posinf=zerovalue, - neginf=zerovalue, - ) - # Put back the mask - precip_forecast_recomp_subtimestep[domain_mask] = np.nan - extrap_kwargs["displacement_prev"] = previous_displacement[j] - ( - precip_forecast_extrapolated_recomp_subtimestep_temp, - previous_displacement[j], - ) = extrapolator( - precip_forecast_recomp_subtimestep, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs, - ) - precip_forecast_extrapolated_recomp_subtimestep = ( - precip_forecast_extrapolated_recomp_subtimestep_temp[ - 0 - ].copy() - ) - temp_mask = ~np.isfinite( - precip_forecast_extrapolated_recomp_subtimestep - ) - # TODO WHERE DO CAN I FIND THIS -15.0 - precip_forecast_extrapolated_recomp_subtimestep[ - ~np.isfinite( - precip_forecast_extrapolated_recomp_subtimestep - ) - ] = zerovalue - precip_forecast_extrapolated_decomp = decompositor( - precip_forecast_extrapolated_recomp_subtimestep, - bp_filter, - mask=MASK_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - )["cascade_levels"] - # Make sure we have values outside the mask - if 
zero_precip_radar: - precip_forecast_extrapolated_decomp = np.nan_to_num( - precip_forecast_extrapolated_decomp, - copy=True, - nan=np.nanmin(precip_forecast_cascade_subtimestep), - posinf=np.nanmin(precip_forecast_cascade_subtimestep), - neginf=np.nanmin(precip_forecast_cascade_subtimestep), - ) - for i in range(n_cascade_levels): - precip_forecast_extrapolated_decomp[i][temp_mask] = np.nan - # B. Noise - noise_cascade_subtimestep_recomp = ( - blending.utils.recompose_cascade( - combined_cascade=noise_cascade_subtimestep, - combined_mean=mu_noise[j], - combined_sigma=sigma_noise[j], - ) - ) - extrap_kwargs_noise["displacement_prev"] = ( - previous_displacement_noise_cascade[j] - ) - extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - ( - noise_extrapolated_recomp_temp, - previous_displacement_noise_cascade[j], - ) = extrapolator( - noise_cascade_subtimestep_recomp, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_noise, - ) - noise_extrapolated_recomp = noise_extrapolated_recomp_temp[ - 0 - ].copy() - noise_extrapolated_decomp = decompositor( - noise_extrapolated_recomp, - bp_filter, - mask=MASK_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - )["cascade_levels"] - for i in range(n_cascade_levels): - noise_extrapolated_decomp[i] *= noise_std_coeffs[i] - - # Append the results to the output lists - precip_forecast_extrapolated_decomp_done.append( - precip_forecast_extrapolated_decomp.copy() - ) - noise_extrapolated_decomp_done.append( - noise_extrapolated_decomp.copy() - ) - precip_forecast_cascade_subtimestep = None - precip_forecast_recomp_subtimestep = None - precip_forecast_extrapolated_recomp_subtimestep_temp = None - precip_forecast_extrapolated_recomp_subtimestep = None - precip_forecast_extrapolated_decomp = None - noise_cascade_subtimestep = None - noise_cascade_subtimestep_recomp = None - noise_extrapolated_recomp_temp = None - 
noise_extrapolated_recomp = None - noise_extrapolated_decomp = None - - # Finally, also extrapolate the initial radar rainfall - # field. This will be blended with the rainfall field(s) - # of the (NWP) model(s) for Lagrangian blended prob. matching - # min_R = np.min(precip) - extrap_kwargs_pb["displacement_prev"] = ( - previous_displacement_prob_matching[j] - ) - # Apply the domain mask to the extrapolation component - precip_forecast_temp_for_probability_matching = precip.copy() - precip_forecast_temp_for_probability_matching[domain_mask] = ( - np.nan - ) - ( - precip_forecast_extrapolated_probability_matching_temp, - previous_displacement_prob_matching[j], - ) = extrapolator( - precip_forecast_temp_for_probability_matching, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_pb, - ) - precip_forecast_extrapolated_probability_matching.append( - precip_forecast_extrapolated_probability_matching_temp[0] - ) - - t_prev_timestep[j] = t_sub - - if len(precip_forecast_extrapolated_decomp_done) > 0: - precip_forecast_extrapolated_decomp_done = np.stack( - precip_forecast_extrapolated_decomp_done - ) - noise_extrapolated_decomp_done = np.stack( - noise_extrapolated_decomp_done - ) - precip_forecast_extrapolated_probability_matching = np.stack( - precip_forecast_extrapolated_probability_matching - ) - - # advect the forecast field by one time step if no subtimesteps in the - # current interval were found - if not subtimesteps: - t_diff_prev_subtimestep = t + 1 - t_prev_timestep[j] - t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep - - # compute the perturbed motion field - include the NWP - # velocities and the weights - if vel_pert_method is not None: - velocity_perturbations_extrapolation = ( - velocity - + generate_vel_noise( - velocity_perturbations[j], - t_leadtime_since_start_forecast[j] * timestep, - ) - ) - - # Stack the perturbed extrapolation and the NWP velocities - if blend_nwp_members: - 
velocity_stack_all = np.concatenate( - ( - velocity_perturbations_extrapolation[None, :, :, :], - velocity_models_temp, - ), - axis=0, - ) - else: - velocity_models = velocity_models_temp[j] - velocity_stack_all = np.concatenate( - ( - velocity_perturbations_extrapolation[None, :, :, :], - velocity_models[None, :, :, :], - ), - axis=0, - ) - velocity_models = None - - # Obtain a blended optical flow, using the weights of the - # second cascade following eq. 24 in BPS2006 - velocity_blended = blending.utils.blend_optical_flows( - flows=velocity_stack_all, - weights=weights[ - :-1, 1 - ], # [(extr_field, n_model_fields), cascade_level=2] - ) - - # Extrapolate the extrapolation and noise cascade - - extrap_kwargs_["displacement_prev"] = previous_displacement[j] - extrap_kwargs_noise["displacement_prev"] = ( - previous_displacement_noise_cascade[j] - ) - extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - - _, previous_displacement[j] = extrapolator( - None, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_, - ) - - _, previous_displacement_noise_cascade[j] = extrapolator( - None, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_noise, - ) - - # Also extrapolate the radar observation, used for the probability - # matching and post-processing steps - extrap_kwargs_pb["displacement_prev"] = ( - previous_displacement_prob_matching[j] - ) - _, previous_displacement_prob_matching[j] = extrapolator( - None, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_pb, - ) - - t_prev_timestep[j] = t + 1 - - precip_forc_prev_subtimestep[j] = precip_cascade[j] - noise_prev_subtimestep[j] = noise_cascade[j] - - # 8.5 Blend the cascades - final_blended_forecast = [] - - for t_sub in subtimesteps: - # TODO: does it make sense to use sub time steps - check if it works? 
- if t_sub > 0: - t_index = np.where(np.array(subtimesteps) == t_sub)[0][0] - # First concatenate the cascades and the means and sigmas - # precip_models = [n_models,timesteps,n_cascade_levels,m,n] - if blend_nwp_members: - cascade_stack_all_components = np.concatenate( - ( - precip_forecast_extrapolated_decomp_done[ - None, t_index - ], - precip_models_cascade_temp, - noise_extrapolated_decomp_done[None, t_index], - ), - axis=0, - ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] - means_stacked = np.concatenate( - (mu_extrapolation[None, :], mu_models_temp), axis=0 - ) - sigmas_stacked = np.concatenate( - (sigma_extrapolation[None, :], sigma_models_temp), - axis=0, - ) - else: - cascade_stack_all_components = np.concatenate( - ( - precip_forecast_extrapolated_decomp_done[ - None, t_index - ], - precip_models_cascade_temp[None, j], - noise_extrapolated_decomp_done[None, t_index], - ), - axis=0, - ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] - means_stacked = np.concatenate( - (mu_extrapolation[None, :], mu_models_temp[None, j]), - axis=0, - ) - sigmas_stacked = np.concatenate( - ( - sigma_extrapolation[None, :], - sigma_models_temp[None, j], - ), - axis=0, - ) - - # First determine the blending weights if method is spn. The - # weights for method bps have already been determined. 
- if weights_method == "spn": - weights = np.zeros( - ( - cascade_stack_all_components.shape[0], - n_cascade_levels, - ) - ) - for i in range(n_cascade_levels): - # Determine the normalized covariance matrix (containing) - # the cross-correlations between the models - cascade_stack_all_components_temp = np.stack( - [ - cascade_stack_all_components[ - n_model, i, :, : - ].flatten() - for n_model in range( - cascade_stack_all_components.shape[0] - 1 - ) - ] - ) # -1 to exclude the noise component - covariance_nwp_models = np.ma.corrcoef( - np.ma.masked_invalid( - cascade_stack_all_components_temp - ) - ) - # Determine the weights for this cascade level - weights[:, i] = calculate_weights_spn( - correlations=rho_fc[:, i], - covariance=covariance_nwp_models, - ) - - # Blend the extrapolation, (NWP) model(s) and noise cascades - precip_forecast_blended = blending.utils.blend_cascades( - cascades_norm=cascade_stack_all_components, weights=weights - ) - - # Also blend the cascade without the extrapolation component - precip_forecast_blended_mod_only = ( - blending.utils.blend_cascades( - cascades_norm=cascade_stack_all_components[1:, :], - weights=weights_model_only, - ) - ) - - # Blend the means and standard deviations - # Input is array of shape [number_components, scale_level, ...] 
- means_blended, sigmas_blended = blend_means_sigmas( - means=means_stacked, sigmas=sigmas_stacked, weights=weights - ) - # Also blend the means and sigmas for the cascade without extrapolation - ( - means_blended_mod_only, - sigmas_blended_mod_only, - ) = blend_means_sigmas( - means=means_stacked[1:, :], - sigmas=sigmas_stacked[1:, :], - weights=weights_model_only, - ) - - # 8.6 Recompose the cascade to a precipitation field - # (The function first normalizes the blended cascade, precip_forecast_blended - # again) - precip_forecast_recomposed = blending.utils.recompose_cascade( - combined_cascade=precip_forecast_blended, - combined_mean=means_blended, - combined_sigma=sigmas_blended, - ) - # The recomposed cascade without the extrapolation (for NaN filling - # outside the radar domain) - precip_forecast_recomposed_mod_only = ( - blending.utils.recompose_cascade( - combined_cascade=precip_forecast_blended_mod_only, - combined_mean=means_blended_mod_only, - combined_sigma=sigmas_blended_mod_only, - ) - ) - if domain == "spectral": - # TODO: Check this! (Only tested with domain == 'spatial') - precip_forecast_recomposed = fft_objs[j].irfft2( - precip_forecast_recomposed - ) - precip_forecast_recomposed_mod_only = fft_objs[j].irfft2( - precip_forecast_recomposed_mod_only - ) - - # 8.7 Post-processing steps - use the mask and fill no data with - # the blended NWP forecast. Probability matching following - # Lagrangian blended probability matching which uses the - # latest extrapolated radar rainfall field blended with the - # nwp model(s) rainfall forecast fields as 'benchmark'. - - # 8.7.1 first blend the extrapolated rainfall field (the field - # that is only used for post-processing steps) with the NWP - # rainfall forecast for this time step using the weights - # at scale level 2. 
- weights_probability_matching = weights[ - :-1, 1 - ] # Weights without noise, level 2 - weights_probability_matching_normalized = ( - weights_probability_matching - / np.sum(weights_probability_matching) - ) - # And the weights for outside the radar domain - weights_probability_matching_mod_only = weights_model_only[ - :-1, 1 - ] # Weights without noise, level 2 - weights_probability_matching_normalized_mod_only = ( - weights_probability_matching_mod_only - / np.sum(weights_probability_matching_mod_only) - ) - # Stack the fields - if blend_nwp_members: - precip_forecast_probability_matching_final = np.concatenate( - ( - precip_forecast_extrapolated_probability_matching[ - None, t_index - ], - precip_models_temp, - ), - axis=0, - ) - else: - precip_forecast_probability_matching_final = np.concatenate( - ( - precip_forecast_extrapolated_probability_matching[ - None, t_index - ], - precip_models_temp[None, j], - ), - axis=0, - ) - # Blend it - precip_forecast_probability_matching_blended = np.sum( - weights_probability_matching_normalized.reshape( - weights_probability_matching_normalized.shape[0], 1, 1 - ) - * precip_forecast_probability_matching_final, - axis=0, - ) - if blend_nwp_members: - precip_forecast_probability_matching_blended_mod_only = np.sum( - weights_probability_matching_normalized_mod_only.reshape( - weights_probability_matching_normalized_mod_only.shape[ - 0 - ], - 1, - 1, - ) - * precip_models_temp, - axis=0, - ) - else: - precip_forecast_probability_matching_blended_mod_only = ( - precip_models_temp[j] - ) - - # The extrapolation components are NaN outside the advected - # radar domain. This results in NaN values in the blended - # forecast outside the radar domain. Therefore, fill these - # areas with the "..._mod_only" blended forecasts, consisting - # of the NWP and noise components. 
- - nan_indices = np.isnan(precip_forecast_recomposed) - if smooth_radar_mask_range != 0: - # Compute the smooth dilated mask - new_mask = blending.utils.compute_smooth_dilated_mask( - nan_indices, - max_padding_size_in_px=smooth_radar_mask_range, - ) - - # Ensure mask values are between 0 and 1 - mask_model = np.clip(new_mask, 0, 1) - mask_radar = np.clip(1 - new_mask, 0, 1) - - # Handle NaNs in precip_forecast_new and precip_forecast_new_mod_only by setting NaNs to 0 in the blending step - precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num( - precip_forecast_recomposed_mod_only, nan=0 - ) - precip_forecast_recomposed_no_nan = np.nan_to_num( - precip_forecast_recomposed, nan=0 - ) - - # Perform the blending of radar and model inside the radar domain using a weighted combination - precip_forecast_recomposed = np.nansum( - [ - mask_model - * precip_forecast_recomposed_mod_only_no_nan, - mask_radar * precip_forecast_recomposed_no_nan, - ], - axis=0, - ) - - nan_indices = np.isnan( - precip_forecast_probability_matching_blended - ) - precip_forecast_probability_matching_blended = np.nansum( - [ - precip_forecast_probability_matching_blended - * mask_radar, - precip_forecast_probability_matching_blended_mod_only - * mask_model, - ], - axis=0, - ) - else: - precip_forecast_recomposed[nan_indices] = ( - precip_forecast_recomposed_mod_only[nan_indices] - ) - nan_indices = np.isnan( - precip_forecast_probability_matching_blended - ) - precip_forecast_probability_matching_blended[ - nan_indices - ] = precip_forecast_probability_matching_blended_mod_only[ - nan_indices - ] - - # Finally, fill the remaining nan values, if present, with - # the minimum value in the forecast - nan_indices = np.isnan(precip_forecast_recomposed) - precip_forecast_recomposed[nan_indices] = np.nanmin( - precip_forecast_recomposed - ) - nan_indices = np.isnan( - precip_forecast_probability_matching_blended - ) - precip_forecast_probability_matching_blended[nan_indices] = ( - 
np.nanmin(precip_forecast_probability_matching_blended) - ) - - # 8.7.2. Apply the masking and prob. matching - if mask_method is not None: - # apply the precipitation mask to prevent generation of new - # precipitation into areas where it was not originally - # observed - precip_forecast_min_value = precip_forecast_recomposed.min() - if mask_method == "incremental": - # The incremental mask is slightly different from - # the implementation in the non-blended steps.py, as - # it is not based on the last forecast, but instead - # on R_pm_blended. Therefore, the buffer does not - # increase over time. - # Get the mask for this forecast - precip_field_mask = ( - precip_forecast_probability_matching_blended - >= precip_thr - ) - # Buffer the mask - precip_field_mask = _compute_incremental_mask( - precip_field_mask, struct, mask_rim - ) - # Get the final mask - precip_forecast_recomposed = ( - precip_forecast_min_value - + ( - precip_forecast_recomposed - - precip_forecast_min_value - ) - * precip_field_mask - ) - precip_field_mask_temp = ( - precip_forecast_recomposed - > precip_forecast_min_value - ) - elif mask_method == "obs": - # The mask equals the most recent benchmark - # rainfall field - precip_field_mask_temp = ( - precip_forecast_probability_matching_blended - >= precip_thr - ) - - # Set to min value outside of mask - precip_forecast_recomposed[~precip_field_mask_temp] = ( - precip_forecast_min_value - ) - - # If probmatching_method is not None, resample the distribution from - # both the extrapolation cascade and the model (NWP) cascade and use - # that for the probability matching. - if probmatching_method is not None and resample_distribution: - arr1 = precip_forecast_extrapolated_probability_matching[ - t_index - ] - arr2 = precip_models_temp[j] - # resample weights based on cascade level 2. - # Areas where one of the fields is nan are not included. 
- precip_forecast_probability_matching_resampled = probmatching.resample_distributions( - first_array=arr1, - second_array=arr2, - probability_first_array=weights_probability_matching_normalized[ - 0 - ], - ) - else: - precip_forecast_probability_matching_resampled = ( - precip_forecast_probability_matching_blended.copy() - ) - - if probmatching_method == "cdf": - # nan indices in the extrapolation nowcast - nan_indices = np.isnan( - precip_forecast_extrapolated_probability_matching[ - t_index - ] - ) - # Adjust the CDF of the forecast to match the resampled distribution combined from - # extrapolation and model fields. - # Rainfall outside the pure extrapolation domain is not taken into account. - if np.any(np.isfinite(precip_forecast_recomposed)): - precip_forecast_recomposed = ( - probmatching.nonparam_match_empirical_cdf( - precip_forecast_recomposed, - precip_forecast_probability_matching_resampled, - nan_indices, - ) - ) - precip_forecast_probability_matching_resampled = None - elif probmatching_method == "mean": - # Use R_pm_blended as benchmark field and - mean_probabiltity_matching_forecast = np.mean( - precip_forecast_probability_matching_resampled[ - precip_forecast_probability_matching_resampled - >= precip_thr - ] - ) - no_rain_mask = precip_forecast_recomposed >= precip_thr - mean_precip_forecast = np.mean( - precip_forecast_recomposed[no_rain_mask] - ) - precip_forecast_recomposed[no_rain_mask] = ( - precip_forecast_recomposed[no_rain_mask] - - mean_precip_forecast - + mean_probabiltity_matching_forecast - ) - precip_forecast_probability_matching_resampled = None - - final_blended_forecast.append(precip_forecast_recomposed) - - precip_forecast_workers[j] = final_blended_forecast - - res = [] - - if DASK_IMPORTED and n_ens_members > 1: - for j in range(n_ens_members): - res.append(dask.delayed(worker)(j)) - dask.compute(*res, num_workers=num_ensemble_workers) - else: - for j in range(n_ens_members): - worker(j) - - res = None - - if 
is_nowcast_time_step: - if measure_time: - print(f"{time.time() - starttime:.2f} seconds.") - else: - print("done.") - - if callback is not None: - precip_forecast_final = np.stack(precip_forecast_workers) - if precip_forecast_final.shape[1] > 0: - callback(precip_forecast_final.squeeze()) - - if return_output: - for j in range(n_ens_members): - precip_forecast[j].extend(precip_forecast_workers[j]) - - precip_forecast_workers = None - - if measure_time: - mainloop_time = time.time() - starttime_mainloop - - if return_output: - precip_forecast_all_members_all_times = np.stack( - [np.stack(precip_forecast[j]) for j in range(n_ens_members)] - ) - if measure_time: - return precip_forecast_all_members_all_times, init_time, mainloop_time - else: - return precip_forecast_all_members_all_times - else: - return None + forecast_steps_nowcast = blended_nowcaster.compute_forecast() + blended_nowcaster.reset_states_and_params() + # Call the appropriate methods within the class + return forecast_steps_nowcast def calculate_weights_spn(correlations, covariance): From 8d8905a9624de39af087d3b01d84e2311642973f Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 5 Dec 2024 11:00:46 +0100 Subject: [PATCH 43/65] Removed old code which is no longer used --- pysteps/blending/steps.py | 532 +------------------------------------- 1 file changed, 1 insertion(+), 531 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 0e229a432..0d461bcf8 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -232,9 +232,6 @@ def __init__( self.__params = StepsBlendingParams() self.__state = StepsBlendingState() - # Perform input validation - self.__check_inputs() - # Initialize nowcast components and parameters self.__initialize_nowcast_components() @@ -2804,6 +2801,7 @@ def forecast( return forecast_steps_nowcast +# TODO: add the following code to the main body def calculate_weights_spn(correlations, covariance): """Calculate SPN blending weights for 
STEPS blending from correlation. @@ -2951,48 +2949,6 @@ def blend_means_sigmas(means, sigmas, weights): return combined_means, combined_sigmas -def _check_inputs( - precip, precip_models, velocity, velocity_models, timesteps, ar_order -): - if precip.ndim != 3: - raise ValueError("precip must be a three-dimensional array") - if precip.shape[0] < ar_order + 1: - raise ValueError("precip.shape[0] < ar_order+1") - if precip_models.ndim != 2 and precip_models.ndim != 4: - raise ValueError( - "precip_models must be either a two-dimensional array containing dictionaries with decomposed model fields or a four-dimensional array containing the original (NWP) model forecasts" - ) - if velocity.ndim != 3: - raise ValueError("velocity must be a three-dimensional array") - if velocity_models.ndim != 5: - raise ValueError("velocity_models must be a five-dimensional array") - if velocity.shape[0] != 2 or velocity_models.shape[2] != 2: - raise ValueError( - "velocity and velocity_models must have an x- and y-component, check the shape" - ) - if precip.shape[1:3] != velocity.shape[1:3]: - raise ValueError( - "dimension mismatch between precip and velocity: shape(precip)=%s, shape(velocity)=%s" - % (str(precip.shape), str(velocity.shape)) - ) - if precip_models.shape[0] != velocity_models.shape[0]: - raise ValueError( - "precip_models and velocity_models must consist of the same number of models" - ) - if isinstance(timesteps, list) and not sorted(timesteps) == timesteps: - raise ValueError("timesteps is not in ascending order") - if isinstance(timesteps, list): - if precip_models.shape[1] != math.ceil(timesteps[-1]) + 1: - raise ValueError( - "precip_models does not contain sufficient lead times for this forecast" - ) - else: - if precip_models.shape[1] != timesteps + 1: - raise ValueError( - "precip_models does not contain sufficient lead times for this forecast" - ) - - def _compute_incremental_mask(Rbin, kr, r): # buffer the observation mask Rbin using the kernel kr # add a 
grayscale rim r (for smooth rain/no-rain transition) @@ -3050,107 +3006,6 @@ def f(precip, i): return precip -def _init_noise( - precip, - precip_thr, - n_cascade_levels, - bp_filter, - decompositor, - fft, - noise_method, - noise_kwargs, - noise_stddev_adj, - measure_time, - num_workers, - seed, -): - """Initialize the noise method.""" - if noise_method is None: - return None, None, None - - # get methods for perturbations - init_noise, generate_noise = noise.get_method(noise_method) - - # initialize the perturbation generator for the precipitation field - generate_perturb = init_noise(precip, fft_method=fft, **noise_kwargs) - - if noise_stddev_adj == "auto": - print("Computing noise adjustment coefficients... ", end="", flush=True) - if measure_time: - starttime = time.time() - - precip_forecast_min = np.min(precip) - noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( - precip[-1, :, :], - precip_thr, - precip_forecast_min, - bp_filter, - decompositor, - generate_perturb, - generate_noise, - 20, - conditional=True, - num_workers=num_workers, - seed=seed, - ) - - if measure_time: - print(f"{time.time() - starttime:.2f} seconds.") - else: - print("done.") - elif noise_stddev_adj == "fixed": - f = lambda k: 1.0 / (0.75 + 0.09 * k) - noise_std_coeffs = [f(k) for k in range(1, n_cascade_levels + 1)] - else: - noise_std_coeffs = np.ones(n_cascade_levels) - - if noise_stddev_adj is not None: - print(f"noise std. dev. 
coeffs: {noise_std_coeffs}") - - return generate_perturb, generate_noise, noise_std_coeffs - - -def _compute_cascade_decomposition_radar( - precip, - ar_order, - n_cascade_levels, - n_ens_members, - MASK_thr, - domain, - bp_filter, - decompositor, - fft, -): - """Compute the cascade decompositions of the input precipitation fields.""" - precip_forecast_decomp = [] - for i in range(ar_order + 1): - precip_forecast = decompositor( - precip[i, :, :], - bp_filter, - mask=MASK_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ) - precip_forecast_decomp.append(precip_forecast) - - # Rearrange the cascaded into a four-dimensional array of shape - # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model - precip_forecast_cascades = nowcast_utils.stack_cascades( - precip_forecast_decomp, n_cascade_levels - ) - - precip_forecast_decomp = precip_forecast_decomp[-1] - mu_extrapolation = np.array(precip_forecast_decomp["means"]) - sigma_extrapolation = np.array(precip_forecast_decomp["stds"]) - precip_forecast_decomp = [ - precip_forecast_decomp.copy() for j in range(n_ens_members) - ] - return precip_forecast_cascades, mu_extrapolation, sigma_extrapolation - - def _compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): """If necessary, recompose (NWP) model forecasts.""" precip_models = None @@ -3169,388 +3024,3 @@ def _compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): precip_model = None return precip_models - - -def _estimate_ar_parameters_radar( - precip_forecast_cascades, ar_order, n_cascade_levels, MASK_thr, zero_precip_radar -): - """Estimate AR parameters for the radar rainfall field.""" - # If there are values in the radar fields, compute the autocorrelations - GAMMA = np.empty((n_cascade_levels, ar_order)) - if not zero_precip_radar: - # compute lag-l temporal autocorrelation coefficients for each cascade level - for i in range(n_cascade_levels): - GAMMA[i, :] 
= correlation.temporal_autocorrelation( - precip_forecast_cascades[i], mask=MASK_thr - ) - - # Else, use standard values for the autocorrelations - else: - # Get the climatological lag-1 and lag-2 autocorrelation values from Table 2 - # in `BPS2004`. - # Hard coded, change to own (climatological) values when present. - GAMMA = np.array( - [ - [0.99805, 0.9925, 0.9776, 0.9297, 0.796, 0.482, 0.079, 0.0006], - [0.9933, 0.9752, 0.923, 0.750, 0.367, 0.069, 0.0018, 0.0014], - ] - ) - - # Check whether the number of cascade_levels is correct - if GAMMA.shape[1] > n_cascade_levels: - GAMMA = GAMMA[:, 0:n_cascade_levels] - elif GAMMA.shape[1] < n_cascade_levels: - # Get the number of cascade levels that is missing - n_extra_lev = n_cascade_levels - GAMMA.shape[1] - # Append the array with correlation values of 10e-4 - GAMMA = np.append( - GAMMA, - [np.repeat(0.0006, n_extra_lev), np.repeat(0.0014, n_extra_lev)], - axis=1, - ) - - # Finally base GAMMA.shape[0] on the AR-level - if ar_order == 1: - GAMMA = GAMMA[0, :] - if ar_order > 2: - for repeat_index in range(ar_order - 2): - GAMMA = np.vstack((GAMMA, GAMMA[1, :])) - - # Finally, transpose GAMMA to ensure that the shape is the same as np.empty((n_cascade_levels, ar_order)) - GAMMA = GAMMA.transpose() - assert GAMMA.shape == (n_cascade_levels, ar_order) - - # Print the GAMMA value - nowcast_utils.print_corrcoefs(GAMMA) - - if ar_order == 2: - # adjust the lag-2 correlation coefficient to ensure that the AR(p) - # process is stationary - for i in range(n_cascade_levels): - GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2(GAMMA[i, 0], GAMMA[i, 1]) - - # estimate the parameters of the AR(p) model from the autocorrelation - # coefficients - PHI = np.empty((n_cascade_levels, ar_order + 1)) - for i in range(n_cascade_levels): - PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :]) - - nowcast_utils.print_ar_params(PHI) - return PHI - - -def _find_nwp_combination( - precip_models, - 
precip_forecast_probability_matching, - velocity_models, - mu_models, - sigma_models, - n_ens_members, - ar_order, - n_cascade_levels, - blend_nwp_members, -): - """Determine which (NWP) models will be combined with which nowcast ensemble members. - With the way it is implemented at this moment: n_ens_members of the output equals - the maximum number of (ensemble) members in the input (either the nowcasts or NWP). - """ - # Make sure the number of model members is not larger than than or equal to - # n_ens_members - n_model_members = precip_models.shape[0] - if n_model_members > n_ens_members: - raise ValueError( - "The number of NWP model members is larger than the given number of ensemble members. n_model_members <= n_ens_members." - ) - - # Check if NWP models/members should be used individually, or if all of - # them are blended together per nowcast ensemble member. - if blend_nwp_members: - n_model_indices = None - - else: - # Start with determining the maximum and mimimum number of members/models - # in both input products - n_ens_members_max = max(n_ens_members, n_model_members) - n_ens_members_min = min(n_ens_members, n_model_members) - # Also make a list of the model index numbers. These indices are needed - # for indexing the right climatological skill file when pysteps calculates - # the blended forecast in parallel. - if n_model_members > 1: - n_model_indices = np.arange(n_model_members) - else: - n_model_indices = [0] - - # Now, repeat the nowcast ensemble members or the nwp models/members until - # it has the same amount of members as n_ens_members_max. For instance, if - # you have 10 ensemble nowcasts members and 3 NWP members, the output will - # be an ensemble of 10 members. 
Hence, the three NWP members are blended - # with the first three members of the nowcast (member one with member one, - # two with two, etc.), subsequently, the same NWP members are blended with - # the next three members (NWP member one with member 4, NWP member 2 with - # member 5, etc.), until 10 is reached. - if n_ens_members_min != n_ens_members_max: - if n_model_members == 1: - precip_models = np.repeat(precip_models, n_ens_members_max, axis=0) - mu_models = np.repeat(mu_models, n_ens_members_max, axis=0) - sigma_models = np.repeat(sigma_models, n_ens_members_max, axis=0) - velocity_models = np.repeat(velocity_models, n_ens_members_max, axis=0) - # For the prob. matching - precip_forecast_probability_matching = np.repeat( - precip_forecast_probability_matching, n_ens_members_max, axis=0 - ) - # Finally, for the model indices - n_model_indices = np.repeat(n_model_indices, n_ens_members_max, axis=0) - - elif n_model_members == n_ens_members_min: - repeats = [ - (n_ens_members_max + i) // n_ens_members_min - for i in range(n_ens_members_min) - ] - if n_model_members == n_ens_members_min: - precip_models = np.repeat(precip_models, repeats, axis=0) - mu_models = np.repeat(mu_models, repeats, axis=0) - sigma_models = np.repeat(sigma_models, repeats, axis=0) - velocity_models = np.repeat(velocity_models, repeats, axis=0) - # For the prob. 
matching - precip_forecast_probability_matching = np.repeat( - precip_forecast_probability_matching, repeats, axis=0 - ) - # Finally, for the model indices - n_model_indices = np.repeat(n_model_indices, repeats, axis=0) - - return ( - precip_models, - precip_forecast_probability_matching, - velocity_models, - mu_models, - sigma_models, - n_model_indices, - ) - - -def _init_random_generators( - velocity, - noise_method, - vel_pert_method, - vp_par, - vp_perp, - seed, - n_ens_members, - kmperpixel, - timestep, -): - """Initialize all the random generators.""" - if noise_method is not None: - randgen_precip = [] - randgen_motion = [] - for j in range(n_ens_members): - rs = np.random.RandomState(seed) - randgen_precip.append(rs) - seed = rs.randint(0, high=1e9) - rs = np.random.RandomState(seed) - randgen_motion.append(rs) - seed = rs.randint(0, high=1e9) - - if vel_pert_method is not None: - init_vel_noise, generate_vel_noise = noise.get_method(vel_pert_method) - - # initialize the perturbation generators for the motion field - velocity_perturbations = [] - for j in range(n_ens_members): - kwargs = { - "randstate": randgen_motion[j], - "p_par": vp_par, - "p_perp": vp_perp, - } - vp_ = init_vel_noise(velocity, 1.0 / kmperpixel, timestep, **kwargs) - velocity_perturbations.append(vp_) - else: - velocity_perturbations, generate_vel_noise = None, None - - return randgen_precip, velocity_perturbations, generate_vel_noise - - -def _prepare_forecast_loop( - precip_forecast_cascades, - noise_method, - fft_method, - n_cascade_levels, - n_ens_members, - mask_method, - mask_kwargs, - timestep, - kmperpixel, -): - """Prepare for the forecast loop.""" - # Empty arrays for the previous displacements and the forecast cascade - previous_displacement = np.stack([None for j in range(n_ens_members)]) - previous_displacement_noise_cascade = np.stack([None for j in range(n_ens_members)]) - previous_displacement_prob_matching = np.stack([None for j in range(n_ens_members)]) - 
precip_forecast = [[] for j in range(n_ens_members)] - - if mask_method == "incremental": - # get mask parameters - mask_rim = mask_kwargs.get("mask_rim", 10) - mask_f = mask_kwargs.get("mask_f", 1.0) - # initialize the structuring element - struct = generate_binary_structure(2, 1) - # iterate it to expand it nxn - n = mask_f * timestep / kmperpixel - struct = iterate_structure(struct, int((n - 1) / 2.0)) - else: - mask_rim, struct = None, None - - if noise_method is None: - precip_forecast_non_perturbed = [ - precip_forecast_cascades[0][i].copy() for i in range(n_cascade_levels) - ] - else: - precip_forecast_non_perturbed = None - - fft_objs = [] - for i in range(n_ens_members): - fft_objs.append( - utils.get_method(fft_method, shape=precip_forecast_cascades.shape[-2:]) - ) - - return ( - previous_displacement, - previous_displacement_noise_cascade, - previous_displacement_prob_matching, - precip_forecast, - precip_forecast_non_perturbed, - mask_rim, - struct, - fft_objs, - ) - - -def _compute_initial_nwp_skill( - precip_forecast_cascades, - precip_models, - domain_mask, - issuetime, - outdir_path_skill, - clim_kwargs, -): - """Calculate the initial skill of the (NWP) model forecasts at t=0.""" - rho_nwp_models = [ - blending.skill_scores.spatial_correlation( - obs=precip_forecast_cascades[0, :, -1, :, :].copy(), - mod=precip_models[n_model, :, :, :].copy(), - domain_mask=domain_mask, - ) - for n_model in range(precip_models.shape[0]) - ] - rho_nwp_models = np.stack(rho_nwp_models) - - # Ensure that the model skill decreases with increasing scale level. 
- for n_model in range(precip_models.shape[0]): - for i in range(1, precip_models.shape[1]): - if rho_nwp_models[n_model, i] > rho_nwp_models[n_model, i - 1]: - # Set it equal to the previous scale level - rho_nwp_models[n_model, i] = rho_nwp_models[n_model, i - 1] - - # Save this in the climatological skill file - blending.clim.save_skill( - current_skill=rho_nwp_models, - validtime=issuetime, - outdir_path=outdir_path_skill, - **clim_kwargs, - ) - return rho_nwp_models - - -def _init_noise_cascade( - shape, - n_ens_members, - n_cascade_levels, - generate_noise, - decompositor, - generate_perturb, - randgen_precip, - fft_objs, - bp_filter, - domain, - noise_method, - noise_std_coeffs, - ar_order, -): - """Initialize the noise cascade with identical noise for all AR(n) steps - We also need to return the mean and standard deviations of the noise - for the recombination of the noise before advecting it. - """ - noise_cascade = np.zeros(shape) - mu_noise = np.zeros((n_ens_members, n_cascade_levels)) - sigma_noise = np.zeros((n_ens_members, n_cascade_levels)) - if noise_method: - for j in range(n_ens_members): - epsilon = generate_noise( - generate_perturb, - randstate=randgen_precip[j], - fft_method=fft_objs[j], - domain=domain, - ) - epsilon_decomposed = decompositor( - epsilon, - bp_filter, - fft_method=fft_objs[j], - input_domain=domain, - output_domain=domain, - compute_stats=True, - normalize=True, - compact_output=True, - ) - mu_noise[j] = epsilon_decomposed["means"] - sigma_noise[j] = epsilon_decomposed["stds"] - for i in range(n_cascade_levels): - epsilon_temp = epsilon_decomposed["cascade_levels"][i] - epsilon_temp *= noise_std_coeffs[i] - for n in range(ar_order): - noise_cascade[j][i][n] = epsilon_temp - epsilon_decomposed = None - epsilon_temp = None - return noise_cascade, mu_noise, sigma_noise - - -def _fill_nans_infs_nwp_cascade( - precip_models_cascade, - precip_models, - precip_cascade, - precip, - mu_models, - sigma_models, -): - """Ensure that the 
NWP cascade and fields do no contain any nans or infinite number""" - # Fill nans and infinite numbers with the minimum value present in precip - # (corresponding to zero rainfall in the radar observations) - min_cascade = np.nanmin(precip_cascade) - min_precip = np.nanmin(precip) - precip_models_cascade[~np.isfinite(precip_models_cascade)] = min_cascade - precip_models[~np.isfinite(precip_models)] = min_precip - # Also set any nans or infs in the mean and sigma of the cascade to - # respectively 0.0 and 1.0 - mu_models[~np.isfinite(mu_models)] = 0.0 - sigma_models[~np.isfinite(sigma_models)] = 0.0 - - return precip_models_cascade, precip_models, mu_models, sigma_models - - -def _determine_max_nr_rainy_cells_nwp(precip_models, precip_thr, n_models, timesteps): - """Initialize noise based on the NWP field time step where the fraction of rainy cells is highest""" - if precip_thr is None: - precip_thr = np.nanmin(precip_models) - - max_rain_pixels = -1 - max_rain_pixels_j = -1 - max_rain_pixels_t = -1 - for j in range(n_models): - for t in timesteps: - rain_pixels = precip_models[j][t][precip_models[j][t] > precip_thr].size - if rain_pixels > max_rain_pixels: - max_rain_pixels = rain_pixels - max_rain_pixels_j = j - max_rain_pixels_t = t - precip_noise_input = precip_models[max_rain_pixels_j][max_rain_pixels_t] - - return precip_noise_input.astype(np.float64, copy=False) From d6249f5673a9a5a38a0fcb71a6ae9b9dcd485fcb Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 5 Dec 2024 17:22:05 +0100 Subject: [PATCH 44/65] 6 more tests that fail --- .gitignore | 1 + pysteps/blending/steps.py | 166 +++++++++++++++------------ pysteps/tests/test_blending_steps.py | 2 + 3 files changed, 96 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index 4588187d7..8955e65b4 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,4 @@ venv.bak/ # Running lcoal tests /tmp +./pysteps/tests/tmp/* diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 
0d461bcf8..236353367 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -42,6 +42,8 @@ calculate_weights_spn blend_means_sigmas """ +# TODO: remove sys after debugging +import sys import math import time @@ -100,7 +102,7 @@ class StepsBlendingConfig: filter_kwargs: Dict[str, Any] = field(default_factory=dict) noise_kwargs: Dict[str, Any] = field(default_factory=dict) velocity_perturbation_kwargs: Dict[str, Any] = field(default_factory=dict) - clim_kwargs: Dict[str, Any] = field(default_factory=dict) + climatology_kwargs: Dict[str, Any] = field(default_factory=dict) mask_kwargs: Dict[str, Any] = field(default_factory=dict) measure_time: bool = False callback: Optional[Any] = None @@ -110,15 +112,12 @@ class StepsBlendingConfig: # TODO: typing could be improved here @dataclass class StepsBlendingParams: - PHI: np.ndarray # AR(p) model parameters - noise_std_coeffs: np.ndarray # Noise standard deviation coefficients - mu_extrapolation: np.ndarray # Means of extrapolated cascades - sigma_extrapolation: np.ndarray # Std devs of extrapolated cascades - bandpass_filter: Any # Band-pass filter object - fft: Any # FFT method object - perturbation_generator: Callable # Perturbation generator - noise_generator: Callable # Noise generator - generate_vel_noise: Optional[Callable] # Velocity noise generator + noise_std_coeffs: np.ndarray = None # Noise standard deviation coefficients + bandpass_filter: Any = None # Band-pass filter object + fft: Any = None # FFT method object + perturbation_generator: Callable = None # Perturbation generator + noise_generator: Callable = None # Noise generator + PHI: np.ndarray = None # AR(p) model parameters extrapolation_method: Any = None decomposition_method: Any = None recomposition_method: Any = None @@ -143,7 +142,6 @@ class StepsBlendingParams: mask_threshold: Any = None zero_precip_radar: bool = False zero_precip_model_fields: bool = False - PHI: Any = None original_timesteps: Any = None num_ensemble_workers: int 
= None rho_nwp_models: Any = None @@ -159,15 +157,12 @@ class StepsBlendingState: mean_extrapolation: Any = None std_extrapolation: Any = None precip_models_cascades: Any = None - PHI: Any = None randgen_precip: Any = None previous_displacement: Any = None previous_displacement_noise_cascade: Any = None previous_displacement_prob_matching: Any = None precip_forecast: Any = None precip_forecast_non_perturbed: Any = None - mask_rim: Any = None - struct: Any = None fft_objs: Any = None t_prev_timestep: Any = None t_leadtime_since_start_forecast: Any = None @@ -194,7 +189,6 @@ class StepsBlendingState: precip_forecast_extrapolated_probability_matching: Any = None precip_forecast_prev_subtimestep: Any = None noise_prev_subtimestep: Any = None - final_blended_forecast_single_member: Any = None means_blended: Any = None sigmas_blended: Any = None means_blended_mod_only: Any = None @@ -232,9 +226,6 @@ def __init__( self.__params = StepsBlendingParams() self.__state = StepsBlendingState() - # Initialize nowcast components and parameters - self.__initialize_nowcast_components() - # Additional variables for time measurement self.__start_time_init = None self.__zero_precip_time = None @@ -273,7 +264,7 @@ def compute_forecast(self): "initialization", self.__start_time_init ) - self.__blended_nowcast_main() + self.__blended_nowcast_main_loop() # Stack and return the forecast output if self.__config.return_output: self.__state.precip_forecast = np.stack( @@ -293,7 +284,7 @@ def compute_forecast(self): else: return None - def __blended_nowcast_main(self): + def __blended_nowcast_main_loop(self): """ Main nowcast loop that iterates through the ensemble members and time steps to generate forecasts. 
@@ -333,7 +324,9 @@ def __blended_nowcast_main(self): self.__find_nowcast_NWP_combination(t) self.__determine_skill_for_current_timestep(t) # the nowcast iteration for each ensemble member - precip_forecast_workers = [None for _ in range(self.__config.n_ens_members)] + precip_ensemble_single_timestep = [ + None for _ in range(self.__config.n_ens_members) + ] def worker(j): self.__determine_skill_for_next_timestep(t, j) @@ -343,29 +336,36 @@ def worker(j): t, j ) # 8.5 Blend the cascades - self.__state.final_blended_forecast_single_member = [] + final_blended_forecast_single_member = [] for t_sub in self.__state.subtimesteps: # TODO: does it make sense to use sub time steps - check if it works? if t_sub > 0: self.__blend_cascades(t_sub, j) self.__recompose_cascade_to_rainfall_field(j) # TODO: could be I need to return and ave final_blended_forecast_single_member - self.__post_process_output(j) - precip_forecast_workers[j] = ( - self.__state.final_blended_forecast_single_member + final_blended_forecast_single_member = ( + self.__post_process_output( + j, final_blended_forecast_single_member + ) + ) + precip_ensemble_single_timestep[j] = ( + final_blended_forecast_single_member ) - result = [] + dask_worker_collection = [] if DASK_IMPORTED and self.__config.n_ens_members > 1: for j in range(self.__config.n_ens_members): - result.append(dask.delayed(worker)(j)) - dask.compute(*result, num_workers=self.__params.num_ensemble_workers) + dask_worker_collection.append(dask.delayed(worker)(j)) + dask.compute( + *dask_worker_collection, + num_workers=self.__params.num_ensemble_workers, + ) else: for j in range(self.__config.n_ens_members): worker(j) - result = None + dask_worker_collection = None if self.__state.is_nowcast_time_step: if self.__config.measure_time: @@ -374,15 +374,17 @@ def worker(j): print("done.") if self.__config.callback is not None: - precip_forecast_final = np.stack(precip_forecast_workers) + precip_forecast_final = 
np.stack(precip_ensemble_single_timestep) if precip_forecast_final.shape[1] > 0: self.__config.callback(precip_forecast_final.squeeze()) if self.__config.return_output: for j in range(self.__config.n_ens_members): - self.__state.precip_forecast[j].extend(precip_forecast_workers[j]) + self.__state.precip_forecast[j].extend( + precip_ensemble_single_timestep[j] + ) - precip_forecast_workers = None + precip_ensemble_single_timestep = None if self.__config.measure_time: self.__mainloop_time = time.time() - starttime_mainloop @@ -424,30 +426,33 @@ def __check_inputs(self): raise ValueError( "The number of members in the precipitation models and velocity models must match" ) - + print(self.__timesteps, file=sys.stderr) if isinstance(self.__timesteps, list): self.__params.time_steps_is_list = True - self.__params.original_timesteps = [0] + list(self.__timesteps) - self.__timesteps = nowcast_utils.binned_timesteps( - self.__params.original_timesteps - ) if not sorted(self.__timesteps) == self.__timesteps: - raise ValueError("timesteps is not in ascending order") + raise ValueError( + "timesteps is not in ascending order", self.__timesteps + ) if self.__precip_models.shape[1] != math.ceil(self.__timesteps[-1]) + 1: raise ValueError( "precip_models does not contain sufficient lead times for this forecast" ) + self.__params.original_timesteps = [0] + list(self.__timesteps) + self.__timesteps = nowcast_utils.binned_timesteps( + self.__params.original_timesteps + ) else: self.__params.time_steps_is_list = False - self.__timesteps = list(range(self.__timesteps + 1)) if self.__precip_models.shape[1] != self.__timesteps + 1: raise ValueError( "precip_models does not contain sufficient lead times for this forecast" ) + self.__timesteps = list(range(self.__timesteps + 1)) + print(self.__timesteps, file=sys.stderr) precip_nwp_dim = self.__precip_models.ndim if precip_nwp_dim == 2: - if isinstance(self.__precip_models[0], dict): + if isinstance(self.__precip_models[0][0], dict): # 
It's a 2D array of dictionaries with decomposed cascades self.__params.precip_models_provided_is_cascade = True else: @@ -474,15 +479,14 @@ def __check_inputs(self): if self.__config.velocity_perturbation_kwargs is None: self.__config.velocity_perturbation_kwargs = dict() - if not self.__params.precip_models_provided_is_cascade: - if self.__config.clim_kwargs is None: - # Make sure clim_kwargs at least contains the number of models - self.__config.clim_kwargs = dict( - {"n_models": self.__precip_models.shape[0]} - ) + if self.__config.climatology_kwargs is None: + # Make sure clim_kwargs at least contains the number of models + self.__config.climatology_kwargs = dict( + {"n_models": self.__precip_models.shape[0]} + ) if self.__config.mask_kwargs is None: - mask_kwargs = dict() + self.__config.mask_kwargs = dict() if np.any(~np.isfinite(self.__velocity)): raise ValueError("velocity contains non-finite values") @@ -649,10 +653,11 @@ def __initialize_nowcast_components(self): x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) self.__params.xy_coordinates = np.stack([x_values, y_values]) - precip_copy = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() + # TODO: changed precip_copy for self.__precip + self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() # Determine the domain mask from non-finite values in the precipitation data self.__params.domain_mask = np.logical_or.reduce( - [~np.isfinite(precip_copy[i, :]) for i in range(precip_copy.shape[0])] + [~np.isfinite(self.__precip[i, :]) for i in range(self.__precip.shape[0])] ) print("Blended nowcast components initialized successfully.") @@ -692,7 +697,7 @@ def __prepare_radar_and_NWP_fields(self): """Compute the cascade decompositions of the input precipitation fields.""" precip_forecast_decomp = [] for i in range(self.__config.ar_order + 1): - precip_forecast = self.__params.extrapolation_method( + precip_forecast = self.__params.decomposition_method( self.__precip[i, :, :], 
self.__params.bandpass_filter, mask=self.__params.mask_threshold, @@ -754,7 +759,7 @@ def __zero_precipitation_forecast(self): # Create an empty np array with shape [n_ens_members, rows, cols] # and fill it with the minimum value from precip (corresponding to # zero precipitation) - N, M = self.__precip.shape + N, M = self.__precip.shape[1:] precip_forecast_workers = np.full( (self.__config.n_ens_members, N, M), self.__params.precip_zerovalue ) @@ -861,6 +866,9 @@ def __prepare_nowcast_for_zero_radar(self): self.__state.precip_noise_input = self.__precip_models[max_rain_pixels_j][ max_rain_pixels_t ] + self.__state.precip_noise_input = self.__state.precip_noise_input.astype( + np.float64, copy=False + ) # Make sure precip_noise_input is three-dimensional if len(self.__state.precip_noise_input.shape) != 3: @@ -878,7 +886,7 @@ def __initialize_noise(self): # initialize the perturbation generator for the precipitation field self.__params.perturbation_generator = init_noise( - self.__precip, + self.__state.precip_noise_input, fft_method=self.__params.fft, **self.__config.noise_kwargs, ) @@ -888,9 +896,9 @@ def __initialize_noise(self): if self.__config.measure_time: starttime = time.time() - precip_forecast_min = np.min(self.__precip) + precip_forecast_min = np.min(self.__state.precip_noise_input) self.__params.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( - self.__precip[-1, :, :], + self.__state.precip_noise_input[-1, :, :], self.__config.precip_threshold, precip_forecast_min, self.__params.bandpass_filter, @@ -1069,15 +1077,15 @@ def __prepare_forecast_loop(self): if self.__config.mask_method == "incremental": # get mask parameters - self.__state.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) + self.__params.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) mask_f = self.__config.mask_kwargs.get("mask_f", 1.0) # initialize the structuring element struct = generate_binary_structure(2, 1) # iterate it to expand it nxn n = mask_f * 
self.__config.timestep / self.__config.kmperpixel - self.__state.struct = iterate_structure(struct, int((n - 1) / 2.0)) + self.__params.struct = iterate_structure(struct, int((n - 1) / 2.0)) else: - self.__state.mask_rim, self.__state.struct = None, None + self.__params.mask_rim, self.__params.struct = None, None if self.__config.noise_method is None: self.__state.precip_forecast_non_perturbed = [ @@ -1348,33 +1356,35 @@ def __find_nowcast_NWP_combination(self, t): def __determine_skill_for_current_timestep(self, t): if t == 0: """Calculate the initial skill of the (NWP) model forecasts at t=0.""" - # TODO: n_model is not defined here, how does this work? + # TODO rewrite loop self.__params.rho_nwp_models = [ blending.skill_scores.spatial_correlation( obs=self.__state.precip_cascades[0, :, -1, :, :].copy(), mod=self.__state.precip_models_cascades_timestep[ - n_model, :, :, : + model_index, :, :, : ].copy(), domain_mask=self.__params.domain_mask, ) - for n_model in range( + for model_index in range( self.__state.precip_models_cascades_timestep.shape[0] ) ] self.__params.rho_nwp_models = np.stack(self.__params.rho_nwp_models) # Ensure that the model skill decreases with increasing scale level. 
- for n_model in range(self.__state.precip_models_cascades_timestep.shape[0]): + for model_index in range( + self.__state.precip_models_cascades_timestep.shape[0] + ): for i in range( 1, self.__state.precip_models_cascades_timestep.shape[1] ): if ( - self.__params.rho_nwp_models[n_model, i] - > self.__params.rho_nwp_models[n_model, i - 1] + self.__params.rho_nwp_models[model_index, i] + > self.__params.rho_nwp_models[model_index, i - 1] ): # Set it equal to the previous scale level - self.__params.rho_nwp_models[n_model, i] = ( - self.__params.rho_nwp_models[n_model, i - 1] + self.__params.rho_nwp_models[model_index, i] = ( + self.__params.rho_nwp_models[model_index, i - 1] ) # Save this in the climatological skill file @@ -1382,7 +1392,7 @@ def __determine_skill_for_current_timestep(self, t): current_skill=self.__params.rho_nwp_models, validtime=self.__issuetime, outdir_path=self.__config.outdir_path_skill, - **self.__config.clim_kwargs, + **self.__config.climatology_kwargs, ) if t > 0: # 8.1.3 Determine the skill of the components for lead time (t0 + t) @@ -1400,15 +1410,16 @@ def __determine_skill_for_next_timestep(self, t, j): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) # Then for the model components if self.__config.blend_nwp_members: + # TODO rewrite loop rho_nwp_forecast = [ blending.skill_scores.lt_dependent_cor_nwp( lt=(t * int(self.__config.timestep)), - correlations=self.__params.rho_nwp_models[n_model], + correlations=self.__params.rho_nwp_models[model_index], outdir_path=self.__config.outdir_path_skill, - n_model=n_model, - skill_kwargs=self.__config.clim_kwargs, + n_model=model_index, + skill_kwargs=self.__config.climatology_kwargs, ) - for n_model in range(self.__params.rho_nwp_models.shape[0]) + for model_index in range(self.__params.rho_nwp_models.shape[0]) ] rho_nwp_forecast = np.stack(rho_nwp_forecast) # Concatenate rho_extrap_cascade and rho_nwp @@ -1416,12 +1427,13 @@ def __determine_skill_for_next_timestep(self, 
t, j): (self.__state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 ) else: + # TODO: check if j is the best accessor for this variable rho_nwp_forecast = blending.skill_scores.lt_dependent_cor_nwp( lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[j], outdir_path=self.__config.outdir_path_skill, n_model=self.__state.n_model_indices[j], - skill_kwargs=self.__config.clim_kwargs, + skill_kwargs=self.__config.climatology_kwargs, ) # Concatenate rho_extrap_cascade and rho_nwp self.__state.rho_forecast = np.concatenate( @@ -2072,7 +2084,7 @@ def __recompose_cascade_to_rainfall_field(self, j): j ].irfft2(self.__state.precip_forecast_recomposed_mod_only) - def __post_process_output(self, j): + def __post_process_output(self, j, final_blended_forecast_single_member): # 8.7 Post-processing steps - use the mask and fill no data with # the blended NWP forecast. Probability matching following # Lagrangian blended probability matching which uses the @@ -2316,9 +2328,10 @@ def __post_process_output(self, j): ) precip_forecast_probability_matching_resampled = None - self.__state.final_blended_forecast_single_member.append( + final_blended_forecast_single_member.append( self.__state.precip_forecast_recomposed ) + return final_blended_forecast_single_member def __measure_time(self, label, start_time): """ @@ -2757,7 +2770,9 @@ def forecast( blending_config = StepsBlendingConfig( n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, + blend_nwp_members=blend_nwp_members, precip_threshold=precip_thr, + norain_threshold=norain_thr, kmperpixel=kmperpixel, timestep=timestep, extrapolation_method=extrap_method, @@ -2767,17 +2782,22 @@ def forecast( noise_stddev_adj=noise_stddev_adj, ar_order=ar_order, velocity_perturbation_method=vel_pert_method, + weights_method=weights_method, conditional=conditional, probmatching_method=probmatching_method, mask_method=mask_method, + resample_distribution=resample_distribution, + 
smooth_radar_mask_range=smooth_radar_mask_range, seed=seed, num_workers=num_workers, fft_method=fft_method, domain=domain, + outdir_path_skill=outdir_path_skill, extrapolation_kwargs=extrap_kwargs, filter_kwargs=filter_kwargs, noise_kwargs=noise_kwargs, velocity_perturbation_kwargs=vel_pert_kwargs, + climatology_kwargs=clim_kwargs, mask_kwargs=mask_kwargs, measure_time=measure_time, callback=callback, diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index ac2d16b93..18a4e90a4 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -25,6 +25,8 @@ (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False), (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False), (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), + # TODO: make next test work! This is currently not working on the main branch + # (2, 3, 4, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False), # Test the case where the radar image contains no rain. 
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False), From 38702b3bc46f22fb02561d9b190cb93da7318af9 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 5 Dec 2024 18:36:39 +0100 Subject: [PATCH 45/65] All tests pass, still need to fix TODOs --- .gitignore | 2 +- pysteps/blending/steps.py | 371 ++++++++++++++------------- pysteps/tests/test_blending_steps.py | 2 +- 3 files changed, 188 insertions(+), 187 deletions(-) diff --git a/.gitignore b/.gitignore index 8955e65b4..c865918f5 100644 --- a/.gitignore +++ b/.gitignore @@ -94,4 +94,4 @@ venv.bak/ # Running lcoal tests /tmp -./pysteps/tests/tmp/* +./pysteps/tests/tmp/ diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 236353367..fd1101872 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -42,9 +42,6 @@ calculate_weights_spn blend_means_sigmas """ -# TODO: remove sys after debugging -import sys - import math import time from copy import deepcopy @@ -246,7 +243,7 @@ def compute_forecast(self): # Determine if rain is present in both radar and NWP fields if self.__params.zero_precip_radar and self.__params.zero_precip_model_fields: - self.__zero_precipitation_forecast() + return self.__zero_precipitation_forecast() else: # Prepare the data for the zero precipitation radar case and initialize the noise correctly if self.__params.zero_precip_radar: @@ -329,28 +326,30 @@ def __blended_nowcast_main_loop(self): ] def worker(j): - self.__determine_skill_for_next_timestep(t, j) - self.__determine_weights_per_component() - self.__regress_extrapolation_and_noise_cascades(j) + # The state needs to be copied as a dataclass is not threadsafe in python + worker_state = deepcopy(self.__state) + self.__determine_skill_for_next_timestep(t, j, worker_state) + self.__determine_weights_per_component(worker_state) + self.__regress_extrapolation_and_noise_cascades(j, worker_state) self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( - t, j + t, j, 
worker_state ) # 8.5 Blend the cascades final_blended_forecast_single_member = [] for t_sub in self.__state.subtimesteps: # TODO: does it make sense to use sub time steps - check if it works? if t_sub > 0: - self.__blend_cascades(t_sub, j) - self.__recompose_cascade_to_rainfall_field(j) + self.__blend_cascades(t_sub, j, worker_state) + self.__recompose_cascade_to_rainfall_field(j, worker_state) # TODO: could be I need to return and ave final_blended_forecast_single_member final_blended_forecast_single_member = ( self.__post_process_output( - j, final_blended_forecast_single_member + j, final_blended_forecast_single_member, worker_state ) ) - precip_ensemble_single_timestep[j] = ( - final_blended_forecast_single_member - ) + precip_ensemble_single_timestep[j] = ( + final_blended_forecast_single_member + ) dask_worker_collection = [] @@ -426,7 +425,7 @@ def __check_inputs(self): raise ValueError( "The number of members in the precipitation models and velocity models must match" ) - print(self.__timesteps, file=sys.stderr) + if isinstance(self.__timesteps, list): self.__params.time_steps_is_list = True if not sorted(self.__timesteps) == self.__timesteps: @@ -448,7 +447,6 @@ def __check_inputs(self): "precip_models does not contain sufficient lead times for this forecast" ) self.__timesteps = list(range(self.__timesteps + 1)) - print(self.__timesteps, file=sys.stderr) precip_nwp_dim = self.__precip_models.ndim if precip_nwp_dim == 2: @@ -770,7 +768,6 @@ def __zero_precipitation_forecast(self): if self.__config.return_output: for j in range(self.__config.n_ens_members): precip_forecast[j].append(precip_forecast_workers[j]) - precip_forecast_workers = None if self.__config.measure_time: @@ -783,6 +780,7 @@ def __zero_precipitation_forecast(self): for j in range(self.__config.n_ens_members) ] ) + if self.__config.measure_time: return ( precip_forecast_all_members_all_times, @@ -1406,7 +1404,7 @@ def __determine_skill_for_current_timestep(self, t): 
correlations_prev=self.__state.rho_extrap_cascade_prev, ) - def __determine_skill_for_next_timestep(self, t, j): + def __determine_skill_for_next_timestep(self, t, j, worker_state): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) # Then for the model components if self.__config.blend_nwp_members: @@ -1423,8 +1421,8 @@ def __determine_skill_for_next_timestep(self, t, j): ] rho_nwp_forecast = np.stack(rho_nwp_forecast) # Concatenate rho_extrap_cascade and rho_nwp - self.__state.rho_forecast = np.concatenate( - (self.__state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 + worker_state.rho_forecast = np.concatenate( + (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 ) else: # TODO: check if j is the best accessor for this variable @@ -1432,16 +1430,16 @@ def __determine_skill_for_next_timestep(self, t, j): lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[j], outdir_path=self.__config.outdir_path_skill, - n_model=self.__state.n_model_indices[j], + n_model=worker_state.n_model_indices[j], skill_kwargs=self.__config.climatology_kwargs, ) # Concatenate rho_extrap_cascade and rho_nwp - self.__state.rho_forecast = np.concatenate( - (self.__state.rho_extrap_cascade[None, :], rho_nwp_forecast[None, :]), + worker_state.rho_forecast = np.concatenate( + (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast[None, :]), axis=0, ) - def __determine_weights_per_component(self): + def __determine_weights_per_component(self, worker_state): # 8.2 Determine the weights per component # Weights following the bps method. These are needed for the velocity @@ -1449,14 +1447,14 @@ def __determine_weights_per_component(self): # selected, weights will be overwritten with those weights prior to # blending step. # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] 
- self.__state.weights = calculate_weights_bps(self.__state.rho_forecast) + worker_state.weights = calculate_weights_bps(worker_state.rho_forecast) # The model only weights if self.__config.weights_method == "bps": # Determine the weights of the components without the extrapolation # cascade, in case this is no data or outside the mask. - self.__state.weights_model_only = calculate_weights_bps( - self.__state.rho_forecast[1:, :] + worker_state.weights_model_only = calculate_weights_bps( + worker_state.rho_forecast[1:, :] ) elif self.__config.weights_method == "spn": # Only the weights of the components without the extrapolation @@ -1464,11 +1462,11 @@ def __determine_weights_per_component(self): # determined after the extrapolation step in this method. if ( self.__config.blend_nwp_members - and self.__state.precip_models_cascades_timestep.shape[0] > 1 + and worker_state.precip_models_cascades_timestep.shape[0] > 1 ): - self.__state.weights_model_only = np.zeros( + worker_state.weights_model_only = np.zeros( ( - self.__state.precip_models_cascades_timestep.shape[0] + 1, + worker_state.precip_models_cascades_timestep.shape[0] + 1, self.__config.n_cascade_levels, ) ) @@ -1478,11 +1476,11 @@ def __determine_weights_per_component(self): covariance_nwp_models = np.corrcoef( np.stack( [ - self.__state.precip_models_cascades_timestep[ + worker_state.precip_models_cascades_timestep[ n_model, i, :, : ].flatten() for n_model in range( - self.__state.precip_models_cascades_timestep.shape[ + worker_state.precip_models_cascades_timestep.shape[ 0 ] ) @@ -1490,14 +1488,14 @@ def __determine_weights_per_component(self): ) ) # Determine the weights for this cascade level - self.__state.weights_model_only[:, i] = calculate_weights_spn( - correlations=self.__state.rho_forecast[1:, i], + worker_state.weights_model_only[:, i] = calculate_weights_spn( + correlations=worker_state.rho_forecast[1:, i], covariance=covariance_nwp_models, ) else: # Same as correlation and noise is 1 - 
correlation - self.__state.weights_model_only = calculate_weights_bps( - self.__state.rho_forecast[1:, :] + worker_state.weights_model_only = calculate_weights_bps( + worker_state.rho_forecast[1:, :] ) else: raise ValueError( @@ -1505,7 +1503,7 @@ def __determine_weights_per_component(self): % self.__config.weights_method ) - def __regress_extrapolation_and_noise_cascades(self, j): + def __regress_extrapolation_and_noise_cascades(self, j, worker_state): # 8.3 Determine the noise cascade and regress this to the subsequent # time step + regress the extrapolation component to the subsequent # time step @@ -1516,8 +1514,8 @@ def __regress_extrapolation_and_noise_cascades(self, j): # generate noise field epsilon = self.__params.noise_generator( self.__params.perturbation_generator, - randstate=self.__state.randgen_precip[j], - fft_method=self.__state.fft_objs[j], + randstate=worker_state.randgen_precip[j], + fft_method=worker_state.fft_objs[j], domain=self.__config.domain, ) @@ -1525,7 +1523,7 @@ def __regress_extrapolation_and_noise_cascades(self, j): epsilon_decomposed = self.__params.decomposition_method( epsilon, self.__params.bandpass_filter, - fft_method=self.__state.fft_objs[j], + fft_method=worker_state.fft_objs[j], input_domain=self.__config.domain, output_domain=self.__config.domain, compute_stats=True, @@ -1544,18 +1542,18 @@ def __regress_extrapolation_and_noise_cascades(self, j): epsilon_decomposed is not None or self.__config.velocity_perturbation_method is not None ): - self.__state.precip_cascades[j][i] = autoregression.iterate_ar_model( - self.__state.precip_cascades[j][i], self.__params.PHI[i, :] + worker_state.precip_cascades[j][i] = autoregression.iterate_ar_model( + worker_state.precip_cascades[j][i], self.__params.PHI[i, :] ) # Renormalize the cascade - self.__state.precip_cascades[j][i][1] /= np.std( - self.__state.precip_cascades[j][i][1] + worker_state.precip_cascades[j][i][1] /= np.std( + worker_state.precip_cascades[j][i][1] ) else: # use the 
deterministic AR(p) model computed above if # perturbations are disabled - self.__state.precip_cascades[j][i] = ( - self.__state.precip_forecast_non_perturbed[i] + worker_state.precip_cascades[j][i] = ( + worker_state.precip_forecast_non_perturbed[i] ) # 8.3.3 regress the noise component to the subsequent time step @@ -1569,8 +1567,8 @@ def __regress_extrapolation_and_noise_cascades(self, j): epsilon_temp = None # apply AR(p) process to noise cascade level # (Returns zero noise if epsilon_decomposed is None) - self.__state.precip_noise_cascades[j][i] = autoregression.iterate_ar_model( - self.__state.precip_noise_cascades[j][i], + worker_state.precip_noise_cascades[j][i] = autoregression.iterate_ar_model( + worker_state.precip_noise_cascades[j][i], self.__params.PHI[i, :], eps=epsilon_temp, ) @@ -1579,7 +1577,7 @@ def __regress_extrapolation_and_noise_cascades(self, j): epsilon_temp = None def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( - self, t, j + self, t, j, worker_state ): # 8.4 Perturb and blend the advection fields + advect the # extrapolation and noise cascade to the current time step @@ -1591,37 +1589,37 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( extrap_kwargs_pb = self.__config.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity # The following should be accesseble after this function - self.__state.precip_forecast_extrapolated_decomp_done = [] - self.__state.noise_extrapolated_decomp_done = [] - self.__state.precip_forecast_extrapolated_probability_matching = [] + worker_state.precip_forecast_extrapolated_decomp_done = [] + worker_state.noise_extrapolated_decomp_done = [] + worker_state.precip_forecast_extrapolated_probability_matching = [] # Extrapolate per sub time step - for t_sub in self.__state.subtimesteps: + for t_sub in worker_state.subtimesteps: if t_sub > 0: t_diff_prev_subtimestep_int = t_sub - int(t_sub) if t_diff_prev_subtimestep_int > 0.0: 
precip_forecast_cascade_subtimestep = [ (1.0 - t_diff_prev_subtimestep_int) - * self.__state.precip_forecast_prev_subtimestep[j][i][-1, :] + * worker_state.precip_forecast_prev_subtimestep[j][i][-1, :] + t_diff_prev_subtimestep_int - * self.__state.precip_cascades[j][i][-1, :] + * worker_state.precip_cascades[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] noise_cascade_subtimestep = [ (1.0 - t_diff_prev_subtimestep_int) - * self.__state.noise_prev_subtimestep[j][i][-1, :] + * worker_state.noise_prev_subtimestep[j][i][-1, :] + t_diff_prev_subtimestep_int - * self.__state.precip_noise_cascades[j][i][-1, :] + * worker_state.precip_noise_cascades[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] else: precip_forecast_cascade_subtimestep = [ - self.__state.precip_forecast_prev_subtimestep[j][i][-1, :] + worker_state.precip_forecast_prev_subtimestep[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] noise_cascade_subtimestep = [ - self.__state.noise_prev_subtimestep[j][i][-1, :] + worker_state.noise_prev_subtimestep[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] @@ -1630,8 +1628,8 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) - t_diff_prev_subtimestep = t_sub - self.__state.t_prev_timestep[j] - self.__state.t_leadtime_since_start_forecast[ + t_diff_prev_subtimestep = t_sub - worker_state.t_prev_timestep[j] + worker_state.t_leadtime_since_start_forecast[ j ] += t_diff_prev_subtimestep @@ -1644,7 +1642,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( self.__velocity + self.__params.generate_velocity_noise( self.__params.velocity_perturbations[j], - self.__state.t_leadtime_since_start_forecast[j] + worker_state.t_leadtime_since_start_forecast[j] * self.__config.timestep, ) ) @@ -1654,12 +1652,12 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_stack_all = 
np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], - self.__state.velocity_models_timestep, + worker_state.velocity_models_timestep, ), axis=0, ) else: - velocity_models = self.__state.velocity_models_timestep[j] + velocity_models = worker_state.velocity_models_timestep[j] velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], @@ -1673,7 +1671,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # second cascade following eq. 24 in BPS2006 velocity_blended = blending.utils.blend_optical_flows( flows=velocity_stack_all, - weights=self.__state.weights[ + weights=worker_state.weights[ :-1, 1 ], # [(extr_field, n_model_fields), cascade_level=2] ) @@ -1685,8 +1683,8 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # A. Radar Rain precip_forecast_recomp_subtimestep = blending.utils.recompose_cascade( combined_cascade=precip_forecast_cascade_subtimestep, - combined_mean=self.__state.mean_extrapolation, - combined_sigma=self.__state.std_extrapolation, + combined_mean=worker_state.mean_extrapolation, + combined_sigma=worker_state.std_extrapolation, ) # Make sure we have values outside the mask if self.__params.zero_precip_radar: @@ -1702,11 +1700,11 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # TODO: problem with the config here! This variable changes over time... # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! 
self.__config.extrapolation_kwargs["displacement_prev"] = ( - self.__state.previous_displacement[j] + worker_state.previous_displacement[j] ) ( precip_forecast_extrapolated_recomp_subtimestep_temp, - self.__state.previous_displacement[j], + worker_state.previous_displacement[j], ) = self.__params.extrapolation_method( precip_forecast_recomp_subtimestep, velocity_blended, @@ -1750,16 +1748,16 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # B. Noise noise_cascade_subtimestep_recomp = blending.utils.recompose_cascade( combined_cascade=noise_cascade_subtimestep, - combined_mean=self.__state.precip_mean_noise[j], - combined_sigma=self.__state.precip_std_noise[j], + combined_mean=worker_state.precip_mean_noise[j], + combined_sigma=worker_state.precip_std_noise[j], ) extrap_kwargs_noise["displacement_prev"] = ( - self.__state.previous_displacement_noise_cascade[j] + worker_state.previous_displacement_noise_cascade[j] ) extrap_kwargs_noise["map_coordinates_mode"] = "wrap" ( noise_extrapolated_recomp_temp, - self.__state.previous_displacement_noise_cascade[j], + worker_state.previous_displacement_noise_cascade[j], ) = self.__params.extrapolation_method( noise_cascade_subtimestep_recomp, velocity_blended, @@ -1782,10 +1780,10 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( noise_extrapolated_decomp[i] *= self.__params.noise_std_coeffs[i] # Append the results to the output lists - self.__state.precip_forecast_extrapolated_decomp_done.append( + worker_state.precip_forecast_extrapolated_decomp_done.append( precip_forecast_extrapolated_decomp.copy() ) - self.__state.noise_extrapolated_decomp_done.append( + worker_state.noise_extrapolated_decomp_done.append( noise_extrapolated_decomp.copy() ) precip_forecast_cascade_subtimestep = None @@ -1804,7 +1802,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # of the (NWP) model(s) for Lagrangian blended prob. 
matching # min_R = np.min(precip) extrap_kwargs_pb["displacement_prev"] = ( - self.__state.previous_displacement_prob_matching[j] + worker_state.previous_displacement_prob_matching[j] ) # Apply the domain mask to the extrapolation component precip_forecast_temp_for_probability_matching = self.__precip.copy() @@ -1813,7 +1811,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ] = np.nan ( precip_forecast_extrapolated_probability_matching_temp, - self.__state.previous_displacement_prob_matching[j], + worker_state.previous_displacement_prob_matching[j], ) = self.__params.extrapolation_method( precip_forecast_temp_for_probability_matching, velocity_blended, @@ -1821,28 +1819,28 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( allow_nonfinite_values=True, **extrap_kwargs_pb, ) - self.__state.precip_forecast_extrapolated_probability_matching.append( + worker_state.precip_forecast_extrapolated_probability_matching.append( precip_forecast_extrapolated_probability_matching_temp[0] ) - self.__state.t_prev_timestep[j] = t_sub + worker_state.t_prev_timestep[j] = t_sub - if len(self.__state.precip_forecast_extrapolated_decomp_done) > 0: - self.__state.precip_forecast_extrapolated_decomp_done = np.stack( - self.__state.precip_forecast_extrapolated_decomp_done + if len(worker_state.precip_forecast_extrapolated_decomp_done) > 0: + worker_state.precip_forecast_extrapolated_decomp_done = np.stack( + worker_state.precip_forecast_extrapolated_decomp_done ) - self.__state.noise_extrapolated_decomp_done = np.stack( - self.__state.noise_extrapolated_decomp_done + worker_state.noise_extrapolated_decomp_done = np.stack( + worker_state.noise_extrapolated_decomp_done ) - self.__state.precip_forecast_extrapolated_probability_matching = np.stack( - self.__state.precip_forecast_extrapolated_probability_matching + worker_state.precip_forecast_extrapolated_probability_matching = np.stack( + 
worker_state.precip_forecast_extrapolated_probability_matching ) # advect the forecast field by one time step if no subtimesteps in the # current interval were found - if not self.__state.subtimesteps: - t_diff_prev_subtimestep = t + 1 - self.__state.t_prev_timestep[j] - self.__state.t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep + if not worker_state.subtimesteps: + t_diff_prev_subtimestep = t + 1 - worker_state.t_prev_timestep[j] + worker_state.t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights @@ -1851,7 +1849,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( self.__velocity + self.__params.generate_velocity_noise( self.__params.velocity_perturbations[j], - self.__state.t_leadtime_since_start_forecast[j] + worker_state.t_leadtime_since_start_forecast[j] * self.__config.timestep, ) ) @@ -1861,12 +1859,12 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], - self.__state.velocity_models_timestep, + worker_state.velocity_models_timestep, ), axis=0, ) else: - velocity_models = self.__state.velocity_models_timestep[j] + velocity_models = worker_state.velocity_models_timestep[j] velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], @@ -1880,20 +1878,20 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # second cascade following eq. 
24 in BPS2006 velocity_blended = blending.utils.blend_optical_flows( flows=velocity_stack_all, - weights=self.__state.weights[ + weights=worker_state.weights[ :-1, 1 ], # [(extr_field, n_model_fields), cascade_level=2] ) # Extrapolate the extrapolation and noise cascade - extrap_kwargs_["displacement_prev"] = self.__state.previous_displacement[j] + extrap_kwargs_["displacement_prev"] = worker_state.previous_displacement[j] extrap_kwargs_noise["displacement_prev"] = ( - self.__state.previous_displacement_noise_cascade[j] + worker_state.previous_displacement_noise_cascade[j] ) extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - _, self.__state.previous_displacement[j] = ( + _, worker_state.previous_displacement[j] = ( self.__params.extrapolation_method( None, velocity_blended, @@ -1903,7 +1901,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) ) - _, self.__state.previous_displacement_noise_cascade[j] = ( + _, worker_state.previous_displacement_noise_cascade[j] = ( self.__params.extrapolation_method( None, velocity_blended, @@ -1916,9 +1914,9 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # Also extrapolate the radar observation, used for the probability # matching and post-processing steps extrap_kwargs_pb["displacement_prev"] = ( - self.__state.previous_displacement_prob_matching[j] + worker_state.previous_displacement_prob_matching[j] ) - _, self.__state.previous_displacement_prob_matching[j] = ( + _, worker_state.previous_displacement_prob_matching[j] = ( self.__params.extrapolation_method( None, velocity_blended, @@ -1928,15 +1926,15 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) ) - self.__state.t_prev_timestep[j] = t + 1 + worker_state.t_prev_timestep[j] = t + 1 - self.__state.precip_forecast_prev_subtimestep[j] = self.__state.precip_cascades[ + worker_state.precip_forecast_prev_subtimestep[j] = worker_state.precip_cascades[ j ] - 
self.__state.noise_prev_subtimestep[j] = self.__state.precip_noise_cascades[j] + worker_state.noise_prev_subtimestep[j] = worker_state.precip_noise_cascades[j] - def __blend_cascades(self, t_sub, j): - self.__state.t_index = np.where(np.array(self.__state.subtimesteps) == t_sub)[ + def __blend_cascades(self, t_sub, j, worker_state): + worker_state.t_index = np.where(np.array(worker_state.subtimesteps) == t_sub)[ 0 ][0] # First concatenate the cascades and the means and sigmas @@ -1944,54 +1942,54 @@ def __blend_cascades(self, t_sub, j): if self.__config.blend_nwp_members: cascade_stack_all_components = np.concatenate( ( - self.__state.precip_forecast_extrapolated_decomp_done[ - None, self.__state.t_index + worker_state.precip_forecast_extrapolated_decomp_done[ + None, worker_state.t_index ], - self.__state.precip_models_cascades_timestep, - self.__state.noise_extrapolated_decomp_done[ - None, self.__state.t_index + worker_state.precip_models_cascades_timestep, + worker_state.noise_extrapolated_decomp_done[ + None, worker_state.t_index ], ), axis=0, ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] 
means_stacked = np.concatenate( ( - self.__state.mean_extrapolation[None, :], - self.__state.mean_models_timestep, + worker_state.mean_extrapolation[None, :], + worker_state.mean_models_timestep, ), axis=0, ) sigmas_stacked = np.concatenate( ( - self.__state.std_extrapolation[None, :], - self.__state.std_models_timestep, + worker_state.std_extrapolation[None, :], + worker_state.std_models_timestep, ), axis=0, ) else: cascade_stack_all_components = np.concatenate( ( - self.__state.precip_forecast_extrapolated_decomp_done[ - None, self.__state.t_index + worker_state.precip_forecast_extrapolated_decomp_done[ + None, worker_state.t_index ], - self.__state.precip_models_cascades_timestep[None, j], - self.__state.noise_extrapolated_decomp_done[ - None, self.__state.t_index + worker_state.precip_models_cascades_timestep[None, j], + worker_state.noise_extrapolated_decomp_done[ + None, worker_state.t_index ], ), axis=0, ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] means_stacked = np.concatenate( ( - self.__state.mean_extrapolation[None, :], - self.__state.mean_models_timestep[None, j], + worker_state.mean_extrapolation[None, :], + worker_state.mean_models_timestep[None, j], ), axis=0, ) sigmas_stacked = np.concatenate( ( - self.__state.std_extrapolation[None, :], - self.__state.std_models_timestep[None, j], + worker_state.std_extrapolation[None, :], + worker_state.std_models_timestep[None, j], ), axis=0, ) @@ -2002,9 +2000,9 @@ def __blend_cascades(self, t_sub, j): # method is given? Or does this mean that in all other circumstances the weights # have been calculated in a different way? 
- # TODO: changed weights to self.__state.weights + # TODO: changed weights to worker_state.weights if self.__config.weights_method == "spn": - self.__state.weights = np.zeros( + worker_state.weights = np.zeros( ( cascade_stack_all_components.shape[0], self.__config.n_cascade_levels, @@ -2023,68 +2021,70 @@ def __blend_cascades(self, t_sub, j): np.ma.masked_invalid(cascade_stack_all_components_temp) ) # Determine the weights for this cascade level - self.__state.weights[:, i] = calculate_weights_spn( - correlations=self.__state.rho_forecast[:, i], + worker_state.weights[:, i] = calculate_weights_spn( + correlations=worker_state.rho_forecast[:, i], covariance=covariance_nwp_models, ) # Blend the extrapolation, (NWP) model(s) and noise cascades - self.__state.precip_forecast_blended = blending.utils.blend_cascades( - cascades_norm=cascade_stack_all_components, weights=self.__state.weights + worker_state.precip_forecast_blended = blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components, weights=worker_state.weights ) # Also blend the cascade without the extrapolation component - self.__state.precip_forecast_blended_mod_only = blending.utils.blend_cascades( + worker_state.precip_forecast_blended_mod_only = blending.utils.blend_cascades( cascades_norm=cascade_stack_all_components[1:, :], - weights=self.__state.weights_model_only, + weights=worker_state.weights_model_only, ) # Blend the means and standard deviations # Input is array of shape [number_components, scale_level, ...] 
- self.__state.means_blended, self.__state.sigmas_blended = blend_means_sigmas( - means=means_stacked, sigmas=sigmas_stacked, weights=self.__state.weights + worker_state.means_blended, worker_state.sigmas_blended = blend_means_sigmas( + means=means_stacked, sigmas=sigmas_stacked, weights=worker_state.weights ) # Also blend the means and sigmas for the cascade without extrapolation ( - self.__state.means_blended_mod_only, - self.__state.sigmas_blended_mod_only, + worker_state.means_blended_mod_only, + worker_state.sigmas_blended_mod_only, ) = blend_means_sigmas( means=means_stacked[1:, :], sigmas=sigmas_stacked[1:, :], - weights=self.__state.weights_model_only, + weights=worker_state.weights_model_only, ) - def __recompose_cascade_to_rainfall_field(self, j): + def __recompose_cascade_to_rainfall_field(self, j, worker_state): # 8.6 Recompose the cascade to a precipitation field # (The function first normalizes the blended cascade, precip_forecast_blended # again) - self.__state.precip_forecast_recomposed = blending.utils.recompose_cascade( - combined_cascade=self.__state.precip_forecast_blended, - combined_mean=self.__state.means_blended, - combined_sigma=self.__state.sigmas_blended, + worker_state.precip_forecast_recomposed = blending.utils.recompose_cascade( + combined_cascade=worker_state.precip_forecast_blended, + combined_mean=worker_state.means_blended, + combined_sigma=worker_state.sigmas_blended, ) # The recomposed cascade without the extrapolation (for NaN filling # outside the radar domain) - self.__state.precip_forecast_recomposed_mod_only = ( + worker_state.precip_forecast_recomposed_mod_only = ( blending.utils.recompose_cascade( - combined_cascade=self.__state.precip_forecast_blended_mod_only, - combined_mean=self.__state.means_blended_mod_only, - combined_sigma=self.__state.sigmas_blended_mod_only, + combined_cascade=worker_state.precip_forecast_blended_mod_only, + combined_mean=worker_state.means_blended_mod_only, + 
combined_sigma=worker_state.sigmas_blended_mod_only, ) ) if self.__config.domain == "spectral": # TODO: Check this! (Only tested with domain == 'spatial') # TODO: what needs to happen with above TODO? - self.__state.precip_forecast_recomposed = self.__state.fft_objs[j].irfft2( - self.__state.precip_forecast_recomposed + worker_state.precip_forecast_recomposed = worker_state.fft_objs[j].irfft2( + worker_state.precip_forecast_recomposed ) - self.__state.precip_forecast_recomposed_mod_only = self.__state.fft_objs[ + worker_state.precip_forecast_recomposed_mod_only = worker_state.fft_objs[ j - ].irfft2(self.__state.precip_forecast_recomposed_mod_only) + ].irfft2(worker_state.precip_forecast_recomposed_mod_only) - def __post_process_output(self, j, final_blended_forecast_single_member): + def __post_process_output( + self, j, final_blended_forecast_single_member, worker_state + ): # 8.7 Post-processing steps - use the mask and fill no data with # the blended NWP forecast. Probability matching following # Lagrangian blended probability matching which uses the @@ -2095,14 +2095,14 @@ def __post_process_output(self, j, final_blended_forecast_single_member): # that is only used for post-processing steps) with the NWP # rainfall forecast for this time step using the weights # at scale level 2. 
- weights_probability_matching = self.__state.weights[ + weights_probability_matching = worker_state.weights[ :-1, 1 ] # Weights without noise, level 2 weights_probability_matching_normalized = weights_probability_matching / np.sum( weights_probability_matching ) # And the weights for outside the radar domain - weights_probability_matching_mod_only = self.__state.weights_model_only[ + weights_probability_matching_mod_only = worker_state.weights_model_only[ :-1, 1 ] # Weights without noise, level 2 weights_probability_matching_normalized_mod_only = ( @@ -2113,20 +2113,20 @@ def __post_process_output(self, j, final_blended_forecast_single_member): if self.__config.blend_nwp_members: precip_forecast_probability_matching_final = np.concatenate( ( - self.__state.precip_forecast_extrapolated_probability_matching[ - None, self.__state.t_index + worker_state.precip_forecast_extrapolated_probability_matching[ + None, worker_state.t_index ], - self.__state.precip_models_timestep, + worker_state.precip_models_timestep, ), axis=0, ) else: precip_forecast_probability_matching_final = np.concatenate( ( - self.__state.precip_forecast_extrapolated_probability_matching[ - None, self.__state.t_index + worker_state.precip_forecast_extrapolated_probability_matching[ + None, worker_state.t_index ], - self.__state.precip_models_timestep[None, j], + worker_state.precip_models_timestep[None, j], ), axis=0, ) @@ -2145,12 +2145,12 @@ def __post_process_output(self, j, final_blended_forecast_single_member): 1, 1, ) - * self.__state.precip_models_timestep, + * worker_state.precip_models_timestep, axis=0, ) else: precip_forecast_probability_matching_blended_mod_only = ( - self.__state.precip_models_timestep[j] + worker_state.precip_models_timestep[j] ) # The extrapolation components are NaN outside the advected @@ -2159,7 +2159,7 @@ def __post_process_output(self, j, final_blended_forecast_single_member): # areas with the "..._mod_only" blended forecasts, consisting # of the NWP and noise 
components. - nan_indices = np.isnan(self.__state.precip_forecast_recomposed) + nan_indices = np.isnan(worker_state.precip_forecast_recomposed) if self.__config.smooth_radar_mask_range != 0: # Compute the smooth dilated mask new_mask = blending.utils.compute_smooth_dilated_mask( @@ -2173,10 +2173,10 @@ def __post_process_output(self, j, final_blended_forecast_single_member): # Handle NaNs in precip_forecast_new and precip_forecast_new_mod_only by setting NaNs to 0 in the blending step precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num( - self.__state.precip_forecast_recomposed_mod_only, nan=0 + worker_state.precip_forecast_recomposed_mod_only, nan=0 ) precip_forecast_recomposed_no_nan = np.nan_to_num( - self.__state.precip_forecast_recomposed, nan=0 + worker_state.precip_forecast_recomposed, nan=0 ) # Perform the blending of radar and model inside the radar domain using a weighted combination @@ -2196,8 +2196,8 @@ def __post_process_output(self, j, final_blended_forecast_single_member): axis=0, ) else: - self.__state.precip_forecast_recomposed[nan_indices] = ( - self.__state.precip_forecast_recomposed_mod_only[nan_indices] + worker_state.precip_forecast_recomposed[nan_indices] = ( + worker_state.precip_forecast_recomposed_mod_only[nan_indices] ) nan_indices = np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended[nan_indices] = ( @@ -2206,9 +2206,9 @@ def __post_process_output(self, j, final_blended_forecast_single_member): # Finally, fill the remaining nan values, if present, with # the minimum value in the forecast - nan_indices = np.isnan(self.__state.precip_forecast_recomposed) - self.__state.precip_forecast_recomposed[nan_indices] = np.nanmin( - self.__state.precip_forecast_recomposed + nan_indices = np.isnan(worker_state.precip_forecast_recomposed) + worker_state.precip_forecast_recomposed[nan_indices] = np.nanmin( + worker_state.precip_forecast_recomposed ) nan_indices = 
np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended[nan_indices] = np.nanmin( @@ -2221,7 +2221,7 @@ def __post_process_output(self, j, final_blended_forecast_single_member): # apply the precipitation mask to prevent generation of new # precipitation into areas where it was not originally # observed - precip_forecast_min_value = self.__state.precip_forecast_recomposed.min() + precip_forecast_min_value = worker_state.precip_forecast_recomposed.min() if self.__config.mask_method == "incremental": # The incremental mask is slightly different from # the implementation in the non-blended steps.py, as @@ -2238,16 +2238,16 @@ def __post_process_output(self, j, final_blended_forecast_single_member): precip_field_mask, self.__params.struct, self.__params.mask_rim ) # Get the final mask - self.__state.precip_forecast_recomposed = ( + worker_state.precip_forecast_recomposed = ( precip_forecast_min_value + ( - self.__state.precip_forecast_recomposed + worker_state.precip_forecast_recomposed - precip_forecast_min_value ) * precip_field_mask ) precip_field_mask_temp = ( - self.__state.precip_forecast_recomposed > precip_forecast_min_value + worker_state.precip_forecast_recomposed > precip_forecast_min_value ) elif self.__config.mask_method == "obs": # The mask equals the most recent benchmark @@ -2258,7 +2258,7 @@ def __post_process_output(self, j, final_blended_forecast_single_member): ) # Set to min value outside of mask - self.__state.precip_forecast_recomposed[~precip_field_mask_temp] = ( + worker_state.precip_forecast_recomposed[~precip_field_mask_temp] = ( precip_forecast_min_value ) @@ -2269,10 +2269,10 @@ def __post_process_output(self, j, final_blended_forecast_single_member): self.__config.probmatching_method is not None and self.__config.resample_distribution ): - arr1 = self.__state.precip_forecast_extrapolated_probability_matching[ - self.__state.t_index + arr1 = 
worker_state.precip_forecast_extrapolated_probability_matching[ + worker_state.t_index ] - arr2 = self.__state.precip_models_timestep[j] + arr2 = worker_state.precip_models_timestep[j] # resample weights based on cascade level 2. # Areas where one of the fields is nan are not included. precip_forecast_probability_matching_resampled = ( @@ -2290,17 +2290,17 @@ def __post_process_output(self, j, final_blended_forecast_single_member): if self.__config.probmatching_method == "cdf": # nan indices in the extrapolation nowcast nan_indices = np.isnan( - self.__state.precip_forecast_extrapolated_probability_matching[ - self.__state.t_index + worker_state.precip_forecast_extrapolated_probability_matching[ + worker_state.t_index ] ) # Adjust the CDF of the forecast to match the resampled distribution combined from # extrapolation and model fields. # Rainfall outside the pure extrapolation domain is not taken into account. - if np.any(np.isfinite(self.__state.precip_forecast_recomposed)): - self.__state.precip_forecast_recomposed = ( + if np.any(np.isfinite(worker_state.precip_forecast_recomposed)): + worker_state.precip_forecast_recomposed = ( probmatching.nonparam_match_empirical_cdf( - self.__state.precip_forecast_recomposed, + worker_state.precip_forecast_recomposed, precip_forecast_probability_matching_resampled, nan_indices, ) @@ -2315,21 +2315,21 @@ def __post_process_output(self, j, final_blended_forecast_single_member): ] ) no_rain_mask = ( - self.__state.precip_forecast_recomposed + worker_state.precip_forecast_recomposed >= self.__config.precip_threshold ) mean_precip_forecast = np.mean( - self.__state.precip_forecast_recomposed[no_rain_mask] + worker_state.precip_forecast_recomposed[no_rain_mask] ) - self.__state.precip_forecast_recomposed[no_rain_mask] = ( - self.__state.precip_forecast_recomposed[no_rain_mask] + worker_state.precip_forecast_recomposed[no_rain_mask] = ( + worker_state.precip_forecast_recomposed[no_rain_mask] - mean_precip_forecast + 
mean_probabiltity_matching_forecast ) precip_forecast_probability_matching_resampled = None final_blended_forecast_single_member.append( - self.__state.precip_forecast_recomposed + worker_state.precip_forecast_recomposed ) return final_blended_forecast_single_member @@ -2816,6 +2816,7 @@ def forecast( ) forecast_steps_nowcast = blended_nowcaster.compute_forecast() + print(forecast_steps_nowcast) blended_nowcaster.reset_states_and_params() # Call the appropriate methods within the class return forecast_steps_nowcast diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index 18a4e90a4..5840279fc 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -9,6 +9,7 @@ from pysteps import blending, cascade steps_arg_values = [ + # Test the case where both the radar image and the NWP fields contain no rain. (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False), (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False), (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False), @@ -35,7 +36,6 @@ # Test the case where the NWP fields contain no rain. (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False), (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True), - # Test the case where both the radar image and the NWP fields contain no rain. 
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False), (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False), (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False), From 5ff1713900495e58bcff41536cf7632df09ddf8c Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 5 Dec 2024 18:37:48 +0100 Subject: [PATCH 46/65] Updated gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c865918f5..136c1e46e 100644 --- a/.gitignore +++ b/.gitignore @@ -94,4 +94,4 @@ venv.bak/ # Running lcoal tests /tmp -./pysteps/tests/tmp/ +/pysteps/tests/tmp/ From d999501c7d5269a322055b21f2294aafdfaa6764 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 6 Dec 2024 16:05:30 +0100 Subject: [PATCH 47/65] Cleanup of params and state dataclasses, next step: better typing --- pysteps/blending/steps.py | 426 +++++++++++++++++++++----------------- 1 file changed, 235 insertions(+), 191 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index fd1101872..7fc42aebc 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -67,6 +67,10 @@ from dataclasses import dataclass, field from typing import Optional, List, Dict, Any, Callable +# TODO: compare old and new version of the code +# TODO: look for better typing in state and params +# TODO: GO over all other todos and check if they can be removed + @dataclass class StepsBlendingConfig: @@ -118,6 +122,9 @@ class StepsBlendingParams: extrapolation_method: Any = None decomposition_method: Any = None recomposition_method: Any = None + # TODO: check of the following two are relevant or can be replaced vel_pert_... 
and noise_generator + velocity_perturbations: Any = None + generate_velocity_noise: Any = None velocity_perturbations_parallel: Optional[np.ndarray] = ( None # Velocity perturbation parameters (parallel) ) @@ -129,9 +136,6 @@ class StepsBlendingParams: ) # FFT objects for ensemble members mask_rim: Optional[int] = None # Rim size for masking struct: Optional[np.ndarray] = None # Structuring element for mask - noise_method: Optional[str] = None # Noise method used - ar_order: int = 2 # Order of the AR model - seed: Optional[int] = None # Random seed for reproducibility time_steps_is_list: bool = False # Time steps is a list precip_models_provided_is_cascade: bool = False # Precip models are decomposed xy_coordinates: np.ndarray | None = None @@ -143,58 +147,75 @@ class StepsBlendingParams: num_ensemble_workers: int = None rho_nwp_models: Any = None domain_mask: Any = None - velocity_perturbations: Any = None - generate_velocity_noise: Any = None # TODO: typing could be improved here @dataclass class StepsBlendingState: + # States related to the observations precip_cascades: Any = None - mean_extrapolation: Any = None - std_extrapolation: Any = None - precip_models_cascades: Any = None - randgen_precip: Any = None - previous_displacement: Any = None - previous_displacement_noise_cascade: Any = None - previous_displacement_prob_matching: Any = None - precip_forecast: Any = None - precip_forecast_non_perturbed: Any = None - fft_objs: Any = None - t_prev_timestep: Any = None - t_leadtime_since_start_forecast: Any = None precip_noise_input: Any = None precip_noise_cascades: Any = None precip_mean_noise: Any = None precip_std_noise: Any = None + + # States related to the extrapolation + mean_extrapolation: Any = None + std_extrapolation: Any = None rho_extrap_cascade_prev: Any = None rho_extrap_cascade: Any = None - subtimesteps: Any = None - is_nowcast_time_step: bool = None - # Variables to save data over (sub)time steps + precip_cascades_prev_subtimestep: Any = None + 
cascade_noise_prev_subtimestep: Any = None + precip_extrapolated_after_decomp: Any = None + noise_extrapolated_after_decomp: Any = None + precip_extrapolated_probability_matching: Any = None + + # States related to the NWP models + precip_models_cascades: Any = None + # States related to NWP models for (sub)time steps precip_models_cascades_timestep: Any = None precip_models_timestep: Any = None mean_models_timestep: Any = None std_models_timestep: Any = None velocity_models_timestep: Any = None - n_model_indices: Optional[np.ndarray] = None # NWP model indices - rho_forecast: Any = None + + # State that links NWP member to final output ensemble member + mapping_list_NWP_member_to_ensemble_member: Optional[np.ndarray] = ( + None # NWP model indices + ) + + # States related to the random generation of precip and motion + randgen_precip: Any = None + randgen_motion: Any = None + + # Variables related to the (sub)timestep calculations of the final forecast + previous_displacement: Any = None + previous_displacement_noise_cascade: Any = None + previous_displacement_prob_matching: Any = None + rho_final_blended_forecast: Any = None + final_blended_forecast_means: Any = None + final_blended_forecast_stds: Any = None + final_blended_forecast_means_mod_only: Any = None + final_blended_forecast_stds_mod_only: Any = None + final_blended_forecast_cascades: Any = None + final_blended_forecast_cascades_mod_only: Any = None + final_blended_forecast_recomposed: Any = None + final_blended_forecast_recomposed_mod_only: Any = None + + # The return outputs and probability matching are stored in these states: + final_blended_forecast: Any = None + final_blended_forecast_non_perturbed: Any = None + + # Variables to keep track of the times for the forecast + time_prev_timestep: Any = None + leadtime_since_start_forecast: Any = None + subtimesteps: Any = None + is_nowcast_time_step: bool = None + subtimestep_index: Any = None + + # States related to weights weights: Any = None 
weights_model_only: Any = None - precip_forecast_extrapolated_decomp_done: Any = None - noise_extrapolated_decomp_done: Any = None - precip_forecast_extrapolated_probability_matching: Any = None - precip_forecast_prev_subtimestep: Any = None - noise_prev_subtimestep: Any = None - means_blended: Any = None - sigmas_blended: Any = None - means_blended_mod_only: Any = None - sigmas_blended_mod_only: Any = None - precip_forecast_blended: Any = None - precip_forecast_blended_mod_only: Any = None - precip_forecast_recomposed: Any = None - precip_forecast_recomposed_mod_only: Any = None - t_index: Any = None class StepsBlendingNowcaster: @@ -264,20 +285,20 @@ def compute_forecast(self): self.__blended_nowcast_main_loop() # Stack and return the forecast output if self.__config.return_output: - self.__state.precip_forecast = np.stack( + self.__state.final_blended_forecast = np.stack( [ - np.stack(self.__state.precip_forecast[j]) + np.stack(self.__state.final_blended_forecast[j]) for j in range(self.__config.n_ens_members) ] ) if self.__config.measure_time: return ( - self.__state.precip_forecast, + self.__state.final_blended_forecast, self.__init_time, self.__mainloop_time, ) else: - return self.__state.precip_forecast + return self.__state.final_blended_forecast else: return None @@ -300,15 +321,17 @@ def __blended_nowcast_main_loop(self): # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! 
self.__config.extrapolation_kwargs["return_displacement"] = True - self.__state.precip_forecast_prev_subtimestep = deepcopy( + self.__state.precip_cascades_prev_subtimestep = deepcopy( self.__state.precip_cascades ) - self.__state.noise_prev_subtimestep = deepcopy( + self.__state.cascade_noise_prev_subtimestep = deepcopy( self.__state.precip_noise_cascades ) - self.__state.t_prev_timestep = [0.0 for j in range(self.__config.n_ens_members)] - self.__state.t_leadtime_since_start_forecast = [ + self.__state.time_prev_timestep = [ + 0.0 for j in range(self.__config.n_ens_members) + ] + self.__state.leadtime_since_start_forecast = [ 0.0 for j in range(self.__config.n_ens_members) ] @@ -321,7 +344,7 @@ def __blended_nowcast_main_loop(self): self.__find_nowcast_NWP_combination(t) self.__determine_skill_for_current_timestep(t) # the nowcast iteration for each ensemble member - precip_ensemble_single_timestep = [ + final_blended_forecast_all_members_one_timestep = [ None for _ in range(self.__config.n_ens_members) ] @@ -347,7 +370,7 @@ def worker(j): j, final_blended_forecast_single_member, worker_state ) ) - precip_ensemble_single_timestep[j] = ( + final_blended_forecast_all_members_one_timestep[j] = ( final_blended_forecast_single_member ) @@ -373,17 +396,19 @@ def worker(j): print("done.") if self.__config.callback is not None: - precip_forecast_final = np.stack(precip_ensemble_single_timestep) + precip_forecast_final = np.stack( + final_blended_forecast_all_members_one_timestep + ) if precip_forecast_final.shape[1] > 0: self.__config.callback(precip_forecast_final.squeeze()) if self.__config.return_output: for j in range(self.__config.n_ens_members): - self.__state.precip_forecast[j].extend( - precip_ensemble_single_timestep[j] + self.__state.final_blended_forecast[j].extend( + final_blended_forecast_all_members_one_timestep[j] ) - precip_ensemble_single_timestep = None + final_blended_forecast_all_members_one_timestep = None if self.__config.measure_time: 
self.__mainloop_time = time.time() - starttime_mainloop @@ -721,7 +746,7 @@ def __prepare_radar_and_NWP_fields(self): self.__state.precip_models_cascades = None # TODO: This type of check needs to be changed when going to xarray - if self.__precip_models.ndim != 4: + if self.__params.precip_models_provided_is_cascade: self.__state.precip_models_cascades = self.__precip_models self.__precip_models = _compute_cascade_recomposition_nwp( self.__precip_models, self.__params.recomposition_method @@ -1020,16 +1045,15 @@ def __multiply_precip_cascade_to_match_ensemble_members(self): def __initialize_random_generators(self): # 6. Initialize all the random generators and prepare for the forecast loop """Initialize all the random generators.""" - # TODO: randgen_motion and randgen_precip are not defined if no noise method is given? Should we end the program in that case? if self.__config.noise_method is not None: self.__state.randgen_precip = [] - randgen_motion = [] + self.__state.randgen_motion = [] for j in range(self.__config.n_ens_members): rs = np.random.RandomState(self.__config.seed) self.__state.randgen_precip.append(rs) seed = rs.randint(0, high=1e9) rs = np.random.RandomState(seed) - randgen_motion.append(rs) + self.__state.randgen_motion.append(rs) seed = rs.randint(0, high=1e9) if self.__config.velocity_perturbation_method is not None: @@ -1042,7 +1066,7 @@ def __initialize_random_generators(self): self.__state.velocity_perturbations = [] for j in range(self.__config.n_ens_members): kwargs = { - "randstate": randgen_motion[j], + "randstate": self.__state.randgen_motion[j], "p_par": self.__params.velocity_perturbations_parallel, "p_perp": self.__params.velocity_perturbations_perpendicular, } @@ -1071,7 +1095,9 @@ def __prepare_forecast_loop(self): self.__state.previous_displacement_prob_matching = np.stack( [None for j in range(self.__config.n_ens_members)] ) - self.__state.precip_forecast = [[] for j in range(self.__config.n_ens_members)] + 
self.__state.final_blended_forecast = [ + [] for j in range(self.__config.n_ens_members) + ] if self.__config.mask_method == "incremental": # get mask parameters @@ -1086,16 +1112,16 @@ def __prepare_forecast_loop(self): self.__params.mask_rim, self.__params.struct = None, None if self.__config.noise_method is None: - self.__state.precip_forecast_non_perturbed = [ + self.__state.final_blended_forecast_non_perturbed = [ self.__state.precip_cascades[0][i].copy() for i in range(self.__config.n_cascade_levels) ] else: - self.__state.precip_forecast_non_perturbed = None + self.__state.final_blended_forecast_non_perturbed = None - self.__state.fft_objs = [] + self.__params.fft_objs = [] for i in range(self.__config.n_ens_members): - self.__state.fft_objs.append( + self.__params.fft_objs.append( utils.get_method( self.__config.fft_method, shape=self.__state.precip_cascades.shape[-2:], @@ -1130,13 +1156,13 @@ def __initialize_noise_cascades(self): epsilon = self.__params.noise_generator( self.__params.perturbation_generator, randstate=self.__state.randgen_precip[j], - fft_method=self.__state.fft_objs[j], + fft_method=self.__params.fft_objs[j], domain=self.__config.domain, ) epsilon_decomposed = self.__params.decomposition_method( epsilon, self.__params.bandpass_filter, - fft_method=self.__state.fft_objs[j], + fft_method=self.__params.fft_objs[j], input_domain=self.__config.domain, output_domain=self.__config.domain, compute_stats=True, @@ -1266,7 +1292,7 @@ def __find_nowcast_NWP_combination(self, t): # Check if NWP models/members should be used individually, or if all of # them are blended together per nowcast ensemble member. 
if self.__config.blend_nwp_members: - self.__state.n_model_indices = None + self.__state.mapping_list_NWP_member_to_ensemble_member = None else: # Start with determining the maximum and mimimum number of members/models @@ -1277,9 +1303,11 @@ def __find_nowcast_NWP_combination(self, t): # for indexing the right climatological skill file when pysteps calculates # the blended forecast in parallel. if n_model_members > 1: - self.__state.n_model_indices = np.arange(n_model_members) + self.__state.mapping_list_NWP_member_to_ensemble_member = np.arange( + n_model_members + ) else: - self.__state.n_model_indices = [0] + self.__state.mapping_list_NWP_member_to_ensemble_member = [0] # Now, repeat the nowcast ensemble members or the nwp models/members until # it has the same amount of members as n_ens_members_max. For instance, if @@ -1310,8 +1338,10 @@ def __find_nowcast_NWP_combination(self, t): self.__state.precip_models_timestep, n_ens_members_max, axis=0 ) # Finally, for the model indices - self.__state.n_model_indices = np.repeat( - self.__state.n_model_indices, n_ens_members_max, axis=0 + self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat( + self.__state.mapping_list_NWP_member_to_ensemble_member, + n_ens_members_max, + axis=0, ) elif n_model_members == n_ens_members_min: @@ -1339,8 +1369,12 @@ def __find_nowcast_NWP_combination(self, t): self.__state.precip_models_timestep, repeats, axis=0 ) # Finally, for the model indices - self.__state.n_model_indices = np.repeat( - self.__state.n_model_indices, repeats, axis=0 + self.__state.mapping_list_NWP_member_to_ensemble_member = ( + np.repeat( + self.__state.mapping_list_NWP_member_to_ensemble_member, + repeats, + axis=0, + ) ) # TODO: is this not duplicate from part 2.3.5? 
@@ -1421,7 +1455,7 @@ def __determine_skill_for_next_timestep(self, t, j, worker_state): ] rho_nwp_forecast = np.stack(rho_nwp_forecast) # Concatenate rho_extrap_cascade and rho_nwp - worker_state.rho_forecast = np.concatenate( + worker_state.rho_final_blended_forecast = np.concatenate( (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 ) else: @@ -1430,11 +1464,11 @@ def __determine_skill_for_next_timestep(self, t, j, worker_state): lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[j], outdir_path=self.__config.outdir_path_skill, - n_model=worker_state.n_model_indices[j], + n_model=worker_state.mapping_list_NWP_member_to_ensemble_member[j], skill_kwargs=self.__config.climatology_kwargs, ) # Concatenate rho_extrap_cascade and rho_nwp - worker_state.rho_forecast = np.concatenate( + worker_state.rho_final_blended_forecast = np.concatenate( (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast[None, :]), axis=0, ) @@ -1447,14 +1481,16 @@ def __determine_weights_per_component(self, worker_state): # selected, weights will be overwritten with those weights prior to # blending step. # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] - worker_state.weights = calculate_weights_bps(worker_state.rho_forecast) + worker_state.weights = calculate_weights_bps( + worker_state.rho_final_blended_forecast + ) # The model only weights if self.__config.weights_method == "bps": # Determine the weights of the components without the extrapolation # cascade, in case this is no data or outside the mask. 
worker_state.weights_model_only = calculate_weights_bps( - worker_state.rho_forecast[1:, :] + worker_state.rho_final_blended_forecast[1:, :] ) elif self.__config.weights_method == "spn": # Only the weights of the components without the extrapolation @@ -1489,13 +1525,13 @@ def __determine_weights_per_component(self, worker_state): ) # Determine the weights for this cascade level worker_state.weights_model_only[:, i] = calculate_weights_spn( - correlations=worker_state.rho_forecast[1:, i], + correlations=worker_state.rho_final_blended_forecast[1:, i], covariance=covariance_nwp_models, ) else: # Same as correlation and noise is 1 - correlation worker_state.weights_model_only = calculate_weights_bps( - worker_state.rho_forecast[1:, :] + worker_state.rho_final_blended_forecast[1:, :] ) else: raise ValueError( @@ -1553,7 +1589,7 @@ def __regress_extrapolation_and_noise_cascades(self, j, worker_state): # use the deterministic AR(p) model computed above if # perturbations are disabled worker_state.precip_cascades[j][i] = ( - worker_state.precip_forecast_non_perturbed[i] + worker_state.final_blended_forecast_non_perturbed[i] ) # 8.3.3 regress the noise component to the subsequent time step @@ -1589,9 +1625,9 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( extrap_kwargs_pb = self.__config.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity # The following should be accesseble after this function - worker_state.precip_forecast_extrapolated_decomp_done = [] - worker_state.noise_extrapolated_decomp_done = [] - worker_state.precip_forecast_extrapolated_probability_matching = [] + worker_state.precip_extrapolated_decomp = [] + worker_state.noise_extrapolated_decomp = [] + worker_state.precip_extrapolated_probability_matching = [] # Extrapolate per sub time step for t_sub in worker_state.subtimesteps: @@ -1600,14 +1636,14 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( if 
t_diff_prev_subtimestep_int > 0.0: precip_forecast_cascade_subtimestep = [ (1.0 - t_diff_prev_subtimestep_int) - * worker_state.precip_forecast_prev_subtimestep[j][i][-1, :] + * worker_state.precip_cascades_prev_subtimestep[j][i][-1, :] + t_diff_prev_subtimestep_int * worker_state.precip_cascades[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] noise_cascade_subtimestep = [ (1.0 - t_diff_prev_subtimestep_int) - * worker_state.noise_prev_subtimestep[j][i][-1, :] + * worker_state.cascade_noise_prev_subtimestep[j][i][-1, :] + t_diff_prev_subtimestep_int * worker_state.precip_noise_cascades[j][i][-1, :] for i in range(self.__config.n_cascade_levels) @@ -1615,11 +1651,11 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( else: precip_forecast_cascade_subtimestep = [ - worker_state.precip_forecast_prev_subtimestep[j][i][-1, :] + worker_state.precip_cascades_prev_subtimestep[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] noise_cascade_subtimestep = [ - worker_state.noise_prev_subtimestep[j][i][-1, :] + worker_state.cascade_noise_prev_subtimestep[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] @@ -1628,10 +1664,8 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) - t_diff_prev_subtimestep = t_sub - worker_state.t_prev_timestep[j] - worker_state.t_leadtime_since_start_forecast[ - j - ] += t_diff_prev_subtimestep + t_diff_prev_subtimestep = t_sub - worker_state.time_prev_timestep[j] + worker_state.leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights. 
Note that we only perturb @@ -1642,7 +1676,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( self.__velocity + self.__params.generate_velocity_noise( self.__params.velocity_perturbations[j], - worker_state.t_leadtime_since_start_forecast[j] + worker_state.leadtime_since_start_forecast[j] * self.__config.timestep, ) ) @@ -1712,39 +1746,35 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( allow_nonfinite_values=True, **self.__config.extrapolation_kwargs, ) - precip_forecast_extrapolated_recomp_subtimestep = ( + precip_extrapolated_recomp_subtimestep = ( precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy() ) - temp_mask = ~np.isfinite( - precip_forecast_extrapolated_recomp_subtimestep - ) + temp_mask = ~np.isfinite(precip_extrapolated_recomp_subtimestep) # TODO: WHERE DO CAN I FIND THIS -15.0 - precip_forecast_extrapolated_recomp_subtimestep[ - ~np.isfinite(precip_forecast_extrapolated_recomp_subtimestep) + precip_extrapolated_recomp_subtimestep[ + ~np.isfinite(precip_extrapolated_recomp_subtimestep) ] = self.__params.precip_zerovalue - precip_forecast_extrapolated_decomp = ( - self.__params.decomposition_method( - precip_forecast_extrapolated_recomp_subtimestep, - self.__params.bandpass_filter, - mask=self.__params.mask_threshold, - fft_method=self.__params.fft, - output_domain=self.__config.domain, - normalize=True, - compute_stats=True, - compact_output=True, - )["cascade_levels"] - ) + precip_extrapolated_decomp = self.__params.decomposition_method( + precip_extrapolated_recomp_subtimestep, + self.__params.bandpass_filter, + mask=self.__params.mask_threshold, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + )["cascade_levels"] # Make sure we have values outside the mask if self.__params.zero_precip_radar: - precip_forecast_extrapolated_decomp = np.nan_to_num( - precip_forecast_extrapolated_decomp, + 
precip_extrapolated_decomp = np.nan_to_num( + precip_extrapolated_decomp, copy=True, nan=np.nanmin(precip_forecast_cascade_subtimestep), posinf=np.nanmin(precip_forecast_cascade_subtimestep), neginf=np.nanmin(precip_forecast_cascade_subtimestep), ) for i in range(self.__config.n_cascade_levels): - precip_forecast_extrapolated_decomp[i][temp_mask] = np.nan + precip_extrapolated_decomp[i][temp_mask] = np.nan # B. Noise noise_cascade_subtimestep_recomp = blending.utils.recompose_cascade( combined_cascade=noise_cascade_subtimestep, @@ -1780,17 +1810,17 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( noise_extrapolated_decomp[i] *= self.__params.noise_std_coeffs[i] # Append the results to the output lists - worker_state.precip_forecast_extrapolated_decomp_done.append( - precip_forecast_extrapolated_decomp.copy() + worker_state.precip_extrapolated_decomp.append( + precip_extrapolated_decomp.copy() ) - worker_state.noise_extrapolated_decomp_done.append( + worker_state.noise_extrapolated_decomp.append( noise_extrapolated_decomp.copy() ) precip_forecast_cascade_subtimestep = None precip_forecast_recomp_subtimestep = None precip_forecast_extrapolated_recomp_subtimestep_temp = None - precip_forecast_extrapolated_recomp_subtimestep = None - precip_forecast_extrapolated_decomp = None + precip_extrapolated_recomp_subtimestep = None + precip_extrapolated_decomp = None noise_cascade_subtimestep = None noise_cascade_subtimestep_recomp = None noise_extrapolated_recomp_temp = None @@ -1819,28 +1849,28 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( allow_nonfinite_values=True, **extrap_kwargs_pb, ) - worker_state.precip_forecast_extrapolated_probability_matching.append( + worker_state.precip_extrapolated_probability_matching.append( precip_forecast_extrapolated_probability_matching_temp[0] ) - worker_state.t_prev_timestep[j] = t_sub + worker_state.time_prev_timestep[j] = t_sub - if 
len(worker_state.precip_forecast_extrapolated_decomp_done) > 0: - worker_state.precip_forecast_extrapolated_decomp_done = np.stack( - worker_state.precip_forecast_extrapolated_decomp_done + if len(worker_state.precip_extrapolated_decomp) > 0: + worker_state.precip_extrapolated_decomp = np.stack( + worker_state.precip_extrapolated_decomp ) - worker_state.noise_extrapolated_decomp_done = np.stack( - worker_state.noise_extrapolated_decomp_done + worker_state.noise_extrapolated_decomp = np.stack( + worker_state.noise_extrapolated_decomp ) - worker_state.precip_forecast_extrapolated_probability_matching = np.stack( - worker_state.precip_forecast_extrapolated_probability_matching + worker_state.precip_extrapolated_probability_matching = np.stack( + worker_state.precip_extrapolated_probability_matching ) # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not worker_state.subtimesteps: - t_diff_prev_subtimestep = t + 1 - worker_state.t_prev_timestep[j] - worker_state.t_leadtime_since_start_forecast[j] += t_diff_prev_subtimestep + t_diff_prev_subtimestep = t + 1 - worker_state.time_prev_timestep[j] + worker_state.leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights @@ -1849,7 +1879,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( self.__velocity + self.__params.generate_velocity_noise( self.__params.velocity_perturbations[j], - worker_state.t_leadtime_since_start_forecast[j] + worker_state.leadtime_since_start_forecast[j] * self.__config.timestep, ) ) @@ -1926,28 +1956,30 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) ) - worker_state.t_prev_timestep[j] = t + 1 + worker_state.time_prev_timestep[j] = t + 1 - worker_state.precip_forecast_prev_subtimestep[j] = worker_state.precip_cascades[ + worker_state.precip_cascades_prev_subtimestep[j] = worker_state.precip_cascades[ 
j ] - worker_state.noise_prev_subtimestep[j] = worker_state.precip_noise_cascades[j] + worker_state.cascade_noise_prev_subtimestep[j] = ( + worker_state.precip_noise_cascades[j] + ) def __blend_cascades(self, t_sub, j, worker_state): - worker_state.t_index = np.where(np.array(worker_state.subtimesteps) == t_sub)[ - 0 - ][0] + worker_state.subtimestep_index = np.where( + np.array(worker_state.subtimesteps) == t_sub + )[0][0] # First concatenate the cascades and the means and sigmas # precip_models = [n_models,timesteps,n_cascade_levels,m,n] if self.__config.blend_nwp_members: cascade_stack_all_components = np.concatenate( ( - worker_state.precip_forecast_extrapolated_decomp_done[ - None, worker_state.t_index + worker_state.precip_extrapolated_decomp[ + None, worker_state.subtimestep_index ], worker_state.precip_models_cascades_timestep, - worker_state.noise_extrapolated_decomp_done[ - None, worker_state.t_index + worker_state.noise_extrapolated_decomp[ + None, worker_state.subtimestep_index ], ), axis=0, @@ -1969,12 +2001,12 @@ def __blend_cascades(self, t_sub, j, worker_state): else: cascade_stack_all_components = np.concatenate( ( - worker_state.precip_forecast_extrapolated_decomp_done[ - None, worker_state.t_index + worker_state.precip_extrapolated_decomp[ + None, worker_state.subtimestep_index ], worker_state.precip_models_cascades_timestep[None, j], - worker_state.noise_extrapolated_decomp_done[ - None, worker_state.t_index + worker_state.noise_extrapolated_decomp[ + None, worker_state.subtimestep_index ], ), axis=0, @@ -2022,31 +2054,36 @@ def __blend_cascades(self, t_sub, j, worker_state): ) # Determine the weights for this cascade level worker_state.weights[:, i] = calculate_weights_spn( - correlations=worker_state.rho_forecast[:, i], + correlations=worker_state.rho_final_blended_forecast[:, i], covariance=covariance_nwp_models, ) # Blend the extrapolation, (NWP) model(s) and noise cascades - worker_state.precip_forecast_blended = 
blending.utils.blend_cascades( + worker_state.final_blended_forecast_cascades = blending.utils.blend_cascades( cascades_norm=cascade_stack_all_components, weights=worker_state.weights ) # Also blend the cascade without the extrapolation component - worker_state.precip_forecast_blended_mod_only = blending.utils.blend_cascades( - cascades_norm=cascade_stack_all_components[1:, :], - weights=worker_state.weights_model_only, + worker_state.final_blended_forecast_cascades_mod_only = ( + blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components[1:, :], + weights=worker_state.weights_model_only, + ) ) # Blend the means and standard deviations # Input is array of shape [number_components, scale_level, ...] - worker_state.means_blended, worker_state.sigmas_blended = blend_means_sigmas( + ( + worker_state.final_blended_forecast_means, + worker_state.final_blended_forecast_stds, + ) = blend_means_sigmas( means=means_stacked, sigmas=sigmas_stacked, weights=worker_state.weights ) # Also blend the means and sigmas for the cascade without extrapolation ( - worker_state.means_blended_mod_only, - worker_state.sigmas_blended_mod_only, + worker_state.final_blended_forecast_means_mod_only, + worker_state.final_blended_forecast_stds_mod_only, ) = blend_means_sigmas( means=means_stacked[1:, :], sigmas=sigmas_stacked[1:, :], @@ -2057,30 +2094,34 @@ def __recompose_cascade_to_rainfall_field(self, j, worker_state): # 8.6 Recompose the cascade to a precipitation field # (The function first normalizes the blended cascade, precip_forecast_blended # again) - worker_state.precip_forecast_recomposed = blending.utils.recompose_cascade( - combined_cascade=worker_state.precip_forecast_blended, - combined_mean=worker_state.means_blended, - combined_sigma=worker_state.sigmas_blended, + worker_state.final_blended_forecast_recomposed = ( + blending.utils.recompose_cascade( + combined_cascade=worker_state.final_blended_forecast_cascades, + 
combined_mean=worker_state.final_blended_forecast_means, + combined_sigma=worker_state.final_blended_forecast_stds, + ) ) # The recomposed cascade without the extrapolation (for NaN filling # outside the radar domain) - worker_state.precip_forecast_recomposed_mod_only = ( + worker_state.final_blended_forecast_recomposed_mod_only = ( blending.utils.recompose_cascade( - combined_cascade=worker_state.precip_forecast_blended_mod_only, - combined_mean=worker_state.means_blended_mod_only, - combined_sigma=worker_state.sigmas_blended_mod_only, + combined_cascade=worker_state.final_blended_forecast_cascades_mod_only, + combined_mean=worker_state.final_blended_forecast_means_mod_only, + combined_sigma=worker_state.final_blended_forecast_stds_mod_only, ) ) if self.__config.domain == "spectral": # TODO: Check this! (Only tested with domain == 'spatial') # TODO: what needs to happen with above TODO? - worker_state.precip_forecast_recomposed = worker_state.fft_objs[j].irfft2( - worker_state.precip_forecast_recomposed - ) - worker_state.precip_forecast_recomposed_mod_only = worker_state.fft_objs[ + worker_state.final_blended_forecast_recomposed = worker_state.fft_objs[ j - ].irfft2(worker_state.precip_forecast_recomposed_mod_only) + ].irfft2(worker_state.final_blended_forecast_recomposed) + worker_state.final_blended_forecast_recomposed_mod_only = ( + worker_state.fft_objs[j].irfft2( + worker_state.final_blended_forecast_recomposed_mod_only + ) + ) def __post_process_output( self, j, final_blended_forecast_single_member, worker_state @@ -2113,8 +2154,8 @@ def __post_process_output( if self.__config.blend_nwp_members: precip_forecast_probability_matching_final = np.concatenate( ( - worker_state.precip_forecast_extrapolated_probability_matching[ - None, worker_state.t_index + worker_state.precip_extrapolated_probability_matching[ + None, worker_state.subtimestep_index ], worker_state.precip_models_timestep, ), @@ -2123,8 +2164,8 @@ def __post_process_output( else: 
precip_forecast_probability_matching_final = np.concatenate( ( - worker_state.precip_forecast_extrapolated_probability_matching[ - None, worker_state.t_index + worker_state.precip_extrapolated_probability_matching[ + None, worker_state.subtimestep_index ], worker_state.precip_models_timestep[None, j], ), @@ -2159,7 +2200,7 @@ def __post_process_output( # areas with the "..._mod_only" blended forecasts, consisting # of the NWP and noise components. - nan_indices = np.isnan(worker_state.precip_forecast_recomposed) + nan_indices = np.isnan(worker_state.final_blended_forecast_recomposed) if self.__config.smooth_radar_mask_range != 0: # Compute the smooth dilated mask new_mask = blending.utils.compute_smooth_dilated_mask( @@ -2173,10 +2214,10 @@ def __post_process_output( # Handle NaNs in precip_forecast_new and precip_forecast_new_mod_only by setting NaNs to 0 in the blending step precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num( - worker_state.precip_forecast_recomposed_mod_only, nan=0 + worker_state.final_blended_forecast_recomposed_mod_only, nan=0 ) precip_forecast_recomposed_no_nan = np.nan_to_num( - worker_state.precip_forecast_recomposed, nan=0 + worker_state.final_blended_forecast_recomposed, nan=0 ) # Perform the blending of radar and model inside the radar domain using a weighted combination @@ -2196,8 +2237,8 @@ def __post_process_output( axis=0, ) else: - worker_state.precip_forecast_recomposed[nan_indices] = ( - worker_state.precip_forecast_recomposed_mod_only[nan_indices] + worker_state.final_blended_forecast_recomposed[nan_indices] = ( + worker_state.final_blended_forecast_recomposed_mod_only[nan_indices] ) nan_indices = np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended[nan_indices] = ( @@ -2206,9 +2247,9 @@ def __post_process_output( # Finally, fill the remaining nan values, if present, with # the minimum value in the forecast - nan_indices = np.isnan(worker_state.precip_forecast_recomposed) - 
worker_state.precip_forecast_recomposed[nan_indices] = np.nanmin( - worker_state.precip_forecast_recomposed + nan_indices = np.isnan(worker_state.final_blended_forecast_recomposed) + worker_state.final_blended_forecast_recomposed[nan_indices] = np.nanmin( + worker_state.final_blended_forecast_recomposed ) nan_indices = np.isnan(precip_forecast_probability_matching_blended) precip_forecast_probability_matching_blended[nan_indices] = np.nanmin( @@ -2221,7 +2262,9 @@ def __post_process_output( # apply the precipitation mask to prevent generation of new # precipitation into areas where it was not originally # observed - precip_forecast_min_value = worker_state.precip_forecast_recomposed.min() + precip_forecast_min_value = ( + worker_state.final_blended_forecast_recomposed.min() + ) if self.__config.mask_method == "incremental": # The incremental mask is slightly different from # the implementation in the non-blended steps.py, as @@ -2238,16 +2281,17 @@ def __post_process_output( precip_field_mask, self.__params.struct, self.__params.mask_rim ) # Get the final mask - worker_state.precip_forecast_recomposed = ( + worker_state.final_blended_forecast_recomposed = ( precip_forecast_min_value + ( - worker_state.precip_forecast_recomposed + worker_state.final_blended_forecast_recomposed - precip_forecast_min_value ) * precip_field_mask ) precip_field_mask_temp = ( - worker_state.precip_forecast_recomposed > precip_forecast_min_value + worker_state.final_blended_forecast_recomposed + > precip_forecast_min_value ) elif self.__config.mask_method == "obs": # The mask equals the most recent benchmark @@ -2258,7 +2302,7 @@ def __post_process_output( ) # Set to min value outside of mask - worker_state.precip_forecast_recomposed[~precip_field_mask_temp] = ( + worker_state.final_blended_forecast_recomposed[~precip_field_mask_temp] = ( precip_forecast_min_value ) @@ -2269,8 +2313,8 @@ def __post_process_output( self.__config.probmatching_method is not None and 
self.__config.resample_distribution ): - arr1 = worker_state.precip_forecast_extrapolated_probability_matching[ - worker_state.t_index + arr1 = worker_state.precip_extrapolated_probability_matching[ + worker_state.subtimestep_index ] arr2 = worker_state.precip_models_timestep[j] # resample weights based on cascade level 2. @@ -2290,17 +2334,17 @@ def __post_process_output( if self.__config.probmatching_method == "cdf": # nan indices in the extrapolation nowcast nan_indices = np.isnan( - worker_state.precip_forecast_extrapolated_probability_matching[ - worker_state.t_index + worker_state.precip_extrapolated_probability_matching[ + worker_state.subtimestep_index ] ) # Adjust the CDF of the forecast to match the resampled distribution combined from # extrapolation and model fields. # Rainfall outside the pure extrapolation domain is not taken into account. - if np.any(np.isfinite(worker_state.precip_forecast_recomposed)): - worker_state.precip_forecast_recomposed = ( + if np.any(np.isfinite(worker_state.final_blended_forecast_recomposed)): + worker_state.final_blended_forecast_recomposed = ( probmatching.nonparam_match_empirical_cdf( - worker_state.precip_forecast_recomposed, + worker_state.final_blended_forecast_recomposed, precip_forecast_probability_matching_resampled, nan_indices, ) @@ -2315,21 +2359,21 @@ def __post_process_output( ] ) no_rain_mask = ( - worker_state.precip_forecast_recomposed + worker_state.final_blended_forecast_recomposed >= self.__config.precip_threshold ) mean_precip_forecast = np.mean( - worker_state.precip_forecast_recomposed[no_rain_mask] + worker_state.final_blended_forecast_recomposed[no_rain_mask] ) - worker_state.precip_forecast_recomposed[no_rain_mask] = ( - worker_state.precip_forecast_recomposed[no_rain_mask] + worker_state.final_blended_forecast_recomposed[no_rain_mask] = ( + worker_state.final_blended_forecast_recomposed[no_rain_mask] - mean_precip_forecast + mean_probabiltity_matching_forecast ) 
precip_forecast_probability_matching_resampled = None final_blended_forecast_single_member.append( - worker_state.precip_forecast_recomposed + worker_state.final_blended_forecast_recomposed ) return final_blended_forecast_single_member From ed20ecc1b7ca3d83e09f2a435d16f330d2520482 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 6 Dec 2024 16:09:55 +0100 Subject: [PATCH 48/65] Cleanup of params and state dataclasses, now all tests pass --- pysteps/blending/steps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 7fc42aebc..4e567a106 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1551,7 +1551,7 @@ def __regress_extrapolation_and_noise_cascades(self, j, worker_state): epsilon = self.__params.noise_generator( self.__params.perturbation_generator, randstate=worker_state.randgen_precip[j], - fft_method=worker_state.fft_objs[j], + fft_method=self.__params.fft_objs[j], domain=self.__config.domain, ) @@ -1559,7 +1559,7 @@ def __regress_extrapolation_and_noise_cascades(self, j, worker_state): epsilon_decomposed = self.__params.decomposition_method( epsilon, self.__params.bandpass_filter, - fft_method=worker_state.fft_objs[j], + fft_method=self.__params.fft_objs[j], input_domain=self.__config.domain, output_domain=self.__config.domain, compute_stats=True, @@ -2114,11 +2114,11 @@ def __recompose_cascade_to_rainfall_field(self, j, worker_state): # TODO: Check this! (Only tested with domain == 'spatial') # TODO: what needs to happen with above TODO? 
- worker_state.final_blended_forecast_recomposed = worker_state.fft_objs[ + worker_state.final_blended_forecast_recomposed = self.__params.fft_objs[ j ].irfft2(worker_state.final_blended_forecast_recomposed) worker_state.final_blended_forecast_recomposed_mod_only = ( - worker_state.fft_objs[j].irfft2( + self.__params.fft_objs[j].irfft2( worker_state.final_blended_forecast_recomposed_mod_only ) ) From 701e726ec02660c821af7e922796d4555545c78e Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 6 Dec 2024 16:47:10 +0100 Subject: [PATCH 49/65] Added correct typing to all parts of params and state --- pysteps/blending/steps.py | 211 ++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 112 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 4e567a106..a9f2becff 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -65,11 +65,12 @@ DASK_IMPORTED = False from dataclasses import dataclass, field -from typing import Optional, List, Dict, Any, Callable +from typing import Optional, List, Dict, Any, Callable, Union # TODO: compare old and new version of the code # TODO: look for better typing in state and params # TODO: GO over all other todos and check if they can be removed +# TODO: look at the documentation and try to improve it, lots of things are now combined together @dataclass @@ -113,109 +114,104 @@ class StepsBlendingConfig: # TODO: typing could be improved here @dataclass class StepsBlendingParams: - noise_std_coeffs: np.ndarray = None # Noise standard deviation coefficients - bandpass_filter: Any = None # Band-pass filter object - fft: Any = None # FFT method object - perturbation_generator: Callable = None # Perturbation generator - noise_generator: Callable = None # Noise generator - PHI: np.ndarray = None # AR(p) model parameters - extrapolation_method: Any = None - decomposition_method: Any = None - recomposition_method: Any = None - # TODO: check of the following two are relevant or 
can be replaced vel_pert_... and noise_generator - velocity_perturbations: Any = None - generate_velocity_noise: Any = None - velocity_perturbations_parallel: Optional[np.ndarray] = ( - None # Velocity perturbation parameters (parallel) + noise_std_coeffs: Optional[np.ndarray] = ( + None # Noise standard deviation coefficients ) - velocity_perturbations_perpendicular: Optional[np.ndarray] = ( - None # Velocity perturbation parameters (perpendicular) + bandpass_filter: Optional[Any] = None # Band-pass filter object + fft: Optional[Any] = None # FFT method object + perturbation_generator: Optional[Callable[..., np.ndarray]] = ( + None # Perturbation generator ) - fft_objs: List[Any] = field( - default_factory=list - ) # FFT objects for ensemble members + noise_generator: Optional[Callable[..., np.ndarray]] = None # Noise generator + PHI: Optional[np.ndarray] = None # AR(p) model parameters + extrapolation_method: Optional[Callable[..., Any]] = None + decomposition_method: Optional[Callable[..., dict]] = None + recomposition_method: Optional[Callable[..., np.ndarray]] = None + # TODO: check of the following two are relevant or can be replaced vel_pert_... 
and noise_generator + velocity_perturbations: Optional[Any] = None + generate_velocity_noise: Optional[Callable[[Any, float], np.ndarray]] = None + velocity_perturbations_parallel: Optional[np.ndarray] = None + velocity_perturbations_perpendicular: Optional[np.ndarray] = None + fft_objs: List[Any] = field(default_factory=list) mask_rim: Optional[int] = None # Rim size for masking struct: Optional[np.ndarray] = None # Structuring element for mask time_steps_is_list: bool = False # Time steps is a list precip_models_provided_is_cascade: bool = False # Precip models are decomposed - xy_coordinates: np.ndarray | None = None - precip_zerovalue: Any = None - mask_threshold: Any = None + xy_coordinates: Optional[np.ndarray] = None + precip_zerovalue: Optional[float] = None + mask_threshold: Optional[np.ndarray] = None zero_precip_radar: bool = False zero_precip_model_fields: bool = False - original_timesteps: Any = None - num_ensemble_workers: int = None - rho_nwp_models: Any = None - domain_mask: Any = None + original_timesteps: Optional[Union[list, np.ndarray]] = None + num_ensemble_workers: Optional[int] = None + rho_nwp_models: Optional[np.ndarray] = None + domain_mask: Optional[np.ndarray] = None # TODO: typing could be improved here @dataclass class StepsBlendingState: - # States related to the observations - precip_cascades: Any = None - precip_noise_input: Any = None - precip_noise_cascades: Any = None - precip_mean_noise: Any = None - precip_std_noise: Any = None - - # States related to the extrapolation - mean_extrapolation: Any = None - std_extrapolation: Any = None - rho_extrap_cascade_prev: Any = None - rho_extrap_cascade: Any = None - precip_cascades_prev_subtimestep: Any = None - cascade_noise_prev_subtimestep: Any = None - precip_extrapolated_after_decomp: Any = None - noise_extrapolated_after_decomp: Any = None - precip_extrapolated_probability_matching: Any = None - - # States related to the NWP models - precip_models_cascades: Any = None - # States 
related to NWP models for (sub)time steps - precip_models_cascades_timestep: Any = None - precip_models_timestep: Any = None - mean_models_timestep: Any = None - std_models_timestep: Any = None - velocity_models_timestep: Any = None - - # State that links NWP member to final output ensemble member - mapping_list_NWP_member_to_ensemble_member: Optional[np.ndarray] = ( - None # NWP model indices - ) - - # States related to the random generation of precip and motion - randgen_precip: Any = None - randgen_motion: Any = None - - # Variables related to the (sub)timestep calculations of the final forecast - previous_displacement: Any = None - previous_displacement_noise_cascade: Any = None - previous_displacement_prob_matching: Any = None - rho_final_blended_forecast: Any = None - final_blended_forecast_means: Any = None - final_blended_forecast_stds: Any = None - final_blended_forecast_means_mod_only: Any = None - final_blended_forecast_stds_mod_only: Any = None - final_blended_forecast_cascades: Any = None - final_blended_forecast_cascades_mod_only: Any = None - final_blended_forecast_recomposed: Any = None - final_blended_forecast_recomposed_mod_only: Any = None - - # The return outputs and probability matching are stored in these states: - final_blended_forecast: Any = None - final_blended_forecast_non_perturbed: Any = None - - # Variables to keep track of the times for the forecast - time_prev_timestep: Any = None - leadtime_since_start_forecast: Any = None - subtimesteps: Any = None - is_nowcast_time_step: bool = None - subtimestep_index: Any = None - - # States related to weights - weights: Any = None - weights_model_only: Any = None + # Radar and noise states + precip_cascades: Optional[np.ndarray] = None + precip_noise_input: Optional[np.ndarray] = None + precip_noise_cascades: Optional[np.ndarray] = None + precip_mean_noise: Optional[np.ndarray] = None + precip_std_noise: Optional[np.ndarray] = None + + # Extrapolation states + mean_extrapolation: 
Optional[np.ndarray] = None + std_extrapolation: Optional[np.ndarray] = None + rho_extrap_cascade_prev: Optional[np.ndarray] = None + rho_extrap_cascade: Optional[np.ndarray] = None + precip_cascades_prev_subtimestep: Optional[np.ndarray] = None + cascade_noise_prev_subtimestep: Optional[np.ndarray] = None + precip_extrapolated_after_decomp: Optional[np.ndarray] = None + noise_extrapolated_after_decomp: Optional[np.ndarray] = None + precip_extrapolated_probability_matching: Optional[np.ndarray] = None + + # NWP model states + precip_models_cascades: Optional[np.ndarray] = None + precip_models_cascades_timestep: Optional[np.ndarray] = None + precip_models_timestep: Optional[np.ndarray] = None + mean_models_timestep: Optional[np.ndarray] = None + std_models_timestep: Optional[np.ndarray] = None + velocity_models_timestep: Optional[np.ndarray] = None + + # Mapping from NWP members to ensemble members + mapping_list_NWP_member_to_ensemble_member: Optional[np.ndarray] = None + + # Random states for precipitation and motion + randgen_precip: Optional[List[np.random.RandomState]] = None + randgen_motion: Optional[List[np.random.RandomState]] = None + + # Variables for final forecast computation + previous_displacement: Optional[List[Any]] = None + previous_displacement_noise_cascade: Optional[List[Any]] = None + previous_displacement_prob_matching: Optional[List[Any]] = None + rho_final_blended_forecast: Optional[np.ndarray] = None + final_blended_forecast_means: Optional[np.ndarray] = None + final_blended_forecast_stds: Optional[np.ndarray] = None + final_blended_forecast_means_mod_only: Optional[np.ndarray] = None + final_blended_forecast_stds_mod_only: Optional[np.ndarray] = None + final_blended_forecast_cascades: Optional[np.ndarray] = None + final_blended_forecast_cascades_mod_only: Optional[np.ndarray] = None + final_blended_forecast_recomposed: Optional[np.ndarray] = None + final_blended_forecast_recomposed_mod_only: Optional[np.ndarray] = None + + # Final outputs 
+ final_blended_forecast: Optional[np.ndarray] = None + final_blended_forecast_non_perturbed: Optional[np.ndarray] = None + + # Timing and indexing + time_prev_timestep: Optional[List[float]] = None + leadtime_since_start_forecast: Optional[List[float]] = None + subtimesteps: Optional[List[float]] = None + is_nowcast_time_step: Optional[bool] = None + subtimestep_index: Optional[int] = None + + # Weights used for blending + weights: Optional[np.ndarray] = None + weights_model_only: Optional[np.ndarray] = None class StepsBlendingNowcaster: @@ -311,7 +307,6 @@ def __blended_nowcast_main_loop(self): # 8. Start the forecasting loop ### # Isolate the last time slice of observed precipitation - # TODO: This precip was "precip = self.__precip[-1, :, :]", changed to self.__precip = self.__precip[-1, :, :]. Might need to chage again and user local variable precip in all following functions self.__precip = self.__precip[-1, :, :] print("Starting blended nowcast computation.") @@ -364,7 +359,6 @@ def worker(j): if t_sub > 0: self.__blend_cascades(t_sub, j, worker_state) self.__recompose_cascade_to_rainfall_field(j, worker_state) - # TODO: could be I need to return and ave final_blended_forecast_single_member final_blended_forecast_single_member = ( self.__post_process_output( j, final_blended_forecast_single_member, worker_state @@ -676,7 +670,6 @@ def __initialize_nowcast_components(self): x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) self.__params.xy_coordinates = np.stack([x_values, y_values]) - # TODO: changed precip_copy for self.__precip self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() # Determine the domain mask from non-finite values in the precipitation data self.__params.domain_mask = np.logical_or.reduce( @@ -745,7 +738,6 @@ def __prepare_radar_and_NWP_fields(self): # 2.2 If necessary, recompose (NWP) model forecasts self.__state.precip_models_cascades = None - # TODO: This type of check needs to be changed when going to 
xarray if self.__params.precip_models_provided_is_cascade: self.__state.precip_models_cascades = self.__precip_models self.__precip_models = _compute_cascade_recomposition_nwp( @@ -1127,7 +1119,7 @@ def __prepare_forecast_loop(self): shape=self.__state.precip_cascades.shape[-2:], ) ) - # TODO: moved this from # 7 to here as it seems to fit better here. The only parameter used and needed is PHI, this is its last use untill # 7 + # initizalize the current and previous extrapolation forecast scale for the nowcasting component # phi1 / (1 - phi2), see BPS2004 self.__state.rho_extrap_cascade_prev = np.repeat( @@ -1377,7 +1369,7 @@ def __find_nowcast_NWP_combination(self, t): ) ) - # TODO: is this not duplicate from part 2.3.5? + # TODO: is this not duplicate from part 2.3.5? If so, is it still needed here? # If zero_precip_radar is True, set the velocity field equal to the NWP # velocity field for the current time step (velocity_models_temp). if self.__params.zero_precip_radar: @@ -1388,19 +1380,18 @@ def __find_nowcast_NWP_combination(self, t): def __determine_skill_for_current_timestep(self, t): if t == 0: """Calculate the initial skill of the (NWP) model forecasts at t=0.""" - # TODO rewrite loop - self.__params.rho_nwp_models = [ - blending.skill_scores.spatial_correlation( + self.__params.rho_nwp_models = [] + for model_index in range( + self.__state.precip_models_cascades_timestep.shape[0] + ): + rho_value = blending.skill_scores.spatial_correlation( obs=self.__state.precip_cascades[0, :, -1, :, :].copy(), mod=self.__state.precip_models_cascades_timestep[ model_index, :, :, : ].copy(), domain_mask=self.__params.domain_mask, ) - for model_index in range( - self.__state.precip_models_cascades_timestep.shape[0] - ) - ] + self.__params.rho_nwp_models.append(rho_value) self.__params.rho_nwp_models = np.stack(self.__params.rho_nwp_models) # Ensure that the model skill decreases with increasing scale level. 
@@ -1442,17 +1433,16 @@ def __determine_skill_for_next_timestep(self, t, j, worker_state): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) # Then for the model components if self.__config.blend_nwp_members: - # TODO rewrite loop - rho_nwp_forecast = [ - blending.skill_scores.lt_dependent_cor_nwp( + rho_nwp_forecast = [] + for model_index in range(self.__params.rho_nwp_models.shape[0]): + rho_value = blending.skill_scores.lt_dependent_cor_nwp( lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[model_index], outdir_path=self.__config.outdir_path_skill, n_model=model_index, skill_kwargs=self.__config.climatology_kwargs, ) - for model_index in range(self.__params.rho_nwp_models.shape[0]) - ] + rho_nwp_forecast.append(rho_value) rho_nwp_forecast = np.stack(rho_nwp_forecast) # Concatenate rho_extrap_cascade and rho_nwp worker_state.rho_final_blended_forecast = np.concatenate( @@ -2032,7 +2022,6 @@ def __blend_cascades(self, t_sub, j, worker_state): # method is given? Or does this mean that in all other circumstances the weights # have been calculated in a different way? - # TODO: changed weights to worker_state.weights if self.__config.weights_method == "spn": worker_state.weights = np.zeros( ( @@ -2112,8 +2101,6 @@ def __recompose_cascade_to_rainfall_field(self, j, worker_state): ) if self.__config.domain == "spectral": # TODO: Check this! (Only tested with domain == 'spatial') - - # TODO: what needs to happen with above TODO? 
worker_state.final_blended_forecast_recomposed = self.__params.fft_objs[ j ].irfft2(worker_state.final_blended_forecast_recomposed) From b9de5113b96b34ffa426aae8e87009d794bcd67e Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 6 Dec 2024 17:20:03 +0100 Subject: [PATCH 50/65] Ready for pull request --- pysteps/blending/steps.py | 192 +++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 108 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index a9f2becff..ee53462c1 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -67,9 +67,7 @@ from dataclasses import dataclass, field from typing import Optional, List, Dict, Any, Callable, Union -# TODO: compare old and new version of the code -# TODO: look for better typing in state and params -# TODO: GO over all other todos and check if they can be removed +# TODO: compare old and new version of the code, run a benchmark to compare the two # TODO: look at the documentation and try to improve it, lots of things are now combined together @@ -111,7 +109,6 @@ class StepsBlendingConfig: return_output: bool = True -# TODO: typing could be improved here @dataclass class StepsBlendingParams: noise_std_coeffs: Optional[np.ndarray] = ( @@ -127,7 +124,6 @@ class StepsBlendingParams: extrapolation_method: Optional[Callable[..., Any]] = None decomposition_method: Optional[Callable[..., dict]] = None recomposition_method: Optional[Callable[..., np.ndarray]] = None - # TODO: check of the following two are relevant or can be replaced vel_pert_... 
and noise_generator velocity_perturbations: Optional[Any] = None generate_velocity_noise: Optional[Callable[[Any, float], np.ndarray]] = None velocity_perturbations_parallel: Optional[np.ndarray] = None @@ -148,7 +144,6 @@ class StepsBlendingParams: domain_mask: Optional[np.ndarray] = None -# TODO: typing could be improved here @dataclass class StepsBlendingState: # Radar and noise states @@ -213,6 +208,9 @@ class StepsBlendingState: weights: Optional[np.ndarray] = None weights_model_only: Optional[np.ndarray] = None + # Is stores here aswell becasue this is changed during the forecast loop and thus not part of the config + extrapolation_kwargs: Dict[str, Any] = field(default_factory=dict) + class StepsBlendingNowcaster: def __init__( @@ -312,9 +310,8 @@ def __blended_nowcast_main_loop(self): if self.__config.measure_time: starttime_mainloop = time.time() - # TODO: problem with the config here! This variable changes over time... - # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! - self.__config.extrapolation_kwargs["return_displacement"] = True + self.__state.extrapolation_kwargs = deepcopy(self.__config.extrapolation_kwargs) + self.__state.extrapolation_kwargs["return_displacement"] = True self.__state.precip_cascades_prev_subtimestep = deepcopy( self.__state.precip_cascades @@ -697,15 +694,48 @@ def __prepare_radar_and_NWP_fields(self): # 1. Start with the radar rainfall fields. We want the fields in a # Lagrangian space - self.__precip = _transform_to_lagrangian( - self.__precip, - self.__velocity, - self.__config.ar_order, - self.__params.xy_coordinates, - self.__params.extrapolation_method, - self.__config.extrapolation_kwargs, - self.__config.num_workers, - ) + + """Advect the previous precipitation fields to the same position with the + most recent one (i.e. transform them into the Lagrangian coordinates). 
+ """ + self.__config.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates + res = [] + + # TODO: create beter names here fore this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian) + def f(precip, i): + return self.__params.extrapolation_method( + precip[i, :, :], + self.__velocity, + self.__config.ar_order - i, + "min", + allow_nonfinite_values=True, + **self.__config.extrapolation_kwargs.copy(), + )[-1] + + if not DASK_IMPORTED: + # Process each earlier precipitation field directly + for i in range(self.__config.ar_order): + self.__precip[i, :, :] = f(self.__precip, i) + else: + # Use Dask delayed for parallelization if DASK_IMPORTED is True + for i in range(self.__config.ar_order): + res.append(dask.delayed(f)(self.__precip, i)) + num_workers_ = ( + len(res) + if self.__config.num_workers > len(res) + else self.__config.num_workers + ) + self.__precip = np.stack( + list(dask.compute(*res, num_workers=num_workers_)) + + [self.__precip[-1, :, :]] + ) + + # Replace non-finite values with the minimum value for each field + self.__precip = self.__precip.copy() + for i in range(self.__precip.shape[0]): + self.__precip[i, ~np.isfinite(self.__precip[i, :])] = np.nanmin( + self.__precip[i, :] + ) # 2. 
Perform the cascade decomposition for the input precip fields and, # if necessary, for the (NWP) model fields @@ -740,9 +770,19 @@ def __prepare_radar_and_NWP_fields(self): if self.__params.precip_models_provided_is_cascade: self.__state.precip_models_cascades = self.__precip_models - self.__precip_models = _compute_cascade_recomposition_nwp( - self.__precip_models, self.__params.recomposition_method - ) + # Inline logic of _compute_cascade_recomposition_nwp + temp_precip_models = [] + for i in range(self.__precip_models.shape[0]): + precip_model = [] + for time_step in range(self.__precip_models.shape[1]): + # Use the recomposition method to rebuild the rainfall fields + recomposed = self.__params.recomposition_method( + self.__precip_models[i, time_step] + ) + precip_model.append(recomposed) + temp_precip_models.append(precip_model) + + self.__precip_models = np.stack(temp_precip_models) # 2.3 Check for zero input fields in the radar and NWP data. self.__params.zero_precip_radar = blending.utils.check_norain( @@ -1610,9 +1650,9 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # (or subtimesteps if non-integer time steps are given) # Settings and initialize the output - extrap_kwargs_ = self.__config.extrapolation_kwargs.copy() - extrap_kwargs_noise = self.__config.extrapolation_kwargs.copy() - extrap_kwargs_pb = self.__config.extrapolation_kwargs.copy() + extrap_kwargs_ = self.__state.extrapolation_kwargs.copy() + extrap_kwargs_noise = self.__state.extrapolation_kwargs.copy() + extrap_kwargs_pb = self.__state.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity # The following should be accesseble after this function worker_state.precip_extrapolated_decomp = [] @@ -1721,9 +1761,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) # Put back the mask precip_forecast_recomp_subtimestep[self.__params.domain_mask] = np.nan - # TODO: problem with the config here! 
This variable changes over time... - # extrap_kwargs is in config but by adding info to it, the next run of a blended forecast will have issues! - self.__config.extrapolation_kwargs["displacement_prev"] = ( + self.__state.extrapolation_kwargs["displacement_prev"] = ( worker_state.previous_displacement[j] ) ( @@ -1734,7 +1772,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, - **self.__config.extrapolation_kwargs, + **self.__state.extrapolation_kwargs, ) precip_extrapolated_recomp_subtimestep = ( precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy() @@ -2264,9 +2302,23 @@ def __post_process_output( >= self.__config.precip_threshold ) # Buffer the mask - precip_field_mask = _compute_incremental_mask( - precip_field_mask, self.__params.struct, self.__params.mask_rim + # buffer the observation mask Rbin using the kernel kr + # add a grayscale rim r (for smooth rain/no-rain transition) + + # buffer observation mask + precip_field_mask_temp = np.ndarray.astype( + precip_field_mask.copy(), "uint8" ) + Rd = binary_dilation(precip_field_mask_temp, self.__params.struct) + + # add grayscale rim + kr1 = generate_binary_structure(2, 1) + mask = Rd.astype(float) + for n in range(self.__params.mask_rim): + Rd = binary_dilation(Rd, kr1) + mask += Rd + # normalize between 0 and 1 + precip_field_mask = mask / mask.max() # Get the final mask worker_state.final_blended_forecast_recomposed = ( precip_forecast_min_value @@ -2853,7 +2905,7 @@ def forecast( return forecast_steps_nowcast -# TODO: add the following code to the main body +# TODO: Where does this piece of code best fit: in utils or inside the class? def calculate_weights_spn(correlations, covariance): """Calculate SPN blending weights for STEPS blending from correlation. 
@@ -2935,6 +2987,7 @@ def calculate_weights_spn(correlations, covariance): return weights +# TODO: Where does this piece of code best fit: in utils or inside the class? def blend_means_sigmas(means, sigmas, weights): """Calculate the blended means and sigmas, the normalization parameters needed to recompose the cascade. This procedure uses the weights of the @@ -2999,80 +3052,3 @@ def blend_means_sigmas(means, sigmas, weights): # TODO: substract covariances to weigthed sigmas - still necessary? return combined_means, combined_sigmas - - -def _compute_incremental_mask(Rbin, kr, r): - # buffer the observation mask Rbin using the kernel kr - # add a grayscale rim r (for smooth rain/no-rain transition) - - # buffer observation mask - Rbin = np.ndarray.astype(Rbin.copy(), "uint8") - Rd = binary_dilation(Rbin, kr) - - # add grayscale rim - kr1 = generate_binary_structure(2, 1) - mask = Rd.astype(float) - for n in range(r): - Rd = binary_dilation(Rd, kr1) - mask += Rd - # normalize between 0 and 1 - return mask / mask.max() - - -def _transform_to_lagrangian( - precip, velocity, ar_order, xy_coords, extrapolator, extrap_kwargs, num_workers -): - """Advect the previous precipitation fields to the same position with the - most recent one (i.e. transform them into the Lagrangian coordinates). 
- """ - extrap_kwargs = extrap_kwargs.copy() - extrap_kwargs["xy_coords"] = xy_coords - res = list() - - def f(precip, i): - return extrapolator( - precip[i, :, :], - velocity, - ar_order - i, - "min", - allow_nonfinite_values=True, - **extrap_kwargs, - )[-1] - - for i in range(ar_order): - if not DASK_IMPORTED: - precip[i, :, :] = f(precip, i) - else: - res.append(dask.delayed(f)(precip, i)) - - if DASK_IMPORTED: - num_workers_ = len(res) if num_workers > len(res) else num_workers - precip = np.stack( - list(dask.compute(*res, num_workers=num_workers_)) + [precip[-1, :, :]] - ) - - # replace non-finite values with the minimum value - precip = precip.copy() - for i in range(precip.shape[0]): - precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :]) - return precip - - -def _compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): - """If necessary, recompose (NWP) model forecasts.""" - precip_models = None - - # Recompose the (NWP) model cascades to have rainfall fields per - # model and time step, which will be used in the probability matching steps. 
- # Recomposed cascade will have shape: [n_models, n_timesteps, m, n] - precip_models = [] - for i in range(precip_models_cascade.shape[0]): - precip_model = [] - for time_step in range(precip_models_cascade.shape[1]): - precip_model.append(recompositor(precip_models_cascade[i, time_step])) - precip_models.append(precip_model) - - precip_models = np.stack(precip_models) - precip_model = None - - return precip_models From 38ed19513d80bfd7077ec4b7a2033f3e87d2c2dd Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 6 Dec 2024 17:47:06 +0100 Subject: [PATCH 51/65] Made changes for Codacy review --- pysteps/blending/steps.py | 107 ++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index ee53462c1..f889276d3 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -382,7 +382,7 @@ def worker(j): if self.__state.is_nowcast_time_step: if self.__config.measure_time: - __ = self.__measure_time("subtimestep", starttime) + _ = self.__measure_time("subtimestep", starttime) else: print("done.") @@ -695,9 +695,9 @@ def __prepare_radar_and_NWP_fields(self): # 1. Start with the radar rainfall fields. We want the fields in a # Lagrangian space - """Advect the previous precipitation fields to the same position with the - most recent one (i.e. transform them into the Lagrangian coordinates). - """ + # Advect the previous precipitation fields to the same position with the + # most recent one (i.e. transform them into the Lagrangian coordinates). + self.__config.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates res = [] @@ -740,7 +740,7 @@ def f(precip, i): # 2. 
Perform the cascade decomposition for the input precip fields and, # if necessary, for the (NWP) model fields # 2.1 Compute the cascade decompositions of the input precipitation fields - """Compute the cascade decompositions of the input precipitation fields.""" + # Compute the cascade decompositions of the input precipitation fields. precip_forecast_decomp = [] for i in range(self.__config.ar_order + 1): precip_forecast = self.__params.decomposition_method( @@ -902,7 +902,7 @@ def __prepare_nowcast_for_zero_radar(self): # step where the fraction of rainy cells is highest (because other lead times # might be zero as well). Else, initialize the noise with the radar # rainfall data - """Initialize noise based on the NWP field time step where the fraction of rainy cells is highest""" + # Initialize noise based on the NWP field time step where the fraction of rainy cells is highest if self.__config.precip_threshold is None: self.__config.precip_threshold = np.nanmin(self.__precip_models) @@ -967,7 +967,7 @@ def __initialize_noise(self): ) if self.__config.measure_time: - __ = self.__measure_time("Initialize noise", starttime) + _ = self.__measure_time("Initialize noise", starttime) else: print("done.") elif self.__config.noise_stddev_adj == "fixed": @@ -1028,7 +1028,7 @@ def __estimate_ar_parameters_radar(self): if self.__config.ar_order == 1: GAMMA = GAMMA[0, :] if self.__config.ar_order > 2: - for repeat_index in range(self.__config.ar_order - 2): + for _ in range(self.__config.ar_order - 2): GAMMA = np.vstack((GAMMA, GAMMA[1, :])) # Finally, transpose GAMMA to ensure that the shape is the same as np.empty((n_cascade_levels, ar_order)) @@ -1279,7 +1279,7 @@ def __decompose_nwp_if_needed_and_fill_nans_in_nwp(self, t): # fill these with the minimum value present in precip (corresponding to # zero rainfall in the radar observations) - """Ensure that the NWP cascade and fields do no contain any nans or infinite number""" + # Ensure that the NWP cascade and fields do no 
contain any nans or infinite number # Fill nans and infinite numbers with the minimum value present in precip self.__state.precip_models_timestep = self.__precip_models[:, t, :, :].astype( np.float64, copy=False @@ -1419,7 +1419,7 @@ def __find_nowcast_NWP_combination(self, t): def __determine_skill_for_current_timestep(self, t): if t == 0: - """Calculate the initial skill of the (NWP) model forecasts at t=0.""" + # Calculate the initial skill of the (NWP) model forecasts at t=0. self.__params.rho_nwp_models = [] for model_index in range( self.__state.precip_models_cascades_timestep.shape[0] @@ -1949,24 +1949,26 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - _, worker_state.previous_displacement[j] = ( - self.__params.extrapolation_method( - None, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_, - ) + ( + _, + worker_state.previous_displacement[j], + ) = self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_, ) - _, worker_state.previous_displacement_noise_cascade[j] = ( - self.__params.extrapolation_method( - None, - velocity_blended, - [t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_noise, - ) + ( + _, + worker_state.previous_displacement_noise_cascade[j], + ) = self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_noise, ) # Also extrapolate the radar observation, used for the probability @@ -1974,14 +1976,15 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( extrap_kwargs_pb["displacement_prev"] = ( worker_state.previous_displacement_prob_matching[j] ) - _, worker_state.previous_displacement_prob_matching[j] = ( - self.__params.extrapolation_method( - None, - velocity_blended, - 
[t_diff_prev_subtimestep], - allow_nonfinite_values=True, - **extrap_kwargs_pb, - ) + ( + _, + worker_state.previous_displacement_prob_matching[j], + ) = self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_pb, ) worker_state.time_prev_timestep[j] = t + 1 @@ -2246,7 +2249,7 @@ def __post_process_output( ) # Perform the blending of radar and model inside the radar domain using a weighted combination - precip_forecast_recomposed = np.nansum( + worker_state.final_blended_forecast_recomposed = np.nansum( [ mask_model * precip_forecast_recomposed_mod_only_no_nan, mask_radar * precip_forecast_recomposed_no_nan, @@ -2302,23 +2305,25 @@ def __post_process_output( >= self.__config.precip_threshold ) # Buffer the mask - # buffer the observation mask Rbin using the kernel kr - # add a grayscale rim r (for smooth rain/no-rain transition) + # Convert the precipitation field mask into an 8-bit unsigned integer mask + obs_mask_uint8 = precip_field_mask.astype("uint8") - # buffer observation mask - precip_field_mask_temp = np.ndarray.astype( - precip_field_mask.copy(), "uint8" - ) - Rd = binary_dilation(precip_field_mask_temp, self.__params.struct) - - # add grayscale rim - kr1 = generate_binary_structure(2, 1) - mask = Rd.astype(float) - for n in range(self.__params.mask_rim): - Rd = binary_dilation(Rd, kr1) - mask += Rd - # normalize between 0 and 1 - precip_field_mask = mask / mask.max() + # Perform an initial binary dilation using the provided structuring element + dilated_mask = binary_dilation(obs_mask_uint8, self.__params.struct) + + # Create a binary structure element for incremental dilations + struct_element = generate_binary_structure(2, 1) + + # Initialize a floating-point mask to accumulate dilations for a smooth transition + accumulated_mask = dilated_mask.astype(float) + + # Iteratively dilate the mask and accumulate the results to create a grayscale rim + for _ in 
range(self.__params.mask_rim): + dilated_mask = binary_dilation(dilated_mask, struct_element) + accumulated_mask += dilated_mask + + # Normalize the accumulated mask values between 0 and 1 + precip_field_mask = accumulated_mask / np.max(accumulated_mask) # Get the final mask worker_state.final_blended_forecast_recomposed = ( precip_forecast_min_value From 32b656f475f12ba2d781c6c85628c42cab6f5c34 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 16 Dec 2024 11:35:36 +0100 Subject: [PATCH 52/65] Added aditional tests which currently fail in master branch --- pysteps/blending/steps.py | 2 +- pysteps/tests/test_blending_steps.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index f889276d3..fcf072220 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -701,7 +701,7 @@ def __prepare_radar_and_NWP_fields(self): self.__config.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates res = [] - # TODO: create beter names here fore this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian) + # TODO: create beter names here for this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian) def f(precip, i): return self.__params.extrapolation_method( precip[i, :, :], diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index 5840279fc..99bd17946 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -28,6 +28,7 @@ (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), # TODO: make next test work! 
This is currently not working on the main branch # (2, 3, 4, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), + # (2, 3, 4, 8, "incremental", "cdf", False, "spn", True, 2, False, False, 0, False), (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False), # Test the case where the radar image contains no rain. (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False), From 4fe9f78cbb5bcd2db5c080e01d64317e47b43159 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 16 Dec 2024 15:03:21 +0100 Subject: [PATCH 53/65] Update .gitignore Co-authored-by: mats-knmi <145579783+mats-knmi@users.noreply.github.com> --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 136c1e46e..c2006db35 100644 --- a/.gitignore +++ b/.gitignore @@ -92,6 +92,6 @@ venv.bak/ # Mac OS Stuff .DS_Store -# Running lcoal tests +# Running local tests /tmp /pysteps/tests/tmp/ From b31d55c3079f3ee0c52a0db0474c16e496747af3 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 16 Dec 2024 18:23:15 +0100 Subject: [PATCH 54/65] Used the __zero_precip_time in __zero_precipitation_forecast() --- pysteps/blending/steps.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index fcf072220..1eacfa21d 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -828,7 +828,7 @@ def __zero_precipitation_forecast(self): precip_forecast_workers = None if self.__config.measure_time: - zero_precip_time = time.time() - self.__start_time_init + self.__zero_precip_time = time.time() - self.__start_time_init if self.__config.return_output: precip_forecast_all_members_all_times = np.stack( @@ -841,8 +841,8 @@ def __zero_precipitation_forecast(self): if self.__config.measure_time: return ( precip_forecast_all_members_all_times, - zero_precip_time, - zero_precip_time, + self.__zero_precip_time, + self.__zero_precip_time, ) else: 
return precip_forecast_all_members_all_times From cc025938b29d1ec200c33328d00d9ecb0845ac9b Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 16 Dec 2024 19:26:22 +0100 Subject: [PATCH 55/65] Changed typing hints to python 3.10+ version --- pysteps/blending/steps.py | 202 ++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 108 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 1eacfa21d..589e3ddbd 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -65,7 +65,7 @@ DASK_IMPORTED = False from dataclasses import dataclass, field -from typing import Optional, List, Dict, Any, Callable, Union +from typing import Any, Callable # TODO: compare old and new version of the code, run a benchmark to compare the two # TODO: look at the documentation and try to improve it, lots of things are now combined together @@ -73,7 +73,7 @@ @dataclass class StepsBlendingConfig: - precip_threshold: Optional[float] + precip_threshold: float | None norain_threshold: float kmperpixel: float timestep: float @@ -83,133 +83,119 @@ class StepsBlendingConfig: extrapolation_method: str decomposition_method: str bandpass_filter_method: str - noise_method: Optional[str] - noise_stddev_adj: Optional[str] + noise_method: str | None + noise_stddev_adj: str | None ar_order: int - velocity_perturbation_method: Optional[str] + velocity_perturbation_method: str | None weights_method: str conditional: bool - probmatching_method: Optional[str] - mask_method: Optional[str] + probmatching_method: str | None + mask_method: str | None resample_distribution: bool smooth_radar_mask_range: int - seed: Optional[int] + seed: int | None num_workers: int fft_method: str domain: str outdir_path_skill: str - extrapolation_kwargs: Dict[str, Any] = field(default_factory=dict) - filter_kwargs: Dict[str, Any] = field(default_factory=dict) - noise_kwargs: Dict[str, Any] = field(default_factory=dict) - velocity_perturbation_kwargs: Dict[str, Any] 
= field(default_factory=dict) - climatology_kwargs: Dict[str, Any] = field(default_factory=dict) - mask_kwargs: Dict[str, Any] = field(default_factory=dict) + extrapolation_kwargs: dict[str, Any] = field(default_factory=dict) + filter_kwargs: dict[str, Any] = field(default_factory=dict) + noise_kwargs: dict[str, Any] = field(default_factory=dict) + velocity_perturbation_kwargs: dict[str, Any] = field(default_factory=dict) + climatology_kwargs: dict[str, Any] = field(default_factory=dict) + mask_kwargs: dict[str, Any] = field(default_factory=dict) measure_time: bool = False - callback: Optional[Any] = None + callback: Any | None = None return_output: bool = True @dataclass class StepsBlendingParams: - noise_std_coeffs: Optional[np.ndarray] = ( - None # Noise standard deviation coefficients - ) - bandpass_filter: Optional[Any] = None # Band-pass filter object - fft: Optional[Any] = None # FFT method object - perturbation_generator: Optional[Callable[..., np.ndarray]] = ( - None # Perturbation generator - ) - noise_generator: Optional[Callable[..., np.ndarray]] = None # Noise generator - PHI: Optional[np.ndarray] = None # AR(p) model parameters - extrapolation_method: Optional[Callable[..., Any]] = None - decomposition_method: Optional[Callable[..., dict]] = None - recomposition_method: Optional[Callable[..., np.ndarray]] = None - velocity_perturbations: Optional[Any] = None - generate_velocity_noise: Optional[Callable[[Any, float], np.ndarray]] = None - velocity_perturbations_parallel: Optional[np.ndarray] = None - velocity_perturbations_perpendicular: Optional[np.ndarray] = None - fft_objs: List[Any] = field(default_factory=list) - mask_rim: Optional[int] = None # Rim size for masking - struct: Optional[np.ndarray] = None # Structuring element for mask - time_steps_is_list: bool = False # Time steps is a list - precip_models_provided_is_cascade: bool = False # Precip models are decomposed - xy_coordinates: Optional[np.ndarray] = None - precip_zerovalue: 
Optional[float] = None - mask_threshold: Optional[np.ndarray] = None + noise_std_coeffs: np.ndarray | None = None + bandpass_filter: Any | None = None + fft: Any | None = None + perturbation_generator: Callable[..., np.ndarray] | None = None + noise_generator: Callable[..., np.ndarray] | None = None + PHI: np.ndarray | None = None + extrapolation_method: Callable[..., Any] | None = None + decomposition_method: Callable[..., dict] | None = None + recomposition_method: Callable[..., np.ndarray] | None = None + velocity_perturbations: Any | None = None + generate_velocity_noise: Callable[[Any, float], np.ndarray] | None = None + velocity_perturbations_parallel: np.ndarray | None = None + velocity_perturbations_perpendicular: np.ndarray | None = None + fft_objs: list[Any] = field(default_factory=list) + mask_rim: int | None = None + struct: np.ndarray | None = None + time_steps_is_list: bool = False + precip_models_provided_is_cascade: bool = False + xy_coordinates: np.ndarray | None = None + precip_zerovalue: float | None = None + mask_threshold: np.ndarray | None = None zero_precip_radar: bool = False zero_precip_model_fields: bool = False - original_timesteps: Optional[Union[list, np.ndarray]] = None - num_ensemble_workers: Optional[int] = None - rho_nwp_models: Optional[np.ndarray] = None - domain_mask: Optional[np.ndarray] = None + original_timesteps: list | np.ndarray | None = None + num_ensemble_workers: int | None = None + rho_nwp_models: np.ndarray | None = None + domain_mask: np.ndarray | None = None @dataclass class StepsBlendingState: - # Radar and noise states - precip_cascades: Optional[np.ndarray] = None - precip_noise_input: Optional[np.ndarray] = None - precip_noise_cascades: Optional[np.ndarray] = None - precip_mean_noise: Optional[np.ndarray] = None - precip_std_noise: Optional[np.ndarray] = None - - # Extrapolation states - mean_extrapolation: Optional[np.ndarray] = None - std_extrapolation: Optional[np.ndarray] = None - rho_extrap_cascade_prev: 
Optional[np.ndarray] = None - rho_extrap_cascade: Optional[np.ndarray] = None - precip_cascades_prev_subtimestep: Optional[np.ndarray] = None - cascade_noise_prev_subtimestep: Optional[np.ndarray] = None - precip_extrapolated_after_decomp: Optional[np.ndarray] = None - noise_extrapolated_after_decomp: Optional[np.ndarray] = None - precip_extrapolated_probability_matching: Optional[np.ndarray] = None - - # NWP model states - precip_models_cascades: Optional[np.ndarray] = None - precip_models_cascades_timestep: Optional[np.ndarray] = None - precip_models_timestep: Optional[np.ndarray] = None - mean_models_timestep: Optional[np.ndarray] = None - std_models_timestep: Optional[np.ndarray] = None - velocity_models_timestep: Optional[np.ndarray] = None - - # Mapping from NWP members to ensemble members - mapping_list_NWP_member_to_ensemble_member: Optional[np.ndarray] = None - - # Random states for precipitation and motion - randgen_precip: Optional[List[np.random.RandomState]] = None - randgen_motion: Optional[List[np.random.RandomState]] = None - - # Variables for final forecast computation - previous_displacement: Optional[List[Any]] = None - previous_displacement_noise_cascade: Optional[List[Any]] = None - previous_displacement_prob_matching: Optional[List[Any]] = None - rho_final_blended_forecast: Optional[np.ndarray] = None - final_blended_forecast_means: Optional[np.ndarray] = None - final_blended_forecast_stds: Optional[np.ndarray] = None - final_blended_forecast_means_mod_only: Optional[np.ndarray] = None - final_blended_forecast_stds_mod_only: Optional[np.ndarray] = None - final_blended_forecast_cascades: Optional[np.ndarray] = None - final_blended_forecast_cascades_mod_only: Optional[np.ndarray] = None - final_blended_forecast_recomposed: Optional[np.ndarray] = None - final_blended_forecast_recomposed_mod_only: Optional[np.ndarray] = None - - # Final outputs - final_blended_forecast: Optional[np.ndarray] = None - final_blended_forecast_non_perturbed: 
Optional[np.ndarray] = None - - # Timing and indexing - time_prev_timestep: Optional[List[float]] = None - leadtime_since_start_forecast: Optional[List[float]] = None - subtimesteps: Optional[List[float]] = None - is_nowcast_time_step: Optional[bool] = None - subtimestep_index: Optional[int] = None - - # Weights used for blending - weights: Optional[np.ndarray] = None - weights_model_only: Optional[np.ndarray] = None - - # Is stores here aswell becasue this is changed during the forecast loop and thus not part of the config - extrapolation_kwargs: Dict[str, Any] = field(default_factory=dict) + precip_cascades: np.ndarray | None = None + precip_noise_input: np.ndarray | None = None + precip_noise_cascades: np.ndarray | None = None + precip_mean_noise: np.ndarray | None = None + precip_std_noise: np.ndarray | None = None + + mean_extrapolation: np.ndarray | None = None + std_extrapolation: np.ndarray | None = None + rho_extrap_cascade_prev: np.ndarray | None = None + rho_extrap_cascade: np.ndarray | None = None + precip_cascades_prev_subtimestep: np.ndarray | None = None + cascade_noise_prev_subtimestep: np.ndarray | None = None + precip_extrapolated_after_decomp: np.ndarray | None = None + noise_extrapolated_after_decomp: np.ndarray | None = None + precip_extrapolated_probability_matching: np.ndarray | None = None + + precip_models_cascades: np.ndarray | None = None + precip_models_cascades_timestep: np.ndarray | None = None + precip_models_timestep: np.ndarray | None = None + mean_models_timestep: np.ndarray | None = None + std_models_timestep: np.ndarray | None = None + velocity_models_timestep: np.ndarray | None = None + + mapping_list_NWP_member_to_ensemble_member: np.ndarray | None = None + + randgen_precip: list[np.random.RandomState] | None = None + randgen_motion: list[np.random.RandomState] | None = None + + previous_displacement: list[Any] | None = None + previous_displacement_noise_cascade: list[Any] | None = None + previous_displacement_prob_matching: 
list[Any] | None = None + rho_final_blended_forecast: np.ndarray | None = None + final_blended_forecast_means: np.ndarray | None = None + final_blended_forecast_stds: np.ndarray | None = None + final_blended_forecast_means_mod_only: np.ndarray | None = None + final_blended_forecast_stds_mod_only: np.ndarray | None = None + final_blended_forecast_cascades: np.ndarray | None = None + final_blended_forecast_cascades_mod_only: np.ndarray | None = None + final_blended_forecast_recomposed: np.ndarray | None = None + final_blended_forecast_recomposed_mod_only: np.ndarray | None = None + + final_blended_forecast: np.ndarray | None = None + final_blended_forecast_non_perturbed: np.ndarray | None = None + + time_prev_timestep: list[float] | None = None + leadtime_since_start_forecast: list[float] | None = None + subtimesteps: list[float] | None = None + is_nowcast_time_step: bool | None = None + subtimestep_index: int | None = None + + weights: np.ndarray | None = None + weights_model_only: np.ndarray | None = None + + extrapolation_kwargs: dict[str, Any] = field(default_factory=dict) class StepsBlendingNowcaster: From 4e4a148dd971163dcaec1b0d292beb8fef71d09f Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 16 Dec 2024 19:31:20 +0100 Subject: [PATCH 56/65] Added comments back to the State dataclass --- pysteps/blending/steps.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 589e3ddbd..ba285fbb8 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -142,12 +142,14 @@ class StepsBlendingParams: @dataclass class StepsBlendingState: + # Radar and noise states precip_cascades: np.ndarray | None = None precip_noise_input: np.ndarray | None = None precip_noise_cascades: np.ndarray | None = None precip_mean_noise: np.ndarray | None = None precip_std_noise: np.ndarray | None = None + # Extrapolation states mean_extrapolation: np.ndarray | None = None std_extrapolation: np.ndarray | 
None = None rho_extrap_cascade_prev: np.ndarray | None = None @@ -158,6 +160,7 @@ class StepsBlendingState: noise_extrapolated_after_decomp: np.ndarray | None = None precip_extrapolated_probability_matching: np.ndarray | None = None + # NWP model states precip_models_cascades: np.ndarray | None = None precip_models_cascades_timestep: np.ndarray | None = None precip_models_timestep: np.ndarray | None = None @@ -165,11 +168,14 @@ class StepsBlendingState: std_models_timestep: np.ndarray | None = None velocity_models_timestep: np.ndarray | None = None + # Mapping from NWP members to ensemble members mapping_list_NWP_member_to_ensemble_member: np.ndarray | None = None + # Random states for precipitation and motion randgen_precip: list[np.random.RandomState] | None = None randgen_motion: list[np.random.RandomState] | None = None + # Variables for final forecast computation previous_displacement: list[Any] | None = None previous_displacement_noise_cascade: list[Any] | None = None previous_displacement_prob_matching: list[Any] | None = None @@ -183,18 +189,22 @@ class StepsBlendingState: final_blended_forecast_recomposed: np.ndarray | None = None final_blended_forecast_recomposed_mod_only: np.ndarray | None = None + # Final outputs final_blended_forecast: np.ndarray | None = None final_blended_forecast_non_perturbed: np.ndarray | None = None + # Timing and indexing time_prev_timestep: list[float] | None = None leadtime_since_start_forecast: list[float] | None = None subtimesteps: list[float] | None = None is_nowcast_time_step: bool | None = None subtimestep_index: int | None = None + # Weights used for blending weights: np.ndarray | None = None weights_model_only: np.ndarray | None = None + # This is stores here as well because this is changed during the forecast loop and thus no longer part of the config extrapolation_kwargs: dict[str, Any] = field(default_factory=dict) From 0f4e037912fcddb85f52366b3c7bbfb661c87304 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Tue, 
17 Dec 2024 12:46:45 +0100 Subject: [PATCH 57/65] Changed the self.__state.velocity_perturbations = [] to self.__params.velocity_perturbations = [] in __initialize_random_generators --- pysteps/blending/steps.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index ba285fbb8..37976f63e 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1091,7 +1091,7 @@ def __initialize_random_generators(self): ) = noise.get_method(self.__config.velocity_perturbation_method) # initialize the perturbation generators for the motion field - self.__state.velocity_perturbations = [] + self.__params.velocity_perturbations = [] for j in range(self.__config.n_ens_members): kwargs = { "randstate": self.__state.randgen_motion[j], @@ -1104,7 +1104,7 @@ def __initialize_random_generators(self): self.__config.timestep, **kwargs, ) - self.__state.velocity_perturbations.append(vp_) + self.__params.velocity_perturbations.append(vp_) else: ( self.__params.velocity_perturbations, @@ -2902,7 +2902,6 @@ def forecast( forecast_steps_nowcast = blended_nowcaster.compute_forecast() print(forecast_steps_nowcast) blended_nowcaster.reset_states_and_params() - # Call the appropriate methods within the class return forecast_steps_nowcast From 9f413aa4431f41a53e97bb9c2ac4a70a8aaca12e Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Wed, 18 Dec 2024 23:53:22 +0100 Subject: [PATCH 58/65] Added code changes as suggested by Ruben, comments and documentation to come later --- pysteps/blending/steps.py | 238 ++++++++++++++++++-------------------- 1 file changed, 114 insertions(+), 124 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 37976f63e..14866bc4e 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -339,7 +339,7 @@ def __blended_nowcast_main_loop(self): def worker(j): # The state needs to be copied as a dataclass is not threadsafe in python 
worker_state = deepcopy(self.__state) - self.__determine_skill_for_next_timestep(t, j, worker_state) + self.__determine_NWP_skill_for_next_timestep(t, j, worker_state) self.__determine_weights_per_component(worker_state) self.__regress_extrapolation_and_noise_cascades(j, worker_state) self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( @@ -697,7 +697,7 @@ def __prepare_radar_and_NWP_fields(self): self.__config.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates res = [] - # TODO: create beter names here for this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian) + # TODO: create better names here for this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian) def f(precip, i): return self.__params.extrapolation_method( precip[i, :, :], @@ -1377,41 +1377,30 @@ def __find_nowcast_NWP_combination(self, t): (n_ens_members_max + i) // n_ens_members_min for i in range(n_ens_members_min) ] - if n_model_members == n_ens_members_min: - self.__state.precip_models_cascades_timestep = np.repeat( - self.__state.precip_models_cascades_timestep, - repeats, - axis=0, - ) - self.__state.mean_models_timestep = np.repeat( - self.__state.mean_models_timestep, repeats, axis=0 - ) - self.__state.std_models_timestep = np.repeat( - self.__state.std_models_timestep, repeats, axis=0 - ) - self.__state.velocity_models_timestep = np.repeat( - self.__state.velocity_models_timestep, repeats, axis=0 - ) - # For the prob. matching - self.__state.precip_models_timestep = np.repeat( - self.__state.precip_models_timestep, repeats, axis=0 - ) - # Finally, for the model indices - self.__state.mapping_list_NWP_member_to_ensemble_member = ( - np.repeat( - self.__state.mapping_list_NWP_member_to_ensemble_member, - repeats, - axis=0, - ) - ) - - # TODO: is this not duplicate from part 2.3.5? If so, is it still needed here? 
- # If zero_precip_radar is True, set the velocity field equal to the NWP - # velocity field for the current time step (velocity_models_temp). - if self.__params.zero_precip_radar: - # Use the velocity from velocity_models and take the average over - # n_models (axis=0) - self.__velocity = np.mean(self.__state.velocity_models_timestep, axis=0) + self.__state.precip_models_cascades_timestep = np.repeat( + self.__state.precip_models_cascades_timestep, + repeats, + axis=0, + ) + self.__state.mean_models_timestep = np.repeat( + self.__state.mean_models_timestep, repeats, axis=0 + ) + self.__state.std_models_timestep = np.repeat( + self.__state.std_models_timestep, repeats, axis=0 + ) + self.__state.velocity_models_timestep = np.repeat( + self.__state.velocity_models_timestep, repeats, axis=0 + ) + # For the prob. matching + self.__state.precip_models_timestep = np.repeat( + self.__state.precip_models_timestep, repeats, axis=0 + ) + # Finally, for the model indices + self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat( + self.__state.mapping_list_NWP_member_to_ensemble_member, + repeats, + axis=0, + ) def __determine_skill_for_current_timestep(self, t): if t == 0: @@ -1465,7 +1454,7 @@ def __determine_skill_for_current_timestep(self, t): correlations_prev=self.__state.rho_extrap_cascade_prev, ) - def __determine_skill_for_next_timestep(self, t, j, worker_state): + def __determine_NWP_skill_for_next_timestep(self, t, j, worker_state): # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) # Then for the model components if self.__config.blend_nwp_members: @@ -1650,7 +1639,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( extrap_kwargs_noise = self.__state.extrapolation_kwargs.copy() extrap_kwargs_pb = self.__state.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity - # The following should be accesseble after this function + # The following should be accessible after this 
function worker_state.precip_extrapolated_decomp = [] worker_state.noise_extrapolated_decomp = [] worker_state.precip_extrapolated_probability_matching = [] @@ -2447,89 +2436,6 @@ def reset_states_and_params(self): self.__mainloop_time = None -def calculate_ratios(correlations): - """Calculate explained variance ratios from correlation. - - Parameters - ---------- - Array of shape [component, scale_level, ...] - containing correlation (skills) for each component (NWP and nowcast), - scale level, and optionally along [y, x] dimensions. - - Returns - ------- - out : numpy array - An array containing the ratios of explain variance for each - component, scale level, ... - """ - # correlations: [component, scale, ...] - square_corrs = np.square(correlations) - # Calculate the ratio of the explained variance to the unexplained - # variance of the nowcast and NWP model components - out = square_corrs / (1 - square_corrs) - # out: [component, scale, ...] - return out - - -def calculate_weights_bps(correlations): - """Calculate BPS blending weights for STEPS blending from correlation. - - Parameters - ---------- - correlations : array-like - Array of shape [component, scale_level, ...] - containing correlation (skills) for each component (NWP and nowcast), - scale level, and optionally along [y, x] dimensions. - - Returns - ------- - weights : array-like - Array of shape [component+1, scale_level, ...] - containing the weights to be used in STEPS blending for - each original component plus an addtional noise component, scale level, - and optionally along [y, x] dimensions. - - References - ---------- - :cite:`BPS2006` - - Notes - ----- - The weights in the BPS method can sum op to more than 1.0. - """ - # correlations: [component, scale, ...] 
- # Check if the correlations are positive, otherwise rho = 10e-5 - correlations = np.where(correlations < 10e-5, 10e-5, correlations) - - # If we merge more than one component with the noise cascade, we follow - # the weights impolementation in either :cite:`BPS2006` or :cite:`SPN2013`. - if correlations.shape[0] > 1: - # Calculate weights for each source - ratios = calculate_ratios(correlations) - # ratios: [component, scale, ...] - total_ratios = np.sum(ratios, axis=0) - # total_ratios: [scale, ...] - the denominator of eq. 11 & 12 in BPS2006 - weights = correlations * np.sqrt(ratios / total_ratios) - # weights: [component, scale, ...] - # Calculate the weight of the noise component. - # Original BPS2006 method in the following two lines (eq. 13) - total_square_weights = np.sum(np.square(weights), axis=0) - noise_weight = np.sqrt(1.0 - total_square_weights) - # Finally, add the noise_weights to the weights variable. - weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) - - # Otherwise, the weight equals the correlation on that scale level and - # the noise component weight equals 1 - this weight. This only occurs for - # the weights calculation outside the radar domain where in the case of 1 - # NWP model or ensemble member, no blending of multiple models has to take - # place - else: - noise_weight = 1.0 - correlations - weights = np.concatenate((correlations, noise_weight), axis=0) - - return weights - - def forecast( precip, precip_models, @@ -2798,7 +2704,7 @@ def forecast( fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set - The above parameters have been fitten by using run_vel_pert_analysis.py + The above parameters have been fitted by using run_vel_pert_analysis.py and fit_vel_pert_params.py located in the scripts directory. See :py:mod:`pysteps.noise.motion` for additional documentation. 
@@ -2905,6 +2811,91 @@ def forecast( return forecast_steps_nowcast +# TODO: Where does this piece of code best fit: in utils or inside the class? +def calculate_ratios(correlations): + """Calculate explained variance ratios from correlation. + + Parameters + ---------- + Array of shape [component, scale_level, ...] + containing correlation (skills) for each component (NWP and nowcast), + scale level, and optionally along [y, x] dimensions. + + Returns + ------- + out : numpy array + An array containing the ratios of explain variance for each + component, scale level, ... + """ + # correlations: [component, scale, ...] + square_corrs = np.square(correlations) + # Calculate the ratio of the explained variance to the unexplained + # variance of the nowcast and NWP model components + out = square_corrs / (1 - square_corrs) + # out: [component, scale, ...] + return out + + +# TODO: Where does this piece of code best fit: in utils or inside the class? +def calculate_weights_bps(correlations): + """Calculate BPS blending weights for STEPS blending from correlation. + + Parameters + ---------- + correlations : array-like + Array of shape [component, scale_level, ...] + containing correlation (skills) for each component (NWP and nowcast), + scale level, and optionally along [y, x] dimensions. + + Returns + ------- + weights : array-like + Array of shape [component+1, scale_level, ...] + containing the weights to be used in STEPS blending for + each original component plus an addtional noise component, scale level, + and optionally along [y, x] dimensions. + + References + ---------- + :cite:`BPS2006` + + Notes + ----- + The weights in the BPS method can sum op to more than 1.0. + """ + # correlations: [component, scale, ...] 
+ # Check if the correlations are positive, otherwise rho = 10e-5 + correlations = np.where(correlations < 10e-5, 10e-5, correlations) + + # If we merge more than one component with the noise cascade, we follow + # the weights impolementation in either :cite:`BPS2006` or :cite:`SPN2013`. + if correlations.shape[0] > 1: + # Calculate weights for each source + ratios = calculate_ratios(correlations) + # ratios: [component, scale, ...] + total_ratios = np.sum(ratios, axis=0) + # total_ratios: [scale, ...] - the denominator of eq. 11 & 12 in BPS2006 + weights = correlations * np.sqrt(ratios / total_ratios) + # weights: [component, scale, ...] + # Calculate the weight of the noise component. + # Original BPS2006 method in the following two lines (eq. 13) + total_square_weights = np.sum(np.square(weights), axis=0) + noise_weight = np.sqrt(1.0 - total_square_weights) + # Finally, add the noise_weights to the weights variable. + weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) + + # Otherwise, the weight equals the correlation on that scale level and + # the noise component weight equals 1 - this weight. This only occurs for + # the weights calculation outside the radar domain where in the case of 1 + # NWP model or ensemble member, no blending of multiple models has to take + # place + else: + noise_weight = 1.0 - correlations + weights = np.concatenate((correlations, noise_weight), axis=0) + + return weights + + # TODO: Where does this piece of code best fit: in utils or inside the class? def calculate_weights_spn(correlations, covariance): """Calculate SPN blending weights for STEPS blending from correlation. @@ -3049,6 +3040,5 @@ def blend_means_sigmas(means, sigmas, weights): for i in range(weights.shape[0]): combined_means += (weights[i] / total_weight) * means[i] combined_sigmas += (weights[i] / total_weight) * sigmas[i] - # TODO: substract covariances to weigthed sigmas - still necessary? 
return combined_means, combined_sigmas From c72d9539871d2aede891784a681933eceda50744 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 19 Dec 2024 11:47:43 +0100 Subject: [PATCH 59/65] Added frozen functionality to dataclasses, removed reset_state and fixed seed assingments --- pysteps/blending/steps.py | 82 ++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 14866bc4e..4a1048e55 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -71,7 +71,7 @@ # TODO: look at the documentation and try to improve it, lots of things are now combined together -@dataclass +@dataclass(frozen=True) class StepsBlendingConfig: precip_threshold: float | None norain_threshold: float @@ -138,6 +138,11 @@ class StepsBlendingParams: num_ensemble_workers: int | None = None rho_nwp_models: np.ndarray | None = None domain_mask: np.ndarray | None = None + filter_kwargs: dict | None = None + noise_kwargs: dict | None = None + velocity_perturbation_kwargs: dict | None = None + climatology_kwargs: dict | None = None + mask_kwargs: dict | None = None @dataclass @@ -306,7 +311,7 @@ def __blended_nowcast_main_loop(self): if self.__config.measure_time: starttime_mainloop = time.time() - self.__state.extrapolation_kwargs = deepcopy(self.__config.extrapolation_kwargs) + # self.__state.extrapolation_kwargs = deepcopy(self.__config.extrapolation_kwargs) self.__state.extrapolation_kwargs["return_displacement"] = True self.__state.precip_cascades_prev_subtimestep = deepcopy( @@ -478,25 +483,43 @@ def __check_inputs(self): ) if self.__config.extrapolation_kwargs is None: - self.__config.extrapolation_kwargs = dict() + self.__state.extrapolation_kwargs = dict() + else: + self.__state.extrapolation_kwargs = deepcopy( + self.__config.extrapolation_kwargs + ) if self.__config.filter_kwargs is None: - self.__config.filter_kwargs = dict() + self.__params.filter_kwargs = dict() + 
else: + self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs) if self.__config.noise_kwargs is None: - self.__config.noise_kwargs = dict() + self.__params.noise_kwargs = dict() + else: + self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs) if self.__config.velocity_perturbation_kwargs is None: - self.__config.velocity_perturbation_kwargs = dict() + self.__params.velocity_perturbation_kwargs = dict() + else: + self.__params.velocity_perturbation_kwargs = deepcopy( + self.__config.velocity_perturbation_kwargs + ) if self.__config.climatology_kwargs is None: # Make sure clim_kwargs at least contains the number of models - self.__config.climatology_kwargs = dict( + self.__params.climatology_kwargs = dict( {"n_models": self.__precip_models.shape[0]} ) + else: + self.__params.climatology_kwargs = deepcopy( + self.__config.climatology_kwargs + ) if self.__config.mask_kwargs is None: - self.__config.mask_kwargs = dict() + self.__params.mask_kwargs = dict() + else: + self.__params.mask_kwargs = deepcopy(self.__config.mask_kwargs) if np.any(~np.isfinite(self.__velocity)): raise ValueError("velocity contains non-finite values") @@ -598,12 +621,12 @@ def __print_forecast_info(self): print(f"order of the AR(p) model: {self.__config.ar_order}") if self.__config.velocity_perturbation_method == "bps": self.__params.velocity_perturbations_parallel = ( - self.__config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) self.__params.velocity_perturbations_perpendicular = ( - self.__config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) @@ -645,7 +668,7 @@ def __initialize_nowcast_components(self): self.__params.bandpass_filter = filter_method( (M, N), self.__config.n_cascade_levels, - **(self.__config.filter_kwargs or {}), + **(self.__params.filter_kwargs or {}), ) # Get the 
decomposition method (e.g., FFT) @@ -694,7 +717,7 @@ def __prepare_radar_and_NWP_fields(self): # Advect the previous precipitation fields to the same position with the # most recent one (i.e. transform them into the Lagrangian coordinates). - self.__config.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates + self.__state.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates res = [] # TODO: create better names here for this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian) @@ -705,7 +728,7 @@ def f(precip, i): self.__config.ar_order - i, "min", allow_nonfinite_values=True, - **self.__config.extrapolation_kwargs.copy(), + **self.__state.extrapolation_kwargs.copy(), )[-1] if not DASK_IMPORTED: @@ -939,7 +962,7 @@ def __initialize_noise(self): self.__params.perturbation_generator = init_noise( self.__state.precip_noise_input, fft_method=self.__params.fft, - **self.__config.noise_kwargs, + **self.__params.noise_kwargs, ) if self.__config.noise_stddev_adj == "auto": @@ -1076,8 +1099,9 @@ def __initialize_random_generators(self): if self.__config.noise_method is not None: self.__state.randgen_precip = [] self.__state.randgen_motion = [] + seed = self.__config.seed for j in range(self.__config.n_ens_members): - rs = np.random.RandomState(self.__config.seed) + rs = np.random.RandomState(seed) self.__state.randgen_precip.append(rs) seed = rs.randint(0, high=1e9) rs = np.random.RandomState(seed) @@ -1129,8 +1153,8 @@ def __prepare_forecast_loop(self): if self.__config.mask_method == "incremental": # get mask parameters - self.__params.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) - mask_f = self.__config.mask_kwargs.get("mask_f", 1.0) + self.__params.mask_rim = self.__params.mask_kwargs.get("mask_rim", 10) + mask_f = self.__params.mask_kwargs.get("mask_f", 1.0) # initialize the structuring element struct = generate_binary_structure(2, 1) # iterate it to expand it nxn @@ -1440,7 +1464,7 
@@ def __determine_skill_for_current_timestep(self, t): current_skill=self.__params.rho_nwp_models, validtime=self.__issuetime, outdir_path=self.__config.outdir_path_skill, - **self.__config.climatology_kwargs, + **self.__params.climatology_kwargs, ) if t > 0: # 8.1.3 Determine the skill of the components for lead time (t0 + t) @@ -1465,7 +1489,7 @@ def __determine_NWP_skill_for_next_timestep(self, t, j, worker_state): correlations=self.__params.rho_nwp_models[model_index], outdir_path=self.__config.outdir_path_skill, n_model=model_index, - skill_kwargs=self.__config.climatology_kwargs, + skill_kwargs=self.__params.climatology_kwargs, ) rho_nwp_forecast.append(rho_value) rho_nwp_forecast = np.stack(rho_nwp_forecast) @@ -1480,7 +1504,7 @@ def __determine_NWP_skill_for_next_timestep(self, t, j, worker_state): correlations=self.__params.rho_nwp_models[j], outdir_path=self.__config.outdir_path_skill, n_model=worker_state.mapping_list_NWP_member_to_ensemble_member[j], - skill_kwargs=self.__config.climatology_kwargs, + skill_kwargs=self.__params.climatology_kwargs, ) # Concatenate rho_extrap_cascade and rho_nwp worker_state.rho_final_blended_forecast = np.concatenate( @@ -2420,21 +2444,6 @@ def __measure_time(self, label, start_time): return elapsed_time return None - def reset_states_and_params(self): - """ - Reset the internal state and parameters of the nowcaster to allow multiple forecasts. - This method resets the state and params to their initial conditions without reinitializing - the inputs like precip, velocity, time_steps, or config. 
- """ - # Re-initialize the state and parameters - self.__state = StepsBlendingState() - self.__params = StepsBlendingParams() - - # Reset time measurement variables - self.__start_time_init = None - self.__init_time = None - self.__mainloop_time = None - def forecast( precip, @@ -2794,6 +2803,8 @@ def forecast( return_output=return_output, ) + # TODO: add comment about how this class based method is supposed to be used: for each forecast run, a new forecaster needs to be made. The config file can stay the same. + # Create an instance of the new class with all the provided arguments blended_nowcaster = StepsBlendingNowcaster( precip, @@ -2807,7 +2818,6 @@ def forecast( forecast_steps_nowcast = blended_nowcaster.compute_forecast() print(forecast_steps_nowcast) - blended_nowcaster.reset_states_and_params() return forecast_steps_nowcast From 00f057b078609e13af28a812abea269a5396e2dc Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 19 Dec 2024 12:06:11 +0100 Subject: [PATCH 60/65] Added frozen dataclass to nowcast --- pysteps/blending/steps.py | 1 - pysteps/nowcasts/steps.py | 71 +++++++++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 4a1048e55..94cce0423 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -311,7 +311,6 @@ def __blended_nowcast_main_loop(self): if self.__config.measure_time: starttime_mainloop = time.time() - # self.__state.extrapolation_kwargs = deepcopy(self.__config.extrapolation_kwargs) self.__state.extrapolation_kwargs["return_displacement"] = True self.__state.precip_cascades_prev_subtimestep = deepcopy( diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index b61ee8e7f..818123da4 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -14,6 +14,7 @@ import numpy as np from scipy.ndimage import generate_binary_structure, iterate_structure import time +from copy import deepcopy 
from pysteps import cascade from pysteps import extrapolation @@ -35,7 +36,7 @@ DASK_IMPORTED = False -@dataclass +@dataclass(frozen=True) class StepsNowcasterConfig: """ Parameters @@ -247,6 +248,10 @@ class StepsNowcasterParams: xy_coordinates: np.ndarray | None = None velocity_perturbation_parallel: list[float] | None = None velocity_perturbation_perpendicular: list[float] | None = None + filter_kwargs: dict | None = None + noise_kwargs: dict | None = None + velocity_perturbation_kwargs: dict | None = None + mask_kwargs: dict | None = None @dataclass @@ -268,6 +273,7 @@ class StepsNowcasterState: ) velocity_perturbations: list[Callable] | None = field(default_factory=list) fft_objects: list[Any] | None = field(default_factory=list) + extrapolation_kwargs: dict[str, Any] | None = field(default_factory=dict) class StepsNowcaster: @@ -408,7 +414,7 @@ def __nowcast_main(self): self.__time_steps, self.__config.extrapolation_method, self.__update_state, # Reference to the update function - extrap_kwargs=self.__config.extrapolation_kwargs, + extrap_kwargs=self.__state.extrapolation_kwargs, velocity_pert_gen=self.__state.velocity_perturbations, params=params, ensemble=True, @@ -483,15 +489,33 @@ def __check_inputs(self): # Handle None values for various kwargs if self.__config.extrapolation_kwargs is None: - self.__config.extrapolation_kwargs = {} + self.__state.extrapolation_kwargs = dict() + else: + self.__state.extrapolation_kwargs = deepcopy( + self.__config.extrapolation_kwargs + ) + if self.__config.filter_kwargs is None: - self.__config.filter_kwargs = {} + self.__params.filter_kwargs = dict() + else: + self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs) + if self.__config.noise_kwargs is None: - self.__config.noise_kwargs = {} + self.__params.noise_kwargs = dict() + else: + self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs) + if self.__config.velocity_perturbation_kwargs is None: - self.__config.velocity_perturbation_kwargs = {} + 
self.__params.velocity_perturbation_kwargs = dict() + else: + self.__params.velocity_perturbation_kwargs = deepcopy( + self.__config.velocity_perturbation_kwargs + ) + if self.__config.mask_kwargs is None: - self.__config.mask_kwargs = {} + self.__params.mask_kwargs = dict() + else: + self.__params.mask_kwargs = deepcopy(self.__config.mask_kwargs) print("Inputs validated and initialized successfully.") @@ -548,12 +572,12 @@ def __print_forecast_info(self): if self.__config.velocity_perturbation_method == "bps": self.__params.velocity_perturbation_parallel = ( - self.__config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) self.__params.velocity_perturbation_perpendicular = ( - self.__config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) @@ -588,7 +612,7 @@ def __initialize_nowcast_components(self): self.__params.bandpass_filter = filter_method( (M, N), self.__config.n_cascade_levels, - **(self.__config.filter_kwargs or {}), + **(self.__params.filter_kwargs or {}), ) # Get the decomposition method (e.g., FFT) @@ -629,7 +653,7 @@ def __perform_extrapolation(self): else: self.__state.mask_threshold = None - extrap_kwargs = self.__config.extrapolation_kwargs.copy() + extrap_kwargs = self.__state.extrapolation_kwargs.copy() extrap_kwargs["xy_coords"] = self.__params.xy_coordinates extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(self.__precip)) else False @@ -691,7 +715,7 @@ def __apply_noise_and_ar_model(self): self.__params.perturbation_generator = init_noise( self.__precip, fft_method=self.__params.fft, - **self.__config.noise_kwargs, + **self.__params.noise_kwargs, ) # Handle noise standard deviation adjustments if necessary @@ -831,21 +855,16 @@ def __apply_noise_and_ar_model(self): if self.__config.noise_method is not None: self.__state.random_generator_precip 
= [] self.__state.random_generator_motion = [] - + seed = self.__config.seed for _ in range(self.__config.n_ens_members): # Create random state for precipitation noise generator - rs = np.random.RandomState(self.__config.seed) + rs = np.random.RandomState(seed) self.__state.random_generator_precip.append(rs) - self.__config.seed = rs.randint( - 0, high=int(1e9) - ) # Update seed after generating - + seed = rs.randint(0, high=int(1e9)) # Create random state for motion perturbations generator - rs = np.random.RandomState(self.__config.seed) + rs = np.random.RandomState(seed) self.__state.random_generator_motion.append(rs) - self.__config.seed = rs.randint( - 0, high=int(1e9) - ) # Update seed after generating + seed = rs.randint(0, high=int(1e9)) else: self.__state.random_generator_precip = None self.__state.random_generator_motion = None @@ -865,10 +884,10 @@ def __initialize_velocity_perturbations(self): for j in range(self.__config.n_ens_members): kwargs = { "randstate": self.__state.random_generator_motion[j], - "p_par": self.__config.velocity_perturbation_kwargs.get( + "p_par": self.__params.velocity_perturbation_kwargs.get( "p_par", self.__params.velocity_perturbation_parallel ), - "p_perp": self.__config.velocity_perturbation_kwargs.get( + "p_perp": self.__params.velocity_perturbation_kwargs.get( "p_perp", self.__params.velocity_perturbation_perpendicular ), } @@ -920,8 +939,8 @@ def __initialize_precipitation_mask(self): elif self.__config.mask_method == "incremental": # Get mask parameters - self.__params.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) - mask_f = self.__config.mask_kwargs.get("mask_f", 1.0) + self.__params.mask_rim = self.__params.mask_kwargs.get("mask_rim", 10) + mask_f = self.__params.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element self.__params.structuring_element = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep From 1b8251283fe9d57b11df5dad348470a7c96b19df 
Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Thu, 19 Dec 2024 17:21:18 +0100 Subject: [PATCH 61/65] The needed checks are done for this TODO so it can be removed --- pysteps/blending/steps.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 94cce0423..3c8317d84 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -2067,9 +2067,6 @@ def __blend_cascades(self, t_sub, j, worker_state): # First determine the blending weights if method is spn. The # weights for method bps have already been determined. - # TODO: no other weight method is possible, should we not al least give a user warning if a different weight - # method is given? Or does this mean that in all other circumstances the weights - # have been calculated in a different way? if self.__config.weights_method == "spn": worker_state.weights = np.zeros( From 47ab6c3f274e5e333560ae929da20c782bc8d3e6 Mon Sep 17 00:00:00 2001 From: mats-knmi <145579783+mats-knmi@users.noreply.github.com> Date: Thu, 2 Jan 2025 18:05:11 +0100 Subject: [PATCH 62/65] Use the seed in all rng in blending code (#449) * Use seed for all rng to make a test run completely deterministic * fix probmatching test and some copy paste oversights * Add test for vel_pert_method * Change the test so that it actually runs the lines that need to be covered --- pysteps/blending/steps.py | 19 ++++- pysteps/noise/utils.py | 5 +- pysteps/postprocessing/probmatching.py | 4 +- pysteps/tests/test_blending_steps.py | 78 ++++++++++--------- .../tests/test_postprocessing_probmatching.py | 25 +++--- 5 files changed, 74 insertions(+), 57 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 3c8317d84..1c86ee2e6 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -176,9 +176,10 @@ class StepsBlendingState: # Mapping from NWP members to ensemble members mapping_list_NWP_member_to_ensemble_member: np.ndarray | None = None - # 
Random states for precipitation and motion + # Random states for precipitation, motion and probmatching randgen_precip: list[np.random.RandomState] | None = None randgen_motion: list[np.random.RandomState] | None = None + randgen_probmatching: list[np.random.RandomState] | None = None # Variables for final forecast computation previous_displacement: list[Any] | None = None @@ -1095,19 +1096,28 @@ def __multiply_precip_cascade_to_match_ensemble_members(self): def __initialize_random_generators(self): # 6. Initialize all the random generators and prepare for the forecast loop """Initialize all the random generators.""" + seed = self.__config.seed if self.__config.noise_method is not None: self.__state.randgen_precip = [] - self.__state.randgen_motion = [] - seed = self.__config.seed for j in range(self.__config.n_ens_members): rs = np.random.RandomState(seed) self.__state.randgen_precip.append(rs) seed = rs.randint(0, high=1e9) + + if self.__config.probmatching_method is not None: + self.__state.randgen_probmatching = [] + for j in range(self.__config.n_ens_members): rs = np.random.RandomState(seed) - self.__state.randgen_motion.append(rs) + self.__state.randgen_probmatching.append(rs) seed = rs.randint(0, high=1e9) if self.__config.velocity_perturbation_method is not None: + self.__state.randgen_motion = [] + for j in range(self.__config.n_ens_members): + rs = np.random.RandomState(seed) + self.__state.randgen_motion.append(rs) + seed = rs.randint(0, high=1e9) + ( init_velocity_noise, self.__params.generate_velocity_noise, @@ -2373,6 +2383,7 @@ def __post_process_output( first_array=arr1, second_array=arr2, probability_first_array=weights_probability_matching_normalized[0], + randgen=self.__state.randgen_probmatching[j], ) ) else: diff --git a/pysteps/noise/utils.py b/pysteps/noise/utils.py index 58495b8ad..aaae82f9b 100644 --- a/pysteps/noise/utils.py +++ b/pysteps/noise/utils.py @@ -101,8 +101,9 @@ def compute_noise_stddev_adjs( randstates = [] for k in 
range(num_iter): - randstates.append(np.random.RandomState(seed=seed)) - seed = np.random.randint(0, high=1e9) + rs = np.random.RandomState(seed=seed) + randstates.append(rs) + seed = rs.randint(0, high=1e9) def worker(k): # generate Gaussian white noise field, filter it using the chosen diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index afc951542..efda2b103 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -274,7 +274,7 @@ def _get_error(scale): return shift, scale, R.reshape(shape) -def resample_distributions(first_array, second_array, probability_first_array): +def resample_distributions(first_array, second_array, probability_first_array, randgen): """ Merges two distributions (e.g., from the extrapolation nowcast and NWP in the blending module) to effectively combine two distributions for probability matching without losing extremes. @@ -324,7 +324,7 @@ def resample_distributions(first_array, second_array, probability_first_array): n = asort.shape[0] # Resample the distributions - idxsamples = np.random.binomial(1, probability_first_array, n).astype(bool) + idxsamples = randgen.binomial(1, probability_first_array, n).astype(bool) csort = np.where(idxsamples, asort, bsort) csort = np.sort(csort)[::-1] diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index 99bd17946..4752ea32b 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -8,48 +8,48 @@ import pysteps from pysteps import blending, cascade +# fmt:off steps_arg_values = [ - # Test the case where both the radar image and the NWP fields contain no rain. 
- (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False), - (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False), - (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False), - (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, False), - (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, True), - (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False), - (1, [1, 2, 3], 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False), - (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0, False), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, False), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, True), - (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False), - (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0, False), - (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False), - (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False), - (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), - # TODO: make next test work! 
This is currently not working on the main branch - # (2, 3, 4, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), - # (2, 3, 4, 8, "incremental", "cdf", False, "spn", True, 2, False, False, 0, False), - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False), + (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False, None), + (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False, None), + (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False, None), + (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, False, None), + (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, True, None), + (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False, None), + (1, [1, 2, 3], 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False, None), + (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False, None), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0, False, None), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, False, None), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, True, None), + (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False, None), + (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0, False, None), + (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False, None), + (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False, None), + (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False, None), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False, None), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False, "bps"), # Test the case where the radar image contains no rain. 
- (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, True), + (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False, None), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False, None), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, True, None), # Test the case where the NWP fields contain no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True), - (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False), - (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False, None), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True, None), + # Test the case where both the radar image and the NWP fields contain no rain. 
+ (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False, None), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False, None), + (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False, None), # Test for smooth radar mask - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80, False), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80, False), - (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80, False), - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80, False), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80, True), - (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False), - (5, [1, 2, 3], 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False), - (5, [1, 3], 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80, False, None), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80, False, None), + (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80, False, None), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80, False, None), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80, True, None), + (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False, None), + (5, [1, 2, 3], 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False, None), + (5, [1, 3], 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False, None), ] +# fmt:on steps_arg_names = ( "n_models", @@ -66,6 +66,7 @@ "zero_nwp", "smooth_radar_mask_range", "resample_distribution", + "vel_pert_method", ) @@ -85,6 +86,7 @@ def test_steps_blending( zero_nwp, smooth_radar_mask_range, resample_distribution, + vel_pert_method, ): pytest.importorskip("cv2") @@ -278,7 +280,7 @@ def test_steps_blending( 
noise_method="nonparametric", noise_stddev_adj="auto", ar_order=2, - vel_pert_method=None, + vel_pert_method=vel_pert_method, weights_method=weights_method, conditional=False, probmatching_method=probmatching_method, diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index 8c7c12f4f..b42a95e7e 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pysteps.postprocessing.probmatching import resample_distributions -from pysteps.postprocessing.probmatching import nonparam_match_empirical_cdf + +from pysteps.postprocessing.probmatching import ( + nonparam_match_empirical_cdf, + resample_distributions, +) class TestResampleDistributions: @@ -16,7 +19,7 @@ def test_valid_inputs(self): second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.6 result = resample_distributions( - first_array, second_array, probability_first_array + first_array, second_array, probability_first_array, np.random ) expected_result = np.array([9, 8, 6, 3, 1]) # Expected result based on the seed assert result.shape == first_array.shape @@ -27,7 +30,7 @@ def test_probability_zero(self): second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 result = resample_distributions( - first_array, second_array, probability_first_array + first_array, second_array, probability_first_array, np.random ) assert np.array_equal(result, np.sort(second_array)[::-1]) @@ -36,7 +39,7 @@ def test_probability_one(self): second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 1.0 result = resample_distributions( - first_array, second_array, probability_first_array + first_array, second_array, probability_first_array, np.random ) assert np.array_equal(result, np.sort(first_array)[::-1]) @@ -45,7 +48,7 @@ def test_nan_in_arr1_prob_1(self): array_without_nan = np.array([2.0, 4, 6, 8, 10]) 
probability_first_array = 1.0 result = resample_distributions( - array_with_nan, array_without_nan, probability_first_array + array_with_nan, array_without_nan, probability_first_array, np.random ) expected_result = np.array([np.nan, 9, 7, 3, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -55,7 +58,7 @@ def test_nan_in_arr1_prob_0(self): array_without_nan = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 result = resample_distributions( - array_with_nan, array_without_nan, probability_first_array + array_with_nan, array_without_nan, probability_first_array, np.random ) expected_result = np.array([np.nan, 10, 8, 4, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -65,7 +68,7 @@ def test_nan_in_arr2_prob_1(self): array_with_nan = np.array([2.0, 4, 6, np.nan, 10]) probability_first_array = 1.0 result = resample_distributions( - array_without_nan, array_with_nan, probability_first_array + array_without_nan, array_with_nan, probability_first_array, np.random ) expected_result = np.array([np.nan, 9, 5, 3, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -75,7 +78,7 @@ def test_nan_in_arr2_prob_0(self): array_with_nan = np.array([2, 4, 6, np.nan, 10]) probability_first_array = 0.0 result = resample_distributions( - array_without_nan, array_with_nan, probability_first_array + array_without_nan, array_with_nan, probability_first_array, np.random ) expected_result = np.array([np.nan, 10, 6, 4, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -85,7 +88,7 @@ def test_nan_in_both_prob_1(self): array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) probability_first_array = 1.0 result = resample_distributions( - array1_with_nan, array2_with_nan, probability_first_array + array1_with_nan, array2_with_nan, probability_first_array, np.random ) expected_result = np.array([np.nan, np.nan, np.nan, 9, 1], dtype=float) assert np.allclose(result, 
expected_result, equal_nan=True) @@ -95,7 +98,7 @@ def test_nan_in_both_prob_0(self): array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) probability_first_array = 0.0 result = resample_distributions( - array1_with_nan, array2_with_nan, probability_first_array + array1_with_nan, array2_with_nan, probability_first_array, np.random ) expected_result = np.array([np.nan, np.nan, np.nan, 10, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) From 48187c4a55f03844e78a332e97584b46180efb6d Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Fri, 3 Jan 2025 15:49:22 +0100 Subject: [PATCH 63/65] Removed deepcopy of worker_state. The state is now accessable to all workers at the same time --- pysteps/blending/steps.py | 16 +++++++--------- pysteps/tests/test_blending_steps.py | 3 +++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 1c86ee2e6..5ec243eae 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -342,24 +342,22 @@ def __blended_nowcast_main_loop(self): ] def worker(j): - # The state needs to be copied as a dataclass is not threadsafe in python - worker_state = deepcopy(self.__state) - self.__determine_NWP_skill_for_next_timestep(t, j, worker_state) - self.__determine_weights_per_component(worker_state) - self.__regress_extrapolation_and_noise_cascades(j, worker_state) + self.__determine_NWP_skill_for_next_timestep(t, j, self.__state) + self.__determine_weights_per_component(self.__state) + self.__regress_extrapolation_and_noise_cascades(j, self.__state) self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( - t, j, worker_state + t, j, self.__state ) # 8.5 Blend the cascades final_blended_forecast_single_member = [] for t_sub in self.__state.subtimesteps: # TODO: does it make sense to use sub time steps - check if it works? 
if t_sub > 0: - self.__blend_cascades(t_sub, j, worker_state) - self.__recompose_cascade_to_rainfall_field(j, worker_state) + self.__blend_cascades(t_sub, j, self.__state) + self.__recompose_cascade_to_rainfall_field(j, self.__state) final_blended_forecast_single_member = ( self.__post_process_output( - j, final_blended_forecast_single_member, worker_state + j, final_blended_forecast_single_member, self.__state ) ) final_blended_forecast_all_members_one_timestep[j] = ( diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index 4752ea32b..77f975c75 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -28,6 +28,9 @@ (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False, None), (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False, None), (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False, "bps"), + # TODO: make next test work! This is currently not working on the main branch + # (2, 3, 4, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), + # (2, 3, 4, 8, "incremental", "cdf", False, "spn", True, 2, False, False, 0, False), # Test the case where the radar image contains no rain. 
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False, None), (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False, None), From 9b216a746f29e5e54675e4dd9b0e59afec6e3da1 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Wed, 8 Jan 2025 14:20:06 +0100 Subject: [PATCH 64/65] Update to probmatching comments to keep in track with main --- pysteps/postprocessing/probmatching.py | 9 +++++++-- .../tests/test_postprocessing_probmatching.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index efda2b103..ba493c51a 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -274,7 +274,9 @@ def _get_error(scale): return shift, scale, R.reshape(shape) -def resample_distributions(first_array, second_array, probability_first_array, randgen): +def resample_distributions( + first_array, second_array, probability_first_array, randgen=np.random +): """ Merges two distributions (e.g., from the extrapolation nowcast and NWP in the blending module) to effectively combine two distributions for probability matching without losing extremes. @@ -287,10 +289,13 @@ def resample_distributions(first_array, second_array, probability_first_array, r cascade). It must be of the same shape as `second_array`. Input must not contain NaNs. second_array: array_like One of the two arrays from which the distribution should be sampled (e.g., the NWP (model) - cascade). It must be of the same shape as `first_array`.. Input must not contain NaNs. + cascade). It must be of the same shape as `first_array`. Input must not contain NaNs. probability_first_array: float The weight that `first_array` should get (a value between 0 and 1). This determines the likelihood of selecting elements from `first_array` over `second_array`. 
+ randgen: numpy.random or numpy.RandomState + The random number generator to be used for the binomial distribution. You can pass a seeded + random state here for reproducibility. Default is numpy.random. Returns ------- diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index b42a95e7e..c9da81530 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -19,7 +19,7 @@ def test_valid_inputs(self): second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.6 result = resample_distributions( - first_array, second_array, probability_first_array, np.random + first_array, second_array, probability_first_array ) expected_result = np.array([9, 8, 6, 3, 1]) # Expected result based on the seed assert result.shape == first_array.shape @@ -30,7 +30,7 @@ def test_probability_zero(self): second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 result = resample_distributions( - first_array, second_array, probability_first_array, np.random + first_array, second_array, probability_first_array ) assert np.array_equal(result, np.sort(second_array)[::-1]) @@ -39,7 +39,7 @@ def test_probability_one(self): second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 1.0 result = resample_distributions( - first_array, second_array, probability_first_array, np.random + first_array, second_array, probability_first_array ) assert np.array_equal(result, np.sort(first_array)[::-1]) @@ -48,7 +48,7 @@ def test_nan_in_arr1_prob_1(self): array_without_nan = np.array([2.0, 4, 6, 8, 10]) probability_first_array = 1.0 result = resample_distributions( - array_with_nan, array_without_nan, probability_first_array, np.random + array_with_nan, array_without_nan, probability_first_array ) expected_result = np.array([np.nan, 9, 7, 3, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -58,7 +58,7 @@ def 
test_nan_in_arr1_prob_0(self): array_without_nan = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 result = resample_distributions( - array_with_nan, array_without_nan, probability_first_array, np.random + array_with_nan, array_without_nan, probability_first_array ) expected_result = np.array([np.nan, 10, 8, 4, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -68,7 +68,7 @@ def test_nan_in_arr2_prob_1(self): array_with_nan = np.array([2.0, 4, 6, np.nan, 10]) probability_first_array = 1.0 result = resample_distributions( - array_without_nan, array_with_nan, probability_first_array, np.random + array_without_nan, array_with_nan, probability_first_array ) expected_result = np.array([np.nan, 9, 5, 3, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -78,7 +78,7 @@ def test_nan_in_arr2_prob_0(self): array_with_nan = np.array([2, 4, 6, np.nan, 10]) probability_first_array = 0.0 result = resample_distributions( - array_without_nan, array_with_nan, probability_first_array, np.random + array_without_nan, array_with_nan, probability_first_array ) expected_result = np.array([np.nan, 10, 6, 4, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -88,7 +88,7 @@ def test_nan_in_both_prob_1(self): array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) probability_first_array = 1.0 result = resample_distributions( - array1_with_nan, array2_with_nan, probability_first_array, np.random + array1_with_nan, array2_with_nan, probability_first_array ) expected_result = np.array([np.nan, np.nan, np.nan, 9, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) @@ -98,7 +98,7 @@ def test_nan_in_both_prob_0(self): array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) probability_first_array = 0.0 result = resample_distributions( - array1_with_nan, array2_with_nan, probability_first_array, np.random + array1_with_nan, array2_with_nan, probability_first_array ) 
expected_result = np.array([np.nan, np.nan, np.nan, 10, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) From 561e7aca63e1eab39bb2900f88c005b5c927bd27 Mon Sep 17 00:00:00 2001 From: Simon De Kock Date: Mon, 20 Jan 2025 17:06:46 +0100 Subject: [PATCH 65/65] Fix for multithreading issue, this produces exactly the same results as the master --- pysteps/blending/steps.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 5ec243eae..b2dc6843b 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -44,7 +44,7 @@ """ import math import time -from copy import deepcopy +from copy import copy, deepcopy from functools import partial from multiprocessing.pool import ThreadPool @@ -342,22 +342,23 @@ def __blended_nowcast_main_loop(self): ] def worker(j): - self.__determine_NWP_skill_for_next_timestep(t, j, self.__state) - self.__determine_weights_per_component(self.__state) - self.__regress_extrapolation_and_noise_cascades(j, self.__state) + worker_state = copy(self.__state) + self.__determine_NWP_skill_for_next_timestep(t, j, worker_state) + self.__determine_weights_per_component(worker_state) + self.__regress_extrapolation_and_noise_cascades(j, worker_state) self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( - t, j, self.__state + t, j, worker_state ) # 8.5 Blend the cascades final_blended_forecast_single_member = [] for t_sub in self.__state.subtimesteps: # TODO: does it make sense to use sub time steps - check if it works? 
if t_sub > 0: - self.__blend_cascades(t_sub, j, self.__state) - self.__recompose_cascade_to_rainfall_field(j, self.__state) + self.__blend_cascades(t_sub, j, worker_state) + self.__recompose_cascade_to_rainfall_field(j, worker_state) final_blended_forecast_single_member = ( self.__post_process_output( - j, final_blended_forecast_single_member, self.__state + j, final_blended_forecast_single_member, worker_state ) ) final_blended_forecast_all_members_one_timestep[j] = ( @@ -1666,9 +1667,9 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( # (or subtimesteps if non-integer time steps are given) # Settings and initialize the output - extrap_kwargs_ = self.__state.extrapolation_kwargs.copy() - extrap_kwargs_noise = self.__state.extrapolation_kwargs.copy() - extrap_kwargs_pb = self.__state.extrapolation_kwargs.copy() + extrap_kwargs_ = worker_state.extrapolation_kwargs.copy() + extrap_kwargs_noise = worker_state.extrapolation_kwargs.copy() + extrap_kwargs_pb = worker_state.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity # The following should be accessible after this function worker_state.precip_extrapolated_decomp = [] @@ -1777,7 +1778,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( ) # Put back the mask precip_forecast_recomp_subtimestep[self.__params.domain_mask] = np.nan - self.__state.extrapolation_kwargs["displacement_prev"] = ( + worker_state.extrapolation_kwargs["displacement_prev"] = ( worker_state.previous_displacement[j] ) ( @@ -1788,7 +1789,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, - **self.__state.extrapolation_kwargs, + **worker_state.extrapolation_kwargs, ) precip_extrapolated_recomp_subtimestep = ( precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy() @@ -1883,6 +1884,7 @@ def 
__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( precip_forecast_temp_for_probability_matching[ self.__params.domain_mask ] = np.nan + ( precip_forecast_extrapolated_probability_matching_temp, worker_state.previous_displacement_prob_matching[j], @@ -1893,6 +1895,7 @@ def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( allow_nonfinite_values=True, **extrap_kwargs_pb, ) + worker_state.precip_extrapolated_probability_matching.append( precip_forecast_extrapolated_probability_matching_temp[0] ) @@ -2381,7 +2384,7 @@ def __post_process_output( first_array=arr1, second_array=arr2, probability_first_array=weights_probability_matching_normalized[0], - randgen=self.__state.randgen_probmatching[j], + randgen=worker_state.randgen_probmatching[j], ) ) else: @@ -2822,7 +2825,6 @@ def forecast( ) forecast_steps_nowcast = blended_nowcaster.compute_forecast() - print(forecast_steps_nowcast) return forecast_steps_nowcast