From e06027ae71134e2ae98c502d2104a8e233feacc6 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Fri, 29 Jan 2021 19:18:36 -0500 Subject: [PATCH 01/10] Basic curvefit implementation --- xarray/core/dataarray.py | 74 +++++++++++++++++++++ xarray/core/dataset.py | 136 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2fef3edbc43..77cef25a5be 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4285,6 +4285,80 @@ def argmax( else: return self._replace_maybe_drop_dims(result) + def curvefit( + self, + x: "DataArray", + dim: Union[Hashable, Iterable[Hashable]], + func: Callable[..., Any], + skipna: bool = True, + cov: bool = False, + kwargs: Mapping[str, Any] = {}, + ): + """ + Curve fitting optimization for arbitrary functions. + + Wraps `scipy.optimize.curve_fit` with `apply_ufunc`. + + Parameters + ---------- + x : DataArray + x coordinate over which to perform the curve fitting. Must share at least + one dimension with the calling object. + dim : str or sequence of str + Dimension(s) over which to aggregate during curve fitting. + func : callable + User specified function in the form `f(x, *params)` which returns a numpy + array of length x. `params` are the fittable parameters which are optimized + by scipy curve_fit. + skipna : bool, optional + Whether to skip missing values when fitting. Default is True. + cov : bool or str, optional + Whether to return the covariance matrix in addition to the coefficients. + kwargs : mapping + Additional keyword arguments to pass to scipy curve_fit. + + Returns + ------- + curvefit_results : Dataset + A single dataset which contains: + + [var]_curvefit_coefficients + The coefficients of the best fit. + [var]_curvefit_covariance + The covariance matrix of the coefficient estimates (only included if + `cov=True`) + + See also + -------- + DataArray.polyfit + scipy.optimize.curve_fit + + Examples + -------- + + Fit a linear trend at every lat/lon grid point. + + >>> def linear(x, m, b): + ... return m*x + b + ... + >>> ds = xr.tutorial.open_dataset('air_temperature') + >>> ds.curvefit(x=ds.time, dim='time', func=linear) + + Dimensions: (cov_i: 2, cov_j: 2, lat: 25, lon: 53, param: 2) + Coordinates: + * lat (lat) float32 75.0 72.5 70.0 ... 20.0 17.5 15.0 + * lon (lon) float32 200.0 202.5 205.0 ... 327.5 330.0 + * param (param) >> def linear(x, m, b): + ... return m*x + b + ... + >>> ds = xr.tutorial.open_dataset('air_temperature') + >>> ds.curvefit(x=ds.time, dim='time', func=linear) + + Dimensions: (cov_i: 2, cov_j: 2, lat: 25, lon: 53, param: 2) + Coordinates: + * lat (lat) float32 75.0 72.5 70.0 ... 20.0 17.5 15.0 + * lon (lon) float32 200.0 202.5 205.0 ... 327.5 330.0 + * param (param) curvefit was called on a DataArray + name = "" + + popt, pcov = xr.apply_ufunc(_wrapper, x, da, + vectorize=True, + dask="parallelized", + input_core_dims=[dims, dims], + output_core_dims=[["param"], ["cov_i", "cov_j"]], + dask_gufunc_kwargs={ + "output_sizes":{"param":n_params, + "cov_i":n_params, + "cov_j":n_params}, + }, + output_dtypes=(np.float64, np.float64), + exclude_dims=set(dims), + kwargs=kwargs, + ) + result[name + "curvefit_coefficients"] = popt + result[name + "curvefit_covariance"] = pcov + cov_fields.append(name + "curvefit_covariance") + + result = result.assign_coords({"param":params, "cov_i":params, "cov_j":params}) + result = result.transpose("param", "cov_i", "cov_j", ...) + + if n_params==1: + result = result.squeeze(["param", "cov_i", "cov_j"]) + + if not cov: + result = result.drop(cov_fields) + + return result ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) From 2de083d2323be7a522cab954982545eadc9c5ac7 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Sun, 31 Jan 2021 19:49:10 -0500 Subject: [PATCH 02/10] logic for p0 and bounds --- doc/api.rst | 3 +- xarray/core/dataarray.py | 36 ++++++++----- xarray/core/dataset.py | 108 ++++++++++++++++++++++++++++++--------- 3 files changed, 108 insertions(+), 39 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 9cb02441d37..3329ba0531e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -178,6 +178,7 @@ Computation Dataset.integrate Dataset.map_blocks Dataset.polyfit + Dataset.curvefit **Aggregation**: :py:attr:`~Dataset.all` @@ -371,7 +372,7 @@ Computation DataArray.integrate DataArray.polyfit DataArray.map_blocks - + DataArray.curvefit **Aggregation**: :py:attr:`~DataArray.all` diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 77cef25a5be..99395f948dd 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4289,10 +4289,12 @@ def curvefit( self, x: "DataArray", dim: Union[Hashable, Iterable[Hashable]], - func: Callable[..., Any], + func: Callable[..., Any], skipna: bool = True, cov: bool = False, - kwargs: Mapping[str, Any] = {}, + p0: Dict[str, Any] = None, + bounds: Dict[str, Any] = None, + kwargs: Dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. @@ -4302,20 +4304,28 @@ def curvefit( Parameters ---------- x : DataArray - x coordinate over which to perform the curve fitting. Must share at least + x coordinate over which to perform the curve fitting. Must share at least one dimension with the calling object. dim : str or sequence of str - Dimension(s) over which to aggregate during curve fitting. + Dimension(s) over which to fit. func : callable - User specified function in the form `f(x, *params)` which returns a numpy - array of length x. `params` are the fittable parameters which are optimized + User specified function in the form `f(x, *params)` which returns a numpy + array of length x. `params` are the fittable parameters which are optimized by scipy curve_fit. skipna : bool, optional Whether to skip missing values when fitting. Default is True. cov : bool or str, optional Whether to return the covariance matrix in addition to the coefficients. - kwargs : mapping - Additional keyword arguments to pass to scipy curve_fit. + p0 : dictionary + Optional dictionary of parameter names to initial guesses passed to the + `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will + be assigned initial values following the default scipy behavior. + bounds : dictionary + Optional dictionary of parameter names to initial guesses passed to the + `curve_fit` `bounds` arg. If none or only some parameters are passed, the rest + will be unbounded following the default scipy behavior. + kwargs : dictionary + Additional keyword arguments to passed to scipy curve_fit. Returns ------- @@ -4325,7 +4335,7 @@ def curvefit( [var]_curvefit_coefficients The coefficients of the best fit. [var]_curvefit_covariance - The covariance matrix of the coefficient estimates (only included if + The covariance matrix of the coefficient estimates (only included if `cov=True`) See also @@ -4339,10 +4349,10 @@ def curvefit( Fit a linear trend at every lat/lon grid point. >>> def linear(x, m, b): - ... return m*x + b + ... return m * x + b ... - >>> ds = xr.tutorial.open_dataset('air_temperature') - >>> ds.curvefit(x=ds.time, dim='time', func=linear) + >>> ds = xr.tutorial.open_dataset("air_temperature") + >>> ds.curvefit(x=ds.time, dim="time", func=linear) Dimensions: (cov_i: 2, cov_j: 2, lat: 25, lon: 53, param: 2) Coordinates: @@ -4356,7 +4366,7 @@ def curvefit( air_curvefit_covariance (cov_i, cov_j, lat, lon) float64 1.379e-34 ...... """ return self._to_temp_dataset().curvefit( - x, dim, func, skipna=skipna, cov=cov, kwargs=kwargs + x, dim, func, skipna=skipna, cov=cov, p0=p0, bounds=bounds, kwargs=kwargs ) # this needs to be at the end, or mypy will confuse with `str` diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 982b2ab2cd0..2da292dd87e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,6 +1,7 @@ import copy import datetime import functools +import inspect import sys import warnings from collections import defaultdict @@ -9,7 +10,6 @@ from numbers import Number from operator import methodcaller from pathlib import Path -from inspect import getfullargspec from typing import ( TYPE_CHECKING, Any, @@ -7008,12 +7008,14 @@ def argmax(self, dim=None, axis=None, **kwargs): def curvefit( self, - x: "Dataset", + x: "DataArray", dim: Union[Hashable, Iterable[Hashable]], - func: Callable[..., Any], + func: Callable[..., Any], skipna: bool = True, cov: bool = False, - kwargs: Mapping[str, Any] = None, + p0: Dict[str, Any] = None, + bounds: Dict[str, Any] = None, + kwargs: Dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. @@ -7023,20 +7025,28 @@ def curvefit( Parameters ---------- x : DataArray - x coordinate over which to perform the curve fitting. Must share at least + x coordinate over which to perform the curve fitting. Must share at least one dimension with the calling object. dim : str or sequence of str - Dimension(s) over which to aggregate during curve fitting. + Dimension(s) over which to fit. func : callable - User specified function in the form `f(x, *params)` which returns a numpy - array of length x. `params` are the fittable parameters which are optimized + User specified function in the form `f(x, *params)` which returns a numpy + array of length x. `params` are the fittable parameters which are optimized by scipy curve_fit. skipna : bool, optional Whether to skip missing values when fitting. Default is True. cov : bool or str, optional Whether to return the covariance matrix in addition to the coefficients. - kwargs : mapping - Additional keyword arguments to pass to scipy curve_fit. + p0 : dictionary + Optional dictionary of parameter names to initial guesses passed to the + `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will + be assigned initial values following the default scipy behavior. + bounds : dictionary + Optional dictionary of parameter names to initial guesses passed to the + `curve_fit` `bounds` arg. If none or only some parameters are passed, the rest + will be unbounded following the default scipy behavior. + kwargs : dictionary + Additional keyword arguments to passed to scipy curve_fit. Returns ------- @@ -7046,7 +7056,7 @@ def curvefit( [var]_curvefit_coefficients The coefficients of the best fit. [var]_curvefit_covariance - The covariance matrix of the coefficient estimates (only included if + The covariance matrix of the coefficient estimates (only included if `cov=True`) See also @@ -7060,10 +7070,10 @@ def curvefit( Fit a linear trend at every lat/lon grid point. >>> def linear(x, m, b): - ... return m*x + b + ... return m * x + b ... - >>> ds = xr.tutorial.open_dataset('air_temperature') - >>> ds.curvefit(x=ds.time, dim='time', func=linear) + >>> ds = xr.tutorial.open_dataset("air_temperature") + >>> ds.curvefit(x=ds.time, dim="time", func=linear) Dimensions: (cov_i: 2, cov_j: 2, lat: 25, lon: 53, param: 2) Coordinates: @@ -7076,9 +7086,14 @@ def curvefit( air_curvefit_coefficients (param, lat, lon) float64 1.183e-16 ... 260.9 air_curvefit_covariance (cov_i, cov_j, lat, lon) float64 1.379e-34 ...... """ - from .computation import apply_ufunc from scipy.optimize import curve_fit + from .computation import apply_ufunc + + if p0 is None: + p0 = {} + if bounds is None: + bounds = {} if kwargs is None: kwargs = {} @@ -7087,10 +7102,45 @@ def curvefit( else: dims = list(dim) - params = getfullargspec(func).args[1:] + def _initialize_feasible(lb, ub): + # Mimics functionality of scipy.optimize.minpack._initialize_feasible + lb_finite = np.isfinite(lb) + ub_finite = np.isfinite(ub) + p0 = ( + 0.5 * (lb + ub) * (lb_finite & ub_finite) + + (lb + 1) * (lb_finite & ~ub_finite) + + (ub - 1) * (~lb_finite & ub_finite) + ) + return p0 + + # Get p0 and bounds + # Priority: 1) passed args 2) func signature 3) scipy defaults + func_args = inspect.signature(func).parameters + params = list(func_args)[1:] n_params = len(params) + param_defaults = {p: 1 for p in params} + bounds_defaults = {p: (-np.inf, np.inf) for p in params} + for p in params: + if func_args[p].default is not func_args[p].empty: + param_defaults[p] = func_args[p].default + if p in bounds: + bounds_defaults[p] = bounds[p] + if param_defaults[p] < bounds[p][0] or param_defaults[p] > bounds[p][1]: + param_defaults[p] = _initialize_feasible(bounds[p][0], bounds[p][1]) + if p in p0: + param_defaults[p] = p0[p] + kwargs.setdefault("p0", [param_defaults[p] for p in params]) + kwargs.setdefault( + "bounds", + [ + [bounds_defaults[p][0] for p in params], + [bounds_defaults[p][1] for p in params], + ], + ) + def _wrapper(x, y, **kwargs): + # Wrap curve_fit with pointwise handling of NaNs if skipna: # If some points are missing, fit with what is there mask = np.all([~np.isnan(x), ~np.isnan(y)], axis=0) @@ -7100,7 +7150,7 @@ def _wrapper(x, y, **kwargs): # If all points are missing, return NaN popt = np.full([n_params], np.nan) pcov = np.full([n_params, n_params], np.nan) - return popt, pcov + return popt, pcov popt, pcov = curve_fit(func, x, y, **kwargs) return popt, pcov @@ -7108,20 +7158,24 @@ def _wrapper(x, y, **kwargs): cov_fields = ["cov_i", "cov_j"] for name, da in self.data_vars.items(): if isinstance(name, str): - name = "{}_".format(name) + name = f"{name}_" else: - # Thus a ReprObject => curvefit was called on a DataArray name = "" - popt, pcov = xr.apply_ufunc(_wrapper, x, da, + popt, pcov = apply_ufunc( + _wrapper, + x, + da, vectorize=True, dask="parallelized", input_core_dims=[dims, dims], output_core_dims=[["param"], ["cov_i", "cov_j"]], dask_gufunc_kwargs={ - "output_sizes":{"param":n_params, - "cov_i":n_params, - "cov_j":n_params}, + "output_sizes": { + "param": n_params, + "cov_i": n_params, + "cov_j": n_params, + }, }, output_dtypes=(np.float64, np.float64), exclude_dims=set(dims), @@ -7131,10 +7185,13 @@ def _wrapper(x, y, **kwargs): result[name + "curvefit_covariance"] = pcov cov_fields.append(name + "curvefit_covariance") - result = result.assign_coords({"param":params, "cov_i":params, "cov_j":params}) + result = result.assign_coords( + {"param": params, "cov_i": params, "cov_j": params} + ) result = result.transpose("param", "cov_i", "cov_j", ...) + result.attrs = self.attrs.copy() - if n_params==1: + if n_params == 1: result = result.squeeze(["param", "cov_i", "cov_j"]) if not cov: @@ -7142,4 +7199,5 @@ def _wrapper(x, y, **kwargs): return result + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) From e5456f8e6434dd83fd057d5312dc93e7938a0c65 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Mon, 1 Feb 2021 21:07:27 -0500 Subject: [PATCH 03/10] multi-dimensional fits with coordinate raveling --- xarray/core/dataarray.py | 51 +++++++++------------ xarray/core/dataset.py | 98 +++++++++++++++++++++++----------------- 2 files changed, 77 insertions(+), 72 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 99395f948dd..8dbd866eba9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4287,9 +4287,9 @@ def argmax( def curvefit( self, - x: "DataArray", - dim: Union[Hashable, Iterable[Hashable]], + coords: Union["DataArray", Iterable["DataArray"]], func: Callable[..., Any], + reduce_dim: Union[Hashable, Iterable[Hashable]] = None, skipna: bool = True, cov: bool = False, p0: Dict[str, Any] = None, @@ -4303,15 +4303,21 @@ def curvefit( Parameters ---------- - x : DataArray - x coordinate over which to perform the curve fitting. Must share at least - one dimension with the calling object. - dim : str or sequence of str - Dimension(s) over which to fit. + coords : DataArray or sequence of DataArray + Independent coordinate(s) over which to perform the curve fitting. Must share + at least one dimension with the calling object. When fitting multi-dimensional + functions, supply `coords` as a list in the same order as arguments in `func`. + To fit along existing dimensions of the calling object, `coords` can also be + specified as a str or sequence of strs. func : callable User specified function in the form `f(x, *params)` which returns a numpy array of length x. `params` are the fittable parameters which are optimized by scipy curve_fit. + reduce_dim : str or sequence of str + Additional dimension(s) over which to aggregate while fitting. For example, + calling `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)` will + aggregate all lat and lon points and fit the specified function along the + time dimension. skipna : bool, optional Whether to skip missing values when fitting. Default is True. cov : bool or str, optional @@ -4342,31 +4348,16 @@ def curvefit( -------- DataArray.polyfit scipy.optimize.curve_fit - - Examples - -------- - - Fit a linear trend at every lat/lon grid point. - - >>> def linear(x, m, b): - ... return m * x + b - ... - >>> ds = xr.tutorial.open_dataset("air_temperature") - >>> ds.curvefit(x=ds.time, dim="time", func=linear) - - Dimensions: (cov_i: 2, cov_j: 2, lat: 25, lon: 53, param: 2) - Coordinates: - * lat (lat) float32 75.0 72.5 70.0 ... 20.0 17.5 15.0 - * lon (lon) float32 200.0 202.5 205.0 ... 327.5 330.0 - * param (param) >> def linear(x, m, b): - ... return m * x + b - ... - >>> ds = xr.tutorial.open_dataset("air_temperature") - >>> ds.curvefit(x=ds.time, dim="time", func=linear) - - Dimensions: (cov_i: 2, cov_j: 2, lat: 25, lon: 53, param: 2) - Coordinates: - * lat (lat) float32 75.0 72.5 70.0 ... 20.0 17.5 15.0 - * lon (lon) float32 200.0 202.5 205.0 ... 327.5 330.0 - * param (param) Date: Tue, 2 Feb 2021 14:15:16 -0500 Subject: [PATCH 04/10] allow manually defining param_names, doc examples --- doc/computation.rst | 83 ++++++++++++++++++++++++++++++++++++++++ xarray/core/dataarray.py | 26 ++++++++----- xarray/core/dataset.py | 36 ++++++++++------- 3 files changed, 123 insertions(+), 22 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index dcfe270a942..72acd8edd4e 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -443,6 +443,89 @@ The inverse operation is done with :py:meth:`~xarray.polyval`, .. note:: These methods replicate the behaviour of :py:func:`numpy.polyfit` and :py:func:`numpy.polyval`. + +.. _compute.curvefit: + +Fitting arbitrary functions +=========================== + +Xarray objects also provide an interface for fitting more complex functions using +:py:meth:`scipy.optimize.curve_fit`. :py:meth:`~xarray.DataArray.curvefit` accepts +user-defined functions and can fit along multiple coordinates. + +For example, we can fit a relationship between two ``DataArray`` objects, maintaining +a unique fit at each spatial coordinate but aggregating over the time dimension: + +.. ipython:: python + + def exponential(x, a, xc): + return np.exp((x - xc) / a) + + + x = np.arange(-5, 5, 0.1) + t = np.arange(-5, 5, 0.1) + X, T = np.meshgrid(x, t) + Z1 = np.random.uniform(low=-5, high=5, size=X.shape) + Z2 = exponential(Z1, 3, X) + Z3 = exponential(Z1, 1, -X) + + ds = xr.Dataset( + data_vars=dict( + var1=(["t", "x"], Z1), var2=(["t", "x"], Z2), var3=(["t", "x"], Z3) + ), + coords={"t": t, "x": x}, + ) + ds[["var2", "var1"]].curvefit( + coords=ds.var1, + func=exponential, + reduce_dim="t", + bounds={"a": (0.5, 5), "xc": (-5, 5)}, + ) + +We can also fit multi-dimensional functions, and even use a wrapper function to +simultaneously fit a summation of several functions, such as this field containing +two gaussian peaks: + +.. ipython:: python + + def gaussian_2d(coords, a, xc, yc, xalpha, yalpha): + x, y = coords + z = a * np.exp( + -np.square(x - xc) / 2 / np.square(xalpha) + - np.square(y - yc) / 2 / np.square(yalpha) + ) + return z + + + def multi_peak(coords, *args): + z = np.zeros(coords[0].shape) + for i in range(len(args) // 5): + z += gaussian_2d(coords, *args[i * 5 : i * 5 + 5]) + return z + + + x = np.arange(-5, 5, 0.1) + y = np.arange(-5, 5, 0.1) + X, Y = np.meshgrid(x, y) + + n_peaks = 2 + names = ["a", "xc", "yc", "xalpha", "yalpha"] + names = [f"{name}{i}" for i in range(n_peaks) for name in names] + Z = gaussian_2d((X, Y), 3, 1, 1, 2, 1) + gaussian_2d((X, Y), 2, -1, -2, 1, 1) + Z += np.random.normal(scale=0.1, size=Z.shape) + + da = xr.DataArray(Z, dims=["y", "x"], coords={"y": y, "x": x}) + da.curvefit( + coords=["x", "y"], + func=multi_peak, + param_names=names, + kwargs={"maxfev": 10000}, + ) + +.. note:: + This method replicates the behavior of :py:func:`scipy.optimize.curve_fit`. + + .. _compute.broadcasting: Broadcasting by dimension name diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8dbd866eba9..4f36623b41b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4294,6 +4294,7 @@ def curvefit( cov: bool = False, p0: Dict[str, Any] = None, bounds: Dict[str, Any] = None, + param_names: Sequence[str] = None, kwargs: Dict[str, Any] = None, ): """ @@ -4303,16 +4304,17 @@ def curvefit( Parameters ---------- - coords : DataArray or sequence of DataArray + coords : DataArray, str or sequence of DataArray, str Independent coordinate(s) over which to perform the curve fitting. Must share at least one dimension with the calling object. When fitting multi-dimensional - functions, supply `coords` as a list in the same order as arguments in `func`. - To fit along existing dimensions of the calling object, `coords` can also be - specified as a str or sequence of strs. + functions, supply `coords` as a sequence in the same order as arguments in + `func`. To fit along existing dimensions of the calling object, `coords` can + also be specified as a str or sequence of strs. func : callable User specified function in the form `f(x, *params)` which returns a numpy array of length x. `params` are the fittable parameters which are optimized - by scipy curve_fit. + by scipy curve_fit. `x` can also be specified as a sequence containing multiple + coordinates, e.g. `f((x, y), *params)`. reduce_dim : str or sequence of str Additional dimension(s) over which to aggregate while fitting. For example, calling `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)` will @@ -4320,16 +4322,21 @@ def curvefit( time dimension. skipna : bool, optional Whether to skip missing values when fitting. Default is True. - cov : bool or str, optional + cov : bool, optional Whether to return the covariance matrix in addition to the coefficients. - p0 : dictionary + p0 : dictionary, optional Optional dictionary of parameter names to initial guesses passed to the `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will be assigned initial values following the default scipy behavior. - bounds : dictionary - Optional dictionary of parameter names to initial guesses passed to the + bounds : dictionary, optional + Optional dictionary of parameter names to bounding values passed to the `curve_fit` `bounds` arg. If none or only some parameters are passed, the rest will be unbounded following the default scipy behavior. + param_names: iterable, optional + Sequence of names for the fittable parameters of `func`. If not supplied, + this will be automatically determined by arguments of `func`. `param_names` + should be manually supplied when fitting a function that takes a variable + number of parameters. kwargs : dictionary Additional keyword arguments to passed to scipy curve_fit. @@ -4357,6 +4364,7 @@ def curvefit( cov=cov, p0=p0, bounds=bounds, + param_names=param_names, kwargs=kwargs, ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bd135cf2d21..d63b27cd2fb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7015,6 +7015,7 @@ def curvefit( cov: bool = False, p0: Dict[str, Any] = None, bounds: Dict[str, Any] = None, + param_names: Sequence[str] = None, kwargs: Dict[str, Any] = None, ): """ @@ -7024,16 +7025,17 @@ def curvefit( Parameters ---------- - coords : DataArray or sequence of DataArray + coords : DataArray, str or sequence of DataArray, str Independent coordinate(s) over which to perform the curve fitting. Must share at least one dimension with the calling object. When fitting multi-dimensional - functions, supply `coords` as a list in the same order as arguments in `func`. - To fit along existing dimensions of the calling object, `coords` can also be - specified as a str or sequence of strs. + functions, supply `coords` as a sequence in the same order as arguments in + `func`. To fit along existing dimensions of the calling object, `coords` can + also be specified as a str or sequence of strs. func : callable User specified function in the form `f(x, *params)` which returns a numpy array of length x. `params` are the fittable parameters which are optimized - by scipy curve_fit. + by scipy curve_fit. `x` can also be specified as a sequence containing multiple + coordinates, e.g. `f((x, y), *params)`. reduce_dim : str or sequence of str Additional dimension(s) over which to aggregate while fitting. For example, calling `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)` will @@ -7041,16 +7043,21 @@ def curvefit( time dimension. skipna : bool, optional Whether to skip missing values when fitting. Default is True. - cov : bool or str, optional + cov : bool, optional Whether to return the covariance matrix in addition to the coefficients. - p0 : dictionary + p0 : dictionary, optional Optional dictionary of parameter names to initial guesses passed to the `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will be assigned initial values following the default scipy behavior. - bounds : dictionary - Optional dictionary of parameter names to initial guesses passed to the + bounds : dictionary, optional + Optional dictionary of parameter names to bounding values passed to the `curve_fit` `bounds` arg. If none or only some parameters are passed, the rest will be unbounded following the default scipy behavior. + param_names: seq, optional + Sequence of names for the fittable parameters of `func`. If not supplied, + this will be automatically determined by arguments of `func`. `param_names` + should be manually supplied when fitting a function that takes a variable + number of parameters. kwargs : dictionary Additional keyword arguments to passed to scipy curve_fit. @@ -7106,7 +7113,8 @@ def curvefit( if not reduce_dims: raise ValueError( "No arguments to `coords` were identified as a dimension on the calling " - "object, and no dims were supplied to `reduce_dims`." + "object, and no dims were supplied to `reduce_dim`. This would result " + "in fitting on scalar data." ) # Broadcast all coords with each other @@ -7129,13 +7137,15 @@ def _initialize_feasible(lb, ub): # Get p0 and bounds # Priority: 1) passed args 2) func signature 3) scipy defaults func_args = inspect.signature(func).parameters - params = list(func_args)[1:] + if param_names: + params = param_names + else: + params = list(func_args)[1:] n_params = len(params) - param_defaults = {p: 1 for p in params} bounds_defaults = {p: (-np.inf, np.inf) for p in params} for p in params: - if func_args[p].default is not func_args[p].empty: + if p in func_args and func_args[p].default is not func_args[p].empty: param_defaults[p] = func_args[p].default if p in bounds: bounds_defaults[p] = bounds[p] From 3c41355d1e61b1f02be35510e59a13d8d9efa003 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Tue, 2 Feb 2021 15:50:30 -0500 Subject: [PATCH 05/10] add whats-new entry --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 471e91a8512..27103d87add 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,7 @@ Breaking changes - xarray no longer supports python 3.6 The minimum versions of some other dependencies were changed: + ============ ====== ==== Package Old New ============ ====== ==== @@ -62,6 +63,8 @@ New Features - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims in the form of kwargs as well as a dict, like most similar methods. By `Maximilian Roos `_. +- Added :py:meth:`DataArray.curvefit` and :py:meth:`Dataset.curvefit` for general curve fitting applications. (:issue:`4300`, :pull:`4849`) + By `Sam Levang `_. Bug fixes ~~~~~~~~~ From 5b422b478a50c2f881ebc271b1c632dbdae2121d Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Wed, 3 Feb 2021 17:30:33 -0500 Subject: [PATCH 06/10] additional checks for func args --- xarray/core/dataset.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e5e58468d42..616c1295857 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7081,10 +7081,6 @@ def curvefit( """ from scipy.optimize import curve_fit - from .alignment import broadcast - from .computation import apply_ufunc - from .dataarray import DataArray - if p0 is None: p0 = {} if bounds is None: @@ -7101,7 +7097,7 @@ def curvefit( if ( isinstance(coords, str) - or isinstance(coords, DataArray) + or isinstance(coords, xr.DataArray) or not isinstance(coords, Iterable) ): coords = [coords] @@ -7109,7 +7105,7 @@ def curvefit( # Figure out whether any coords are dims on self for coord in coords: - reduce_dims += [c for c in self.coords if coord.equals(self[c])] + reduce_dims += [c for c in self.dims if coord.equals(self[c])] reduce_dims = list(set(reduce_dims)) preserved_dims = list(set([dim for dim in self.dims]) - set(reduce_dims)) if not reduce_dims: @@ -7120,7 +7116,7 @@ def curvefit( ) # Broadcast all coords with each other - coords = broadcast(*coords) + coords = xr.broadcast(*coords) coords = [ coord.broadcast_like(self, exclude=preserved_dims) for coord in coords ] @@ -7138,11 +7134,28 @@ def _initialize_feasible(lb, ub): # Get p0 and bounds # Priority: 1) passed args 2) func signature 3) scipy defaults - func_args = inspect.signature(func).parameters + try: + func_args = inspect.signature(func).parameters + except ValueError: + func_args = {} + if not param_names: + raise ValueError( + "Unable to inspect `func` signature, and `param_names` was not provided." + ) if param_names: params = param_names else: params = list(func_args)[1:] + if any( + [ + (p.kind in [p.VAR_POSITIONAL, p.VAR_KEYWORD]) + for p in func_args.values() + ] + ): + raise ValueError( + "`param_names` must be provided because `func` takes variable length arguments." + ) + n_params = len(params) param_defaults = {p: 1 for p in params} bounds_defaults = {p: (-np.inf, np.inf) for p in params} @@ -7180,7 +7193,7 @@ def _wrapper(Y, *coords, **kwargs): popt, pcov = curve_fit(func, x, y, **kwargs) return popt, pcov - result = Dataset() + result = xr.Dataset() cov_fields = ["cov_i", "cov_j"] for name, da in self.data_vars.items(): if isinstance(name, str): @@ -7188,7 +7201,7 @@ def _wrapper(Y, *coords, **kwargs): else: name = "" - popt, pcov = apply_ufunc( + popt, pcov = xr.apply_ufunc( _wrapper, da, *coords, @@ -7221,7 +7234,7 @@ def _wrapper(Y, *coords, **kwargs): result = result.squeeze(["param", "cov_i", "cov_j"]) if not cov: - result = result.drop(cov_fields) + result = result.drop_vars(cov_fields) return result From d61a184272db40a7be0bca74bf4945b2a547dc18 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Wed, 3 Feb 2021 17:31:00 -0500 Subject: [PATCH 07/10] minimal tests --- xarray/tests/test_dataarray.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fc84687511e..ef1137a536d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4565,6 +4565,35 @@ def test_pad_reflect(self, mode, reflect_type): assert actual.shape == (7, 4, 9) assert_identical(actual, expected) + @requires_scipy + @pytest.mark.parametrize("use_dask", [True, False]) + def test_curvefit(self, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + + def exp_decay(t, n0, tau): + return n0 * np.exp(-t / tau) + + t = np.arange(0, 5, 0.5) + da = DataArray( + np.stack([exp_decay(t, 3, 2), exp_decay(t, 5, 4)], axis=-1), + dims=("t", "x"), + coords={"t": t, "x": [0, 1]}, + ) + da[0, 0] = np.nan + + expected = DataArray( + [[3, 5], [2, 4]], + dims=("param", "x"), + coords={"param": ["n0", "tau"], "x": [0, 1]}, + ) + + if use_dask: + da = da.chunk({"x": 1}) + + fit = da.curvefit(coords="t", func=exp_decay) + assert_allclose(fit.curvefit_coefficients, expected, rtol=1e-3) + class TestReduce: @pytest.fixture(autouse=True) From a3925763d5ada2d012403e78d2e07cf64709e761 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Wed, 3 Feb 2021 18:14:11 -0500 Subject: [PATCH 08/10] improve codecov --- xarray/core/dataset.py | 2 +- xarray/tests/test_dataarray.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 616c1295857..257e6c1eafa 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7185,7 +7185,7 @@ def _wrapper(Y, *coords, **kwargs): mask = np.all([np.any(~np.isnan(x), axis=0), ~np.isnan(y)], axis=0) x = x[:, mask] y = y[mask] - if not len(x): + if not len(y): popt = np.full([n_params], np.nan) pcov = np.full([n_params, n_params], np.nan) return popt, pcov diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ef1137a536d..d233a017dee 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4571,29 +4571,36 @@ def test_curvefit(self, use_dask): if use_dask and not has_dask: pytest.skip("requires dask") - def exp_decay(t, n0, tau): + def exp_decay(t, n0, tau=1): return n0 * np.exp(-t / tau) t = np.arange(0, 5, 0.5) da = DataArray( - np.stack([exp_decay(t, 3, 2), exp_decay(t, 5, 4)], axis=-1), + np.stack([exp_decay(t, 3, 3), exp_decay(t, 5, 4), np.nan * t], axis=-1), dims=("t", "x"), - coords={"t": t, "x": [0, 1]}, + coords={"t": t, "x": [0, 1, 2]}, ) da[0, 0] = np.nan expected = DataArray( - [[3, 5], [2, 4]], + [[3, 5, np.nan], [3, 4, np.nan]], dims=("param", "x"), - coords={"param": ["n0", "tau"], "x": [0, 1]}, + coords={"param": ["n0", "tau"], "x": [0, 1, 2]}, ) if use_dask: da = da.chunk({"x": 1}) - fit = da.curvefit(coords="t", func=exp_decay) + fit = da.curvefit( + coords="t", func=exp_decay, p0={"n0": 4}, bounds={"tau": [2, 6]} + ) assert_allclose(fit.curvefit_coefficients, expected, rtol=1e-3) + da = da.compute() + fit = da.curvefit(coords="t", func=np.power, reduce_dim="x", param_names=["a"]) + assert "a" in fit.param + assert "param" not in fit.dims + class TestReduce: @pytest.fixture(autouse=True) From 8c8809a37304b37518f59fe22802d4ff9f98159d Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Fri, 5 Feb 2021 12:21:59 -0500 Subject: [PATCH 09/10] rename coords->coords_, tweak docs and tests --- doc/computation.rst | 2 +- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 22 +++++++++++----------- xarray/tests/test_dataarray.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index 72acd8edd4e..ef15aa9a789 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -475,7 +475,7 @@ a unique fit at each spatial coordinate but aggregating over the time dimension: ), coords={"t": t, "x": x}, ) - ds[["var2", "var1"]].curvefit( + ds[["var2", "var3"]].curvefit( coords=ds.var1, func=exponential, reduce_dim="t", diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 10d12be55fb..eebf5d301ce 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4308,7 +4308,7 @@ def argmax( def curvefit( self, - coords: Union["DataArray", Iterable["DataArray"]], + coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], func: Callable[..., Any], reduce_dim: Union[Hashable, Iterable[Hashable]] = None, skipna: bool = True, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 257e6c1eafa..4d04f71382e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7010,7 +7010,7 @@ def argmax(self, dim=None, axis=None, **kwargs): def curvefit( self, - coords: Union["DataArray", Iterable["DataArray"]], + coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], func: Callable[..., Any], reduce_dim: Union[Hashable, Iterable[Hashable]] = None, skipna: bool = True, @@ -7101,10 +7101,10 @@ def curvefit( or not isinstance(coords, Iterable) ): coords = [coords] - coords = [self[coord] if isinstance(coord, str) else coord for coord in coords] + coords_ = [self[coord] if isinstance(coord, str) else coord for coord in coords] - # Figure out whether any coords are dims on self - for coord in coords: + # Determine whether any coords are dims on self + for coord in coords_: reduce_dims += [c for c in self.dims if coord.equals(self[c])] reduce_dims = list(set(reduce_dims)) preserved_dims = list(set([dim for dim in self.dims]) - set(reduce_dims)) @@ -7116,9 +7116,9 @@ def curvefit( ) # Broadcast all coords with each other - coords = xr.broadcast(*coords) - coords = [ - coord.broadcast_like(self, exclude=preserved_dims) for coord in coords + coords_ = xr.broadcast(*coords_) + coords_ = [ + coord.broadcast_like(self, exclude=preserved_dims) for coord in coords_ ] def _initialize_feasible(lb, ub): @@ -7177,9 +7177,9 @@ def _initialize_feasible(lb, ub): ], ) - def _wrapper(Y, *coords, **kwargs): + def _wrapper(Y, *coords_, **kwargs): # Wrap curve_fit with raveled coordinates and pointwise NaN handling - x = np.vstack([c.ravel() for c in coords]) + x = np.vstack([c.ravel() for c in coords_]) y = Y.ravel() if skipna: mask = np.all([np.any(~np.isnan(x), axis=0), ~np.isnan(y)], axis=0) @@ -7204,10 +7204,10 @@ def _wrapper(Y, *coords, **kwargs): popt, pcov = xr.apply_ufunc( _wrapper, da, - *coords, + *coords_, vectorize=True, dask="parallelized", - input_core_dims=[reduce_dims for d in range(len(coords) + 1)], + input_core_dims=[reduce_dims for d in range(len(coords_) + 1)], output_core_dims=[["param"], ["cov_i", "cov_j"]], dask_gufunc_kwargs={ "output_sizes": { diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d233a017dee..1f80b089771 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4592,7 +4592,7 @@ def exp_decay(t, n0, tau=1): da = da.chunk({"x": 1}) fit = da.curvefit( - coords="t", func=exp_decay, p0={"n0": 4}, bounds={"tau": [2, 6]} + coords=[da.t], func=exp_decay, p0={"n0": 4}, bounds={"tau": [2, 6]} ) assert_allclose(fit.curvefit_coefficients, expected, rtol=1e-3) From 9ace76f89041d45c366ffab9d3980a183f232e63 Mon Sep 17 00:00:00 2001 From: Sam Levang Date: Sat, 20 Feb 2021 13:34:50 -0500 Subject: [PATCH 10/10] refactor with helper functions, remove extraneous steps --- doc/computation.rst | 2 +- xarray/core/dataarray.py | 19 ++-- xarray/core/dataset.py | 160 +++++++++++++++++---------------- xarray/tests/test_dataarray.py | 28 ++++-- 4 files changed, 112 insertions(+), 97 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index ef15aa9a789..d154ecd95e4 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -478,7 +478,7 @@ a unique fit at each spatial coordinate but aggregating over the time dimension: ds[["var2", "var3"]].curvefit( coords=ds.var1, func=exponential, - reduce_dim="t", + reduce_dims="t", bounds={"a": (0.5, 5), "xc": (-5, 5)}, ) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 996ab28b5d2..8c92dda0bf2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4309,9 +4309,8 @@ def curvefit( self, coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], func: Callable[..., Any], - reduce_dim: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Union[Hashable, Iterable[Hashable]] = None, skipna: bool = True, - cov: bool = False, p0: Dict[str, Any] = None, bounds: Dict[str, Any] = None, param_names: Sequence[str] = None, @@ -4332,18 +4331,16 @@ def curvefit( also be specified as a str or sequence of strs. func : callable User specified function in the form `f(x, *params)` which returns a numpy - array of length x. `params` are the fittable parameters which are optimized + array of length `len(x)`. `params` are the fittable parameters which are optimized by scipy curve_fit. `x` can also be specified as a sequence containing multiple - coordinates, e.g. `f((x, y), *params)`. - reduce_dim : str or sequence of str + coordinates, e.g. `f((x0, x1), *params)`. + reduce_dims : str or sequence of str Additional dimension(s) over which to aggregate while fitting. For example, calling `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)` will aggregate all lat and lon points and fit the specified function along the time dimension. skipna : bool, optional Whether to skip missing values when fitting. Default is True. - cov : bool, optional - Whether to return the covariance matrix in addition to the coefficients. p0 : dictionary, optional Optional dictionary of parameter names to initial guesses passed to the `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will @@ -4352,7 +4349,7 @@ def curvefit( Optional dictionary of parameter names to bounding values passed to the `curve_fit` `bounds` arg. If none or only some parameters are passed, the rest will be unbounded following the default scipy behavior. - param_names: iterable, optional + param_names: seq, optional Sequence of names for the fittable parameters of `func`. If not supplied, this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable @@ -4368,8 +4365,7 @@ def curvefit( [var]_curvefit_coefficients The coefficients of the best fit. [var]_curvefit_covariance - The covariance matrix of the coefficient estimates (only included if - `cov=True`) + The covariance matrix of the coefficient estimates. See also -------- @@ -4379,9 +4375,8 @@ def curvefit( return self._to_temp_dataset().curvefit( coords, func, - reduce_dim=reduce_dim, + reduce_dims=reduce_dims, skipna=skipna, - cov=cov, p0=p0, bounds=bounds, param_names=param_names, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4d04f71382e..e5cbf3157ad 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -457,6 +457,63 @@ def as_dataset(obj: Any) -> "Dataset": return obj +def _get_func_args(func, param_names): + """Use `inspect.signature` to try accessing `func` args. Otherwise, ensure + they are provided by user. + """ + try: + func_args = inspect.signature(func).parameters + except ValueError: + func_args = {} + if not param_names: + raise ValueError( + "Unable to inspect `func` signature, and `param_names` was not provided." + ) + if param_names: + params = param_names + else: + params = list(func_args)[1:] + if any( + [(p.kind in [p.VAR_POSITIONAL, p.VAR_KEYWORD]) for p in func_args.values()] + ): + raise ValueError( + "`param_names` must be provided because `func` takes variable length arguments." + ) + return params, func_args + + +def _initialize_curvefit_params(params, p0, bounds, func_args): + """Set initial guess and bounds for curvefit. + Priority: 1) passed args 2) func signature 3) scipy defaults + """ + + def _initialize_feasible(lb, ub): + # Mimics functionality of scipy.optimize.minpack._initialize_feasible + lb_finite = np.isfinite(lb) + ub_finite = np.isfinite(ub) + p0 = np.nansum( + [ + 0.5 * (lb + ub) * int(lb_finite & ub_finite), + (lb + 1) * int(lb_finite & ~ub_finite), + (ub - 1) * int(~lb_finite & ub_finite), + ] + ) + return p0 + + param_defaults = {p: 1 for p in params} + bounds_defaults = {p: (-np.inf, np.inf) for p in params} + for p in params: + if p in func_args and func_args[p].default is not func_args[p].empty: + param_defaults[p] = func_args[p].default + if p in bounds: + bounds_defaults[p] = tuple(bounds[p]) + if param_defaults[p] < bounds[p][0] or param_defaults[p] > bounds[p][1]: + param_defaults[p] = _initialize_feasible(bounds[p][0], bounds[p][1]) + if p in p0: + param_defaults[p] = p0[p] + return param_defaults, bounds_defaults + + class DataVariables(Mapping[Hashable, "DataArray"]): __slots__ = ("_dataset",) @@ -7012,9 +7069,8 @@ def curvefit( self, coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], func: Callable[..., Any], - reduce_dim: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Union[Hashable, Iterable[Hashable]] = None, skipna: bool = True, - cov: bool = False, p0: Dict[str, Any] = None, bounds: Dict[str, Any] = None, param_names: Sequence[str] = None, @@ -7035,18 +7091,16 @@ def curvefit( also be specified as a str or sequence of strs. func : callable User specified function in the form `f(x, *params)` which returns a numpy - array of length x. `params` are the fittable parameters which are optimized + array of length `len(x)`. `params` are the fittable parameters which are optimized by scipy curve_fit. `x` can also be specified as a sequence containing multiple - coordinates, e.g. `f((x, y), *params)`. - reduce_dim : str or sequence of str + coordinates, e.g. `f((x0, x1), *params)`. + reduce_dims : str or sequence of str Additional dimension(s) over which to aggregate while fitting. For example, calling `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)` will aggregate all lat and lon points and fit the specified function along the time dimension. skipna : bool, optional Whether to skip missing values when fitting. Default is True. - cov : bool, optional - Whether to return the covariance matrix in addition to the coefficients. p0 : dictionary, optional Optional dictionary of parameter names to initial guesses passed to the `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will @@ -7071,8 +7125,7 @@ def curvefit( [var]_curvefit_coefficients The coefficients of the best fit. [var]_curvefit_covariance - The covariance matrix of the coefficient estimates (only included if - `cov=True`) + The covariance matrix of the coefficient estimates. See also -------- @@ -7088,12 +7141,12 @@ def curvefit( if kwargs is None: kwargs = {} - if not reduce_dim: - reduce_dims = [] - elif isinstance(reduce_dim, str) or not isinstance(reduce_dim, Iterable): - reduce_dims = [reduce_dim] + if not reduce_dims: + reduce_dims_ = [] + elif isinstance(reduce_dims, str) or not isinstance(reduce_dims, Iterable): + reduce_dims_ = [reduce_dims] else: - reduce_dims = list(reduce_dim) + reduce_dims_ = list(reduce_dims) if ( isinstance(coords, str) @@ -7105,13 +7158,13 @@ def curvefit( # Determine whether any coords are dims on self for coord in coords_: - reduce_dims += [c for c in self.dims if coord.equals(self[c])] - reduce_dims = list(set(reduce_dims)) - preserved_dims = list(set([dim for dim in self.dims]) - set(reduce_dims)) - if not reduce_dims: + reduce_dims_ += [c for c in self.dims if coord.equals(self[c])] + reduce_dims_ = list(set(reduce_dims_)) + preserved_dims = list(set(self.dims) - set(reduce_dims_)) + if not reduce_dims_: raise ValueError( "No arguments to `coords` were identified as a dimension on the calling " - "object, and no dims were supplied to `reduce_dim`. This would result " + "object, and no dims were supplied to `reduce_dims`. This would result " "in fitting on scalar data." ) @@ -7121,53 +7174,11 @@ def curvefit( coord.broadcast_like(self, exclude=preserved_dims) for coord in coords_ ] - def _initialize_feasible(lb, ub): - # Mimics functionality of scipy.optimize.minpack._initialize_feasible - lb_finite = np.isfinite(lb) - ub_finite = np.isfinite(ub) - p0 = ( - 0.5 * (lb + ub) * (lb_finite & ub_finite) - + (lb + 1) * (lb_finite & ~ub_finite) - + (ub - 1) * (~lb_finite & ub_finite) - ) - return p0 - - # Get p0 and bounds - # Priority: 1) passed args 2) func signature 3) scipy defaults - try: - func_args = inspect.signature(func).parameters - except ValueError: - func_args = {} - if not param_names: - raise ValueError( - "Unable to inspect `func` signature, and `param_names` was not provided." - ) - if param_names: - params = param_names - else: - params = list(func_args)[1:] - if any( - [ - (p.kind in [p.VAR_POSITIONAL, p.VAR_KEYWORD]) - for p in func_args.values() - ] - ): - raise ValueError( - "`param_names` must be provided because `func` takes variable length arguments." - ) - + params, func_args = _get_func_args(func, param_names) + param_defaults, bounds_defaults = _initialize_curvefit_params( + params, p0, bounds, func_args + ) n_params = len(params) - param_defaults = {p: 1 for p in params} - bounds_defaults = {p: (-np.inf, np.inf) for p in params} - for p in params: - if p in func_args and func_args[p].default is not func_args[p].empty: - param_defaults[p] = func_args[p].default - if p in bounds: - bounds_defaults[p] = bounds[p] - if param_defaults[p] < bounds[p][0] or param_defaults[p] > bounds[p][1]: - param_defaults[p] = _initialize_feasible(bounds[p][0], bounds[p][1]) - if p in p0: - param_defaults[p] = p0[p] kwargs.setdefault("p0", [param_defaults[p] for p in params]) kwargs.setdefault( "bounds", @@ -7194,12 +7205,11 @@ def _wrapper(Y, *coords_, **kwargs): return popt, pcov result = xr.Dataset() - cov_fields = ["cov_i", "cov_j"] for name, da in self.data_vars.items(): - if isinstance(name, str): - name = f"{name}_" - else: + if name is xr.core.dataarray._THIS_ARRAY: name = "" + else: + name = f"{str(name)}_" popt, pcov = xr.apply_ufunc( _wrapper, @@ -7207,7 +7217,7 @@ def _wrapper(Y, *coords_, **kwargs): *coords_, vectorize=True, dask="parallelized", - input_core_dims=[reduce_dims for d in range(len(coords_) + 1)], + input_core_dims=[reduce_dims_ for d in range(len(coords_) + 1)], output_core_dims=[["param"], ["cov_i", "cov_j"]], dask_gufunc_kwargs={ "output_sizes": { @@ -7217,25 +7227,17 @@ def _wrapper(Y, *coords_, **kwargs): }, }, output_dtypes=(np.float64, np.float64), - exclude_dims=set(reduce_dims), + exclude_dims=set(reduce_dims_), kwargs=kwargs, ) result[name + "curvefit_coefficients"] = popt result[name + "curvefit_covariance"] = pcov - cov_fields.append(name + "curvefit_covariance") result = result.assign_coords( {"param": params, "cov_i": params, "cov_j": params} ) - result = result.transpose("param", "cov_i", "cov_j", ...) result.attrs = self.attrs.copy() - if n_params == 1: - result = result.squeeze(["param", "cov_i", "cov_j"]) - - if not cov: - result = result.drop_vars(cov_fields) - return result diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 88ee5601b66..00ffe364f70 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4603,9 +4603,9 @@ def exp_decay(t, n0, tau=1): da[0, 0] = np.nan expected = DataArray( - [[3, 5, np.nan], [3, 4, np.nan]], - dims=("param", "x"), - coords={"param": ["n0", "tau"], "x": [0, 1, 2]}, + [[3, 3], [5, 4], [np.nan, np.nan]], + dims=("x", "param"), + coords={"x": [0, 1, 2], "param": ["n0", "tau"]}, ) if use_dask: @@ -4617,9 +4617,27 @@ def exp_decay(t, n0, tau=1): assert_allclose(fit.curvefit_coefficients, expected, rtol=1e-3) da = da.compute() - fit = da.curvefit(coords="t", func=np.power, reduce_dim="x", param_names=["a"]) + fit = da.curvefit(coords="t", func=np.power, reduce_dims="x", param_names=["a"]) assert "a" in fit.param - assert "param" not in fit.dims + assert "x" not in fit.dims + + def test_curvefit_helpers(self): + def exp_decay(t, n0, tau=1): + return n0 * np.exp(-t / tau) + + params, func_args = xr.core.dataset._get_func_args(exp_decay, []) + assert params == ["n0", "tau"] + param_defaults, bounds_defaults = xr.core.dataset._initialize_curvefit_params( + params, {"n0": 4}, {"tau": [5, np.inf]}, func_args + ) + assert param_defaults == {"n0": 4, "tau": 6} + assert bounds_defaults == {"n0": (-np.inf, np.inf), "tau": (5, np.inf)} + + param_names = ["a"] + params, func_args = xr.core.dataset._get_func_args(np.power, param_names) + assert params == param_names + with pytest.raises(ValueError): + xr.core.dataset._get_func_args(np.power, []) class TestReduce: