Commit f1bd401

Start adding GRW RV Op and distribution (#5298)
* Add v4 grw and tests

Co-authored-by: Ricardo <[email protected]>
Co-authored-by: Ricardo Vieira <[email protected]>
1 parent 02897d1 commit f1bd401

5 files changed: +323 additions, -229 deletions


pymc/distributions/timeseries.py

Lines changed: 196 additions & 121 deletions
@@ -12,15 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Tuple, Union
+
 import aesara.tensor as at
 import numpy as np

 from aesara import scan
-from scipy import stats
+from aesara.tensor.random.op import RandomVariable

-from pymc.distributions import distribution, multivariate
+from pymc.aesaraf import change_rv_size, floatX, intX
+from pymc.distributions import distribution, logprob, multivariate
 from pymc.distributions.continuous import Flat, Normal, get_tau_sigma
+from pymc.distributions.dist_math import check_parameters
 from pymc.distributions.shape_utils import to_tuple
+from pymc.util import check_dist_not_registered

 __all__ = [
     "AR1",
@@ -33,6 +38,195 @@
 ]


+class GaussianRandomWalkRV(RandomVariable):
+    """
+    GaussianRandomWalk Random Variable
+    """
+
+    name = "GaussianRandomWalk"
+    ndim_supp = 1
+    ndims_params = [0, 0, 0, 0]
+    dtype = "floatX"
+    _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}")
+
+    def make_node(self, rng, size, dtype, mu, sigma, init, steps):
+        steps = at.as_tensor_variable(steps)
+        if not steps.ndim == 0 or not steps.dtype.startswith("int"):
+            raise ValueError("steps must be an integer scalar (ndim=0).")
+
+        return super().make_node(rng, size, dtype, mu, sigma, init, steps)
+
+    def _supp_shape_from_params(self, dist_params, reop_param_idx=0, param_shapes=None):
+        steps = dist_params[3]
+
+        return (steps + 1,)
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: np.random.RandomState,
+        mu: Union[np.ndarray, float],
+        sigma: Union[np.ndarray, float],
+        init: float,
+        steps: int,
+        size: Tuple[int],
+    ) -> np.ndarray:
+        """Gaussian Random Walk generator.
+
+        The init value is drawn from the Normal distribution with the same sigma as the
+        innovations.
+
+        Notes
+        -----
+        Currently does not support custom init distribution
+
+        Parameters
+        ----------
+        rng: np.random.RandomState
+            Numpy random number generator
+        mu: array_like
+            Random walk mean
+        sigma: np.ndarray
+            Standard deviation of innovation (sigma > 0)
+        init: float
+            Initialization value for GaussianRandomWalk
+        steps: int
+            Length of random walk, must be greater than 0. The returned array has
+            steps + 1 values, since the first value is the initial value
+        size: int
+            The number of Random Walk time series generated
+
+        Returns
+        -------
+        ndarray
+        """
+
+        if steps < 1:
+            raise ValueError("Steps must be greater than 0")
+
+        # If size is None then the returned series should be (*implied_dims, 1+steps)
+        if size is None:
+            # broadcast parameters with each other to find implied dims
+            bcast_shape = np.broadcast_shapes(
+                np.asarray(mu).shape,
+                np.asarray(sigma).shape,
+                np.asarray(init).shape,
+            )
+            dist_shape = (*bcast_shape, int(steps))
+
+        # If size is not None then the returned series should be (*size, 1+steps)
+        else:
+            init_size = (*size, 1)
+            dist_shape = (*size, int(steps))
+
+        innovations = rng.normal(loc=mu, scale=sigma, size=dist_shape)
+        grw = np.concatenate([init[..., None], innovations], axis=-1)
+        return np.cumsum(grw, axis=-1)
+
+
+gaussianrandomwalk = GaussianRandomWalkRV()
+
+
+class GaussianRandomWalk(distribution.Continuous):
+    r"""Random Walk with Normal innovations
+
+    Parameters
+    ----------
+    mu : tensor_like of float
+        innovation drift, defaults to 0.0
+    sigma : tensor_like of float, optional
+        sigma > 0, innovation standard deviation, defaults to 1.0
+    init : Univariate PyMC distribution
+        Univariate distribution of the initial value, created with the `.dist()` API.
+        Defaults to a Normal with the same `mu` and `sigma` as the GaussianRandomWalk
+    steps : int
+        Number of steps in the Gaussian Random Walk (steps > 0).
+    """
+
+    rv_op = gaussianrandomwalk
+
+    def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs):
+        if init is not None:
+            check_dist_not_registered(init)
+        return super().__new__(cls, name, mu, sigma, init, steps, **kwargs)
+
+    @classmethod
+    def dist(
+        cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, **kwargs
+    ) -> at.TensorVariable:
+
+        mu = at.as_tensor_variable(floatX(mu))
+        sigma = at.as_tensor_variable(floatX(sigma))
+        if steps is None:
+            raise ValueError("Must specify steps parameter")
+        steps = at.as_tensor_variable(intX(steps))
+
+        shape = kwargs.get("shape", None)
+        if size is None and shape is None:
+            init_size = None
+        else:
+            init_size = to_tuple(size) if size is not None else to_tuple(shape)[:-1]
+
+        # If no scalar distribution is passed then initialize with a Normal of same mu and sigma
+        if init is None:
+            init = Normal.dist(mu, sigma, size=init_size)
+        else:
+            if not (
+                isinstance(init, at.TensorVariable)
+                and init.owner is not None
+                and isinstance(init.owner.op, RandomVariable)
+                and init.owner.op.ndim_supp == 0
+            ):
+                raise TypeError("init must be a univariate distribution variable")
+
+            if init_size is not None:
+                init = change_rv_size(init, init_size)
+            else:
+                # If not explicit, size is determined by the shapes of mu, sigma, and init
+                bcast_shape = at.broadcast_arrays(mu, sigma, init)[0].shape
+                init = change_rv_size(init, bcast_shape)
+
+        # Ignores logprob of init var because that's accounted for in the logp method
+        init.tag.ignore_logprob = True
+
+        return super().dist([mu, sigma, init, steps], size=size, **kwargs)
+
+    def logp(
+        value: at.Variable,
+        mu: at.Variable,
+        sigma: at.Variable,
+        init: at.Variable,
+        steps: at.Variable,
+    ) -> at.TensorVariable:
+        """Calculate log-probability of Gaussian Random Walk distribution at specified value.
+
+        Parameters
+        ----------
+        value: at.Variable,
+        mu: at.Variable,
+        sigma: at.Variable,
+        init: at.Variable, Not used
+        steps: at.Variable, Not used
+
+        Returns
+        -------
+        TensorVariable
+        """
+
+        # Calculate initialization logp
+        init_logp = logprob.logp(init, value[..., 0])
+
+        # Make time series stationary around the mean value
+        stationary_series = value[..., 1:] - value[..., :-1]
+        series_logp = logprob.logp(Normal.dist(mu, sigma), stationary_series)
+
+        return check_parameters(
+            init_logp + series_logp.sum(axis=-1),
+            steps > 0,
+            msg="steps > 0",
+        )
+
+
 class AR1(distribution.Continuous):
     """
     Autoregressive process with 1 lag.
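Aside (not part of the commit): a minimal NumPy sketch of what the new `rng_fn` computes for scalar parameters. The draw is the init value followed by the cumulative sum of Normal(mu, sigma) innovations, so a walk with `steps` innovations has steps + 1 values. Variable names below are illustrative only.

import numpy as np

rng = np.random.default_rng(42)
mu, sigma, init, steps = 0.0, 1.0, 0.0, 5

# init value first, then the running sum of the innovations
innovations = rng.normal(loc=mu, scale=sigma, size=steps)
grw = np.concatenate([[init], innovations]).cumsum()

assert grw.shape == (steps + 1,)  # steps innovations plus the initial value
assert grw[0] == init  # first entry is the (here deterministic) initial value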
@@ -171,125 +365,6 @@ def logp(self, value):
         return at.sum(innov_like) + at.sum(init_like)


-class GaussianRandomWalk(distribution.Continuous):
-    r"""Random Walk with Normal innovations
-
-    Note that this is mainly a user-friendly wrapper to enable an easier specification
-    of GRW. You are not restricted to use only Normal innovations but can use any
-    distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior.
-
-    Parameters
-    ----------
-    mu : tensor_like of float, default 0
-        innovation drift, defaults to 0.0
-        For vector valued `mu`, first dimension must match shape of the random walk, and
-        the first element will be discarded (since there is no innovation in the first timestep)
-    sigma : tensor_like of float, optional
-        `sigma` > 0, innovation standard deviation (only required if `tau` is not specified)
-        For vector valued `sigma`, first dimension must match shape of the random walk, and
-        the first element will be discarded (since there is no innovation in the first timestep)
-    tau : tensor_like of float, optional
-        `tau` > 0, innovation precision (only required if `sigma` is not specified)
-        For vector valued `tau`, first dimension must match shape of the random walk, and
-        the first element will be discarded (since there is no innovation in the first timestep)
-    init : pymc.Distribution, optional
-        distribution for initial value (Defaults to Flat())
-    """
-
-    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
-        kwargs.setdefault("shape", 1)
-        super().__init__(*args, **kwargs)
-        if sum(self.shape) == 0:
-            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
-        tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
-        self.tau = at.as_tensor_variable(tau)
-        sigma = at.as_tensor_variable(sigma)
-        self.sigma = sigma
-        self.mu = at.as_tensor_variable(mu)
-        self.init = init or Flat.dist()
-        self.mean = at.as_tensor_variable(0.0)
-
-    def _mu_and_sigma(self, mu, sigma):
-        """Helper to get mu and sigma if they are high dimensional."""
-        if sigma.ndim > 0:
-            sigma = sigma[1:]
-        if mu.ndim > 0:
-            mu = mu[1:]
-        return mu, sigma
-
-    def logp(self, x):
-        """
-        Calculate log-probability of Gaussian Random Walk distribution at specified value.
-
-        Parameters
-        ----------
-        x : numeric
-            Value for which log-probability is calculated.
-
-        Returns
-        -------
-        TensorVariable
-        """
-        if x.ndim > 0:
-            x_im1 = x[:-1]
-            x_i = x[1:]
-            mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
-            innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
-            return self.init.logp(x[0]) + at.sum(innov_like)
-        return self.init.logp(x)
-
-    def random(self, point=None, size=None):
-        """Draw random values from GaussianRandomWalk.
-
-        Parameters
-        ----------
-        point : dict or Point, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size : int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
-        # return distribution.generate_samples(
-        #     self._random,
-        #     sigma=sigma,
-        #     mu=mu,
-        #     size=size,
-        #     dist_shape=self.shape,
-        #     not_broadcast_kwargs={"sample_shape": to_tuple(size)},
-        # )
-        pass
-
-    def _random(self, sigma, mu, size, sample_shape):
-        """Implement a Gaussian random walk as a cumulative sum of normals.
-        axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated.
-        This might need to be corrected in future when issue #4010 is fixed.
-        """
-        if size[len(sample_shape)] == sample_shape:
-            axis = len(sample_shape)
-        else:
-            axis = len(size) - 1
-        rv = stats.norm(mu, sigma)
-        data = rv.rvs(size).cumsum(axis=axis)
-        data = np.array(data)
-
-        # the following lines center the random walk to start at the origin.
-        if len(data.shape) > 1:
-            for i in range(data.shape[0]):
-                data[i] = data[i] - data[i][0]
-        else:
-            data = data - data[0]
-        return data
-
-    def _distr_parameters_for_repr(self):
-        return ["mu", "sigma"]
-
-
 class GARCH11(distribution.Continuous):
     r"""
     GARCH(1,1) with Normal innovations. The model is specified by
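For orientation (not part of the diff), a minimal sketch of how the distribution added above might be used; the model and variable names are hypothetical, and this commit only starts wiring the new Op into the rest of the library:

import pymc as pm

# Registered variable: `steps` is required, and the resulting vector has
# steps + 1 entries (the initial value plus the innovations).
with pm.Model() as model:
    sigma = pm.HalfNormal("sigma", 1.0)
    level = pm.GaussianRandomWalk("level", mu=0.0, sigma=sigma, steps=100)

# Unregistered variable via the .dist() API, e.g. with a custom univariate init:
init = pm.Normal.dist(0.0, 10.0)
grw = pm.GaussianRandomWalk.dist(mu=0.0, sigma=1.0, init=init, steps=100)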

pymc/tests/test_distributions.py

Lines changed: 16 additions & 0 deletions
@@ -2619,6 +2619,22 @@ def test_interpolated_transform(self, transform):
         assert not np.isfinite(m.compile_logp()({"x": -1.0}))
         assert not np.isfinite(m.compile_logp()({"x": 11.0}))

+    def test_gaussianrandomwalk(self):
+        def ref_logp(value, mu, sigma, steps):
+            # Relying on fact that init will be normal by default
+            return (
+                scipy.stats.norm.logpdf(value[0], mu, sigma)
+                + scipy.stats.norm.logpdf(np.diff(value), mu, sigma).sum()
+            )
+
+        self.check_logp(
+            pm.GaussianRandomWalk,
+            Vector(R, 4),
+            {"mu": R, "sigma": Rplus, "steps": Nat},
+            ref_logp,
+            decimal=select_by_precision(float64=6, float32=1),
+        )
+

 class TestBound:
     """Tests for pm.Bound distribution"""
