From b2a9e405c5c7515f381e042e711b176b53117172 Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 5 May 2022 12:20:23 -0500 Subject: [PATCH 01/26] Refactor get_tau_sigma to handle lists --- pymc/distributions/continuous.py | 9 ++++----- pymc/tests/test_distributions.py | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 40d4c1a3ac..1f8a93c396 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -226,8 +226,8 @@ def get_tau_sigma(tau=None, sigma=None): if isinstance(sigma, Variable): sigma_ = check_parameters(sigma, sigma > 0, msg="sigma > 0") else: - assert np.all(np.asarray(sigma) > 0) - sigma_ = sigma + sigma_ = np.asarray(sigma) + assert np.all(sigma_ > 0), "sigma must be positive" tau = sigma_**-2.0 else: @@ -237,9 +237,8 @@ def get_tau_sigma(tau=None, sigma=None): if isinstance(tau, Variable): tau_ = check_parameters(tau, tau > 0, msg="tau > 0") else: - assert np.all(np.asarray(tau) > 0) - tau_ = tau - + tau_ = np.asarray(tau) + assert np.all(tau_ > 0), "tau must be positive" sigma = tau_**-0.5 return floatX(tau), floatX(sigma) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index cb485a086f..368770395a 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2413,6 +2413,9 @@ def test_get_tau_sigma(self): with pytest.raises(ParameterValueError): sigma.eval() + sigma = [1, 2] + assert_almost_equal(get_tau_sigma(sigma=sigma), [1.0 / np.array(sigma)**2, np.array(sigma)] + @pytest.mark.parametrize( "value,mu,sigma,nu,logp", [ From f264bfa2c072c8443948dc7c0e96b78e7cd7dd20 Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 5 May 2022 12:29:31 -0500 Subject: [PATCH 02/26] Fixed syntax error --- pymc/tests/test_distributions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index 368770395a..f195cfd0f3 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2414,7 +2414,7 @@ def test_get_tau_sigma(self): sigma.eval() sigma = [1, 2] - assert_almost_equal(get_tau_sigma(sigma=sigma), [1.0 / np.array(sigma)**2, np.array(sigma)] + assert_almost_equal(get_tau_sigma(sigma=sigma), [1.0 / np.array(sigma)**2, np.array(sigma)]) @pytest.mark.parametrize( "value,mu,sigma,nu,logp", From 277b5e74c7807265e75d12be7832d654c49af09d Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 5 May 2022 12:44:35 -0500 Subject: [PATCH 03/26] Black formatted --- pymc/tests/test_distributions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index f195cfd0f3..536e337457 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2414,7 +2414,9 @@ def test_get_tau_sigma(self): sigma.eval() sigma = [1, 2] - assert_almost_equal(get_tau_sigma(sigma=sigma), [1.0 / np.array(sigma)**2, np.array(sigma)]) + assert_almost_equal( + get_tau_sigma(sigma=sigma), [1.0 / np.array(sigma) ** 2, np.array(sigma)] + ) @pytest.mark.parametrize( "value,mu,sigma,nu,logp", From 7902ab96666d1e68e98d3b652d66752da775de2d Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Fri, 6 May 2022 09:49:03 -0500 Subject: [PATCH 04/26] Change assertions to ValueError raises --- pymc/distributions/continuous.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc/distributions/continuous.py 
b/pymc/distributions/continuous.py index 1f8a93c396..af42fd64a0 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -227,7 +227,8 @@ def get_tau_sigma(tau=None, sigma=None): sigma_ = check_parameters(sigma, sigma > 0, msg="sigma > 0") else: sigma_ = np.asarray(sigma) - assert np.all(sigma_ > 0), "sigma must be positive" + if np.any(sigma_ <= 0): + raise ValueError("sigma must be positive") tau = sigma_**-2.0 else: @@ -238,7 +239,8 @@ def get_tau_sigma(tau=None, sigma=None): tau_ = check_parameters(tau, tau > 0, msg="tau > 0") else: tau_ = np.asarray(tau) - assert np.all(tau_ > 0), "tau must be positive" + if np.any(tau_ <= 0): + raise ValueError("tau must be positive") sigma = tau_**-0.5 return floatX(tau), floatX(sigma) From 6679b61e4692d9044d9e0651ea8a2a0dac4ecd53 Mon Sep 17 00:00:00 2001 From: TimOliverMaier Date: Fri, 22 Apr 2022 23:16:03 +0200 Subject: [PATCH 05/26] Prior predictions constant data (#5723) * support return constant_data with prior pred * pre-commit format * added test * code review --- pymc/backends/arviz.py | 1 - pymc/tests/test_idata_conversion.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/backends/arviz.py b/pymc/backends/arviz.py index 840b9e67dc..552a2afd44 100644 --- a/pymc/backends/arviz.py +++ b/pymc/backends/arviz.py @@ -464,7 +464,6 @@ def observed_data_to_xarray(self): default_dims=[], ) - @requires(["trace", "predictions"]) @requires("model") def constant_data_to_xarray(self): """Convert constant data to xarray.""" diff --git a/pymc/tests/test_idata_conversion.py b/pymc/tests/test_idata_conversion.py index e9d6a5d7d2..f27fa49116 100644 --- a/pymc/tests/test_idata_conversion.py +++ b/pymc/tests/test_idata_conversion.py @@ -502,7 +502,7 @@ def test_no_trace(self): @pytest.mark.parametrize("use_context", [True, False]) def test_priors_separation(self, use_context): - """Test model is enough to get prior, prior predictive and observed_data.""" + """Test model is enough to get prior, prior predictive, constant_data and observed_data.""" with pm.Model() as model: x = pm.MutableData("x", [1.0, 2.0, 3.0]) y = pm.ConstantData("y", [1.0, 2.0, 3.0]) @@ -514,6 +514,7 @@ def test_priors_separation(self, use_context): "prior": ["beta", "~obs"], "observed_data": ["obs"], "prior_predictive": ["obs"], + "constant_data": ["x", "y"], } if use_context: with model: From a20c97041b0ec17560d0fffe03375ee101c64fc7 Mon Sep 17 00:00:00 2001 From: code-review-doctor <72647856+code-review-doctor@users.noreply.github.com> Date: Sun, 24 Apr 2022 03:13:20 +0100 Subject: [PATCH 06/26] Fix issue probably-meant-fstring found at https://codereview.doctor (#5726) --- pymc/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/data.py b/pymc/data.py index 3d74d659df..a240712982 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -461,7 +461,7 @@ def align_minibatches(batches=None): else: for b in batches: if not isinstance(b, Minibatch): - raise TypeError("{b} is not a Minibatch") + raise TypeError(f"{b} is not a Minibatch") for rng in Minibatch.RNG[id(b)]: rng.seed() From 4130f04137a16df86da938257d92fcae8cc864b9 Mon Sep 17 00:00:00 2001 From: Somasree Majumder <56045049+soma2000-lang@users.noreply.github.com> Date: Sun, 24 Apr 2022 14:16:27 +0530 Subject: [PATCH 07/26] Add coords argument to pymc.set_data (#5588) Co-authored-by: Oriol (ZBook) --- pymc/model.py | 4 ++-- pymc/tests/test_data_container.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git 
a/pymc/model.py b/pymc/model.py index c211636b2b..26b8ed6600 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -1734,7 +1734,7 @@ def point_logps(self, point=None, round_vals=2): Model._context_class = Model -def set_data(new_data, model=None): +def set_data(new_data, model=None, *, coords=None): """Sets the value of one or more data container variables. Parameters @@ -1771,7 +1771,7 @@ def set_data(new_data, model=None): model = modelcontext(model) for variable_name, new_value in new_data.items(): - model.set_data(variable_name, new_value) + model.set_data(variable_name, new_value, coords=coords) def compile_fn(outs, mode=None, point_fn=True, model=None, **kwargs): diff --git a/pymc/tests/test_data_container.py b/pymc/tests/test_data_container.py index 8c5ff71c79..2bcceaf5d7 100644 --- a/pymc/tests/test_data_container.py +++ b/pymc/tests/test_data_container.py @@ -94,6 +94,31 @@ def test_sample_posterior_predictive_after_set_data(self): x_test, y_test.posterior_predictive["obs"].mean(("chain", "draw")), atol=1e-1 ) + def test_sample_posterior_predictive_after_set_data_with_coords(self): + y = np.array([1.0, 2.0, 3.0]) + with pm.Model() as model: + x = pm.MutableData("x", [1.0, 2.0, 3.0], dims="obs_id") + beta = pm.Normal("beta", 0, 10.0) + pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y, dims="obs_id") + idata = pm.sample( + 10, + tune=100, + chains=1, + return_inferencedata=True, + compute_convergence_checks=False, + ) + # Predict on new data. + with model: + x_test = [5, 6] + pm.set_data(new_data={"x": x_test}, coords={"obs_id": ["a", "b"]}) + pm.sample_posterior_predictive(idata, extend_inferencedata=True, predictions=True) + + assert idata.predictions["obs"].shape == (1, 10, 2) + assert np.all(idata.predictions["obs_id"].values == np.array(["a", "b"])) + np.testing.assert_allclose( + x_test, idata.predictions["obs"].mean(("chain", "draw")), atol=1e-1 + ) + def test_sample_after_set_data(self): with pm.Model() as model: x = pm.MutableData("x", [1.0, 2.0, 3.0]) From 64e01d5c75a406f4127fbb6b4f971831e6071ed1 Mon Sep 17 00:00:00 2001 From: danhphan Date: Sat, 19 Mar 2022 21:19:11 +1100 Subject: [PATCH 08/26] remove MultinomialRV override --- pymc/distributions/multivariate.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 940b3907e1..0632720cb2 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -30,7 +30,7 @@ from aesara.sparse.basic import sp_sum from aesara.tensor import gammaln, sigmoid from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace -from aesara.tensor.random.basic import MultinomialRV, dirichlet, multivariate_normal +from aesara.tensor.random.basic import dirichlet, multinomial, multivariate_normal from aesara.tensor.random.op import RandomVariable, default_supp_shape_from_params from aesara.tensor.random.utils import broadcast_params, normalize_size_param from aesara.tensor.slinalg import Cholesky @@ -490,30 +490,6 @@ def logp(value, a): ) -class MultinomialRV(MultinomialRV): - """Aesara's `MultinomialRV` doesn't broadcast; this one does.""" - - @classmethod - def rng_fn(cls, rng, n, p, size): - if n.ndim > 0 or p.ndim > 1: - n, p = broadcast_params([n, p], cls.ndims_params) - size = tuple(size or ()) - - if size: - n = np.broadcast_to(n, size) - p = np.broadcast_to(p, size + (p.shape[-1],)) - - res = np.empty(p.shape) - for idx in np.ndindex(p.shape[:-1]): - res[idx] = rng.multinomial(n[idx], 
p[idx]) - return res - else: - return rng.multinomial(n, p, size=size) - - -multinomial = MultinomialRV() - - class Multinomial(Discrete): r""" Multinomial log-likelihood. From e89d803cbe3016dae77c8ab0185bc29bb8eda202 Mon Sep 17 00:00:00 2001 From: twiecki Date: Mon, 25 Apr 2022 07:05:52 +0000 Subject: [PATCH 09/26] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20UPGRADE:=20Autoupdat?= =?UTF-8?q?e=20pre-commit=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 29a82e9ffb..8a74027aa3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,7 +46,7 @@ repos: hooks: - id: black - repo: https://github.com/PyCQA/pylint - rev: v2.13.5 + rev: v2.13.7 hooks: - id: pylint args: [--rcfile=.pylintrc] From 904eac8670f69ae89c0f6cd6fd1157101fc67154 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 27 Apr 2022 18:29:34 +0200 Subject: [PATCH 10/26] Group GaussianRandomWalk tests in single class --- pymc/tests/test_distributions_timeseries.py | 152 ++++++++++---------- 1 file changed, 75 insertions(+), 77 deletions(-) diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py index b0c5325bce..95f5f37b43 100644 --- a/pymc/tests/test_distributions_timeseries.py +++ b/pymc/tests/test_distributions_timeseries.py @@ -32,102 +32,100 @@ from pymc.tests.test_distributions_random import BaseTestDistributionRandom -class TestGaussianRandomWalkRandom(BaseTestDistributionRandom): - # Override default size for test class - size = None - - pymc_dist = pm.GaussianRandomWalk - pymc_dist_params = {"mu": 1.0, "sigma": 2, "init": pm.Constant.dist(0), "steps": 4} - expected_rv_op_params = {"mu": 1.0, "sigma": 2, "init": pm.Constant.dist(0), "steps": 4} - - checks_to_run = [ - "check_pymc_params_match_rv_op", - "check_rv_inferred_size", - ] - - def check_rv_inferred_size(self): - steps = self.pymc_dist_params["steps"] - sizes_to_check = [None, (), 1, (1,)] - sizes_expected = [(steps + 1,), (steps + 1,), (1, steps + 1), (1, steps + 1)] - - for size, expected in zip(sizes_to_check, sizes_expected): - pymc_rv = self.pymc_dist.dist(**self.pymc_dist_params, size=size) - expected_symbolic = tuple(pymc_rv.shape.eval()) - assert expected_symbolic == expected - - def test_steps_scalar_check(self): - with pytest.raises(ValueError, match="steps must be an integer scalar"): - self.pymc_dist.dist(steps=[1]) - - -def test_gaussianrandomwalk_inference(): - mu, sigma, steps = 2, 1, 1000 - obs = np.concatenate([[0], np.random.normal(mu, sigma, size=steps)]).cumsum() +class TestGaussianRandomWalk: + class TestGaussianRandomWalkRandom(BaseTestDistributionRandom): + # Override default size for test class + size = None + + pymc_dist = pm.GaussianRandomWalk + pymc_dist_params = {"mu": 1.0, "sigma": 2, "init": pm.Constant.dist(0), "steps": 4} + expected_rv_op_params = {"mu": 1.0, "sigma": 2, "init": pm.Constant.dist(0), "steps": 4} + + checks_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_inferred_size", + ] - with pm.Model(): - _mu = pm.Uniform("mu", -10, 10) - _sigma = pm.Uniform("sigma", 0, 10) + def check_rv_inferred_size(self): + steps = self.pymc_dist_params["steps"] + sizes_to_check = [None, (), 1, (1,)] + sizes_expected = [(steps + 1,), (steps + 1,), (1, steps + 1), (1, steps + 1)] - obs_data = pm.MutableData("obs_data", obs) - grw = GaussianRandomWalk("grw", _mu, _sigma, 
steps=steps, observed=obs_data) + for size, expected in zip(sizes_to_check, sizes_expected): + pymc_rv = self.pymc_dist.dist(**self.pymc_dist_params, size=size) + expected_symbolic = tuple(pymc_rv.shape.eval()) + assert expected_symbolic == expected - trace = pm.sample(chains=1) + def test_steps_scalar_check(self): + with pytest.raises(ValueError, match="steps must be an integer scalar"): + self.pymc_dist.dist(steps=[1]) - recovered_mu = trace.posterior["mu"].mean() - recovered_sigma = trace.posterior["sigma"].mean() - np.testing.assert_allclose([mu, sigma], [recovered_mu, recovered_sigma], atol=0.2) + def test_gaussianrandomwalk_inference(self): + mu, sigma, steps = 2, 1, 1000 + obs = np.concatenate([[0], np.random.normal(mu, sigma, size=steps)]).cumsum() + with pm.Model(): + _mu = pm.Uniform("mu", -10, 10) + _sigma = pm.Uniform("sigma", 0, 10) -@pytest.mark.parametrize("init", [None, pm.Normal.dist()]) -def test_gaussian_random_walk_init_dist_shape(init): - """Test that init_dist is properly resized""" - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == () + obs_data = pm.MutableData("obs_data", obs) + grw = GaussianRandomWalk("grw", _mu, _sigma, steps=steps, observed=obs_data) - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, size=(5,)) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) + trace = pm.sample(chains=1) - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=1) - assert tuple(grw.owner.inputs[-2].shape.eval()) == () + recovered_mu = trace.posterior["mu"].mean() + recovered_sigma = trace.posterior["sigma"].mean() + np.testing.assert_allclose([mu, sigma], [recovered_mu, recovered_sigma], atol=0.2) - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=(5, 1)) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) + @pytest.mark.parametrize("init", [None, pm.Normal.dist()]) + def test_gaussian_random_walk_init_dist_shape(self, init): + """Test that init_dist is properly resized""" + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init) + assert tuple(grw.owner.inputs[-2].shape.eval()) == () - grw = pm.GaussianRandomWalk.dist(mu=[0, 0], sigma=1, steps=1, init=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,) + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, size=(5,)) + assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=[1, 1], steps=1, init=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,) + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=1) + assert tuple(grw.owner.inputs[-2].shape.eval()) == () - grw = pm.GaussianRandomWalk.dist(mu=np.zeros((3, 1)), sigma=[1, 1], steps=1, init=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (3, 2) + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=(5, 1)) + assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) + grw = pm.GaussianRandomWalk.dist(mu=[0, 0], sigma=1, steps=1, init=init) + assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,) -def test_shape_ellipsis(): - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=5, init=pm.Normal.dist(), shape=(3, ...)) - assert tuple(grw.shape.eval()) == (3, 6) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (3,) + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=[1, 1], steps=1, init=init) + assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,) + grw = 
pm.GaussianRandomWalk.dist(mu=np.zeros((3, 1)), sigma=[1, 1], steps=1, init=init) + assert tuple(grw.owner.inputs[-2].shape.eval()) == (3, 2) -def test_gaussianrandomwalk_broadcasted_by_init_dist(): - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=4, init=pm.Normal.dist(size=(2, 3))) - assert tuple(grw.shape.eval()) == (2, 3, 5) - assert grw.eval().shape == (2, 3, 5) + def test_shape_ellipsis(self): + grw = pm.GaussianRandomWalk.dist( + mu=0, sigma=1, steps=5, init=pm.Normal.dist(), shape=(3, ...) + ) + assert tuple(grw.shape.eval()) == (3, 6) + assert tuple(grw.owner.inputs[-2].shape.eval()) == (3,) + def test_gaussianrandomwalk_broadcasted_by_init_dist(self): + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=4, init=pm.Normal.dist(size=(2, 3))) + assert tuple(grw.shape.eval()) == (2, 3, 5) + assert grw.eval().shape == (2, 3, 5) -@pytest.mark.parametrize( - "init", - [ - pm.HalfNormal.dist(sigma=2), - pm.StudentT.dist(nu=4, mu=1, sigma=0.5), - ], -) -def test_gaussian_random_walk_init_dist_logp(init): - grw = pm.GaussianRandomWalk.dist(init=init, steps=1) - assert np.isclose( - pm.logp(grw, [0, 0]).eval(), - pm.logp(init, 0).eval() + scipy.stats.norm.logpdf(0), + @pytest.mark.parametrize( + "init", + [ + pm.HalfNormal.dist(sigma=2), + pm.StudentT.dist(nu=4, mu=1, sigma=0.5), + ], ) + def test_gaussian_random_walk_init_dist_logp(self, init): + grw = pm.GaussianRandomWalk.dist(init=init, steps=1) + assert np.isclose( + pm.logp(grw, [0, 0]).eval(), + pm.logp(init, 0).eval() + scipy.stats.norm.logpdf(0), + ) @pytest.mark.xfail(reason="Timeseries not refactored") From 16b4afad3aae1e5ffcf12cdd75523ce16d0e8732 Mon Sep 17 00:00:00 2001 From: Ravin Kumar Date: Mon, 4 Apr 2022 19:48:37 -0700 Subject: [PATCH 11/26] Infer steps from shape in GaussianRandomWalk --- pymc/distributions/timeseries.py | 21 ++++++++++++++++++++- pymc/tests/test_distributions_timeseries.py | 19 +++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index c7225ad50e..a8fdcedec7 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -18,6 +18,7 @@ import numpy as np from aesara import scan +from aesara.raise_op import Assert from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.utils import normalize_size_param @@ -167,8 +168,26 @@ def dist( mu = at.as_tensor_variable(floatX(mu)) sigma = at.as_tensor_variable(floatX(sigma)) + + # Check if shape contains information about number of steps + steps_from_shape = None + shape = kwargs.get("shape", None) + if shape is not None: + shape = to_tuple(shape) + if shape[-1] is not ...: + steps_from_shape = shape[-1] - 1 + if steps is None: - raise ValueError("Must specify steps parameter") + if steps_from_shape is not None: + steps = steps_from_shape + else: + raise ValueError("Must specify steps or shape parameter") + elif steps_from_shape is not None: + # Assert that steps and shape are consistent + steps = Assert(msg="Steps do not match last shape dimension")( + steps, at.eq(steps, steps_from_shape) + ) + steps = at.as_tensor_variable(intX(steps)) # If no scalar distribution is passed then initialize with a Normal of same mu and sigma diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py index 95f5f37b43..a7d3ab7d17 100644 --- a/pymc/tests/test_distributions_timeseries.py +++ b/pymc/tests/test_distributions_timeseries.py @@ -86,10 +86,10 @@ def 
test_gaussian_random_walk_init_dist_shape(self, init): grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, size=(5,)) assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=1) + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=2) assert tuple(grw.owner.inputs[-2].shape.eval()) == () - grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=(5, 1)) + grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=(5, 2)) assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) grw = pm.GaussianRandomWalk.dist(mu=[0, 0], sigma=1, steps=1, init=init) @@ -113,6 +113,21 @@ def test_gaussianrandomwalk_broadcasted_by_init_dist(self): assert tuple(grw.shape.eval()) == (2, 3, 5) assert grw.eval().shape == (2, 3, 5) + @pytest.mark.parametrize("shape", ((6,), (3, 6))) + def test_inferred_steps_from_shape(self, shape): + x = GaussianRandomWalk.dist(shape=shape) + steps = x.owner.inputs[-1] + assert steps.eval() == 5 + + @pytest.mark.parametrize("shape", (None, (5, ...))) + def test_missing_steps(self, shape): + with pytest.raises(ValueError, match="Must specify steps or shape parameter"): + GaussianRandomWalk.dist(shape=shape) + + def test_inconsistent_steps_and_shape(self): + with pytest.raises(AssertionError, match="Steps do not match last shape dimension"): + x = GaussianRandomWalk.dist(steps=12, shape=45) + @pytest.mark.parametrize( "init", [ From fe8634f37083b97ec148b81f05ff68638e0969b1 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sun, 1 May 2022 09:43:49 +0200 Subject: [PATCH 12/26] Unpin upper limit on numpydoc Closes #5401 --- conda-envs/environment-dev-py37.yml | 2 +- conda-envs/environment-dev-py38.yml | 2 +- conda-envs/environment-dev-py39.yml | 2 +- conda-envs/windows-environment-dev-py38.yml | 2 +- requirements-dev.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda-envs/environment-dev-py37.yml b/conda-envs/environment-dev-py37.yml index 7c85936b26..9d310696d8 100644 --- a/conda-envs/environment-dev-py37.yml +++ b/conda-envs/environment-dev-py37.yml @@ -16,7 +16,7 @@ dependencies: - jax - myst-nb - numpy>=1.15.0 -- numpydoc<1.2 +- numpydoc - pandas>=0.24.0 - pip - pre-commit>=2.8.0 diff --git a/conda-envs/environment-dev-py38.yml b/conda-envs/environment-dev-py38.yml index 0e8957097d..44409ea5fe 100644 --- a/conda-envs/environment-dev-py38.yml +++ b/conda-envs/environment-dev-py38.yml @@ -16,7 +16,7 @@ dependencies: - jax - myst-nb - numpy>=1.15.0 -- numpydoc<1.2 +- numpydoc - pandas>=0.24.0 - pip - pre-commit>=2.8.0 diff --git a/conda-envs/environment-dev-py39.yml b/conda-envs/environment-dev-py39.yml index 7d2319f118..ce05bc9290 100644 --- a/conda-envs/environment-dev-py39.yml +++ b/conda-envs/environment-dev-py39.yml @@ -16,7 +16,7 @@ dependencies: - jax - myst-nb - numpy>=1.15.0 -- numpydoc<1.2 +- numpydoc - pandas>=0.24.0 - pip - pre-commit>=2.8.0 diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml index 26ef54c1f8..899f62ea96 100644 --- a/conda-envs/windows-environment-dev-py38.yml +++ b/conda-envs/windows-environment-dev-py38.yml @@ -22,7 +22,7 @@ dependencies: # Extra stuff for dev, testing and docs build - ipython>=7.16 - myst-nb -- numpydoc<1.2 +- numpydoc - pre-commit>=2.8.0 - pydata-sphinx-theme - pytest-cov>=2.5 diff --git a/requirements-dev.txt b/requirements-dev.txt index 1e67fb001d..6099cc4f00 100644 --- a/requirements-dev.txt +++ 
b/requirements-dev.txt @@ -11,7 +11,7 @@ h5py>=2.7 ipython>=7.16 myst-nb numpy>=1.15.0 -numpydoc<1.2 +numpydoc pandas>=0.24.0 polyagamma pre-commit>=2.8.0 From 07501b970f8a2ca2eba0c67098b7014b37460fcf Mon Sep 17 00:00:00 2001 From: twiecki Date: Mon, 2 May 2022 07:04:41 +0000 Subject: [PATCH 13/26] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20UPGRADE:=20Autoupdat?= =?UTF-8?q?e=20pre-commit=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a74027aa3..b0323a2559 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: exclude: ^requirements-dev\.txt$ - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.942 + rev: v0.950 hooks: - id: mypy name: Run static type checks From 7eaea589980039f2d13e46037c26956f6ef75584 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 3 May 2022 11:32:37 +0200 Subject: [PATCH 14/26] Standardize docstrings of input dists arguments and add warning about cloning --- pymc/distributions/censored.py | 9 ++++++--- pymc/distributions/mixture.py | 9 ++++++--- pymc/distributions/multivariate.py | 5 ++++- pymc/distributions/timeseries.py | 5 ++++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index c0790dee3e..8aaad5c106 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -42,9 +42,12 @@ class Censored(SymbolicDistribution): Parameters ---------- - dist: PyMC unnamed distribution - PyMC distribution created via the `.dist()` API, which will be censored. This - distribution must be univariate and have a logcdf method implemented. + dist: unnamed distribution + Univariate distribution created via the `.dist()` API, which will be censored. + This distribution must have a logcdf method implemented for sampling. + + .. warning:: dist will be cloned, rendering it independent of the one passed as input. + lower: float or None Lower (left) censoring point. If `None` the distribution will not be left censored upper: float or None diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 0d51915a08..cea7560811 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -66,12 +66,15 @@ class Mixture(SymbolicDistribution): w : tensor_like of float w >= 0 and w <= 1 the mixture weights - comp_dists : iterable of PyMC distributions or single batched distribution - Distributions should be created via the `.dist()` API. If single distribution is - passed, the last size dimension (not shape) determines the number of mixture + comp_dists : iterable of unnamed distributions or single batched distribution + Distributions should be created via the `.dist()` API. If a single distribution + is passed, the last size dimension (not shape) determines the number of mixture components (e.g. `pm.Poisson.dist(..., size=components)`) :math:`f_1, \ldots, f_n` + .. warning:: comp_dists will be cloned, rendering them independent of the ones passed as input. + + Examples -------- .. 
code-block:: python diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 0632720cb2..c0bcba56b5 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -1271,10 +1271,13 @@ class LKJCholeskyCov: larger values put more weight on matrices with few correlations. n: int Dimension of the covariance matrix (n > 1). - sd_dist: pm.Distribution + sd_dist: unnamed distribution A positive scalar or vector distribution for the standard deviations, created with the `.dist()` API. Should have `shape[-1]=n`. Scalar distributions will be automatically resized to ensure this. + + .. warning:: sd_dist will be cloned, rendering it independent of the one passed as input. + compute_corr: bool, default=True If `True`, returns three values: the Cholesky decomposition, the correlations and the standard deviations of the covariance matrix. Otherwise, only returns diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index a8fdcedec7..cb24100a83 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -147,9 +147,12 @@ class GaussianRandomWalk(distribution.Continuous): innovation drift, defaults to 0.0 sigma : tensor_like of float, optional sigma > 0, innovation standard deviation, defaults to 1.0 - init : Univariate PyMC distribution + init : unnamed distribution Univariate distribution of the initial value, created with the `.dist()` API. Defaults to Normal with same `mu` and `sigma` as the GaussianRandomWalk + + .. warning:: init will be cloned, rendering them independent of the ones passed as input. + steps : int Number of steps in Gaussian Random Walks (steps > 0). """ From 1690482f3a95d34302be15e5bcf32ff12f6e55a9 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 3 May 2022 11:42:07 +0200 Subject: [PATCH 15/26] Remove unnecessary tag in Simulator logp --- pymc/distributions/simulator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index 8d20bfd982..22f83f12f7 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -247,8 +247,6 @@ def logp(cls, value, sim_op, sim_inputs): # in which case this would not be needed. 
However, that would have to be # done for every sampler that may accomodate Simulators rng = aesara.shared(np.random.default_rng()) - rng.tag.is_rng = True - # Create a new simulatorRV with identical inputs as the original one sim_value = sim_op.make_node(rng, *sim_inputs[1:]).default_output() sim_value.name = "sim_value" From 4401bf60a2c5fba9a6ce8069b684ff376285e29e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 3 May 2022 11:09:42 +0200 Subject: [PATCH 16/26] Replace deprecated tag.ignore_logprob --- pymc/distributions/bound.py | 6 ++--- pymc/distributions/logprob.py | 24 ++++++++++++++++++- pymc/distributions/mixture.py | 11 ++++----- pymc/distributions/multivariate.py | 5 ++-- pymc/distributions/timeseries.py | 3 ++- pymc/tests/test_logprob.py | 38 +++++++++++++++++++++++++++++- 6 files changed, 72 insertions(+), 15 deletions(-) diff --git a/pymc/distributions/bound.py b/pymc/distributions/bound.py index 90986be776..3df0ff7fda 100644 --- a/pymc/distributions/bound.py +++ b/pymc/distributions/bound.py @@ -23,7 +23,7 @@ from pymc.distributions.continuous import BoundedContinuous, bounded_cont_transform from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import Continuous, Discrete -from pymc.distributions.logprob import logp +from pymc.distributions.logprob import ignore_logprob, logp from pymc.distributions.shape_utils import to_tuple from pymc.distributions.transforms import _default_transform from pymc.model import modelcontext @@ -193,7 +193,7 @@ def __new__( raise ValueError("Given dims do not exist in model coordinates.") lower, upper, initval = cls._set_values(lower, upper, size, shape, initval) - dist.tag.ignore_logprob = True + dist = ignore_logprob(dist) if isinstance(dist.owner.op, Continuous): res = _ContinuousBounded( @@ -228,7 +228,7 @@ def dist( cls._argument_checks(dist, **kwargs) lower, upper, initval = cls._set_values(lower, upper, size, shape, initval=None) - dist.tag.ignore_logprob = True + dist = ignore_logprob(dist) if isinstance(dist.owner.op, Continuous): res = _ContinuousBounded.dist( [dist, lower, upper], diff --git a/pymc/distributions/logprob.py b/pymc/distributions/logprob.py index bb76f444e4..526ebe2548 100644 --- a/pymc/distributions/logprob.py +++ b/pymc/distributions/logprob.py @@ -20,6 +20,7 @@ import numpy as np from aeppl import factorized_joint_logprob +from aeppl.abstract import assign_custom_measurable_outputs from aeppl.logprob import logcdf as logcdf_aeppl from aeppl.logprob import logprob as logp_aeppl from aeppl.transforms import TransformValuesOpt @@ -221,7 +222,11 @@ def joint_logpt( transform_opt = TransformValuesOpt(transform_map) temp_logp_var_dict = factorized_joint_logprob( - tmp_rvs_to_values, extra_rewrites=transform_opt, use_jacobian=jacobian, **kwargs + tmp_rvs_to_values, + extra_rewrites=transform_opt, + use_jacobian=jacobian, + warn_missing_rvs=False, + **kwargs, ) # Raise if there are unexpected RandomVariables in the logp graph @@ -276,3 +281,20 @@ def logcdf(rv, value): value = at.as_tensor_variable(value, dtype=rv.dtype) return logcdf_aeppl(rv, value) + + +def ignore_logprob(rv): + """Return a duplicated variable that is ignored when creating Aeppl logprob graphs + + This is used in SymbolicDistributions that use other RVs as inputs but account + for their logp terms explicitly. + + If the variable is already ignored, it is returned directly. 
+ """ + prefix = "Unmeasurable" + node = rv.owner + op_type = type(node.op) + if op_type.__name__.startswith(prefix): + return rv + new_node = assign_custom_measurable_outputs(node, type_prefix=prefix) + return new_node.outputs[node.outputs.index(rv)] diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index cea7560811..77f5e4dee7 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -30,7 +30,7 @@ from pymc.distributions.continuous import Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import SymbolicDistribution, _moment, moment -from pymc.distributions.logprob import logcdf, logp +from pymc.distributions.logprob import ignore_logprob, logcdf, logp from pymc.distributions.shape_utils import to_tuple from pymc.distributions.transforms import _default_transform from pymc.util import check_dist_not_registered @@ -252,6 +252,10 @@ def rv_op(cls, weights, *components, size=None, rngs=None): assert weights_ndim_batch == 0 + # Component RVs terms are accounted by the Mixture logprob, so they can be + # safely ignored by Aeppl + components = [ignore_logprob(component) for component in components] + # Create a OpFromGraph that encapsulates the random generating process # Create dummy input variables with the same type as the ones provided weights_ = weights.type() @@ -299,11 +303,6 @@ def rv_op(cls, weights, *components, size=None, rngs=None): mix_out.tag.components = components mix_out.tag.choices_rng = mix_indexes_rng - # Component RVs terms are accounted by the Mixture logprob, so they can be - # safely ignore by Aeppl (this tag prevents UserWarning) - for component in components: - component.tag.ignore_logprob = True - return mix_out @classmethod diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index c0bcba56b5..dae01e805e 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -57,6 +57,7 @@ multigammaln, ) from pymc.distributions.distribution import Continuous, Discrete, moment +from pymc.distributions.logprob import ignore_logprob from pymc.distributions.shape_utils import ( broadcast_dist_samples_to, rv_size_is_none, @@ -1182,11 +1183,9 @@ def dist(cls, eta, n, sd_dist, **kwargs): # sd_dist is part of the generative graph, but should be completely ignored # by the logp graph, since the LKJ logp explicitly includes these terms. - # Setting sd_dist.tag.ignore_logprob to True, will prevent Aeppl warning about - # an unnacounted RandomVariable in the graph # TODO: Things could be simplified a bit if we managed to extract the # sd_dist prior components from the logp expression. 
- sd_dist.tag.ignore_logprob = True + sd_dist = ignore_logprob(sd_dist) return super().dist([n, eta, sd_dist], **kwargs) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index cb24100a83..cc4d744e5a 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -26,6 +26,7 @@ from pymc.distributions import distribution, logprob, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters +from pymc.distributions.logprob import ignore_logprob from pymc.distributions.shape_utils import rv_size_is_none, to_tuple from pymc.util import check_dist_not_registered @@ -206,7 +207,7 @@ def dist( raise TypeError("init must be a univariate distribution variable") # Ignores logprob of init var because that's accounted for in the logp method - init.tag.ignore_logprob = True + init = ignore_logprob(init) return super().dist([mu, sigma, init, steps], size=size, **kwargs) diff --git a/pymc/tests/test_logprob.py b/pymc/tests/test_logprob.py index f6ff39a2cd..74952c86a3 100644 --- a/pymc/tests/test_logprob.py +++ b/pymc/tests/test_logprob.py @@ -17,6 +17,7 @@ import pytest import scipy.stats.distributions as sp +from aeppl.abstract import get_measurable_outputs from aesara.graph.basic import ancestors from aesara.tensor.random.op import RandomVariable from aesara.tensor.subtensor import ( @@ -32,7 +33,7 @@ from pymc.aesaraf import floatX, walk_model from pymc.distributions.continuous import HalfFlat, Normal, TruncatedNormal, Uniform from pymc.distributions.discrete import Bernoulli -from pymc.distributions.logprob import joint_logpt, logcdf, logp +from pymc.distributions.logprob import ignore_logprob, joint_logpt, logcdf, logp from pymc.model import Model, Potential from pymc.tests.helpers import select_by_precision @@ -227,3 +228,38 @@ def test_unexpected_rvs(): with pytest.raises(ValueError, match="^Random variables detected in the logp graph"): model.logpt() + + +def test_ignore_logprob_basic(): + x = Normal.dist() + (measurable_x_out,) = get_measurable_outputs(x.owner.op, x.owner) + assert measurable_x_out is x.owner.outputs[1] + + new_x = ignore_logprob(x) + assert new_x is not x + assert isinstance(new_x.owner.op, Normal) + assert type(new_x.owner.op).__name__ == "UnmeasurableNormalRV" + # Confirm that it does not have measurable output + assert get_measurable_outputs(new_x.owner.op, new_x.owner) is None + + # Test that it will not clone a variable that is already unmeasurable + new_new_x = ignore_logprob(new_x) + assert new_new_x is new_x + + +def test_ignore_logprob_model(): + # logp that does not depend on input + def logp(value, x): + return value + + with Model() as m: + x = Normal.dist() + y = DensityDist("y", x, logp=logp) + # Aeppl raises a KeyError when it finds an unexpected RV + with pytest.raises(KeyError): + joint_logpt([y], {y: y.type()}) + + with Model() as m: + x = ignore_logprob(Normal.dist()) + y = DensityDist("y", x, logp=logp) + assert joint_logpt([y], {y: y.type()}) From 8ff7c85c22af57f728fa472fd8a0d3f3b2ff8cb2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 3 May 2022 11:42:17 +0200 Subject: [PATCH 17/26] Group compile_pymc tests in own class --- pymc/tests/test_aesaraf.py | 173 ++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 88 deletions(-) diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py index f1d6638286..ef05068879 100644 --- a/pymc/tests/test_aesaraf.py +++ b/pymc/tests/test_aesaraf.py @@ -596,91 
+596,88 @@ def test_rvs_to_value_vars_nested(): assert equal_computations(before, after) -def test_check_bounds_flag(): - """Test that CheckParameterValue Ops are replaced or removed when using compile_pymc""" - logp = at.ones(3) - cond = np.array([1, 0, 1]) - bound = check_parameters(logp, cond) - - with pm.Model() as m: - pass - - with pytest.raises(ParameterValueError): - aesara.function([], bound)() - - m.check_bounds = False - with m: - assert np.all(compile_pymc([], bound)() == 1) - - m.check_bounds = True - with m: - assert np.all(compile_pymc([], bound)() == -np.inf) - - -def test_compile_pymc_sets_rng_updates(): - rng = aesara.shared(np.random.default_rng(0)) - x = pm.Normal.dist(rng=rng) - assert x.owner.inputs[0] is rng - f = compile_pymc([], x) - assert not np.isclose(f(), f()) - - # Check that update was not done inplace - assert not hasattr(rng, "default_update") - f = aesara.function([], x) - assert f() == f() - - -def test_compile_pymc_with_updates(): - x = aesara.shared(0) - f = compile_pymc([], x, updates={x: x + 1}) - assert f() == 0 - assert f() == 1 - - -def test_compile_pymc_missing_default_explicit_updates(): - rng = aesara.shared(np.random.default_rng(0)) - x = pm.Normal.dist(rng=rng) - - # By default, compile_pymc should update the rng of x - f = compile_pymc([], x) - assert f() != f() - - # An explicit update should override the default_update, like aesara.function does - # For testing purposes, we use an update that leaves the rng unchanged - f = compile_pymc([], x, updates={rng: rng}) - assert f() == f() - - # If we specify a custom default_update directly it should use that instead. - rng.default_update = rng - f = compile_pymc([], x) - assert f() == f() - - # And again, it should be overridden by an explicit update - f = compile_pymc([], x, updates={rng: x.owner.outputs[0]}) - assert f() != f() - - -def test_compile_pymc_updates_inputs(): - """Test that compile_pymc does not include rngs updates of variables that are inputs - or ancestors to inputs - """ - x = at.random.normal() - y = at.random.normal(x) - z = at.random.normal(y) - - for inputs, rvs_in_graph in ( - ([], 3), - ([x], 2), - ([y], 1), - ([z], 0), - ([x, y], 1), - ([x, y, z], 0), - ): - fn = compile_pymc(inputs, z, on_unused_input="ignore") - fn_fgraph = fn.maker.fgraph - # Each RV adds a shared input for its rng - assert len(fn_fgraph.inputs) == len(inputs) + rvs_in_graph - # If the output is an input, the graph has a DeepCopyOp - assert len(fn_fgraph.apply_nodes) == max(rvs_in_graph, 1) - # Each RV adds a shared output for its rng - assert len(fn_fgraph.outputs) == 1 + rvs_in_graph +class TestCompilePyMC: + def test_check_bounds_flag(self): + """Test that CheckParameterValue Ops are replaced or removed when using compile_pymc""" + logp = at.ones(3) + cond = np.array([1, 0, 1]) + bound = check_parameters(logp, cond) + + with pm.Model() as m: + pass + + with pytest.raises(ParameterValueError): + aesara.function([], bound)() + + m.check_bounds = False + with m: + assert np.all(compile_pymc([], bound)() == 1) + + m.check_bounds = True + with m: + assert np.all(compile_pymc([], bound)() == -np.inf) + + def test_compile_pymc_sets_rng_updates(self): + rng = aesara.shared(np.random.default_rng(0)) + x = pm.Normal.dist(rng=rng) + assert x.owner.inputs[0] is rng + f = compile_pymc([], x) + assert not np.isclose(f(), f()) + + # Check that update was not done inplace + assert not hasattr(rng, "default_update") + f = aesara.function([], x) + assert f() == f() + + def test_compile_pymc_with_updates(self): + x 
= aesara.shared(0) + f = compile_pymc([], x, updates={x: x + 1}) + assert f() == 0 + assert f() == 1 + + def test_compile_pymc_missing_default_explicit_updates(self): + rng = aesara.shared(np.random.default_rng(0)) + x = pm.Normal.dist(rng=rng) + + # By default, compile_pymc should update the rng of x + f = compile_pymc([], x) + assert f() != f() + + # An explicit update should override the default_update, like aesara.function does + # For testing purposes, we use an update that leaves the rng unchanged + f = compile_pymc([], x, updates={rng: rng}) + assert f() == f() + + # If we specify a custom default_update directly it should use that instead. + rng.default_update = rng + f = compile_pymc([], x) + assert f() == f() + + # And again, it should be overridden by an explicit update + f = compile_pymc([], x, updates={rng: x.owner.outputs[0]}) + assert f() != f() + + def test_compile_pymc_updates_inputs(self): + """Test that compile_pymc does not include rngs updates of variables that are inputs + or ancestors to inputs + """ + x = at.random.normal() + y = at.random.normal(x) + z = at.random.normal(y) + + for inputs, rvs_in_graph in ( + ([], 3), + ([x], 2), + ([y], 1), + ([z], 0), + ([x, y], 1), + ([x, y, z], 0), + ): + fn = compile_pymc(inputs, z, on_unused_input="ignore") + fn_fgraph = fn.maker.fgraph + # Each RV adds a shared input for its rng + assert len(fn_fgraph.inputs) == len(inputs) + rvs_in_graph + # If the output is an input, the graph has a DeepCopyOp + assert len(fn_fgraph.apply_nodes) == max(rvs_in_graph, 1) + # Each RV adds a shared output for its rng + assert len(fn_fgraph.outputs) == 1 + rvs_in_graph From 20887a575ccedabfac69fff771d3830955501eb0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 3 May 2022 12:02:10 +0200 Subject: [PATCH 18/26] Remove remaining uses of default_updates in codebase --- pymc/aesaraf.py | 22 +++++++++++++++------- pymc/distributions/mixture.py | 10 +++++----- pymc/model.py | 10 +++------- pymc/tests/test_aesaraf.py | 23 +++++++++++++++++++++++ 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/pymc/aesaraf.py b/pymc/aesaraf.py index b263a20b9b..e2de6d9fe2 100644 --- a/pymc/aesaraf.py +++ b/pymc/aesaraf.py @@ -30,6 +30,7 @@ import numpy as np import scipy.sparse as sps +from aeppl.abstract import MeasurableVariable from aeppl.logprob import CheckParameterValue from aesara import config, scalar from aesara.compile.mode import Mode, get_mode @@ -978,14 +979,21 @@ def compile_pymc( # TODO: This won't work for variables with InnerGraphs (Scan and OpFromGraph) rng_updates = {} output_to_list = outputs if isinstance(outputs, (list, tuple)) else [outputs] - for rv in ( - node - for node in vars_between(inputs, output_to_list) - if node.owner and isinstance(node.owner.op, RandomVariable) and node not in inputs + for random_var in ( + var + for var in vars_between(inputs, output_to_list) + if var.owner + and isinstance(var.owner.op, (RandomVariable, MeasurableVariable)) + and var not in inputs ): - rng = rv.owner.inputs[0] - if not hasattr(rng, "default_update"): - rng_updates[rng] = rv.owner.outputs[0] + if isinstance(random_var.owner.op, RandomVariable): + rng = random_var.owner.inputs[0] + if not hasattr(rng, "default_update"): + rng_updates[rng] = random_var.owner.outputs[0] + else: + update_fn = getattr(random_var.owner.op, "update", None) + if update_fn is not None: + rng_updates.update(update_fn(random_var.owner)) # If called inside a model context, see if check_bounds flag is set to False try: diff --git a/pymc/distributions/mixture.py 
b/pymc/distributions/mixture.py index 77f5e4dee7..b613f90bac 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -21,7 +21,7 @@ from aeppl.logprob import _logcdf, _logprob from aeppl.transforms import IntervalTransform from aesara.compile.builders import OpFromGraph -from aesara.graph.basic import equal_computations +from aesara.graph.basic import Node, equal_computations from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable @@ -44,6 +44,10 @@ class MarginalMixtureRV(OpFromGraph): default_output = 1 + def update(self, node: Node): + # Update for the internal mix_indexes RV + return {node.inputs[0]: node.outputs[0]} + MeasurableVariable.register(MarginalMixtureRV) @@ -294,10 +298,6 @@ def rv_op(cls, weights, *components, size=None, rngs=None): # Create the actual MarginalMixture variable mix_out = mix_op(mix_indexes_rng, weights, *components) - # We need to set_default_updates ourselves, because the choices RV is hidden - # inside OpFromGraph and PyMC will never find it otherwise - mix_indexes_rng.default_update = mix_out.owner.outputs[0] - # Reference nodes to facilitate identification in other classmethods mix_out.tag.weights = weights mix_out.tag.components = components diff --git a/pymc/model.py b/pymc/model.py index 26b8ed6600..d13091be69 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -1363,13 +1363,9 @@ def make_obs_var( # size of the masked and unmasked array happened to coincide _, size, _, *inps = observed_rv_var.owner.inputs rng = self.model.next_rng() - observed_rv_var = observed_rv_var.owner.op(*inps, size=size, rng=rng) - # Add default_update to new rng - new_rng = observed_rv_var.owner.outputs[0] - observed_rv_var.update = (rng, new_rng) - rng.default_update = new_rng - observed_rv_var.name = f"{name}_observed" - + observed_rv_var = observed_rv_var.owner.op( + *inps, size=size, rng=rng, name=f"{name}_observed" + ) observed_rv_var.tag.observations = nonmissing_data self.create_value_var(observed_rv_var, transform=None, value_var=nonmissing_data) diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py index ef05068879..709f21c6ef 100644 --- a/pymc/tests/test_aesaraf.py +++ b/pymc/tests/test_aesaraf.py @@ -22,7 +22,9 @@ import pytest import scipy.sparse as sps +from aeppl.abstract import MeasurableVariable from aeppl.logprob import ParameterValueError +from aesara.compile.builders import OpFromGraph from aesara.graph.basic import Constant, Variable, ancestors, equal_computations from aesara.tensor.random.basic import normal, uniform from aesara.tensor.random.op import RandomVariable @@ -681,3 +683,24 @@ def test_compile_pymc_updates_inputs(self): assert len(fn_fgraph.apply_nodes) == max(rvs_in_graph, 1) # Each RV adds a shared output for its rng assert len(fn_fgraph.outputs) == 1 + rvs_in_graph + + def test_compile_pymc_custom_update_op(self): + """Test that custom MeasurableVariable Op updates are used by compile_pymc""" + + class UnmeasurableOp(OpFromGraph): + def update(self, node): + return {node.inputs[0]: node.inputs[0] + 1} + + dummy_inputs = [at.scalar(), at.scalar()] + dummy_outputs = [at.add(*dummy_inputs)] + dummy_x = UnmeasurableOp(dummy_inputs, dummy_outputs)(aesara.shared(1.0), 1.0) + + # Check that there are no updates at first + fn = compile_pymc(inputs=[], outputs=dummy_x) + assert fn() == fn() == 2.0 + + # And they are enabled once the Op is registered as Measurable + MeasurableVariable.register(UnmeasurableOp) + fn = compile_pymc(inputs=[], outputs=dummy_x) + assert fn() == 2.0 
+ assert fn() == 3.0 From bb614ddd6152d2af8ada72d09ed2c274b330bfbc Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 4 May 2022 12:05:56 +0200 Subject: [PATCH 19/26] Remove redundant/wrong docstrings from GaussianRandomWalk logp --- pymc/distributions/timeseries.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index cc4d744e5a..36a34fed4b 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -23,10 +23,10 @@ from aesara.tensor.random.utils import normalize_size_param from pymc.aesaraf import change_rv_size, floatX, intX -from pymc.distributions import distribution, logprob, multivariate +from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters -from pymc.distributions.logprob import ignore_logprob +from pymc.distributions.logprob import ignore_logprob, logp from pymc.distributions.shape_utils import rv_size_is_none, to_tuple from pymc.util import check_dist_not_registered @@ -218,27 +218,14 @@ def logp( init: at.Variable, steps: at.Variable, ) -> at.TensorVariable: - """Calculate log-probability of Gaussian Random Walk distribution at specified value. - - Parameters - ---------- - value: at.Variable, - mu: at.Variable, - sigma: at.Variable, - init: at.Variable, Not used - steps: at.Variable, Not used - - Returns - ------- - TensorVariable - """ + """Calculate log-probability of Gaussian Random Walk distribution at specified value.""" # Calculate initialization logp - init_logp = logprob.logp(init, value[..., 0]) + init_logp = logp(init, value[..., 0]) # Make time series stationary around the mean value stationary_series = value[..., 1:] - value[..., :-1] - series_logp = logprob.logp(Normal.dist(mu, sigma), stationary_series) + series_logp = logp(Normal.dist(mu, sigma), stationary_series) return check_parameters( init_logp + series_logp.sum(axis=-1), From 64b20e02b7f94050396dce75a745258189cbf677 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 4 May 2022 12:41:28 +0200 Subject: [PATCH 20/26] Add moment to GaussianRandomWalk and fix mu/sigma broadcasting bug --- pymc/distributions/timeseries.py | 17 +++++++++++++-- pymc/tests/test_distributions_moments.py | 1 - pymc/tests/test_distributions_timeseries.py | 23 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 36a34fed4b..630b1cbeb9 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -26,6 +26,7 @@ from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters +from pymc.distributions.distribution import moment from pymc.distributions.logprob import ignore_logprob, logp from pymc.distributions.shape_utils import rv_size_is_none, to_tuple from pymc.util import check_dist_not_registered @@ -131,7 +132,9 @@ def rng_fn( else: dist_shape = (*size, int(steps)) - innovations = rng.normal(loc=mu, scale=sigma, size=dist_shape) + # Add one dimension to the right, so that mu and sigma broadcast safely along + # the steps dimension + innovations = rng.normal(loc=mu[..., None], scale=sigma[..., None], size=dist_shape) grw = np.concatenate([init[..., None], innovations], axis=-1) return np.cumsum(grw, axis=-1) @@ -211,6 
+214,14 @@ def dist( return super().dist([mu, sigma, init, steps], size=size, **kwargs) + def moment(rv, size, mu, sigma, init, steps): + grw_moment = at.zeros_like(rv) + grw_moment = at.set_subtensor(grw_moment[..., 0], moment(init)) + # Add one dimension to the right, so that mu broadcasts safely along the steps + # dimension + grw_moment = at.set_subtensor(grw_moment[..., 1:], mu[..., None]) + return at.cumsum(grw_moment, axis=-1) + def logp( value: at.Variable, mu: at.Variable, @@ -225,7 +236,9 @@ def logp( # Make time series stationary around the mean value stationary_series = value[..., 1:] - value[..., :-1] - series_logp = logp(Normal.dist(mu, sigma), stationary_series) + # Add one dimension to the right, so that mu and sigma broadcast safely along + # the steps dimension + series_logp = logp(Normal.dist(mu[..., None], sigma[..., None]), stationary_series) return check_parameters( init_logp + series_logp.sum(axis=-1), diff --git a/pymc/tests/test_distributions_moments.py b/pymc/tests/test_distributions_moments.py index b9012e9ad4..f2ac38bd16 100644 --- a/pymc/tests/test_distributions_moments.py +++ b/pymc/tests/test_distributions_moments.py @@ -103,7 +103,6 @@ def test_all_distributions_have_moments(): dist_module.timeseries.AR, dist_module.timeseries.AR1, dist_module.timeseries.GARCH11, - dist_module.timeseries.GaussianRandomWalk, dist_module.timeseries.MvGaussianRandomWalk, dist_module.timeseries.MvStudentTRandomWalk, } diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py index a7d3ab7d17..c66e197553 100644 --- a/pymc/tests/test_distributions_timeseries.py +++ b/pymc/tests/test_distributions_timeseries.py @@ -29,6 +29,7 @@ from pymc.model import Model from pymc.sampling import sample, sample_posterior_predictive from pymc.tests.helpers import select_by_precision +from pymc.tests.test_distributions_moments import assert_moment_is_expected from pymc.tests.test_distributions_random import BaseTestDistributionRandom @@ -142,6 +143,28 @@ def test_gaussian_random_walk_init_dist_logp(self, init): pm.logp(init, 0).eval() + scipy.stats.norm.logpdf(0), ) + @pytest.mark.parametrize( + "mu, sigma, init, steps, size, expected", + [ + (0, 1, Normal.dist(1), 10, None, np.ones((11,))), + (1, 1, Normal.dist(0), 10, (2,), np.full((2, 11), np.arange(11))), + (1, 1, Normal.dist([0, 1]), 10, None, np.vstack((np.arange(11), np.arange(11) + 1))), + (0, [1, 1], Normal.dist(0), 10, None, np.zeros((2, 11))), + ( + [1, -1], + 1, + Normal.dist(0), + 10, + (4, 2), + np.full((4, 2, 11), np.vstack((np.arange(11), -np.arange(11)))), + ), + ], + ) + def test_moment(self, mu, sigma, init, steps, size, expected): + with Model() as model: + GaussianRandomWalk("x", mu=mu, sigma=sigma, init=init, steps=steps, size=size) + assert_moment_is_expected(model, expected) + @pytest.mark.xfail(reason="Timeseries not refactored") def test_AR(): From 345067ebb961bda40ab846080e2e59369497210f Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 29 Apr 2022 17:10:07 +0200 Subject: [PATCH 21/26] Do not create temporary SymbolicDistribution just to retrieve number of RNGs needed Reordered methods for consistency --- pymc/distributions/censored.py | 32 +++++++++++++++--------------- pymc/distributions/distribution.py | 19 +++++++++--------- pymc/distributions/mixture.py | 25 +++++++++++------------ 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index 8aaad5c106..a59965c857 100644 --- 
a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -76,6 +76,14 @@ def dist(cls, dist, lower, upper, **kwargs): check_dist_not_registered(dist) return super().dist([dist, lower, upper], **kwargs) + @classmethod + def num_rngs(cls, *args, **kwargs): + return 1 + + @classmethod + def ndim_supp(cls, *dist_params): + return 0 + @classmethod def rv_op(cls, dist, lower=None, upper=None, size=None, rngs=None): @@ -96,24 +104,12 @@ def rv_op(cls, dist, lower=None, upper=None, size=None, rngs=None): rv_out.tag.upper = upper if rngs is not None: - rv_out = cls.change_rngs(rv_out, rngs) + rv_out = cls._change_rngs(rv_out, rngs) return rv_out @classmethod - def ndim_supp(cls, *dist_params): - return 0 - - @classmethod - def change_size(cls, rv, new_size, expand=False): - dist = rv.tag.dist - lower = rv.tag.lower - upper = rv.tag.upper - new_dist = change_rv_size(dist, new_size, expand=expand) - return cls.rv_op(new_dist, lower, upper) - - @classmethod - def change_rngs(cls, rv, new_rngs): + def _change_rngs(cls, rv, new_rngs): (new_rng,) = new_rngs dist_node = rv.tag.dist.owner lower = rv.tag.lower @@ -123,8 +119,12 @@ def change_rngs(cls, rv, new_rngs): return cls.rv_op(new_dist, lower, upper) @classmethod - def graph_rvs(cls, rv): - return (rv.tag.dist,) + def change_size(cls, rv, new_size, expand=False): + dist = rv.tag.dist + lower = rv.tag.lower + upper = rv.tag.upper + new_dist = change_rv_size(dist, new_size, expand=expand) + return cls.rv_op(new_dist, lower, upper) @_moment.register(Clip) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index ac3541380b..55f17e277b 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -396,18 +396,19 @@ def __new__( to a canonical parametrization. It should call `super().dist()`, passing a list with the default parameters as the first and only non keyword argument, followed by other keyword arguments like size and rngs, and return the result - cls.rv_op - Returns a TensorVariable that represents the symbolic distribution - parametrized by a default set of parameters and a size and rngs arguments + cls.num_rngs + Returns the number of rngs given the same arguments passed by the user when + calling the distribution cls.ndim_supp - Returns the support of the symbolic distribution, given the default + Returns the support of the symbolic distribution, given the default set of parameters. This may not always be constant, for instance if the symbolic distribution can be defined based on an arbitrary base distribution. + cls.rv_op + Returns a TensorVariable that represents the symbolic distribution + parametrized by a default set of parameters and a size and rngs arguments cls.change_size Returns an equivalent symbolic distribution with a different size. This is analogous to `pymc.aesaraf.change_rv_size` for `RandomVariable`s. - cls.graph_rvs - Returns base RVs in a symbolic distribution. 
Parameters ---------- @@ -465,9 +466,9 @@ def __new__( raise TypeError(f"Name needs to be a string but got: {name}") if rngs is None: - # Create a temporary rv to obtain number of rngs needed - temp_graph = cls.dist(*args, rngs=None, **kwargs) - rngs = [model.next_rng() for _ in cls.graph_rvs(temp_graph)] + # Instead of passing individual RNG variables we could pass a RandomStream + # and let the classes create as many RNGs as they need + rngs = [model.next_rng() for _ in range(cls.num_rngs(*args, **kwargs))] elif not isinstance(rngs, (list, tuple)): rngs = [rngs] diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index b613f90bac..a1c6129ebe 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -205,6 +205,18 @@ def dist(cls, w, comp_dists, **kwargs): w = at.as_tensor_variable(w) return super().dist([w, *comp_dists], **kwargs) + @classmethod + def num_rngs(cls, w, comp_dists, **kwargs): + if not isinstance(comp_dists, (tuple, list)): + # comp_dists is a single component + comp_dists = [comp_dists] + return len(comp_dists) + 1 + + @classmethod + def ndim_supp(cls, weights, *components): + # We already checked that all components have the same support dimensionality + return components[0].owner.op.ndim_supp + @classmethod def rv_op(cls, weights, *components, size=None, rngs=None): # Update rngs if provided @@ -329,11 +341,6 @@ def _resize_components(cls, size, *components): return [change_rv_size(component, size) for component in components] - @classmethod - def ndim_supp(cls, weights, *components): - # We already checked that all components have the same support dimensionality - return components[0].owner.op.ndim_supp - @classmethod def change_size(cls, rv, new_size, expand=False): weights = rv.tag.weights @@ -355,14 +362,6 @@ def change_size(cls, rv, new_size, expand=False): return cls.rv_op(weights, *components, rngs=rngs, size=None) - @classmethod - def graph_rvs(cls, rv): - # We return rv, which is itself a pseudo RandomVariable, that contains a - # mix_indexes_ RV in its inner graph. We want super().dist() to generate - # (components + 1) rngs for us, and it will do so based on how many elements - # we return here - return (*rv.tag.components, rv) - @_get_measurable_outputs.register(MarginalMixtureRV) def _get_measurable_outputs_MarginalMixtureRV(op, node): From a3d88442805c0302bbc97c81ec76d2abe95f1f9f Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 4 May 2022 17:21:58 +0200 Subject: [PATCH 22/26] Move SymbolicDistribution docstring to body of class --- pymc/distributions/distribution.py | 66 +++++++++++++++--------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 55f17e277b..138b3cd253 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -364,6 +364,40 @@ def dist( class SymbolicDistribution: + """Symbolic statistical distribution + + While traditional PyMC distributions are represented by a single RandomVariable + graph, Symbolic distributions correspond to a larger graph that contains one or + more RandomVariables and an arbitrary number of deterministic operations, which + represent their own kind of distribution. + + The graphs returned by symbolic distributions can be evaluated directly to + obtain valid draws and can further be parsed by Aeppl to derive the + corresponding logp at runtime. + + Check pymc.distributions.Censored for an example of a symbolic distribution. 
+ + Symbolic distributions must implement the following classmethods: + cls.dist + Performs input validation and converts optional alternative parametrizations + to a canonical parametrization. It should call `super().dist()`, passing a + list with the default parameters as the first and only non keyword argument, + followed by other keyword arguments like size and rngs, and return the result + cls.num_rngs + Returns the number of rngs given the same arguments passed by the user when + calling the distribution + cls.ndim_supp + Returns the support of the symbolic distribution, given the default set of + parameters. This may not always be constant, for instance if the symbolic + distribution can be defined based on an arbitrary base distribution. + cls.rv_op + Returns a TensorVariable that represents the symbolic distribution + parametrized by a default set of parameters and a size and rngs arguments + cls.change_size + Returns an equivalent symbolic distribution with a different size. This is + analogous to `pymc.aesaraf.change_rv_size` for `RandomVariable`s. + """ + def __new__( cls, name: str, @@ -379,37 +413,6 @@ def __new__( """Adds a TensorVariable corresponding to a PyMC symbolic distribution to the current model. - While traditional PyMC distributions are represented by a single RandomVariable - graph, Symbolic distributions correspond to a larger graph that contains one or - more RandomVariables and an arbitrary number of deterministic operations, which - represent their own kind of distribution. - - The graphs returned by symbolic distributions can be evaluated directly to - obtain valid draws and can further be parsed by Aeppl to derive the - corresponding logp at runtime. - - Check pymc.distributions.Censored for an example of a symbolic distribution. - - Symbolic distributions must implement the following classmethods: - cls.dist - Performs input validation and converts optional alternative parametrizations - to a canonical parametrization. It should call `super().dist()`, passing a - list with the default parameters as the first and only non keyword argument, - followed by other keyword arguments like size and rngs, and return the result - cls.num_rngs - Returns the number of rngs given the same arguments passed by the user when - calling the distribution - cls.ndim_supp - Returns the support of the symbolic distribution, given the default set of - parameters. This may not always be constant, for instance if the symbolic - distribution can be defined based on an arbitrary base distribution. - cls.rv_op - Returns a TensorVariable that represents the symbolic distribution - parametrized by a default set of parameters and a size and rngs arguments - cls.change_size - Returns an equivalent symbolic distribution with a different size. This is - analogous to `pymc.aesaraf.change_rv_size` for `RandomVariable`s. - Parameters ---------- cls : type @@ -524,7 +527,6 @@ def dist( The inputs to the `RandomVariable` `Op`. shape : int, tuple, Variable, optional A tuple of sizes for each dimension of the new RV. - An Ellipsis (...) may be inserted in the last position to short-hand refer to all the dimensions that the RV would get if no shape/size/dims were passed at all. 
size : int, tuple, Variable, optional From 5e34387636af9672e871572dd25c8cb6e3d7e11f Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 29 Apr 2022 17:04:55 +0200 Subject: [PATCH 23/26] Refactor AR distribution * Deprecates AR1 distribution * Implements random and moment methods * Batching works on the left as with other distributions --- pymc/distributions/__init__.py | 2 - pymc/distributions/timeseries.py | 448 ++++++++++++++------ pymc/tests/test_distributions.py | 14 - pymc/tests/test_distributions_moments.py | 2 - pymc/tests/test_distributions_timeseries.py | 254 ++++++++--- 5 files changed, 517 insertions(+), 203 deletions(-) diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 6679972f62..8680528682 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -102,7 +102,6 @@ from pymc.distributions.simulator import Simulator from pymc.distributions.timeseries import ( AR, - AR1, GARCH11, GaussianRandomWalk, MvGaussianRandomWalk, @@ -169,7 +168,6 @@ "WishartBartlett", "LKJCholeskyCov", "LKJCorr", - "AR1", "AR", "AsymmetricLaplace", "GaussianRandomWalk", diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 630b1cbeb9..7649d0069d 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -11,14 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings -from typing import Tuple, Union +from typing import Optional, Tuple, Union +import aesara import aesara.tensor as at import numpy as np +from aeppl.abstract import MeasurableVariable, _get_measurable_outputs +from aeppl.logprob import _logprob from aesara import scan +from aesara.compile.builders import OpFromGraph +from aesara.graph import FunctionGraph, optimize_graph +from aesara.graph.basic import Node from aesara.raise_op import Assert +from aesara.tensor import TensorVariable +from aesara.tensor.basic_opt import ShapeFeature, topo_constant_folding from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.utils import normalize_size_param @@ -26,13 +35,12 @@ from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters -from pymc.distributions.distribution import moment +from pymc.distributions.distribution import SymbolicDistribution, _moment, moment from pymc.distributions.logprob import ignore_logprob, logp -from pymc.distributions.shape_utils import rv_size_is_none, to_tuple +from pymc.distributions.shape_utils import Shape, rv_size_is_none, to_tuple from pymc.util import check_dist_not_registered __all__ = [ - "AR1", "AR", "GaussianRandomWalk", "GARCH11", @@ -42,6 +50,53 @@ ] +def get_steps_from_shape( + steps: Optional[Union[int, np.ndarray, TensorVariable]], + shape: Optional[Shape], + step_shape_offset: int = 0, +): + """Extract number of steps from shape information + + Parameters + ---------- + steps: + User specified steps for timeseries distribution + shape: + User specified shape for timeseries distribution + step_shape_offset: + Difference between last shape dimension and number of steps in timeseries + distribution, defaults to 0 + + Raises + ------ + ValueError + If neither shape nor steps are provided + + Returns + ------- + steps + Steps, if specified directly by user, or inferred from the last dimension of + shape. 
When both steps and shape are provided, a symbolic Assert is added + to make sure they are consistent. + """ + steps_from_shape = None + if shape is not None: + shape = to_tuple(shape) + if shape[-1] is not ...: + steps_from_shape = shape[-1] - step_shape_offset + if steps is None: + if steps_from_shape is not None: + steps = steps_from_shape + else: + raise ValueError("Must specify steps or shape parameter") + elif steps_from_shape is not None: + # Assert that steps and shape are consistent + steps = Assert(msg="Steps do not match last shape dimension")( + steps, at.eq(steps, steps_from_shape) + ) + return steps + + class GaussianRandomWalkRV(RandomVariable): """ GaussianRandomWalk Random Variable @@ -176,25 +231,7 @@ def dist( mu = at.as_tensor_variable(floatX(mu)) sigma = at.as_tensor_variable(floatX(sigma)) - # Check if shape contains information about number of steps - steps_from_shape = None - shape = kwargs.get("shape", None) - if shape is not None: - shape = to_tuple(shape) - if shape[-1] is not ...: - steps_from_shape = shape[-1] - 1 - - if steps is None: - if steps_from_shape is not None: - steps = steps_from_shape - else: - raise ValueError("Must specify steps or shape parameter") - elif steps_from_shape is not None: - # Assert that steps and shape are consistent - steps = Assert(msg="Steps do not match last shape dimension")( - steps, at.eq(steps, steps_from_shape) - ) - + steps = get_steps_from_shape(steps, kwargs.get("shape", None), step_shape_offset=1) steps = at.as_tensor_variable(intX(steps)) # If no scalar distribution is passed then initialize with a Normal of same mu and sigma @@ -247,62 +284,34 @@ def logp( ) -class AR1(distribution.Continuous): - """ - Autoregressive process with 1 lag. +class AutoRegressiveRV(OpFromGraph): + """A placeholder used to specify a log-likelihood for an AR sub-graph.""" - Parameters - ---------- - k: tensor - effect of lagged value on current value - tau_e: tensor - precision for innovations - """ + default_output = 1 + ar_order: int + constant_term: bool - def __init__(self, k, tau_e, *args, **kwargs): + def __init__(self, *args, ar_order, constant_term, **kwargs): + self.ar_order = ar_order + self.constant_term = constant_term super().__init__(*args, **kwargs) - self.k = k = at.as_tensor_variable(k) - self.tau_e = tau_e = at.as_tensor_variable(tau_e) - self.tau = tau_e * (1 - k**2) - self.mode = at.as_tensor_variable(0.0) - def logp(self, x): - """ - Calculate log-probability of AR1 distribution at specified value. + def update(self, node: Node): + """Return the update mapping for the noise RV.""" + # Since noise is a shared variable it shows up as the last node input + return {node.inputs[-1]: node.outputs[0]} - Parameters - ---------- - x: numeric - Value for which log-probability is calculated. - Returns - ------- - TensorVariable - """ - k = self.k - tau_e = self.tau_e # innovation precision - tau = tau_e * (1 - k**2) # ar1 precision - - x_im1 = x[:-1] - x_i = x[1:] - boundary = Normal.dist(0.0, tau=tau).logp - - innov_like = Normal.dist(k * x_im1, tau=tau_e).logp(x_i) - return boundary(x[0]) + at.sum(innov_like) - - -class AR(distribution.Continuous): - r""" - Autoregressive process with p lags. +class AR(SymbolicDistribution): + r"""Autoregressive process with p lags. .. math:: x_t = \rho_0 + \rho_1 x_{t-1} + \ldots + \rho_p x_{t-p} + \epsilon_t, \epsilon_t \sim N(0,\sigma^2) - The innovation can be parameterized either in terms of precision - or standard deviation. 
The link between the two parametrizations is - given by + The innovation can be parameterized either in terms of precision or standard + deviation. The link between the two parametrizations is given by .. math:: @@ -310,79 +319,266 @@ class AR(distribution.Continuous): Parameters ---------- - rho: tensor - Tensor of autoregressive coefficients. The first dimension is the p lag. - sigma: float - Standard deviation of innovation (sigma > 0). (only required if tau is not specified) - tau: float - Precision of innovation (tau > 0). (only required if sigma is not specified) - constant: bool (optional, default = False) - Whether to include a constant. - init: distribution - distribution for initial values (Defaults to Flat()) - """ + rho: tensor_like of float + Tensor of autoregressive coefficients. The n-th entry in the last dimension is + the coefficient for the n-th lag. + sigma: tensor_like of float, optional + Standard deviation of innovation (sigma > 0). Defaults to 1. Only required if + tau is not specified. + tau: tensor_like of float + Precision of innovation (tau > 0). + constant: bool, optional + Whether the first element of rho should be used as a constant term in the AR + process. Defaults to False + init_dist: unnamed distribution, optional + Scalar or vector distribution for initial values. Defaults to Normal(0, sigma). + Distribution should be created via the `.dist()` API, and have dimension + (*size, ar_order). If not, it will be automatically resized. + + .. warning:: init_dist will be cloned, rendering it independent of the one passed as input. + + ar_order: int, optional + Order of the AR process. Inferred from length of the last dimension of rho, if + possible. ar_order = rho.shape[-1] if constant else rho.shape[-1] - 1 - def __init__(self, rho, sigma=None, tau=None, constant=False, init=None, *args, **kwargs): - super().__init__(*args, **kwargs) - tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = at.as_tensor_variable(sigma) - self.tau = at.as_tensor_variable(tau) + Notes + ----- + The init distribution will be cloned, rendering it distinct from the one passed as + input. - self.mean = at.as_tensor_variable(0.0) + Examples + -------- + .. code-block:: python - if isinstance(rho, list): - p = len(rho) - else: - try: - shape_ = rho.shape.tag.test_value - except AttributeError: - shape_ = rho.shape + # Create an AR of order 3, with a constant term + with pm.Model() as AR3: + # The first coefficient will be the constant term + coefs = pm.Normal("coefs", 0, size=4) + # We need one init variable for each lag, hence size=3 + init = pm.Normal.dist(5, size=3) + ar3 = pm.AR("ar3", coefs, sigma=1.0, init_dist=init, constant=True, steps=500) - if hasattr(shape_, "size") and shape_.size == 0: - p = 1 - else: - p = shape_[0] + """ + + @classmethod + def dist( + cls, + rho, + sigma=None, + tau=None, + *, + init_dist=None, + steps=None, + constant=False, + ar_order=None, + **kwargs, + ): + _, sigma = get_tau_sigma(tau=tau, sigma=sigma) + sigma = at.as_tensor_variable(floatX(sigma)) + rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho))) - if constant: - self.p = p - 1 + if "init" in kwargs: + warnings.warn( + "init parameter is now called init_dist. 
Using init will raise an error in a future release.", + FutureWarning, + ) + init_dist = kwargs["init"] + + steps = get_steps_from_shape(steps, kwargs.get("shape", None)) + steps = at.as_tensor_variable(intX(steps), ndim=0) + + if ar_order is None: + # If ar_order is not specified we do constant folding on the shape of rhos + # to retrieve it. For example, this will detect that + # Normal(size=(5, 3)).shape[-1] == 3, which is not known by Aesara before. + shape_fg = FunctionGraph( + outputs=[rhos.shape[-1]], + features=[ShapeFeature()], + clone=True, + ) + (folded_shape,) = optimize_graph(shape_fg, custom_opt=topo_constant_folding).outputs + folded_shape = getattr(folded_shape, "data", None) + if folded_shape is None: + raise ValueError( + "Could not infer ar_order from last dimension of rho. Pass it " + "explictily or make sure rho have a static shape" + ) + ar_order = int(folded_shape) - int(constant) + if ar_order < 1: + raise ValueError( + "Inferred ar_order is smaller than 1. Increase the last dimension " + "of rho or remove constant_term" + ) + + if init_dist is not None: + if not isinstance(init_dist, TensorVariable) or not isinstance( + init_dist.owner.op, RandomVariable + ): + raise ValueError( + f"Init dist must be a distribution created via the `.dist()` API, " + f"got {type(init_dist)}" + ) + check_dist_not_registered(init_dist) + if init_dist.owner.op.ndim_supp > 1: + raise ValueError( + "Init distribution must have a scalar or vector support dimension, ", + f"got ndim_supp={init_dist.owner.op.ndim_supp}.", + ) else: - self.p = p + # Sigma must broadcast with ar_order + init_dist = Normal.dist(sigma=at.shape_padright(sigma), size=(*sigma.shape, ar_order)) - self.constant = constant - self.rho = rho = at.as_tensor_variable(rho) - self.init = init or Flat.dist() + # Tell Aeppl to ignore init_dist, as it will be accounted for in the logp term + init_dist = ignore_logprob(init_dist) - def logp(self, value): - """ - Calculate log-probability of AR distribution at specified value. + return super().dist([rhos, sigma, init_dist, steps, ar_order, constant], **kwargs) - Parameters - ---------- - value: numeric - Value for which log-probability is calculated. 
+ @classmethod + def num_rngs(cls, *args, **kwargs): + return 2 - Returns - ------- - TensorVariable - """ - if self.constant: - x = at.add( - *(self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)) - ) - eps = value[self.p :] - self.rho[0] - x + @classmethod + def ndim_supp(cls, *args): + return 1 + + @classmethod + def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None, rngs=None): + + if rngs is None: + rngs = [ + aesara.shared(np.random.default_rng(seed)) + for seed in np.random.SeedSequence().spawn(2) + ] + (init_dist_rng, noise_rng) = rngs + # Re-seed init_dist + if init_dist.owner.inputs[0] is not init_dist_rng: + _, *inputs = init_dist.owner.inputs + init_dist = init_dist.owner.op.make_node(init_dist_rng, *inputs).default_output() + + # Init dist should have shape (*size, ar_order) + if size is not None: + batch_size = size + else: + # In this case the size of the init_dist depends on the parameters shape + # The last dimension of rho and init_dist does not matter + batch_size = at.broadcast_shape(sigma, rhos[..., 0], init_dist[..., 0]) + if init_dist.owner.op.ndim_supp == 0: + init_dist_size = (*batch_size, ar_order) else: - if self.p == 1: - x = self.rho * value[:-1] + # In this case the support dimension must cover for ar_order + init_dist_size = batch_size + init_dist = change_rv_size(init_dist, init_dist_size) + + # Create OpFromGraph representing random draws form AR process + # Variables with underscore suffix are dummy inputs into the OpFromGraph + init_ = init_dist.type() + rhos_ = rhos.type() + sigma_ = sigma.type() + steps_ = steps.type() + + rhos_bcast_shape_ = init_.shape + if constant_term: + # In this case init shape is one unit smaller than rhos in the last dimension + rhos_bcast_shape_ = (*rhos_bcast_shape_[:-1], rhos_bcast_shape_[-1] + 1) + rhos_bcast_ = at.broadcast_to(rhos_, rhos_bcast_shape_) + + def step(*args): + *prev_xs, reversed_rhos, sigma, rng = args + if constant_term: + mu = reversed_rhos[-1] + at.sum(prev_xs * reversed_rhos[:-1], axis=0) else: - x = at.add( - *(self.rho[i] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)) - ) - eps = value[self.p :] - x + mu = at.sum(prev_xs * reversed_rhos, axis=0) + next_rng, new_x = Normal.dist(mu=mu, sigma=sigma, rng=rng).owner.outputs + return new_x, {rng: next_rng} + + # We transpose inputs as scan iterates over first dimension + innov_, innov_updates_ = aesara.scan( + fn=step, + outputs_info=[{"initial": init_.T, "taps": range(-ar_order, 0)}], + non_sequences=[rhos_bcast_.T[::-1], sigma_.T, noise_rng], + n_steps=at.max((0, steps_ - ar_order)), + strict=True, + ) + (noise_next_rng,) = tuple(innov_updates_.values()) + ar_ = at.concatenate([init_, innov_.T], axis=-1) + + ar_op = AutoRegressiveRV( + inputs=[rhos_, sigma_, init_, steps_], + outputs=[noise_next_rng, ar_], + ar_order=ar_order, + constant_term=constant_term, + inline=True, + ) + + ar = ar_op(rhos, sigma, init_dist, steps) + return ar - innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps) - init_like = self.init.logp(value[: self.p]) + @classmethod + def change_size(cls, rv, new_size, expand=False): + + if expand: + old_size = rv.shape[:-1] + new_size = at.concatenate([new_size, old_size]) + + init_dist_rng = rv.owner.inputs[2].owner.inputs[0] + noise_rng = rv.owner.inputs[-1] + + op = rv.owner.op + return cls.rv_op( + *rv.owner.inputs, + ar_order=op.ar_order, + constant_term=op.constant_term, + size=new_size, + rngs=(init_dist_rng, noise_rng), + ) - return at.sum(innov_like) + 
at.sum(init_like) + +MeasurableVariable.register(AutoRegressiveRV) + + +@_get_measurable_outputs.register(AutoRegressiveRV) +def _get_measurable_outputs_ar(op, node): + # This tells Aeppl that the second output is the measurable one + return [node.outputs[1]] + + +@_logprob.register(AutoRegressiveRV) +def ar_logp(op, values, rhos, sigma, init_dist, steps, noise_rng, **kwargs): + (value,) = values + + ar_order = op.ar_order + constant_term = op.constant_term + + # Convolve rhos with values + if constant_term: + expectation = at.add( + rhos[..., 0, None], + *( + rhos[..., i + 1, None] * value[..., ar_order - (i + 1) : -(i + 1)] + for i in range(ar_order) + ), + ) + else: + expectation = at.add( + *( + rhos[..., i, None] * value[..., ar_order - (i + 1) : -(i + 1)] + for i in range(ar_order) + ) + ) + # Compute and collapse logp across time dimension + innov_logp = at.sum( + logp(Normal.dist(0, sigma[..., None]), value[..., ar_order:] - expectation), axis=-1 + ) + init_logp = logp(init_dist, value[..., :ar_order]) + if init_dist.owner.op.ndim_supp == 0: + init_logp = at.sum(init_logp, axis=-1) + return init_logp + innov_logp + + +@_moment.register(AutoRegressiveRV) +def ar_moment(op, rv, rhos, sigma, init_dist, steps, noise_rng): + # Use last entry of init_dist moment as the moment for the whole AR + return at.full_like(rv, moment(init_dist)[..., -1, None]) class GARCH11(distribution.Continuous): diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index 536e337457..53f2357e26 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -60,7 +60,6 @@ def polyagamma_cdf(*args, **kwargs): from pymc.aesaraf import floatX, intX from pymc.distributions import ( - AR1, CAR, AsymmetricLaplace, Bernoulli, @@ -834,14 +833,6 @@ def mvt_logpdf(value, nu, Sigma, mu=0): return logp_mvt.sum() -def AR1_logpdf(value, k, tau_e): - tau = tau_e * (1 - k**2) - return ( - sp.norm(loc=0, scale=1 / np.sqrt(tau)).logpdf(value[0]) - + sp.norm(loc=k * value[:-1], scale=1 / np.sqrt(tau_e)).logpdf(value[1:]).sum() - ) - - def invlogit(x, eps=sys.float_info.epsilon): return (1.0 - 2.0 * eps) / (1.0 + np.exp(-x)) + eps @@ -2078,11 +2069,6 @@ def test_mvt(self, n): extra_args={"size": 2}, ) - @pytest.mark.parametrize("n", [2, 3, 4]) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_AR1(self, n): - check_logp(AR1, Vector(R, n), {"k": Unit, "tau_e": Rplus}, AR1_logpdf) - @pytest.mark.parametrize("n", [2, 3]) def test_wishart(self, n): check_logp( diff --git a/pymc/tests/test_distributions_moments.py b/pymc/tests/test_distributions_moments.py index f2ac38bd16..230989000c 100644 --- a/pymc/tests/test_distributions_moments.py +++ b/pymc/tests/test_distributions_moments.py @@ -100,8 +100,6 @@ def test_all_distributions_have_moments(): # Distributions that have not been refactored for V4 yet not_implemented = { - dist_module.timeseries.AR, - dist_module.timeseries.AR1, dist_module.timeseries.GARCH11, dist_module.timeseries.MvGaussianRandomWalk, dist_module.timeseries.MvStudentTRandomWalk, diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py index c66e197553..d2fe7275d2 100644 --- a/pymc/tests/test_distributions_timeseries.py +++ b/pymc/tests/test_distributions_timeseries.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara import numpy as np import pytest import scipy.stats @@ -18,16 +19,13 @@ import pymc as pm from pymc.aesaraf import floatX -from pymc.distributions.continuous import Flat, Normal -from pymc.distributions.timeseries import ( - AR, - AR1, - GARCH11, - EulerMaruyama, - GaussianRandomWalk, -) +from pymc.distributions.continuous import Flat, HalfNormal, Normal +from pymc.distributions.discrete import Constant +from pymc.distributions.logprob import logp +from pymc.distributions.multivariate import Dirichlet +from pymc.distributions.timeseries import AR, GARCH11, EulerMaruyama, GaussianRandomWalk from pymc.model import Model -from pymc.sampling import sample, sample_posterior_predictive +from pymc.sampling import draw, sample, sample_posterior_predictive from pymc.tests.helpers import select_by_precision from pymc.tests.test_distributions_moments import assert_moment_is_expected from pymc.tests.test_distributions_random import BaseTestDistributionRandom @@ -166,60 +164,198 @@ def test_moment(self, mu, sigma, init, steps, size, expected): assert_moment_is_expected(model, expected) -@pytest.mark.xfail(reason="Timeseries not refactored") -def test_AR(): - # AR1 - data = np.array([0.3, 1, 2, 3, 4]) - phi = np.array([0.99]) - with Model() as t: - y = AR("y", phi, sigma=1, shape=len(data)) - z = Normal("z", mu=phi * data[:-1], sigma=1, shape=len(data) - 1) - ar_like = t["y"].logp({"z": data[1:], "y": data}) - reg_like = t["z"].logp({"z": data[1:], "y": data}) - np.testing.assert_allclose(ar_like, reg_like) +class TestAR: + def test_order1_logp(self): + data = np.array([0.3, 1, 2, 3, 4]) + phi = np.array([0.99]) + with Model() as t: + y = AR("y", phi, sigma=1, init_dist=Flat.dist(), shape=len(data)) + z = Normal("z", mu=phi * data[:-1], sigma=1, shape=len(data) - 1) + ar_like = t.compile_logp(y)({"y": data}) + reg_like = t.compile_logp(z)({"z": data[1:]}) + np.testing.assert_allclose(ar_like, reg_like) + + with Model() as t_constant: + y = AR( + "y", + np.hstack((0.3, phi)), + sigma=1, + init_dist=Flat.dist(), + shape=len(data), + constant=True, + ) + z = Normal("z", mu=0.3 + phi * data[:-1], sigma=1, shape=len(data) - 1) + ar_like = t_constant.compile_logp(y)({"y": data}) + reg_like = t_constant.compile_logp(z)({"z": data[1:]}) + np.testing.assert_allclose(ar_like, reg_like) + + def test_order2_logp(self): + data = np.array([0.3, 1, 2, 3, 4]) + phi = np.array([0.84, 0.10]) + with Model() as t: + y = AR("y", phi, sigma=1, init_dist=Flat.dist(), shape=len(data)) + z = Normal( + "z", mu=phi[0] * data[1:-1] + phi[1] * data[:-2], sigma=1, shape=len(data) - 2 + ) + ar_like = t.compile_logp(y)({"y": data}) + reg_like = t.compile_logp(z)({"z": data[2:]}) + np.testing.assert_allclose(ar_like, reg_like) + + @pytest.mark.parametrize("constant", (False, True)) + def test_batched_size(self, constant): + ar_order, steps, batch_size = 3, 100, 5 + beta_tp = np.random.randn(batch_size, ar_order + int(constant)) + y_tp = np.random.randn(batch_size, steps) + with Model() as t0: + y = AR("y", beta_tp, shape=(batch_size, steps), initval=y_tp, constant=constant) + with Model() as t1: + for i in range(batch_size): + AR(f"y_{i}", beta_tp[i], sigma=1.0, shape=steps, initval=y_tp[i], constant=constant) + + assert y.owner.op.ar_order == ar_order + + np.testing.assert_allclose( + t0.compile_logp()(t0.initial_point()), + t1.compile_logp()(t1.initial_point()), + ) - # AR1 and AR(1) - with Model() as t: - rho = Normal("rho", 0.0, 1.0) - y1 = AR1("y1", rho, 1.0, observed=data) - y2 = AR("y2", rho, 1.0, 
init=Normal.dist(0, 1), observed=data) - initial_point = t.initial_point() - np.testing.assert_allclose(y1.logp(initial_point), y2.logp(initial_point)) + y_eval = draw(y, draws=2) + assert y_eval[0].shape == (batch_size, steps) + assert not np.any(np.isclose(y_eval[0], y_eval[1])) + + def test_batched_rhos(self): + ar_order, steps, batch_size = 3, 100, 5 + beta_tp = np.random.randn(batch_size, ar_order) + y_tp = np.random.randn(batch_size, steps) + with Model() as t0: + beta = Normal("beta", 0.0, 1.0, shape=(batch_size, ar_order), initval=beta_tp) + AR("y", beta, sigma=1.0, shape=(batch_size, steps), initval=y_tp) + with Model() as t1: + beta = Normal("beta", 0.0, 1.0, shape=(batch_size, ar_order), initval=beta_tp) + for i in range(batch_size): + AR(f"y_{i}", beta[i], sigma=1.0, shape=steps, initval=y_tp[i]) + + np.testing.assert_allclose( + t0.compile_logp()(t0.initial_point()), + t1.compile_logp()(t1.initial_point()), + ) - # AR1 + constant - with Model() as t: - y = AR("y", np.hstack((0.3, phi)), sigma=1, shape=len(data), constant=True) - z = Normal("z", mu=0.3 + phi * data[:-1], sigma=1, shape=len(data) - 1) - ar_like = t["y"].logp({"z": data[1:], "y": data}) - reg_like = t["z"].logp({"z": data[1:], "y": data}) - np.testing.assert_allclose(ar_like, reg_like) - - # AR2 - phi = np.array([0.84, 0.10]) - with Model() as t: - y = AR("y", phi, sigma=1, shape=len(data)) - z = Normal("z", mu=phi[0] * data[1:-1] + phi[1] * data[:-2], sigma=1, shape=len(data) - 2) - ar_like = t["y"].logp({"z": data[2:], "y": data}) - reg_like = t["z"].logp({"z": data[2:], "y": data}) - np.testing.assert_allclose(ar_like, reg_like) + beta_tp[1] = 0 # Should always be close to zero + y_eval = t0["y"].eval({t0["beta"]: beta_tp}) + assert y_eval.shape == (batch_size, steps) + assert np.all(abs(y_eval[1]) < 5) + + def test_batched_sigma(self): + ar_order, steps, batch_size = 4, 100, (7, 5) + # AR order cannot be inferred from beta_tp because it is not fixed. 
+ # We specify it manually below + beta_tp = aesara.shared(np.random.randn(ar_order)) + sigma_tp = np.abs(np.random.randn(*batch_size)) + y_tp = np.random.randn(*batch_size, steps) + with Model() as t0: + sigma = HalfNormal("sigma", 1.0, shape=batch_size, initval=sigma_tp) + AR( + "y", + beta_tp, + sigma=sigma, + size=batch_size, + steps=steps, + initval=y_tp, + ar_order=ar_order, + ) + with Model() as t1: + sigma = HalfNormal("beta", 1.0, shape=batch_size, initval=sigma_tp) + for i in range(batch_size[0]): + for j in range(batch_size[1]): + AR( + f"y_{i}{j}", + beta_tp, + sigma=sigma[i][j], + shape=steps, + initval=y_tp[i, j], + ar_order=ar_order, + ) + + # Check logp shape + sigma_logp, y_logp = t0.compile_logp(sum=False)(t0.initial_point()) + assert tuple(y_logp.shape) == batch_size + + np.testing.assert_allclose( + sigma_logp.sum() + y_logp.sum(), + t1.compile_logp()(t1.initial_point()), + ) + beta_tp.set_value(np.zeros((ar_order,))) # Should always be close to zero + sigma_tp = np.full(batch_size, [0.01, 0.1, 1, 10, 100]) + y_eval = t0["y"].eval({t0["sigma"]: sigma_tp}) + assert y_eval.shape == (*batch_size, steps) + assert np.allclose(y_eval.std(axis=(0, 2)), [0.01, 0.1, 1, 10, 100], rtol=0.1) + + def test_batched_init_dist(self): + ar_order, steps, batch_size = 3, 100, 5 + beta_tp = aesara.shared(np.random.randn(ar_order), shape=(3,)) + y_tp = np.random.randn(batch_size, steps) + with Model() as t0: + init_dist = Normal.dist(0.0, 0.01, size=(batch_size, ar_order)) + AR("y", beta_tp, sigma=0.01, init_dist=init_dist, steps=steps, initval=y_tp) + with Model() as t1: + for i in range(batch_size): + AR(f"y_{i}", beta_tp, sigma=0.01, shape=steps, initval=y_tp[i]) + + np.testing.assert_allclose( + t0.compile_logp()(t0.initial_point()), + t1.compile_logp()(t1.initial_point()), + ) -@pytest.mark.xfail(reason="Timeseries not refactored") -def test_AR_nd(): - # AR2 multidimensional - p, T, n = 3, 100, 5 - beta_tp = np.random.randn(p, n) - y_tp = np.random.randn(T, n) - with Model() as t0: - beta = Normal("beta", 0.0, 1.0, shape=(p, n), initval=beta_tp) - AR("y", beta, sigma=1.0, shape=(T, n), initval=y_tp) - - with Model() as t1: - beta = Normal("beta", 0.0, 1.0, shape=(p, n), initval=beta_tp) - for i in range(n): - AR("y_%d" % i, beta[:, i], sigma=1.0, shape=T, initval=y_tp[:, i]) - - np.testing.assert_allclose(t0.logp(t0.initial_point()), t1.logp(t1.initial_point())) + # Next values should keep close to previous ones + beta_tp.set_value(np.full((ar_order,), 1 / ar_order)) + # Init dist is cloned when creating the AR, so the original variable is not + # part of the AR graph. 
We retrieve the one actually used manually + init_dist = t0["y"].owner.inputs[2] + init_dist_tp = np.full((batch_size, ar_order), (np.arange(batch_size) * 100)[:, None]) + y_eval = t0["y"].eval({init_dist: init_dist_tp}) + assert y_eval.shape == (batch_size, steps) + assert np.allclose( + y_eval[:, -10:].mean(-1), np.arange(batch_size) * 100, rtol=0.1, atol=0.5 + ) + + def test_constant_random(self): + x = AR.dist( + rho=[100, 0, 0], + sigma=0.1, + init_dist=Normal.dist(-100.0, sigma=0.1), + constant=True, + shape=(6,), + ) + x_eval = x.eval() + assert np.allclose(x_eval[:2], -100, rtol=0.1) + assert np.allclose(x_eval[2:], 100, rtol=0.1) + + def test_multivariate_init_dist(self): + init_dist = Dirichlet.dist(a=np.full((5, 2), [1, 10])) + x = AR.dist(rho=[0, 0], init_dist=init_dist, steps=0) + + x_eval = x.eval() + assert x_eval.shape == (5, 2) + + init_dist_eval = init_dist.eval() + init_dist_logp_eval = logp(init_dist, init_dist_eval).eval() + x_logp_eval = logp(x, init_dist_eval).eval() + assert x_logp_eval.shape == (5,) + assert np.allclose(x_logp_eval, init_dist_logp_eval) + + @pytest.mark.parametrize( + "size, expected", + [ + (None, np.full((2, 7), [[2.0], [4.0]])), + ((5, 2), np.full((5, 2, 7), [[2.0], [4.0]])), + ], + ) + def test_moment(self, size, expected): + with Model() as model: + init_dist = Constant.dist([[1.0, 2.0], [3.0, 4.0]]) + AR("x", rho=[0, 0], init_dist=init_dist, steps=7, size=size) + assert_moment_is_expected(model, expected, check_finite_logp=False) @pytest.mark.xfail(reason="Timeseries not refactored") From 850795643351cf1bd93098fb6d571bd12144a1fd Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 5 May 2022 11:22:28 +0200 Subject: [PATCH 24/26] Rename `pandas_to_array` to `convert_observed_data` --- pymc/aesaraf.py | 9 +++------ pymc/data.py | 6 +++--- pymc/distributions/shape_utils.py | 4 ++-- pymc/model.py | 6 +++--- pymc/tests/test_aesaraf.py | 10 +++++----- 5 files changed, 16 insertions(+), 19 deletions(-) diff --git a/pymc/aesaraf.py b/pymc/aesaraf.py index e2de6d9fe2..0767bee29d 100644 --- a/pymc/aesaraf.py +++ b/pymc/aesaraf.py @@ -81,16 +81,13 @@ "set_at_rng", "at_rng", "take_along_axis", - "pandas_to_array", + "convert_observed_data", ] -def pandas_to_array(data): - """Convert a pandas object to a NumPy array. +def convert_observed_data(data): + """Convert user provided dataset to accepted formats.""" - XXX: When `data` is a generator, this will return an Aesara tensor! - - """ if hasattr(data, "to_numpy") and hasattr(data, "isnull"): # typically, but not limited to pandas objects vals = data.to_numpy() diff --git a/pymc/data.py b/pymc/data.py index a240712982..0f1a54d09c 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -33,7 +33,7 @@ import pymc as pm -from pymc.aesaraf import pandas_to_array +from pymc.aesaraf import convert_observed_data __all__ = [ "get_data", @@ -636,9 +636,9 @@ def Data( ) name = model.name_for(name) - # `pandas_to_array` takes care of parameter `value` and + # `convert_observed_data` takes care of parameter `value` and # transforms it to something digestible for Aesara. 
- arr = pandas_to_array(value) + arr = convert_observed_data(value) if mutable is None: major, minor = (int(v) for v in pm.__version__.split(".")[:2]) diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 8110b3d280..23c9237199 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -26,7 +26,7 @@ from aesara.tensor.var import TensorVariable from typing_extensions import TypeAlias -from pymc.aesaraf import pandas_to_array +from pymc.aesaraf import convert_observed_data __all__ = [ "to_tuple", @@ -558,7 +558,7 @@ def resize_from_observed( Observations as numpy array or `Variable`. """ if not hasattr(observed, "shape"): - observed = pandas_to_array(observed) + observed = convert_observed_data(observed) ndim_resize = observed.ndim - ndim_implied resize_shape = tuple(observed.shape[d] for d in range(ndim_resize)) return resize_shape, observed diff --git a/pymc/model.py b/pymc/model.py index d13091be69..47ecabc732 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -50,10 +50,10 @@ from pymc.aesaraf import ( compile_pymc, + convert_observed_data, gradient, hessian, inputvars, - pandas_to_array, rvs_to_value_vars, ) from pymc.blocking import DictToArrayBijection, RaveledVars @@ -1158,7 +1158,7 @@ def set_data( if isinstance(values, list): values = np.array(values) - values = pandas_to_array(values) + values = convert_observed_data(values) dims = self.RV_dims.get(name, None) or () coords = coords or {} @@ -1290,7 +1290,7 @@ def make_obs_var( """ name = rv_var.name - data = pandas_to_array(data).astype(rv_var.dtype) + data = convert_observed_data(data).astype(rv_var.dtype) if data.ndim != rv_var.ndim: raise ShapeError( diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py index 709f21c6ef..6b032ff2c6 100644 --- a/pymc/tests/test_aesaraf.py +++ b/pymc/tests/test_aesaraf.py @@ -38,8 +38,8 @@ _conversion_map, change_rv_size, compile_pymc, + convert_observed_data, extract_obs_data, - pandas_to_array, rvs_to_value_vars, take_along_axis, walk_model, @@ -413,9 +413,9 @@ def test_extract_obs_data(): @pytest.mark.parametrize("input_dtype", ["int32", "int64", "float32", "float64"]) -def test_pandas_to_array(input_dtype): +def test_convert_observed_data(input_dtype): """ - Ensure that pandas_to_array returns the dense array, masked array, + Ensure that convert_observed_data returns the dense array, masked array, graph variable, TensorVariable, or sparse matrix as appropriate. 
""" pd = pytest.importorskip("pandas") @@ -437,7 +437,7 @@ def test_pandas_to_array(input_dtype): square_generator = (np.array([i**2], dtype=int) for i in range(100)) # Alias the function to be tested - func = pandas_to_array + func = convert_observed_data ##### # Perform the various tests @@ -496,7 +496,7 @@ def test_pandas_to_array(input_dtype): def test_pandas_to_array_pandas_index(): pd = pytest.importorskip("pandas") data = pd.Index([1, 2, 3]) - result = pandas_to_array(data) + result = convert_observed_data(data) expected = np.array([1, 2, 3]) np.testing.assert_array_equal(result, expected) From be6fa5cab18a4106eedcb4508c5fb131d48aa734 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 5 May 2022 11:31:54 +0200 Subject: [PATCH 25/26] Obtain step information from dims and observed --- pymc/distributions/timeseries.py | 112 ++++++++++++++------ pymc/tests/test_distributions_timeseries.py | 77 +++++++++++++- 2 files changed, 154 insertions(+), 35 deletions(-) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 7649d0069d..67ff11fcd2 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -13,7 +13,7 @@ # limitations under the License. import warnings -from typing import Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import aesara import aesara.tensor as at @@ -31,13 +31,20 @@ from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.utils import normalize_size_param -from pymc.aesaraf import change_rv_size, floatX, intX +from pymc.aesaraf import change_rv_size, convert_observed_data, floatX, intX from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import SymbolicDistribution, _moment, moment from pymc.distributions.logprob import ignore_logprob, logp -from pymc.distributions.shape_utils import Shape, rv_size_is_none, to_tuple +from pymc.distributions.shape_utils import ( + Dims, + Shape, + convert_dims, + rv_size_is_none, + to_tuple, +) +from pymc.model import modelcontext from pymc.util import check_dist_not_registered __all__ = [ @@ -50,12 +57,15 @@ ] -def get_steps_from_shape( +def get_steps( steps: Optional[Union[int, np.ndarray, TensorVariable]], - shape: Optional[Shape], + *, + shape: Optional[Shape] = None, + dims: Optional[Dims] = None, + observed: Optional[Any] = None, step_shape_offset: int = 0, ): - """Extract number of steps from shape information + """Extract number of steps from shape / dims / observed information Parameters ---------- @@ -63,38 +73,45 @@ def get_steps_from_shape( User specified steps for timeseries distribution shape: User specified shape for timeseries distribution + dims: + User specified dims for timeseries distribution + observed: + User specified observed data from timeseries distribution step_shape_offset: Difference between last shape dimension and number of steps in timeseries distribution, defaults to 0 - Raises - ------ - ValueError - If neither shape nor steps are provided - Returns ------- steps Steps, if specified directly by user, or inferred from the last dimension of - shape. When both steps and shape are provided, a symbolic Assert is added - to make sure they are consistent. + shape / dims / observed. When two sources of step information are provided, + a symbolic Assert is added to ensure they are consistent. 
""" - steps_from_shape = None + inferred_steps = None if shape is not None: shape = to_tuple(shape) if shape[-1] is not ...: - steps_from_shape = shape[-1] - step_shape_offset - if steps is None: - if steps_from_shape is not None: - steps = steps_from_shape - else: - raise ValueError("Must specify steps or shape parameter") - elif steps_from_shape is not None: - # Assert that steps and shape are consistent - steps = Assert(msg="Steps do not match last shape dimension")( - steps, at.eq(steps, steps_from_shape) + inferred_steps = shape[-1] - step_shape_offset + + if inferred_steps is None and dims is not None: + dims = convert_dims(dims) + if dims[-1] is not ...: + model = modelcontext(None) + inferred_steps = model.dim_lengths[dims[-1]] - step_shape_offset + + if inferred_steps is None and observed is not None: + observed = convert_observed_data(observed) + inferred_steps = observed.shape[-1] - step_shape_offset + + if inferred_steps is None: + inferred_steps = steps + # If there are two sources of information for the steps, assert they are consistent + elif steps is not None: + inferred_steps = Assert(msg="Steps do not match last shape dimension")( + inferred_steps, at.eq(inferred_steps, steps) ) - return steps + return inferred_steps class GaussianRandomWalkRV(RandomVariable): @@ -212,26 +229,38 @@ class GaussianRandomWalk(distribution.Continuous): .. warning:: init will be cloned, rendering them independent of the ones passed as input. - steps : int - Number of steps in Gaussian Random Walks (steps > 0). + steps : int, optional + Number of steps in Gaussian Random Walk (steps > 0). Only needed if size is + used to specify distribution """ rv_op = gaussianrandomwalk - def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs): - if init is not None: - check_dist_not_registered(init) - return super().__new__(cls, name, mu, sigma, init, steps, **kwargs) + def __new__(cls, *args, steps=None, **kwargs): + steps = get_steps( + steps=steps, + shape=None, # Shape will be checked in `cls.dist` + dims=kwargs.get("dims", None), + observed=kwargs.get("observed", None), + step_shape_offset=1, + ) + return super().__new__(cls, *args, steps=steps, **kwargs) @classmethod def dist( - cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, **kwargs + cls, mu=0.0, sigma=1.0, *, init=None, steps=None, size=None, **kwargs ) -> at.TensorVariable: mu = at.as_tensor_variable(floatX(mu)) sigma = at.as_tensor_variable(floatX(sigma)) - steps = get_steps_from_shape(steps, kwargs.get("shape", None), step_shape_offset=1) + steps = get_steps( + steps=steps, + shape=kwargs.get("shape", None), + step_shape_offset=1, + ) + if steps is None: + raise ValueError("Must specify steps or shape parameter") steps = at.as_tensor_variable(intX(steps)) # If no scalar distribution is passed then initialize with a Normal of same mu and sigma @@ -245,6 +274,7 @@ def dist( and init.owner.op.ndim_supp == 0 ): raise TypeError("init must be a univariate distribution variable") + check_dist_not_registered(init) # Ignores logprob of init var because that's accounted for in the logp method init = ignore_logprob(init) @@ -340,6 +370,9 @@ class AR(SymbolicDistribution): ar_order: int, optional Order of the AR process. Inferred from length of the last dimension of rho, if possible. ar_order = rho.shape[-1] if constant else rho.shape[-1] - 1 + steps : int, optional + Number of steps in AR process (steps > 0). 
Only needed if size is used to + specify distribution Notes ----- @@ -360,6 +393,15 @@ class AR(SymbolicDistribution): """ + def __new__(cls, *args, steps=None, **kwargs): + steps = get_steps( + steps=steps, + shape=None, # Shape will be checked in `cls.dist` + dims=kwargs.get("dims", None), + observed=kwargs.get("observed", None), + ) + return super().__new__(cls, *args, steps=steps, **kwargs) + @classmethod def dist( cls, @@ -384,7 +426,9 @@ def dist( ) init_dist = kwargs["init"] - steps = get_steps_from_shape(steps, kwargs.get("shape", None)) + steps = get_steps(steps=steps, shape=kwargs.get("shape", None)) + if steps is None: + raise ValueError("Must specify steps or shape parameter") steps = at.as_tensor_variable(intX(steps), ndim=0) if ar_order is None: diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py index d2fe7275d2..d6df46d61f 100644 --- a/pymc/tests/test_distributions_timeseries.py +++ b/pymc/tests/test_distributions_timeseries.py @@ -16,6 +16,8 @@ import pytest import scipy.stats +from aesara.tensor import TensorVariable + import pymc as pm from pymc.aesaraf import floatX @@ -23,7 +25,13 @@ from pymc.distributions.discrete import Constant from pymc.distributions.logprob import logp from pymc.distributions.multivariate import Dirichlet -from pymc.distributions.timeseries import AR, GARCH11, EulerMaruyama, GaussianRandomWalk +from pymc.distributions.timeseries import ( + AR, + GARCH11, + EulerMaruyama, + GaussianRandomWalk, + get_steps, +) from pymc.model import Model from pymc.sampling import draw, sample, sample_posterior_predictive from pymc.tests.helpers import select_by_precision @@ -31,6 +39,61 @@ from pymc.tests.test_distributions_random import BaseTestDistributionRandom +@pytest.mark.parametrize( + "steps, shape, step_shape_offset, expected_steps, consistent", + [ + (10, None, 0, 10, True), + (10, None, 1, 10, True), + (None, (10,), 0, 10, True), + (None, (10,), 1, 9, True), + (None, (10, 5), 0, 5, True), + (None, (10, ...), 0, None, True), + (None, None, 0, None, True), + (10, (10,), 0, 10, True), + (10, (11,), 1, 10, True), + (10, (5, ...), 1, 10, True), + (10, (5, 5), 0, 5, False), + (10, (5, 10), 1, 9, False), + ], +) +@pytest.mark.parametrize("info_source", ("shape", "dims", "observed")) +def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps, consistent): + if info_source == "shape": + inferred_steps = get_steps(steps=steps, shape=shape, step_shape_offset=step_shape_offset) + + elif info_source == "dims": + if shape is None: + dims = None + coords = {} + else: + dims = tuple(str(i) if shape is not ... else ... for i, shape in enumerate(shape)) + coords = {str(i): range(shape) for i, shape in enumerate(shape) if shape is not ...} + with Model(coords=coords): + inferred_steps = get_steps(steps=steps, dims=dims, step_shape_offset=step_shape_offset) + + elif info_source == "observed": + if shape is None: + observed = None + else: + if ... 
in shape: + # There is no equivalent to implied dims in observed + return + observed = np.zeros(shape) + inferred_steps = get_steps( + steps=steps, observed=observed, step_shape_offset=step_shape_offset + ) + + if not isinstance(inferred_steps, TensorVariable): + assert inferred_steps == expected_steps + else: + if consistent: + assert inferred_steps.eval() == expected_steps + else: + assert inferred_steps.owner.inputs[0].eval() == expected_steps + with pytest.raises(AssertionError, match="Steps do not match"): + inferred_steps.eval() + + class TestGaussianRandomWalk: class TestGaussianRandomWalkRandom(BaseTestDistributionRandom): # Override default size for test class @@ -127,6 +190,18 @@ def test_inconsistent_steps_and_shape(self): with pytest.raises(AssertionError, match="Steps do not match last shape dimension"): x = GaussianRandomWalk.dist(steps=12, shape=45) + def test_inferred_steps_from_dims(self): + with pm.Model(coords={"batch": range(5), "steps": range(20)}): + x = GaussianRandomWalk("x", dims=("batch", "steps")) + steps = x.owner.inputs[-1] + assert steps.eval() == 19 + + def test_inferred_steps_from_observed(self): + with pm.Model(): + x = GaussianRandomWalk("x", observed=np.zeros(10)) + steps = x.owner.inputs[-1] + assert steps.eval() == 9 + @pytest.mark.parametrize( "init", [ From 255c8e99813030e42918f018294df77e15cec3be Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 5 May 2022 15:09:06 +0200 Subject: [PATCH 26/26] Make AR steps extend shape beyond initial_dist This is consistent with the meaning of steps in the GaussianRandomWalk and translates directly to the number of scan steps taken --- pymc/distributions/timeseries.py | 72 +++++++++++++-------- pymc/tests/test_distributions_timeseries.py | 6 +- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 67ff11fcd2..d57d5b170f 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -393,14 +393,19 @@ class AR(SymbolicDistribution): """ - def __new__(cls, *args, steps=None, **kwargs): + def __new__(cls, name, rho, *args, steps=None, constant=False, ar_order=None, **kwargs): + rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho))) + ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order) steps = get_steps( steps=steps, shape=None, # Shape will be checked in `cls.dist` dims=kwargs.get("dims", None), observed=kwargs.get("observed", None), + step_shape_offset=ar_order, + ) + return super().__new__( + cls, name, rhos, *args, steps=steps, constant=constant, ar_order=ar_order, **kwargs ) - return super().__new__(cls, *args, steps=steps, **kwargs) @classmethod def dist( @@ -426,34 +431,12 @@ def dist( ) init_dist = kwargs["init"] - steps = get_steps(steps=steps, shape=kwargs.get("shape", None)) + ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order) + steps = get_steps(steps=steps, shape=kwargs.get("shape", None), step_shape_offset=ar_order) if steps is None: raise ValueError("Must specify steps or shape parameter") steps = at.as_tensor_variable(intX(steps), ndim=0) - if ar_order is None: - # If ar_order is not specified we do constant folding on the shape of rhos - # to retrieve it. For example, this will detect that - # Normal(size=(5, 3)).shape[-1] == 3, which is not known by Aesara before. 
- shape_fg = FunctionGraph( - outputs=[rhos.shape[-1]], - features=[ShapeFeature()], - clone=True, - ) - (folded_shape,) = optimize_graph(shape_fg, custom_opt=topo_constant_folding).outputs - folded_shape = getattr(folded_shape, "data", None) - if folded_shape is None: - raise ValueError( - "Could not infer ar_order from last dimension of rho. Pass it " - "explictily or make sure rho have a static shape" - ) - ar_order = int(folded_shape) - int(constant) - if ar_order < 1: - raise ValueError( - "Inferred ar_order is smaller than 1. Increase the last dimension " - "of rho or remove constant_term" - ) - if init_dist is not None: if not isinstance(init_dist, TensorVariable) or not isinstance( init_dist.owner.op, RandomVariable @@ -477,6 +460,41 @@ def dist( return super().dist([rhos, sigma, init_dist, steps, ar_order, constant], **kwargs) + @classmethod + def _get_ar_order(cls, rhos: TensorVariable, ar_order: Optional[int], constant: bool) -> int: + """Compute ar_order given inputs + + If ar_order is not specified we do constant folding on the shape of rhos + to retrieve it. For example, this will detect that + Normal(size=(5, 3)).shape[-1] == 3, which is not known by Aesara before. + + Raises + ------ + ValueError + If inferred ar_order cannot be inferred from rhos or if it is less than 1 + """ + if ar_order is None: + shape_fg = FunctionGraph( + outputs=[rhos.shape[-1]], + features=[ShapeFeature()], + clone=True, + ) + (folded_shape,) = optimize_graph(shape_fg, custom_opt=topo_constant_folding).outputs + folded_shape = getattr(folded_shape, "data", None) + if folded_shape is None: + raise ValueError( + "Could not infer ar_order from last dimension of rho. Pass it " + "explictily or make sure rho have a static shape" + ) + ar_order = int(folded_shape) - int(constant) + if ar_order < 1: + raise ValueError( + "Inferred ar_order is smaller than 1. 
Increase the last dimension " + "of rho or remove constant_term" + ) + + return ar_order + @classmethod def num_rngs(cls, *args, **kwargs): return 2 @@ -540,7 +558,7 @@ def step(*args): fn=step, outputs_info=[{"initial": init_.T, "taps": range(-ar_order, 0)}], non_sequences=[rhos_bcast_.T[::-1], sigma_.T, noise_rng], - n_steps=at.max((0, steps_ - ar_order)), + n_steps=steps_, strict=True, ) (noise_next_rng,) = tuple(innov_updates_.values()) diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py index d6df46d61f..eadafc2d63 100644 --- a/pymc/tests/test_distributions_timeseries.py +++ b/pymc/tests/test_distributions_timeseries.py @@ -363,7 +363,7 @@ def test_batched_sigma(self): beta_tp.set_value(np.zeros((ar_order,))) # Should always be close to zero sigma_tp = np.full(batch_size, [0.01, 0.1, 1, 10, 100]) y_eval = t0["y"].eval({t0["sigma"]: sigma_tp}) - assert y_eval.shape == (*batch_size, steps) + assert y_eval.shape == (*batch_size, steps + ar_order) assert np.allclose(y_eval.std(axis=(0, 2)), [0.01, 0.1, 1, 10, 100], rtol=0.1) def test_batched_init_dist(self): @@ -389,7 +389,7 @@ def test_batched_init_dist(self): init_dist = t0["y"].owner.inputs[2] init_dist_tp = np.full((batch_size, ar_order), (np.arange(batch_size) * 100)[:, None]) y_eval = t0["y"].eval({init_dist: init_dist_tp}) - assert y_eval.shape == (batch_size, steps) + assert y_eval.shape == (batch_size, steps + ar_order) assert np.allclose( y_eval[:, -10:].mean(-1), np.arange(batch_size) * 100, rtol=0.1, atol=0.5 ) @@ -429,7 +429,7 @@ def test_multivariate_init_dist(self): def test_moment(self, size, expected): with Model() as model: init_dist = Constant.dist([[1.0, 2.0], [3.0, 4.0]]) - AR("x", rho=[0, 0], init_dist=init_dist, steps=7, size=size) + AR("x", rho=[0, 0], init_dist=init_dist, steps=5, size=size) assert_moment_is_expected(model, expected, check_finite_logp=False)
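
Taken together, these patches let timeseries lengths be inferred from shape, dims, or observed data, and replace the old AR/AR1 classes with a single symbolic AR distribution whose total length is ar_order + steps. A minimal usage sketch of the resulting behaviour, assuming the post-patch v4 API as exercised by the tests above (the variable names, coordinates, and coefficient values here are illustrative only, not part of the patch series):

    import pymc as pm
    from pymc.sampling import draw

    with pm.Model(coords={"steps": range(20)}):
        # steps inferred from dims: 20 coordinates -> 19 innovations plus 1 initial value
        grw = pm.GaussianRandomWalk("grw", mu=0.0, sigma=1.0, dims="steps")

        # AR(2): ar_order inferred from len(rho); steps extend beyond the initial
        # values, so the series has ar_order + steps = 2 + 5 = 7 entries
        ar = pm.AR(
            "ar",
            rho=[0.5, 0.3],
            sigma=1.0,
            init_dist=pm.Normal.dist(0.0, 1.0, size=2),  # one initial value per lag
            steps=5,
        )

    assert draw(grw).shape == (20,)
    assert draw(ar).shape == (7,)

Equivalently, passing shape=(7,) (or observing data of length 7) would let get_steps infer steps=5 for this AR(2); when both steps and a shape/dims/observed source are given, a symbolic Assert checks that they agree.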