From 39e2ee74ae7be850510de2fecdedb58d24c6941c Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Mon, 12 Dec 2022 17:30:08 +0300 Subject: [PATCH 01/43] remove deprecated modules from pytensor.graph --- pytensor/graph/kanren.py | 10 ---------- pytensor/graph/opt.py | 29 ----------------------------- pytensor/graph/opt_utils.py | 29 ----------------------------- pytensor/graph/optdb.py | 29 ----------------------------- pytensor/graph/toolbox.py | 9 --------- pytensor/graph/unify.py | 10 ---------- 6 files changed, 116 deletions(-) delete mode 100644 pytensor/graph/kanren.py delete mode 100644 pytensor/graph/opt.py delete mode 100644 pytensor/graph/opt_utils.py delete mode 100644 pytensor/graph/optdb.py delete mode 100644 pytensor/graph/toolbox.py delete mode 100644 pytensor/graph/unify.py diff --git a/pytensor/graph/kanren.py b/pytensor/graph/kanren.py deleted file mode 100644 index 29dfce1dc1..0000000000 --- a/pytensor/graph/kanren.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.graph.kanren` is deprecated; use `pytensor.graph.rewriting.kanren` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.graph.rewriting.kanren import * # noqa: F401 E402 F403 diff --git a/pytensor/graph/opt.py b/pytensor/graph/opt.py deleted file mode 100644 index c769cdfbc6..0000000000 --- a/pytensor/graph/opt.py +++ /dev/null @@ -1,29 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.graph.opt` is deprecated; use `pytensor.graph.rewriting.basic` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.graph.rewriting.basic import * # noqa: F401 E402 F403 -from pytensor.graph.rewriting.basic import DEPRECATED_NAMES # noqa: F401 E402 F403 - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - global DEPRECATED_NAMES - - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/pytensor/graph/opt_utils.py b/pytensor/graph/opt_utils.py deleted file mode 100644 index 8f07be9f12..0000000000 --- a/pytensor/graph/opt_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.graph.opt_utils` is deprecated; use `pytensor.graph.rewriting.utils` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.graph.rewriting.utils import * # noqa: F401 E402 F403 -from pytensor.graph.rewriting.utils import DEPRECATED_NAMES # noqa: F401 E402 F403 - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - global DEPRECATED_NAMES - - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/pytensor/graph/optdb.py b/pytensor/graph/optdb.py deleted file mode 100644 index af1ff47686..0000000000 --- a/pytensor/graph/optdb.py +++ /dev/null @@ -1,29 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.graph.optdb` is deprecated; use `pytensor.graph.rewriting.db` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.graph.rewriting.db import * # noqa: F401 E402 F403 -from pytensor.graph.rewriting.db import DEPRECATED_NAMES # noqa: F401 E402 F403 - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - global DEPRECATED_NAMES - - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/pytensor/graph/toolbox.py b/pytensor/graph/toolbox.py deleted file mode 100644 index 7ab36616cf..0000000000 --- a/pytensor/graph/toolbox.py +++ /dev/null @@ -1,9 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.graph.toolbox` is deprecated " - "and has been renamed to `pytensor.graph.features`", - DeprecationWarning, - stacklevel=2, -) diff --git a/pytensor/graph/unify.py b/pytensor/graph/unify.py deleted file mode 100644 index db9b40c7e4..0000000000 --- a/pytensor/graph/unify.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.graph.unify` is deprecated; use `pytensor.graph.rewriting.unify` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.graph.rewriting.unify import * # noqa: F401 E402 F403 From 0a0d00b15f8725e49788cdfca336c6e54e47a553 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Mon, 12 Dec 2022 17:40:58 +0300 Subject: [PATCH 02/43] clean up pytensor.graph.sched --- pytensor/graph/sched.py | 283 ---------------------------------------- pytensor/tensor/io.py | 2 +- pytensor/utils.py | 11 ++ 3 files changed, 12 insertions(+), 284 deletions(-) delete mode 100644 pytensor/graph/sched.py diff --git a/pytensor/graph/sched.py b/pytensor/graph/sched.py deleted file mode 100644 index 7e9743f022..0000000000 --- a/pytensor/graph/sched.py +++ /dev/null @@ -1,283 +0,0 @@ -from collections import defaultdict - -from pytensor.graph.basic import list_of_nodes -from pytensor.utils import cmp - - -# {{{ http://code.activestate.com/recipes/578231/ (r1) -# Copyright (c) Oren Tirosh 2012 -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - - -def memodict(f): - """ - Memoization decorator for a function taking a single argument. - - """ - - class memodict(defaultdict): - def __missing__(self, key): - ret = self[key] = f(key) - return ret - - return memodict().__getitem__ - - -# end of http://code.activestate.com/recipes/578231/ }}} - - -def make_depends(): - @memodict - def depends(pair): - """ - Returns True if a depends on b. - - """ - a, b = pair - return any(bout in a.inputs for bout in b.outputs) or any( - depends((ainp.owner, b)) for ainp in a.inputs if ainp.owner - ) - - return depends - - -def make_dependence_cmp(): - """ - Create a comparator to represent the dependence of nodes in a graph. - - """ - depends = make_depends() - - def dependence(a, b): - """ - A cmp function for nodes in a graph - does a depend on b? - - Returns - ------- - int - Positive number if a depends on b, negative number - if b depends on a, 0 otherwise. - - """ - if depends((a, b)): - return 1 - if depends((b, a)): - return -1 - return 0 - - return dependence - - -def reverse_dict(d): - """ - Reverses direction of dependence dict. - - Notes - ----- - dict order is not deterministic. As we iterate on the - input dict, it makes the output of this function depend on the - dict order. So this function output order should be considered - as undeterministic. - - Examples - -------- - >>> d = {'a': (1, 2), 'b': (2, 3), 'c':()} - >>> reverse_dict(d) - {1: ('a',), 2: ('a', 'b'), 3: ('b',)} - - """ - result = {} - for key in d: - for val in d[key]: - result[val] = result.get(val, tuple()) + (key,) - return result - - -def _toposort(edges): - """ - Topological sort algorithm by Kahn [1] - O(nodes + vertices). - - Parameters - ---------- - edges - A dict of the form {a: {b, c}} where b and c depend on a. - - Returns - ------- - L : list - An ordered list of nodes that satisfy the dependencies of edges. - - Closely follows the wikipedia page [2] - - References - ---------- - [1] Kahn, Arthur B. (1962), "Topological sorting of large networks", - Communications of the ACM - [2] http://en.wikipedia.org/wiki/Toposort#Algorithms - - Examples - -------- - >>> _toposort({1: {2, 3}, 2: (3, )}) - [1, 2, 3] - - """ - incoming_edges = reverse_dict(edges) - incoming_edges = {k: set(val) for k, val in incoming_edges.items()} - S = {v for v in edges if v not in incoming_edges} - L = [] - - while S: - n = S.pop() - L.append(n) - for m in edges.get(n, ()): - assert n in incoming_edges[m] - incoming_edges[m].remove(n) - if not incoming_edges[m]: - S.add(m) - if any(incoming_edges.get(v, None) for v in edges): - raise ValueError("Input has cycles") - return L - - -def posort(nodes, *cmps): - """ - Partially ordered sort with multiple comparators. - - Given a list of comparators, orders the elements in `nodes` so that the - comparators are satisfied as much as possible giving precedence to - earlier comparators. - - Parameters - ---------- - nodes - An iterable of nodes in a graph. - cmps - A sequence of comparator functions that describe which nodes should - come before which others. 
- - Returns - ------- - list - A list of nodes which satisfy the comparators as much as possible. - - Notes - ----- - Implemented with _toposort. - - Examples - -------- - >>> lower_tens = lambda a, b: a/10 - b/10 # prefer lower numbers div 10 - >>> prefer evens = lambda a, b: a%2 - b%2 # prefer even numbers - >>> posort(list(range(20)), lower_tens, prefer_evens) - [0, 8, 2, 4, 6, 1, 3, 5, 7, 9, 16, 18, 10, 12, 14, 17, 19, 11, 13, 15] - - """ - comes_before = {a: set() for a in nodes} - comes_after = {a: set() for a in nodes} - - def add_links(a, b): # b depends on a - comes_after[a].add(b) - comes_after[a].update(comes_after[b]) - for c in comes_before[a]: - comes_after[c].update(comes_after[a]) - comes_before[b].add(a) - comes_before[b].update(comes_before[a]) - for c in comes_after[b]: - comes_before[c].update(comes_before[b]) - - def check(): - """ - Tests for cycles in manufactured edges. - - """ - for a in nodes: - for b in nodes: - assert not (b in comes_after[a] and a in comes_after[b]) - - for cmp_fn in cmps: - for a in nodes: - for b in nodes: - if cmp_fn(a, b) < 0: # a wants to come before b - # if this wouldn't cause a cycle and isn't already known - if b not in comes_before[a] and b not in comes_after[a]: - add_links(a, b) - # check() # debug code - - return _toposort(comes_after) - - -def sort_apply_nodes(inputs, outputs, cmps): - """ - Order a graph of apply nodes according to a list of comparators. - - The following example sorts first by dependence of nodes (this is a - topological sort) and then by lexicographical ordering (nodes that start - with 'E' come before nodes that start with 'I' if there is no dependence. - - Examples - -------- - >>> from pytensor.graph.basic import sort_apply_nodes, dependence - >>> from pytensor.tensor.type import matrix - >>> from pytensor.tensor.math import dot - >>> x = matrix('x') - >>> y = dot(x*2, x+1) - >>> str_cmp = lambda a, b: cmp(str(a), str(b)) # lexicographical sort - >>> sort_apply_nodes([x], [y], cmps=[dependence, str_cmp]) - [Elemwise{add,no_inplace}(x, InplaceDimShuffle{x,x}.0), - InplaceDimShuffle{x,x}(TensorConstant{2}), - Elemwise{mul,no_inplace}(x, InplaceDimShuffle{x,x}.0), - InplaceDimShuffle{x,x}(TensorConstant{1}), - dot(Elemwise{mul,no_inplace}.0, Elemwise{add,no_inplace}.0)] - - """ - return posort(list_of_nodes(inputs, outputs), *cmps) - - -def sort_schedule_fn(*cmps): - """ - Make a schedule function from comparators. - - See Also - -------- - sort_apply_nodes - - """ - dependence = make_dependence_cmp() - cmps = (dependence,) + cmps - - def schedule(fgraph): - """ - Order nodes in a FunctionGraph. 
- - """ - return sort_apply_nodes(fgraph.inputs, fgraph.outputs, cmps) - - return schedule - - -def key_to_cmp(key): - """ - comparator function based on "key" function - """ - - def key_cmp(a, b): - return cmp(key(a), key(b)) - - return key_cmp diff --git a/pytensor/tensor/io.py b/pytensor/tensor/io.py index 0984d67d2a..c9be517a9c 100644 --- a/pytensor/tensor/io.py +++ b/pytensor/tensor/io.py @@ -2,9 +2,9 @@ from pytensor.graph.basic import Apply, Constant, Variable from pytensor.graph.op import Op -from pytensor.graph.sched import key_to_cmp from pytensor.link.c.type import Generic from pytensor.tensor.type import tensor +from pytensor.utils import key_to_cmp class LoadFromDisk(Op): diff --git a/pytensor/utils.py b/pytensor/utils.py index 613d39ad16..8b78cec4ae 100644 --- a/pytensor/utils.py +++ b/pytensor/utils.py @@ -111,6 +111,17 @@ def cmp(x, y): return (x > y) - (x < y) +def key_to_cmp(key): + """ + comparator function based on "key" function + """ + + def key_cmp(a, b): + return cmp(key(a), key(b)) + + return key_cmp + + def get_unbound_function(unbound): # Op.make_thunk isn't bound, so don't have a __func__ attr. # But bound method, have a __func__ method that point to the From c66d89d11210b1c17f242484b08fe9b011ef228a Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Tue, 13 Dec 2022 15:51:20 +0300 Subject: [PATCH 03/43] remove MPI --- doc/library/tensor/io.rst | 5 - pytensor/tensor/io.py | 228 +--------------------------- tests/link/test_link.py | 26 +--- tests/tensor/_test_mpi_roundtrip.py | 64 -------- tests/tensor/test_mpi.py | 108 ------------- 5 files changed, 3 insertions(+), 428 deletions(-) delete mode 100644 tests/tensor/_test_mpi_roundtrip.py delete mode 100644 tests/tensor/test_mpi.py diff --git a/doc/library/tensor/io.rst b/doc/library/tensor/io.rst index 0cfe91cb31..40a4bb9cf8 100644 --- a/doc/library/tensor/io.rst +++ b/doc/library/tensor/io.rst @@ -12,11 +12,6 @@ File operation - Load from disk with the function :func:`load ` and its associated op :class:`LoadFromDisk ` -MPI operation -============= -- Non-blocking transfer: :func:`isend ` and :func:`irecv `. -- Blocking transfer: :func:`send ` and :func:`recv ` - Details ======= diff --git a/pytensor/tensor/io.py b/pytensor/tensor/io.py index c9be517a9c..911be9bf79 100644 --- a/pytensor/tensor/io.py +++ b/pytensor/tensor/io.py @@ -1,10 +1,9 @@ import numpy as np -from pytensor.graph.basic import Apply, Constant, Variable +from pytensor.graph.basic import Apply, Constant from pytensor.graph.op import Op from pytensor.link.c.type import Generic from pytensor.tensor.type import tensor -from pytensor.utils import key_to_cmp class LoadFromDisk(Op): @@ -92,229 +91,4 @@ def load(path, dtype, shape, mmap_mode=None): return LoadFromDisk(dtype, shape, mmap_mode)(path) -########################## -# MPI -########################## - -try: - from mpi4py import MPI -except ImportError: - mpi_enabled = False -else: - comm = MPI.COMM_WORLD - mpi_enabled = True - - -class MPIRecv(Op): - """ - An operation to asynchronously receive an array to a remote host using MPI. - - See Also - -------- - MPIRecv - MPIWait - - Notes - ----- - Non-differentiable. 
- - """ - - __props__ = ("source", "tag", "shape", "dtype") - - def __init__(self, source, tag, shape, dtype): - self.source = source - self.tag = tag - self.shape = shape - self.dtype = np.dtype(dtype) # turn "float64" into numpy.float64 - self.static_shape = (None,) * len(shape) - - def make_node(self): - return Apply( - self, - [], - [ - Variable(Generic(), None), - tensor(dtype=self.dtype, shape=self.static_shape), - ], - ) - - def perform(self, node, inp, out): - - data = np.zeros(self.shape, dtype=self.dtype) - request = comm.Irecv(data, self.source, self.tag) - - out[0][0] = request - out[1][0] = data - - def __str__(self): - return f"MPIRecv{{source: {int(self.source)}, tag: {int(self.tag)}, shape: {self.shape}, dtype: {self.dtype}}}" - - def infer_shape(self, fgraph, node, shapes): - return [None, self.shape] - - def do_constant_folding(self, fgraph, node): - return False - - -class MPIRecvWait(Op): - """ - An operation to wait on a previously received array using MPI. - - See Also - -------- - MPIRecv - - Notes - ----- - Non-differentiable. - - """ - - __props__ = ("tag",) - - def __init__(self, tag): - self.tag = tag - - def make_node(self, request, data): - return Apply( - self, - [request, data], - [tensor(dtype=data.dtype, shape=data.type.shape)], - ) - - def perform(self, node, inp, out): - - request = inp[0] - data = inp[1] - - request.wait() - - out[0][0] = data - - def infer_shape(self, fgraph, node, shapes): - return [shapes[1]] - - view_map = {0: [1]} - - -class MPISend(Op): - """ - An operation to asynchronously Send an array to a remote host using MPI. - - See Also - -------- - MPIRecv - MPISendWait - - Notes - ----- - Non-differentiable. - - """ - - __props__ = ("dest", "tag") - - def __init__(self, dest, tag): - self.dest = dest - self.tag = tag - - def make_node(self, data): - return Apply(self, [data], [Variable(Generic(), None), data.type()]) - - view_map = {1: [0]} - - def perform(self, node, inp, out): - - data = inp[0] - - request = comm.Isend(data, self.dest, self.tag) - - out[0][0] = request - out[1][0] = data - - def __str__(self): - return f"MPISend{{dest: {int(self.dest)}, tag: {int(self.tag)}}}" - - -class MPISendWait(Op): - """ - An operation to wait on a previously sent array using MPI. - - See Also - -------- - MPISend - - Notes - ----- - Non-differentiable. - - """ - - __props__ = ("tag",) - - def __init__(self, tag): - self.tag = tag - - def make_node(self, request, data): - return Apply(self, [request, data], [Variable(Generic(), None)]) - - def perform(self, node, inp, out): - request = inp[0] - request.wait() - out[0][0] = True - - -def isend(var, dest, tag): - """ - Non blocking send. - """ - return MPISend(dest, tag)(var) - - -def send(var, dest, tag): - """ - Blocking send. - """ - return MPISendWait(tag)(*isend(var, dest, tag)) - - -def irecv(shape, dtype, source, tag): - """ - Non-blocking receive. - """ - return MPIRecv(source, tag, shape, dtype)() - - -def recv(shape, dtype, source, tag): - """ - Blocking receive. 
- """ - return MPIRecvWait(tag)(*irecv(shape, dtype, source, tag)) - - -# Ordering keys for scheduling -def mpi_send_wait_key(a): - """Wait as long as possible on Waits, Start Send/Recvs early.""" - if isinstance(a.op, (MPIRecvWait, MPISendWait)): - return 1 - if isinstance(a.op, (MPIRecv, MPISend)): - return -1 - return 0 - - -def mpi_tag_key(a): - """Break MPI ties by using the variable tag - prefer lower tags first.""" - if isinstance(a.op, (MPISend, MPIRecv, MPIRecvWait, MPISendWait)): - return a.op.tag - else: - return 0 - - -mpi_send_wait_cmp = key_to_cmp(mpi_send_wait_key) -mpi_tag_cmp = key_to_cmp(mpi_tag_key) - -mpi_keys = (mpi_send_wait_key, mpi_tag_key) -mpi_cmps = (mpi_send_wait_cmp, mpi_tag_cmp) - __all__ = ["load"] diff --git a/tests/link/test_link.py b/tests/link/test_link.py index 6dfe846f63..acc96a164e 100644 --- a/tests/link/test_link.py +++ b/tests/link/test_link.py @@ -4,15 +4,13 @@ import numpy as np import pytensor -from pytensor.compile.mode import Mode from pytensor.graph import fg from pytensor.graph.basic import Apply, Constant, Variable, clone from pytensor.graph.op import Op from pytensor.graph.type import Type from pytensor.link.basic import Container, Linker, PerformLinker, WrapLinker -from pytensor.link.c.basic import OpWiseCLinker -from pytensor.tensor.type import matrix, scalar -from pytensor.utils import cmp, to_return_values +from pytensor.tensor.type import scalar +from pytensor.utils import to_return_values def make_function(linker: Linker, unpack_single: bool = True, **kwargs) -> Callable: @@ -219,26 +217,6 @@ def wrap(fgraph, i, node, th): assert o[0].data == 1.5 -def test_sort_schedule_fn(): - from pytensor.graph.sched import make_depends, sort_schedule_fn - - x = matrix("x") - y = pytensor.tensor.dot(x[:5] * 2, x.T + 1).T - - def str_cmp(a, b): - return cmp(str(a), str(b)) # lexicographical sort - - linker = OpWiseCLinker(schedule=sort_schedule_fn(str_cmp)) - mode = Mode(linker=linker) - f = pytensor.function((x,), (y,), mode=mode) - - nodes = f.maker.linker.make_all()[-1] - depends = make_depends() - for a, b in zip(nodes[:-1], nodes[1:]): - if not depends((b, a)): - assert str(a) < str(b) - - def test_container_deepcopy(): # This is a test to a work around a NumPy bug. diff --git a/tests/tensor/_test_mpi_roundtrip.py b/tests/tensor/_test_mpi_roundtrip.py deleted file mode 100644 index 8a4492fa85..0000000000 --- a/tests/tensor/_test_mpi_roundtrip.py +++ /dev/null @@ -1,64 +0,0 @@ -# Run using -# mpiexec -np 2 python _test_mpi_roundtrip.py - -from sys import exit, stderr, stdout - -import numpy as np -from mpi4py import MPI - -import pytensor -from pytensor.configdefaults import config -from pytensor.graph.sched import sort_schedule_fn -from pytensor.tensor.io import mpi_cmps, recv, send -from pytensor.tensor.type import matrix - - -comm = MPI.COMM_WORLD - -rank = comm.Get_rank() -size = comm.Get_size() - -if size != 2: - stderr.write( - "mpiexec failed to create a world with two nodes.\n" - "Closing with success message." 
- ) - stdout.write("True") - exit(0) - -shape = (2, 2) -dtype = "float32" - -scheduler = sort_schedule_fn(*mpi_cmps) -mode = pytensor.compile.mode.Mode( - optimizer=None, linker=pytensor.link.c.basic.OpWiseCLinker(schedule=scheduler) -) - -with config.change_flags(compute_test_value="off"): - if rank == 0: - x = matrix("x", dtype=dtype) - y = x + 1 - send_request = send(y, 1, 11) - - z = recv(shape, dtype, 1, 12) - - f = pytensor.function([x], [send_request, z], mode=mode) - - xx = np.random.random(shape).astype(dtype) - expected = (xx + 1) * 2 - - _, zz = f(xx) - - same = np.linalg.norm(zz - expected) < 0.001 - # The parent test will look for "True" in the output - stdout.write(str(same)) - - if rank == 1: - - y = recv(shape, dtype, 0, 11) - z = y * 2 - send_request = send(z, 0, 12) - - f = pytensor.function([], send_request, mode=mode) - - f() diff --git a/tests/tensor/test_mpi.py b/tests/tensor/test_mpi.py deleted file mode 100644 index 9628006f98..0000000000 --- a/tests/tensor/test_mpi.py +++ /dev/null @@ -1,108 +0,0 @@ -import os -import subprocess - -import pytest - -import pytensor -from pytensor.compile.mode import Mode -from pytensor.configdefaults import config -from pytensor.graph.sched import sort_schedule_fn -from pytensor.link.c.basic import OpWiseCLinker -from pytensor.tensor.io import ( - MPISend, - MPISendWait, - mpi_cmps, - mpi_enabled, - mpi_send_wait_cmp, - recv, - send, -) -from pytensor.tensor.type import matrix - - -mpi_scheduler = sort_schedule_fn(*mpi_cmps) -mpi_linker = OpWiseCLinker(schedule=mpi_scheduler) -mpi_mode = Mode(linker=mpi_linker) - - -@config.change_flags(compute_test_value="off") -def test_recv(): - x = recv((10, 10), "float64", 0, 11) - assert x.dtype == "float64" - assert x.broadcastable == (False, False) - - recvnode = x.owner.inputs[0].owner - assert recvnode.op.source == 0 - assert recvnode.op.tag == 11 - - -def test_send(): - x = matrix("x") - y = send(x, 1, 11) - sendnode = y.owner.inputs[0].owner - assert sendnode.op.dest == 1 - assert sendnode.op.tag == 11 - - -@config.change_flags(compute_test_value="off") -def test_can_make_function(): - x = recv((5, 5), "float32", 0, 11) - y = x + 1 - assert pytensor.function([], [y]) - - -@pytest.mark.skipif(not mpi_enabled, reason="MPI not enabled") -def test_mpi_roundtrip(): - pytensor_root = pytensor.__file__.split("__init__")[0] - env = os.environ.copy() - flags = env.get("PYTENSOR_FLAGS", "") - keep_flags = ",".join( - f for f in flags.split(",") if not f.startswith("init_gpu_device") - ) - env["PYTENSOR_FLAGS"] = keep_flags - p = subprocess.Popen( - "mpiexec -np 2 python " + pytensor_root + "tensor/tests/_test_mpi_roundtrip.py", - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True, - close_fds=True, - env=env, - ) - (stdout, stderr) = p.communicate() - - result = stdout.decode() - assert "True" in result, stderr.decode() - - -def test_mpi_send_wait_cmp(): - x = matrix("x") - y = send(x, 1, 11) - z = x + x - waitnode = y.owner - sendnode = y.owner.inputs[0].owner - addnode = z.owner - assert mpi_send_wait_cmp(sendnode, addnode) < 0 # send happens first - assert mpi_send_wait_cmp(waitnode, addnode) > 0 # wait happens last - - -@config.change_flags(compute_test_value="off") -def test_mpi_tag_ordering(): - x = recv((2, 2), "float32", 1, 12) - y = recv((2, 2), "float32", 1, 11) - z = recv((2, 2), "float32", 1, 13) - f = pytensor.function([], [x, y, z], mode=mpi_mode) - nodes = f.maker.linker.make_all()[-1] - - assert all(node.op.tag == tag for node, tag in zip(nodes, 
(11, 12, 13, 11, 12, 13))) - - -def test_mpi_schedule(): - x = matrix("x") - y = send(x, 1, 11) - z = x + x - - f = pytensor.function([x], [y, z], mode=mpi_mode) - nodes = f.maker.linker.make_all()[-1] - optypes = [MPISend, pytensor.tensor.elemwise.Elemwise, MPISendWait] - assert all(isinstance(node.op, optype) for node, optype in zip(nodes, optypes)) From 112ee6eb7aa9a7c428c6949e9f1068a54156efae Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Tue, 13 Dec 2022 15:52:37 +0300 Subject: [PATCH 04/43] remove test sched --- tests/graph/test_sched.py | 102 -------------------------------------- 1 file changed, 102 deletions(-) delete mode 100644 tests/graph/test_sched.py diff --git a/tests/graph/test_sched.py b/tests/graph/test_sched.py deleted file mode 100644 index ffcabc4b65..0000000000 --- a/tests/graph/test_sched.py +++ /dev/null @@ -1,102 +0,0 @@ -from pytensor.graph.basic import io_toposort -from pytensor.graph.sched import ( - _toposort, - make_dependence_cmp, - posort, - reverse_dict, - sort_apply_nodes, -) -from pytensor.tensor.math import dot -from pytensor.tensor.type import matrix -from pytensor.utils import cmp - - -def test_dependence(): - dependence = make_dependence_cmp() - - x = matrix("x") - y = dot(x * 2, x + 1) - nodes = io_toposort([x], [y]) - - for a, b in zip(nodes[:-1], nodes[1:]): - assert dependence(a, b) <= 0 - - -def test_sort_apply_nodes(): - x = matrix("x") - y = dot(x * 2, x + 1) - - def str_cmp(a, b): - return cmp(str(a), str(b)) # lexicographical sort - - nodes = sort_apply_nodes([x], [y], cmps=[str_cmp]) - - for a, b in zip(nodes[:-1], nodes[1:]): - assert str(a) <= str(b) - - -def test_reverse_dict(): - d = {"a": (1, 2), "b": (2, 3), "c": ()} - # Python 3.3 enable by default random hash for dict. - # This change the order of traversal, so this can give 2 outputs - assert reverse_dict(d) == {1: ("a",), 2: ("a", "b"), 3: ("b",)} or reverse_dict( - d - ) == {1: ("a",), 2: ("b", "a"), 3: ("b",)} - - -def test__toposort(): - edges = { - 1: {4, 6, 7}, - 2: {4, 6, 7}, - 3: {5, 7}, - 4: {6, 7}, - 5: {7}, - } - order = _toposort(edges) - assert not any( - a in edges.get(b, ()) for i, a in enumerate(order) for b in order[i:] - ) - - -def test_posort_easy(): - nodes = "asdfghjkl" - - def mycmp(a, b): - if a < b: - return -1 - elif a > b: - return 1 - else: - return 0 - - assert posort(nodes, mycmp) == list("adfghjkls") - - -def test_posort(): - l = list(range(1, 20)) - cmps = [ - lambda a, b: a % 10 - b % 10, - lambda a, b: (a / 10) % 2 - (b / 10) % 2, - lambda a, b: a - b, - ] - assert posort(l, *cmps) == [ - 10, - 1, - 11, - 2, - 12, - 3, - 13, - 4, - 14, - 5, - 15, - 6, - 16, - 7, - 17, - 8, - 18, - 9, - 19, - ] From a585a3cfdb5348f99edb03755f4c5bf224ec5624 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Tue, 13 Dec 2022 16:18:30 +0300 Subject: [PATCH 05/43] remove test for deprecated import --- tests/graph/rewriting/test_basic.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/graph/rewriting/test_basic.py b/tests/graph/rewriting/test_basic.py index 241313c957..14a55f14f1 100644 --- a/tests/graph/rewriting/test_basic.py +++ b/tests/graph/rewriting/test_basic.py @@ -1,5 +1,3 @@ -import sys - import pytest from pytensor.configdefaults import config @@ -832,20 +830,3 @@ def perform(self, *args): local_rewriter_2, local_rewriter_1, ] - - -def test_deprecations(): - """Make sure we can import deprecated classes from current and deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.graph.rewriting.basic import 
GlobalOptimizer - - with pytest.deprecated_call(): - from pytensor.graph.opt import ( # noqa: F401 F811 - GlobalOptimizer, - LocalOptimizer, - ) - - del sys.modules["pytensor.graph.opt"] - - with pytest.deprecated_call(): - from pytensor.graph.opt import GraphRewriter # noqa: F401 From 936b4b84781d7afd41b43cc21cf1038e661d8f03 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Tue, 13 Dec 2022 18:04:30 +0300 Subject: [PATCH 06/43] remove more deprecated modules --- pytensor/tensor/basic_opt.py | 13 ------------- pytensor/tensor/math_opt.py | 10 ---------- pytensor/tensor/opt_uncanonicalize.py | 10 ---------- pytensor/tensor/subtensor_opt.py | 10 ---------- tests/graph/rewriting/test_db.py | 16 ---------------- tests/graph/rewriting/test_kanren.py | 6 ------ tests/graph/rewriting/test_unify.py | 6 ------ tests/graph/rewriting/test_utils.py | 18 ------------------ tests/tensor/rewriting/test_basic.py | 9 --------- tests/tensor/rewriting/test_math.py | 6 ------ tests/tensor/rewriting/test_subtensor.py | 8 -------- tests/tensor/rewriting/test_uncanonicalize.py | 9 --------- 12 files changed, 121 deletions(-) delete mode 100644 pytensor/tensor/basic_opt.py delete mode 100644 pytensor/tensor/math_opt.py delete mode 100644 pytensor/tensor/opt_uncanonicalize.py delete mode 100644 pytensor/tensor/subtensor_opt.py diff --git a/pytensor/tensor/basic_opt.py b/pytensor/tensor/basic_opt.py deleted file mode 100644 index 39404b5239..0000000000 --- a/pytensor/tensor/basic_opt.py +++ /dev/null @@ -1,13 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.basic_opt` is deprecated; use `pytensor.tensor.rewriting.basic` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.tensor.rewriting.basic import * # noqa: F401 E402 F403 -from pytensor.tensor.rewriting.elemwise import * # noqa: F401 E402 F403 -from pytensor.tensor.rewriting.extra_ops import * # noqa: F401 E402 F403 -from pytensor.tensor.rewriting.shape import * # noqa: F401 E402 F403 diff --git a/pytensor/tensor/math_opt.py b/pytensor/tensor/math_opt.py deleted file mode 100644 index 79477cacde..0000000000 --- a/pytensor/tensor/math_opt.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.math_opt` is deprecated; use `pytensor.tensor.rewriting.math` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.tensor.rewriting.math import * # noqa: F401 E402 F403 diff --git a/pytensor/tensor/opt_uncanonicalize.py b/pytensor/tensor/opt_uncanonicalize.py deleted file mode 100644 index 1625f6cd31..0000000000 --- a/pytensor/tensor/opt_uncanonicalize.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.opt_uncanonicalize` is deprecated; use `pytensor.tensor.rewriting.uncanonicalize` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.tensor.rewriting.uncanonicalize import * # noqa: F401 E402 F403 diff --git a/pytensor/tensor/subtensor_opt.py b/pytensor/tensor/subtensor_opt.py deleted file mode 100644 index a91482f756..0000000000 --- a/pytensor/tensor/subtensor_opt.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.subtensor_opt` is deprecated; use `pytensor.tensor.rewriting.subtensor` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.tensor.rewriting.subtensor import * # noqa: F401 E402 F403 diff --git a/tests/graph/rewriting/test_db.py b/tests/graph/rewriting/test_db.py index c8864124df..ec790dbfe2 100644 --- 
a/tests/graph/rewriting/test_db.py +++ b/tests/graph/rewriting/test_db.py @@ -1,5 +1,3 @@ -import sys - import pytest from pytensor.graph.rewriting.basic import GraphRewriter, SequentialGraphRewriter @@ -86,17 +84,3 @@ def test_LocalGroupDB(self): def test_ProxyDB(self): with pytest.raises(TypeError, match=r"`db` must be.*"): ProxyDB(object()) - - -def test_deprecations(): - """Make sure we can import deprecated classes from current and deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.graph.rewriting.db import OptimizationDatabase # noqa: F401 F811 - - with pytest.deprecated_call(): - from pytensor.graph.optdb import OptimizationDatabase # noqa: F401 F811 - - del sys.modules["pytensor.graph.optdb"] - - with pytest.deprecated_call(): - from pytensor.graph.optdb import RewriteDatabase # noqa: F401 diff --git a/tests/graph/rewriting/test_kanren.py b/tests/graph/rewriting/test_kanren.py index 73cc010b4a..7c2bced1de 100644 --- a/tests/graph/rewriting/test_kanren.py +++ b/tests/graph/rewriting/test_kanren.py @@ -165,9 +165,3 @@ def distributes(in_lv, out_lv): assert expr_opt.owner.inputs[1].owner.op == at.add assert isinstance(expr_opt.owner.inputs[1].owner.inputs[0].owner.op, Dot) assert isinstance(expr_opt.owner.inputs[1].owner.inputs[1].owner.op, Dot) - - -def test_deprecations(): - """Make sure we can import deprecated classes from current and deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.graph.kanren import KanrenRelationSub # noqa: F401 F811 diff --git a/tests/graph/rewriting/test_unify.py b/tests/graph/rewriting/test_unify.py index a831bc038d..100e0cc565 100644 --- a/tests/graph/rewriting/test_unify.py +++ b/tests/graph/rewriting/test_unify.py @@ -350,9 +350,3 @@ def constraint(x): res = convert_strs_to_vars((val,)) assert isinstance(res[0], Constant) assert np.array_equal(res[0].data, val) - - -def test_deprecations(): - """Make sure we can import deprecated classes from current and deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.graph.unify import eval_if_etuple # noqa: F401 F811 diff --git a/tests/graph/rewriting/test_utils.py b/tests/graph/rewriting/test_utils.py index 3bd2c0be0a..798b2edc9c 100644 --- a/tests/graph/rewriting/test_utils.py +++ b/tests/graph/rewriting/test_utils.py @@ -1,7 +1,3 @@ -import sys - -import pytest - from pytensor.graph.fg import FunctionGraph from pytensor.graph.rewriting.basic import graph_rewriter from pytensor.graph.rewriting.utils import is_same_graph, rewrite_graph @@ -160,17 +156,3 @@ def custom_rewrite(fgraph): ) assert x_rewritten.outputs[0] is y - - -def test_deprecations(): - """Make sure we can import deprecated classes from current and deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.graph.rewriting.utils import optimize_graph # noqa: F401 F811 - - with pytest.deprecated_call(): - from pytensor.graph.opt_utils import optimize_graph # noqa: F401 F811 - - del sys.modules["pytensor.graph.opt_utils"] - - with pytest.deprecated_call(): - from pytensor.graph.opt_utils import rewrite_graph # noqa: F401 diff --git a/tests/tensor/rewriting/test_basic.py b/tests/tensor/rewriting/test_basic.py index 1cf2edec04..f28c95037f 100644 --- a/tests/tensor/rewriting/test_basic.py +++ b/tests/tensor/rewriting/test_basic.py @@ -1884,12 +1884,3 @@ def test_misc(self): x_val = np.random.random((1, 5)).astype(self.dtype) exp_res = np.broadcast_to(x_val, (5, 5))[..., None] + y_val assert np.array_equal(func(y_val, x_val), exp_res) - - -def test_deprecations(): - """Make sure 
we can import from deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.tensor.basic_opt import register_useless # noqa: F401 F811 - - with pytest.deprecated_call(): - from pytensor.tensor.rewriting.basic import ShapeFeature # noqa: F401 diff --git a/tests/tensor/rewriting/test_math.py b/tests/tensor/rewriting/test_math.py index 0c589d8519..3f508af79b 100644 --- a/tests/tensor/rewriting/test_math.py +++ b/tests/tensor/rewriting/test_math.py @@ -4625,12 +4625,6 @@ def test_local_useless_conj(): assert any(node.op == _conj for node in f.maker.fgraph.apply_nodes) -def test_deprecations(): - """Make sure we can import from deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.tensor.math_opt import AlgebraicCanonizer # noqa: F401 F811 - - def test_local_sub_neg_to_add(): x = scalar("x") y = vector("y") diff --git a/tests/tensor/rewriting/test_subtensor.py b/tests/tensor/rewriting/test_subtensor.py index b2d2395828..1a59a07bde 100644 --- a/tests/tensor/rewriting/test_subtensor.py +++ b/tests/tensor/rewriting/test_subtensor.py @@ -2166,14 +2166,6 @@ def test_local_join_subtensors(axis, slices_fn, expected_nodes): np.testing.assert_array_equal(f(x_val, stop_val), f_val) -def test_deprecations(): - """Make sure we can import from deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.tensor.subtensor_opt import ( # noqa: F401 F811 - get_advsubtensor_axis, - ) - - def test_local_uint_constant_indices(): mode = get_default_mode().including("specialize", "local_uint_constant_indices") rng = np.random.default_rng(20900) diff --git a/tests/tensor/rewriting/test_uncanonicalize.py b/tests/tensor/rewriting/test_uncanonicalize.py index 867bdc8a46..6a38a0c132 100644 --- a/tests/tensor/rewriting/test_uncanonicalize.py +++ b/tests/tensor/rewriting/test_uncanonicalize.py @@ -1,5 +1,4 @@ import numpy as np -import pytest import pytensor import pytensor.tensor as at @@ -219,11 +218,3 @@ def test_local_dimshuffle_subtensor(): assert x[:, :, 0:3, ::-1].dimshuffle(0, 2, 3).eval( {x: np.ones((5, 1, 6, 7))} ).shape == (5, 3, 7) - - -def test_deprecations(): - """Make sure we can import from deprecated modules.""" - with pytest.deprecated_call(): - from pytensor.tensor.opt_uncanonicalize import ( # noqa: F401 F811 - local_reshape_dimshuffle, - ) From 06ea6682b76441889c8e8688078cbedec7ad1cd8 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Tue, 13 Dec 2022 23:53:11 +0300 Subject: [PATCH 07/43] remove deprecated MRG_RandomStream --- doc/library/sandbox/index.rst | 1 - doc/library/sandbox/rng_mrg.rst | 16 - pytensor/compile/profiling.py | 14 +- pytensor/sandbox/multinomial.py | 438 ------ pytensor/sandbox/rng_mrg.py | 1372 ----------------- pytensor/sandbox/samples_MRG31k3p_12_7_5.txt | 420 ----- tests/misc/test_pkl_utils.py | 12 - tests/sandbox/test_multinomial.py | 120 -- .../test_multinomial_wo_replacement.py | 223 --- tests/sandbox/test_rng_mrg.py | 1144 -------------- tests/test_gradient.py | 6 +- 11 files changed, 4 insertions(+), 3762 deletions(-) delete mode 100644 doc/library/sandbox/rng_mrg.rst delete mode 100644 pytensor/sandbox/multinomial.py delete mode 100644 pytensor/sandbox/rng_mrg.py delete mode 100644 pytensor/sandbox/samples_MRG31k3p_12_7_5.txt delete mode 100644 tests/sandbox/test_multinomial.py delete mode 100644 tests/sandbox/test_multinomial_wo_replacement.py delete mode 100644 tests/sandbox/test_rng_mrg.py diff --git a/doc/library/sandbox/index.rst b/doc/library/sandbox/index.rst index 71f6096a13..f8f742de3a 100644 --- 
a/doc/library/sandbox/index.rst +++ b/doc/library/sandbox/index.rst @@ -15,4 +15,3 @@ linalg neighbours - rng_mrg diff --git a/doc/library/sandbox/rng_mrg.rst b/doc/library/sandbox/rng_mrg.rst deleted file mode 100644 index d6f9c4871e..0000000000 --- a/doc/library/sandbox/rng_mrg.rst +++ /dev/null @@ -1,16 +0,0 @@ -.. _libdoc_rng_mrg: - -=================================================================== -:mod:`sandbox.rng_mrg` -- MRG random number generator -=================================================================== - -.. module:: sandbox.rng_mrg - :platform: Unix, Windows - :synopsis: MRG random number generator -.. moduleauthor:: LISA - -API -=== - -.. automodule:: pytensor.sandbox.rng_mrg - :members: diff --git a/pytensor/compile/profiling.py b/pytensor/compile/profiling.py index 5a9e2ade7a..53c4242841 100644 --- a/pytensor/compile/profiling.py +++ b/pytensor/compile/profiling.py @@ -1485,7 +1485,6 @@ def print_tips(self, file): from pytensor import scalar as aes from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.math import Dot - from pytensor.tensor.random.op import RandomVariable scalar_op_amdlibm_no_speed_up = [ aes.LT, @@ -1628,18 +1627,7 @@ def exp_float32_op(op): printed_tip = True # tip 5 - for (fgraph, a) in self.apply_time: - node = a - if isinstance(node.op, RandomVariable): - printed_tip = True - print( - " - Replace the default random number generator by " - "'from pytensor.sandbox.rng_mrg import MRG_RandomStream " - "as RandomStream', as this is is faster. It is still " - "experimental, but seems to work correctly.", - file=file, - ) - break + # The tip was about MRG_RandomStream which is removed # tip 6 for (fgraph, a) in self.apply_time: diff --git a/pytensor/sandbox/multinomial.py b/pytensor/sandbox/multinomial.py deleted file mode 100644 index 2baccd5190..0000000000 --- a/pytensor/sandbox/multinomial.py +++ /dev/null @@ -1,438 +0,0 @@ -import copy -from typing import Tuple - -import numpy as np - -import pytensor.tensor as at -from pytensor.configdefaults import config -from pytensor.graph.basic import Apply -from pytensor.link.c.op import COp -from pytensor.scalar import ScalarType, as_scalar -from pytensor.tensor.type import discrete_dtypes - - -class MultinomialFromUniform(COp): - """ - Converts samples from a uniform into sample from a multinomial. - - TODO : need description for parameter 'odtype' - """ - - __props__: Tuple[str, ...] 
= ("odtype",) - - def __init__(self, odtype): - self.odtype = odtype - - def __str__(self): - return f"{self.__class__.__name__}{{{self.odtype}}}" - - def __setstate__(self, dct): - self.__dict__.update(dct) - try: - self.odtype - except AttributeError: - self.odtype = "auto" - - def make_node(self, pvals, unis, n=1): - pvals = at.as_tensor_variable(pvals) - unis = at.as_tensor_variable(unis) - if pvals.ndim != 2: - raise NotImplementedError("pvals ndim should be 2", pvals.ndim) - if unis.ndim != 1: - raise NotImplementedError("unis ndim should be 1", unis.ndim) - if self.odtype == "auto": - odtype = pvals.dtype - else: - odtype = self.odtype - out = at.tensor( - dtype=odtype, shape=tuple(1 if s == 1 else None for s in pvals.type.shape) - ) - return Apply(self, [pvals, unis, as_scalar(n)], [out]) - - def grad(self, ins, outgrads): - pvals, unis, n = ins - (gz,) = outgrads - return [ - at.zeros_like(x, dtype=config.floatX) - if x.dtype in discrete_dtypes - else at.zeros_like(x) - for x in ins - ] - - def c_code_cache_version(self): - return (8,) - - def c_code(self, node, name, ins, outs, sub): - # support old pickled graphs - if len(ins) == 2: - (pvals, unis) = ins - n = 1 - else: - (pvals, unis, n) = ins - (z,) = outs - if self.odtype == "auto": - t = f"PyArray_TYPE({pvals})" - else: - t = ScalarType(self.odtype).dtype_specs()[1] - if t.startswith("pytensor_complex"): - t = t.replace("pytensor_complex", "NPY_COMPLEX") - else: - t = t.upper() - fail = sub["fail"] - return ( - """ - if (PyArray_NDIM(%(pvals)s) != 2) - { - PyErr_Format(PyExc_TypeError, "pvals ndim should be 2"); - %(fail)s; - } - if (PyArray_NDIM(%(unis)s) != 1) - { - PyErr_Format(PyExc_TypeError, "unis ndim should be 2"); - %(fail)s; - } - - if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s)) - { - PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n"); - %(fail)s; - } - - if ((NULL == %(z)s) - || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0]) - || ((PyArray_DIMS(%(z)s))[1] != (PyArray_DIMS(%(pvals)s))[1]) - ) - { - Py_XDECREF(%(z)s); - %(z)s = (PyArrayObject*) PyArray_EMPTY(2, - PyArray_DIMS(%(pvals)s), - %(t)s, - 0); - if (!%(z)s) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc z output"); - %(fail)s; - } - } - - { // NESTED SCOPE - - const int nb_multi = PyArray_DIMS(%(pvals)s)[0]; - const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1]; - const int n_samples = %(n)s; - - // - // For each multinomial, loop over each possible outcome - // - for (int c = 0; c < n_samples; ++c){ - for (int n = 0; n < nb_multi; ++n) - { - int waiting = 1; - double cummul = 0.; - const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n); - for (int m = 0; m < nb_outcomes; ++m) - { - dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m); - const dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(%(pvals)s, n,m); - cummul += *pvals_nm; - if (c == 0) - { - if (waiting && (cummul > *unis_n)) - { - *z_nm = 1.; - waiting = 0; - } - else - { - // if we re-used old z pointer, we have to clear it out. 
- *z_nm = 0.; - } - } - else { - if (cummul > *unis_n) - { - *z_nm = *z_nm + 1.; - break; - } - } - } - } - } - } // END NESTED SCOPE - """ - % locals() - ) - - def perform(self, node, ins, outs): - # support old pickled graphs - if len(ins) == 2: - (pvals, unis) = ins - n_samples = 1 - else: - (pvals, unis, n_samples) = ins - (z,) = outs - - if unis.shape[0] != pvals.shape[0] * n_samples: - raise ValueError( - "unis.shape[0] != pvals.shape[0] * n_samples", - unis.shape[0], - pvals.shape[0], - n_samples, - ) - if z[0] is None or z[0].shape != pvals.shape: - z[0] = np.zeros(pvals.shape, dtype=node.outputs[0].dtype) - else: - z[0].fill(0) - - nb_multi = pvals.shape[0] - # Original version that is not vectorized. I keep it here as - # it is more readable. - # For each multinomial, loop over each possible outcome - # nb_outcomes = pvals.shape[1] - # for c in range(n_samples): - # for n in range(nb_multi): - # waiting = True - # cummul = 0 - # unis_n = unis[c * nb_multi + n] - # for m in range(nb_outcomes): - # cummul += pvals[n, m] - # if c == 0: - # if (waiting and (cummul > unis_n)): - # z[0][n, m] = 1 - # waiting = False - # else: - # # Only needed if we don't init the output to 0 - # z[0][n, m] = 0 - # else: - # if (cummul > unis_n): - # z[0][n, m] += 1 - # break - - # Vectorized version that is much faster as all the looping is - # done in C even if this make extra work. - for c in range(n_samples): - for n in range(nb_multi): - unis_n = unis[c * nb_multi + n] - # The dtype='float64' is important. Otherwise we don't - # have the same answer as the c code as in the c code - # the cumul is in double precision. - cumsum = pvals[n].cumsum(dtype="float64") - z[0][n, np.searchsorted(cumsum, unis_n)] += 1 - - -class ChoiceFromUniform(MultinomialFromUniform): - """ - Converts samples from a uniform into sample (without replacement) from a - multinomial. 
- - """ - - __props__ = ( - "odtype", - "replace", - ) - - def __init__(self, odtype, replace=False, *args, **kwargs): - self.replace = replace - super().__init__(odtype=odtype, *args, **kwargs) - - def __setstate__(self, state): - self.__dict__.update(state) - if "replace" not in state: - self.replace = False - - def make_node(self, pvals, unis, n=1): - pvals = at.as_tensor_variable(pvals) - unis = at.as_tensor_variable(unis) - if pvals.ndim != 2: - raise NotImplementedError("pvals ndim should be 2", pvals.ndim) - if unis.ndim != 1: - raise NotImplementedError("unis ndim should be 1", unis.ndim) - if self.odtype == "auto": - odtype = "int64" - else: - odtype = self.odtype - out = at.tensor(dtype=odtype, shape=pvals.type.shape) - return Apply(self, [pvals, unis, as_scalar(n)], [out]) - - def c_code_cache_version(self): - return (1,) - - def c_code(self, node, name, ins, outs, sub): - (pvals, unis, n) = ins - (z,) = outs - replace = int(self.replace) - if self.odtype == "auto": - t = "NPY_INT64" - else: - t = ScalarType(self.odtype).dtype_specs()[1] - if t.startswith("pytensor_complex"): - t = t.replace("pytensor_complex", "NPY_COMPLEX") - else: - t = t.upper() - fail = sub["fail"] - return ( - """ - // create a copy of pvals matrix - PyArrayObject* pvals_copy = NULL; - - if (PyArray_NDIM(%(pvals)s) != 2) - { - PyErr_Format(PyExc_TypeError, "pvals ndim should be 2"); - %(fail)s; - } - if (PyArray_NDIM(%(unis)s) != 1) - { - PyErr_Format(PyExc_TypeError, "unis ndim should be 2"); - %(fail)s; - } - - if ( %(n)s > (PyArray_DIMS(%(pvals)s)[1]) ) - { - PyErr_Format(PyExc_ValueError, "Cannot sample without replacement n samples bigger than the size of the distribution."); - %(fail)s; - } - - if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s)) - { - PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n"); - %(fail)s; - } - - pvals_copy = (PyArrayObject*) PyArray_EMPTY(2, - PyArray_DIMS(%(pvals)s), - PyArray_TYPE(%(pvals)s), - 0); - - if (!pvals_copy) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc pvals_copy"); - %(fail)s; - } - PyArray_CopyInto(pvals_copy, %(pvals)s); - - if ((NULL == %(z)s) - || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0]) - || ((PyArray_DIMS(%(z)s))[1] != %(n)s) - ) - { - Py_XDECREF(%(z)s); - npy_intp dims[2]; - dims[0] = PyArray_DIMS(%(pvals)s)[0]; - dims[1] = %(n)s; - %(z)s = (PyArrayObject*) PyArray_EMPTY(2, - dims, - %(t)s, - -1); - if (!%(z)s) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc z output"); - %(fail)s; - } - } - - { // NESTED SCOPE - - const int nb_multi = PyArray_DIMS(%(pvals)s)[0]; - const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1]; - const int n_samples = %(n)s; - - // - // For each multinomial, loop over each possible outcome, - // and set selected pval to 0 after being selected - // - for (int c = 0; c < n_samples; ++c){ - for (int n = 0; n < nb_multi; ++n) - { - double cummul = 0.; - const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n); - dtype_%(z)s* z_nc = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n, c); - for (int m = 0; m < nb_outcomes; ++m) - { - dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m); - cummul += *pvals_nm; - if (cummul > *unis_n) - { - *z_nc = m; - // No need to renormalize after the last samples. - if (c == (n_samples - 1)) - break; - if (! 
%(replace)s ) - { - // renormalize the nth row of pvals, reuse (cummul-*pvals_nm) to initialize the sum - dtype_%(pvals)s sum = cummul - *pvals_nm; - dtype_%(pvals)s* pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m); - *pvals_nm = 0.; - for (int k = m; k < nb_outcomes; ++k) - { - sum = sum + *pvals_n; - pvals_n++; - } - pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, 0); - for (int k = 0; k < nb_outcomes; ++k) - { - *pvals_n = *pvals_n / sum; - pvals_n++; - } - } - break; - } - } - } - } - - // delete pvals_copy - { - Py_XDECREF(pvals_copy); - } - } // END NESTED SCOPE - """ - % locals() - ) - - def perform(self, node, ins, outs): - (pvals, unis, n_samples) = ins - # make a copy so we do not overwrite the input - pvals = copy.copy(pvals) - (z,) = outs - - if n_samples > pvals.shape[1]: - raise ValueError( - "Cannot sample without replacement n samples " - "bigger than the size of the distribution." - ) - - if unis.shape[0] != pvals.shape[0] * n_samples: - raise ValueError( - "unis.shape[0] != pvals.shape[0] * n_samples", - unis.shape[0], - pvals.shape[0], - n_samples, - ) - - if self.odtype == "auto": - odtype = "int64" - else: - odtype = self.odtype - if z[0] is None or not np.all(z[0].shape == [pvals.shape[0], n_samples]): - z[0] = -1 * np.ones((pvals.shape[0], n_samples), dtype=odtype) - - nb_multi = pvals.shape[0] - nb_outcomes = pvals.shape[1] - - # For each multinomial, loop over each possible outcome, - # and set selected pval to 0 after being selected - for c in range(n_samples): - for n in range(nb_multi): - cummul = 0 - unis_n = unis[c * nb_multi + n] - for m in range(nb_outcomes): - cummul += pvals[n, m] - if cummul > unis_n: - z[0][n, c] = m - # set to zero and re-normalize so that it's not - # selected again - if not self.replace: - pvals[n, m] = 0.0 - pvals[n] /= pvals[n].sum() - break diff --git a/pytensor/sandbox/rng_mrg.py b/pytensor/sandbox/rng_mrg.py deleted file mode 100644 index 542b4a7f5d..0000000000 --- a/pytensor/sandbox/rng_mrg.py +++ /dev/null @@ -1,1372 +0,0 @@ -""" -Implementation of MRG31k3p random number generator for PyTensor. - -Generator code in SSJ package (L'Ecuyer & Simard). -http://www.iro.umontreal.ca/~simardr/ssj/indexe.html - -The MRG31k3p algorithm was published in: - -P. L'Ecuyer and R. Touzin, Fast Combined Multiple Recursive Generators with Multipliers of the form a = +/- 2^d +/- 2^e, Proceedings of the 2000 Winter Simulation Conference, Dec. 2000, 683-689. - -The conception of the multi-stream from MRG31k3p was published in: - -P. L'Ecuyer and R. Simard and E. Jack Chen and W. David Kelton, An Object-Oriented Random-Number Package with Many Long Streams and Substreams, Operations Research, volume 50, number 6, 2002, 1073-1075. 
-""" - -import warnings - -import numpy as np - -from pytensor import function, gradient -from pytensor import scalar as aes -from pytensor import shared -from pytensor import tensor as at -from pytensor.compile import optdb -from pytensor.configdefaults import config -from pytensor.gradient import undefined_grad -from pytensor.graph.basic import Apply, Constant, Variable -from pytensor.graph.rewriting.basic import in2out, node_rewriter -from pytensor.link.c.op import COp, Op -from pytensor.link.c.params_type import ParamsType -from pytensor.sandbox import multinomial -from pytensor.scalar import bool as bool_t -from pytensor.scalar import int32 as int_t -from pytensor.tensor import as_tensor_variable, cast, get_vector_length -from pytensor.tensor.math import cos, log, prod, sin, sqrt -from pytensor.tensor.shape import reshape -from pytensor.tensor.type import TensorType, iscalar, ivector, lmatrix - - -warnings.warn( - "The module `pytensor.sandbox.rng_mrg` is deprecated. " - "Use the module `pytensor.tensor.random` for random variables instead.", - DeprecationWarning, - stacklevel=2, -) - - -def matVecModM(A, s, m): - # TODO : need description for method, parameter and return - assert A.dtype == "int64" - return np.int32(np.sum((A * s) % m, 1) % m) - - -def multMatVect(v, A, m1, B, m2): - # TODO : need description for parameter and return - """ - Multiply the first half of v by A with a modulo of m1 and the second half - by B with a modulo of m2. - - Notes - ----- - The parameters of dot_modulo are passed implicitly because passing them - explicitly takes more time than running the function's C-code. - - """ - if multMatVect.dot_modulo is None: - A_sym = lmatrix("A") - s_sym = ivector("s") - m_sym = iscalar("m") - A2_sym = lmatrix("A2") - s2_sym = ivector("s2") - m2_sym = iscalar("m2") - o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym) - multMatVect.dot_modulo = function( - [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o, profile=False - ) - - # This way of calling the PyTensor fct is done to bypass PyTensor overhead. - f = multMatVect.dot_modulo - f.input_storage[0].storage[0] = A - f.input_storage[1].storage[0] = v[:3] - f.input_storage[2].storage[0] = m1 - f.input_storage[3].storage[0] = B - f.input_storage[4].storage[0] = v[3:] - f.input_storage[5].storage[0] = m2 - f.vm() - r = f.output_storage[0].storage[0] - - return r - - -multMatVect.dot_modulo = None - - -class DotModulo(COp): - """ - Efficient and numerically stable implementation of a dot product followed - by a modulo operation. This performs the same function as matVecModM. - - We do this 2 times on 2 triple inputs and concatenating the output. 
- - """ - - __props__ = () - - def make_node(self, A, s, m, A2, s2, m2): - return Apply(self, [A, s, m, A2, s2, m2], [s.type()]) - - def perform(self, node, inputs, outputs): - (A, s, m, A2, s2, m2) = inputs - (out,) = outputs - o1 = matVecModM(A, s, m) - o2 = matVecModM(A2, s2, m2) - out[0] = np.concatenate((o1, o2)) - - def c_code_cache_version(self): - return (6,) - - def c_code(self, node, name, inputs, outputs, sub): - (_A, _s, _m, _A2, _s2, _m2) = inputs - (_z,) = outputs - return """ - int osize = -1; - if (PyArray_NDIM(%(_A)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(A) != 2"); %(fail)s;} - if (PyArray_NDIM(%(_s)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(v) != 1"); %(fail)s;} - if (PyArray_NDIM(%(_m)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(m) != 0"); %(fail)s;} - if (PyArray_NDIM(%(_A2)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(A2) != 2"); %(fail)s;} - if (PyArray_NDIM(%(_s2)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(v2) != 1"); %(fail)s;} - if (PyArray_NDIM(%(_m2)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(m2) != 0"); %(fail)s;} - - if( PyArray_DIMS(%(_A)s)[1] != PyArray_DIMS(%(_s)s)[0]) - {PyErr_SetString(PyExc_NotImplementedError, "A and s shapes don't agree."); %(fail)s;} - if( PyArray_DIMS(%(_A2)s)[1] != PyArray_DIMS(%(_s2)s)[0]) - {PyErr_SetString(PyExc_NotImplementedError, "A2 and s2 shapes don't agree."); %(fail)s;} - - osize = PyArray_DIMS(%(_A)s)[0] + PyArray_DIMS(%(_A2)s)[0]; - if (!%(_z)s - || (PyArray_DIMS(%(_z)s)[0] != osize)) - { - {Py_XDECREF(%(_z)s);} - npy_intp dims[] = {0,}; - dims[0] = osize; - %(_z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, PyArray_TYPE(%(_s)s)); - } - - if(!%(_z)s){%(fail)s;} - - { //makes it compile even though labels jump over variable definitions. 
- - // A has size MxN, s has N, output M - npy_intp M = PyArray_DIMS(%(_A)s)[0]; - npy_intp N = PyArray_DIMS(%(_A)s)[1]; - - const dtype_%(_A)s* __restrict__ DA = (dtype_%(_A)s*)PyArray_DATA(%(_A)s); - dtype_%(_s)s* __restrict__ Ds = (dtype_%(_s)s*)PyArray_DATA(%(_s)s); - dtype_%(_z)s* __restrict__ Dz = (dtype_%(_z)s*)PyArray_DATA(%(_z)s); - const dtype_%(_m)s m = ((dtype_%(_m)s*)PyArray_DATA(%(_m)s))[0]; - - npy_intp SA = PyArray_STRIDES(%(_A)s)[1] / PyArray_DESCR(%(_A)s)->elsize; - npy_intp Ss = PyArray_STRIDES(%(_s)s)[0] / PyArray_DESCR(%(_s)s)->elsize; - npy_intp Sz = PyArray_STRIDES(%(_z)s)[0] / PyArray_DESCR(%(_z)s)->elsize; - - for (npy_int32 i = 0; i < M; ++i) - { - const dtype_%(_A)s* __restrict__ Ak = (dtype_%(_A)s*)(PyArray_BYTES(%(_A)s) + PyArray_STRIDES(%(_A)s)[0] * i); - - npy_int64 r = 0; - - for (npy_int32 j = 0; j < N; ++j) - { - r += (npy_int64)(Ds[j * Ss] * (npy_int64)(Ak[j * SA])) %% m; - } - - Dz[i * Sz] = r %% m; - } - } - - //redo it with the second triple of inputs - { - // A has size MxN, s has N, output M - npy_intp M = PyArray_DIMS(%(_A2)s)[0]; - npy_intp N = PyArray_DIMS(%(_A2)s)[1]; - - const dtype_%(_A2)s* __restrict__ DA = (dtype_%(_A2)s*)PyArray_DATA(%(_A2)s); - dtype_%(_s2)s* __restrict__ Ds = (dtype_%(_s2)s*)PyArray_DATA(%(_s2)s); - const dtype_%(_m2)s m = ((dtype_%(_m2)s*)PyArray_DATA(%(_m2)s))[0]; - - npy_intp SA = PyArray_STRIDES(%(_A2)s)[1] / PyArray_DESCR(%(_A2)s)->elsize; - npy_intp Ss = PyArray_STRIDES(%(_s2)s)[0] / PyArray_DESCR(%(_s2)s)->elsize; - npy_intp Sz = PyArray_STRIDES(%(_z)s)[0] / PyArray_DESCR(%(_z)s)->elsize; - - dtype_%(_z)s* __restrict__ Dz = (dtype_%(_z)s*)PyArray_DATA(%(_z)s) + PyArray_DIMS(%(_A)s)[0] * Sz; - - for (npy_int32 i = 0; i < M; ++i) - { - const dtype_%(_A2)s* __restrict__ Ak = (dtype_%(_A2)s*)(PyArray_BYTES(%(_A2)s) + PyArray_STRIDES(%(_A2)s)[0] * i); - - npy_int64 r = 0; - - for (npy_int32 j = 0; j < N; ++j) - { - r += (npy_int64)(Ds[j * Ss] * (npy_int64)(Ak[j * SA])) %% m; - } - - Dz[i * Sz] = r %% m; - } - - } - - """ % dict( - locals(), **sub - ) - - -# MRG31k3p -# generator constants : -M1 = np.asarray(np.int32(2147483647)) # 2^31 - 1 -M2 = np.asarray(np.int32(2147462579)) # 2^31 - 21069 -MASK12 = np.int32(511) # 2^9 - 1 -MASK13 = np.int32(16777215) # 2^24 - 1 -MASK2 = np.int32(65535) # 2^16 - 1 -MULT2 = np.int32(21069) -NORM = 4.656612873077392578125e-10 # 1./2^31 - -# A1p0 = np.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]], -# dtype='int64') -# A2p0 = np.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]], -# dtype='int64') - -A1p72 = np.asarray( - [ - [1516919229, 758510237, 499121365], - [1884998244, 1516919229, 335398200], - [601897748, 1884998244, 358115744], - ], - dtype="int64", -) -A2p72 = np.asarray( - [ - [1228857673, 1496414766, 954677935], - [1133297478, 1407477216, 1496414766], - [2002613992, 1639496704, 1407477216], - ], - dtype="int64", -) - -A1p134 = np.asarray( - [ - [1702500920, 1849582496, 1656874625], - [828554832, 1702500920, 1512419905], - [1143731069, 828554832, 102237247], - ], - dtype="int64", -) -A2p134 = np.asarray( - [ - [796789021, 1464208080, 607337906], - [1241679051, 1431130166, 1464208080], - [1401213391, 1178684362, 1431130166], - ], - dtype="int64", -) -np_int32_vals = [np.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)] - - -def ff_2p134(rstate): - # TODO : need description for method, parameter and return - return multMatVect(rstate, A1p134, M1, A2p134, M2) - - -def ff_2p72(rstate): - # TODO : need description for method, parameter and return - return multMatVect(rstate, A1p72, 
M1, A2p72, M2) - - -def mrg_next_value(rstate, new_rstate, NORM, mask, offset): - # TODO : need description for method, parameter and return - x11, x12, x13, x21, x22, x23 = rstate - assert isinstance(x11, np.int32) - - i0, i7, i9, i15, i16, i22, i24 = np_int32_vals - # first component - y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24) - - assert isinstance(y1, np.int32) - if y1 < 0 or y1 >= M1: # must also check overflow - y1 -= M1 - y1 += x13 - if y1 < 0 or y1 >= M1: - y1 -= M1 - - x13 = x12 - x12 = x11 - x11 = y1 - - # second component - y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16)) - assert isinstance(y1, np.int32) - if y1 < 0 or y1 >= M2: - y1 -= M2 - y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16)) - assert isinstance(y2, np.int32) - if y2 < 0 or y2 >= M2: - y2 -= M2 - y2 += x23 - if y2 < 0 or y2 >= M2: - y2 -= M2 - y2 += y1 - if y2 < 0 or y2 >= M2: - y2 -= M2 - - x23 = x22 - x22 = x21 - x21 = y2 - - # Must never return either 0 or M1+1 - new_rstate[...] = [x11, x12, x13, x21, x22, x23] - assert new_rstate.dtype == np.int32 - if x11 <= x21: - return (((x11 - x21 + M1) & mask) + offset) * NORM - else: - return (((x11 - x21) & mask) + offset) * NORM - - -class mrg_uniform_base(Op): - # TODO : need description for class, parameter - __props__ = ("output_type", "inplace") - params_type = ParamsType( - inplace=bool_t, - # following params will come from self.output_type. - # NB: As output object may not be allocated in C code, - # we can not be sure to get these properties from output. - # So, we should better get them as params from self.output_type. - ndim=int_t, - otypenum=int_t, - otype_is_float32=bool_t, - ) - - def __init__(self, output_type, inplace=False): - Op.__init__(self) - self.output_type = output_type - self.inplace = inplace - if inplace: - self.destroy_map = {0: [0]} - self.warned_numpy_version = False - - # These attributes (used as params) are created as properties - # to make them available even for old pickled objects, e.g. - # when testing old interface or when using FAST_COMPILE mode. - ndim = property(lambda self: self.output_type.ndim) - otypenum = property(lambda self: np.dtype(self.output_type.dtype).num) - otype_is_float32 = property(lambda self: self.output_type.dtype == "float32") - - def __str__(self): - if self.inplace: - s = "inplace" - else: - s = "no_inplace" - return self.__class__.__name__ + f"{{{self.output_type},{s}}}" - - def grad(self, inputs, ograd): - return [ - gradient.grad_undefined( - self, k, inp, "No gradient defined through random sampling op" - ) - for k, inp in enumerate(inputs) - ] - - def R_op(self, inputs, eval_points): - return [None for i in eval_points] - - -class mrg_uniform(COp, mrg_uniform_base): - # CPU VERSION - _f16_ok = True - - def make_node(self, rstate, size): - # error checking slightly redundant here, since - # this op should not be called directly. - # - # call through MRG_RandomStream instead. 
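`mrg_next_value` above advances a six-word state by one MRG31k3p step using int32 arithmetic with explicit overflow handling. A pure-Python sketch of the same recurrence, using unbounded integers (so the overflow guards reduce to conditional subtractions) and the float64 combining path (mask `0xFFFFFFFF`, offset 0); the constants are the module's own:

```python
M1 = 2**31 - 1          # 2147483647
M2 = 2**31 - 21069      # 2147462579
MASK12, MASK13, MASK2, MULT2 = 2**9 - 1, 2**24 - 1, 2**16 - 1, 21069
NORM = 1.0 / 2**31

def mrg31k3p_step(state):
    x11, x12, x13, x21, x22, x23 = state
    # First component, modulus M1.
    y1 = ((x12 & MASK12) << 22) + (x12 >> 9) + ((x13 & MASK13) << 7) + (x13 >> 24)
    if y1 >= M1:
        y1 -= M1
    y1 += x13
    if y1 >= M1:
        y1 -= M1
    x13, x12, x11 = x12, x11, y1
    # Second component, modulus M2.
    y1 = ((x21 & MASK2) << 15) + MULT2 * (x21 >> 16)
    if y1 >= M2:
        y1 -= M2
    y2 = ((x23 & MASK2) << 15) + MULT2 * (x23 >> 16)
    if y2 >= M2:
        y2 -= M2
    y2 += x23
    if y2 >= M2:
        y2 -= M2
    y2 += y1
    if y2 >= M2:
        y2 -= M2
    x23, x22, x21 = x22, x21, y2
    # Combine the two components into a uniform draw in (0, 1].
    u = (x11 - x21 + M1 if x11 <= x21 else x11 - x21) * NORM
    return [x11, x12, x13, x21, x22, x23], u

state = [12345] * 6          # the module's default seed, replicated six times
state, u = mrg31k3p_step(state)
```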
- out_shape = () - for i in range(self.output_type.ndim): - if at.extract_constant(size[i]) == 1: - out_shape += (1,) - else: - out_shape += (None,) - output_var = self.output_type.clone(shape=out_shape)() - rstate = as_tensor_variable(rstate) - size = as_tensor_variable(size) - return Apply(self, [rstate, size], [rstate.type(), output_var]) - - @classmethod - def new(cls, rstate, ndim, dtype, size): - v_size = as_tensor_variable(size) - if ndim is None: - ndim = get_vector_length(v_size) - op = cls(TensorType(dtype, shape=(None,) * ndim)) - return op(rstate, v_size) - - def perform(self, node, inp, out, params): - rstate, size = inp - o_rstate, o_sample = out - n_elements = 1 - for s in size: - n_elements *= s - if n_elements > M1: - # The limit is on the C code. This perform don't - # have this limit. But to have all of them behave the - # same (and have DebugMode don't use too much memory for - # some rng_mrg tests) I also add this limit here. - raise ValueError("rng_mrg does not support more then (2**31 -1) samples") - - rstate = np.asarray(rstate) # bring state from XXX if necessary - if not self.inplace: - rstate = rstate.copy() - - n_streams, _ = rstate.shape - - rval = np.zeros(n_elements, dtype=self.output_type.dtype) - if rval.dtype == "float16": - mask = 0x7FFF - offset = 1 - NORM = np.float16(3.0458e-05) - elif rval.dtype == "float32": - mask = 0xFFFFFFFF - offset = 0 - NORM = np.float32(4.6566126e-10) - elif rval.dtype == "float64": - mask = 0xFFFFFFFF - offset = 0 - NORM = 4.656612873077392578125e-10 # 1./2^31 - - err_orig = np.seterr(over="ignore") - try: - for i in range(n_elements): - sample = mrg_next_value( - rstate[i % n_streams], - rstate[i % n_streams], - NORM=NORM, - mask=mask, - offset=offset, - ) - rval[i] = sample - finally: - np.seterr(**err_orig) - - # send to GPU if necessary - o_rstate[0] = node.outputs[0].type.filter(rstate) - o_sample[0] = node.outputs[1].type.filter(rval.reshape(size)) - - def c_support_code(self, **kwargs): - return "\n".join( - """ - void cpu_rng_mrg_uniform_%(dtype)s(PyArrayObject* o_sample, PyArrayObject* o_rstate, - npy_int64 n_elements, int n_streams) { - const npy_int32 i0 = 0; - const npy_int32 i7 = 7; - const npy_int32 i9 = 9; - const npy_int32 i15 = 15; - const npy_int32 i16 = 16; - const npy_int32 i22 = 22; - const npy_int32 i24 = 24; - - const npy_int32 M1 = 2147483647; //2^31 - 1 - const npy_int32 M2 = 2147462579; //2^31 - 21069 - const npy_int32 MASK12 = 511; //2^9 - 1 - const npy_int32 MASK13 = 16777215; //2^24 - 1 - const npy_int32 MASK2 = 65535; //2^16 - 1 - const npy_int32 MULT2 = 21069; - - %(dtype)s* sample_data = (%(dtype)s *) PyArray_DATA(o_sample); - npy_int32* state_data = (npy_int32 *) PyArray_DATA(o_rstate); - for (int i = 0; i < n_elements; ++i) - { - npy_int32 * state_data_i = state_data + (i%%n_streams)*6; - npy_int32 y1, y2, x11, x12, x13, x21, x22, x23; - - x11 = state_data_i[0]; - x12 = state_data_i[1]; - x13 = state_data_i[2]; - x21 = state_data_i[3]; - x22 = state_data_i[4]; - x23 = state_data_i[5]; - - y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24); - if ((y1 < 0 || y1 >= M1)) //must also check overflow - y1 -= M1; - y1 += x13; - if ((y1 < 0 or y1 >= M1)) - y1 -= M1; - x13 = x12; - x12 = x11; - x11 = y1; - - y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16)); - if (y1 < 0 || y1 >= M2) - y1 -= M2; - y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16)); - if (y2 < 0 || y2 >= M2) - y2 -= M2; - y2 += x23; - if (y2 < 0 || y2 >= M2) - y2 -= M2; - y2 += y1; - if (y2 < 0 or y2 
>= M2) - y2 -= M2; - - x23 = x22; - x22 = x21; - x21 = y2; - - if (x11 <= x21) { - assert((x11 - x21 + M1) <= M1); - sample_data[i] = (x11 - x21 + M1) * %(NORM)s; - } - else - { - assert(x11 - x21 <= M1); - sample_data[i] = (x11 - x21) * %(NORM)s; - } - - state_data_i[0]= x11; - state_data_i[1]= x12; - state_data_i[2]= x13; - state_data_i[3]= x21; - state_data_i[4]= x22; - state_data_i[5]= x23; - } - } - """ - % dict(dtype=dtype, NORM=NORM) - for dtype, NORM in ( - ("npy_float32", "4.6566126e-10f"), - ("npy_float64", "4.656612873077392578125e-10"), - ) - ) - - def c_code(self, node, name, inp, out, sub): - # If we try to use the C code here with something else than a - # TensorType, something is wrong. - assert isinstance(node.inputs[0].type, TensorType) - if self.output_type.dtype == "float16": - # C code is not tested, fall back to Python - raise NotImplementedError() - return """ - //////// - npy_int64 odims_i; - npy_int64 n_elements = 1; - int n_streams = 0; - int must_alloc_sample = ((NULL == %(o_sample)s) - || (PyArray_NDIM(%(o_sample)s) != %(params)s->ndim) - || !(PyArray_ISCONTIGUOUS(%(o_sample)s))); - int o_rstate_requirement = %(params)s->inplace ? - (NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED) : - (NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED); - - const npy_int32 i0 = 0; - const npy_int32 i7 = 7; - const npy_int32 i9 = 9; - const npy_int32 i15 = 15; - const npy_int32 i16 = 16; - const npy_int32 i22 = 22; - const npy_int32 i24 = 24; - - const npy_int32 M1 = 2147483647; //2^31 - 1 - const npy_int32 M2 = 2147462579; //2^31 - 21069 - const npy_int32 MASK12 = 511; //2^9 - 1 - const npy_int32 MASK13 = 16777215; //2^24 - 1 - const npy_int32 MASK2 = 65535; //2^16 - 1 - const npy_int32 MULT2 = 21069; - - // We have to read size[i] as an int64, but odims has to be intp* - // for NumPy on 32-bit platforms. 
- npy_intp* odims = (npy_intp*)malloc(%(params)s->ndim * sizeof(npy_intp)); - if (odims == NULL) { - PyErr_NoMemory(); - %(just_fail)s - } - - if (PyArray_NDIM(%(size)s) != 1) - { - PyErr_SetString(PyExc_ValueError, "size must be vector"); - %(fail)s - } - if (PyArray_DIMS(%(size)s)[0] != %(params)s->ndim) - { - PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)", - %(params)s->ndim, int(PyArray_DIMS(%(size)s)[0])); - %(fail)s - } - - for (int i = 0; i < %(params)s->ndim; ++i) - { - odims_i = *(dtype_%(size)s *)PyArray_GETPTR1(%(size)s, i); - odims[i] = odims_i; - n_elements *= odims_i; - must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]); - //fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]); - //printf("%%li", n_elements); - } - //fprintf(stderr, "n_elements %%lld\\n", (long long)n_elements); - if (n_elements > M1) - { - PyErr_SetString( - PyExc_ValueError, - "rng_mrg cpu-implementation does not support more than (2**31 -1) samples"); - %(fail)s - } - - if (must_alloc_sample) - { - Py_XDECREF(%(o_sample)s); - %(o_sample)s = (PyArrayObject*)PyArray_SimpleNew(%(params)s->ndim, odims, %(params)s->otypenum); - if(!%(o_sample)s) { - PyErr_SetString(PyExc_MemoryError, "failed to alloc mrg_uniform output"); - %(fail)s - } - } - Py_XDECREF(%(o_rstate)s); - %(o_rstate)s = (PyArrayObject*)PyArray_FromAny( - (PyObject*)%(rstate)s, - NULL, 0, 0, o_rstate_requirement,NULL); - - if (PyArray_NDIM(%(o_rstate)s) != 2) - { - PyErr_SetString(PyExc_ValueError, "rstate must be matrix"); - %(fail)s - } - if (PyArray_DIMS(%(o_rstate)s)[1] != 6) - { - PyErr_Format(PyExc_ValueError, "rstate must have 6 columns"); - %(fail)s - } - if (PyArray_DESCR(%(o_rstate)s)->type_num != NPY_INT32) - { - PyErr_SetString(PyExc_ValueError, "rstate must be int32"); - %(fail)s - } - n_streams = PyArray_DIMS(%(o_rstate)s)[0]; - - if (%(params)s->otype_is_float32) { - cpu_rng_mrg_uniform_npy_float32(%(o_sample)s, %(o_rstate)s, n_elements, n_streams); - } else { - cpu_rng_mrg_uniform_npy_float64(%(o_sample)s, %(o_rstate)s, n_elements, n_streams); - } - - free(odims); - //////// - """ % dict( - rstate=inp[0], - size=inp[1], - o_rstate=out[0], - o_sample=out[1], - params=sub["params"], - just_fail=sub["fail"], - fail=""" - { - free(odims); - %(fail)s - } - """ - % dict(fail=sub["fail"]), - ) - - def c_code_cache_version(self): - return (10,) - - -def guess_n_streams(size, warn=False): - # TODO : need description for parameter 'size' - """ - Return a guess at a good number of streams. - - Parameters - ---------- - warn : bool, optional - If True, warn when a guess cannot be made (in which case we - return 60 * 256). - - """ - # TODO: a smart way of choosing the number of streams, see #612. - # Note that this code was moved out of `MRG_RandomStream` so that it can - # be easily accessed from tests, where we want to disable the warning. - if isinstance(size, (tuple, list)) and all(isinstance(i, int) for i in size): - # We can make a guess. - r = 1 - for s in size: - r *= s - if r > 6: - r = r // 6 # chosen as fastest for rbm_benchmark - - # The purpose of sampling from many streams is to be able to use - # the GPU to its full capacity. It just wastes RAM and - # stream-initialization time to allocate more streams than necessary - # for the GPU. - # XXX: This number is chosen to be good for 280 and 480 architectures, - # Better would be to use pycuda to query the number of - # processors on the GPU device, - # rather than guessing 60. 
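The stream-count heuristic being removed here is small enough to restate: multiply the requested dimensions, divide by 6, and cap the result at 60 * 256 streams (the same fallback returned when the size cannot be inspected). A quick illustration for literal shapes, with the expected values computed from the code above:

```python
def guess(size):
    # Same heuristic as guess_n_streams for a tuple of literal ints.
    r = 1
    for s in size:
        r *= s
    if r > 6:
        r //= 6
    return min(r, 60 * 256)

print(guess((10, 20)))      # 33
print(guess((500, 50)))     # 4166
print(guess((1000, 1000)))  # 15360, i.e. the 60 * 256 cap
```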
- return min(r, 60 * 256) - else: - if warn: - warnings.warn( - ( - "MRG_RandomStream can't determine the number ofstreams " - f"from size ({size}), guessing 60*256" - ), - DeprecationWarning, - stacklevel=3, - ) - return 60 * 256 - - -class MRG_RandomStream: - """ - Module component with similar interface to numpy.random - (numpy.random.RandomState). - - Parameters - ---------- - seed : int or list of 6 int - A default seed to initialize the random state. - If a single int is given, it will be replicated 6 times. - The first 3 values of the seed must all be less than M1 = 2147483647, - and not all 0; and the last 3 values must all be less than - M2 = 2147462579, and not all 0. - - """ - - def updates(self): - # TODO : need description for method and return - return list(self.state_updates) - - def __init__(self, seed=12345): - # A list of pairs of the form (input_r, output_r), representing the - # update rules of all the random states generated - # by this RandomStream. - self.state_updates = [] - - super().__init__() - - # Needed to reset the streams. - self.default_instance_seed = seed - - self.set_rstate(seed) - - def set_rstate(self, seed): - # TODO : need description for method, parameter - if isinstance(seed, (int, np.int32, np.int64)): - if seed == 0: - raise ValueError("seed should not be 0", seed) - elif seed >= M2: - raise ValueError(f"seed should be less than {int(M2)}", seed) - self.rstate = np.asarray([seed] * 6, dtype="int32") - elif len(seed) == 6: - if seed[0] == 0 and seed[1] == 0 and seed[2] == 0: - raise ValueError("The first 3 values of seed should not be all 0", seed) - if seed[3] == 0 and seed[4] == 0 and seed[5] == 0: - raise ValueError("The last 3 values of seed should not be all 0", seed) - if seed[0] >= M1 or seed[1] >= M1 or seed[2] >= M1: - raise ValueError( - f"The first 3 values of seed should be less than {int(M1)}", seed - ) - if seed[3] >= M2 or seed[4] >= M2 or seed[5] >= M2: - raise ValueError( - f"The last 3 values of seed should be less than {M2}", seed - ) - self.rstate = np.asarray(seed, dtype="int32") - else: - raise TypeError("seed should be 1 integer or 6 integers") - - def seed(self, seed=None): - """ - Re-initialize each random stream. - - Parameters - ---------- - seed : None or integer in range 0 to 2**30 - Each random stream will be assigned a unique state that depends - deterministically on this value. - - Returns - ------- - None - - """ - if seed is None: - seed = self.default_instance_seed - self.set_rstate(seed) - - for old_r, new_r, size, nstreams in self.state_updates: - if nstreams is None: - nstreams = self.n_streams(size) - rstates = self.get_substream_rstates(nstreams, new_r.owner.outputs[1].dtype) - assert ( - old_r.get_value(borrow=True, return_internal_type=True).shape - == rstates.shape - ) - assert rstates.dtype == old_r.dtype - old_r.set_value(rstates, borrow=True) - - def inc_rstate(self): - """ - Update self.rstate to be skipped 2^134 steps forward to the next stream - start. - - """ - # self.rstate = ff_2p134(self.rstate) - self.rstate = multMatVect(self.rstate, A1p134, M1, A2p134, M2) - assert self.rstate.dtype == np.int32 - - @config.change_flags(compute_test_value="off") - def get_substream_rstates(self, n_streams, dtype, inc_rstate=True): - # TODO : need description for parameter and return - """ - Initialize a matrix in which each row is a MRG stream state, - and they are spaced by 2**72 samples. 
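`set_rstate` above accepts either a single integer (replicated six times) or a full six-word state, with the bounds spelled out in the class docstring. A compact sketch of just that validation (the helper name is mine):

```python
import numpy as np

M1 = 2147483647   # 2**31 - 1, bound on the first three seed words
M2 = 2147462579   # 2**31 - 21069, bound on the last three seed words

def seed_to_rstate(seed):
    if isinstance(seed, (int, np.integer)):
        # A single int is replicated six times; it must be non-zero and < M2.
        if seed == 0 or seed >= M2:
            raise ValueError("seed should be a non-zero integer below M2")
        return np.asarray([seed] * 6, dtype="int32")
    seed = list(seed)
    if len(seed) != 6:
        raise TypeError("seed should be 1 integer or 6 integers")
    # Neither half may be all zeros; first half < M1, second half < M2.
    if not any(seed[:3]) or not any(seed[3:]):
        raise ValueError("neither half of the seed may be all zeros")
    if max(seed[:3]) >= M1 or max(seed[3:]) >= M2:
        raise ValueError("seed words exceed the MRG31k3p moduli")
    return np.asarray(seed, dtype="int32")

rstate = seed_to_rstate(12345)   # the default MRG_RandomStream seed
```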
- - """ - assert isinstance(dtype, str) - assert n_streams < 2**72 - assert n_streams > 0 - rval = np.zeros((n_streams, 6), dtype="int32") - rval[0] = self.rstate - - # If multMatVect.dot_modulo isn't compiled, compile it. - if multMatVect.dot_modulo is None: - multMatVect(rval[0], A1p72, M1, A2p72, M2) - - # This way of calling the PyTensor fct is done to bypass PyTensor overhead. - f = multMatVect.dot_modulo - f.input_storage[0].storage[0] = A1p72 - f.input_storage[2].storage[0] = M1 - f.input_storage[3].storage[0] = A2p72 - f.input_storage[5].storage[0] = M2 - for i in range(1, n_streams): - # Inline the following call to bypass Python overhead - # rval[i] = ff_2p72(rval[i - 1]) - v = rval[i - 1] - f.input_storage[1].storage[0] = v[:3] - f.input_storage[4].storage[0] = v[3:] - f.vm() - rval[i] = f.output_storage[0].storage[0] - - if inc_rstate: - self.inc_rstate() - - return rval - - def n_streams(self, size): - # TODO : need description for method, parameter and return - return guess_n_streams(size) - - def pretty_return(self, node_rstate, new_rstate, sample, size, nstreams): - # TODO : need description for method, parameter and return - sample.rstate = node_rstate - sample.update = (node_rstate, new_rstate) - self.state_updates.append((node_rstate, new_rstate, size, nstreams)) - node_rstate.default_update = new_rstate - return sample - - def uniform( - self, size, low=0.0, high=1.0, ndim=None, dtype=None, nstreams=None, **kwargs - ): - # TODO : need description for parameter 'size', 'ndim', 'nstreams' - """ - Sample a tensor of given size whose element from a uniform - distribution between low and high. - - If the size argument is ambiguous on the number of dimensions, - ndim may be a plain integer to supplement the missing information. - - Parameters - ---------- - low - Lower bound of the interval on which values are sampled. - If the ``dtype`` arg is provided, ``low`` will be cast into - dtype. This bound is excluded. - high - Higher bound of the interval on which values are sampled. - If the ``dtype`` arg is provided, ``high`` will be cast into - dtype. This bound is excluded. - size - Can be a list of integer or PyTensor variable (ex: the shape - of other PyTensor Variable). - dtype - The output data type. If dtype is not specified, it will be - inferred from the dtype of low and high, but will be at - least as precise as floatX. 
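`get_substream_rstates` above fills row 0 with the current state and derives each following row with the precomputed 2**72 jump (A1p72 modulo M1 for the first half of the state, A2p72 modulo M2 for the second); the inlined `dot_modulo` storage writes only exist to skip Python overhead. A plain sketch of that loop, with the jump passed in as a callable (for example the `mult_mat_vect` sketch given earlier):

```python
import numpy as np

def substream_states(rstate, n_streams, jump):
    # `jump` advances a six-word state by 2**72 draws, e.g.
    # lambda v: mult_mat_vect(v, A1p72, M1, A2p72, M2).
    rval = np.zeros((n_streams, 6), dtype="int32")
    rval[0] = rstate
    for i in range(1, n_streams):
        rval[i] = jump(rval[i - 1])
    return rval
```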
- - """ - low = as_tensor_variable(low) - high = as_tensor_variable(high) - - if dtype is None: - dtype = aes.upcast(config.floatX, low.dtype, high.dtype) - - low = cast(low, dtype=dtype) - high = cast(high, dtype=dtype) - - low = undefined_grad(low) - high = undefined_grad(high) - - if isinstance(size, tuple): - msg = "size must be a tuple of int or an PyTensor variable" - assert all(isinstance(i, (np.integer, int, Variable)) for i in size), msg - if any(isinstance(i, (np.integer, int)) and i <= 0 for i in size): - raise ValueError( - "The specified size contains a dimension with value <= 0", size - ) - - else: - if not (isinstance(size, Variable) and size.ndim == 1): - raise TypeError( - "size must be a tuple of int or an PyTensor " - "Variable with 1 dimension, got " - + str(size) - + " of type " - + str(type(size)) - ) - orig_nstreams = nstreams - if nstreams is None: - nstreams = self.n_streams(size) - rstates = self.get_substream_rstates(nstreams, dtype) - - d = {} - if "target" in kwargs: - d = dict(target=kwargs.pop("target")) - if len(kwargs) > 0: - raise TypeError( - f"uniform() got unexpected keyword arguments {kwargs.keys()}" - ) - node_rstate = shared(rstates, **d) - u = self.pretty_return( - node_rstate, - *mrg_uniform.new(node_rstate, ndim, dtype, size), - size=size, - nstreams=orig_nstreams, - ) - r = u * (high - low) + low - - if u.type.broadcastable != r.type.broadcastable: - raise NotImplementedError( - "Increase the size to match the broadcasting pattern of " - "`low` and `high` arguments" - ) - - assert r.dtype == dtype - return r - - def binomial( - self, size=None, n=1, p=0.5, ndim=None, dtype="int64", nstreams=None, **kwargs - ): - # TODO : need description for method, parameter and return - if n == 1: - p = undefined_grad(as_tensor_variable(p)) - x = self.uniform(size=size, nstreams=nstreams, **kwargs) - return cast(x < p, dtype) - else: - raise NotImplementedError("MRG_RandomStream.binomial with n > 1") - - def multinomial( - self, - size=None, - n=1, - pvals=None, - ndim=None, - dtype="int64", - nstreams=None, - **kwargs, - ): - # TODO : need description for parameter and return - """ - Sample `n` (`n` needs to be >= 1, default 1) times from a multinomial - distribution defined by probabilities pvals. - - Example : pvals = [[.98, .01, .01], [.01, .49, .50]] and n=1 will - probably result in [[1,0,0],[0,0,1]]. When setting n=2, this - will probably result in [[2,0,0],[0,1,1]]. - - Notes - ----- - -`size` and `ndim` are only there keep the same signature as other - uniform, binomial, normal, etc. - TODO : adapt multinomial to take that into account - - -Does not do any value checking on pvals, i.e. there is no - check that the elements are non-negative, less than 1, or - sum to 1. passing pvals = [[-2., 2.]] will result in - sampling [[0, 0]] - - """ - if pvals is None: - raise TypeError("You have to specify pvals") - pvals = as_tensor_variable(pvals) - pvals = undefined_grad(pvals) - if size is not None: - if any(isinstance(i, int) and i <= 0 for i in size): - raise ValueError( - "The specified size contains a dimension with value <= 0", size - ) - - if size is not None: - raise ValueError( - "Provided a size argument to MRG_RandomStream.multinomial, " - "which does not use the size argument." - ) - if ndim is not None: - raise ValueError( - "Provided an ndim argument to MRG_RandomStream.multinomial, " - "which does not use the ndim argument." 
- ) - if pvals.ndim == 2: - size = pvals[:, 0].shape * n - unis = self.uniform(size=size, ndim=1, nstreams=nstreams, **kwargs) - op = multinomial.MultinomialFromUniform(dtype) - n_samples = as_tensor_variable(n) - return op(pvals, unis, n_samples) - else: - raise NotImplementedError( - "MRG_RandomStream.multinomial only implemented for pvals.ndim = 2" - ) - - def choice( - self, - size=1, - a=None, - replace=True, - p=None, - ndim=None, - dtype="int64", - nstreams=None, - **kwargs, - ): - """ - Sample `size` times from a multinomial distribution defined by - probabilities `p`, and returns the indices of the sampled elements. - Sampled values are between 0 and `p.shape[1]-1`. - Only sampling without replacement is implemented for now. - - Parameters - ---------- - size: integer or integer tensor (default 1) - The number of samples. It should be between 1 and `p.shape[1]-1`. - a: int or None (default None) - For now, a should be None. This function will sample - values between 0 and `p.shape[1]-1`. When a != None will be - implemented, if `a` is a scalar, the samples are drawn from the - range 0,...,a-1. We default to 2 as to have the same interface as - RandomStream. - replace: bool (default True) - Whether the sample is with or without replacement. - Only replace=False is implemented for now. - p: 2d numpy array or pytensor tensor - the probabilities of the distribution, corresponding to values - 0 to `p.shape[1]-1`. - - Example : p = [[.98, .01, .01], [.01, .49, .50]] and size=1 will - probably result in [[0],[2]]. When setting size=2, this - will probably result in [[0,1],[2,1]]. - - Notes - ----- - -`ndim` is only there keep the same signature as other - uniform, binomial, normal, etc. - - -Does not do any value checking on pvals, i.e. there is no - check that the elements are non-negative, less than 1, or - sum to 1. passing pvals = [[-2., 2.]] will result in - sampling [[0, 0]] - - -Only replace=False is implemented for now. - - """ - if replace: - raise NotImplementedError( - "MRG_RandomStream.choice only works without replacement for now." - ) - - if a is not None: - raise TypeError( - "For now, a has to be None in " - "MRG_RandomStream.choice. Sampled values are " - "between 0 and p.shape[1]-1" - ) - - if p is None: - raise TypeError( - "For now, p has to be specified in MRG_RandomStream.choice." - ) - p = as_tensor_variable(p) - p = undefined_grad(p) - - if ndim is not None: - raise ValueError("ndim argument to MRG_RandomStream.choice is not used.") - - if p.ndim != 2: - raise NotImplementedError( - "MRG_RandomStream.choice is only implemented for p.ndim = 2" - ) - - shape = p[:, 0].shape * size - unis = self.uniform(size=shape, ndim=1, nstreams=nstreams, **kwargs) - op = multinomial.ChoiceFromUniform(odtype=dtype) - return op(p, unis, as_tensor_variable(size)) - - def multinomial_wo_replacement( - self, - size=None, - n=1, - pvals=None, - ndim=None, - dtype="int64", - nstreams=None, - **kwargs, - ): - warnings.warn( - "`MRG_RandomStream.multinomial_wo_replacement` is " - "deprecated; use `MRG_RandomStream.choice` instead.", - DeprecationWarning, - stacklevel=2, - ) - assert size is None - return self.choice( - size=n, - a=None, - replace=False, - p=pvals, - dtype=dtype, - nstreams=nstreams, - ndim=ndim, - **kwargs, - ) - - def normal( - self, - size, - avg=0.0, - std=1.0, - ndim=None, - dtype=None, - nstreams=None, - truncate=False, - **kwargs, - ): - """ - Sample a tensor of values from a normal distribution. 
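`normal`, whose body follows, draws an even number of uniforms and maps pairs of them through the Box-Muller transform; the optional `truncate` flag then resamples any value falling outside two standard deviations. A NumPy-only sketch of the core transform, with NumPy's generator standing in for the MRG uniform stream:

```python
import numpy as np

rng = np.random.default_rng(12345)   # stand-in for the MRG uniform source
avg, std, n = 0.0, 1.0, 10

# Two batches of uniforms; the MRG stream never returns exactly 0, which keeps
# the log well defined (NumPy's generator could, with negligible probability).
u1, u2 = rng.random(n), rng.random(n)
r = np.sqrt(-2.0 * np.log(u1))
theta = 2.0 * np.pi * u2
z0, z1 = r * np.cos(theta), r * np.sin(theta)  # two independent N(0, 1) batches
samples = avg + std * np.concatenate([z0, z1])
```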
- - Parameters - ---------- - size : int_vector_like - Array dimensions for the output tensor. - avg : float_like, optional - The mean value for the truncated normal to sample from (defaults to 0.0). - std : float_like, optional - The standard deviation for the truncated normal to sample from (defaults to 1.0). - truncate : bool, optional - Truncates the normal distribution at 2 standard deviations if True (defaults to False). - When this flag is set, the standard deviation of the result will be less than the one specified. - ndim : int, optional - The number of dimensions for the output tensor (defaults to None). - This argument is necessary if the size argument is ambiguous on the number of dimensions. - dtype : str, optional - The data-type for the output tensor. If not specified, - the dtype is inferred from avg and std, but it is at least as precise as floatX. - kwargs - Other keyword arguments for random number generation (see uniform). - - Returns - ------- - samples : TensorVariable - A PyTensor tensor of samples randomly drawn from a normal distribution. - - """ - size = _check_size(size) - avg = undefined_grad(as_tensor_variable(avg)) - std = undefined_grad(as_tensor_variable(std)) - - if dtype is None: - dtype = aes.upcast(config.floatX, avg.dtype, std.dtype) - - avg = at.cast(avg, dtype=dtype) - std = at.cast(std, dtype=dtype) - - # generate even number of uniform samples - # Do manual constant folding to lower optiimizer work. - if isinstance(size, Constant): - n_odd_samples = size.prod(dtype="int64") - else: - n_odd_samples = prod(size, dtype="int64") - n_even_samples = n_odd_samples + n_odd_samples % 2 - uniform = self.uniform( - (n_even_samples,), - low=0.0, - high=1.0, - ndim=1, - dtype=dtype, - nstreams=nstreams, - **kwargs, - ) - - # box-muller transform - u1 = uniform[: n_even_samples // 2] - u2 = uniform[n_even_samples // 2 :] - r = sqrt(-2.0 * log(u1)) - theta = np.array(2.0 * np.pi, dtype=dtype) * u2 - cos_theta, sin_theta = cos(theta), sin(theta) - z0 = r * cos_theta - z1 = r * sin_theta - - if truncate: - # use valid samples - to_fix0 = (z0 < -2.0) | (z0 > 2.0) - to_fix1 = (z1 < -2.0) | (z1 > 2.0) - z0_valid = z0[at.nonzero(~to_fix0)] - z1_valid = z1[at.nonzero(~to_fix1)] - - # re-sample invalid samples - to_fix0 = at.nonzero(to_fix0)[0] - to_fix1 = at.nonzero(to_fix1)[0] - n_fix_samples = to_fix0.size + to_fix1.size - lower = at.constant(1.0 / np.e**2, dtype=dtype) - u_fix = self.uniform( - (n_fix_samples,), - low=lower, - high=1.0, - ndim=1, - dtype=dtype, - nstreams=nstreams, - **kwargs, - ) - r_fix = sqrt(-2.0 * log(u_fix)) - z0_fixed = r_fix[: to_fix0.size] * cos_theta[to_fix0] - z1_fixed = r_fix[to_fix0.size :] * sin_theta[to_fix1] - - # pack everything together to a useful result - norm_samples = at.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed) - else: - norm_samples = at.join(0, z0, z1) - if isinstance(n_odd_samples, Variable): - samples = norm_samples[:n_odd_samples] - elif n_odd_samples % 2 == 1: - samples = norm_samples[:-1] - else: - samples = norm_samples - samples = reshape(samples, newshape=size, ndim=ndim) - samples *= std - samples += avg - - return samples - - def truncated_normal( - self, size, avg=0.0, std=1.0, ndim=None, dtype=None, nstreams=None, **kwargs - ): - """ - Sample a tensor of values from a symmetrically truncated normal distribution. - - Parameters - ---------- - size : int_vector_like - Array dimensions for the output tensor. - avg : float_like, optional - The mean value for the truncated normal to sample from (defaults to 0.0). 
- std : float_like, optional - The standard deviation for the truncated normal to sample from (defaults to 1.0). - ndim : int, optional - The number of dimensions for the output tensor (defaults to None). - This argument is necessary if the size argument is ambiguous on the number of dimensions. - dtype : str, optional - The data-type for the output tensor. If not specified, - the dtype is inferred from avg and std, but it is at least as precise as floatX. - kwargs - Other keyword arguments for random number generation (see uniform). - - Returns - ------- - samples : TensorVariable - A PyTensor tensor of samples randomly drawn from a truncated normal distribution. - - See Also - -------- - normal - """ - # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) - std = std / at.constant(0.87962566103423978) - return self.normal( - size=size, - avg=avg, - std=std, - truncate=True, - ndim=ndim, - dtype=dtype, - nstreams=nstreams, - **kwargs, - ) - - -def _check_size(size): - """ - Canonicalise inputs to get valid output sizes for PyTensor tensors. - - Parameters - ---------- - size : int_vector_like - Some variable that could serve as the shape for an PyTensor tensor. - This can be an int, a tuple of ints, a list of ints - or an PyTensor Variable with similar properties. - - Returns - ------- - size_var : int_vector - A one-dimensional PyTensor variable encapsulating the given size. - - Raises - ------ - ValueError - If this method can not build a valid size from the input. - """ - # non-tuple checks and scalar-to-tuple transform - if isinstance(size, Variable): - if size.ndim == 1: - return size - elif size.ndim == 0: - return at.stack([size], ndim=1) - else: - raise ValueError( - "PyTensor variable must have 1 dimension to be a valid size.", size - ) - elif isinstance(size, (np.integer, int)): - return at.constant([size], ndim=1) - elif not isinstance(size, (tuple, list)): - raise ValueError("Size must be a int, tuple, list or PyTensor variable.", size) - - # check entries of list or tuple - for i in size: - if isinstance(i, Variable): - if i.ndim != 0: - raise ValueError("Non-scalar PyTensor variable in size", size, i) - elif isinstance(i, (np.integer, int)): - if i <= 0: - raise ValueError( - "Non-positive dimensions not allowed in size.", size, i - ) - else: - raise ValueError( - "Only PyTensor variables and integers are allowed in a size-tuple.", - size, - i, - ) - - return at.as_tensor_variable(size, ndim=1) - - -@node_rewriter((mrg_uniform_base,)) -def mrg_random_make_inplace(fgraph, node): - - op = node.op - if isinstance(op, mrg_uniform_base) and not op.inplace: - # op might be gpu version - new_op = op.__class__(op.output_type, inplace=True) - return new_op.make_node(*node.inputs).outputs - return False - - -optdb.register( - "random_make_inplace_mrg", - in2out(mrg_random_make_inplace, ignore_newtrees=True), - "fast_run", - "inplace", - position=99, -) diff --git a/pytensor/sandbox/samples_MRG31k3p_12_7_5.txt b/pytensor/sandbox/samples_MRG31k3p_12_7_5.txt deleted file mode 100644 index ee7647277f..0000000000 --- a/pytensor/sandbox/samples_MRG31k3p_12_7_5.txt +++ /dev/null @@ -1,420 +0,0 @@ -0.7353244530968368 -0.6142074400559068 -0.11007806099951267 -0.6487741703167558 -0.36619443260133266 -0.2585685825906694 -0.9489980279468 -0.4309556516818702 -0.12257590936496854 -0.9760319022461772 -0.6940806899219751 -0.18046841165050864 -0.003993193618953228 -0.5351603352464736 -0.02472442388534546 -0.7705746139399707 -0.8138928869739175 -0.9650539481081069 
-0.24507411010563374 -0.35767574002966285 -0.4939101580530405 -0.9027785388752818 -0.27498403564095497 -0.03848231676965952 -0.3081609820947051 -0.9062023567967117 -0.009030417073518038 -0.7953705741092563 -0.5061718439683318 -0.5975547162815928 -0.5435514179989696 -0.330895590595901 -0.49919482320547104 -0.9409166998229921 -0.8276205519214272 -0.5180770065635443 -0.2319392478093505 -0.36197659047320485 -0.11120751267299056 -0.5018561617471278 -0.47852187464013696 -0.7188052111305296 -0.3030327311716974 -0.6756376498378813 -0.03624899685382843 -0.34987151669338346 -0.031225718092173338 -0.06772322440519929 -0.06820952938869596 -0.9987128847278655 -0.08330700965598226 -0.9731874465942383 -0.6345655219629407 -0.7169904578477144 -0.5793502484448254 -0.7396790678612888 -0.9926023166626692 -0.7522463691420853 -0.6768838302232325 -0.3253784184344113 -0.05375300580635667 -0.4912636987864971 -0.6485021142289042 -0.3043024237267673 -0.24868384934961796 -0.8166692252270877 -0.5274319797754288 -0.31434731651097536 -0.9961257497780025 -0.3549888739362359 -0.8423425843939185 -0.21591948671266437 -0.8698299624957144 -0.17033040337264538 -0.22816143138334155 -0.11795765580609441 -0.7024209997616708 -0.15607220400124788 -0.5493582566268742 -0.5827712984755635 -0.8592293248511851 -0.785309090744704 -0.6115233600139618 -0.019046304281800985 -0.2573754615150392 -0.03130705002695322 -0.6572857238352299 -0.2033171127550304 -0.5058645992539823 -0.15793190989643335 -0.6273676953278482 -0.7285307059064507 -0.265245848800987 -0.6073522809892893 -0.3896624594926834 -0.27189663611352444 -0.705508322454989 -0.12823439668864012 -0.39648046158254147 -0.6584051586687565 -0.07818163838237524 -0.33628708589822054 -0.20613654889166355 -0.4277639244683087 -0.5401185592636466 -0.07513022050261497 -0.4920963351614773 -0.18214095244184136 -0.3235122123733163 -0.29958881670609117 -0.7304665613919497 -0.05146520072594285 -0.2471711952239275 -0.8797005712985992 -0.5029069227166474 -0.526974250562489 -0.15968210343271494 -0.4696163134649396 -0.17607332626357675 -0.362843859475106 -0.7626461815088987 -0.960180682130158 -0.2536660563200712 -0.710880630183965 -0.28728525526821613 -0.78940424695611 -0.5242114691063762 -0.8314367309212685 -0.5898511232808232 -0.015212591737508774 -0.4944482510909438 -0.06396882887929678 -0.519745257217437 -0.3558214954100549 -0.04566589882597327 -0.8368005948141217 -0.979805170558393 -0.7622401369735599 -0.2578657674603164 -0.5378834479488432 -0.9926298237405717 -0.4013678622432053 -0.510077933780849 -0.018817965406924486 -0.21481098141521215 -0.5357040031813085 -0.8512061606161296 -0.009026535786688328 -0.27302876580506563 -0.21162108704447746 -0.5273029855452478 -0.1086404686793685 -0.14079083362594247 -0.14331109775230289 -0.8190496540628374 -0.3947252375073731 -0.28109811525791883 -0.4066850380040705 -0.9154577874578536 -0.8929708409123123 -0.13500721845775843 -0.6328344400972128 -0.5668322211131454 -0.5448646773584187 -0.5418433886952698 -0.1141617177054286 -0.15885689994320273 -0.3867143443785608 -0.5574855520389974 -0.9173167692497373 -0.22908265376463532 -0.2047420055605471 -0.05979115655645728 -0.44121386017650366 -0.9507057839073241 -0.15352962678298354 -0.23290937673300505 -0.46427791472524405 -8.519855327904224E-4 -0.7947354763746262 -0.6385304923169315 -0.8696001935750246 -0.6022149357013404 -0.02299323584884405 -0.5036068987101316 -0.7541037476621568 -0.9995524706318974 -0.5888469088822603 -0.3318097642622888 -0.32492663664743304 -0.6643895329907537 -0.3656829949468374 -0.4912424306385219 
-0.1900841724127531 -0.5945985522121191 -0.5709856003522873 -0.35780346347019076 -0.388774358201772 -0.9446004652418196 -0.14594348100945354 -0.6250799335539341 -0.5504232128150761 -0.16380576323717833 -0.7428167965263128 -0.5522975320927799 -0.655389194842428 -0.47579632699489594 -0.29743909696117043 -0.6319712968543172 -0.8178138644434512 -0.2785301594994962 -0.46813122322782874 -0.2898342702537775 -0.3287009159103036 -0.12909299414604902 -0.5859099281951785 -0.1891166502609849 -0.14497734932228923 -0.5543341124430299 -0.11846801871433854 -0.8499364419840276 -0.6603211951442063 -0.35630465345457196 -0.9680569358170033 -0.6639338186942041 -0.24408268369734287 -0.030771974939852953 -0.17226932244375348 -0.7909302446059883 -0.4327161009423435 -0.6732332338578999 -0.0849734228104353 -0.7278832173906267 -0.5536605608649552 -0.7091806619428098 -0.01754110073670745 -0.8406045655719936 -0.4815619965083897 -0.0535086034797132 -0.9874794147908688 -0.07097038673236966 -0.023544831201434135 -0.42413365049287677 -0.2970325672067702 -0.48028060607612133 -0.1990663455799222 -0.6099434774369001 -0.5050413520075381 -0.7814605687744915 -0.2650358658283949 -0.5148864723742008 -0.7807142282836139 -0.0976667134091258 -0.1516015767119825 -0.6566055505536497 -0.3946392172947526 -0.8052488421089947 -0.2964451564475894 -0.07394864456728101 -0.6961450576782227 -0.01576960226520896 -0.3434433783404529 -0.08799878368154168 -0.785557022318244 -0.7494717631489038 -0.45548726338893175 -0.7672475459985435 -0.5134695749729872 -0.7000438082031906 -0.49818582693114877 -0.4293400440365076 -0.9961911663413048 -0.016769078094512224 -0.013044610153883696 -0.8661804771982133 -0.7819683295674622 -0.33438047766685486 -0.966121535282582 -0.7259743176400661 -0.9887824659235775 -0.9494950002990663 -0.037431647535413504 -0.8268285538069904 -0.7355263698846102 -0.3120658891275525 -0.3588241692632437 -0.471130283549428 -0.7047113911248744 -0.980073744431138 -0.6762627908028662 -0.869295812677592 -0.9070576094090939 -0.7852784115821123 -0.16342713963240385 -0.06330870278179646 -0.6165989111177623 -0.342802997212857 -0.8414176292717457 -0.6921333004720509 -0.2594374935142696 -0.4386491202749312 -0.555369642097503 -0.3660965468734503 -0.6484139142557979 -0.9005299550481141 -0.25335891311988235 -0.23852926725521684 -0.9044205779209733 -0.8694673446007073 -0.46783560374751687 -0.34727911837399006 -0.19556640228256583 -0.8798208390362561 -0.3131108647212386 -0.6312824171036482 -0.5722001581452787 -0.9441223978064954 -0.7707183314487338 -0.17464511329308152 -0.08897313429042697 -0.5044040409848094 -0.5735817537643015 -0.4467783076688647 -0.19051036844030023 -0.4578995378687978 -0.6395204453729093 -0.460110604763031 -0.576092894654721 -0.7038368303328753 -0.5555814192630351 -0.4171535111963749 -0.8905360852368176 -0.12811446748673916 -0.6814800254069269 -0.8502416326664388 -0.12028768053278327 -0.16715052351355553 -0.3563938206061721 -0.049810963682830334 -0.27328392397612333 -0.2407418810762465 -0.6631906591355801 -0.674483266659081 -0.10489491606131196 -0.04698043642565608 -0.0812066881917417 -0.312124056275934 -0.6798701109364629 -0.7286937129683793 -0.9784366562962532 -0.5650205011479557 -0.833059043623507 -0.8976074242964387 -0.9441233519464731 -0.6146679543890059 -0.9019614770077169 -0.5529476394876838 -0.7665416682139039 -0.39598167687654495 -0.26307358546182513 -0.14862705068662763 -0.9521124185994267 -0.17644333699718118 -0.7684473628178239 -0.4274347145110369 -0.6102834036573768 -0.9328651092946529 -0.058630190789699554 
-0.04729347629472613 -0.9597438890486956 -0.6761234584264457 -0.21832499839365482 -0.20707347383722663 -0.7274158899672329 -0.9477886455133557 -0.7821800266392529 -0.07305240212008357 -0.40399201214313507 -0.22684293938800693 -0.053185423370450735 -0.330069282092154 -0.6862794999033213 -0.7821815954521298 -0.22617859859019518 -0.8118352359160781 -0.015444065444171429 -0.6732339109294116 -0.9980663135647774 -0.8833195753395557 -0.21191661106422544 -0.32638366147875786 -0.5747208022512496 -0.07515769777819514 -0.02952938713133335 -0.4980746121145785 -0.8762881984002888 -0.17386484891176224 -0.10696181375533342 -0.5474299816414714 -0.016154434997588396 -0.6960771018639207 -0.47133891424164176 -0.9015861176885664 -0.782880718819797 -0.6602211343124509 -0.6578835439868271 -0.6049443730153143 -0.17169494135305285 -0.9915955001488328 -0.10519243823364377 -0.37815978936851025 -0.20879409136250615 -0.45666090911254287 -0.6456936108879745 -0.684759714640677 -0.8762755445204675 -0.8020628895610571 -0.1663151141256094 -0.31246642768383026 -0.18852565623819828 diff --git a/tests/misc/test_pkl_utils.py b/tests/misc/test_pkl_utils.py index e426112877..5594781c0e 100644 --- a/tests/misc/test_pkl_utils.py +++ b/tests/misc/test_pkl_utils.py @@ -6,7 +6,6 @@ import pytensor from pytensor.misc.pkl_utils import StripPickler, dump, load -from pytensor.sandbox.rng_mrg import MRG_RandomStream from pytensor.tensor.type import matrix @@ -23,17 +22,6 @@ def teardown_method(self): if self.tmpdir is not None: shutil.rmtree(self.tmpdir) - def test_dump_load_mrg(self): - rng = MRG_RandomStream() - - with open("test", "wb") as f: - dump(rng, f) - - with open("test", "rb") as f: - rng = load(f) - - assert type(rng) == MRG_RandomStream - def test_dump_zip_names(self): foo_1 = pytensor.shared(0, name="foo") foo_2 = pytensor.shared(1, name="foo") diff --git a/tests/sandbox/test_multinomial.py b/tests/sandbox/test_multinomial.py deleted file mode 100644 index a0c4f6ee45..0000000000 --- a/tests/sandbox/test_multinomial.py +++ /dev/null @@ -1,120 +0,0 @@ -import numpy as np - -import tests.unittest_tools as utt -from pytensor import function -from pytensor.configdefaults import config -from pytensor.sandbox.multinomial import MultinomialFromUniform -from pytensor.tensor.type import dmatrix, dvector, fmatrix, fvector, iscalar - - -def test_n_samples_1(): - p = fmatrix() - u = fvector() - n = iscalar() - m = MultinomialFromUniform("auto")(p, u, n) - - f = function([p, u, n], m, allow_input_downcast=True) - - rng = np.random.default_rng(12345) - for i in [1, 5, 10, 100, 1000, 10000]: - uni = rng.random(2 * i).astype(config.floatX) - res = f([[1.0, 0.0], [0.0, 1.0]], uni, i) - utt.assert_allclose(res, [[i * 1.0, 0.0], [0.0, i * 1.0]]) - - -def test_n_samples_2(): - p = fmatrix() - u = fvector() - n = iscalar() - m = MultinomialFromUniform("auto")(p, u, n) - - f = function([p, u, n], m, allow_input_downcast=True) - - rng = np.random.default_rng(12345) - - for i in [1, 5, 10, 100, 1000]: - uni = rng.random(i).astype(config.floatX) - pvals = rng.integers(1, 1000, (1, 1000)).astype(config.floatX) - pvals /= pvals.sum(1) - res = f(pvals, uni, i) - assert res.sum() == i - - for i in [1, 5, 10, 100, 1000]: - uni = rng.random(i).astype(config.floatX) - pvals = rng.integers(1, 1000000, (1, 1000000)).astype(config.floatX) - pvals /= pvals.sum(1) - res = f(pvals, uni, i) - assert res.sum() == i - - -def test_multinomial_0(): - # This tests the MultinomialFromUniform Op directly, not going through the - # multinomial() call in GPU random 
generation. - - p = fmatrix() - u = fvector() - - m = MultinomialFromUniform("auto")(p, u) - - # the m*2 allows the multinomial to reuse output - f = function([p, u], m * 2, allow_input_downcast=True) - - # test that both first and second samples can be drawn - utt.assert_allclose(f([[1, 0], [0, 1]], [0.1, 0.1]), [[2, 0], [0, 2]]) - - # test that both second labels can be drawn - r = f([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31]) - utt.assert_allclose(r, [[0, 2], [0, 2]]) - - # test that both first labels can be drawn - r = f([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21]) - utt.assert_allclose(r, [[0, 2], [2, 0]]) - - # change the size to make sure output gets reallocated ok - # and also make sure that the GPU version doesn't screw up the - # transposed-ness - r = f([[0.2, 0.8]], [0.25]) - utt.assert_allclose(r, [[0, 2]]) - - -# TODO: check a bigger example (make sure blocking on GPU is handled correctly) -def test_multinomial_large(): - p = fmatrix() - u = fvector() - m = MultinomialFromUniform("auto")(p, u) - f = function([p, u], m * 2, allow_input_downcast=True) - - pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1 - pval = pval / pval.sum(axis=1)[:, None] - uval = np.ones_like(pval[:, 0]) * 0.5 - mval = f(pval, uval) - - assert mval.shape == pval.shape - if config.cast_policy == "custom": - assert mval.dtype == pval.dtype - elif config.cast_policy == "numpy+floatX": - assert mval.dtype == config.floatX - elif config.cast_policy == "numpy": - assert mval.dtype == "float64" - else: - raise NotImplementedError(config.cast_policy) - utt.assert_allclose(mval.sum(axis=1), 2) - asdf = np.asarray([0, 0, 2, 0]) + 0 * pval - utt.assert_allclose(mval, asdf) # broadcast over all rows - - -def test_multinomial_dtypes(): - p = dmatrix() - u = dvector() - m = MultinomialFromUniform("auto")(p, u) - assert m.dtype == "float64", m.dtype - - p = fmatrix() - u = fvector() - m = MultinomialFromUniform("auto")(p, u) - assert m.dtype == "float32", m.dtype - - p = fmatrix() - u = fvector() - m = MultinomialFromUniform("float64")(p, u) - assert m.dtype == "float64", m.dtype diff --git a/tests/sandbox/test_multinomial_wo_replacement.py b/tests/sandbox/test_multinomial_wo_replacement.py deleted file mode 100644 index 731e94f21a..0000000000 --- a/tests/sandbox/test_multinomial_wo_replacement.py +++ /dev/null @@ -1,223 +0,0 @@ -import numpy as np -import pytest - -from pytensor import function -from pytensor.configdefaults import config -from pytensor.sandbox import multinomial -from pytensor.sandbox.rng_mrg import MRG_RandomStream as RandomStream -from pytensor.tensor.type import fmatrix, fvector, iscalar - - -class TestOP: - @pytest.mark.xfail( - reason="This test is designed around very specific random draws from the old NumPy API" - ) - def test_select_distinct(self): - # Tests that ChoiceFromUniform always selects distinct elements - - p = fmatrix() - u = fvector() - n = iscalar() - m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n) - - f = function([p, u, n], m, allow_input_downcast=True) - - n_elements = 1000 - all_indices = range(n_elements) - - rng = np.random.default_rng(12345) - - expected = [ - np.asarray([[931, 318, 185, 209, 559]]), - np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]), - np.asarray( - [ - [ - 546, - 28, - 79, - 665, - 295, - 779, - 433, - 531, - 411, - 716, - 244, - 234, - 70, - 88, - 612, - 639, - 383, - 335, - 451, - 100, - 175, - 492, - 848, - 771, - 559, - 214, - 568, - 596, - 370, - 486, - 855, - 925, - 138, - 300, - 528, - 507, - 730, - 199, - 882, - 
357, - 58, - 195, - 705, - 900, - 66, - 468, - 513, - 410, - 816, - 672, - ] - ] - ), - ] - - for i in [5, 10, 50, 100, 500, n_elements]: - uni = rng.random(i).astype(config.floatX) - pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX) - pvals /= pvals.sum(1) - res = f(pvals, uni, i) - for ii in range(len(expected)): - if expected[ii].shape == res.shape: - assert (expected[ii] == res).all() - res = np.squeeze(res) - assert len(res) == i - assert np.all(np.in1d(np.unique(res), all_indices)), res - - def test_fail_select_alot(self): - # Tests that ChoiceFromUniform fails when asked to sample more - # elements than the actual number of elements - - p = fmatrix() - u = fvector() - n = iscalar() - m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n) - - f = function([p, u, n], m, allow_input_downcast=True) - - n_elements = 100 - n_selected = 200 - rng = np.random.default_rng(12345) - uni = rng.random(n_selected).astype(config.floatX) - pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX) - pvals /= pvals.sum(1) - with pytest.raises(ValueError): - f(pvals, uni, n_selected) - - def test_select_proportional_to_weight(self): - # Tests that ChoiceFromUniform selects elements, on average, - # proportional to the their probabilities - - p = fmatrix() - u = fvector() - n = iscalar() - m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n) - - f = function([p, u, n], m, allow_input_downcast=True) - - n_elements = 100 - n_selected = 10 - mean_rtol = 0.0005 - rng = np.random.default_rng(12345) - pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX) - pvals /= pvals.sum(1) - avg_pvals = np.zeros((n_elements,), dtype=config.floatX) - - for rep in range(10000): - uni = rng.random(n_selected).astype(config.floatX) - res = f(pvals, uni, n_selected) - res = np.squeeze(res) - avg_pvals[res] += 1 - avg_pvals /= avg_pvals.sum() - avg_diff = np.mean(abs(avg_pvals - pvals)) - assert avg_diff < mean_rtol, avg_diff - - -class TestFunction: - def test_select_distinct(self): - # Tests that multinomial_wo_replacement always selects distinct elements - - th_rng = RandomStream(12345) - - p = fmatrix() - n = iscalar() - with pytest.deprecated_call(): - m = th_rng.multinomial_wo_replacement(pvals=p, n=n) - - f = function([p, n], m, allow_input_downcast=True) - - n_elements = 1000 - all_indices = range(n_elements) - rng = np.random.default_rng(12345) - for i in [5, 10, 50, 100, 500, n_elements]: - pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX) - pvals /= pvals.sum(1) - res = f(pvals, i) - res = np.squeeze(res) - assert len(res) == i - assert np.all(np.in1d(np.unique(res), all_indices)), res - - def test_fail_select_alot(self): - # Tests that multinomial_wo_replacement fails when asked to sample more - # elements than the actual number of elements - - th_rng = RandomStream(12345) - - p = fmatrix() - n = iscalar() - with pytest.deprecated_call(): - m = th_rng.multinomial_wo_replacement(pvals=p, n=n) - - f = function([p, n], m, allow_input_downcast=True) - - n_elements = 100 - n_selected = 200 - rng = np.random.default_rng(12345) - pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX) - pvals /= pvals.sum(1) - with pytest.raises(ValueError): - f(pvals, n_selected) - - def test_select_proportional_to_weight(self): - # Tests that multinomial_wo_replacement selects elements, on average, - # proportional to the their probabilities - - th_rng = RandomStream(12345) - - p = fmatrix() - n = iscalar() - m = th_rng.choice(size=n, p=p, replace=False) - 
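The deleted `test_multinomial_0` in tests/sandbox/test_multinomial.py above pins down what `MultinomialFromUniform` computes for n=1: each row places its single count on the first bin whose cumulative probability exceeds that row's uniform draw. A NumPy sketch consistent with those expected values (an illustration, not the Op's actual implementation):

```python
import numpy as np

def multinomial_from_uniform(pvals, unis):
    # For each row, put one count on the first bin whose cumulative
    # probability exceeds that row's uniform draw.
    out = np.zeros_like(pvals)
    idx = (np.cumsum(pvals, axis=1) > unis[:, None]).argmax(axis=1)
    out[np.arange(len(pvals)), idx] = 1
    return out

# Reproduces the deleted expectations (before the *2 applied in the test):
print(multinomial_from_uniform(np.array([[0.2, 0.8], [0.3, 0.7]]),
                               np.array([0.31, 0.31])))   # [[0. 1.] [0. 1.]]
```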
- f = function([p, n], m, allow_input_downcast=True) - - n_elements = 100 - n_selected = 10 - mean_rtol = 0.0005 - rng = np.random.default_rng(12345) - pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX) - pvals /= pvals.sum(1) - avg_pvals = np.zeros((n_elements,), dtype=config.floatX) - - for rep in range(10000): - res = f(pvals, n_selected) - res = np.squeeze(res) - avg_pvals[res] += 1 - avg_pvals /= avg_pvals.sum() - avg_diff = np.mean(abs(avg_pvals - pvals)) - assert avg_diff < mean_rtol diff --git a/tests/sandbox/test_rng_mrg.py b/tests/sandbox/test_rng_mrg.py deleted file mode 100644 index 3e41bb0401..0000000000 --- a/tests/sandbox/test_rng_mrg.py +++ /dev/null @@ -1,1144 +0,0 @@ -import contextlib -import os -import sys -import time - -import numpy as np -import pytest - -import pytensor -from pytensor.compile.function import function -from pytensor.compile.sharedvalue import shared -from pytensor.configdefaults import config -from pytensor.gradient import NullTypeGradError, UndefinedGrad, grad, zero_grad -from pytensor.sandbox import rng_mrg -from pytensor.sandbox.rng_mrg import MRG_RandomStream, mrg_uniform -from pytensor.scan.basic import scan -from pytensor.tensor.basic import as_tensor_variable, cast -from pytensor.tensor.math import sum as at_sum -from pytensor.tensor.random.utils import RandomStream -from pytensor.tensor.type import iscalar, ivector, lmatrix, matrix, scalar, vector -from tests import unittest_tools as utt - - -# TODO: test MRG_RandomStream -# Partly done in test_consistency_randomstreams - -# TODO: test optimizer mrg_random_make_inplace - - -# Results generated by Java code using L'Ecuyer et al.'s code, with: -# main seed: [12345]*6 (default) -# 12 streams -# 7 substreams for each stream -# 5 samples drawn from each substream -java_samples = np.loadtxt( - os.path.join( - os.path.split(pytensor.__file__)[0], "sandbox", "samples_MRG31k3p_12_7_5.txt" - ) -) - - -def test_deterministic(): - seed = utt.fetch_seed() - sample_size = (10, 20) - - R = MRG_RandomStream(seed=seed) - u = R.uniform(size=sample_size) - f = function([], u) - - fsample1 = f() - fsample2 = f() - assert not np.allclose(fsample1, fsample2) - - R2 = MRG_RandomStream(seed=seed) - u2 = R2.uniform(size=sample_size) - g = function([], u2) - gsample1 = g() - gsample2 = g() - assert np.allclose(fsample1, gsample1) - assert np.allclose(fsample2, gsample2) - - -def test_consistency_randomstreams(): - # Verify that the random numbers generated by MRG_RandomStream - # are the same as the reference (Java) implementation by L'Ecuyer et al. 
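`test_consistency_randomstreams` (which continues just below) and the serial and parallel CPU tests later in this file all compare generated values against the deleted samples_MRG31k3p_12_7_5.txt reference file: 12 streams x 7 substreams x 5 samples = 420 doubles produced by L'Ecuyer et al.'s Java code, flattened stream by stream (and, within a stream, substream by substream). A small sketch of how those numbers line up, assuming a copy of the file is still at hand:

```python
import numpy as np

java_samples = np.loadtxt("samples_MRG31k3p_12_7_5.txt")   # path is illustrative
assert java_samples.shape == (420,)                        # 12 * 7 * 5 values
per_stream = java_samples.reshape(12, 7 * 5)               # 35 reference values per stream
```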
- seed = 12345 - n_samples = 5 - n_streams = 12 - n_substreams = 7 - - samples = [] - rng = MRG_RandomStream(seed=seed) - for i in range(n_streams): - stream_samples = [] - u = rng.uniform(size=(n_substreams,), nstreams=n_substreams) - f = function([], u) - for j in range(n_samples): - s = f() - stream_samples.append(s) - stream_samples = np.array(stream_samples) - stream_samples = stream_samples.T.flatten() - samples.append(stream_samples) - - samples = np.array(samples).flatten() - assert np.allclose(samples, java_samples) - - -def test_get_substream_rstates(): - with config.change_flags(compute_test_value="raise"): - n_streams = 100 - - dtype = "float32" - rng = MRG_RandomStream( - np.random.default_rng(utt.fetch_seed()).integers(2147462579) - ) - - rng.get_substream_rstates(n_streams, dtype) - - -def test_consistency_cpu_serial(): - # Verify that the random numbers generated by mrg_uniform, serially, - # are the same as the reference (Java) implementation by L'Ecuyer et al. - - seed = 12345 - n_samples = 5 - n_streams = 12 - n_substreams = 7 - - samples = [] - curr_rstate = np.array([seed] * 6, dtype="int32") - - for i in range(n_streams): - stream_rstate = curr_rstate.copy() - for j in range(n_substreams): - rstate = shared(np.array([stream_rstate.copy()], dtype="int32")) - new_rstate, sample = rng_mrg.mrg_uniform.new( - rstate, ndim=None, dtype=config.floatX, size=(1,) - ) - # Not really necessary, just mimicking - # rng_mrg.MRG_RandomStream' behavior - sample.rstate = rstate - sample.update = (rstate, new_rstate) - - rstate.default_update = new_rstate - f = function([], sample) - for k in range(n_samples): - s = f() - samples.append(s) - - # next substream - stream_rstate = rng_mrg.ff_2p72(stream_rstate) - - # next stream - curr_rstate = rng_mrg.ff_2p134(curr_rstate) - - samples = np.array(samples).flatten() - assert np.allclose(samples, java_samples) - - -def test_consistency_cpu_parallel(): - # Verify that the random numbers generated by mrg_uniform, in parallel, - # are the same as the reference (Java) implementation by L'Ecuyer et al. 
- - seed = 12345 - n_samples = 5 - n_streams = 12 - n_substreams = 7 # 7 samples will be drawn in parallel - - samples = [] - curr_rstate = np.array([seed] * 6, dtype="int32") - - for i in range(n_streams): - stream_samples = [] - rstate = [curr_rstate.copy()] - for j in range(1, n_substreams): - rstate.append(rng_mrg.ff_2p72(rstate[-1])) - rstate = np.asarray(rstate) - rstate = shared(rstate) - - new_rstate, sample = rng_mrg.mrg_uniform.new( - rstate, ndim=None, dtype=config.floatX, size=(n_substreams,) - ) - # Not really necessary, just mimicking - # rng_mrg.MRG_RandomStream' behavior - sample.rstate = rstate - sample.update = (rstate, new_rstate) - - rstate.default_update = new_rstate - f = function([], sample) - - for k in range(n_samples): - s = f() - stream_samples.append(s) - - samples.append(np.array(stream_samples).T.flatten()) - - # next stream - curr_rstate = rng_mrg.ff_2p134(curr_rstate) - - samples = np.array(samples).flatten() - assert np.allclose(samples, java_samples) - - -def check_basics( - f, - steps, - sample_size, - prefix="", - allow_01=False, - inputs=None, - target_avg=0.5, - target_std=None, - mean_rtol=0.01, - std_tol=0.01, -): - if inputs is None: - inputs = [] - dt = 0.0 - avg_var = 0.0 - - for i in range(steps): - t0 = time.perf_counter() - ival = f(*inputs) - assert ival.shape == sample_size - dt += time.perf_counter() - t0 - ival = np.asarray(ival) - if i == 0: - mean = np.array(ival, copy=True) - avg_var = np.mean((ival - target_avg) ** 2) - min_ = ival.min() - max_ = ival.max() - else: - alpha = 1.0 / (1 + i) - mean = alpha * ival + (1 - alpha) * mean - avg_var = alpha * np.mean((ival - target_avg) ** 2) + (1 - alpha) * avg_var - min_ = min(min_, ival.min()) - max_ = max(max_, ival.max()) - if not allow_01: - assert min_ > 0 - assert max_ < 1 - - if hasattr(target_avg, "shape"): # looks if target_avg is an array - diff = np.mean(abs(mean - target_avg)) - # print prefix, 'mean diff with mean', diff - assert np.all( - diff < mean_rtol * (1 + abs(target_avg)) - ), f"bad mean? {mean} {target_avg}" - else: - # if target_avg is a scalar, then we can do the mean of - # `mean` to get something more precise - mean = np.mean(mean) - # print prefix, 'mean', mean - assert abs(mean - target_avg) < mean_rtol * ( - 1 + abs(target_avg) - ), f"bad mean? {mean:f} {target_avg:f}" - - std = np.sqrt(avg_var) - # print prefix, 'var', avg_var - # print prefix, 'std', std - if target_std is not None: - assert abs(std - target_std) < std_tol * ( - 1 + abs(target_std) - ), f"bad std? 
{std:f} {target_std:f} {std_tol:f}" - # print prefix, 'time', dt - # print prefix, 'elements', steps * sample_size[0] * sample_size[1] - # print prefix, 'samples/sec', steps * sample_size[0] * sample_size[1] / dt - # print prefix, 'min', min_, 'max', max_ - - -@pytest.mark.slow -def test_uniform(): - # TODO: test param low, high - # TODO: test size=None - # TODO: test ndim!=size.ndim - # TODO: test bad seed - # TODO: test size=Var, with shape that change from call to call - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (10, 100) - steps = 50 - else: - sample_size = (500, 50) - steps = int(1e3) - - x = matrix() - for size, const_size, var_input, input in [ - (sample_size, sample_size, [], []), - (x.shape, sample_size, [x], [np.zeros(sample_size, dtype=config.floatX)]), - ( - (x.shape[0], sample_size[1]), - sample_size, - [x], - [np.zeros(sample_size, dtype=config.floatX)], - ), - # test empty size (scalar) - ((), (), [], []), - ]: - - # TEST CPU IMPLEMENTATION - # The python and C implementation are tested with DebugMode - x = matrix() - R = MRG_RandomStream(234) - # Note: we specify `nstreams` to avoid a warning. - # TODO Look for all occurrences of `guess_n_streams` and `30 * 256` - # for such situations: it would be better to instead filter the - # warning using the warning module. - u = R.uniform(size=size, nstreams=rng_mrg.guess_n_streams(size, warn=False)) - f = function(var_input, u) - assert any( - isinstance(node.op, mrg_uniform) for node in f.maker.fgraph.toposort() - ) - f(*input) - - # Increase the number of steps if sizes implies only a few samples - if np.prod(const_size) < 10: - steps_ = steps * 100 - else: - steps_ = steps - check_basics(f, steps_, const_size, prefix="mrg cpu", inputs=input) - - RR = RandomStream(234) - - uu = RR.uniform(size=size) - ff = function(var_input, uu) - # It's not our problem if numpy generates 0 or 1 - check_basics( - ff, steps_, const_size, prefix="numpy", allow_01=True, inputs=input - ) - - -def test_broadcastable(): - R = MRG_RandomStream(234) - x = matrix() - size1 = (10, 1) - size2 = (x.shape[0], 1) - pvals_1 = np.random.uniform(0, 1, size=size1) - pvals_1 = pvals_1 / sum(pvals_1) - pvals_2 = R.uniform(size=size2) - pvals_2 = pvals_2 / at_sum(pvals_2) - - for distribution in [ - R.uniform, - R.normal, - R.truncated_normal, - R.binomial, - R.multinomial, - R.multinomial_wo_replacement, - ]: - # multinomial or multinomial_wo_replacement does not support "size" argument, - # the sizes of them are implicitly defined with "pvals" argument. 
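# For reference (an annotation, not part of the deleted file): for the
# multinomial-style draws the output shape is taken from `pvals` rather than
# from a `size` argument, e.g. with the deleted MRG API
#
#   pvals = pvals_1                     # shape (10, 1)
#   draw = R.multinomial(pvals=pvals)   # draw also has shape (10, 1),
#                                       # i.e. broadcastable == (False, True)
#
# which is what the assertions below check.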
- if distribution in [R.multinomial, R.multinomial_wo_replacement]: - # check when all dimensions are constant - context_mgr = ( - pytest.deprecated_call() - if distribution == R.multinomial_wo_replacement - else contextlib.suppress() - ) - - with context_mgr: - uu = distribution(pvals=pvals_1) - assert uu.broadcastable == (False, True) - - # check when some dimensions are pytensor variables - with context_mgr: - uu = distribution(pvals=pvals_2) - assert uu.broadcastable == (False, True) - else: - # check when all dimensions are constant - uu = distribution(size=size1) - assert uu.broadcastable == (False, True) - - # check when some dimensions are pytensor variables - uu = distribution(size=size2) - assert uu.broadcastable == (False, True) - - -def check_binomial(mean, size, const_size, var_input, input, steps, rtol): - R = MRG_RandomStream(234) - u = R.binomial(size=size, p=mean) - f = function(var_input, u) - f(*input) - - # Increase the number of steps if sizes implies only a few samples - if np.prod(const_size) < 10: - steps_ = steps * 100 - else: - steps_ = steps - check_basics( - f, - steps_, - const_size, - prefix="mrg cpu", - inputs=input, - allow_01=True, - target_avg=mean, - mean_rtol=rtol, - ) - - RR = RandomStream(234) - - uu = RR.binomial(1, mean, size=size) - ff = function(var_input, uu) - # It's not our problem if numpy generates 0 or 1 - check_basics( - ff, - steps_, - const_size, - prefix="numpy", - allow_01=True, - inputs=input, - target_avg=mean, - mean_rtol=rtol, - ) - - -@pytest.mark.slow -def test_binomial(): - # TODO: test size=None, ndim=X - # TODO: test size=X, ndim!=X.ndim - # TODO: test random seed in legal value(!=0 and other) - # TODO: test sample_size not a multiple of guessed #streams - # TODO: test size=Var, with shape that change from call to call - # we test size in a tuple of int and a tensor.shape. - # we test the param p with int. 
- - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (10, 50) - steps = 50 - rtol = 0.02 - else: - sample_size = (500, 50) - steps = int(1e3) - rtol = 0.01 - - x = matrix() - for mean in [0.1, 0.5]: - for size, const_size, var_input, input in [ - (sample_size, sample_size, [], []), - (x.shape, sample_size, [x], [np.zeros(sample_size, dtype=config.floatX)]), - # test empty size (scalar) - ((), (), [], []), - ]: - check_binomial(mean, size, const_size, var_input, input, steps, rtol) - - -@pytest.mark.slow -def test_normal0(): - steps = 50 - std = 2.0 - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (25, 30) - default_rtol = 0.02 - else: - sample_size = (999, 50) - default_rtol = 0.01 - sample_size_odd = (sample_size[0], sample_size[1] - 1) - x = matrix() - - test_cases = [ - (sample_size, sample_size, [], [], -5.0, default_rtol, default_rtol), - ( - x.shape, - sample_size, - [x], - [np.zeros(sample_size, dtype=config.floatX)], - -5.0, - default_rtol, - default_rtol, - ), - # test odd value - ( - x.shape, - sample_size_odd, - [x], - [np.zeros(sample_size_odd, dtype=config.floatX)], - -5.0, - default_rtol, - default_rtol, - ), - ( - sample_size, - sample_size, - [], - [], - np.arange(np.prod(sample_size), dtype="float32").reshape(sample_size), - 10.0 * std / np.sqrt(steps), - default_rtol, - ), - # test empty size (scalar) - ((), (), [], [], -5.0, default_rtol, 0.02), - # test with few samples at the same time - ((1,), (1,), [], [], -5.0, default_rtol, 0.02), - ((3,), (3,), [], [], -5.0, default_rtol, 0.02), - ] - - for size, const_size, var_input, input, avg, rtol, std_tol in test_cases: - R = MRG_RandomStream(234) - # Note: we specify `nstreams` to avoid a warning. - n = R.normal( - size=size, - avg=avg, - std=std, - nstreams=rng_mrg.guess_n_streams(size, warn=False), - ) - f = function(var_input, n) - f(*input) - - # Increase the number of steps if size implies only a few samples - if np.prod(const_size) < 10: - steps_ = steps * 50 - else: - steps_ = steps - check_basics( - f, - steps_, - const_size, - target_avg=avg, - target_std=std, - prefix="mrg ", - allow_01=True, - inputs=input, - mean_rtol=rtol, - std_tol=std_tol, - ) - - sys.stdout.flush() - - RR = RandomStream(235) - - nn = RR.normal(avg, std, size=size) - ff = function(var_input, nn) - - check_basics( - ff, - steps_, - const_size, - target_avg=avg, - target_std=std, - prefix="numpy ", - allow_01=True, - inputs=input, - mean_rtol=rtol, - ) - - -@pytest.mark.slow -def test_normal_truncation(): - # just a copy of test_normal0 with extra bound check - steps = 50 - std = 2.0 - # standard deviation is slightly less than for a regular Gaussian - # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) 
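# For reference (an annotation, not part of the deleted file): the constant on
# the next line can be reproduced directly, assuming SciPy is available:
#
#   from scipy import stats
#   stats.truncnorm.std(a=-2, b=2, loc=0.0, scale=1.0)  # ~0.8796256610342398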
- target_std = 0.87962566103423978 * std - - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (25, 30) - default_rtol = 0.02 - else: - sample_size = (999, 50) - default_rtol = 0.01 - sample_size_odd = (sample_size[0], sample_size[1] - 1) - x = matrix() - - test_cases = [ - (sample_size, sample_size, [], [], -5.0, default_rtol, default_rtol), - ( - x.shape, - sample_size, - [x], - [np.zeros(sample_size, dtype=config.floatX)], - -5.0, - default_rtol, - default_rtol, - ), - # test odd value - ( - x.shape, - sample_size_odd, - [x], - [np.zeros(sample_size_odd, dtype=config.floatX)], - -5.0, - default_rtol, - default_rtol, - ), - ( - sample_size, - sample_size, - [], - [], - np.arange(np.prod(sample_size), dtype="float32").reshape(sample_size), - 10.0 * std / np.sqrt(steps), - default_rtol, - ), - # test empty size (scalar) - ((), (), [], [], -5.0, default_rtol, 0.02), - # test with few samples at the same time - ((1,), (1,), [], [], -5.0, default_rtol, 0.02), - ((3,), (3,), [], [], -5.0, default_rtol, 0.02), - ] - - for size, const_size, var_input, input, avg, rtol, std_tol in test_cases: - R = MRG_RandomStream(234) - # Note: we specify `nstreams` to avoid a warning. - n = R.normal( - size=size, - avg=avg, - std=std, - truncate=True, - nstreams=rng_mrg.guess_n_streams(size, warn=False), - ) - f = function(var_input, n) - - # check if truncated at 2*std - samples = f(*input) - assert np.all(avg + 2 * std - samples >= 0), "bad upper bound? {} {}".format( - samples, - avg + 2 * std, - ) - assert np.all(samples - (avg - 2 * std) >= 0), "bad lower bound? {} {}".format( - samples, - avg - 2 * std, - ) - - # Increase the number of steps if size implies only a few samples - if np.prod(const_size) < 10: - steps_ = steps * 50 - else: - steps_ = steps - check_basics( - f, - steps_, - const_size, - target_avg=avg, - target_std=target_std, - prefix="mrg ", - allow_01=True, - inputs=input, - mean_rtol=rtol, - std_tol=std_tol, - ) - - sys.stdout.flush() - - -@pytest.mark.slow -def test_truncated_normal(): - # just a copy of test_normal0 for truncated normal - steps = 50 - std = 2.0 - - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (25, 30) - default_rtol = 0.02 - else: - sample_size = (999, 50) - default_rtol = 0.01 - sample_size_odd = (sample_size[0], sample_size[1] - 1) - x = matrix() - - test_cases = [ - (sample_size, sample_size, [], [], -5.0, default_rtol, default_rtol), - ( - x.shape, - sample_size, - [x], - [np.zeros(sample_size, dtype=config.floatX)], - -5.0, - default_rtol, - default_rtol, - ), - # test odd value - ( - x.shape, - sample_size_odd, - [x], - [np.zeros(sample_size_odd, dtype=config.floatX)], - -5.0, - default_rtol, - default_rtol, - ), - ( - sample_size, - sample_size, - [], - [], - np.arange(np.prod(sample_size), dtype="float32").reshape(sample_size), - 10.0 * std / np.sqrt(steps), - default_rtol, - ), - # test empty size (scalar) - ((), (), [], [], -5.0, default_rtol, 0.02), - # test with few samples at the same time - ((1,), (1,), [], [], -5.0, default_rtol, 0.02), - ((3,), (3,), [], [], -5.0, default_rtol, 0.02), - ] - - for size, const_size, var_input, input, avg, rtol, std_tol in test_cases: - R = MRG_RandomStream(234) - # Note: we specify `nstreams` to avoid a warning. 
- n = R.truncated_normal( - size=size, - avg=avg, - std=std, - nstreams=rng_mrg.guess_n_streams(size, warn=False), - ) - f = function(var_input, n) - - # Increase the number of steps if size implies only a few samples - if np.prod(const_size) < 10: - steps_ = steps * 60 - else: - steps_ = steps - check_basics( - f, - steps_, - const_size, - target_avg=avg, - target_std=std, - prefix="mrg ", - allow_01=True, - inputs=input, - mean_rtol=rtol, - std_tol=std_tol, - ) - - sys.stdout.flush() - - -def basic_multinomialtest( - f, steps, sample_size, target_pvals, n_samples, prefix="", mean_rtol=0.04 -): - - dt = 0.0 - avg_pvals = np.zeros(target_pvals.shape, dtype=config.floatX) - - for i in range(steps): - t0 = time.perf_counter() - ival = f() - assert ival.shape == sample_size - assert np.all(np.sum(ival, axis=1) == n_samples) - dt += time.perf_counter() - t0 - avg_pvals += ival - avg_pvals /= steps * n_samples - - assert np.mean(abs(avg_pvals - target_pvals)) < mean_rtol - - # print("random?[:10]\n", np.asarray(f()[:10])) - # print(prefix, "mean", avg_pvals) - # # < mean_rtol, 'bad mean? %s %s' % (str(avg_pvals), str(target_pvals)) - # print(np.mean(abs(avg_pvals - target_pvals))) - # print(prefix, "time", dt) - # print(prefix, "elements", steps * np.prod(target_pvals.shape)) - # print(prefix, "samples/sec", steps * np.prod(target_pvals.shape) / dt) - - -def test_multinomial(): - steps = 100 - - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (49, 5) - else: - sample_size = (450, 6) - - pvals = np.asarray(np.random.uniform(size=sample_size)) - pvals = np.apply_along_axis(lambda row: row / np.sum(row), 1, pvals) - R = MRG_RandomStream(234) - # Note: we specify `nstreams` to avoid a warning. - m = R.multinomial(pvals=pvals, dtype=config.floatX, nstreams=30 * 256) - f = function([], m) - f() - basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1, prefix="mrg ") - - -def test_multinomial_n_samples(): - if ( - config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") - or config.mode == "Mode" - and config.linker in ["py"] - ): - sample_size = (49, 5) - else: - sample_size = (450, 6) - - pvals = np.asarray(np.random.uniform(size=sample_size)) - pvals = np.apply_along_axis(lambda row: row / np.sum(row), 1, pvals) - R = MRG_RandomStream(234) - - for n_samples, steps in zip([5, 10, 100, 1000], [20, 10, 1, 1]): - m = R.multinomial( - pvals=pvals, n=n_samples, dtype=config.floatX, nstreams=30 * 256 - ) - f = function([], m) - basic_multinomialtest(f, steps, sample_size, pvals, n_samples, prefix="mrg ") - sys.stdout.flush() - - -class TestMRG: - def test_bad_size(self): - - R = MRG_RandomStream(234) - - for size in [ - (0, 100), - (-1, 100), - (1, 0), - ]: - - with pytest.raises(ValueError): - R.uniform(size) - with pytest.raises(ValueError): - R.binomial(size) - with pytest.raises(ValueError): - R.multinomial(size, 1, []) - with pytest.raises(ValueError): - R.normal(size) - with pytest.raises(ValueError): - R.truncated_normal(size) - - -def test_multiple_rng_aliasing(): - # Test that when we have multiple random number generators, we do not alias - # the state_updates member. `state_updates` can be useful when attempting to - # copy the (random) state between two similar pytensor graphs. The test is - # meant to detect a previous bug where state_updates was initialized as a - # class-attribute, instead of the __init__ function. 
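# For reference (an annotation, not part of the deleted file): a minimal
# illustration of the bug class described in the docstring above, using
# hypothetical names:
#
#   class Stream:
#       state_updates = []           # class attribute: one list shared by all instances
#
#   class FixedStream:
#       def __init__(self):
#           self.state_updates = []  # fresh per-instance list, no aliasing
#
# With the first variant, two "independent" streams would append their updates
# to the very same list, which is what the assertion below rules out.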
- - rng1 = MRG_RandomStream(1234) - rng2 = MRG_RandomStream(2392) - assert rng1.state_updates is not rng2.state_updates - - -def test_random_state_transfer(): - # Test that random state can be transferred from one pytensor graph to another. - - class Graph: - def __init__(self, seed=123): - self.rng = MRG_RandomStream(seed) - self.y = self.rng.uniform(size=(1,)) - - g1 = Graph(seed=123) - f1 = function([], g1.y) - g2 = Graph(seed=987) - f2 = function([], g2.y) - - g2.rng.rstate = g1.rng.rstate - for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates): - su2[0].set_value(su1[0].get_value()) - - np.testing.assert_array_almost_equal(f1(), f2(), decimal=6) - - -@pytest.mark.parametrize( - "mode", - [ - "FAST_RUN", - "FAST_COMPILE", - ], -) -def test_gradient_scan(mode): - pytensor_rng = MRG_RandomStream(10) - w = shared(np.ones(1, dtype="float32")) - - def one_step(x): - return x + pytensor_rng.uniform((1,), dtype="float32") * w - - x = vector(dtype="float32") - values, updates = scan(one_step, outputs_info=x, n_steps=10) - gw = grad(at_sum(values[-1]), w) - f = function([x], gw, mode=mode) - assert np.allclose( - f(np.arange(1, dtype=np.float32)), - np.array([0.13928187], dtype=np.float32), - rtol=1e6, - ) - - -def test_simple_shared_mrg_random(): - pytensor_rng = MRG_RandomStream(10) - - values, updates = scan( - lambda: pytensor_rng.uniform((2,), -1, 1), - [], - [], - [], - n_steps=5, - truncate_gradient=-1, - go_backwards=False, - ) - my_f = function([], values, updates=updates, allow_input_downcast=True) - - # Just check for run-time errors - my_f() - my_f() - - -def test_multMatVect(): - A1 = lmatrix("A1") - s1 = ivector("s1") - m1 = iscalar("m1") - A2 = lmatrix("A2") - s2 = ivector("s2") - m2 = iscalar("m2") - - g0 = rng_mrg.DotModulo()(A1, s1, m1, A2, s2, m2) - f0 = function([A1, s1, m1, A2, s2, m2], g0) - - i32max = np.iinfo(np.int32).max - rng = np.random.default_rng(utt.fetch_seed()) - A1 = rng.integers(0, i32max, (3, 3)).astype("int64") - s1 = rng.integers(0, i32max, 3).astype("int32") - m1 = np.asarray(rng.integers(i32max), dtype="int32") - A2 = rng.integers(0, i32max, (3, 3)).astype("int64") - s2 = rng.integers(0, i32max, 3).astype("int32") - m2 = np.asarray(rng.integers(i32max), dtype="int32") - - f0.input_storage[0].storage[0] = A1 - f0.input_storage[1].storage[0] = s1 - f0.input_storage[2].storage[0] = m1 - f0.input_storage[3].storage[0] = A2 - f0.input_storage[4].storage[0] = s2 - f0.input_storage[5].storage[0] = m2 - - r_a1 = rng_mrg.matVecModM(A1, s1, m1) - r_a2 = rng_mrg.matVecModM(A2, s2, m2) - f0.vm() - r_b = f0.output_storage[0].value - - assert np.allclose(r_a1, r_b[:3]) - assert np.allclose(r_a2, r_b[3:]) - - -def test_seed_fn(): - idx = ivector() - - for new_seed, same in [(234, True), (None, True), (23, False)]: - random = MRG_RandomStream(234) - fn1 = function([], random.uniform((2, 2), dtype="float32")) - fn2 = function([], random.uniform((3, 3), nstreams=2, dtype="float32")) - fn3 = function([idx], random.uniform(idx, nstreams=3, ndim=1, dtype="float32")) - - fn1_val0 = fn1() - fn1_val1 = fn1() - assert not np.allclose(fn1_val0, fn1_val1) - fn2_val0 = fn2() - fn2_val1 = fn2() - assert not np.allclose(fn2_val0, fn2_val1) - fn3_val0 = fn3([4]) - fn3_val1 = fn3([4]) - assert not np.allclose(fn3_val0, fn3_val1) - assert fn1_val0.size == 4 - assert fn2_val0.size == 9 - - random.seed(new_seed) - - fn1_val2 = fn1() - fn1_val3 = fn1() - fn2_val2 = fn2() - fn2_val3 = fn2() - fn3_val2 = fn3([4]) - fn3_val3 = fn3([4]) - assert np.allclose(fn1_val0, fn1_val2) == 
same - assert np.allclose(fn1_val1, fn1_val3) == same - assert np.allclose(fn2_val0, fn2_val2) == same - assert np.allclose(fn2_val1, fn2_val3) == same - assert np.allclose(fn3_val0, fn3_val2) == same - assert np.allclose(fn3_val1, fn3_val3) == same - - -def rng_mrg_overflow(sizes, fct, mode, should_raise_error): - for size in sizes: - y = fct(size=size) - f = function([], y, mode=mode) - if should_raise_error: - with pytest.raises(ValueError): - f() - else: - f() - - -@pytest.mark.slow -def test_overflow_cpu(): - # run with PYTENSOR_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 - rng = MRG_RandomStream(np.random.default_rng(utt.fetch_seed()).integers(1234)) - fct = rng.uniform - with config.change_flags(compute_test_value="off"): - # should raise error as the size overflows - sizes = [ - (2**31,), - (2**32,), - ( - 2**15, - 2**16, - ), - (2, 2**15, 2**15), - ] - rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=True) - # should not raise error - sizes = [(2**5,), (2**5, 2**5), (2**5, 2**5, 2**5)] - rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False) - # should support int32 sizes - sizes = [(np.int32(2**10),), (np.int32(2), np.int32(2**10), np.int32(2**10))] - rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False) - - -def test_undefined_grad(): - srng = MRG_RandomStream(seed=1234) - - # checking uniform distribution - low = scalar() - out = srng.uniform((), low=low) - with pytest.raises(NullTypeGradError): - grad(out, low) - - high = scalar() - out = srng.uniform((), low=0, high=high) - with pytest.raises(NullTypeGradError): - grad(out, high) - - out = srng.uniform((), low=low, high=high) - with pytest.raises(NullTypeGradError): - grad(out, (low, high)) - - # checking binomial distribution - prob = scalar() - out = srng.binomial((), p=prob) - with pytest.raises(NullTypeGradError): - grad(out, prob) - - # checking multinomial distribution - prob1 = scalar() - prob2 = scalar() - p = [as_tensor_variable([prob1, 0.5, 0.25])] - out = srng.multinomial(size=None, pvals=p, n=4)[0] - with pytest.raises(NullTypeGradError): - grad(at_sum(out), prob1) - - p = [as_tensor_variable([prob1, prob2])] - out = srng.multinomial(size=None, pvals=p, n=4)[0] - with pytest.raises(NullTypeGradError): - grad(at_sum(out), (prob1, prob2)) - - # checking choice - p = [as_tensor_variable([prob1, prob2, 0.1, 0.2])] - out = srng.choice(a=None, size=1, p=p, replace=False)[0] - with pytest.raises(NullTypeGradError): - grad(out[0], (prob1, prob2)) - - p = [as_tensor_variable([prob1, prob2])] - out = srng.choice(a=None, size=1, p=p, replace=False)[0] - with pytest.raises(NullTypeGradError): - grad(out[0], (prob1, prob2)) - - p = [as_tensor_variable([prob1, 0.2, 0.3])] - out = srng.choice(a=None, size=1, p=p, replace=False)[0] - with pytest.raises(NullTypeGradError): - grad(out[0], prob1) - - # checking normal distribution - avg = scalar() - out = srng.normal((), avg=avg) - with pytest.raises(NullTypeGradError): - grad(out, avg) - - std = scalar() - out = srng.normal((), avg=0, std=std) - with pytest.raises(NullTypeGradError): - grad(out, std) - - out = srng.normal((), avg=avg, std=std) - with pytest.raises(NullTypeGradError): - grad(out, (avg, std)) - - # checking truncated normal distribution - avg = scalar() - out = srng.truncated_normal((), avg=avg) - with pytest.raises(NullTypeGradError): - grad(out, avg) - - std = scalar() - out = srng.truncated_normal((), avg=0, std=std) - with pytest.raises(NullTypeGradError): - grad(out, std) - - out = srng.truncated_normal((), avg=avg, 
std=std) - with pytest.raises(NullTypeGradError): - grad(out, (avg, std)) - - -def test_f16_nonzero(mode=None, op_to_check=rng_mrg.mrg_uniform): - srng = MRG_RandomStream(seed=utt.fetch_seed()) - m = srng.uniform(size=(1000, 1000), dtype="float16") - assert m.dtype == "float16", m.type - f = function([], m, mode=mode) - assert any(isinstance(n.op, op_to_check) for n in f.maker.fgraph.apply_nodes) - m_val = f() - assert np.all((0 < m_val) & (m_val < 1)) - - -@pytest.mark.slow -def test_target_parameter(): - srng = MRG_RandomStream() - pvals = np.array([[0.98, 0.01, 0.01], [0.01, 0.49, 0.50]]) - - def basic_target_parameter_test(x): - f = function([], x) - assert isinstance(f(), np.ndarray) - - basic_target_parameter_test(srng.uniform((3, 2), target="cpu")) - basic_target_parameter_test(srng.normal((3, 2), target="cpu")) - basic_target_parameter_test(srng.truncated_normal((3, 2), target="cpu")) - basic_target_parameter_test(srng.binomial((3, 2), target="cpu")) - basic_target_parameter_test( - srng.multinomial(pvals=pvals.astype("float32"), target="cpu") - ) - basic_target_parameter_test( - srng.choice(p=pvals.astype("float32"), replace=False, target="cpu") - ) - with pytest.deprecated_call(): - basic_target_parameter_test( - srng.multinomial_wo_replacement(pvals=pvals.astype("float32"), target="cpu") - ) - - -@config.change_flags(compute_test_value="off") -def test_undefined_grad_opt(): - # Make sure that undefined grad get removed in optimized graph. - random = MRG_RandomStream( - np.random.default_rng(utt.fetch_seed()).integers(1, 2147462579) - ) - pvals = shared(np.random.random((10, 20)).astype(config.floatX)) - pvals = pvals / pvals.sum(axis=1) - pvals = zero_grad(pvals) - samples = random.multinomial(pvals=pvals, n=1) - samples = cast(samples, pvals.dtype) - samples = zero_grad(samples) - cost = at_sum(samples + pvals) - grad_out = grad(cost, samples) - f = function([], grad_out) - assert not any( - isinstance(node.op, UndefinedGrad) for node in f.maker.fgraph.apply_nodes - ) diff --git a/tests/test_gradient.py b/tests/test_gradient.py index c102d22e06..a456f58388 100644 --- a/tests/test_gradient.py +++ b/tests/test_gradient.py @@ -30,10 +30,10 @@ from pytensor.graph.basic import Apply, graph_inputs from pytensor.graph.null_type import NullType from pytensor.graph.op import Op -from pytensor.sandbox.rng_mrg import MRG_RandomStream from pytensor.tensor.math import add, dot, exp, sigmoid, sqr from pytensor.tensor.math import sum as at_sum from pytensor.tensor.math import tanh +from pytensor.tensor.random import RandomStream from pytensor.tensor.type import ( discrete_dtypes, dmatrix, @@ -956,13 +956,13 @@ def test_grad_scale(): @config.change_flags(compute_test_value="off") def test_undefined_grad_opt(): # Make sure that undefined grad get removed in optimized graph. 
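# For reference (an annotation, not part of the patch hunk): the "+" lines
# below move this test from the deprecated MRG_RandomStream to RandomStream;
# a minimal usage sketch based on those lines:
#
#   from pytensor.tensor.random import RandomStream
#   srng = RandomStream(123)                  # seeded stream
#   samples = srng.multinomial(n=1, p=pvals)  # note `p=` instead of `pvals=`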
- random = MRG_RandomStream(np.random.default_rng().integers(1, 2147462579)) + random = RandomStream(np.random.default_rng().integers(1, 2147462579)) pvals = pytensor.shared(np.random.random((10, 20)).astype(config.floatX)) pvals = pvals / pvals.sum(axis=1) pvals = zero_grad(pvals) - samples = random.multinomial(pvals=pvals, n=1) + samples = random.multinomial(p=pvals, n=1) samples = at.cast(samples, pvals.dtype) samples = zero_grad(samples) From 7d173623817ae27adb310834f068b0ee77cc725c Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 00:09:49 +0300 Subject: [PATCH 08/43] remove deprecated pytensor.tensor.nnet --- .github/workflows/test.yml | 2 +- doc/library/tensor/conv.rst | 11 + doc/library/tensor/index.rst | 1 + pytensor/scalar/basic_scipy.py | 9 - pytensor/tensor/conv/__init__.py | 10 + .../tensor/{nnet => conv}/abstract_conv.py | 51 +- pytensor/tensor/nnet/__init__.py | 52 - pytensor/tensor/nnet/basic.py | 2171 -------------- pytensor/tensor/nnet/batchnorm.py | 923 ------ pytensor/tensor/nnet/blocksparse.py | 272 -- pytensor/tensor/nnet/c_code/ctc_wrapper.c | 251 -- pytensor/tensor/nnet/conv.py | 2639 ----------------- pytensor/tensor/nnet/conv3d2d.py | 329 -- pytensor/tensor/nnet/ctc.py | 263 -- pytensor/tensor/nnet/neighbours.py | 830 ------ pytensor/tensor/nnet/opt.py | 10 - pytensor/tensor/nnet/rewriting.py | 605 ---- pytensor/tensor/nnet/sigm.py | 176 -- tests/tensor/{nnet => conv}/__init__.py | 0 .../tensor/conv}/c_code/corr3d_gemm.c | 0 .../tensor/conv}/c_code/corr_gemm.c | 0 .../tensor/conv/c_conv3d_corr3d_ref.py | 2 +- .../tensor/conv/c_conv_corr_ref.py | 2 +- .../{nnet => conv}/test_abstract_conv.py | 385 +-- tests/tensor/nnet/speed_test_conv.py | 451 --- tests/tensor/nnet/test_basic.py | 1222 -------- tests/tensor/nnet/test_batchnorm.py | 685 ----- tests/tensor/nnet/test_blocksparse.py | 338 --- tests/tensor/nnet/test_conv.py | 784 ----- tests/tensor/nnet/test_conv3d2d.py | 237 -- tests/tensor/nnet/test_corr.py | 582 ---- tests/tensor/nnet/test_corr3d.py | 562 ---- tests/tensor/nnet/test_ctc.py | 187 -- tests/tensor/nnet/test_neighbours.py | 661 ----- tests/tensor/nnet/test_rewriting.py | 57 - tests/tensor/nnet/test_sigm.py | 139 - tests/tensor/test_misc.py | 73 - 37 files changed, 132 insertions(+), 14840 deletions(-) create mode 100644 doc/library/tensor/conv.rst delete mode 100644 pytensor/scalar/basic_scipy.py create mode 100644 pytensor/tensor/conv/__init__.py rename pytensor/tensor/{nnet => conv}/abstract_conv.py (98%) delete mode 100644 pytensor/tensor/nnet/__init__.py delete mode 100644 pytensor/tensor/nnet/basic.py delete mode 100644 pytensor/tensor/nnet/batchnorm.py delete mode 100644 pytensor/tensor/nnet/blocksparse.py delete mode 100644 pytensor/tensor/nnet/c_code/ctc_wrapper.c delete mode 100644 pytensor/tensor/nnet/conv.py delete mode 100644 pytensor/tensor/nnet/conv3d2d.py delete mode 100644 pytensor/tensor/nnet/ctc.py delete mode 100644 pytensor/tensor/nnet/neighbours.py delete mode 100644 pytensor/tensor/nnet/opt.py delete mode 100644 pytensor/tensor/nnet/rewriting.py delete mode 100644 pytensor/tensor/nnet/sigm.py rename tests/tensor/{nnet => conv}/__init__.py (100%) rename {pytensor/tensor/nnet => tests/tensor/conv}/c_code/corr3d_gemm.c (100%) rename {pytensor/tensor/nnet => tests/tensor/conv}/c_code/corr_gemm.c (100%) rename pytensor/tensor/nnet/corr3d.py => tests/tensor/conv/c_conv3d_corr3d_ref.py (99%) rename pytensor/tensor/nnet/corr.py => tests/tensor/conv/c_conv_corr_ref.py (99%) rename tests/tensor/{nnet => 
conv}/test_abstract_conv.py (88%) delete mode 100644 tests/tensor/nnet/speed_test_conv.py delete mode 100644 tests/tensor/nnet/test_basic.py delete mode 100644 tests/tensor/nnet/test_batchnorm.py delete mode 100644 tests/tensor/nnet/test_blocksparse.py delete mode 100644 tests/tensor/nnet/test_conv.py delete mode 100644 tests/tensor/nnet/test_conv3d2d.py delete mode 100644 tests/tensor/nnet/test_corr.py delete mode 100644 tests/tensor/nnet/test_corr3d.py delete mode 100644 tests/tensor/nnet/test_ctc.py delete mode 100644 tests/tensor/nnet/test_neighbours.py delete mode 100644 tests/tensor/nnet/test_rewriting.py delete mode 100644 tests/tensor/nnet/test_sigm.py delete mode 100644 tests/tensor/test_misc.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 710e1d9577..b91c77c4f8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,7 +77,7 @@ jobs: - "tests/tensor tests/sparse --ignore=tests/tensor/test_basic.py --ignore=tests/tensor/test_math.py --ignore=tests/tensor/test_math_scipy.py --ignore=tests/tensor/test_inplace.py --ignore=tests/tensor/test_elemwise.py --ignore=tests/tensor/rewriting/test_basic.py --ignore=tests/tensor/rewriting/test_math.py --ignore=tests/tensor/nnet --ignore=tests/tensor/signal" - "tests/tensor/test_basic.py tests/tensor/test_math.py tests/tensor/test_math_scipy.py tests/tensor/test_inplace.py" - "tests/tensor/test_elemwise.py tests/tensor/rewriting/test_basic.py tests/tensor/rewriting/test_math.py" - - "tests/tensor/nnet/test_conv.py" + - "tests/tensor/conv/test_abstract_conv.py" include: - python-version: "3.7" fast-compile: 1 diff --git a/doc/library/tensor/conv.rst b/doc/library/tensor/conv.rst new file mode 100644 index 0000000000..5c49d3ca92 --- /dev/null +++ b/doc/library/tensor/conv.rst @@ -0,0 +1,11 @@ +========================================= +:mod:`tensor.conv` -- Tensor Convolutions +========================================= + +.. module:: tensor.conv + :platform: Unix, Windows + :synopsis: Tensor Convolutions +.. moduleauthor:: LISA, PyMC Developers, PyTensor Developers + +.. 
automodule:: pytensor.tensor.conv + :members: \ No newline at end of file diff --git a/doc/library/tensor/index.rst b/doc/library/tensor/index.rst index 2f08f13ee5..dbd7c1c600 100644 --- a/doc/library/tensor/index.rst +++ b/doc/library/tensor/index.rst @@ -26,5 +26,6 @@ They are grouped into the following sections: slinalg nlinalg fft + conv math_opt basic_opt diff --git a/pytensor/scalar/basic_scipy.py b/pytensor/scalar/basic_scipy.py deleted file mode 100644 index a26371d020..0000000000 --- a/pytensor/scalar/basic_scipy.py +++ /dev/null @@ -1,9 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.scalar.basic_scipy` is deprecated " - "and has been renamed to `pytensor.scalar.math`", - DeprecationWarning, - stacklevel=2, -) diff --git a/pytensor/tensor/conv/__init__.py b/pytensor/tensor/conv/__init__.py new file mode 100644 index 0000000000..e84a9fe1d3 --- /dev/null +++ b/pytensor/tensor/conv/__init__.py @@ -0,0 +1,10 @@ +from .abstract_conv import ( + bilinear_upsampling, + causal_conv1d, + conv2d, + conv2d_transpose, + conv3d, + frac_bilinear_upsampling, + separable_conv2d, + separable_conv3d, +) diff --git a/pytensor/tensor/nnet/abstract_conv.py b/pytensor/tensor/conv/abstract_conv.py similarity index 98% rename from pytensor/tensor/nnet/abstract_conv.py rename to pytensor/tensor/conv/abstract_conv.py index ded9725c61..f57d7d092e 100644 --- a/pytensor/tensor/nnet/abstract_conv.py +++ b/pytensor/tensor/conv/abstract_conv.py @@ -5,14 +5,8 @@ import logging import sys - - -try: - from math import gcd -except ImportError: - from fractions import gcd - import warnings +from math import gcd import numpy as np @@ -35,8 +29,7 @@ from pytensor.tensor.var import TensorConstant, TensorVariable -__docformat__ = "restructuredtext en" -_logger = logging.getLogger("pytensor.tensor.nnet.abstract_conv") +_logger = logging.getLogger(__name__) def get_conv_output_shape( @@ -678,7 +671,7 @@ def abstract_conv2d( stack of 2D inputs with a set of 2D filters. The implementation is modelled after Convolutional Neural Networks (CNN). - Refer to :func:`nnet.conv2d ` for a more detailed documentation. + Refer to :func:`nnet.conv2d ` for a more detailed documentation. """ input = as_tensor_variable(input) @@ -2430,7 +2423,7 @@ def unshared2d(self, inp, kern, out_shape, direction="forward"): class AbstractConv(BaseAbstractConv): """Abstract Op for the forward convolution. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. """ @@ -2646,7 +2639,7 @@ def infer_shape(self, fgraph, node, input_shapes): class AbstractConv2d(AbstractConv): """Abstract Op for the forward convolution. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. """ @@ -2708,7 +2701,7 @@ def grad(self, inp, grads): class AbstractConv3d(AbstractConv): """Abstract Op for the forward convolution. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. 
""" @@ -3489,11 +3482,9 @@ def conv2d( border_mode="valid", subsample=(1, 1), filter_flip=True, - image_shape=None, filter_dilation=(1, 1), num_groups=1, unshared=False, - **kwargs, ): """ This function will build the symbolic graph for convolving a mini-batch of a @@ -3584,36 +3575,6 @@ def conv2d( of shape (batch size, output channels, output rows, output columns) """ - if "imshp_logical" in kwargs or "kshp_logical" in kwargs: - raise ValueError( - "Keyword arguments 'imshp_logical' and 'kshp_logical' for conv2d " - "are not supported anymore (and have not been a reliable way to " - "perform upsampling). That feature is still available by calling " - "pytensor.tensor.nnet.conv.conv2d() for the time being." - ) - if len(kwargs.keys()) > 0: - warnings.warn( - str(kwargs.keys()) + " are now deprecated in " - "`tensor.nnet.abstract_conv.conv2d` interface" - " and will be ignored.", - stacklevel=2, - ) - - if image_shape is not None: - warnings.warn( - "The `image_shape` keyword argument to " - "`tensor.nnet.conv2d` is deprecated, it has been " - "renamed to `input_shape`.", - stacklevel=2, - ) - if input_shape is None: - input_shape = image_shape - else: - raise ValueError( - "input_shape and image_shape should not" - " be provided at the same time." - ) - return abstract_conv2d( input, filters, diff --git a/pytensor/tensor/nnet/__init__.py b/pytensor/tensor/nnet/__init__.py deleted file mode 100644 index 3af3df4480..0000000000 --- a/pytensor/tensor/nnet/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.nnet` is deprecated and will " - "be removed from PyTensor in version 2.9.0", - DeprecationWarning, - stacklevel=2, -) - -import pytensor.tensor.nnet.rewriting -from pytensor.tensor.nnet.abstract_conv import ( - abstract_conv2d, - conv2d, - conv2d_grad_wrt_inputs, - conv2d_transpose, - conv3d, - separable_conv2d, -) -from pytensor.tensor.nnet.basic import ( - binary_crossentropy, - categorical_crossentropy, - confusion_matrix, - crossentropy_categorical_1hot, - crossentropy_categorical_1hot_grad, - crossentropy_softmax_1hot, - crossentropy_softmax_1hot_with_bias, - crossentropy_softmax_1hot_with_bias_dx, - crossentropy_softmax_argmax_1hot_with_bias, - crossentropy_softmax_max_and_argmax_1hot, - crossentropy_softmax_max_and_argmax_1hot_with_bias, - crossentropy_to_crossentropy_with_softmax, - crossentropy_to_crossentropy_with_softmax_with_bias, - elu, - graph_merge_softmax_with_crossentropy_softmax, - h_softmax, - logsoftmax, - prepend_0_to_each_row, - prepend_1_to_each_row, - prepend_scalar_to_each_row, - relu, - selu, - sigmoid_binary_crossentropy, - softmax, - softmax_grad_legacy, - softmax_legacy, - softmax_simplifier, - softmax_with_bias, - softsign, -) -from pytensor.tensor.nnet.batchnorm import batch_normalization -from pytensor.tensor.nnet.sigm import hard_sigmoid, ultra_fast_sigmoid diff --git a/pytensor/tensor/nnet/basic.py b/pytensor/tensor/nnet/basic.py deleted file mode 100644 index b5733751e3..0000000000 --- a/pytensor/tensor/nnet/basic.py +++ /dev/null @@ -1,2171 +0,0 @@ -""" -Provides neural-network specific Ops. - -Notes ------ -TODO: factor this out into a neural-network toolbox. 
-""" - -import numpy as np - -import pytensor -from pytensor import scalar as aes -from pytensor.compile import optdb -from pytensor.gradient import DisconnectedType, grad_not_implemented -from pytensor.graph.basic import Apply -from pytensor.graph.op import Op -from pytensor.graph.rewriting.basic import ( - copy_stack_trace, - graph_rewriter, - node_rewriter, -) -from pytensor.link.c.op import COp -from pytensor.raise_op import Assert -from pytensor.scalar import UnaryScalarOp -from pytensor.tensor import basic as at -from pytensor.tensor.basic import ARange -from pytensor.tensor.elemwise import DimShuffle, Elemwise -from pytensor.tensor.exceptions import NotScalarConstantError -from pytensor.tensor.extra_ops import Unique -from pytensor.tensor.math import ( - MaxAndArgmax, - Sum, - add, - dot, - eq, - exp, - expm1, - log, - max_and_argmax, - mul, - neg, - or_, - sigmoid, - softplus, -) -from pytensor.tensor.math import sum as at_sum -from pytensor.tensor.math import tanh, tensordot, true_div -from pytensor.tensor.nnet.blocksparse import sparse_block_dot -from pytensor.tensor.rewriting.basic import ( - register_canonicalize, - register_specialize, - register_stabilize, -) -from pytensor.tensor.rewriting.math import local_mul_canonizer -from pytensor.tensor.shape import Shape, shape_padleft -from pytensor.tensor.special import Softmax, SoftmaxGrad, log_softmax, softmax -from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedSubtensor -from pytensor.tensor.type import ( - TensorType, - discrete_dtypes, - float_dtypes, - integer_dtypes, -) - - -class SoftmaxWithBias(COp): - """ - An L{Op} for the output of neural-net multiclass classifiers. - - Attributes - ---------- - x : a matrix of floats (32 or 64) - b : a [row] vector of floats (32 or 64), length is number of cols in x - - This L{Op}'s output is softmax(x+b). - softmax(x[i]) is the i'th distribution over len(x[i]) options. - - """ - - nin = 2 - nout = 1 - __props__ = () - - def make_node(self, x, b): - x = at.as_tensor_variable(x) - b = at.as_tensor_variable(b) - if x.type.ndim != 2 or x.type.dtype not in float_dtypes: - raise ValueError("x must be 2-d tensor of floats") - if b.type.ndim != 1 or b.type.dtype not in float_dtypes: - raise ValueError("b must be 1-d tensor of floats") - - sm = x.type() - return Apply(self, [x, b], [sm]) - - def perform(self, node, input_storage, output_storage): - x, b = input_storage - if b.shape[0] != x.shape[1]: - raise ValueError("b must have same number of columns as x") - - # sm = numpy.zeros_like(x) - # for i in range(sm.shape[0]): - # row = x[i] + b - # sm[i] = numpy.exp(row - numpy.max(row)) - # sm[i] *= 1.0 / numpy.sum(sm[i]) - # output_storage[0][0] = sm - - if x.size == 0: - # Numpy doesn't like the max of a zero-sized object. - output_storage[0][0] = np.zeros(x.shape, dtype=x.dtype) - return - - x_dtype = x.dtype - # Perform computations in float32 otherwise the result is too imprecise - if x.dtype == "float16": - x = x.astype("float32") - - x_plus_b = x + b[None, :] - e_x = np.exp(x_plus_b - x_plus_b.max(axis=1)[:, None]) - e_x *= 1.0 / e_x.sum(axis=1)[:, None] - # default for copy is True and we don't need a copy if the - # data type matches. 
- output_storage[0][0] = e_x.astype(x_dtype, copy=False) - - def L_op(self, inp, outputs, grads): - x, b = inp - (g_sm,) = grads - - if isinstance(g_sm.type, DisconnectedType): - return [DisconnectedType()(), DisconnectedType()()] - - dx = softmax_grad_legacy(g_sm, outputs[0]) - db = at_sum(dx, axis=0) - return dx, db - - def infer_shape(self, fgraph, node, shape): - return [shape[0]] - - def c_headers(self, **kwargs): - return ["", ""] - - @staticmethod - def c_code_template(dtype): - # this implementation was lifted from - # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx - - # TODO: put this into a templated function, in the support code - # TODO: declare the max of each row as an Op output - - # TODO: set error messages for failures in this code - - # TODO: use this to accept float32 and int32: - # node.inputs[0].type.dtype_specs()[1] - init_decl = """ - npy_intp* Nx = PyArray_DIMS(%(x)s); - npy_intp Sx = 0; - npy_intp Sb = 0; - npy_intp Ssm = 0; - - - if (PyArray_NDIM(%(x)s) != 2) - { - PyErr_SetString(PyExc_ValueError, "not a 2d tensor"); - %(fail)s; - } - if (PyArray_NDIM(%(b)s) != 1) - { - PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); - %(fail)s; - } - if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) && - (PyArray_TYPE(%(x)s) != NPY_FLOAT)) - { - PyErr_SetString(PyExc_TypeError, "not a float"); - %(fail)s; - } - if ((PyArray_TYPE(%(b)s) != NPY_DOUBLE) && - (PyArray_TYPE(%(b)s) != NPY_FLOAT)) - { - PyErr_SetString(PyExc_TypeError, "b not float"); - %(fail)s; - } - if ((PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(b)s)[0])) - { - PyErr_Format(PyExc_ValueError, - "number of columns in x (%%ld) does not match length of b (%%ld)", - (long int)PyArray_DIMS(%(x)s)[1], (long int)PyArray_DIMS(%(b)s)[0]); - %(fail)s; - } - - if ((NULL == %(sm)s) - || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0]) - || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1])) - { - if (NULL != %(sm)s) Py_XDECREF(%(sm)s); - %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), - PyArray_TYPE(%(x)s)); - if(!%(sm)s) { - PyErr_SetString(PyExc_MemoryError, - "failed to alloc sm output"); - %(fail)s - } - } - Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s); - Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s); - Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s); - - """ - - begin_row_loop = """ - for (size_t i = 0; i < Nx[0]; ++i) - { - size_t j; - double sum = 0.0; - - const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i); - const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s)); - dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i); - - npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s); - npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s); - npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s); - - size_t row_max_j=0; - dtype_%(sm)s row_max = x_i[0] + b_i[0]; - //std::cout << "0 " << row_max << "\\n"; - // Get the maximum value of the row - for (j = 1; j < Nx[1]; ++j) - { - dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb]; - //std::cout << "1 " << row_ij << "\\n"; - row_max_j = (row_ij > row_max) ? j : row_max_j; - row_max = (row_ij > row_max) ? 
row_ij : row_max; - } - - """ - - inside_row_loop = """ - for (j = 0; j < Nx[1]; ++j) - { - dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb]; - //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n"; - dtype_%(sm)s sm_ij = exp(row_ij - row_max); - //std::cout << "3 " << j << " " << sm_ij << "\\n"; - sum += sm_ij; - sm_i[j * Ssm] = sm_ij; - } - - //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); - double sum_inv = 1.0 / sum; - for (j = 0; j < Nx[1]; ++j) - { - sm_i[j * Ssm] *= sum_inv; - } - - """ - - # Get the vectorized version of exp if it exist - try: - vec_exp = pytensor.scalar.exp.c_code_contiguous_raw( - dtype, "Nx[1]", "sm_i", "sm_i" - ) - inside_row_loop_contig = ( - """ - for (j = 0; j < Nx[1]; ++j) - { - dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb]; - //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\\n"; - dtype_%%(sm)s sm_ij = row_ij - row_max; - //std::cout << "3 " << j << " " << sm_ij << "\\n"; - sm_i[j * Ssm] = sm_ij; - } - %(vec_exp)s; - for (j = 0; j < Nx[1]; ++j) - { - sum += sm_i[j * Ssm]; - } - - //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); - double sum_inv = 1.0 / sum; - for (j = 0; j < Nx[1]; ++j) - { - sm_i[j * Ssm] *= sum_inv; - } - - """ - % locals() - ) - inside_row_loop = ( - """ - if(Ssm == 1){ - %(inside_row_loop_contig)s - }else{ - %(inside_row_loop)s - } - """ - % locals() - ) - except pytensor.graph.utils.MethodNotDefined: - pass - end_row_loop = """ - } - """ - - return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) - - def c_code(self, node, name, inp, out, sub): - x, b = inp - (sm,) = out - code_template = "".join( - self.c_code_template(node.inputs[0].type.dtype_specs()[1]) - ) - return code_template % dict(locals(), **sub) - - @staticmethod - def c_code_cache_version(): - return (8,) - - -softmax_with_bias = SoftmaxWithBias() - - -softmax_grad_legacy = SoftmaxGrad(axis=-1) - - -softmax_legacy = Softmax(axis=-1) - - -@register_specialize("fast_compile") -@node_rewriter([softmax_legacy]) -def local_softmax_with_bias(fgraph, node): - """ - Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias). - - """ - if node.op == softmax_legacy and node.outputs[0].ndim == 2: - (x,) = node.inputs - if x.owner and x.owner.op == add: - vectors = [] - non_vectors = [] - for x_in in x.owner.inputs: - if list(x_in.type.broadcastable) == [True, False]: - # print isinstance(x_in.owner.op, - # DimShuffle) since specialization comes - # relatively late in optimization, we don't want to - # put in extra DimShuffles un-necessarily. - if ( - x_in.owner - and isinstance(x_in.owner.op, DimShuffle) - and list(x_in.owner.inputs[0].type.broadcastable) == [False] - ): - # cut out the DimShuffle that was broadcasting a vector - vectors.append(x_in.owner.inputs[0]) - else: - # insert an extra DimShuffle to correct the old one - vectors.append(DimShuffle((True, False), (1,))(x_in)) - else: - non_vectors.append(x_in) - - # If all the inputs were vectors or broadcasted vectors, - # we broadcast one of them to be used as a matrix - if len(non_vectors) == 0: - assert len(vectors) > 0 # we should have at least 1 input... - promoted_vector = vectors.pop() - non_vectors.append(shape_padleft(promoted_vector)) - assert non_vectors # not empty - - if vectors: - # we're in business... 
- if len(vectors) > 1: - vector_sum = add(*vectors) - copy_stack_trace(x_in, vector_sum) - else: - vector_sum = vectors[0] - - if len(non_vectors) > 1: - non_vector_sum = add(*non_vectors) - copy_stack_trace(x_in, non_vector_sum) - else: - non_vector_sum = non_vectors[0] - - try: - sm_bias = softmax_with_bias(non_vector_sum, vector_sum) - copy_stack_trace(node.outputs[0], sm_bias) - except Exception: - # if our arguments have the wrong types, then - # forget about it - return - - out_type = node.outputs[0].type - if ( - out_type.dtype == sm_bias.type.dtype - and out_type.broadcastable == sm_bias.type.broadcastable - ): - # This condition is not always true. See the test - # nnet/tests/test_basic.py:T_SoftmaxWithBias.test_broadcast - return [sm_bias] - - -def softmax_simplifier(numerators, denominators): - for numerator in list(numerators): - if not numerator.type.dtype.startswith("float"): - continue - - if not (numerator.owner and numerator.owner.op == exp): - continue - - matching_denom = None - - for denominator in denominators: - # Division with dimshuffle - if denominator.owner and isinstance(denominator.owner.op, DimShuffle): - ds_order = denominator.owner.op.new_order - # Check that at most only one dimension is being reintroduced by - # a dimshuffle. The cases where all dimensions are reintroduced - # after a complete sum reduction end up in the else branch - if ds_order.count("x") != 1: - continue - # Check that dimshuffle does not change order of original dims - ds_order_without_x = tuple(dim for dim in ds_order if dim != "x") - if tuple(sorted(ds_order_without_x)) != ds_order_without_x: - continue - new_dim = ds_order.index("x") - z = denominator.owner.inputs[0] - if z.owner and isinstance(z.owner.op, Sum): - sum_axis = z.owner.op.axis - # Check that reintroduced dim was the one reduced - if ( - (sum_axis is not None) - and (len(sum_axis) == 1) - and (sum_axis[0] == new_dim) - ): - if z.owner.inputs[0] is numerator: - (sum_axis,) = sum_axis - matching_denom = denominator - break - - # Division without dimshuffle - else: - z = denominator - if z.owner and isinstance(z.owner.op, Sum): - sum_axis = z.owner.op.axis - # Filter out partial summations over more than one axis - # The cases where all axis of summation are explicitly given - # as in `sum(matrix, axis=(0, 1))` are eventually rewritten - # to `sum(matrix)` and this branch is not a blocker - if sum_axis is not None and len(sum_axis) != 1: - continue - if z.owner.inputs[0] is numerator: - if sum_axis is not None: - (sum_axis,) = sum_axis - matching_denom = denominator - break - - if matching_denom: - softmax = Softmax(axis=sum_axis)(numerator.owner.inputs[0]) - copy_stack_trace(numerator, softmax) - numerators.remove(numerator) - denominators.remove(matching_denom) - numerators.append(softmax) - - return numerators, denominators - - -local_mul_canonizer.add_simplifier(softmax_simplifier, "softmax_simplifier") - - -class CrossentropySoftmaxArgmax1HotWithBias(COp): - """ - A special compound L{Op} for the output of neural-net classifiers. - - Parameters - ---------- - x : a matrix of floats (32 or 64) - b : a [row] vector of floats (32 or 64), length is number of cols in x - y_idx : a [column] vector of int (32 or 64), length is number of rows in x - - Returns - ------- - object - row-wise NLL, softmax(x+b), row-wise argmax of (x+b). 
- - @precondition: every entry in y_idx is a valid (non-negative) - column index into x - - This L{Op} has three outputs: - - KL(softmax(x+b), y) - - softmax(x+b) - - argmax(x+b) - - softmax(x[i]) is the i'th distribution over len(x[i]) options - argmax(x) is the index of x's greatest element - y_idx[i] is an integer index, encoding a 1-hot distribution. - - In practice, when we are trying to do classification, we have one row in x - and y_idx per example, and y[i] is the index of the (correct) class of the - i'th example. - - """ - - nin = 3 - nout = 3 - __props__ = () - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def make_node(self, x, b, y_idx): - x = at.as_tensor_variable(x) - b = at.as_tensor_variable(b) - y_idx = at.as_tensor_variable(y_idx) - if x.type.ndim != 2 or x.type.dtype not in float_dtypes: - raise ValueError("x must be 2-d tensor of floats", x.type) - if b.type.ndim != 1 or x.type.dtype not in float_dtypes: - raise ValueError("b must be 1-d tensor of floats", b.type) - if y_idx.type.ndim != 1 or y_idx.type.dtype not in discrete_dtypes: - raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type) - - # TODO: Is this correct? It used to be y, not y_idx - out_shape = tuple(1 if s == 1 else None for s in y_idx.type.shape) - nll = TensorType(x.type.dtype, shape=out_shape).make_variable() - sm = x.type() - am = y_idx.type() - return Apply(self, [x, b, y_idx], [nll, sm, am]) - - def perform(self, node, input_storage, output_storage): - """ - The math, where x is an input vector, and t is a target index: - - softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j])) - nll(x,t) = -log(softmax(x)[t]) - - We compute this by subtracting off the max of x. This avoids - numerical instability. - - m = max_j x[j] - softmax(x)[i] = exp(x[i] -m) / sum_j(exp(x[j] - m)) - - nll = -log(exp(x[t] -m) / sum_j(exp(x[j] - m))) - = -x[t] + m + log( sum_j(exp(x[j] - m))) - - """ - x, b, y_idx = input_storage - if b.shape[0] != x.shape[1]: - raise ValueError("b must have same number of columns as x") - if y_idx.shape[0] != x.shape[0]: - raise ValueError("y_idx must have same number of rows as x") - if any(y_idx < 0): - raise ValueError("y_i value out of bounds") - sm = np.zeros_like(x) # softmax - nll = np.zeros( - x.shape[0], dtype=node.outputs[0].type.dtype - ) # nll(y | softmax(x)) - am = np.zeros_like(y_idx) - for i in range(sm.shape[0]): - # add the bias vector to the i'th row of x - row = x[i] + b - - # get the maximum value of i'th row for numerically safe - # softmax / nll - am[i] = np.argmax(row) - m = row[am[i]] - - # compute the unnormalized softmax, and normalization constant - sm[i] = np.exp(row - m) - sum_j = np.sum(sm[i]) # sum_j(exp(x[j] - m)) - - # normalized our softmax - sm[i] *= 1.0 / sum_j - - # store the nll - nll[i] = -row[y_idx[i]] + m + np.log(sum_j) - - output_storage[0][0] = nll - output_storage[1][0] = sm - output_storage[2][0] = am - - def infer_shape(self, fgraph, node, shapes): - x_shp, b_shp, idx_shp = shapes - nll_shp = (x_shp[0],) - sm_shp = x_shp - am_shp = idx_shp - return [nll_shp, sm_shp, am_shp] - - def connection_pattern(self, node): - - return [ - [True, True, True], # x - [True, True, True], # b - [False, False, True], - ] # y_idx - - def grad(self, inp, grads): - x, b, y_idx = inp - g_nll, g_sm, g_am = grads - - dx_terms = [] - db_terms = [] - d_idx_terms = [] - - if not isinstance(g_nll.type, DisconnectedType): - nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx) - dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx) - db = 
at_sum(dx, axis=[0]) - dx_terms.append(dx) - db_terms.append(db) - - if not isinstance(g_sm.type, DisconnectedType): - dx, db = softmax_with_bias.L_op((x, b), [softmax_with_bias(x, b)], (g_sm,)) - dx_terms.append(dx) - db_terms.append(db) - - if not isinstance(g_am.type, DisconnectedType): - dx_terms.append(x.zeros_like()) - db_terms.append(b.zeros_like()) - d_idx_terms.append(y_idx.zeros_like()) - - def fancy_sum(terms): - if len(terms) == 0: - return DisconnectedType()() - rval = terms[0] - for term in terms[1:]: - rval = rval + term - return rval - - return [fancy_sum(terms) for terms in [dx_terms, db_terms, d_idx_terms]] - - def c_headers(self, **kwargs): - return ["", ""] - - @staticmethod - def c_code_template(dtype): - # this implementation was lifted from - # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx - - # TODO: put this into a templated function, in the support code - # TODO: declare the max of each row as an Op output - - # TODO: set error messages for failures in this code - - # TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] - ( - init_decl, - begin_row_loop, - inside_row_loop, - end_row_loop, - ) = SoftmaxWithBias.c_code_template(dtype) - return ( - init_decl, - """ - if (PyArray_NDIM(%(y_idx)s) != 1) - { - PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); - %(fail)s; - } - if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0]) - { - PyErr_Format(PyExc_ValueError, - "number of rows in x (%%ld) does not match length of y (%%ld)", - (long int)PyArray_DIMS(%(x)s)[0], - (long int)PyArray_DIMS(%(y_idx)s)[0]); - %(fail)s; - } - - if ((NULL == %(nll)s) //initial condition - || (PyArray_DIMS(%(nll)s)[0] != PyArray_DIMS(%(y_idx)s)[0])) - { - if (NULL != %(nll)s) Py_XDECREF(%(nll)s); - %(nll)s = (PyArrayObject*)PyArray_SimpleNew(1, - PyArray_DIMS(%(y_idx)s), PyArray_TYPE(%(x)s)); - if(!%(nll)s) - { - PyErr_SetString(PyExc_MemoryError, - "failed to alloc nll output"); - %(fail)s; - } - } - if ((NULL == %(am)s) - || (PyArray_DIMS(%(am)s)[0] != PyArray_DIMS(%(y_idx)s)[0])) - { - Py_XDECREF(%(am)s); - %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, - PyArray_DIMS(%(y_idx)s), PyArray_TYPE(%(y_idx)s)); - if(!%(am)s) - { - PyErr_SetString(PyExc_MemoryError, - "failed to alloc am output"); - %(fail)s; - } - } - """, - begin_row_loop, - """ - const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0]; - dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(PyArray_BYTES(%(nll)s) + PyArray_STRIDES(%(nll)s)[0] * i); - %(am_type)s* __restrict__ am_i = (%(am_type)s*) (PyArray_BYTES(%(am)s) + PyArray_STRIDES(%(am)s)[0] * i); - """, - inside_row_loop, - """ - if ((y_i >= PyArray_DIMS(%(x)s)[1]) || (y_i < 0)) - { - PyErr_SetString(PyExc_ValueError, "y_i value out of bounds"); - %(fail)s; - } - nll_i[0] = - x_i[y_i*Sx] - - b_i[y_i*Sb] - + row_max - + log(sum); - am_i[0] = row_max_j; - """, - end_row_loop, - ) - - def c_code_cache_version(self): - return (5,) + SoftmaxWithBias.c_code_cache_version() - - def c_code(self, node, name, inp, out, sub): - x, b, y_idx = inp - nll, sm, am = out - y_idx_type = node.inputs[2].type.dtype_specs()[1] - am_type = y_idx_type - dtype = node.inputs[0].type.dtype_specs()[1] - code_template = "".join(self.c_code_template(dtype)) - return code_template % dict(locals(), **sub) - - -class CrossentropySoftmax1HotWithBiasDx(COp): - """ - Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op. 
- - """ - - nin = 3 - nout = 1 - __props__ = () - - def make_node(self, dy, sm, y_idx, **kwargs): - dy = at.as_tensor_variable(dy) - sm = at.as_tensor_variable(sm) - y_idx = at.as_tensor_variable(y_idx) - if dy.type.ndim > 1 or dy.type.dtype not in float_dtypes: - raise ValueError("dy must be {0,1}-d tensor of floats", dy.type) - if sm.type.ndim != 2 or sm.type.dtype not in float_dtypes: - raise ValueError("sm must be 2-d tensor of floats", sm.type) - if y_idx.type.ndim != 1 or y_idx.type.dtype not in discrete_dtypes: - raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type) - return Apply(self, [dy, sm, y_idx], [sm.type()]) - - def perform(self, node, input_storage, output_storage): - dy, sm, y_idx = input_storage - if any(y_idx < 0): - raise ValueError("y_i value out of bounds") - dx = np.zeros_like(sm) - if dy.ndim == 0: - dy = dy[None] - incr = int(dy.shape[0] > 1) - for i in range(sm.shape[0]): - dy_i = dy[i * incr] - dx[i] = dy_i * sm[i] # vector scale - dx[i, y_idx[i]] -= dy_i # scalar decrement - output_storage[0][0] = dx - - def infer_shape(self, fgraph, node, shapes): - return [shapes[1]] - - def grad(self, inp, grads): - dy, sm, y_idx = inp - (g_dx,) = grads - # TODO: currently we do not compute the gradient w.r.t. dy, because - # advanced indexing is not working yet. When it works, do it to avoid - # potentially misleading behavior in gradient computations! (although - # typically we should not need the gradient w.r.t. dy). - y_idx_range = at.arange(y_idx.shape[0]) - g_dy = at_sum( - g_dx * AdvancedIncSubtensor()(sm, at.fill(dy, -1), y_idx_range, y_idx), - axis=1, - ) - g_sm = dy.dimshuffle(0, "x") * g_dx - g_y_idx = grad_not_implemented(self, 2, y_idx) - return [g_dy, g_sm, g_y_idx] - - def c_code_cache_version(self): - return (6,) - - def c_code(self, node, name, inp, out, sub): - dnll, sm, y_idx = inp - (dx,) = out - y_idx_type = node.inputs[2].type.dtype_specs()[1] - return """ - if ((PyArray_TYPE(%(dnll)s) != NPY_DOUBLE) && - (PyArray_TYPE(%(dnll)s) != NPY_FLOAT)) - { - PyErr_SetString(PyExc_TypeError, - "dnll type should be float32 or float64"); - %(fail)s; - } - if ((PyArray_TYPE(%(sm)s) != NPY_DOUBLE) && - (PyArray_TYPE(%(sm)s) != NPY_FLOAT)) - { - PyErr_SetString(PyExc_TypeError, - "sm type should be float32 or float64"); - %(fail)s; - } - - // new scope because of variable declaration - // TODO: proper indentation, but the diff will get messy - { - // Get `dnll.shape[0]` or set it to zero if `dnll` is a scalar. - const npy_intp %(dnll)s_dims0 = (PyArray_NDIM(%(dnll)s) > 0 ? - PyArray_DIMS(%(dnll)s)[0] : - (npy_intp) 0); - - // Get `dnll.strides[0]` and set it to zero if `dnll` is a scalar - // or a vector with just one element. - const npy_intp %(dnll)s_strides0 = (%(dnll)s_dims0 > 1 ? 
- PyArray_STRIDES(%(dnll)s)[0] : - (npy_intp) 0); - - if ((PyArray_NDIM(%(dnll)s) > 1) - || (PyArray_NDIM(%(sm)s) != 2) - || (PyArray_NDIM(%(y_idx)s) != 1)) - { - PyErr_SetString(PyExc_ValueError, "rank error"); - %(fail)s; - } - if (%(dnll)s_dims0 != PyArray_DIMS(%(sm)s)[0] && %(dnll)s_dims0 > 1) - { - PyErr_Format(PyExc_ValueError, - "dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)", - (long int)%(dnll)s_dims0, - (long int)PyArray_DIMS(%(sm)s)[0]); - %(fail)s; - } - if (%(dnll)s_dims0 != PyArray_DIMS(%(y_idx)s)[0] && %(dnll)s_dims0 > 1) - { - PyErr_Format(PyExc_ValueError, - "dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)", - (long int)%(dnll)s_dims0, - (long int)PyArray_DIMS(%(y_idx)s)[0]); - %(fail)s; - } - if (PyArray_DIMS(%(sm)s)[0] != - PyArray_DIMS(%(y_idx)s)[0]) - { - PyErr_SetString(PyExc_ValueError, - "sm.shape[0] != y_idx.shape[0]"); - %(fail)s; - } - if ((NULL == %(dx)s) - || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0]) - || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1])) - { - if (NULL != %(dx)s) Py_XDECREF(%(dx)s); - %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, - PyArray_DIMS(%(sm)s), - PyArray_TYPE(%(sm)s)); - if(!%(dx)s) { - PyErr_SetString(PyExc_MemoryError, - "failed to alloc dx output"); - %(fail)s - } - } - - for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i) - { - const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(PyArray_BYTES(%(dnll)s) + %(dnll)s_strides0 * i))[0]; - - const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0]; - - const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i); - npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s); - - dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(PyArray_BYTES(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i); - npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s); - - for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j) - { - dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm]; - } - if (y_i >= PyArray_DIMS(%(dx)s)[1] || (y_i < 0)) - { - PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1] or y_i < 0."); - %(fail)s; - } - dx_i[y_i * Sdx] -= dnll_i; - } - } - """ % dict( - locals(), **sub - ) - - -crossentropy_softmax_argmax_1hot_with_bias = CrossentropySoftmaxArgmax1HotWithBias() - -crossentropy_softmax_1hot_with_bias_dx = CrossentropySoftmax1HotWithBiasDx() - - -def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs): - return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2] - - -def crossentropy_softmax_1hot(x, y_idx, **kwargs): - b = at.zeros_like(x[0, :]) - return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs) - - -def crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs): - """ - Returns - ------- - object - The cross-entropy, the softmax output, the max probability, - and the argmax index. - - TODO: Since we are recomputing the argmax, - we might as well assert that it is correct. - - TODO: Make this entire function is - unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return - the appropriate information (i.e. the max probability)? 
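As the TODO above notes, the extra max-probability and argmax values are recomputed from the softmax output, so the relationship between the four returned values is simply the following (an illustrative NumPy check with made-up numbers, not pytensor code):

.. code-block:: python

    import numpy as np

    softmax_out = np.array([[0.2, 0.5, 0.3],
                            [0.7, 0.1, 0.2]])
    max_pr = softmax_out.max(axis=-1)
    argmax = softmax_out.argmax(axis=-1)
    # max_pr is just the softmax probability at the argmax position of each row
    assert np.allclose(max_pr, softmax_out[np.arange(len(argmax)), argmax])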
- - """ - (xent, softmax) = crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs) - (max_pr, argmax) = max_and_argmax(softmax, axis=-1) - return (xent, softmax, max_pr, argmax) - - -def crossentropy_softmax_max_and_argmax_1hot(x, y_idx, **kwargs): - b = at.zeros_like(x[0, :]) - return crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs) - - -class CrossentropyCategorical1HotGrad(Op): - - __props__ = () - - def make_node(self, g_y, coding_dist, true_one_of_n): - return Apply(self, [g_y, coding_dist, true_one_of_n], [coding_dist.type()]) - - def perform(self, node, inp, out): - g_y, coding_dist, true_one_of_n = inp - (g_coding_strg,) = out - g_coding = np.zeros_like(coding_dist) - for i in range(len(g_y)): - g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i, true_one_of_n[i]] - g_coding_strg[0] = g_coding - - def infer_shape(self, fgraph, node, in_shapes): - return [in_shapes[1]] - - -crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad() - - -class CrossentropyCategorical1Hot(Op): - r""" - Compute the cross entropy between a coding distribution and - a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0]. - - .. math:: - - y[i] = - \log(coding_dist[i, one_of_n[i]) - - Notes - ----- - In the case that the coding distribution is the output of a - softmax, an application of this Op will probably be optimized - away in favour of one with a C implementation. - - """ - - __props__ = () - - def make_node(self, coding_dist, true_one_of_n): - """ - Parameters - ---------- - coding_dist : dense matrix - true_one_of_n : lvector - - Returns - ------- - dvector - - """ - _coding_dist = at.as_tensor_variable(coding_dist) - _true_one_of_n = at.as_tensor_variable(true_one_of_n) - if _coding_dist.type.ndim != 2: - raise TypeError("Matrix required for argument `coding_dist`") - if not ( - _true_one_of_n.type.ndim == 1 - and _true_one_of_n.type.dtype in integer_dtypes - ): - raise TypeError("Integer vector required for argument `true_one_of_n`") - - return Apply( - self, - [_coding_dist, _true_one_of_n], - [TensorType(dtype=_coding_dist.dtype, shape=(None,))()], - ) - - def perform(self, node, inp, out): - coding, one_of_n = inp - (y_out,) = out - y = np.zeros_like(coding[:, 0]) - for i in range(len(y)): - y[i] = -np.log(coding[i, one_of_n[i]]) - y_out[0] = y - - def infer_shape(self, fgraph, node, in_shapes): - return [(in_shapes[0][0],)] - - def grad(self, inp, grads): - coding, one_of_n = inp - (g_y,) = grads - return [ - crossentropy_categorical_1hot_grad(g_y, coding, one_of_n), - grad_not_implemented(self, 1, one_of_n), - ] - - -crossentropy_categorical_1hot = CrossentropyCategorical1Hot() - - -@register_stabilize("fast_compile") -@register_specialize("fast_compile") -@graph_rewriter -def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): - def search_make_one_sub(): - for node in fgraph.toposort(): - if node.op == crossentropy_categorical_1hot: - (nll,) = node.outputs - sm, one_of_n = node.inputs - if sm.owner and sm.owner.op == softmax_with_bias: - x, b = sm.owner.inputs - ( - new_nll, - new_sm, - new_am, - ) = crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n) - fgraph.replace_all_validate( - [(nll, new_nll), (sm, new_sm)], - reason="crossentropy_to_crossentropy_with_softmax_with_bias", - ) - return True - - return False - - while search_make_one_sub(): - pass - return - - -@graph_rewriter -def crossentropy_to_crossentropy_with_softmax(fgraph): - """ - This is a stabilization rewrite that is more general than - 
`crossentropy_to_crossentropy_with_softmax_with_bias`. - - Notes - ----- - It must be executed after `local_softmax_with_bias` during the - specialization passes. - - """ - - def search_make_one_sub(): - for node in fgraph.toposort(): - if node.op == crossentropy_categorical_1hot: - (nll,) = node.outputs - sm, one_of_n = node.inputs - if sm.owner and sm.owner.op == softmax_legacy and sm.ndim == 2: - (x,) = sm.owner.inputs - ( - new_nll, - new_sm, - new_am, - ) = crossentropy_softmax_argmax_1hot_with_bias( - x, at.zeros_like(x[0]), one_of_n - ) - fgraph.replace_all_validate( - [(nll, new_nll), (sm, new_sm)], - reason="crossentropy_to_crossentropy_with_softmax", - ) - return True - if sm.owner and sm.owner.op == softmax_with_bias: - x, b = sm.owner.inputs - ( - new_nll, - new_sm, - new_am, - ) = crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n) - fgraph.replace_all_validate( - [(nll, new_nll), (sm, new_sm)], - reason="crossentropy_to_crossentropy_with_softmax", - ) - return True - - return False - - while search_make_one_sub(): - pass - return - - -optdb.register( - "crossentropy_to_crossentropy_with_softmax", - crossentropy_to_crossentropy_with_softmax, - "fast_run", - "xent", - "fast_compile", - position=2.01, -) - - -@register_specialize( - "fast_compile", "local_crossentropy_to_crossentropy_with_softmax_grad" -) # old name -@node_rewriter([softmax_grad_legacy]) -def local_softmax_grad_to_crossentropy_with_softmax_grad(fgraph, node): - if node.op == softmax_grad_legacy and node.inputs[1].ndim == 2: - g_coding_dist, coding_dist = node.inputs - if ( - g_coding_dist.owner - and g_coding_dist.owner.op == crossentropy_categorical_1hot_grad - ): - g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs - dx = crossentropy_softmax_1hot_with_bias_dx( - g_nll, coding_dist, true_one_of_n - ) - copy_stack_trace(node.outputs[0], dx) - return [dx] - - -@register_specialize("fast_compile") -@node_rewriter([MaxAndArgmax]) -def local_argmax_pushdown(fgraph, node): - if ( - isinstance(node.op, MaxAndArgmax) - and node.inputs[0].owner - and len(fgraph.clients[node.outputs[0]]) == 0 - ): - x_max, x_argmax = node.outputs - x = node.inputs[0] - axis = node.op.get_params(node) - # TODO: Make a list/set of monotonic ops... - if x.owner and ( - x.owner.op - in ( - softplus, - exp, - log, - tanh, - sigmoid, - ) - or isinstance(x.owner.op, Softmax) - ): - (pre_x,) = x.owner.inputs - ret = max_and_argmax(pre_x, axis) - copy_stack_trace(x_max, ret) - return ret - if x.owner and x.owner.op == softmax_with_bias: - pre_x, pre_bias = x.owner.inputs - ret = max_and_argmax( - pre_x + DimShuffle(pre_bias.broadcastable, ("x", 0))(pre_bias), - axis, - ) - # copy both stack traces - copy_stack_trace(x_max, ret) - return ret - - -def _check_rows_is_arange_len_labels(fgraph, rows, labels): - """Check that `rows` is the same node as `at.arange(labels.shape[0])`. - - Also considers the case where `labels.shape[0]` is constant and equal to 1, - and `at.arange(labels.shape[0])` has been constant-folded into - 0. - - """ - - shape_of = None - if hasattr(fgraph, "shape_feature"): - shape_of = fgraph.shape_feature.shape_of - # TODO: consider cases where shape_of[labels] is constant, and - # has a value different from 1. - # This case is harder, as _is_const only accepts a scalar value - # as second argument, so checking for - # _is_const(rows, numpy.arange(...)) does not work for the moment. 
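The `local_argmax_pushdown` rewrite above is valid because argmax is invariant under strictly increasing elementwise transforms; the max value itself does change, which is why the rewrite only fires when the max output has no clients. A small NumPy check of the invariance property (illustrative only):

.. code-block:: python

    import numpy as np

    x = np.random.randn(5, 7)
    # exp, tanh and softplus are all strictly increasing
    for f in (np.exp, np.tanh, lambda v: np.log1p(np.exp(v))):
        assert (f(x).argmax(axis=1) == x.argmax(axis=1)).all()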
- if len(shape_of[labels]) == 1 and _is_const(shape_of[labels][0], 1): - return _is_const(rows, 0) - - if rows.owner and isinstance(rows.owner.op, ARange): - start, stop, step = rows.owner.inputs - if getattr(start, "data", None) != 0: # constants will have data - return False - if getattr(step, "data", None) != 1: # constant step will have data - return False - if not stop.owner: - return False - - # Not sure if that case happens any more after the introduction of - # ShapeOptimizer, but we keep it if ShapeOptimizer is not present - if isinstance(stop.owner.op, DimShuffle) and stop.owner.op.new_order == (): - shape_var = stop.owner.inputs[0] - if shape_var.owner and isinstance(shape_var.owner.op, Shape): - return shape_var.owner.inputs[0] is labels - elif shape_of: - shape_of = fgraph.shape_feature.shape_of - return shape_of[labels][0] is stop - - -def _is_const(z, val, approx=False): - try: - maybe = at.get_scalar_constant_value(z) - except NotScalarConstantError: - return False - if approx: - return np.allclose(maybe, val) - else: - return np.all(maybe == val) - - -@register_specialize("fast_compile") -@node_rewriter([AdvancedSubtensor, log]) -def local_advanced_indexing_crossentropy_onehot(fgraph, node): - log_op = None - sm = None - # First case: log(softmax(x))[rows, labels] - if isinstance(node.op, AdvancedSubtensor): - try: - log_op, rows, labels = node.inputs - except Exception: - pass - if log_op and log_op.owner and log_op.owner.op == log: - sm = log_op.owner.inputs[0] - - # Second case: log(softmax(x)[rows, labels]) - elif node.op == log: - pre_log = node.inputs[0].owner - if pre_log and isinstance(pre_log.op, AdvancedSubtensor): - try: - sm, rows, labels = pre_log.inputs - except Exception: - pass - - if ( - sm is not None - and sm.owner - and sm.owner.op in (softmax_legacy, softmax_with_bias) - and sm.ndim == 2 - ): - sm_w_bias = local_softmax_with_bias.transform(fgraph, sm.owner) - if sm_w_bias: - assert sm_w_bias[0].owner.op == softmax_with_bias - x_var, b_var = sm_w_bias[0].owner.inputs - else: - x_var = sm.owner.inputs[0] - b_var = at.zeros_like(x_var[0]) - - # Check that rows == arange(labels.shape[0]) - if _check_rows_is_arange_len_labels(fgraph, rows, labels): - if labels.ndim == 1 and x_var.ndim == 2: - minus_ret = crossentropy_softmax_argmax_1hot_with_bias( - x_var, b_var, labels - )[0] - ret = -minus_ret - copy_stack_trace(node.outputs[0], [minus_ret, ret]) - return [ret] - - -@register_specialize("fast_compile") -@node_rewriter([softmax_grad_legacy]) -def local_advanced_indexing_crossentropy_onehot_grad(fgraph, node): - if not (node.op == softmax_grad_legacy and node.inputs[1].ndim == 2): - return - - sm = None - try: - d_sm, sm = node.inputs - except Exception: - return - - if ( - (sm is not None) - and sm.owner - and (sm.owner.op in (softmax_legacy, softmax_with_bias)) - and sm.ndim == 2 - ): - sm_w_bias = local_softmax_with_bias.transform(fgraph, sm.owner) - if sm_w_bias: - assert sm_w_bias[0].owner.op == softmax_with_bias - x_var, b_var = sm_w_bias[0].owner.inputs - else: - x_var = sm.owner.inputs[0] - else: - return - - # Two cases are supported: - # 1. AdvancedIncSubtensor( - # zeros_like(softmax(x)), - # -out_grad / AdvancedSubtensor(softmax(x), arange(y.shape[0]), y), - # arange(y.shape[0]), - # y) - # which arises from the gradient of log(softmax(x)[arange(y.shape[0]), y]) - # - # 2. 
AdvancedIncSubtensor( - # zeros_like(log(softmax(x))), - # -out_grad, - # arange(y.shape[0]), - # y) - # / softmax(x) - # which arises from the gradient of log(softmax(x))[arange(y.shape[0]), y] - # - # out_grad represents the gradient of the (final) cost wrt the output. - - # - # N.B. Regarding clients -- This substitution is important for numerical stability, so we - # perform the substitution even when intermediate values have multiple clients. - # - - # First case. - # After the check for AdvancedIncSubtensor, if anything does not fit with - # the formula above, there's no way to fit it with the the second case, - # so we return immediately. - if d_sm.owner and isinstance(d_sm.owner.op, AdvancedIncSubtensor): - try: - z, incr, rows, labels = d_sm.owner.inputs - except Exception: - return - # Check that z == zeros_like(softmax(x)) - # We know z has the right size because z has the same size as d_sm, - # and d_sm and sm are both inputs of softmax_grad (so they have - # the same size). - if not _is_const(z, 0): - return - - # In the base case (output gradient = 1), incr is -1./sm[arange(len(y)), y] - # Here, we are looking for the AdvancedSubtensor term (sm[arange(len(y)), y]), - # and constructing out_grad by incorporating the other terms. - # out_grad will be constructed in 3 steps as follow: - # out_grad = +/- 1. (according to sign) - # out_grad *= -numerator - # out_grad /= denominator - # Then, if out_grad is a scalar, it will be allocated as a vector - adv_subtensor = None - out_grad = 1.0 - - # If there's a 'minus' sign before the whole expression, put it in - # out_grad and iterate - if incr.owner and incr.owner.op == neg: - out_grad = -out_grad - incr = incr.owner.inputs[0] - - if incr.owner and incr.owner.op == true_div: - num, denom = incr.owner.inputs - - # set out_grad according to the numerator, it may be divided later - # num should be a vector or a scalar - if num.ndim == 1 or all(num.broadcastable): - out_grad *= -num - else: - return - - if not denom.owner: - return - - if isinstance(denom.owner.op, AdvancedSubtensor): - # Base case - adv_subtensor = denom - # out_grad /= 1. - elif denom.owner.op == mul: - # Try to find the AdvancedSubtensor node mentioned above, - # and the output gradient - for i, input in enumerate(denom.owner.inputs): - if input.owner and isinstance(input.owner.op, AdvancedSubtensor): - other_inputs = [ - in_ for (j, in_) in enumerate(denom.owner.inputs) if j != i - ] - if len(other_inputs) == 1: - rest = other_inputs[0] - else: - rest = mul(*[other_inputs]) - - # Check that rest is a vector or a scalar - if rest.ndim == 1 or all(rest.broadcastable): - adv_subtensor = input - out_grad /= rest - break - else: - return - - # The output gradient needs to be a vector - out_grad = at.fill(x_var[:, 0], out_grad) - - if adv_subtensor is not None: - try: - maybe_sm, maybe_rows, maybe_labels = adv_subtensor.owner.inputs - except Exception: - return - - if not ( - maybe_sm is sm and maybe_rows is rows and maybe_labels is labels - ): - return - # else: OK - else: - return - else: - return - - # Check that rows is arange(labels.shape[0]) - if not _check_rows_is_arange_len_labels(fgraph, rows, labels): - return - # else, arguments of AdvancedIncSubtensor are OK, - # it was really case 1. 
- - # Second case - elif d_sm.owner and d_sm.owner.op == true_div: - # we're looking for - # AdvIncSubtensor(zeros, grad_nll, arange(len(y)), y) / softmax - try: - num, denom = d_sm.owner.inputs - except Exception: - return - - if denom != sm: - return - - # Check the numerator (AdvancedIncSubtensor) - if num.owner and isinstance(num.owner.op, AdvancedIncSubtensor): - try: - z, incr, rows, labels = num.owner.inputs - except Exception: - return - - # Check z is zeros_like(log(sm)) - if not _is_const(z, 0): - return - if z.broadcastable not in [(False, False), (True, False)]: - return - # here we know that we are incrementing a matrix of zeros - # (or a broadcasted vector). - # Since d_sm and sm are the inputs of softmax_grad, - # if the graph is valid, they have the same shape, so we - # also know that z has the right shape. - - if incr.ndim != 1 or incr.dtype not in float_dtypes: - return - - # here we know that we are incrementing some part of - # matrix z by a vector - - # unless the user has taken care to mark that the data and - # labels have the same number of rows, we cannot be sure - # here that len(y) == len(z) However, in the common case - # that these are predictions and labels it is true. We - # leave it to the Op to crash (and the user to complain) - # if this assumption is ever not true. - - out_grad = -incr - - # Check that rows is arange(labels.shape[0]) - if not _check_rows_is_arange_len_labels(fgraph, rows, labels): - return - # else, arguments of AdvancedIncSubtensor are OK - else: - return - - # numerator and denominator are OK, - # it was really case 2. - - else: - return - - # Dimension check before substitution - if labels.ndim == 1 and x_var.ndim == 2: - ret = crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels) - # The stack trace is not added to output_grad, sm and labels at - # the moment but may need to be added at a future point - copy_stack_trace(node.outputs[0], ret) - return [ret] - else: - return - - -@register_specialize("fast_compile") -@node_rewriter([softmax_with_bias]) -def graph_merge_softmax_with_crossentropy_softmax(fgraph, node): - if node.op == softmax_with_bias: - x, b = node.inputs - for x_client in fgraph.clients[x]: - if x_client[0].op == crossentropy_softmax_argmax_1hot_with_bias: - big_client = x_client[0] - if big_client in [b_client[0] for b_client in fgraph.clients[b]]: - xx, bb, ll = big_client.inputs - mergeable_client = big_client.op(x, b, ll) - copy_stack_trace(node.outputs[0], mergeable_client[1]) - return [mergeable_client[1]] - - -@register_specialize -@register_stabilize -@register_canonicalize -@node_rewriter([CrossentropySoftmax1HotWithBiasDx]) -def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(fgraph, node): - """ - Replace a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is - an `alloc` of a scalar variable or one that has either broadcastable or - matching dimensions with the output variable, by one that skips the - intermediate `alloc`. - - """ - if isinstance(node.op, CrossentropySoftmax1HotWithBiasDx): - dy, sm, y_idx = node.inputs - - # Those cases are directly handled by the internal broadcasting of the - # `CrossentropySoftmax1HotWithBiasDx` op. - if dy.ndim == 0: - return False - if dy.ndim == 1 and dy.broadcastable[0]: - return False - - assert dy.ndim == 1 - - if dy.owner is not None and isinstance(dy.owner.op, at.Alloc): - # dz is the input of the Alloc op, i.e. 
at.alloc(dz, ) - dz = dy.owner.inputs[0] - - try: - shape_feature = fgraph.shape_feature - except AttributeError: - # The shape feature may not be available in some mode, but we - # need it for this optimization, so don't continue. - return False - - shape_of = shape_feature.shape_of - same_shape = shape_feature.same_shape - - # Build `dz_broad` explicitly to include extra implicit dimensions. - dz_broad = (True,) * (dy.ndim - dz.ndim) + dz.broadcastable - - # If we can infer statically that the shape of `sm` and - # `dy` are the same in dimension `k` or the shape of `dy` is equal - # to 1 (which triggers the internal broadcasting in - # `CrossentropySoftmax1HotWithBiasDx`) we do not need to - # check it at runtime. - if ( - dz_broad[0] - and not same_shape(sm, dy, dim_x=0, dim_y=0) - and shape_of[dy][0] != 1 - ): - # If `dz` is broadcastable, we need to check whether the shapes - # of `dy` and `sm` are the same or whether the shape of `dy` is - # equal to 1. - cond = or_(eq(dy.shape[0], 1), eq(dy.shape[0], sm.shape[0])) - msg = "`sm` and `dy` do not have the same shape." - dz = Assert(msg)(dz, cond) - - ret = node.op(dz, sm, y_idx) - copy_stack_trace(node.outputs[0], ret) - return [ret] - - -def binary_crossentropy(output, target): - """ - Compute the crossentropy of binary random variables. - - Output and target are each expectations of binary random - variables; target may be exactly 0 or 1 but output must - lie strictly between 0 and 1. - - Notes - ----- - We could use the x log y op to support output=0 and output=1. - The gradient would still be undefined though. - - We do not sum, crossentropy is computed by component. - TODO : Rewrite as a scalar, and then broadcast to tensor. - - """ - return -(target * log(output) + (1.0 - target) * log(1.0 - output)) - - -def sigmoid_binary_crossentropy(output, target): - """ - Compute the cross-entropy of binary random variables. - - `output` should be real-valued (range (-inf, +inf)); `sigmoid` will be - applied to produce a (0, 1) valued input. - - `target` is assumed to be probabilities in [0, 1]. - - Notes - ----- - Mathematically equivalent to `binary_crossentropy(sigmoid(output), target)`, - but with more efficient and numerically stable computation. - """ - - def grad(inputs, out_grads): - (output, target), (out_grad,) = inputs, out_grads - g_output = out_grad * (sigmoid(output) - target) - g_target = out_grad * (-output) - return [g_output, g_target] - - inp = [output, target] - outp = softplus(-abs(output)) + output * ((output > 0) - target) - return pytensor.compile.builders.OpFromGraph( - inp, - [outp], - grad_overrides=grad, - inline=True, - name="sigmoid_binary_crossentropy", - )(*inp) - - -def categorical_crossentropy(coding_dist, true_dist): - r""" - Return the cross-entropy between an approximating distribution and a true - distribution. - - .. warning:: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC. - We ultimately don't want the polymorphism, and will move this function - to pylearn.algorithms.cost. The 1hot version will be removed. - The length of the documentation here is a form of code smell. - - The cross entropy between two probability distributions measures the average - number of bits needed to identify an event from a set of possibilities, if a - coding scheme is used based on a given probability distribution q, rather - than the "true" distribution p. - - Mathematically it is defined as follows: - - .. 
math:: - - H(p,q) = - \sum_x p(x) \log(q(x)) - - Parameters - ---------- - coding_dist : a dense matrix - Each slice along axis represents one distribution. - true_dist : a dense matrix or sparse matrix or integer vector - In the case of a matrix argument, each slice along axis represents one - distribution. In the case of an integer vector argument, each element - represents the position of the '1' in a 1-of-N encoding. - - Returns - ------- - tensor of rank one-less-than `coding_dist` - The cross entropy between each coding and true distribution. - - Notes - ----- - axis : int - The dimension over which each distribution runs - (1 for row distributions, 0 for column distributions). - - """ - if true_dist.ndim == coding_dist.ndim: - return -at_sum(true_dist * log(coding_dist), axis=coding_dist.ndim - 1) - elif true_dist.ndim == coding_dist.ndim - 1: - return crossentropy_categorical_1hot(coding_dist, true_dist) - else: - raise TypeError("rank mismatch between coding and true distributions") - - -class Prepend_scalar_constant_to_each_row(Op): - - __props__ = () - - def __init__(self, val=0): - if isinstance(val, float): - val = aes.constant(val) - self.val = val - - def __str__(self): - return f"{self.__class__.__name__}{{{self.val}}}" - - def make_node(self, mat): - # check type of input - x = at.as_tensor_variable(mat) - if mat.type.broadcastable != (False, False): - raise TypeError("Expected a matrix as input") - y = at.as_tensor_variable(self.val) - assert y.ndim == 0 - if x.type.dtype != y.type.dtype: - TypeError("the value to prepend don't have the same type as the matrix") - - node = Apply(op=self, inputs=[mat], outputs=[mat.type()]) - return node - - def perform(self, node, inp, out): - (mat,) = inp - (output,) = out - new_shape = (mat.shape[0], mat.shape[1] + 1) - if output[0] is None: - output[0] = np.empty(new_shape, dtype=mat.dtype) - out = output[0] - else: - if output[0].shape != new_shape: - try: - output[0].resize(new_shape) - except Exception: - output[0] = np.empty(new_shape, dtype=mat.dtype) - out = output[0] - - out[:, 0].fill(self.val.data) - out[:, 1:] = mat - - def infer_shape(self, fgraph, node, in_shapes): - shp = (in_shapes[0][0], in_shapes[0][1] + 1) - return [shp] - - def grad(self, inp, grads): - (mat,) = inp - (goutput,) = grads - return goutput[:, 1:] - - -class Prepend_scalar_to_each_row(Op): - - __props__ = () - - def make_node(self, val, mat): - # check type of input - x = at.as_tensor_variable(mat) - if isinstance(val, float): - val = aes.constant(val) - if mat.type.broadcastable != (False, False): - raise TypeError("Expected a matrix as input") - y = at.as_tensor_variable(val) - assert y.ndim == 0 - if x.type.dtype != y.type.dtype: - TypeError("the value to prepend don't have the same type as the matrix") - - node = Apply(op=self, inputs=[val, mat], outputs=[mat.type()]) - return node - - def perform(self, node, inp, out): - val, mat = inp - (output,) = out - new_shape = (mat.shape[0], mat.shape[1] + 1) - if output[0] is None: - output[0] = np.empty(new_shape, dtype=mat.dtype) - out = output[0] - else: - if output[0].shape != new_shape: - try: - output[0].resize(new_shape) - except Exception: - output[0] = np.empty(new_shape, dtype=mat.dtype) - out = output[0] - out[:, 0].fill(val) - out[:, 1:] = mat - - def infer_shape(self, fgraph, node, in_shapes): - shp = (in_shapes[1][0], in_shapes[1][1] + 1) - return [shp] - - def grad(self, inp, grads): - val, mat = inp - (goutput,) = grads - return goutput[:, 0], goutput[:, 1:] - - -prepend_scalar_to_each_row = 
Prepend_scalar_to_each_row() -prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.0) -prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.0) - - -def relu(x, alpha=0): - """ - Compute the element-wise rectified linear activation function. - - .. versionadded:: 0.7.1 - - Parameters - ---------- - x : symbolic tensor - Tensor to compute the activation function for. - alpha : `scalar or tensor, optional` - Slope for negative input, usually between 0 and 1. The default value - of 0 will lead to the standard rectifier, 1 will lead to - a linear activation function, and any value in between will give a - leaky rectifier. A shared variable (broadcastable against `x`) will - result in a parameterized rectifier with learnable slope(s). - - Returns - ------- - symbolic tensor - Element-wise rectifier applied to `x`. - - Notes - ----- - This is numerically equivalent to ``switch(x > 0, x, alpha * x)`` - (or ``maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster - formulation or an optimized Op, so we encourage to use this function. - - """ - # This is probably the fastest implementation for GPUs. Both the forward - # pass and the gradient get compiled into a single GpuElemwise call. - # TODO: Check if it's optimal for CPU as well; add an "if" clause if not. - # TODO: Check if there's a faster way for the gradient; create an Op if so. - if alpha == 0: - return 0.5 * (x + abs(x)) - else: - # We can't use 0.5 and 1 for one and half. as if alpha is a - # numpy dtype, they will be considered as float64, so would - # cause upcast to float64. - alpha = at.as_tensor_variable(alpha) - f1 = 0.5 * (1 + alpha) - f2 = 0.5 * (1 - alpha) - return f1 * x + f2 * abs(x) - - -def h_softmax( - x, - batch_size, - n_outputs, - n_classes, - n_outputs_per_class, - W1, - b1, - W2, - b2, - target=None, -): - """Two-level hierarchical softmax. - - This function implements a two-layer hierarchical softmax. It is commonly - used as an alternative of the softmax when the number of outputs is - important (it is common to use it for millions of outputs). See - reference [1]_ for more information about the computational gains. - - The `n_outputs` outputs are organized in `n_classes` classes, each class - containing the same number `n_outputs_per_class` of outputs. - For an input `x` (last hidden activation), the first softmax layer predicts - its class and the second softmax layer predicts its output among its class. - - If `target` is specified, it will only compute the outputs of the - corresponding targets. Otherwise, if `target` is `None`, it will compute - all the outputs. - - The outputs are grouped in classes in the same order as they are initially - defined: if `n_outputs=10` and `n_classes=2`, then the first class is - composed of the outputs labeled `{0,1,2,3,4}` while the second class is - composed of `{5,6,7,8,9}`. If you need to change the classes, you have to - re-label your outputs. - - .. versionadded:: 0.7.1 - - Parameters - ---------- - x: tensor of shape (batch_size, number of features) - the minibatch input of the two-layer hierarchical softmax. - batch_size: int - the size of the minibatch input x. - n_outputs: int - the number of outputs. - n_classes: int - the number of classes of the two-layer hierarchical softmax. It - corresponds to the number of outputs of the first softmax. See note at - the end. - n_outputs_per_class: int - the number of outputs per class. See note at the end. 
- W1: tensor of shape (number of features of the input x, n_classes) - the weight matrix of the first softmax, which maps the input x to the - probabilities of the classes. - b1: tensor of shape (n_classes,) - the bias vector of the first softmax layer. - W2: tensor of shape (n_classes, number of features of the input x, - n_outputs_per_class) - the weight matrix of the second softmax, which maps the input x to - the probabilities of the outputs. - b2: tensor of shape (n_classes, n_outputs_per_class) - the bias vector of the second softmax layer. - target: tensor of shape either (batch_size,) or (batch_size, 1) - (optional, default None) - contains the indices of the targets for the minibatch - input x. For each input, the function computes the output for its - corresponding target. If target is None, then all the outputs are - computed for each input. - - Returns - ------- - tensor of shape (`batch_size`, `n_outputs`) or (`batch_size`, 1) - Output tensor of the two-layer hierarchical softmax for input `x`. - Depending on argument `target`, it can have two different shapes. - If `target` is not specified (`None`), then all the outputs are - computed and the returned tensor has shape (`batch_size`, `n_outputs`). - Otherwise, when `target` is specified, only the corresponding outputs - are computed and the returned tensor has thus shape (`batch_size`, 1). - - Notes - ----- - The product of `n_outputs_per_class` and `n_classes` has to be greater or - equal to `n_outputs`. If it is strictly greater, then the irrelevant - outputs will be ignored. - `n_outputs_per_class` and `n_classes` have to be the same as the - corresponding dimensions of the tensors of `W1`, `b1`, `W2` and `b2`. - The most computational efficient configuration is when - `n_outputs_per_class` and `n_classes` are equal to the square root of - `n_outputs`. - - Examples - -------- - The following example builds a simple hierarchical softmax layer. - - >>> import numpy as np - >>> import pytensor - >>> import pytensor.tensor as at - >>> from pytensor.tensor.nnet import h_softmax - >>> - >>> # Parameters - >>> batch_size = 32 - >>> n_outputs = 100 - >>> dim_x = 10 # dimension of the input - >>> n_classes = int(np.ceil(np.sqrt(n_outputs))) - >>> n_outputs_per_class = n_classes - >>> output_size = n_outputs_per_class * n_outputs_per_class - >>> - >>> # First level of h_softmax - >>> floatX = pytensor.config.floatX - >>> W1 = pytensor.shared( - ... np.random.normal(0, 0.001, (dim_x, n_classes)).astype(floatX)) - >>> b1 = pytensor.shared(np.zeros((n_classes,), floatX)) - >>> - >>> # Second level of h_softmax - >>> W2 = np.random.normal(0, 0.001, - ... size=(n_classes, dim_x, n_outputs_per_class)).astype(floatX) - >>> W2 = pytensor.shared(W2) - >>> b2 = pytensor.shared(np.zeros((n_classes, n_outputs_per_class), floatX)) - >>> - >>> # We can now build the graph to compute a loss function, typically the - >>> # negative log-likelihood: - >>> - >>> x = at.imatrix('x') - >>> target = at.imatrix('target') - >>> - >>> # This only computes the output corresponding to the target. - >>> # The complexity is O(n_classes + n_outputs_per_class). - >>> y_hat_tg = h_softmax(x, batch_size, output_size, n_classes, - ... n_outputs_per_class, W1, b1, W2, b2, target) - >>> - >>> negll = -at.mean(at.log(y_hat_tg)) - >>> - >>> # We may need to compute all the outputs (at test time usually): - >>> - >>> # This computes all the outputs. - >>> # The complexity is O(n_classes * n_outputs_per_class). 
- >>> output = h_softmax(x, batch_size, output_size, n_classes, - ... n_outputs_per_class, W1, b1, W2, b2) - - - References - ---------- - .. [1] J. Goodman, "Classes for Fast Maximum Entropy Training," - ICASSP, 2001, `. - """ - - # First softmax that computes the probabilities of belonging to each class - class_probs = softmax(dot(x, W1) + b1) - - if target is None: # Computes the probabilities of all the outputs - - # Second softmax that computes the output probabilities - activations = tensordot(x, W2, (1, 1)) + b2 - output_probs = softmax(activations.reshape((-1, n_outputs_per_class))) - output_probs = output_probs.reshape((batch_size, n_classes, -1)) - output_probs = class_probs.dimshuffle(0, 1, "x") * output_probs - output_probs = output_probs.reshape((batch_size, -1)) - # output_probs.shape[1] is n_classes * n_outputs_per_class, which might - # be greater than n_outputs, so we ignore the potential irrelevant - # outputs with the next line: - output_probs = output_probs[:, :n_outputs] - - else: # Computes the probabilities of the outputs specified by the targets - - target = target.flatten() - - # Classes to which belong each target - target_classes = target // n_outputs_per_class - - # Outputs to which belong each target inside a class - target_outputs_in_class = target % n_outputs_per_class - - # Second softmax that computes the output probabilities - activations = sparse_block_dot( - W2.dimshuffle("x", 0, 1, 2), - x.dimshuffle(0, "x", 1), - at.zeros((batch_size, 1), dtype="int32"), - b2, - target_classes.dimshuffle(0, "x"), - ) - - output_probs = softmax(activations.dimshuffle(0, 2)) - target_class_probs = class_probs[at.arange(batch_size), target_classes] - output_probs = output_probs[at.arange(batch_size), target_outputs_in_class] - output_probs = target_class_probs * output_probs - - return output_probs - - -def elu(x, alpha=1): - """ - Compute the element-wise exponential linear activation function [2]_. - - .. versionadded:: 0.8.0 - - Parameters - ---------- - x : symbolic tensor - Tensor to compute the activation function for. - alpha : scalar - - - Returns - ------- - symbolic tensor - Element-wise exponential linear activation function applied to `x`. - - References - ----- - .. [2] Djork-Arne Clevert, Thomas Unterthiner, Sepp Hochreiter - "Fast and Accurate Deep Network Learning by - Exponential Linear Units (ELUs)" `. - """ - return at.switch(x > 0, x, alpha * expm1(x)) - - -def selu(x): - """Compute the element-wise Scaled Exponential Linear unit [3]_. - - .. versionadded:: 0.9.0 - - Parameters - ---------- - x : symbolic tensor - Tensor to compute the activation function for. - - Returns - ------- - symbolic tensor - Element-wise scaled exponential linear activation function applied to `x`. - - References - ---------- - .. [3] Klambauer G, Unterthiner T, Mayr A, Hochreiter S. 
- "Self-Normalizing Neural Networks" - """ - alpha = 1.6732632423543772848170429916717 - scale = 1.0507009873554804934193349852946 - return scale * elu(x, alpha) - - -class ScalarSoftsign(UnaryScalarOp): - """ - Softsign activation function - :math:`\\varphi(\\mathbf{x}) = \\frac{1}{1+|x|}` - - """ - - @staticmethod - def static_impl(x): - return x / (1.0 + abs(x)) - - def impl(self, x): - return ScalarSoftsign.static_impl(x) - - def grad(self, inp, grads): - (x,) = inp - (gz,) = grads - if "float" in x.type.dtype: - d = 1.0 + abs(x) - return [gz / (d * d)] - else: - return NotImplemented - - def c_code(self, node, name, inp, out, sub): - (x,) = inp - (z,) = out - if node.inputs[0].type in [aes.float32, aes.float64]: - return f"{z} = {x} / (1.0+fabs({x}));" - raise NotImplementedError("only floating point x is implemented") - - -scalar_softsign = ScalarSoftsign(aes.upgrade_to_float, name="scalar_softsign") -softsign = Elemwise(scalar_softsign, name="softsign") - - -def confusion_matrix(actual, pred): - """ - Computes the confusion matrix of given vectors containing - actual observations and predicted observations. - - Parameters - ---------- - actual : 1-d tensor variable - pred : 1-d tensor variable - - Returns - ------- - conf_mat : Confusion matrix of actual and predictions observations as shown below. - - | Predicted - ___________|___________ - Actual | - | - - order : 1-d array of order of entries in rows and columns - - Examples - -------- - >>> import pytensor - >>> import pytensor.tensor as at - >>> from pytensor.tensor.nnet import confusion_matrix - - >>> x = at.vector() - >>> y = at.vector() - >>> f = pytensor.function([x, y], confusion_matrix(x, y)) - >>> y_true = [2, 0, 2, 2, 0, 1] - >>> y_pred = [0, 0, 2, 2, 0, 2] - >>> print(f(y_true, y_pred)) - [array([[2, 0, 0], - [0, 0, 1], - [1, 0, 2]]), array([ 0., 1., 2.])] - """ - if actual.ndim != 1: - raise ValueError("actual must be 1-d tensor variable") - if pred.ndim != 1: - raise ValueError("pred must be 1-d tensor variable") - - order = Unique(False, False, False)(at.concatenate([actual, pred])) - - colA = actual.dimshuffle(0, "x") - colP = pred.dimshuffle(0, "x") - - oneHotA = eq(colA, order).astype("int64") - oneHotP = eq(colP, order).astype("int64") - - conf_mat = dot(oneHotA.T, oneHotP) - return [conf_mat, order] - - -DEPRECATED_NAMES = [ - ( - "softmax", - "`pytensor.tensor.nnet.basic.softmax` has been moved to `pytensor.tensor.special.softmax`.", - softmax, - ), - ( - "logsoftmax", - "`pytensor.tensor.nnet.basic.logsoftmax` has been moved to `pytensor.tensor.special.log_softmax`.", - log_softmax, - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/pytensor/tensor/nnet/batchnorm.py b/pytensor/tensor/nnet/batchnorm.py deleted file mode 100644 index 1be97b776e..0000000000 --- a/pytensor/tensor/nnet/batchnorm.py +++ /dev/null @@ -1,923 +0,0 @@ -import numpy as np - -import pytensor -from pytensor.configdefaults import config -from pytensor.graph.basic import Apply -from pytensor.graph.op import Op -from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter -from pytensor.scalar import Composite, add, as_common_dtype, mul, sub, true_div -from pytensor.tensor import basic as at -from pytensor.tensor.basic import as_tensor_variable -from pytensor.tensor.elemwise import Elemwise -from pytensor.tensor.math import mean, prod, reciprocal, sqrt -from pytensor.tensor.math import sum as at_sum -from pytensor.tensor.rewriting.basic import register_specialize_device -from pytensor.tensor.shape import specify_broadcastable -from pytensor.tensor.type import TensorType - - -class BNComposite(Composite): - init_param = ("dtype",) - - @config.change_flags(compute_test_value="off") - def __init__(self, dtype): - self.dtype = dtype - x = pytensor.scalar.ScalarType(dtype=dtype).make_variable() - mean = pytensor.scalar.ScalarType(dtype=dtype).make_variable() - std = pytensor.scalar.ScalarType(dtype=dtype).make_variable() - gamma = pytensor.scalar.ScalarType(dtype=dtype).make_variable() - beta = pytensor.scalar.ScalarType(dtype=dtype).make_variable() - o = add(mul(true_div(sub(x, mean), std), gamma), beta) - inputs = [x, mean, std, gamma, beta] - outputs = [o] - super().__init__(inputs, outputs) - - def grad(self, inps, grads): - x, mean, std, gamma, beta = inps - (top,) = grads - top_gamma = top * gamma - x_mean = x - mean - dx = top_gamma / std - dmean = -dx - dstd = -(top_gamma * x_mean) / (std * std) - dgamma = top * x_mean / std - return [dx, dmean, dstd, dgamma, top] - - -def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"): - """ - This function will build the symbolic graph for applying batch normalization - to a set of activations. - - .. versionadded:: 0.7.1 - - Parameters - ---------- - inputs : symbolic tensor - Mini-batch of activations - gamma: symbolic tensor - BN scale parameter, must be of same dimensionality as - inputs and broadcastable against it - beta: symbolic tensor - BN shift parameter, must be of same dimensionality as - inputs and broadcastable against it - mean: symbolic tensor - inputs means, must be of same dimensionality as - inputs and broadcastable against it - std: symbolic tensor - inputs standard deviation, must be of same dimensionality as - inputs and broadcastable against it - mode: 'low_mem' or 'high_mem' - Specify which batch_normalization implementation that will be - used. - As no intermediate representations are stored for the back-propagation, - 'low_mem' implementation lower the memory usage, however, - it is 5-10% slower than 'high_mem' implementation. Note that 5-10% computation - time difference compare the batch_normalization operation only, time difference - between implementation is likely to be less important on the full model fprop/bprop. 
- """ - if mode == "low_mem": - elm_bn = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype)) - rval = elm_bn(inputs, mean, std, gamma, beta) - elif mode == "high_mem": - rval = (inputs - mean) * (gamma / std) + beta - else: - raise ValueError('mode must be either "low_mem", "high_mem"') - return rval - - -def _prepare_batch_normalization_axes(axes, ndim): - if axes == "per-activation": - axes = (0,) - elif axes == "spatial": - axes = (0,) + tuple(range(2, ndim)) - elif isinstance(axes, (tuple, list, np.ndarray)): - axes = tuple(int(a) for a in axes) - else: - raise ValueError(f"invalid axes: {axes}") - axes = tuple(sorted(axes)) - if len(axes) == 0: - raise ValueError("there should be at least one normalization axis") - if min(axes) < 0 or max(axes) >= ndim: - raise ValueError( - f"axes should be less than ndim (<{int(ndim)}), but {axes} given" - ) - non_bc_axes = tuple(i for i in range(ndim) if i not in axes) - return axes, non_bc_axes - - -def batch_normalization_train( - inputs, - gamma, - beta, - axes="per-activation", - epsilon=1e-4, - running_average_factor=0.1, - running_mean=None, - running_var=None, -): - """ - Performs batch normalization of the given inputs, using the mean and - variance of the inputs. - - Parameters - ---------- - axes : 'per-activation', 'spatial' or a tuple of ints - The axes along which the input should be normalized. ``'per-activation'`` - normalizes per activation and is equal to ``axes=(0,)``. - ``'spatial'`` shares normalization factors across spatial dimensions - (i.e., all dimensions past the second), which for 4D inputs would be - equal to ``axes=(0, 2, 3)``. - gamma : tensor - Learnable scale factors. The shape must match the shape of `inputs`, - except for the axes in `axes`. These axes should be set to 1 or be - skipped altogether (such that `gamma.ndim == inputs.ndim - len(axes)`). - beta : tensor - Learnable biases. Must match the tensor layout of `gamma`. - epsilon : float - Epsilon value used in the batch normalization formula. Minimum allowed - value is 1e-5 (imposed by cuDNN). - running_average_factor : float - Factor for updating the values or `running_mean` and `running_var`. - If the factor is close to one, the running averages will update quickly, - if the factor is close to zero it will update slowly. - running_mean : tensor or None - Previous value of the running mean. If this is given, the new value - ``running_mean * (1 - r_a_factor) + batch mean * r_a_factor`` - will be returned as one of the outputs of this function. - `running_mean` and `running_var` should either both be given or - both be None. The shape should match that of `gamma` and `beta`. - running_var : tensor or None - Previous value of the running variance. If this is given, the new value - ``running_var * (1 - r_a_factor) + (m / (m - 1)) * batch var * r_a_factor`` - will be returned as one of the outputs of this function, - where `m` is the product of lengths of the averaged-over dimensions. - `running_mean` and `running_var` should either both be given or - both be None. The shape should match that of `gamma` and `beta`. - - Returns - ------- - out : tensor - Batch-normalized inputs. - mean : tensor - Means of `inputs` across the normalization axes. - invstd : tensor - Inverse standard deviations of `inputs` across the normalization axes. - new_running_mean : tensor - New value of the running mean (only if both `running_mean` and - `running_var` were given). 
- new_running_var : tensor - New value of the running variance (only if both `running_var` and - `running_mean` were given). - - Notes - ----- - If per-activation or spatial normalization is selected, this operation - will use the cuDNN implementation. (This requires cuDNN 5 or newer.) - - The returned values are equivalent to: - - .. code-block:: python - - # for per-activation normalization - axes = (0,) - # for spatial normalization - axes = (0,) + tuple(range(2, inputs.ndim)) - mean = inputs.mean(axes, keepdims=True) - var = inputs.var(axes, keepdims=True) - invstd = at.reciprocal(at.sqrt(var + epsilon)) - out = (inputs - mean) * gamma * invstd + beta - - m = at.cast(ate.prod(inputs.shape) / at.prod(mean.shape), 'float32') - running_mean = running_mean * (1 - running_average_factor) + \\ - mean * running_average_factor - running_var = running_var * (1 - running_average_factor) + \\ - (m / (m - 1)) * var * running_average_factor - """ - ndim = inputs.ndim - axes, non_bc_axes = _prepare_batch_normalization_axes(axes, ndim) - - # have the parameter tensors been broadcasted yet? - if gamma.ndim == ndim: - params_ndim = ndim - else: - params_ndim = len(non_bc_axes) - params_dimshuffle_pattern = ["x"] * ndim - for i, axis in enumerate(non_bc_axes): - params_dimshuffle_pattern[axis] = i - - if gamma.ndim != params_ndim or beta.ndim != params_ndim: - raise ValueError( - "gamma and beta dimensionality must match the " - "number of non-normalized axes, or have the " - "same number of dimensions as the inputs; " - f"got {int(gamma.ndim)} and {int(beta.ndim)} instead of {int(params_ndim)}" - ) - if (running_mean is None) != (running_var is None): - raise ValueError( - "running_mean and running_var must either both be given or both be None" - ) - if running_mean is not None and running_mean.ndim != params_ndim: - raise ValueError( - "running_mean must be of the same dimensionality " - f"as gamma and beta; got {int(running_mean.ndim)} instead of {int(params_ndim)}" - ) - if running_var is not None and running_var.ndim != params_ndim: - raise ValueError( - "running_var must be of the same dimensionality " - f"as gamma and beta; got {int(running_var.ndim)} instead of {int(params_ndim)}" - ) - - # epsilon will be converted to floatX later. we need to check - # for rounding errors now, since numpy.float32(1e-5) < 1e-5. 
- epsilon = np.cast[config.floatX](epsilon) - if epsilon < 1e-5: - raise ValueError(f"epsilon must be at least 1e-5, got {epsilon}") - - inputs = as_tensor_variable(inputs) - gamma = as_tensor_variable(gamma) - beta = as_tensor_variable(beta) - - if params_ndim != ndim: - gamma = gamma.dimshuffle(params_dimshuffle_pattern) - beta = beta.dimshuffle(params_dimshuffle_pattern) - else: - gamma = specify_broadcastable(gamma, *axes) - beta = specify_broadcastable(beta, *axes) - - batchnorm_op = AbstractBatchNormTrain(axes=axes) - - if running_mean is not None and running_var is not None: - running_mean = as_tensor_variable(running_mean) - running_var = as_tensor_variable(running_var) - if params_ndim != ndim: - running_mean = running_mean.dimshuffle(params_dimshuffle_pattern) - running_var = running_var.dimshuffle(params_dimshuffle_pattern) - else: - running_mean = specify_broadcastable(running_mean, *axes) - running_var = specify_broadcastable(running_var, *axes) - out, mean, invstd, new_running_mean, new_running_var = batchnorm_op( - inputs, - gamma, - beta, - epsilon=epsilon, - running_average_factor=running_average_factor, - running_mean=running_mean, - running_var=running_var, - ) - if new_running_mean.broadcastable != running_mean.broadcastable: - new_running_mean = specify_broadcastable( - new_running_mean, - *(ax for (ax, b) in enumerate(running_mean.type.broadcastable) if b), - ) - if new_running_var.broadcastable != running_var.broadcastable: - new_running_var = specify_broadcastable( - new_running_var, - *(ax for (ax, b) in enumerate(running_var.type.broadcastable) if b), - ) - results = (out, mean, invstd, new_running_mean, new_running_var) - else: - results = batchnorm_op(inputs, gamma, beta, epsilon=epsilon) - - if params_ndim != ndim: - # remove the broadcasted dimensions (except from the output) - results = [results[0]] + [r.dimshuffle(non_bc_axes) for r in results[1:]] - return tuple(results) - - -def batch_normalization_test( - inputs, gamma, beta, mean, var, axes="per-activation", epsilon=1e-4 -): - """ - Performs batch normalization of the given inputs, using the given mean and - variance. - - Parameters - ---------- - axes : 'per-activation', 'spatial' or a tuple of ints - The axes along which the input should be normalized. ``'per-activation'`` - normalizes per activation and is equal to ``axes=(0,)``. - ``'spatial'`` shares normalization factors across spatial dimensions - (i.e., all dimensions past the second), which for 4D inputs would be - equal to ``axes=(0, 2, 3)``. - gamma : tensor - Scale factors. The shape must match the shape of `inputs`, - except for the axes in `axes`. These axes should be set to 1 or be - skipped altogether (such that `gamma.ndim == inputs.ndim - len(axes)`). - beta : tensor - Biases. Must match the tensor layout of `gamma`. - mean : tensor - Means. Usually these are running averages computed during training. - Must match the tensor layout of `gamma`. - var : tensor - Variances. Usually these are running averages computed during training. - Must match the tensor layout of `gamma`. - epsilon : float - Epsilon value used in the batch normalization formula. Minimum allowed - value is 1e-5 (imposed by cuDNN). - - Returns - ------- - out : tensor - Batch-normalized inputs. - - Notes - ----- - If per-activation or spatial normalization is selected, this operation - will use the cuDNN implementation. (This requires cuDNN 5 or newer.) - - The returned value is equivalent to: - - .. 
code-block:: python - - # for per-activation normalization - axes = (0,) - # for spatial normalization - axes = (0,) + tuple(range(2, inputs.ndim)) - gamma, beta, mean, var = (at.specify_broadcastable(t, *axes) - for t in (gamma, beta, mean, var)) - out = (inputs - mean) * gamma / at.sqrt(var + epsilon) + beta - """ - ndim = inputs.ndim - axes, non_bc_axes = _prepare_batch_normalization_axes(axes, ndim) - - # have the parameter tensors been broadcasted yet? - if gamma.ndim == ndim: - params_ndim = ndim - else: - params_ndim = len(non_bc_axes) - params_dimshuffle_pattern = ["x"] * ndim - for i, axis in enumerate(non_bc_axes): - params_dimshuffle_pattern[axis] = i - - if gamma.ndim != params_ndim or beta.ndim != params_ndim: - raise ValueError( - "gamma and beta dimensionality must match the " - "number of non-normalized axes, or have the " - "same number of dimensions as the inputs; " - f"got {int(gamma.ndim)} and {int(beta.ndim)} instead of {int(params_ndim)}" - ) - if mean.ndim != params_ndim or var.ndim != params_ndim: - raise ValueError( - "mean and var must be of the same dimensionality " - f"as gamma and beta; got {int(mean.ndim)} and {int(var.ndim)} instead of {int(params_ndim)}" - ) - - # epsilon will be converted to floatX later. we need to check - # for rounding errors now, since numpy.float32(1e-5) < 1e-5. - epsilon = np.cast[config.floatX](epsilon) - if epsilon < 1e-5: - raise ValueError(f"epsilon must be at least 1e-5, got {epsilon}") - - gamma = as_tensor_variable(gamma) - beta = as_tensor_variable(beta) - mean = as_tensor_variable(mean) - var = as_tensor_variable(var) - - if params_ndim != ndim: - gamma = gamma.dimshuffle(params_dimshuffle_pattern) - beta = beta.dimshuffle(params_dimshuffle_pattern) - mean = mean.dimshuffle(params_dimshuffle_pattern) - var = var.dimshuffle(params_dimshuffle_pattern) - else: - gamma = specify_broadcastable(gamma, *axes) - beta = specify_broadcastable(beta, *axes) - mean = specify_broadcastable(mean, *axes) - var = specify_broadcastable(var, *axes) - - batchnorm_op = AbstractBatchNormInference(axes=axes) - return batchnorm_op(inputs, gamma, beta, mean, var, epsilon=epsilon) - - -class AbstractBatchNormTrain(Op): - """ - Abstract Op for Batch Normalization. - - Parameters - ---------- - axes : a tuple of ints - The axes along which the input should be normalized. - x : tensor - The input to be normalized along `axes`. - scale : tensor - `scale` should have the same number of dimensions as `x`. - All dimensions listed in `axes` should have length 1. - bias : tensor - `bias` should have the same number of dimensions as `x`. - All dimensions listed in `axes` should have length 1. - epsilon - Epsilon value used in the batch normalization formula. Minimum allowed - value is 1e-5 (imposed by cuDNN). - running_average_factor : float - Factor for updating the values or `running_mean` and `running_var`. - If the factor is close to one, the running averages will update quickly, - if the factor is close to zero it will update slowly. - running_mean : tensor or None - Previous value of the running mean. If this is given, the new value - ``running_mean * (1 - running_average_factor) + batch mean * running_average_factor`` - will be returned as one of the outputs of this function. - `running_mean` and `running_var` should either both be given or - both be None. - running_var : tensor or None - Previous value of the running variance. 
If this is given, the new value - ``running_var * (1 - running_average_factor) + (m / (m - 1)) * batch var * running_average_factor`` - will be returned as one of the outputs of this function, - where `m` is the product of lengths of the averaged-over dimensions. - `running_mean` and `running_var` should either both be given or - both be None. - """ - - __props__ = ("axes",) - - def __init__(self, axes=(0,)): - assert isinstance(axes, (tuple, list)) - assert len(axes) > 0 - axes = tuple(int(a) for a in axes) - self.axes = axes - - def infer_shape(self, fgraph, node, shape): - return [shape[0]] + [shape[1]] * (len(node.outputs) - 1) - - def make_node( - self, - x, - scale, - bias, - epsilon=1e-4, - running_average_factor=0.1, - running_mean=None, - running_var=None, - ): - x = as_tensor_variable(x) - scale = as_tensor_variable(scale) - bias = as_tensor_variable(bias) - epsilon = as_tensor_variable(epsilon) - running_average_factor = as_tensor_variable(running_average_factor) - if running_mean is not None: - running_mean = as_tensor_variable(running_mean) - if running_var is not None: - running_var = as_tensor_variable(running_var) - assert x.ndim == scale.ndim == bias.ndim - assert (running_mean is None and running_var is None) or ( - running_mean is not None and running_var is not None - ) - assert running_mean is None or running_mean.ndim == x.ndim - assert running_var is None or running_var.ndim == x.ndim - # Upcast to common dtype on the non-scalar - # Keep as is dtype of scalar (epsilon and running_average_factor) - if running_mean: - x, scale, bias, running_mean, running_var = as_common_dtype( - x, scale, bias, running_mean, running_var - ) - else: - x, scale, bias = as_common_dtype(x, scale, bias) - inputs = [x, scale, bias, epsilon, running_average_factor] - output_types = [x.type(), scale.type(), scale.type()] - if running_mean is not None and running_var is not None: - inputs.append(running_mean) - inputs.append(running_var) - output_types.append(scale.type()) - output_types.append(scale.type()) - return Apply(self, inputs, output_types) - - def L_op(self, inputs, outputs, grads): - x, scale, bias, epsilon, running_average_factor = inputs[:5] - dy = grads[0] - _, x_mean, x_invstd = outputs[:3] - disconnected_outputs = [ - pytensor.gradient.DisconnectedType()(), # epsilon - pytensor.gradient.DisconnectedType()(), - ] # running_average_factor - # Optional running_mean and running_var. - for i in range(5, len(inputs)): - disconnected_outputs.append(pytensor.gradient.DisconnectedType()()) - return ( - AbstractBatchNormTrainGrad(self.axes)( - x, dy, scale, x_mean, x_invstd, epsilon - ) - + disconnected_outputs - ) - - def connection_pattern(self, node): - # Specify that epsilon and running_average_factor are not connected to outputs. - patterns = [ - [True, True, True], # x - [True, True, True], # scale - [True, True, True], # bias - [False, False, False], # epsilon - [False, False, False], - ] # running_average_factor - # Optional running_mean and running_var are only - # connected to their new values. 
- for i in range(5, len(node.inputs)): - patterns[0].append(True) - for pattern in patterns[1:]: - pattern.append(False) - patterns.append([False] * (3 + i - 5) + [True]) - return patterns - - def perform(self, node, inputs, output_storage): - x, scale, bias, epsilon, running_average_factor = inputs[:5] - axes = self.axes - if min(axes) < 0 or max(axes) >= x.ndim: - raise ValueError( - f"axes should be less than ndim (<{x.ndim}), but {axes} given" - ) - - mean = x.mean(axes, keepdims=True) - var = x.var(axes, keepdims=True) - invstd = 1.0 / np.sqrt(var + epsilon) - out = (x - mean) * (scale * invstd) + bias - - output_storage[0][0] = out - output_storage[1][0] = mean - output_storage[2][0] = invstd - - if len(inputs) > 5: - running_mean = inputs[5] - running_mean = ( - running_mean * (1.0 - running_average_factor) - + mean * running_average_factor - ) - output_storage[3][0] = running_mean - if len(inputs) > 6: - m = float(np.prod(x.shape) / np.prod(scale.shape)) - running_var = inputs[6] - running_var = ( - running_var * (1.0 - running_average_factor) - + (m / (m - 1)) * var * running_average_factor - ) - output_storage[4][0] = running_var - - -class AbstractBatchNormInference(Op): - """ - Abstract Op for Batch Normalization. - - Parameters - ---------- - axes : a tuple of ints - The axes along which the input is normalized. - epsilon - Epsilon value used in the batch normalization formula. Minimum allowed - value is 1e-5 (imposed by cuDNN). - """ - - __props__ = ("axes",) - - def __init__(self, axes=(0,)): - assert isinstance(axes, (tuple, list)) - assert len(axes) > 0 - axes = tuple(int(a) for a in axes) - self.axes = axes - - def infer_shape(self, fgraph, node, shape): - return [shape[0]] - - def make_node( - self, x, scale, bias, estimated_mean, estimated_variance, epsilon=1e-4 - ): - x = as_tensor_variable(x) - scale = as_tensor_variable(scale) - bias = as_tensor_variable(bias) - estimated_mean = as_tensor_variable(estimated_mean) - estimated_variance = as_tensor_variable(estimated_variance) - epsilon = as_tensor_variable(epsilon) - # Upcast to common dtype on the non-scalar - # Keep as is dtype of scalar (epsilon) - x, scale, bias, estimated_mean, estimated_variance = as_common_dtype( - x, scale, bias, estimated_mean, estimated_variance - ) - assert ( - x.ndim - == scale.ndim - == bias.ndim - == estimated_mean.ndim - == estimated_variance.ndim - ) - - return Apply( - self, - [x, scale, bias, estimated_mean, estimated_variance, epsilon], - [x.type()], - ) - - def grad(self, inputs, grads): - x, scale, bias, est_mean, est_var, epsilon = inputs - dy = grads[0] - axes = self.axes - if min(axes) < 0 or max(axes) >= x.ndim: - raise ValueError( - f"axes should be less than ndim (<{x.ndim}), but {axes} given" - ) - - scale, bias, est_mean, est_var = ( - specify_broadcastable(t, *axes) for t in (scale, bias, est_mean, est_var) - ) - - # define helper expressions - est_var_eps = est_var + epsilon - est_std = sqrt(est_var_eps) - two = at.constant(2.0) - - # define and return gradients - dx = dy * (scale / est_std) - dscale = (dy * (x - est_mean)).sum(axes, keepdims=True) / est_std - dbias = dy.sum(axes, keepdims=True) - dmean = -dy.sum(axes, keepdims=True) * (scale / est_std) - dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * ( - scale / (two * est_var_eps * est_std) - ) - return [dx, dscale, dbias, dmean, dvar, pytensor.gradient.DisconnectedType()()] - - def connection_pattern(self, node): - # Specify that epsilon is not connected to outputs. 
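For reference, the training-mode computation carried out by ``AbstractBatchNormTrain.perform`` above can be written as a short NumPy sketch (hypothetical 4D input normalized over ``axes=(0, 2, 3)``); the ``m / (m - 1)`` factor turns the biased batch variance into an unbiased estimate before it enters the running average:

.. code-block:: python

    # NumPy sketch of the training-mode forward pass implemented by
    # AbstractBatchNormTrain.perform (hypothetical shapes, axes=(0, 2, 3)).
    import numpy as np

    rng = np.random.default_rng(0)
    axes = (0, 2, 3)
    x = rng.normal(size=(4, 3, 5, 5)).astype("float32")
    scale = rng.normal(size=(1, 3, 1, 1)).astype("float32")
    bias = rng.normal(size=(1, 3, 1, 1)).astype("float32")
    epsilon, factor = 1e-4, 0.1
    running_mean = np.zeros((1, 3, 1, 1), dtype="float32")
    running_var = np.ones((1, 3, 1, 1), dtype="float32")

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    invstd = 1.0 / np.sqrt(var + epsilon)
    out = (x - mean) * (scale * invstd) + bias

    # Exponential moving averages; m is the number of elements averaged over,
    # and m / (m - 1) applies Bessel's correction to the batch variance.
    m = float(np.prod(x.shape) / np.prod(scale.shape))
    new_running_mean = running_mean * (1.0 - factor) + mean * factor
    new_running_var = running_var * (1.0 - factor) + (m / (m - 1)) * var * factor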
- return [[True], [True], [True], [True], [True], [False]] - - def perform(self, node, inputs, output_storage): - x, scale, bias, estimated_mean, estimated_variance, epsilon = inputs - out = (x - estimated_mean) * ( - scale / np.sqrt(estimated_variance + epsilon) - ) + bias - output_storage[0][0] = out - - -class AbstractBatchNormTrainGrad(Op): - __props__ = ("axes",) - - def __init__(self, axes=(0,)): - assert isinstance(axes, (tuple, list)) - assert len(axes) > 0 - axes = tuple(int(a) for a in axes) - self.axes = axes - - def make_node(self, x, dy, scale, x_mean, x_invstd, epsilon=1e-4): - x = as_tensor_variable(x) - dy = as_tensor_variable(dy) - scale = as_tensor_variable(scale) - x_mean = as_tensor_variable(x_mean) - x_invstd = as_tensor_variable(x_invstd) - epsilon = as_tensor_variable(epsilon) - - # Upcast to common dtype on the non-scalar - # Keep as is dtype of scalar (epsilon) - x, dy, scale, x_mean, x_invstd = as_common_dtype(x, dy, scale, x_mean, x_invstd) - assert x.ndim == dy.ndim == scale.ndim == x_mean.ndim == x_invstd.ndim - return Apply( - self, - [x, dy, scale, x_mean, x_invstd, epsilon], - [x.type(), scale.type(), scale.type()], - ) - - def grad(self, inp, grads): - x, dy, scale, x_mean, x_invstd, epsilon = inp - ddinputs, ddscale, ddbias = grads - - x_diff = x - x_mean - mean_dy_x_diff = mean(dy * x_diff, axis=self.axes, keepdims=True) - - # compute gradients given each of the output gradients - g_wrt_x = 0 - g_wrt_dy = 0 - g_wrt_scale = 0 - g_wrt_x_mean = 0 - g_wrt_x_invstd = 0 - - if not isinstance(ddinputs.type, pytensor.gradient.DisconnectedType): - ccc = scale * (ddinputs - mean(ddinputs, axis=self.axes, keepdims=True)) - ddd = (x_invstd**3) * ( - ccc * mean(dy * x_diff, axis=self.axes, keepdims=True) - + dy * mean(ccc * x_diff, axis=self.axes, keepdims=True) - ) - - g_wrt_x = g_wrt_x - ddd - g_wrt_dy = g_wrt_dy + ( - (ccc * x_invstd) - - ( - (x_invstd**3) - * x_diff - * mean(ccc * x_diff, axis=self.axes, keepdims=True) - ) - ) - - eee = (dy * x_invstd) - ((x_invstd**3) * x_diff * mean_dy_x_diff) - g_wrt_scale = g_wrt_scale + at_sum( - ddinputs * (eee - mean(eee, axis=self.axes, keepdims=True)), - axis=self.axes, - keepdims=True, - ) - - g_wrt_x_mean = g_wrt_x_mean + at_sum(ddd, axis=self.axes, keepdims=True) - g_wrt_x_invstd = g_wrt_x_invstd + at_sum( - ccc * (dy - 3 * (x_invstd**2) * x_diff * mean_dy_x_diff), - axis=self.axes, - keepdims=True, - ) - - if not isinstance(ddscale.type, pytensor.gradient.DisconnectedType): - g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy) - g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff) - g_wrt_x_mean = g_wrt_x_mean - ( - x_invstd * ddscale * at_sum(dy, axis=self.axes, keepdims=True) - ) - g_wrt_x_invstd = g_wrt_x_invstd + ( - ddscale * at_sum(dy * x_diff, axis=self.axes, keepdims=True) - ) - - if not isinstance(ddbias.type, pytensor.gradient.DisconnectedType): - g_wrt_dy = g_wrt_dy + at.fill(dy, ddbias) - - # depending on which output gradients are given, - # some inputs should be disconnected - results = [ - g_wrt_x, - g_wrt_dy, - g_wrt_scale, - g_wrt_x_mean, - g_wrt_x_invstd, - pytensor.gradient.DisconnectedType()(), - ] - return [ - pytensor.gradient.DisconnectedType()() - if (isinstance(r, int) and r == 0) - else r - for r in results - ] - - def connection_pattern(self, node): - return [ - [True, True, False], # x - [True, True, True], # dy - [True, False, False], # scale - [True, True, False], # x_mean - [True, True, False], # x_invstd - [False, False, False], - ] # epsilon - - def infer_shape(self, fgraph, node, shape): - 
return [shape[0], shape[2], shape[2]] - - def perform(self, node, inputs, output_storage): - x, dy, scale, x_mean, x_invstd, epsilon = inputs - axes = self.axes - if min(axes) < 0 or max(axes) >= x.ndim: - raise ValueError( - f"axes should be less than ndim (<{x.ndim}), but {axes} given" - ) - - x_diff = x - x_mean - mean_dy_x_diff = np.mean(dy * x_diff, axis=axes, keepdims=True) - c = (dy * x_invstd) - (x_diff * mean_dy_x_diff * (x_invstd**3)) - - g_wrt_inputs = scale * (c - np.mean(c, axis=axes, keepdims=True)) - g_wrt_scale = np.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True) - g_wrt_bias = np.sum(dy, axis=axes, keepdims=True) - - output_storage[0][0] = g_wrt_inputs - output_storage[1][0] = g_wrt_scale - output_storage[2][0] = g_wrt_bias - - -@node_rewriter([AbstractBatchNormTrain]) -def local_abstract_batch_norm_train(fgraph, node): - if not isinstance(node.op, AbstractBatchNormTrain): - return None - - x, scale, bias, epsilon, running_average_factor = node.inputs[:5] - axes = node.op.axes - if min(axes) < 0 or max(axes) > x.ndim: - return None - if ( - not isinstance(x.type, TensorType) - or not isinstance(scale.type, TensorType) - or not isinstance(bias.type, TensorType) - or not isinstance(epsilon.type, TensorType) - or not isinstance(running_average_factor.type, TensorType) - ): - return None - # optional running_mean and running_var - if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType): - return None - if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType): - return None - - mean = x.mean(axes, keepdims=True) - var = x.var(axes, keepdims=True) - # The epsilon should not upcast the dtype. - if var.dtype == "float32" and epsilon.dtype == "float64": - epsilon = epsilon.astype("float32") - invstd = reciprocal(sqrt(var + epsilon)) - out = (x - mean) * (scale * invstd) + bias - results = [out, mean, invstd] - - if len(node.inputs) > 5: - running_mean = node.inputs[5] - running_mean = ( - running_mean * (1.0 - running_average_factor) - + mean * running_average_factor - ) - results.append(running_mean) - if len(node.inputs) > 6: - m = at.cast(prod(x.shape) / prod(scale.shape), config.floatX) - running_var = node.inputs[6] - running_var = ( - running_var * (1.0 - running_average_factor) - + (m / (m - 1)) * var * running_average_factor - ) - results.append(running_var) - - for var in pytensor.graph.basic.vars_between(node.inputs, results): - if var not in node.inputs: - copy_stack_trace(node.outputs[0], var) - return results - - -@node_rewriter([AbstractBatchNormTrainGrad]) -def local_abstract_batch_norm_train_grad(fgraph, node): - if not isinstance(node.op, AbstractBatchNormTrainGrad): - return None - - x, dy, scale, x_mean, x_invstd, epsilon = node.inputs - axes = node.op.axes - if min(axes) < 0 or max(axes) > x.ndim: - return None - if ( - not isinstance(x.type, TensorType) - or not isinstance(dy.type, TensorType) - or not isinstance(scale.type, TensorType) - or not isinstance(x_mean.type, TensorType) - or not isinstance(x_invstd.type, TensorType) - or not isinstance(epsilon.type, TensorType) - ): - return None - - x_diff = x - x_mean - mean_dy_x_diff = mean(dy * x_diff, axis=axes, keepdims=True) - c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd**3)) - - g_wrt_inputs = scale * (c - mean(c, axis=axes, keepdims=True)) - g_wrt_scale = at_sum(dy * x_invstd * x_diff, axis=axes, keepdims=True) - g_wrt_bias = at_sum(dy, axis=axes, keepdims=True) - results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias] - - for var in 
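The closed-form backward formulas used by ``AbstractBatchNormTrainGrad.perform`` (gradients with respect to the input, scale and bias) can likewise be mirrored in NumPy; this is only an illustration of those formulas under hypothetical shapes, not a drop-in replacement:

.. code-block:: python

    # NumPy sketch of the backward formulas in AbstractBatchNormTrainGrad.perform
    # (hypothetical shapes; dy is an upstream gradient with the same shape as x).
    import numpy as np

    rng = np.random.default_rng(0)
    axes = (0, 2, 3)
    x = rng.normal(size=(4, 3, 5, 5))
    dy = rng.normal(size=(4, 3, 5, 5))
    scale = rng.normal(size=(1, 3, 1, 1))
    x_mean = x.mean(axes, keepdims=True)
    x_invstd = 1.0 / np.sqrt(x.var(axes, keepdims=True) + 1e-4)

    x_diff = x - x_mean
    mean_dy_x_diff = np.mean(dy * x_diff, axis=axes, keepdims=True)
    c = (dy * x_invstd) - (x_diff * mean_dy_x_diff * (x_invstd**3))

    g_wrt_inputs = scale * (c - np.mean(c, axis=axes, keepdims=True))
    g_wrt_scale = np.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
    g_wrt_bias = np.sum(dy, axis=axes, keepdims=True)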
pytensor.graph.basic.vars_between(node.inputs, results): - if var not in node.inputs: - copy_stack_trace(node.outputs[0], var) - return results - - -@node_rewriter([AbstractBatchNormInference]) -def local_abstract_batch_norm_inference(fgraph, node): - if not isinstance(node.op, AbstractBatchNormInference): - return None - - x, scale, bias, estimated_mean, estimated_variance, epsilon = node.inputs - - if ( - not isinstance(x.type, TensorType) - or not isinstance(scale.type, TensorType) - or not isinstance(bias.type, TensorType) - or not isinstance(estimated_mean.type, TensorType) - or not isinstance(estimated_variance.type, TensorType) - or not isinstance(epsilon.type, TensorType) - ): - return None - - # The epsilon should not upcast the dtype. - if estimated_variance.dtype == "float32" and epsilon.dtype == "float64": - epsilon = epsilon.astype("float32") - - result = (x - estimated_mean) * (scale / sqrt(estimated_variance + epsilon)) + bias - - for var in pytensor.graph.basic.vars_between(node.inputs, [result]): - if var not in node.inputs: - copy_stack_trace(node.outputs[0], var) - return [result] - - -# Register Cpu Optimization -bn_groupopt = pytensor.graph.rewriting.db.LocalGroupDB() -bn_groupopt.__name__ = "batchnorm_opts" -register_specialize_device(bn_groupopt, "fast_compile", "fast_run") - -bn_groupopt.register( - "local_abstract_batch_norm_train", - local_abstract_batch_norm_train, - "fast_compile", - "fast_run", - position=30, -) -bn_groupopt.register( - "local_abstract_batch_norm_train_grad", - local_abstract_batch_norm_train_grad, - "fast_compile", - "fast_run", - position=30, -) -bn_groupopt.register( - "local_abstract_batch_norm_inference", - local_abstract_batch_norm_inference, - "fast_compile", - "fast_run", - position=30, -) diff --git a/pytensor/tensor/nnet/blocksparse.py b/pytensor/tensor/nnet/blocksparse.py deleted file mode 100644 index c0ed1eec83..0000000000 --- a/pytensor/tensor/nnet/blocksparse.py +++ /dev/null @@ -1,272 +0,0 @@ -from typing import List - -import numpy as np - -import pytensor -from pytensor.gradient import grad_undefined -from pytensor.graph.basic import Apply -from pytensor.graph.op import Op -from pytensor.tensor.type import discrete_dtypes - - -class SparseBlockGemv(Op): - """ - This op computes the dot product of specified pieces of vectors - and matrices, returning pieces of vectors:: - - for b in range(batch_size): - for j in range(o.shape[1]): - for i in range(h.shape[1]): - o[b, j, :] += numpy.dot(h[b, i], W[iIdx[b, i], oIdx[b, j]]) - - where b, h, W, o iIdx, oIdx are defined in the docstring of make_node. - - .. image:: ../../../images/blocksparse.png - :scale: 50 % - - """ - - __props__ = ("inplace",) - - registered_opts: List = [] - - def __init__(self, inplace=False): - self.inplace = inplace - if self.inplace: - self.destroy_map = {0: [0]} - - def make_node(self, o, W, h, inputIdx, outputIdx): - """ - Compute the dot product of the specified pieces of vectors - and matrices. - - The parameter types are actually their expected shapes - relative to each other. - - Parameters - ---------- - o : batch, oWin, oSize - output vector - W : iBlocks, oBlocks, iSize, oSize - weight matrix - h : batch, iWin, iSize - input from lower layer (sparse) - inputIdx : batch, iWin - indexes of the input blocks - outputIdx : batch, oWin - indexes of the output blocks - - Returns - ------- - (batch, oWin, oSize) - dot(W[i, j], h[i]) + o[j] - - Notes - ----- - - `batch` is the number of examples in a minibatch (batch size). 
- - `iBlocks` is the total number of blocks in the input (from lower - layer). - - `iSize` is the size of each of these input blocks. - - `iWin` is the number of blocks that will be used as inputs. Which - blocks will be used is specified in `inputIdx`. - - `oBlocks` is the number or possible output blocks. - - `oSize` is the size of each of these output blocks. - - `oWin` is the number of output blocks that will actually be computed. - Which blocks will be computed is specified in `outputIdx`. - - """ - o = pytensor.tensor.as_tensor_variable(o) - W = pytensor.tensor.as_tensor_variable(W) - h = pytensor.tensor.as_tensor_variable(h) - inputIdx = pytensor.tensor.as_tensor_variable(inputIdx) - outputIdx = pytensor.tensor.as_tensor_variable(outputIdx) - - if o.ndim != 3: - raise TypeError("The output o must be a 2D tensor") - if W.ndim != 4: - raise TypeError("The weight matrix W must be a 4D tensor") - if h.ndim != 3: - raise TypeError("The input h must be a 3D tensor") - if inputIdx.ndim != 2: - raise TypeError("The input indices inputIdx must be a 2D tensor") - if outputIdx.ndim != 2: - raise TypeError("The output indices outputIdx must be a 2D tensor") - - assert inputIdx.type.dtype in discrete_dtypes - assert outputIdx.type.dtype in discrete_dtypes - - return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()]) - - def perform(self, node, inp, out_): - o, W, h, iIdx, oIdx = inp[:5] - - if not self.inplace: - o = o.copy() - - for b in range(o.shape[0]): - for j in range(o.shape[1]): - outputIdx = oIdx[b, j] - for i in range(h.shape[1]): - inputIdx = iIdx[b, i] - w = W[inputIdx, outputIdx] - o[b, j, :] += np.dot(h[b, i], w) - out_[0][0] = o - - def infer_shape(self, fgraph, node, input_shapes): - return [input_shapes[0]] - - def grad(self, inputs, grads): - o, W, h, inputIdx, outputIdx = inputs - go = grads[0] - - outer_fun = SparseBlockOuter(self.inplace) - gemv_fun = SparseBlockGemv(self.inplace) - - Wgrad = outer_fun(W.zeros_like(), h, go, inputIdx, outputIdx) - hgrad = gemv_fun( - h.zeros_like(), W.dimshuffle((1, 0, 3, 2)), go, outputIdx, inputIdx - ) - return [ - go, - Wgrad, - hgrad, - grad_undefined(self, 3, inputIdx, "grad of inputIdx makes no sense"), - grad_undefined(self, 4, outputIdx, "grad of outputIdx makes no sense"), - ] - - -class SparseBlockOuter(Op): - """ - This computes the outer product of two sets of pieces of vectors - updating a full matrix with the results:: - - for b in range(batch_size): - o[xIdx[b, i], yIdx[b, j]] += (alpha * outer(x[b, i], y[b, j])) - - This op is involved in the gradient of SparseBlockGemv. - - """ - - __props__ = ("inplace",) - - registered_opts: List = [] - - def __init__(self, inplace=False): - self.inplace = inplace - if self.inplace: - self.destroy_map = {0: [0]} - - def make_node(self, o, x, y, xIdx, yIdx, alpha=None): - """ - Compute the dot product of the specified pieces of vectors - and matrices. - - The parameter types are actually their expected shapes - relative to each other. - - Parameters - ---------- - o : xBlocks, yBlocks, xSize, ySize - x : batch, xWin, xSize - y : batch, yWin, ySize - xIdx : batch, iWin - indexes of the x blocks - yIdx : batch, oWin - indexes of the y blocks - - Returns - ------- - (xBlocks, yBlocks, xSize, ySize) - outer(x[i], y[j]) + o[i, j] - - Notes - ----- - - `batch` is the number of examples in a minibatch (batch size). - - `xBlocks` is the total number of blocks in x. - - `xSize` is the size of each of these x blocks. - - `xWin` is the number of blocks that will be used as x. 
Which blocks - will be used is specified in `xIdx`. - - `yBlocks` is the number or possible y blocks. - - `ySize` is the size of each of these y blocks. - - `yWin` is the number of y blocks that will actually be computed. - Which blocks will be computed is specified in `yIdx`. - - """ - one = pytensor.tensor.constant(np.asarray(1.0, dtype="float32")) - o = pytensor.tensor.as_tensor_variable(o) - x = pytensor.tensor.as_tensor_variable(x) - y = pytensor.tensor.as_tensor_variable(y) - - if alpha is None: - alpha = one - - return Apply(self, [o, x, y, xIdx, yIdx, alpha], [o.type()]) - - def infer_shape(self, fgraph, node, input_shapes): - return [input_shapes[0]] - - def perform(self, node, inp, out_): - o, x, y, xIdx, yIdx, alpha = inp[:6] - - if not self.inplace: - o = o.copy() - - for b in range(x.shape[0]): - for i in range(xIdx.shape[1]): - for j in range(yIdx.shape[1]): - o[xIdx[b, i], yIdx[b, j]] += np.outer(x[b, i], y[b, j, :]) - out_[0][0] = o - - -sparse_block_gemv = SparseBlockGemv(False) -sparse_block_gemv_inplace = SparseBlockGemv(True) -sparse_block_outer = SparseBlockOuter(False) -sparse_block_outer_inplace = SparseBlockOuter(True) - - -def sparse_block_dot(W, h, inputIdx, b, outputIdx): - """ - Compute the dot product (plus bias) of the specified pieces of vectors - and matrices. See SparseBlockGemv to get more information. - - The parameter types are actually their expected shapes relative to - each other. - - Parameters - ---------- - W : iBlocks, oBlocks, iSize, oSize - weight matrix - h : batch, iWin, iSize - input from lower layer (sparse) - inputIdx : batch, iWin - indexes of the input blocks - b : oBlocks, oSize - bias vector - outputIdx : batch, oWin - indexes of the output blocks - - Returns - ------- - (batch, oWin, oSize) - dot(W[i, j], h[i]) + b[j] but b[j] is only added once - - Notes - ----- - - `batch` is the number of examples in a minibatch (batch size). - - `iBlocks` is the total number of blocks in the input (from lower layer). - - `iSize` is the size of each of these input blocks. - - `iWin` is the number of blocks that will be used as inputs. Which blocks - will be used is specified in `inputIdx`. - - `oBlocks` is the number or possible output blocks. - - `oSize` is the size of each of these output blocks. - - `oWin` is the number of output blocks that will actually be computed. - Which blocks will be computed is specified in `outputIdx`. 
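The reference semantics of the two block-sparse ops can be summarised in a few lines of NumPy. This sketch mirrors the loops in ``SparseBlockGemv.perform`` and ``SparseBlockOuter.perform`` with tiny, hypothetical block sizes (``y`` here is just a stand-in array of the right shape, e.g. an output gradient):

.. code-block:: python

    # NumPy sketch of the block-sparse reference loops (hypothetical sizes).
    import numpy as np

    rng = np.random.default_rng(0)
    batch, iWin, oWin = 2, 3, 2
    iBlocks, oBlocks, iSize, oSize = 4, 5, 6, 7

    o = np.zeros((batch, oWin, oSize))
    W = rng.normal(size=(iBlocks, oBlocks, iSize, oSize))
    h = rng.normal(size=(batch, iWin, iSize))
    iIdx = rng.integers(0, iBlocks, size=(batch, iWin))
    oIdx = rng.integers(0, oBlocks, size=(batch, oWin))

    # SparseBlockGemv: o[b, j] += dot(h[b, i], W[iIdx[b, i], oIdx[b, j]])
    gemv_out = o.copy()
    for b in range(batch):
        for j in range(oWin):
            for i in range(iWin):
                gemv_out[b, j] += h[b, i] @ W[iIdx[b, i], oIdx[b, j]]

    # SparseBlockOuter: full[xIdx[b, i], yIdx[b, j]] += outer(x[b, i], y[b, j])
    full = np.zeros((iBlocks, oBlocks, iSize, oSize))
    x, y = h, gemv_out  # y stands in for any array of shape (batch, oWin, oSize)
    for b in range(batch):
        for i in range(iWin):
            for j in range(oWin):
                full[iIdx[b, i], oIdx[b, j]] += np.outer(x[b, i], y[b, j])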
- - """ - assert inputIdx.ndim == h.ndim - 1 - assert outputIdx.ndim == inputIdx.ndim - if h.ndim == 2: - h = h.dimshuffle("x", 0, 1) - inputIdx = inputIdx.dimshuffle("x", 0) - outputIdx = outputIdx.dimshuffle("x", 0) - return SparseBlockGemv()(b.take(outputIdx, axis=0), W, h, inputIdx, outputIdx) diff --git a/pytensor/tensor/nnet/c_code/ctc_wrapper.c b/pytensor/tensor/nnet/c_code/ctc_wrapper.c deleted file mode 100644 index 5a51398f53..0000000000 --- a/pytensor/tensor/nnet/c_code/ctc_wrapper.c +++ /dev/null @@ -1,251 +0,0 @@ -#section support_code - -typedef struct ctc_context { - struct ctcOptions options; - void * workspace; - int * input_lengths; - int * flat_labels; - int * label_lengths; -} ctc_context_t; - -void ctc_context_init(ctc_context_t * context) -{ - struct ctcOptions * options = &(context->options); - memset(options, 0, sizeof(struct ctcOptions)); - options->loc = CTC_CPU; -#if defined(_OPENMP) - options->num_threads = omp_get_num_threads(); -#else - options->num_threads = 1; -#endif - context->workspace = NULL; - context->input_lengths = NULL; - context->flat_labels = NULL; - context->label_lengths = NULL; -} - -void ctc_context_destroy(ctc_context_t * context) -{ - free( context->workspace ); - - free( context->input_lengths ); - - free( context->flat_labels ); - - free( context->label_lengths ); -} - -int ctc_check_result(ctcStatus_t retcode, const char * msg) -{ - if( CTC_STATUS_SUCCESS != retcode ) - { - // Get error message from underlying library - const char * ctc_msg = ctcGetStatusString( retcode ); - - PyErr_Format( PyExc_RuntimeError, - "ConnectionistTemporalClassification: %s CTC error: %s", - msg, - ctc_msg ); - return 1; - } - return 0; -} - -void create_contiguous_input_lengths( PyArrayObject * input_lengths_arr, - int ** input_lengths ) -{ - npy_int num_elements = PyArray_DIMS( input_lengths_arr )[0]; - - *input_lengths = (int *) calloc( num_elements, sizeof(int) ); - - if ( NULL == (*input_lengths) ) - return; - - for( npy_int elem_idx = 0; elem_idx < num_elements; ++elem_idx ) - { - (*input_lengths)[elem_idx] = *( (npy_int *) PyArray_GETPTR1( input_lengths_arr, elem_idx ) ); - } -} - -void create_flat_labels( PyArrayObject * label_matrix, int ** flat_labels, - int ** label_lengths ) -{ - npy_int rows = PyArray_DIMS( label_matrix )[0]; - npy_int cols = PyArray_DIMS( label_matrix )[1]; - - *flat_labels = (int *) calloc( rows * cols, sizeof(int) ); - if ( NULL == (*flat_labels) ) - return; - - *label_lengths = (int *) calloc( rows, sizeof(int) ); - if ( NULL == (*label_lengths) ) - { - free( *flat_labels ); - *flat_labels = NULL; - return; - } - - npy_int label_index = 0; - for( npy_int row_idx = 0; row_idx < rows; ++row_idx ) - { - npy_int label_length = 0; - for( npy_int col_idx = 0; col_idx < cols; ++col_idx ) - { - npy_int label = *( (npy_int *) PyArray_GETPTR2( label_matrix, row_idx, col_idx ) ); - if ( label >= 0 ) // negative values are assumed to be padding - { - (*flat_labels)[ label_index++ ] = label; - ++label_length; - } - } - (*label_lengths)[ row_idx ] = label_length; - } -} - -#section support_code_apply - -int APPLY_SPECIFIC(ctc_cost_cpu)(PyArrayObject * in_activations, - PyArrayObject * in_labels, - PyArrayObject * in_input_lengths, - PyArrayObject ** out_costs, - PyArrayObject ** out_gradients) -{ - ctc_context_t ctc_object; - ctc_context_t * context = &ctc_object; - ctc_context_init( context ); - - if ( !PyArray_IS_C_CONTIGUOUS( in_activations ) ) - { - PyErr_SetString( PyExc_RuntimeError, - "ConnectionistTemporalClassification: activations 
array must be C-contiguous." ); - return 1; - } - - npy_float32 * activations = (npy_float32 *) PyArray_DATA( in_activations ); - - create_contiguous_input_lengths( in_input_lengths, &(context->input_lengths) ); - - if ( NULL == context->input_lengths ) - { - // Destroy previous CTC context before returning exception - ctc_context_destroy( context ); - - PyErr_Format( PyExc_MemoryError, - "ConnectionistTemporalClassification: Could not allocate memory for input lengths" ); - return 1; - } - - // flatten labels to conform with library memory layout - create_flat_labels( in_labels, &(context->flat_labels), &(context->label_lengths) ); - - if ( ( NULL == context->label_lengths ) || ( NULL == context->flat_labels ) ) - { - // Destroy previous CTC context before returning exception - ctc_context_destroy( context ); - - PyErr_Format( PyExc_MemoryError, - "ConnectionistTemporalClassification: Could not allocate memory for labels and their lengths" ); - return 1; - } - - npy_int minibatch_size = PyArray_DIMS( in_activations )[1]; - npy_int alphabet_size = PyArray_DIMS( in_activations )[2]; - - npy_float32 * costs = NULL; - npy_intp cost_size = minibatch_size; - - if ( (*out_costs) == NULL || // Symbolic variable has no memory backing - PyArray_NDIM( *out_costs ) != 1 || // or, matrix has the wrong size - PyArray_DIMS( *out_costs )[0] != cost_size ) - { - Py_XDECREF( *out_costs ); - // Allocate new matrix - *out_costs = (PyArrayObject *) PyArray_ZEROS( 1, &cost_size, NPY_FLOAT32, 0 ); - - if ( NULL == (*out_costs) ) - { - // Destroy previous CTC context before returning exception - ctc_context_destroy( context ); - - PyErr_Format( PyExc_MemoryError, - "ConnectionistTemporalClassification: Could not allocate memory for CTC costs" ); - return 1; - } - } - - costs = (npy_float32 *) PyArray_DATA( *out_costs ); - - npy_float32 * gradients = NULL; - - if ( NULL != out_gradients ) // If gradient computation is not disabled - { - if ( NULL == (*out_gradients) || // Symbolic variable has no real backing - PyArray_NDIM( *out_gradients ) != 3 || - PyArray_DIMS( *out_gradients )[0] != PyArray_DIMS( in_activations )[0] || - PyArray_DIMS( *out_gradients )[1] != PyArray_DIMS( in_activations )[1] || - PyArray_DIMS( *out_gradients )[2] != PyArray_DIMS( in_activations )[2] ) - { - // Existing matrix is the wrong size. Make a new one. - // Decrement ref counter to existing array - Py_XDECREF( *out_gradients ); - // Allocate new array - *out_gradients = (PyArrayObject *) PyArray_ZEROS(3, PyArray_DIMS( in_activations ), - NPY_FLOAT32, 0); - - if ( NULL == (*out_gradients) ) - { - // Destroy previous CTC context before returning exception - ctc_context_destroy( context ); - - PyErr_Format( PyExc_MemoryError, - "ConnectionistTemporalClassification: Could not allocate memory for CTC gradients!" ); - return 1; - } - } - gradients = (npy_float32 *) PyArray_DATA( *out_gradients ); - } - - size_t cpu_workspace_size; - int ctc_error; - - ctc_error = ctc_check_result( get_workspace_size( context->label_lengths, - context->input_lengths, alphabet_size, minibatch_size, context->options, - &cpu_workspace_size ), - "Failed to obtain CTC workspace size." 
); - - if ( ctc_error ) // Exception is set by ctc_check_result, return error here - { - // Destroy previous CTC context before returning exception - ctc_context_destroy( context ); - - return 1; - } - - context->workspace = malloc( cpu_workspace_size ); - - if ( NULL == context->workspace ) - { - // Destroy previous CTC context before returning exception - ctc_context_destroy( context ); - - PyErr_Format( PyExc_MemoryError, - "ConnectionistTemporalClassification: Failed to allocate memory for CTC workspace." ); - return 1; - } - - ctc_error = ctc_check_result( compute_ctc_loss( activations, gradients, - context->flat_labels, context->label_lengths, context->input_lengths, - alphabet_size, minibatch_size, costs, context->workspace, - context->options ), "Failed to compute CTC loss function." ); - - if ( ctc_error ) // Exception is set by ctc_check_result, return error here - { - ctc_context_destroy( context ); - - return 1; - } - - ctc_context_destroy( context ); - - return 0; -} diff --git a/pytensor/tensor/nnet/conv.py b/pytensor/tensor/nnet/conv.py deleted file mode 100644 index c600a7a1aa..0000000000 --- a/pytensor/tensor/nnet/conv.py +++ /dev/null @@ -1,2639 +0,0 @@ -""" -Contains an Op for convolving input images with a set of filters. This was -developed especially for Convolutional Neural Networks. - -For related ops, including downsampling and subsampling, see -tensor.signal and tensor.signal.pool. - -See especially conv2d(). -""" - - -import logging -import warnings - -import numpy as np - - -try: - from scipy.signal.signaltools import _bvalfromboundary, _valfrommode - from scipy.signal.sigtools import _convolve2d -except ImportError: - from scipy.signal._signaltools import _bvalfromboundary, _valfrommode - from scipy.signal._sigtools import _convolve2d - -import pytensor -from pytensor.graph.basic import Apply -from pytensor.link.c.op import OpenMPOp -from pytensor.tensor import blas -from pytensor.tensor.basic import as_tensor_variable, get_scalar_constant_value -from pytensor.tensor.exceptions import NotScalarConstantError -from pytensor.tensor.nnet.abstract_conv import ( - get_conv_output_shape, - get_conv_shape_1axis, -) -from pytensor.tensor.shape import specify_broadcastable -from pytensor.tensor.type import discrete_dtypes, tensor - - -__docformat__ = "restructuredtext en" -_logger = logging.getLogger("pytensor.tensor.nnet.conv") - - -def conv2d( - input, - filters, - image_shape=None, - filter_shape=None, - border_mode="valid", - subsample=(1, 1), - **kargs, -): - """Build the symbolic graph for convolving a stack of input images with a set of filters. - - The implementation is modelled after Convolutional Neural Networks - (CNN). It is simply a wrapper to the `ConvOp` but provides a much cleaner - interface. - - This is deprecated. - - Parameters - ---------- - input : symbolic 4D tensor - Mini-batch of feature map stacks, of shape - (batch size, stack size, nb row, nb col) - see the optional parameter image_shape - filters: symbolic 4D tensor - Set of filters used in CNN layer of shape - (nb filters, stack size, nb row, nb col) - see the optional parameter filter_shape - border_mode : {'valid', 'full'} - 'valid'only apply filter to complete patches of the image. Generates - output of shape: image_shape - filter_shape + 1. - 'full' zero-pads image to multiple of filter shape to generate output - of shape: image_shape + filter_shape - 1. - subsample: tuple of len 2 - Factor by which to subsample the output. Also called strides elsewhere. 
- image_shape: None, tuple/list of len 4 of int, None or Constant variable - The shape of the input parameter. - Optional, used for optimization like loop unrolling - You can put None for any element of the list to tell that this element - is not constant. - filter_shape : None, tuple/list of len 4 of int, None or Constant variable - Optional, used for optimization like loop unrolling - You can put None for any element of the list - to tell that this element is not constant. - kwargs - Kwargs are passed onto ConvOp. Can be used to set the following: - unroll_batch, unroll_kern, unroll_patch, openmp (see ConvOp doc). - - openmp: By default have the same value as - config.openmp. For small image, filter, - batch size, nkern and stack size, it can be - faster to disable manually openmp. A fast and - incomplete test show that with image size - 6x6, filter size 4x4, batch size==1, - n kern==1 and stack size==1, it is faster - to disable it in valid mode. But if we - grow the batch size to 10, it is faster - with openmp on a core 2 duo. - - Returns - ------- - symbolic 4D tensor - Set of feature maps generated by convolutional layer. Tensor is - of shape (batch size, nb filters, output row, output col). - - """ - - warnings.warn( - "pytensor.tensor.nnet.conv.conv2d is deprecated." - " Use pytensor.tensor.nnet.conv2d instead.", - DeprecationWarning, - ) - - # accept Constant value for image_shape and filter_shape. - if image_shape is not None: - image_shape = list(image_shape) - for i in range(len(image_shape)): - if image_shape[i] is not None: - try: - image_shape[i] = get_scalar_constant_value( - as_tensor_variable(image_shape[i]) - ) - except NotScalarConstantError: - raise NotScalarConstantError( - "The convolution need that the shape" - " information are constant values. We got" - " {image_shape[i]} for the image_shape parameter" - ) - assert image_shape[i].dtype in discrete_dtypes - image_shape[i] = int(image_shape[i]) - if filter_shape is not None: - filter_shape = list(filter_shape) - for i in range(len(filter_shape)): - if filter_shape[i] is not None: - try: - filter_shape[i] = get_scalar_constant_value( - as_tensor_variable(filter_shape[i]) - ) - except NotScalarConstantError: - raise NotScalarConstantError( - "The convolution need that the shape" - " information are constant values. We got" - " {filter_shape[i]} for the filter_shape " - "parameter" - ) - assert filter_shape[i].dtype in discrete_dtypes - filter_shape[i] = int(filter_shape[i]) - - if image_shape and filter_shape: - try: - if image_shape[1] is not None and filter_shape[1] is not None: - assert image_shape[1] == filter_shape[1] - except Exception: - print("image ", image_shape, " filters ", filter_shape) - raise - - if filter_shape is not None: - nkern = filter_shape[0] - kshp = filter_shape[2:] - else: - nkern, kshp = None, None - - if image_shape is not None: - bsize = image_shape[0] - imshp = image_shape[1:] - else: - bsize, imshp = None, None - - op = ConvOp( - output_mode=border_mode, - dx=subsample[0], - dy=subsample[1], - imshp=imshp, - kshp=kshp, - nkern=nkern, - bsize=bsize, - **kargs, - ) - - return op(input, filters) - - -class ConvOp(OpenMPOp): - r""" - This Op serves a dual purpose: it can implement a vanilla 2D convolution - (as taught in any signal processing class) or implement the - convolutional layers found in Convolutional Neural Networks. - - In this setting, a set of 3D images is convolved with a set of 3D kernels, - with the particularity that their leading dimensions are of equal length. 
- Vanilla 2D convolution is treated as a special case of this. - - The input parameter represents a mini-batch of multiple images. Its shape is: - batch size x num. input feature maps x image height x image width - - The kernel parameter represents a set of 3D kernels. Its shape is: - number of filters x num. input images x filter height x filter width - - The output of ConvOp is a 4D tensor, generated as follows: - output[b,k,:,:] = \sum_i input[b,i,:,:] * filter[k,i,:,:] \forall b,k - where b is the mini-batch index, k the filter index and * is the - convolution operator. - - The constructor initializes a ConvOp with given output_mode (full/valid). - All other parameters are optional and are only used to generate more - optimized c code, or to enable graph optimizers to optimally replace the - ConvOp. - - NOTES ON OPTIMIZATION: - There are two types of optimization. The first is the selection of the - fastest algo when bsize and nkern are provided with imshp and kshp. - By default we try to select the fastest version. You can specify it - with the unroll_batch, unroll_kern, and unroll_patch parameter. - - The second type of optimization is hardcoding some dimensions into the - code when all shape are know. - This make a significant difference for the 'full' output_mode. - - Sometimes, the fastest implementation on x86-64 uses - {unroll_batch=4, unroll_kern=4, unroll_patch=False} - with all other shape parameters being provided. - - For optimizing other architectures, see: - Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance - Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical - Software, May 2008. - Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc. - - Parameters - ---------- - output_mode : {'valid', 'full'} - 'valid' gives an output smaller then the image. - 'full' gives an output bigger then the image. - See 'border_mode' in conv2d's doc. - - Optional parameters: (will generate more optimal c code) - - imshp : tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images. - Stacksize, nb image row, nb image col. - kshp : tuple of len 2 - Nb kernel row, nb kernel col. - nkern : int - The number of kernel. - bsize : int - The size of the minibatch. - dx : int - Patch stride rows. - dy : int - Patch stride cols - - Params which select the version of code used: - - unroll_patch : bool - Use a version of c_code that unroll the patch loop that don't - request all shape information to work, but if all shape information - are present, will use it to hardcode the value in the code for - faster code. - unroll_batch : int - Use a version of c_code that unroll the batch (by unroll_batch) - and the nkern (by unroll_kern) loop. The size must by a multiple - of bsize or nkern respectively. - unroll_kern : int - Use a version of c_code that unroll the batch - (by unroll_batch) and the nkern(by unroll_kern) loop. The size - must by a multiple of bsize or nkern respectively. - The 3 following parameters are used internally when we generate - the gradient when dx!=1 or dy!=1. - - imshp_logical - Default None. None value is equivalent to imshp value. - When imshp_logical != imshp, it tell we need to insert 0 in - the image before we do the convolution. For example, when dx==dy==2 - and the image is [[1, 2], [3, 4]], we should make as if the image - was [[1, 0, 2, 0], [0, 0, 0, 0], [3, 0, 4, 0], [0, 0, 0, 0]]. - Our python code insert the zero, but the c code optimize it. 
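The zero-insertion described here (used when ``imshp_logical != imshp``) is easy to reproduce with strided assignment; a minimal NumPy sketch of the ``dx == dy == 2`` example from the docstring:

.. code-block:: python

    # NumPy sketch of the zero-insertion performed when imshp_logical != imshp
    # (the dx == dy == 2 example given in the docstring above).
    import numpy as np

    img = np.array([[1, 2], [3, 4]])
    rstride = cstride = 2
    buf = np.zeros((img.shape[0] * rstride, img.shape[1] * cstride), dtype=img.dtype)
    buf[::rstride, ::cstride] = img
    print(buf)
    # [[1 0 2 0]
    #  [0 0 0 0]
    #  [3 0 4 0]
    #  [0 0 0 0]]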
- imshp_logical != imshp when taking the grad again the weights or - the image when the output_mode is full and `dx != 1` or `dy != 1`. - kshp_logical - Idem but for kshp and used for the grad again the - weights when the output_mode is valid and `dx != 1` or `dy != 1`. - kshp_logical_top_aligned - Used in the same case. Default to True. - Set to False in the grad again the weight when the - output_mode is full. - - """ - - __attrnames = [ - "imshp", - "kshp", - "nkern", - "bsize", - "dx", - "dy", - "out_mode", - "unroll_batch", - "unroll_kern", - "unroll_patch", - "imshp_logical", - "kshp_logical", - "kshp_logical_top_aligned", - ] - """These attributes uniquely identify the behaviour of this op for - given inputs. Do not set openmp here. - """ - - # the value of speed_unroll_batch_kern,speed_unroll_patch_noshape,speed_unroll_patch_shape - # have bean calculated on maggie36 when their is only 1 session logged on and only this was running. - # It is an Intel(R) Xeon(R) CPU E5430 @ 2.66GHz. It is computer with pytensor/tensor/nnet/tests/speed_test_conv.py - # and took 5 minutes to run. - # TODO: we should compute this table for each computer/os as this can change. - # I saw on one computer that the speed with the shape can be slower than without! - # using the real shape and the same dtype could also help. - - # unroll_batch, unroll_kern, valid time, full time - speed_unroll_batch_kern = [ - (1, 1, 2.4661250114440918, 6.5472931861877441), - (1, 2, 1.5869178771972656, 5.1499760150909424), - (1, 3, 1.4270510673522949, 3.6593470573425293), - (1, 4, 1.3373479843139648, 3.3451821804046631), - (1, 5, 1.2818830013275146, 3.1444568634033203), - (1, 6, 1.2521560192108154, 3.0256359577178955), - (1, 10, 1.2134110927581787, 2.9174180030822754), - (2, 1, 1.657214879989624, 4.5261678695678711), - (2, 2, 1.2123160362243652, 2.9747390747070312), - (2, 3, 1.0758891105651855, 2.5690360069274902), - (2, 4, 1.0683329105377197, 2.4233770370483398), - (2, 5, 1.0955719947814941, 2.3999948501586914), - (2, 6, 1.5935721397399902, 2.6878271102905273), - (2, 10, 1.8511250019073486, 3.2417428493499756), - (3, 1, 1.5948119163513184, 3.631148099899292), - (3, 2, 1.0761330127716064, 2.6011371612548828), - (3, 3, 1.0551531314849854, 2.4200370311737061), - (3, 4, 1.3930759429931641, 2.5211219787597656), - (3, 5, 1.4330689907073975, 2.5704989433288574), - (3, 6, 1.362138032913208, 2.5964410305023193), - (3, 10, 1.6582000255584717, 2.9907989501953125), - (4, 1, 1.4793620109558105, 3.3473429679870605), - (4, 2, 1.0671560764312744, 2.4171769618988037), - (4, 3, 1.2569692134857178, 2.2807950973510742), - (4, 4, 1.3456289768218994, 2.6219108104705811), - (4, 5, 1.4055080413818359, 2.4606490135192871), - (4, 6, 1.372107982635498, 2.551663875579834), - (4, 10, 1.599470853805542, 2.9172940254211426), - (5, 1, 1.4115700721740723, 3.2077109813690186), - (5, 2, 1.0635769367218018, 2.2648060321807861), - (5, 3, 1.3842809200286865, 2.6135518550872803), - (5, 4, 1.3470511436462402, 2.3852400779724121), - (5, 5, 1.3539440631866455, 2.5245928764343262), - (5, 6, 1.4037849903106689, 2.5985310077667236), - (5, 10, 1.6120610237121582, 2.8127608299255371), - (6, 1, 1.3623628616333008, 3.021122932434082), - (6, 2, 1.1697649955749512, 2.6285450458526611), - (6, 3, 1.2980999946594238, 2.4746189117431641), - (6, 4, 1.3739941120147705, 2.5579929351806641), - (6, 5, 1.3967819213867188, 2.5522029399871826), - (6, 6, 1.4279270172119141, 2.6127138137817383), - (6, 10, 1.605496883392334, 2.864037036895752), - (10, 1, 1.6401121616363525, 
2.970099925994873), - (10, 2, 1.46710205078125, 2.7231831550598145), - (10, 3, 1.4193780422210693, 2.6087639331817627), - (10, 4, 1.4657118320465088, 2.6246678829193115), - (10, 5, 1.5052611827850342, 2.6542458534240723), - (10, 6, 1.5214400291442871, 2.7243161201477051), - (10, 10, 1.6116268634796143, 2.956165075302124), - ] - - # valid time, full time - speed_unroll_patch_noshape = [2.0109100341796875, 5.8175678253173828] - # valid time, full time - speed_unroll_patch_shape = [1.2967290878295898, 5.5283889770507812] - - @staticmethod - def has_all_shape(imshp, kshp, nkern=1, bsize=1): - return ( - nkern is not None - and bsize is not None - and all(shp is not None for shp in imshp) - and all(shp is not None for shp in kshp) - ) - - @staticmethod - def getOutputShape(inshp, kshp, stride=(1, 1), mode="valid"): - """ - Computes the output dimensions of convolving an image of shape "inshp" - with kernels of shape "kshp". Accepts symbolic or integer shapes. - Propagates `None`s (for unknown shapes). - - Parameters - ---------- - inshp - (rows,cols) of input image. - kshp - (rows,cols) of filters. - mode: {'valid', 'full'} - See 'border_mode' in conv2d's doc. - - Returns - ------- - object - (rows,cols) of output image. - - """ - # The formula would be ceil((i + s * k - s * 1) / float(d)), - # with s=1 for mode=='full' and s=-1 for mode=='valid'. - # To support symbolic shapes, we express this with integer arithmetic. - warnings.warn( - "`getOutputShape` is deprecated; use `get_conv_output_shape` instead.", - DeprecationWarning, - stacklevel=2, - ) - return tuple( - get_conv_shape_1axis(i, k, mode, d) for i, k, d in zip(inshp, kshp, stride) - ) - - def __init__( - self, - imshp=None, - kshp=None, - nkern=None, - bsize=None, - dx=1, - dy=1, - output_mode="valid", - unroll_batch=None, - unroll_kern=None, - unroll_patch=None, - imshp_logical=None, - kshp_logical=None, - kshp_logical_top_aligned=True, - verbose=False, - openmp=None, - ): - # Expand unknown image / kernel shapes into tuples of Nones - if imshp is None: - imshp = (None, None, None) - else: - imshp = tuple(imshp) - if kshp is None: - kshp = (None, None) - else: - kshp = tuple(kshp) - - # Check imshp and kshp dimensionality - if len(imshp) == 2: - imshp = (1,) + imshp - elif len(imshp) != 3: - raise ValueError(f"len(imshp) must be 2 or 3, got {len(imshp)}") - if len(kshp) != 2: - raise ValueError(f"len(kshp) must be 2, got {len(kshp)}") - - # We must continue to consider None as 1 for backward compatibility. 
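A minimal sketch of the per-axis output-shape rule quoted in the ``getOutputShape`` docstring above, i.e. ``ceil((i + s*k - s) / d)`` with ``s = 1`` for ``'full'`` and ``s = -1`` for ``'valid'``; the helper name ``conv_out_length`` is hypothetical, not part of the module:

.. code-block:: python

    # Sketch of the per-axis output-shape rule from the getOutputShape docstring.
    import math

    def conv_out_length(i, k, mode, d=1):
        s = 1 if mode == "full" else -1  # mode == "valid"
        return math.ceil((i + s * k - s) / d)

    assert conv_out_length(5, 3, "valid") == 3       # 5 - 3 + 1
    assert conv_out_length(5, 3, "full") == 7        # 5 + 3 - 1
    assert conv_out_length(5, 3, "valid", d=2) == 2  # strided output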
- if dx is None: - dx = 1 - if dy is None: - dy = 1 - - if int(dx) != dx: - raise TypeError("ConvOp.__init__ param dx must be an int", dx) - dx = int(dx) - - if int(dy) != dy: - raise TypeError("ConvOp.__init__ param dy must be an int", dy) - dy = int(dy) - - all_shape = self.has_all_shape(imshp, kshp, nkern, bsize) - if (unroll_batch or unroll_kern) and not all_shape: - raise ValueError( - "In ConvOp, when using unroll_batch and" - " unroll_nkern, all shape are needed" - ) - - # Init the openmp attribute - super().__init__(openmp=openmp) - if not all_shape or self.openmp: - # Only this version is parallelized - unroll_patch = True - self.verbose = verbose - self.imshp = imshp - self.kshp = kshp - self.nkern = nkern - self.bsize = bsize - self.dx = dx - self.dy = dy - - # a triple - if imshp_logical is None: - self.imshp_logical = self.imshp - else: - imshp_logical = tuple(imshp_logical) - if len(imshp_logical) != 3: - raise ValueError( - f"len(imshp_logical) must be 3, got {len(imshp_logical)}" - ) - self.imshp_logical = imshp_logical - - # a pair - if kshp_logical is None: - self.kshp_logical = self.kshp - else: - kshp_logical = tuple(kshp_logical) - if len(kshp_logical) != 2: - raise ValueError( - f"len(kshp_logical) must be 2, got {len(kshp_logical)}" - ) - self.kshp_logical = kshp_logical - - # a bool - self.kshp_logical_top_aligned = kshp_logical_top_aligned - - self.unroll_batch = unroll_batch - self.unroll_kern = unroll_kern - self.unroll_patch = unroll_patch - - if self.unroll_batch and not self.unroll_kern: - self.unroll_kern = 1 - if self.unroll_kern and not self.unroll_batch: - self.unroll_batch = 1 - - # downcast unroll_batch if not a divisor of batch size - if ( - self.unroll_batch is not None - and self.unroll_batch > 0 - and self.bsize % self.unroll_batch != 0 - ): - - if self.bsize <= self.unroll_batch: - self.unroll_batch = self.bsize - else: - # find the maximum value under unroll_batch that would work - new = self.unroll_batch - assert new >= 1 - while self.bsize % new != 0: - new -= 1 - - warnstr = ( - "In ConvOp.__init__(): " - f"unroll_batch({self.unroll_batch}) must be 0 or a divisor of" - f" bsize({self.bsize}). We revert it to {new}. This" - " won't change the result, but may make it slower." - ) - _logger.warning(warnstr) - - self.unroll_batch = new - - # downcast unroll_kern if not a divisor of nb of kernel - if ( - self.unroll_kern is not None - and self.unroll_kern > 0 - and self.nkern % self.unroll_kern != 0 - ): - - if self.nkern <= self.unroll_kern: - self.unroll_kern = self.nkern - else: - # find the maximum value under unroll_kern that would work - new = self.unroll_kern - assert new >= 1 - while self.nkern % new != 0: - new -= 1 - - warnstr = ( - "In ConvOp.__init__(): " - f"unroll_kern({self.unroll_kern}) must be 0 or a divisor of" - f" nkern({self.nkern}). We revert it to {new}. This" - " won't change the result, but may make it slower." 
- ) - _logger.warning(warnstr) - self.unroll_kern = new - - self.outshp = get_conv_output_shape( - (None,) + self.imshp_logical, - ( - None, - None, - ) - + self.kshp_logical, - output_mode, - (dx, dy), - )[2:] - self.fulloutshp = get_conv_output_shape( - (None,) + self.imshp_logical, - ( - None, - None, - ) - + self.kshp_logical, - output_mode, - (1, 1), - )[2:] - - self.out_mode = output_mode - - if self.out_mode not in ("valid", "full"): - raise NotImplementedError(f"Mode {self.out_mode} not implemented") - - if any((shp is not None) and (shp <= 0) for shp in self.outshp): - raise ValueError( - "Bad size for the output shape. Verify that [post-" - f"supersampling] input shape ({self.imshp_logical}) and kern" - f" shape({self.kshp_logical}) are ok. (Hint: kerns must fit inside" - " image in valid mode)" - ) - - if ( - self.unroll_kern is None - and self.unroll_batch is None - and self.unroll_patch is None - ): - # no version specified. Find the faster we have - if self.bsize is None and self.nkern is None: - self.unroll_patch = True - elif self.bsize is not None and self.nkern is not None: - bsize = self.bsize - nkern = self.nkern - mode_idx = 0 - if self.out_mode != "valid": - mode_idx = 1 - if self.has_all_shape(self.imshp, self.kshp): - time_unroll_patch = self.speed_unroll_patch_shape[mode_idx] - else: - time_unroll_patch = self.speed_unroll_patch_noshape[mode_idx] - time_unroll_batch_kern = 9999999 - for i in range(len(self.speed_unroll_batch_kern)): - if ( - bsize % self.speed_unroll_batch_kern[i][0] == 0 - and nkern % self.speed_unroll_batch_kern[i][1] == 0 - ): - if ( - self.speed_unroll_batch_kern[i][2 + mode_idx] - < time_unroll_batch_kern - ): - time_unroll_batch_kern = self.speed_unroll_batch_kern[i][ - 2 + mode_idx - ] - time_unroll_batch_kern_idx = i - if time_unroll_patch < time_unroll_batch_kern: - self.unroll_patch = True - else: - self.unroll_batch = self.speed_unroll_batch_kern[ - time_unroll_batch_kern_idx - ][0] - self.unroll_kern = self.speed_unroll_batch_kern[ - time_unroll_batch_kern_idx - ][1] - self.unroll_patch = False - - _logger.debug( - "AUTO FIND VERSION OF C_CODE OF CONV OP %s %s %s %s %s %s %s", - self.unroll_batch, - self.unroll_kern, - self.unroll_patch, - self.bsize, - self.nkern, - time_unroll_patch, - time_unroll_batch_kern, - ) - - self._rehash() - - def __eq__(self, other): - if type(self) != type(other): - return False - for a in self.__attrnames: - if getattr(self, a) != getattr(other, a): - return False - return True - - def __setstate__(self, d): - super().__setstate__(d) - self._rehash() - - def _rehash(self): - hashval = hash(type(self)) - for a in self.__attrnames: - hashval = hashval ^ hash(getattr(self, a)) - self.__hashval = hashval - - def __hash__(self): - return self.__hashval - - def __str__(self): - return ( - "ConvOp{" - + ",".join(str((a, getattr(self, a))) for a in self.__attrnames) - + "}" - ) - - def flops(self, inputs, outputs): - """ - Useful with the hack in profiling to print the MFlops. 
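As a worked example of the valid-mode FLOP accounting implemented just below (2 flops, one multiply and one add, per kernel tap, per output pixel, per (batch, input channel, filter) triple), with hypothetical shapes:

.. code-block:: python

    # Worked example of the valid-mode FLOP count (hypothetical shapes):
    # images = (batch, stack, rows, cols), kerns = (nkern, stack, krows, kcols).
    batch, stack, rows, cols = 64, 3, 32, 32
    nkern, krows, kcols = 16, 5, 5
    out_r, out_c = rows - krows + 1, cols - kcols + 1  # 28 x 28

    flops = (krows * kcols * 2) * (out_r * out_c) * (stack * nkern * batch)
    print(flops)  # 120422400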
- - """ - images, kerns = inputs - (out,) = outputs - assert images[1] == kerns[1] - flops = 0 - if self.out_mode == "valid": - # nb mul and add by output pixel - flops = kerns[2] * kerns[3] * 2 - # nb flops by output image - flops *= out[2] * out[3] - # nb patch multiplied - flops *= images[1] * kerns[0] * images[0] - else: - flops = ( - images[0] - * kerns[0] - * images[1] - * kerns[2] - * kerns[3] - * images[2] - * images[3] - * 2 - ) - return flops - - def make_node(self, inputs, kerns): - # TODO: find a way to make ConvOp work for N-D (after NIPS09) - """ - Parameters - ---------- - inputs - 4 dim: batches x stacksize x rows x cols. - kerns - 4 dim: nkern x stackidx x rows x cols. - - """ - _inputs = as_tensor_variable(inputs) - _kerns = as_tensor_variable(kerns) - # TODO: lift this restriction by upcasting either inputs or kerns - if _inputs.ndim != 4: - raise TypeError( - "ConvOp (make_node) requires input be a 4D tensor;" - f' received "{inputs}" ({_inputs.ndim} dims)' - ) - if _kerns.ndim != 4: - raise TypeError("make_node requires 4D tensor of kernels") - if _inputs.type.dtype != _kerns.type.dtype: - raise NotImplementedError( - "The image and the kernel must have the same type." - "inputs({_inputs.dtype}), kerns({_kerns.dtype})" - ) - out_shape = ( - _inputs.type.shape[0], - _kerns.type.shape[0], - self.outshp[0], - self.outshp[1], - ) - out_shape = tuple(1 if s == 1 else None for s in out_shape) - output = tensor( - dtype=_inputs.type.dtype, - shape=out_shape, - ) - - return Apply(self, [_inputs, _kerns], [output]) - - def infer_shape(self, fgraph, node, input_shapes): - imshp = input_shapes[0] # 4D image shape - kshp = input_shapes[1] # 4D filter shape - bsize, imshp = imshp[0], list(imshp[1:]) - nkern, kshp = kshp[0], list(kshp[2:]) - # replace symbolic shapes with known shapes - if self.bsize is not None: - bsize = self.bsize - for i in (0, 1, 2): - if self.imshp_logical[i] is not None: - imshp[i] = self.imshp_logical[i] - if self.nkern is not None: - nkern = self.nkern - for i in (0, 1): - if self.kshp_logical[i] is not None: - kshp[i] = self.kshp_logical[i] - # infer output shape from what we have - res = get_conv_output_shape( - (bsize,) + tuple(imshp), - ( - nkern, - None, - ) - + tuple(kshp), - self.out_mode, - (self.dx, self.dy), - ) - return [res] - - def perform(self, node, inp, out): - """ - By default if len(img2d.shape)==3, we TODO - - """ - img2d, filtersflipped = inp - (z,) = out - - # TODO: move these back out to global scope when they no longer - # cause an atexit error - imshp = self.imshp - if any(x is None for x in imshp): - imshp = tuple(img2d.shape[1:]) - if imshp != img2d.shape[1:]: - raise ValueError( - "The image shape provided at build time " - "is different from the one passed at run time", - imshp, - img2d.shape[1:], - ) - kshp = self.kshp - if any(x is None for x in kshp): - kshp = tuple(filtersflipped.shape[2:]) - if kshp != filtersflipped.shape[2:]: - raise ValueError( - "The filter shape provided at build time " - "is different from the one passed at run time", - kshp, - filtersflipped.shape[2:], - ) - bsize = self.bsize - if bsize is None: - bsize = img2d.shape[0] - elif bsize != img2d.shape[0]: - raise ValueError( - "The batch size provided at build time " - "is different from the one passed at run time", - bsize, - img2d.shape[0], - ) - nkern = self.nkern - if nkern is None: - nkern = filtersflipped.shape[0] - elif nkern != filtersflipped.shape[0]: - raise ValueError( - "The number of filters provided at build time " - "is different from the 
one passed at run time", - nkern, - filtersflipped.shape[0], - ) - - imshp_logical = self.imshp_logical - if imshp_logical[0] is None: - imshp_logical = (imshp[0],) + imshp_logical[1:] - if imshp_logical[1] is None: - imshp_logical = (imshp_logical[0], imshp[1], imshp_logical[2]) - if imshp_logical[2] is None: - imshp_logical = imshp_logical[:2] + (imshp[2],) - assert all(x is not None for x in imshp_logical) - - kshp_logical = self.kshp_logical - if kshp_logical[0] is None: - kshp_logical = (kshp[0], kshp_logical[1]) - if kshp_logical[1] is None: - kshp_logical = (kshp_logical[0], kshp[1]) - assert all(x is not None for x in kshp_logical) - - if all(shp is not None for shp in self.fulloutshp): - fulloutshp = tuple(self.fulloutshp) - else: - fulloutshp = get_conv_output_shape( - (None,) + imshp_logical, - ( - None, - None, - ) - + kshp_logical, - self.out_mode, - (1, 1), - )[2:] - - if ( - z[0] is None - or z[0].shape - != ( - bsize, - nkern, - ) - + fulloutshp - ): - z[0] = np.zeros( - ( - bsize, - nkern, - ) - + fulloutshp, - dtype=img2d.dtype, - ) - zz = z[0] - - stacklen = imshp[0] - - img2d = img2d.reshape((bsize,) + imshp) - filtersflipped = filtersflipped.reshape((nkern, stacklen) + kshp) - - if self.imshp != self.imshp_logical: - # assuming that to get from imshp to imshp logical we insert zeros in missing spots - rstride = int(np.ceil(imshp_logical[1] / float(imshp[1]))) - cstride = int(np.ceil(imshp_logical[2] / float(imshp[2]))) - buf = np.zeros((bsize,) + imshp_logical, dtype=img2d.dtype) - buf[:, :, ::rstride, ::cstride] = img2d - img2d = buf - del buf, rstride, cstride - - if kshp != kshp_logical: - rstride = int(np.ceil(kshp_logical[0] / float(kshp[0]))) - cstride = int(np.ceil(kshp_logical[1] / float(kshp[1]))) - buf = np.zeros( - (nkern, stacklen) + self.kshp_logical, dtype=filtersflipped.dtype - ) - if self.kshp_logical_top_aligned: - roffset = coffset = 0 - else: - roffset = ( - kshp_logical[0] - (kshp[0] * rstride) - 1 + rstride - ) % rstride - coffset = ( - kshp_logical[1] - (kshp[1] * cstride) - 1 + cstride - ) % cstride - assert roffset >= 0 - assert coffset >= 0 - buf[:, :, roffset::rstride, coffset::cstride] = filtersflipped - filtersflipped = buf - del buf, rstride, cstride - - val = _valfrommode(self.out_mode) - bval = _bvalfromboundary("fill") - - with warnings.catch_warnings(): - warnings.simplefilter("ignore", np.ComplexWarning) - for b in range(bsize): - for n in range(nkern): - zz[b, n, ...].fill(0) - for im0 in range(stacklen): - # some cast generates a warning here - zz[b, n, ...] += _convolve2d( - img2d[b, im0, ...], - filtersflipped[n, im0, ...], - 1, - val, - bval, - 0, - ) - - if False: - if False and self.out_mode == "full": - img2d2 = np.zeros( - ( - bsize, - stacklen, - imshp[1] + 2 * kshp[0] - 2, - imshp[2] + 2 * kshp[1] - 2, - ) - ) - img2d2[ - :, - :, - kshp[0] - 1 : kshp[0] - 1 + imshp[1], - kshp[1] - 1 : kshp[1] - 1 + imshp[2], - ] = img2d - img2d = img2d2 - # N_image_shape = image_data.shape - - for b in range(bsize): - for n in range(nkern): - zz[b, n, ...].fill(0) - for im0 in range(stacklen): - for row in range(0, zz.shape[2], self.dx): - for col in range(0, zz.shape[3], self.dy): - zz[b, n, row, col] += ( - img2d[ - b, im0, row : row + kshp[0], col : col + kshp[1] - ] - * filtersflipped[n, im0, ::-1, ::-1] - ).sum() - - # We copy it to remove the Stride mismatch warning from DEBUG_MODE. - # The copy make that we return an object with the same stride as the c version. 
- # The copy don't affect the performance during our experience as in that case we - # execute the c version which is much faster. - if self.dx > 1 or self.dy > 1: - zz = zz[:, :, 0 :: self.dx, 0 :: self.dy].copy() - z[0] = zz - - def R_op(self, inputs, eval_points): - rval = None - if eval_points[0] is not None: - rval = self.make_node(eval_points[0], inputs[1]).outputs[0] - if eval_points[1] is not None: - if rval is None: - rval = self.make_node(inputs[0], eval_points[1]).outputs[0] - else: - rval += self.make_node(inputs[0], eval_points[1]).outputs[0] - return [rval] - - def grad(self, inp, grads): - inputs, kerns = inp - (gz,) = grads - - if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical: - raise NotImplementedError("todo") - - if self.out_mode == "valid" and (self.dx, self.dy) != (1, 1): - raise NotImplementedError( - "ERROR: ConvOp.grad is now disabled for 'valid' convolutions with" - " stride != (1, 1); call pytensor.tensor.nnet.conv2d() instead." - ) - - if self.dx not in (1, 2) or self.dy not in (1, 2): - raise NotImplementedError( - "ERROR: We disable ConvOp.grad now when output_mode is not" - " 'valid' and dx or dy are greater than 2, as there is a bug" - " in it. See `abstract_conv2d <>`_ for a version that support this." - ) - - all_shape = self.has_all_shape(self.imshp, self.kshp, self.nkern, self.bsize) - - if not all_shape and (self.dx != 1 or self.dy != 1): - raise ValueError( - "ConvOp.grad when dx!=1 or dy!=1 we must have all " - "the optional shape information" - ) - - # Determine gradient on kernels ######## - assert inputs.ndim == 4 and kerns.ndim == 4 - - newin = inputs.dimshuffle((1, 0, 2, 3)) - newgz = gz.dimshuffle((1, 0, 2, 3)) - - if self.out_mode == "valid": - (img, filters) = (newin, newgz) - kshp_logical = self.fulloutshp - kshp_logical_top_aligned = False - imshp_logical = None - (bsize, nkern) = (self.imshp[0], self.nkern) - imshp = (self.bsize, self.imshp[1], self.imshp[2]) - kshp = self.outshp - elif self.out_mode == "full": - (img, filters) = (newgz, newin) - kshp_logical = None - kshp_logical_top_aligned = True - imshp_logical = (self.bsize, self.fulloutshp[0], self.fulloutshp[1]) - (bsize, nkern) = (self.nkern, self.imshp[0]) - imshp = (self.bsize, self.outshp[0], self.outshp[1]) - kshp = self.imshp[1:] - else: - raise NotImplementedError( - "Only [full,valid] modes are currently supported." 
- ) - - filters = filters[:, :, ::-1, ::-1] # flip them - - dw = ConvOp( - imshp, - kshp, - nkern, - bsize, - 1, - 1, - output_mode="valid", - unroll_batch=None, - unroll_kern=None, - unroll_patch=None, - imshp_logical=imshp_logical, - kshp_logical=kshp_logical, - kshp_logical_top_aligned=kshp_logical_top_aligned, - verbose=self.verbose, - ) - - dw = dw(img, filters) - - if all_shape: - assert all(o == k for o, k in zip(dw.owner.op.outshp, self.kshp)) - if self.out_mode == "valid": - # before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1] - dw = dw.dimshuffle((1, 0, 2, 3)) - dw = dw[:, :, ::-1, ::-1] - - # Determine gradient on inputs ######## - mode = "valid" - if self.out_mode != "full": - mode = "full" - - filters = kerns.dimshuffle((1, 0, 2, 3)) - filters = filters[:, :, ::-1, ::-1] - - nkern = self.imshp[0] - imshp = (self.nkern, self.outshp[0], self.outshp[1]) - imshp_logical = (self.nkern, self.fulloutshp[0], self.fulloutshp[1]) - - din = ConvOp( - imshp, - self.kshp, - nkern, - self.bsize, - 1, - 1, - output_mode=mode, - unroll_batch=None, - unroll_kern=None, - unroll_patch=None, - imshp_logical=imshp_logical, - kshp_logical=None, - verbose=self.verbose, - ) - - din = din(gz, filters) - - assert all( - o is None or o == i for o, i in zip(din.owner.op.outshp, self.imshp[1:]) - ) - - # din and dw should have the same broadcasting pattern as the - # parameters they are the gradient of (resp. inputs and kerns). - if din.type.broadcastable != inputs.type.broadcastable: - din = specify_broadcastable( - din, *(ax for (ax, b) in enumerate(inputs.type.broadcastable) if b) - ) - if dw.type.broadcastable != kerns.type.broadcastable: - dw = specify_broadcastable( - dw, *(ax for (ax, b) in enumerate(kerns.type.broadcastable) if b) - ) - return [din, dw] - - def c_headers(self, **kwargs): - return ["", "", ""] - - def c_code_cache_version(self): - return (15, self.openmp, blas.blas_header_version()) - - def c_support_code(self, **kwargs): - return ( - """ -#define STRIDES(arr) (PyArray_STRIDES(arr)) -#define FULL 2 -#define SAME 1 -#define VALID 0 -#define MOD % -using namespace std; -""" - + blas.blas_header_text() - ) - - def use_blas(self): - """Return True if we will generate code that use gemm.""" - # the gemm version only support that case - if self.out_mode == "valid" and self.dx == 0 and self.dy == 0: - # We use a faster version in those case. - if ( - self.imshp != self.imshp_logical - or self.kshp != self.kshp_logical - or self.unroll_patch - or self.unroll_batch > 0 - or self.unroll_kern > 0 - ): - return False - return True - return False - - def c_libraries(self, **kwargs): - if self.use_blas(): - return blas.ldflags() - return [] - - def c_no_compile_args(self, **kwargs): - # when the ksph==(1,1) gcc 4.3.0 segfault during the - # compilation with -O3. 
This don't happen at -O2 - if pytensor.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1): - return ["-O3"] - else: - return [] - - def c_compile_args(self, **kwargs): - ret = [] - - if self.use_blas(): - ret = blas.ldflags(libs=False, flags=True) - if pytensor.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1): - ret += ["-O2"] - # Add the -fopenmp flags - ret += super().c_compile_args(**kwargs) - - return ret - - def c_lib_dirs(self, **kwargs): - if self.use_blas(): - return blas.ldflags(libs=False, libs_dir=True) - return [] - - def c_header_dirs(self, **kwargs): - if self.use_blas(): - return blas.ldflags(libs=False, include_dir=True) - return [] - - def c_code(self, node, name, inp, out, sub): - img2d, filtersflipped = inp - (z,) = out - if node.inputs[0].type.dtype != node.inputs[1].type.dtype: - raise NotImplementedError() - assert node.inputs[0].type.dtype == node.inputs[1].type.dtype - d = locals() - d.update(sub) - - all_shape = self.has_all_shape( - self.imshp, self.kshp, self.nkern, self.bsize - ) and self.has_all_shape(self.imshp_logical, self.kshp_logical) - - d["self_out_mode"] = self.out_mode - d["self_dx"] = self.dx - d["self_dy"] = self.dy - d["mode"] = self.out_mode.upper() - d["affectation"] = "=" - - # Default values, will be overridden if the shape info is provided - d["self_bsize"] = f"PyArray_DIMS({d['img2d']})[0]" - d["self_nkern"] = f"PyArray_DIMS({d['filtersflipped']})[0]" - d["self_outshp0"] = "-1" - d["self_outshp1"] = "-1" - d["self_imshp0"] = f"PyArray_DIMS({d['img2d']})[1]" - d["self_imshp1"] = f"PyArray_DIMS({d['img2d']})[2]" - d["self_imshp2"] = f"PyArray_DIMS({d['img2d']})[3]" - d["self_kshp0"] = f"PyArray_DIMS({d['filtersflipped']})[2]" - d["self_kshp1"] = f"PyArray_DIMS({d['filtersflipped']})[3]" - d["assert_size"] = "" - - # Override the default value if we have it - if self.kshp[0] is not None: - expected = d["self_kshp0"] - value = self.kshp[0] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of rows in the filter " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_kshp0"] = self.kshp[0] - if self.kshp[1] is not None: - expected = d["self_kshp1"] - value = self.kshp[1] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of columns in the filter " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_kshp1"] = self.kshp[1] - if self.outshp[0] is not None: - expected = "dim_zz[0]" - value = self.outshp[0] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of rows in the output " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_outshp0"] = self.outshp[0] - if self.outshp[1] is not None: - expected = "dim_zz[1]" - value = self.outshp[1] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of columns in the output " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, 
value=value, **sub - ) - d["self_outshp1"] = self.outshp[1] - if self.imshp[0] is not None: - expected = d["self_imshp0"] - value = self.imshp[0] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the image stack size (%%ld) " - "isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - expected = "kerns_dim[1]" - value = self.imshp[0] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the kernel stack size (%%ld) " - "isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_imshp0"] = self.imshp[0] - if self.imshp[1] is not None: - expected = d["self_imshp1"] - value = self.imshp[1] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of rows in the image " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_imshp1"] = self.imshp[1] - if self.imshp[2] is not None: - expected = d["self_imshp2"] - value = self.imshp[2] - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of columns in the image " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_imshp2"] = self.imshp[2] - if self.bsize is not None: - expected = d["self_bsize"] - value = self.bsize - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the batch size (%%ld) " - "isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_bsize"] = self.bsize - if self.nkern is not None: - expected = d["self_nkern"] - value = self.nkern - d[ - "assert_size" - ] += """ -if(%(value)s != %(expected)s){ - PyErr_Format(PyExc_ValueError, - "The hardcoded shape for the number of kernels in the filter " - "(%%ld) isn't the run time shape (%%ld).", - (long)%(value)s, (long)%(expected)s); - %(fail)s; -} - """ % dict( - expected=expected, value=value, **sub - ) - d["self_nkern"] = self.nkern - - # Other hard coded stuff only if we have all shapes - if all_shape: - d["self_kshp_logical_r"] = self.kshp_logical[0] - d["self_kshp_logical_c"] = self.kshp_logical[1] - d["self_kshp_logical_stride_r"] = int( - np.ceil(self.kshp_logical[0] / float(self.kshp[0])) - ) - d["self_kshp_logical_stride_c"] = int( - np.ceil(self.kshp_logical[1] / float(self.kshp[1])) - ) - d["self_imshp_logical_r"] = self.imshp_logical[1] - # numpy.B. 1 not 0 - d["self_imshp_logical_c"] = self.imshp_logical[2] - # numpy.B. 
2 not 1 - d["self_imshp_logical_stride_r"] = int( - np.ceil(self.imshp_logical[1] / float(self.imshp[1])) - ) - d["self_imshp_logical_stride_c"] = int( - np.ceil(self.imshp_logical[2] / float(self.imshp[2])) - ) - if self.imshp[0] != 1: - d["affectation"] = "+=" - d["all_shape"] = "1" - d["dim_zz_const"] = "const" - d["dim_zz_affect"] = "" - else: - d["affectation"] = "+=" - d["all_shape"] = "0" - d["dim_zz_const"] = "" - d["dim_zz_affect"] = ( - """ - if (mode == FULL) { - dim_zz[0] = (int)ceil((dim_im[0]+dim_ker0-1)/float(%(self_dx)s)); - dim_zz[1] = (int)ceil((dim_im[1]+dim_ker1-1)/float(%(self_dy)s)); - } else { - dim_zz[0] = (int)ceil((dim_im[0]-dim_ker0+1)/float(%(self_dx)s)); - dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(%(self_dy)s)); - } -""" - % d - ) - d["assert_size"] += ( - """ -// Check the stack size of the filter and images are equals -if(kerns_dim[1] != img2d_dim[1]){ - PyErr_Format(PyExc_ValueError, - "the filter stack size (%%ld) and image stack size (%%ld) differ", - (long)kerns_dim[1], (long)img2d_dim[1]); - %(fail)s; -} - """ - % sub - ) - - if self.kshp_logical_top_aligned: - d["self_kshp_logical_offset_r"] = 0 - d["self_kshp_logical_offset_c"] = 0 - elif all_shape: - rstride = d["self_kshp_logical_stride_r"] - cstride = d["self_kshp_logical_stride_c"] - d["self_kshp_logical_offset_r"] = ( - self.kshp_logical[0] - (self.kshp[0] * rstride) - 1 + rstride - ) % rstride - d["self_kshp_logical_offset_c"] = ( - self.kshp_logical[1] - (self.kshp[1] * cstride) - 1 + cstride - ) % cstride - del rstride, cstride - - if node.inputs[0].type.dtype == "float32": - d["type"] = "float" - elif node.inputs[0].type.dtype == "float64": - d["type"] = "double" - else: - raise NotImplementedError( - f"Type {node.inputs[0].type.dtype} not implemented" - ) - d["gemm"] = "dgemm_" - if d["type"] != "double": - d["gemm"] = "sgemm_" - - if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical: - if self.verbose: - _logger.debug( - "return imshp!=imshp_logical or" - " self.kshp != self.kshp_logical shape version" - ) - return _conv_op_code_a % d - - if self.unroll_patch: - if self.verbose: - _logger.debug("return unroll patch version. all_shape=%s", all_shape) - return _conv_op_code_unroll_patch % d - if (self.unroll_batch is not None and self.unroll_batch > 0) or ( - self.unroll_kern is not None and self.unroll_kern > 0 - ): - assert self.unroll_batch > 0 - assert self.unroll_kern > 0 - if self.verbose: - _logger.debug( - "return unrolled batch (%s) and kern code (%s)", - str(self.unroll_batch), - str(self.unroll_kern), - ) - return gen_conv_code_unroll_batch_kern( - d, self.unroll_batch, self.unroll_kern - ) - - # TODO: should we choose the unroll size automatically with the bigger divisor under 5? 
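        # Illustrative sketch only (not part of the original ConvOp code): the
        # TODO above could be answered with a small helper that picks the
        # largest divisor strictly below 5, e.g.:
        #
        #     def _auto_unroll(n, limit=5):
        #         """Largest divisor of ``n`` below ``limit`` (1 if none)."""
        #         return max((d for d in range(1, limit) if n % d == 0), default=1)
        #
        #     # _auto_unroll(6) == 3, _auto_unroll(8) == 4, _auto_unroll(7) == 1
        #
        # ``_auto_unroll`` is a hypothetical name used only for this sketch.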
- if self.out_mode == "valid" and self.dx == 0 and self.dy == 0: - if self.verbose: - _logger.debug("return gemm version") - return _conv_op_code_valid_gemm % d - else: - if self.verbose: - _logger.debug("return no gemm version") - return _conv_op_code_a % d - - -_conv_op_code_a = """ -const int mode=%(mode)s; -int typenum=0, typenum_f=0; -PyArrayObject *ain1=NULL, *ain2=NULL; -PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL; -const %(type)s fill_value = 0; - -int type_im=PyArray_TYPE(%(img2d)s); -int type_ker=PyArray_TYPE(%(filtersflipped)s); - -npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s}; -npy_intp dim_im_phys[2]={%(self_imshp1)s,%(self_imshp2)s}; -npy_intp dim_im_log[2]={%(self_imshp_logical_r)s,%(self_imshp_logical_c)s}; -npy_intp dim_ker_phys[2]={%(self_kshp0)s,%(self_kshp1)s}; -npy_intp dim_ker_log[2]={%(self_kshp_logical_r)s,%(self_kshp_logical_c)s}; - -PyArray_Dims img2d_shape; -npy_intp img2d_dim[4]={1,1,0,0}; -img2d_shape.ptr=img2d_dim; -img2d_shape.len=4; - -PyArray_Dims kerns_shape; -npy_intp kerns_dim[4]={1,1,0,0}; -kerns_shape.ptr=kerns_dim; -kerns_shape.len=4; -PyObject *img2d=NULL, *contig, *filtersflipped=NULL; - - -if(PyArray_NDIM(%(img2d)s)==2){ - img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1]; - img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0]; -}else if(PyArray_NDIM(%(img2d)s)==3){ - img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2]; - img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1]; - img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0]; -}else if(PyArray_NDIM(%(img2d)s)==4){ - img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3]; - img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2]; - img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1]; - img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0]; -}else { - PyErr_SetString(PyExc_ValueError, "img don't have a good shape"); - %(fail)s; -} - -if(PyArray_NDIM(%(filtersflipped)s)==3){ - kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2]; - kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1]; - kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0]; -}else if(PyArray_NDIM(%(filtersflipped)s)==4){ - kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3]; - kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2]; - kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1]; - kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0]; -}else{ - std::stringstream temp; - temp << "nddim="<= dim_im_log[0]){ - // the current row of the kernel is off the image - }else{ - int k = max((int)(pos_n-dim_im_log[1])+1,0); - int max_k=min(pos_n+1,(int)dim_ker_log[1]); - const %(type)s * idx_in=&in[ind0_phys*dim_im_phys[1]]; - for (int ind1_log=pos_n-k; k= PyArray_DIMS(%(z)s)[0]) %(fail)s; - if (kernel_idx >= PyArray_DIMS(%(z)s)[1]) %(fail)s; - if (img_row >= PyArray_DIMS(%(z)s)[2]) %(fail)s; - if (img_col >= PyArray_DIMS(%(z)s)[3]) %(fail)s; - } - z_p[0] += kbuf[img_row * kbufstride + kernel_idx]; - } - } - } - } -} -free(kbuf); -} -Py_XDECREF(img2d); -""" - - -def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1): - """ - c_code for ConvOp that unroll the batch size loop. 
- - """ - assert unroll_bsize > 0 and unroll_ksize > 0 - if ( - "unroll_bsize" in d - or "unroll_ksize" in d - or "unroll_iter" in d - or "unroll_biter" in d - or "unroll_kiter" in d - ): - raise ValueError( - "We can't use this dictionary as we will overwrite some of its content" - ) - d = d.copy() - - d["unroll_bsize"] = unroll_bsize - d["unroll_ksize"] = unroll_ksize - - def my_dup(st, size): - s = "" - for i in range(size): - d["unroll_iter"] = i - s += st % d - return s + "\n" - - def my_dup2(st): - s = "" - iter = 0 - for i in range(unroll_bsize): - d["unroll_biter"] = i - for j in range(unroll_ksize): - d["unroll_kiter"] = j - d["unroll_iter"] = iter - iter += 1 - s += st % d - return s + "\n" - - ret = ( - """ -const int mode=%(mode)s; -int typenum=0, typenum_f=0; -PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;; -const %(type)s fill_value = 0; - -int type_im=PyArray_TYPE(%(img2d)s); -int type_ker=PyArray_TYPE(%(filtersflipped)s); - -npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s}; -npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s}; -const npy_intp dim_ker0=%(self_kshp0)s; -const npy_intp dim_ker1=%(self_kshp1)s; - -PyArray_Dims img2d_shape; -npy_intp img2d_dim[4]={1,1,0,0}; -img2d_shape.ptr=img2d_dim; -img2d_shape.len=4; - -PyArray_Dims kerns_shape; -npy_intp kerns_dim[4]={1,1,0,0}; -kerns_shape.ptr=kerns_dim; -kerns_shape.len=4; -PyObject *img2d=NULL, *contig, *filtersflipped=NULL; - -if(PyArray_NDIM(%(img2d)s)==2){ - img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1]; - img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0]; -}else if(PyArray_NDIM(%(img2d)s)==3){ - img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2]; - img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1]; - img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0]; -}else if(PyArray_NDIM(%(img2d)s)==4){ - img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3]; - img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2]; - img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1]; - img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0]; -}else { - std::stringstream temp; - temp << "nddim="<= dim_im[0]){ - if(fill_value!=0) - for (int k=0; k < dim_ker1; k++) { -""" - % d - ) - ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;") - ret += ( - """ - } - }else{ - //do the part where kernel is to the right of the img - - int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0); - if(fill_value!=0){ - - for(k=0;k 1) -// We merge the 2 loop into one to make it easier to parallelize on both -// This is the equivalent of those 2 lines. 
-//for(int b=0;b< %(self_bsize)s;b++){ -// for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){ -for(int batch_kern_idx=0; - batch_kern_idx < %(self_bsize)s * %(self_nkern)s; - batch_kern_idx++){ - int b = batch_kern_idx / %(self_nkern)s; - int n_kern = batch_kern_idx %% %(self_nkern)s; - - %(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern)); - for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0; - - for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){ - - const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size)); - const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size)); - - int new_m; - - for (int iter_m=0; iter_m < dim_zz[0]; iter_m++) { - // Reposition index into input image based on requested output size - int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image - if (mode == FULL) new_m = pos_m ; - else new_m = (pos_m+dim_ker0-1); - - for (int iter_n=0; iter_n < dim_zz[1]; iter_n++) { // loop over columns - int pos_n=iter_n*%(self_dy)s; - %(type)s sum=0; - %(type)s sum2=0; - %(type)s sum3=0; - %(type)s sum4=0; - int nb_sum=0; - // Sum over kernel, if index into image is out of bounds - // fill with the value - for (int j=0; j < dim_ker0; j++) { - int ind0 = (new_m-j); - - if(mode==FULL){ - const %(type)s * idx_hvals=&hvals[j*dim_ker1]; - if(ind0 < 0 || ind0 >= dim_im[0]){ - if(fill_value!=0) - for (int k=0; k < dim_ker1; k++) { - sum+= idx_hvals[k] * fill_value; - } - }else{ - //do the part where kernel is to the right of the img - int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0); - if(fill_value!=0){ - - for(k=0;kdim_ker1-1 - && iter_ndim_ker1-1 - && iter_n=0; k--,im_idx++) { - sum+=idx_hvals[k]*idx_in[im_idx]; - sum2+=idx_hvals[k]*idx_in[im_idx+%(self_dy)s]; - sum3+=idx_hvals[k]*idx_in[im_idx+2*%(self_dy)s]; - sum4+=idx_hvals[k]*idx_in[im_idx+3*%(self_dy)s]; - } - }else if(iter_n + 2*%(self_dy)s < dim_zz[1]){ - nb_sum=2; - for (int k=dim_ker1-1,im_idx=pos_n; k >=0; k--,im_idx++) { - sum+=idx_hvals[k]*idx_in[im_idx]; - sum2+=idx_hvals[k]*idx_in[im_idx+%(self_dy)s]; - } - }else{ - nb_sum=1; - for (int k=dim_ker1-1,im_idx=pos_n; k >=0; k--,im_idx++) { - sum+=idx_hvals[k]*idx_in[im_idx]; - } - } - }//else valid mode - }//for j - switch(nb_sum){ - case 4: out[iter_m*dim_zz[1]+iter_n+3] %(affectation)s sum4; - case 3: out[iter_m*dim_zz[1]+iter_n+2] %(affectation)s sum3; - case 2: out[iter_m*dim_zz[1]+iter_n+1] %(affectation)s sum2; - case 1: out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum; - } - iter_n+=nb_sum-1; - }//for iter_n - }//for iter_m - }//for stack_size -}//for b and n_kern - -Py_XDECREF(img2d); -Py_XDECREF(filtersflipped); -""" diff --git a/pytensor/tensor/nnet/conv3d2d.py b/pytensor/tensor/nnet/conv3d2d.py deleted file mode 100644 index 3d161fe4db..0000000000 --- a/pytensor/tensor/nnet/conv3d2d.py +++ /dev/null @@ -1,329 +0,0 @@ -import pytensor -from pytensor import tensor as at -from pytensor.gradient import DisconnectedType -from pytensor.graph.basic import Apply -from pytensor.graph.op import Op -from pytensor.graph.rewriting.basic import ( - WalkingGraphRewriter, - copy_stack_trace, - node_rewriter, -) - - -def get_diagonal_subtensor_view(x, i0, i1): - """ - Helper function for DiagonalSubtensor and IncDiagonalSubtensor. - - Notes - ----- - It returns a partial view of x, not a partial copy. - - """ - # We have to cast i0 and i0 to int because python - # do not support indexing with 0-dim, 'int*' ndarrays. 
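    # Illustrative NumPy sketch (not part of the original helper): for the 2-D
    # case with i0=0 (rows) and i1=1 (columns), the same diagonal view that is
    # built below can be reproduced with plain stride tricks; ``u`` is a
    # made-up example array.
    #
    #     import numpy as np
    #     from numpy.lib.stride_tricks import as_strided
    #
    #     u = np.arange(28.0).reshape(7, 4)
    #     base = u[u.shape[1] - 1:, :]          # drop the first shape[1]-1 rows
    #     stripe = as_strided(
    #         base,
    #         shape=base.shape,
    #         strides=(base.strides[0], base.strides[1] - base.strides[0]),
    #     )
    #     # stripe[0] is [u[3, 0], u[2, 1], u[1, 2], u[0, 3]]: the thick
    #     # diagonal "stripe" described in the DiagonalSubtensor docstring.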
- i0 = int(i0) - i1 = int(i1) - if x.shape[i0] < x.shape[i1]: - raise NotImplementedError("is this allowed?") - idx = [slice(None)] * x.ndim - idx[i0] = slice(x.shape[i1] - 1, None, None) - xview = x.__getitem__(tuple(idx)) - strides = list(xview.strides) - if x.shape[i1] != 1: - strides[i1] -= strides[i0] - xview.strides = strides - return xview - - -class DiagonalSubtensor(Op): - """ - Return a form a nd diagonal subtensor. - - Parameters - ---------- - x - n-d tensor - i0 - Axis index in x - i1 - Axis index in x - - - Extended summary - ---------------- - ``x`` is some n-dimensional tensor, but this Op only deals with a - matrix-shaped slice, using axes i0 and i1. Without loss of - generality, suppose that ``i0`` picks out our ``row`` dimension, - and i1 the ``column`` dimension. - - So the relevant part of ``x`` is some matrix ``u``. Suppose it has 7 rows - and 4 columns:: - - [ 0 0 0 0 ] - [ 0 0 0 0 ] - [ 0 0 0 0 ] - [ 0 0 0 0 ] - [ 0 0 0 0 ] - [ 0 0 0 0 ] - - The view returned by this function is also a matrix. It's a thick, - diagonal ``stripe`` across u that discards the lower left triangle - and the upper right triangle: - - [ x 0 0 0 ] - [ x x 0 0 ] - [ x x x 0 ] - [ 0 x x x ] - [ 0 0 x x ] - [ 0 0 0 x ] - - In this case the return value would be this view of shape 3x4. The - returned view has the same number of dimensions as the input - ``x``, and the only difference is that the shape along dimension - ``i0`` has been reduced by ``shape[i1] - 1`` because of the - triangles that got chopped out. - - The NotImplementedError is meant to catch the case where shape[i0] - is too small for the stripe to reach across the matrix, in which - case it's not clear what this function should do. Maybe always - raise an error. I'd look back to the call site in the Conv3D to - see what's necessary at that point. - - """ - - __props__ = ("inplace",) - - def __str__(self): - if self.inplace: - return "%s{inplace}" % self.__class__.__name__ - return f"{self.__class__.__name__}" - - def __init__(self, inplace=False): - self.inplace = inplace - if inplace: - self.view_map = {0: [0]} - - def make_node(self, x, i0, i1): - _i0 = at.as_tensor_variable(i0) - _i1 = at.as_tensor_variable(i1) - # TODO: We could produce a more precise static shape output type - type_shape = (1 if shape == 1 else None for shape in x.type.shape) - out_type = at.TensorType(x.type.dtype, shape=type_shape) - return Apply(self, [x, _i0, _i1], [out_type()]) - - def perform(self, node, inputs, output_storage): - xview = get_diagonal_subtensor_view(*inputs) - if self.inplace: - output_storage[0][0] = xview - else: - output_storage[0][0] = xview.copy() - - def grad(self, inputs, g_outputs): - z = at.zeros_like(inputs[0]) - gx = inc_diagonal_subtensor(z, inputs[1], inputs[2], g_outputs[0]) - return [gx, DisconnectedType()(), DisconnectedType()()] - - def connection_pattern(self, node): - rval = [[True], [False], [False]] - return rval - - -diagonal_subtensor = DiagonalSubtensor(False) - - -class IncDiagonalSubtensor(Op): - """ - The gradient of DiagonalSubtensor. 
- - """ - - __props__ = ("inplace",) - - def __str__(self): - if self.inplace: - return "%s{inplace}" % self.__class__.__name__ - return f"{self.__class__.__name__}" - - def __init__(self, inplace=False): - self.inplace = inplace - if inplace: - self.destroy_map = {0: [0]} - - def make_node(self, x, i0, i1, amt): - _i0 = at.as_tensor_variable(i0) - _i1 = at.as_tensor_variable(i1) - return Apply(self, [x, _i0, _i1, amt], [x.type()]) - - def perform(self, node, inputs, output_storage): - x, i0, i1, amt = inputs - if not self.inplace: - x = x.copy() - xview = get_diagonal_subtensor_view(x, i0, i1) - xview += amt - output_storage[0][0] = x - - def grad(self, inputs, g_outputs): - x, i0, i1, amt = inputs - gy = g_outputs[0] - return [ - gy, - DisconnectedType()(), - DisconnectedType()(), - diagonal_subtensor(gy, i0, i1), - ] - - def connection_pattern(self, node): - rval = [[True], [False], [False], [True]] - return rval - - -inc_diagonal_subtensor = IncDiagonalSubtensor(False) - - -def conv3d( - signals, filters, signals_shape=None, filters_shape=None, border_mode="valid" -): - """ - Convolve spatio-temporal filters with a movie. - - It flips the filters. - - Parameters - ---------- - signals - Timeseries of images whose pixels have color channels. - Shape: [Ns, Ts, C, Hs, Ws]. - filters - Spatio-temporal filters. - Shape: [Nf, Tf, C, Hf, Wf]. - signals_shape - None or a tuple/list with the shape of signals. - filters_shape - None or a tuple/list with the shape of filters. - border_mode - One of 'valid', 'full' or 'half'. - - Notes - ----- - Another way to define signals: (batch, time, in channel, row, column) - Another way to define filters: (out channel,time,in channel, row, column) - - See Also - -------- - Someone made a script that shows how to swap the axes between - both 3d convolution implementations in PyTensor. 
See the last - `attachment `_ - - """ - - if isinstance(border_mode, str): - border_mode = (border_mode, border_mode, border_mode) - - if signals_shape is None: - _signals_shape_5d = signals.shape - else: - _signals_shape_5d = signals_shape - - if filters_shape is None: - _filters_shape_5d = filters.shape - else: - _filters_shape_5d = filters_shape - - Ns, Ts, C, Hs, Ws = _signals_shape_5d - Nf, Tf, C, Hf, Wf = _filters_shape_5d - - _signals_shape_4d = (Ns * Ts, C, Hs, Ws) - _filters_shape_4d = (Nf * Tf, C, Hf, Wf) - - if border_mode[1] != border_mode[2]: - raise NotImplementedError("height and width bordermodes must match") - conv2d_signal_shape = _signals_shape_4d - conv2d_filter_shape = _filters_shape_4d - if signals_shape is None: - conv2d_signal_shape = None - if filters_shape is None: - conv2d_filter_shape = None - - out_4d = pytensor.tensor.nnet.conv2d( - signals.reshape(_signals_shape_4d), - filters.reshape(_filters_shape_4d), - input_shape=conv2d_signal_shape, - filter_shape=conv2d_filter_shape, - border_mode=border_mode[1], - ) # ignoring border_mode[2] - - # compute the intended output size - if border_mode[1] == "valid": - Hout = Hs - Hf + 1 - Wout = Ws - Wf + 1 - elif border_mode[1] == "full": - Hout = Hs + Hf - 1 - Wout = Ws + Wf - 1 - elif border_mode[1] == "half": - Hout = Hs - (Hf % 2) + 1 - Wout = Ws - (Wf % 2) + 1 - elif border_mode[1] == "same": - raise NotImplementedError() - else: - raise ValueError("invalid border mode", border_mode[1]) - - # reshape the temporary output to restore its original size - out_tmp = out_4d.reshape((Ns, Ts, Nf, Tf, Hout, Wout)) - - # now sum out along the Tf to get the output - # but we have to sum on a diagonal through the Tf and Ts submatrix. - if Tf == 1: - # for Tf==1, no sum along Tf, the Ts-axis of the output is unchanged! 
- out_5d = out_tmp.reshape((Ns, Ts, Nf, Hout, Wout)) - else: - # for some types of convolution, pad out_tmp with zeros - if border_mode[0] == "valid": - Tpad = 0 - elif border_mode[0] == "full": - Tpad = Tf - 1 - elif border_mode[0] == "half": - Tpad = Tf // 2 - elif border_mode[0] == "same": - raise NotImplementedError() - else: - raise ValueError("invalid border mode", border_mode[0]) - - if Tpad == 0: - out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3) - else: - # pad out_tmp with zeros before summing over the diagonal - out_tmp_padded = at.zeros( - dtype=out_tmp.dtype, shape=(Ns, Ts + 2 * Tpad, Nf, Tf, Hout, Wout) - ) - out_tmp_padded = pytensor.tensor.subtensor.set_subtensor( - out_tmp_padded[:, Tpad : (Ts + Tpad), :, :, :, :], out_tmp - ) - out_5d = diagonal_subtensor(out_tmp_padded, 1, 3).sum(axis=3) - - return out_5d - - -@node_rewriter([DiagonalSubtensor, IncDiagonalSubtensor]) -def local_inplace_DiagonalSubtensor(fgraph, node): - """Also work for IncDiagonalSubtensor.""" - if ( - isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) - and not node.op.inplace - ): - new_op = node.op.__class__(inplace=True) - new_node = new_op(*node.inputs) - copy_stack_trace(node.outputs[0], new_node) - return [new_node] - return False - - -pytensor.compile.optdb.register( - "local_inplace_DiagonalSubtensor", - WalkingGraphRewriter( - local_inplace_DiagonalSubtensor, - failure_callback=WalkingGraphRewriter.warn_inplace, - ), - "fast_run", - "inplace", - position=60, -) diff --git a/pytensor/tensor/nnet/ctc.py b/pytensor/tensor/nnet/ctc.py deleted file mode 100644 index 04a71d97d6..0000000000 --- a/pytensor/tensor/nnet/ctc.py +++ /dev/null @@ -1,263 +0,0 @@ -import os -import sys - -import pytensor.tensor as at -from pytensor.configdefaults import config -from pytensor.gradient import grad_undefined -from pytensor.graph.basic import Apply -from pytensor.graph.rewriting.basic import node_rewriter -from pytensor.link.c.cmodule import GCC_compiler -from pytensor.link.c.op import ExternalCOp, OpenMPOp -from pytensor.tensor.blas import batched_dot -from pytensor.tensor.extra_ops import cpu_contiguous -from pytensor.tensor.rewriting.basic import register_canonicalize -from pytensor.tensor.type import ftensor3, fvector - - -def _ctc_find_lib(): - """ - Find the directory that contains libwarpctc.so - """ - if config.ctc__root != "": - for lib_dir in ("build", "lib", "lib64"): - lib_path = os.path.join(config.ctc__root, lib_dir) - if os.path.isdir(lib_path) and os.path.exists(lib_path): - lib_found = os.path.exists(os.path.join(lib_path, "libwarpctc.so")) - if lib_found: - return lib_path - return None - - -def _ctc_check_compile(ctc_lib_path): - preamble = """ -#include -#include "ctc.h" -""" - - body = """ -ctcOptions options; -memset(&options, 0, sizeof(ctcOptions)); -options.loc = CTC_CPU; -options.num_threads = 1; -""" - - params = [f"-I{os.path.dirname(__file__)}"] - if ctc_lib_path is not None: - params.extend([f"-I{os.path.join(config.ctc__root, 'include')}"]) - params.extend([f"-L{ctc_lib_path}"]) - params.extend(["-l", "warpctc"]) - compiler_res = GCC_compiler.try_flags( - params, preamble=preamble, body=body, try_run=False, output=True - ) - - avail, out, err = ( - compiler_res if isinstance(compiler_res, tuple) else (compiler_res, None, None) - ) - if not avail: - return ( - False, - ("cannot compile with warp-ctc. 
" "We got this error:\n" + str(err)), - ) - return True, None - - -def ctc_present(): - if ctc_present.avail is not None: - return ctc_present.avail - ctc_lib_path = _ctc_find_lib() - ctc_present.path = ctc_lib_path - ctc_present.avail, ctc_present.msg = _ctc_check_compile(ctc_present.path) - return ctc_present.avail - - -ctc_present.avail = None -ctc_present.msg = None -ctc_present.path = None - - -def ctc_available(): - if os.name == "nt": - ctc_available.msg = ("Windows platforms are currently not supported ",) - "by underlying CTC library (warp-ctc)." - return False - elif not ctc_present(): - ctc_available.msg = ctc_present.msg - return False - - ctc_available.path = ctc_present.path - return True - - -ctc_available.msg = None -ctc_available.path = None - - -class ConnectionistTemporalClassification(ExternalCOp, OpenMPOp): - """ - CTC loss function wrapper. - - Notes - ----- - Using the wrapper requires that Baidu's warp-ctc library is installed. - If the warp-ctc library is not on your compiler's default library path, - you must set the configuration variable ``config.ctc__root`` appropriately. - - Parameters - ---------- - compute_grad - If set to True, enables the computation of gradients of the CTC loss function. - """ - - __props__ = ("compute_grad",) - - _cop_num_inputs = 3 - _cop_num_outputs = 2 - - func_file = os.path.join("c_code", "ctc_wrapper.c") - func_name = "APPLY_SPECIFIC(ctc_cost_cpu)" - - def __init__(self, compute_grad=True, openmp=None): - if not ctc_available(): - raise RuntimeError( - "Baidu CTC is not available and " - "ConnectionistTemporalClassification Op " - "can not be constructed." - ) - - super().__init__(self.func_file, self.func_name) - OpenMPOp.__init__(self, openmp=openmp) - - self.compute_grad = compute_grad - # Return only the cost. Gradient will be returned by grad() - self.default_output = 0 - - def c_lib_dirs(self, **kwargs): - lib_dirs = [] - if ctc_available.path is not None: - lib_dirs += [ctc_available.path] - return lib_dirs - - def c_compile_args(self, **kwargs): - if ctc_available.path is not None: - if sys.platform != "darwin" and " " in ctc_available.path: - return ['-Wl,-rpath,"' + ctc_available.path + '"'] - else: - return ["-Wl,-rpath," + ctc_available.path] - return [] - - def c_libraries(self, **kwargs): - return ["warpctc"] - - def c_header_dirs(self, **kwargs): - header_dirs = [] - if config.ctc__root != "": - # We assume here that the header is available at the include directory - # of the CTC root directory. 
- header_dirs += [os.path.join(config.ctc__root, "include")] - return header_dirs - - def c_headers(self, **kwargs): - return ["ctc.h"] + super().c_headers(**kwargs) - - def make_node(self, activations, labels, input_lengths): - t_activations = at.as_tensor_variable(activations) - # Ensure activations array is C-contiguous - t_activations = cpu_contiguous(t_activations) - - t_labels = at.as_tensor_variable(labels) - t_input_lengths = at.as_tensor_variable(input_lengths) - - if t_activations.type.dtype != "float32": - raise TypeError("activations must use the float32 type!") - - if t_activations.ndim != 3: - raise ValueError("activations must have 3 dimensions.") - - if t_labels.type.dtype != "int32": - raise TypeError("labels must use the int32 type!") - - if t_labels.ndim != 2: - raise ValueError("labels must have 2 dimensions.") - - if t_input_lengths.type.dtype != "int32": - raise TypeError("input_lengths must use the int32 type!") - - if t_input_lengths.ndim != 1: - raise ValueError("input_lengths must have 1 dimension.") - - costs = fvector(name="ctc_cost") - outputs = [costs] - if self.compute_grad: - gradients = ftensor3(name="ctc_grad") - outputs += [gradients] - - return Apply( - self, inputs=[t_activations, t_labels, t_input_lengths], outputs=outputs - ) - - def L_op(self, inputs, outputs, output_grads): - assert self.compute_grad and len(outputs) == 2 - gradients = outputs[1] - assert gradients is not None - - grad_op = output_grads[0] - total_grad = batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle( - 1, 0, 2 - ) - return [ - total_grad, - grad_undefined(self, 1, inputs[1]), - grad_undefined(self, 2, inputs[2]), - ] - - -def ctc(activations, labels, input_lengths): - """ - Compute CTC loss function. - - Notes - ----- - Using the loss function requires that the Baidu's warp-ctc library be installed. - If the warp-ctc library is not on the compiler's default library path, the - configuration variable ``config.ctc__root`` must be properly set. - - Parameters - ---------- - activations - Three-dimensional tensor, which has a shape of (t, m, p), where - t is the time index, m is the minibatch index, and p is the index - over the probabilities of each symbol in the alphabet. The memory - layout is assumed to be in C-order, which consists in the slowest - to the fastest changing dimension, from left to right. In this case, - p is the fastest changing dimension. - labels - A 2-D tensor of all the labels for the minibatch. In each row, there - is a sequence of target labels. Negative values are assumed to be padding, - and thus are ignored. Blank symbol is assumed to have index 0 in the - alphabet. - input_lengths - A 1-D tensor with the number of time steps for each sequence in - the minibatch. - - Returns - ------- - 1-D array - Cost of each example in the minibatch. 
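    Examples
    --------
    A minimal, illustrative sketch (it assumes warp-ctc is installed and
    ``config.ctc__root`` is set appropriately; the variable names are made up):

    .. code-block:: python

        import pytensor
        from pytensor.tensor.nnet.ctc import ctc
        from pytensor.tensor.type import ftensor3, imatrix, ivector

        activations = ftensor3("activations")     # (t, m, p), float32
        labels = imatrix("labels")                # (m, max_label_length), int32
        input_lengths = ivector("input_lengths")  # (m,), int32

        cost = ctc(activations, labels, input_lengths)
        ctc_fn = pytensor.function([activations, labels, input_lengths], cost)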
- """ - return ConnectionistTemporalClassification()(activations, labels, input_lengths) - - -# Disable gradient computation if not needed -@register_canonicalize("fast_compile") -@node_rewriter([ConnectionistTemporalClassification]) -def local_ctc_no_grad(fgraph, node): - if isinstance(node.op, ConnectionistTemporalClassification): - if len(node.outputs) > 1: - if len(fgraph.clients[node.outputs[1]]) == 0: # gradient is not used - return [ - ConnectionistTemporalClassification(compute_grad=False)( - *node.inputs - ), - None, - ] - return False diff --git a/pytensor/tensor/nnet/neighbours.py b/pytensor/tensor/nnet/neighbours.py deleted file mode 100644 index 521e0ef99e..0000000000 --- a/pytensor/tensor/nnet/neighbours.py +++ /dev/null @@ -1,830 +0,0 @@ -""" -TODO: implement Images2Neibs.infer_shape() methods - -""" -import numpy as np - -import pytensor -from pytensor.gradient import grad_not_implemented, grad_undefined -from pytensor.graph.basic import Apply -from pytensor.link.c.op import COp -from pytensor.link.c.type import EnumList -from pytensor.tensor.basic import arange, as_tensor_variable, concatenate, stack, zeros -from pytensor.tensor.math import ceil_intdiv -from pytensor.tensor.subtensor import inc_subtensor, set_subtensor -from pytensor.tensor.type import matrix - - -class Images2Neibs(COp): - """ - Reshapes the input as a 2D tensor where each row is an pooling - example. - - Parameters - ---------- - mode : {'valid', 'ignore_borders', 'wrap_centered'} - - 'valid' : - Requires an input that is a multiple of the pooling factor - (in each direction). - - 'half' : - Equivalent to 'valid' if we pre-pad with zeros the input on - each side by (neib_shape[0]//2, neib_shape[1]//2) - - 'full' : - Equivalent to 'valid' if we pre-pad with zeros the input on - each side by (neib_shape[0] - 1, neib_shape[1] - 1) - - 'ignore_borders' : - Same as valid, but will ignore the borders if the shape(s) - of the input is not a multiple of the pooling factor(s). - - 'wrap_centered' : - ?? TODO comment - - """ - - __props__ = ("mode",) - BORDER_MODE = EnumList( - ("MODE_VALID", "valid"), - ("MODE_HALF", "half"), - ("MODE_FULL", "full"), - ("MODE_WRAP_CENTERED", "wrap_centered"), - ("MODE_IGNORE_BORDERS", "ignore_borders"), - ) - params_type = BORDER_MODE - - def get_params(self, node): - return self.mode - - def __init__(self, mode="valid"): - implemented_modes = self.BORDER_MODE.get_aliases() - if mode not in implemented_modes: - raise NotImplementedError( - f"Only modes {', '.join(implemented_modes)} have been implemented for {type(self).__name__}" - ) - self.mode = mode - - def __str__(self): - return self.__class__.__name__ + "{%s}" % self.mode - - def __setstate__(self, d): - self.__dict__.update(d) - if not hasattr(self, "mode"): - self.mode = "valid" - - def make_node(self, ten4, neib_shape, neib_step=None): - """ - Parameters - ---------- - ten4 : a list of lists of images - ten4 is of shape (list 1 dim, list 2 dim, row, col). - neib_shape - (r,c) where r is the height of the neighborhood in rows and c is - the width of the neighborhood in columns. - neib_step - (dr,dc) where dr is the number of rows to skip between patch and dc - is the number of columns. When None, this is the same as neib_shape - (patch are disjoint). - - Returns - ------- - matrix - A 2D matrix, written using the following pattern:: - - idx = 0 - for i in range(list 1 dim) - for j in range(list 2 dim) - for k in - for l in - output[idx,:] - = flattened version of ten4[i,j,l:l+r,k:k+c] - idx += 1 - - .. 
note:: The op isn't necessarily implemented internally with these - for loops, they're just the easiest way to describe the output - pattern. - - """ - ten4 = as_tensor_variable(ten4) - neib_shape = as_tensor_variable(neib_shape) - if neib_step is None: - neib_step = neib_shape - else: - neib_step = as_tensor_variable(neib_step) - - assert ten4.ndim == 4 - assert neib_shape.ndim == 1 - assert neib_step.ndim == 1 - - return Apply( - self, [ten4, neib_shape, neib_step], [matrix(dtype=ten4.type.dtype)] - ) - - def grad(self, inp, grads): - x, neib_shape, neib_step = inp - (gz,) = grads - - if self.mode in ("valid", "ignore_borders"): - if ( - neib_shape is neib_step - or neib_shape == neib_step - or - # PyTensor Constant == do not compare the data - # the equals function do that. - (hasattr(neib_shape, "equals") and neib_shape.equals(neib_step)) - ): - return [ - neibs2images(gz, neib_shape, x.shape, mode=self.mode), - grad_undefined(self, 1, neib_shape), - grad_undefined(self, 2, neib_step), - ] - - if self.mode in ["valid"]: - # Iterate over neighborhood positions, summing contributions. - def pos2map(pidx, pgz, prior_result, neib_shape, neib_step): - """ - Helper function that adds gradient contribution from a single - neighborhood position i,j. - pidx = Index of position within neighborhood. - pgz = Gradient of shape (batch_size*num_channels*neibs) - prior_result = Shape (batch_size, num_channnels, rows, cols) - neib_shape = Number of rows, cols in a neighborhood. - neib_step = Step sizes from image2neibs. - """ - nrows, ncols = neib_shape - rstep, cstep = neib_step - batch_size, num_channels, rows, cols = prior_result.shape - i = pidx // ncols - j = pidx - (i * ncols) - # This position does not touch some img pixels in valid mode. - result_indices = prior_result[ - :, - :, - i : (rows - nrows + i + 1) : rstep, - j : (cols - ncols + j + 1) : cstep, - ] - newshape = ( - (batch_size, num_channels) - + ((rows - nrows) // rstep + 1,) - + ((cols - ncols) // cstep + 1,) - ) - return inc_subtensor(result_indices, pgz.reshape(newshape)) - - indices = arange(neib_shape[0] * neib_shape[1]) - pgzs = gz.dimshuffle((1, 0)) - result, _ = pytensor.scan( - fn=pos2map, - sequences=[indices, pgzs], - outputs_info=zeros(x.shape), - non_sequences=[neib_shape, neib_step], - ) - grad_input = result[-1] - return [ - grad_input, - grad_undefined(self, 1, neib_shape), - grad_undefined(self, 2, neib_step), - ] - - return [ - grad_not_implemented(self, 0, x), - grad_undefined(self, 1, neib_shape), - grad_undefined(self, 2, neib_step), - ] - - def c_code_cache_version(self): - return (10,) - - def perform(self, node, inp, out_, params): - ten4, neib_shape, neib_step = inp - (z,) = out_ - # XXX: GpuImages2Neibs should not run this perform in DebugMode - if not isinstance(self, Images2Neibs): - raise pytensor.graph.utils.MethodNotDefined() - - def CEIL_INTDIV(a, b): - if a % b: - return (a // b) + 1 - else: - return a // b - - grid_c = -1 # number of patch in height - grid_d = -1 # number of patch in width - assert ten4.ndim == 4 - assert neib_shape.ndim == 1 - assert neib_shape.shape[0] == 2 - assert neib_step.ndim == 1 - assert neib_step.shape[0] == 2 - c, d = neib_shape - step_x, step_y = neib_step - mode = self.mode - if step_x <= 0 or step_y <= 0: - raise ValueError( - "neib_step wrong step ; values <= 0. Got " + str(neib_step) - ) - if c <= 0 or d <= 0: - raise ValueError("neib_shape values <=0. 
Got " + str(neib_shape)) - - if mode == "wrap_centered": - if (c % 2 != 1) or (d % 2 != 1): - raise TypeError( - "Images2Neibs: in mode wrap_centered need patch with odd shapes" - ) - - if (ten4.shape[2] < c) or (ten4.shape[3] < d): - raise TypeError( - "Images2Neibs: in wrap_centered mode, don't support" - " image shapes smaller then the patch shapes:" - f" neib_shape=({int(c)},{int(d)}), ten4[2:]=[{int(ten4.shape[2])},{int(ten4.shape[3])}]" - ) - grid_c = CEIL_INTDIV(ten4.shape[2], step_x) - grid_d = CEIL_INTDIV(ten4.shape[3], step_y) - elif mode == "valid": - if (ten4.shape[2] < c) or (((ten4.shape[2] - c) % step_x) != 0): - raise TypeError( - f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and" - f" ten4.shape[2]={int(ten4.shape[2])} not consistent" - ) - if (ten4.shape[3] < d) or (((ten4.shape[3] - d) % step_y) != 0): - raise TypeError( - f"neib_shape[1]={int(d)}, neib_step[1]={int(step_y)} and" - f" ten4.shape[3]={int(ten4.shape[3])} not consistent" - ) - # number of patch in height - grid_c = 1 + ((ten4.shape[2] - c) // step_x) - # number of patch in width - grid_d = 1 + ((ten4.shape[3] - d) // step_y) - elif mode == "ignore_borders": - # number of patch in height - grid_c = 1 + ((ten4.shape[2] - c) // step_x) - # number of patch in width - grid_d = 1 + ((ten4.shape[3] - d) // step_y) - elif mode == "half": - # This is equivalent to 'valid' with padding (c // 2, d // 2) on both sides - # Thus the expanded image will have size (h + 2 * (c // 2), w + 2 * (d // 2)) - # Plugging these in the equation for 'valid' we get - # h + 2 * (c // 2) - c = h - (c % 2) - # w + 2 * (d // 2) - c = w - (d % 2) - if (ten4.shape[2] < c) or (((ten4.shape[2] - (c % 2)) % step_x) != 0): - raise TypeError( - f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and" - f" ten4.shape[2]={int(ten4.shape[2])} not consistent" - ) - if (ten4.shape[3] < d) or (((ten4.shape[3] - (d % 2)) % step_y) != 0): - raise TypeError( - f"neib_shape[0]={int(d)}, neib_step[0]={int(step_y)} and" - f" ten4.shape[3]={int(ten4.shape[3])} not consistent" - ) - # number of patch in height - grid_c = 1 + ((ten4.shape[2] - (c % 2)) // step_x) - # number of patch in width - grid_d = 1 + ((ten4.shape[3] - (d % 2)) // step_y) - elif mode == "full": - # This is equivalent to 'valid' with padding (c - 1, d - 1) on both sides - # Thus the expanded image will have size (h + 2 * (c - 1), w + 2 * (d - 1)) - # Plugging these in the equation for 'valid' we get - # h + 2 * (c - 1) - c = h + c - 2 - # w + 2 * (d - 1) - c = w + d - 2 - if (ten4.shape[2] < c) or (((ten4.shape[2] + c - 2) % step_x) != 0): - raise TypeError( - f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and" - f" ten4.shape[2]={int(ten4.shape[2])} not consistent" - ) - if (ten4.shape[3] < d) or (((ten4.shape[3] + d - 2) % step_y) != 0): - raise TypeError( - f"neib_shape[0]={int(d)}, neib_step[0]={int(step_y)} and" - f" ten4.shape[3]={int(ten4.shape[3])} not consistent" - ) - # number of patch in height - grid_c = 1 + ((ten4.shape[2] + c - 2) // step_x) - # number of patch in width - grid_d = 1 + ((ten4.shape[3] + d - 2) // step_y) - else: - raise TypeError(f"Images2Neibs: unknown mode '{mode}'") - z_dim0 = grid_c * grid_d * ten4.shape[1] * ten4.shape[0] - z_dim1 = c * d - z[0] = np.empty((z_dim0, z_dim1), dtype=node.outputs[0].dtype) - - nb_batch = ten4.shape[0] - nb_stack = ten4.shape[1] - height = ten4.shape[2] - width = ten4.shape[3] - - wrap_centered_half_idx_shift_x = c // 2 - wrap_centered_half_idx_shift_y = d // 2 - for n in range(nb_batch): - for s in 
range(nb_stack): - # loop over the number of patch in height - for a in range(grid_c): - # loop over the number of patch in width - for b in range(grid_d): - z_row = b + grid_d * (a + grid_c * (s + nb_stack * n)) - for i in range(c): - ten4_2 = i + a * step_x - if mode == "wrap_centered": - ten4_2 -= wrap_centered_half_idx_shift_x - if ten4_2 < 0: - ten4_2 += height - elif ten4_2 >= height: - ten4_2 -= height - elif mode == "half": - ten4_2 -= wrap_centered_half_idx_shift_x - elif mode == "full": - ten4_2 -= c - 1 - if ten4_2 < 0 or ten4_2 >= height: - z[0][z_row, d * i : d * i + d] = 0 - else: - for j in range(d): - ten4_3 = j + b * step_y - if mode == "wrap_centered": - ten4_3 -= wrap_centered_half_idx_shift_y - if ten4_3 < 0: - ten4_3 += width - elif ten4_3 >= width: - ten4_3 -= width - elif mode == "half": - ten4_3 -= wrap_centered_half_idx_shift_y - elif mode == "full": - ten4_3 -= d - 1 - z_col = j + d * i - if ten4_3 < 0 or ten4_3 >= width: - z[0][z_row, z_col] = 0 - else: - z[0][z_row, z_col] = ten4[n, s, ten4_2, ten4_3] - - def infer_shape(self, fgraph, node, input_shape): - in_shape = input_shape[0] - c, d = node.inputs[1] - step_x, step_y = node.inputs[2] - if self.mode == "wrap_centered": - grid_c = ceil_intdiv(in_shape[2], step_x) - grid_d = ceil_intdiv(in_shape[3], step_y) - elif self.mode == "valid": - grid_c = 1 + ((in_shape[2] - c) // step_x) - grid_d = 1 + ((in_shape[3] - d) // step_y) - elif self.mode == "ignore_borders": - grid_c = 1 + ((in_shape[2] - c) // step_x) - grid_d = 1 + ((in_shape[3] - d) // step_y) - elif self.mode == "half": - grid_c = 1 + ((in_shape[2] - (c % 2)) // step_x) - grid_d = 1 + ((in_shape[3] - (d % 2)) // step_y) - elif self.mode == "full": - grid_c = 1 + ((in_shape[2] + c - 2) // step_x) - grid_d = 1 + ((in_shape[3] + d - 2) // step_y) - else: - raise TypeError(f"Images2Neibs: unknown mode '{self.mode}'") - z_dim0 = grid_c * grid_d * in_shape[1] * in_shape[0] - z_dim1 = c * d - return [(z_dim0, z_dim1)] - - def c_code(self, node, name, inp, out, sub): - return """ -#ifndef CEIL_INTDIV -#define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0)) -#endif - - int grid_c = -1; //number of patch in height - int grid_d = -1; //number of patch in width - { - if (PyArray_NDIM(%(ten4)s) != 4) - { - PyErr_Format(PyExc_TypeError, "ten4 wrong rank"); - %(fail)s; - } - if (PyArray_NDIM(%(neib_shape)s) != 1) - { - PyErr_Format(PyExc_TypeError, "neib_shape wrong rank"); - %(fail)s; - } - if ( (PyArray_DIMS(%(neib_shape)s))[0] != 2) - { - PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to" - " contain 2 elements"); - %(fail)s; - } - if (PyArray_NDIM(%(neib_step)s) != 1) - { - PyErr_Format(PyExc_TypeError, "neib_step wrong rank"); - %(fail)s; - } - if ( (PyArray_DIMS(%(neib_step)s))[0] != 2) - { - PyErr_Format(PyExc_TypeError, - "neib_step wrong step ; has to contain 2 elements"); - %(fail)s; - } - - // (c,d) = neib_shape - const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); - const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1); - // (step_x,step_y) = neib_step - const dtype_%(neib_step)s step_x = *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0); - const dtype_%(neib_step)s step_y = *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1); - - if (step_x <=0 || step_y <=0) - { - PyErr_Format(PyExc_ValueError, - "neib_step wrong step ; values <= 0. 
Got %%lld %%lld.", - (long long) step_x, (long long) step_y); - %(fail)s; - } - - if (c <=0 || d <=0) - { - PyErr_Format(PyExc_ValueError, - "neib_shape values <= 0. Got %%lld %%lld.", - (long long)c, (long long)d); - %(fail)s; - } - - if (%(mode)s == MODE_WRAP_CENTERED) { - if (c%%2!=1 || d%%2!=1){ - PyErr_Format(PyExc_TypeError, - "Images2Neibs: in mode wrap_centered" - " need patch with odd shapes"); - %(fail)s; - } - if ( (PyArray_DIMS(%(ten4)s))[2] < c || - (PyArray_DIMS(%(ten4)s))[3] < d) - { - PyErr_Format(PyExc_TypeError, - "Images2Neibs: in wrap_centered mode, don't support image" - " shapes smaller then the patch shapes:" - " neib_shape=(%%ld,%%ld), ten4[2:]=[%%ld,%%ld]", - (long int)c, (long int)d, - (long int)(PyArray_DIMS(%(ten4)s)[2]), - (long int)(PyArray_DIMS(%(ten4)s)[3])); - %(fail)s; - } - grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x); - grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y); - - } else if (%(mode)s == MODE_VALID) { - if ( ((PyArray_DIMS(%(ten4)s))[2] < c) || - ( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0)) - { - PyErr_Format(PyExc_TypeError, - "neib_shape[0]=%%ld, neib_step[0]=%%ld and" - " ten4.shape[2]=%%ld not consistent", - (long int)c, (long int)step_x, - (long int)(PyArray_DIMS(%(ten4)s)[2])); - %(fail)s; - } - if ( ((PyArray_DIMS(%(ten4)s))[3] < d) || - ( (((PyArray_DIMS(%(ten4)s))[3]-d) %% step_y)!=0)) - { - PyErr_Format(PyExc_TypeError, - "neib_shape[1]=%%ld, neib_step[1]=%%ld and" - " ten4.shape[3]=%%ld not consistent", - (long int)d, (long int)step_y, - (long int)(PyArray_DIMS(%(ten4)s)[3])); - %(fail)s; - } - //number of patch in height - grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); - //number of patch in width - grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); - } else if (%(mode)s == MODE_IGNORE_BORDERS) { - //number of patch in height - grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); - //number of patch in width - grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); - } else if (%(mode)s == MODE_HALF) { - if ( ((PyArray_DIMS(%(ten4)s))[2] < c) || - ( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0)) - { - PyErr_Format(PyExc_TypeError, - "neib_shape[0]=%%ld, neib_step[0]=%%ld and" - " ten4.shape[2]=%%ld not consistent", - (long int)c, (long int)step_x, - (long int)(PyArray_DIMS(%(ten4)s)[2])); - %(fail)s; - } - if ( ((PyArray_DIMS(%(ten4)s))[3] < d) || - ( (((PyArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0)) - { - PyErr_Format(PyExc_TypeError, - "neib_shape[1]=%%ld, neib_step[1]=%%ld and" - " ten4.shape[3]=%%ld not consistent", - (long int)d, (long int)step_y, - (long int)(PyArray_DIMS(%(ten4)s)[3])); - %(fail)s; - } - //number of patch in height - grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x); - //number of patch in width - grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y); - } else if (%(mode)s == MODE_FULL) { - if ( ((PyArray_DIMS(%(ten4)s))[2] < c) || - ( (((PyArray_DIMS(%(ten4)s))[2]+c-2) %% step_x)!=0)) - { - PyErr_Format(PyExc_TypeError, - "neib_shape[0]=%%ld, neib_step[0]=%%ld and" - " ten4.shape[2]=%%ld not consistent", - (long int)c, (long int)step_x, - (long int)(PyArray_DIMS(%(ten4)s)[2])); - %(fail)s; - } - if ( ((PyArray_DIMS(%(ten4)s))[3] < d) || - ( (((PyArray_DIMS(%(ten4)s))[3]+d-2) %% step_y)!=0)) - { - PyErr_Format(PyExc_TypeError, - "neib_shape[1]=%%ld, neib_step[1]=%%ld and" - " ten4.shape[3]=%%ld not consistent", - (long int)d, (long int)step_y, - (long int)(PyArray_DIMS(%(ten4)s)[3])); - %(fail)s; - } - //number of patch in height - grid_c = 
1+(((PyArray_DIMS(%(ten4)s))[2]+c-2)/step_x); - //number of patch in width - grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]+d-2)/step_y); - } else { - PyErr_Format(PyExc_TypeError, - "Images2Neibs: unknown mode %%d", %(mode)s); - %(fail)s; - } - - // new dimensions for z - const npy_intp z_dim1 = c * d; - const npy_intp z_dim0 = grid_c - * grid_d - * (PyArray_DIMS(%(ten4)s))[1] - * (PyArray_DIMS(%(ten4)s))[0]; - - if ((NULL == %(z)s) - || ((PyArray_DIMS(%(z)s))[0] != z_dim0 ) - || ((PyArray_DIMS(%(z)s))[1] != z_dim1 ) - ) - { - Py_XDECREF(%(z)s); - npy_intp dims[2]; - dims[0] = z_dim0; - dims[1] = z_dim1; - - %(z)s = (PyArrayObject*) PyArray_EMPTY(2, - dims, - PyArray_TYPE((PyArrayObject*) py_%(ten4)s), - 0); - - if (!%(z)s) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc z output"); - %(fail)s; - } - } - } - - { // NESTED SCOPE - - const int nb_batch = (PyArray_DIMS(%(ten4)s))[0]; - const int nb_stack = (PyArray_DIMS(%(ten4)s))[1]; - const int height = (PyArray_DIMS(%(ten4)s))[2]; - const int width = (PyArray_DIMS(%(ten4)s))[3]; - - // (c,d) = neib_shape - const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); - const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1); - // (step_x,step_y) = neib_step - const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0); - const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1); - - const int wrap_centered_half_idx_shift_x = c/2; - const int wrap_centered_half_idx_shift_y = d/2; - // Oh this is messed up... - for (int n = 0; n < nb_batch; n++) // loop over batches - for (int s = 0; s < nb_stack; s++) // loop over stacks - for (int a = 0; a < grid_c; a++) // loop over the number of patch in height - for (int b = 0; b < grid_d; b++) // loop over the number of patch in width - { - int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n)); - for (int i = 0; i < c; i++) // loop over c - { - int ten4_2 = i + a * step_x; - if (%(mode)s == MODE_WRAP_CENTERED) { - ten4_2 -= wrap_centered_half_idx_shift_x; - if ( ten4_2 < 0 ) ten4_2 += height; - else if (ten4_2 >= height) ten4_2 -= height; - } else if (%(mode)s == MODE_HALF) { - ten4_2 -= wrap_centered_half_idx_shift_x; - } else if (%(mode)s == MODE_FULL) { - ten4_2 -= c - 1; - } - if (ten4_2 < 0 | ten4_2 >= height) { - dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, d * i); - memset(curr_z, 0, d*sizeof(*curr_z)); - } else { - for (int j = 0; j < d; j++) // loop over d - { - int ten4_3 = j + b * step_y; - if (%(mode)s == MODE_WRAP_CENTERED) { - ten4_3 -= wrap_centered_half_idx_shift_y; - if ( ten4_3 < 0 ) ten4_3 += width; - else if (ten4_3 >= width) ten4_3 -= width; - } else if (%(mode)s == MODE_HALF) { - ten4_3 -= wrap_centered_half_idx_shift_y; - } else if (%(mode)s == MODE_FULL) { - ten4_3 -= d - 1; - } - int z_col = j + d * i; - dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col); - if (ten4_3 < 0 | ten4_3 >= width) { - *curr_z = 0; - } else { - *curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3)); - } - } - } - } - } - } // END NESTED SCOPE - """ % dict( - ten4=inp[0], - neib_shape=inp[1], - neib_step=inp[2], - z=out[0], - fail=sub["fail"], - mode=sub["params"], - ) - - -def images2neibs(ten4, neib_shape, neib_step=None, mode="valid"): - r""" - Function :func:`images2neibs ` - allows to apply a sliding window operation to a tensor containing - images or other two-dimensional objects. 
- The sliding window operation loops over points in input data and stores - a rectangular neighbourhood of each point. - It is possible to assign a step of selecting patches (parameter `neib_step`). - - Parameters - ---------- - ten4 : A 4d tensor-like - A 4-dimensional tensor which represents a list of lists of images. - It should have shape (list 1 dim, list 2 dim, row, col). The first - two dimensions can be useful to store different channels and batches. - neib_shape : A 1d tensor-like of 2 values - A tuple containing two values: height and width of the neighbourhood. - It should have shape (r,c) where r is the height of the neighborhood - in rows and c is the width of the neighborhood in columns. - neib_step : A 1d tensor-like of 2 values - (dr,dc) where dr is the number of rows to skip between patch and dc is - the number of columns. The parameter should be a tuple of two elements: - number of rows and number of columns to skip each iteration. - Basically, when the step is 1, the neighbourhood of every first element - is taken and every possible rectangular subset is returned. - By default it is equal to `neib_shape` in other words, the patches are - disjoint. When the step is greater than `neib_shape`, some elements are - omitted. When None, this is the same as neib_shape (patch are disjoint). - mode : {'valid', 'ignore_borders', 'wrap_centered', 'half'} - ``valid`` - Requires an input that is a multiple of the - pooling factor (in each direction). - ``half`` - Equivalent to 'valid' if we pre-pad with zeros the input on - each side by (neib_shape[0]//2, neib_shape[1]//2) - ``full`` - Equivalent to 'valid' if we pre-pad with zeros the input on - each side by (neib_shape[0] - 1, neib_shape[1] - 1) - ``ignore_borders`` - Same as valid, but will ignore the borders if the shape(s) of - the input is not a multiple of the pooling factor(s). - ``wrap_centered`` - ?? TODO comment - - Returns - ------- - object - Reshapes the input as a 2D tensor where each row is an - pooling example. Pseudo-code of the output: - - .. code-block:: python - - idx = 0 - for i in range(list 1 dim): - for j in range(list 2 dim): - for k in : - for l in : - output[idx,:] - = flattened version of ten4[i,j,l:l+r,k:k+c] - idx += 1 - - .. note:: The operation isn't necessarily implemented internally with - these for loops, they're just the easiest way to describe the - output pattern. - - Notes - ----- - .. note:: - Currently the step size should be chosen in the way that the - corresponding dimension :math:`i` (width or height) is equal - to :math:`n * step\_size_i + neib\_shape_i` for some :math:`n`. - - Examples - -------- - - .. code-block:: python - - # Defining variables - images = pytensor.tensor.type.tensor4('images') - neibs = images2neibs(images, neib_shape=(5, 5)) - - # Constructing pytensor function - window_function = pytensor.function([images], neibs) - - # Input tensor (one image 10x10) - im_val = np.arange(100.).reshape((1, 1, 10, 10)) - - # Function application - neibs_val = window_function(im_val) - - .. note:: The underlying code will construct a 2D tensor of disjoint - patches 5x5. The output has shape 4x25. - - """ - return Images2Neibs(mode)(ten4, neib_shape, neib_step) - - -def neibs2images(neibs, neib_shape, original_shape, mode="valid"): - """ - Function :func:`neibs2images ` - performs the inverse operation of - :func:`images2neibs `. It inputs - the output of :func:`images2neibs ` - and reconstructs its input. 
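    A minimal NumPy-only sketch of this round trip for the default
    disjoint-patch case (the ``*_ref`` helpers below are illustrative and are
    not part of this module):

    .. code-block:: python

        import numpy as np

        def images2neibs_ref(ten4, neib_shape):
            # Default behaviour: neib_step == neib_shape, i.e. disjoint patches.
            b, s, h, w = ten4.shape
            r, c = neib_shape
            rows = [
                ten4[i, j, top:top + r, left:left + c].ravel()
                for i in range(b)
                for j in range(s)
                for top in range(0, h - r + 1, r)
                for left in range(0, w - c + 1, c)
            ]
            return np.stack(rows)

        def neibs2images_ref(neibs, neib_shape, original_shape):
            # Inverse of the helper above: scatter each row back into place.
            b, s, h, w = original_shape
            r, c = neib_shape
            out = np.empty(original_shape, dtype=neibs.dtype)
            idx = 0
            for i in range(b):
                for j in range(s):
                    for top in range(0, h - r + 1, r):
                        for left in range(0, w - c + 1, c):
                            out[i, j, top:top + r, left:left + c] = neibs[idx].reshape(r, c)
                            idx += 1
            return out

        im = np.arange(100.0).reshape((1, 1, 10, 10))
        patches = images2neibs_ref(im, (5, 5))   # shape (4, 25)
        assert np.array_equal(neibs2images_ref(patches, (5, 5), im.shape), im)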
- - Parameters - ---------- - neibs : 2d tensor - Like the one obtained by - :func:`images2neibs `. - neib_shape - `neib_shape` that was used in - :func:`images2neibs `. - original_shape - Original shape of the 4d tensor given to - :func:`images2neibs ` - - Returns - ------- - object - Reconstructs the input of - :func:`images2neibs `, - a 4d tensor of shape `original_shape`. - - Notes - ----- - Currently, the function doesn't support tensors created with - `neib_step` different from default value. This means that it may be - impossible to compute the gradient of a variable gained by - :func:`images2neibs ` w.r.t. - its inputs in this case, because it uses - :func:`images2neibs ` for - gradient computation. - - Examples - -------- - Example, which uses a tensor gained in example for - :func:`images2neibs `: - - .. code-block:: python - - im_new = neibs2images(neibs, (5, 5), im_val.shape) - # PyTensor function definition - inv_window = pytensor.function([neibs], im_new) - # Function application - im_new_val = inv_window(neibs_val) - - .. note:: The code will output the initial image array. - - """ - neibs = as_tensor_variable(neibs) - neib_shape = as_tensor_variable(neib_shape) - original_shape = as_tensor_variable(original_shape) - - new_neib_shape = stack([original_shape[-1] // neib_shape[1], neib_shape[1]]) - output_2d = images2neibs( - neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode - ) - - if mode == "ignore_borders": - # We use set_subtensor to accept original_shape we can't infer - # the shape and still raise error when it don't have the right - # shape. - valid_shape = original_shape - valid_shape = set_subtensor( - valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0] - ) - valid_shape = set_subtensor( - valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1] - ) - output_4d = output_2d.reshape(valid_shape, ndim=4) - # padding the borders with zeros - for d in (2, 3): - pad_shape = list(output_4d.shape) - pad_shape[d] = original_shape[d] - valid_shape[d] - output_4d = concatenate([output_4d, zeros(pad_shape)], axis=d) - elif mode == "valid": - # TODO: we do not implement all mode with this code. - # Add a check for the good cases. 
- output_4d = output_2d.reshape(original_shape, ndim=4) - else: - raise NotImplementedError(f"neibs2images do not support mode={mode}") - - return output_4d diff --git a/pytensor/tensor/nnet/opt.py b/pytensor/tensor/nnet/opt.py deleted file mode 100644 index 33e35cb96b..0000000000 --- a/pytensor/tensor/nnet/opt.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.nnet.opt` is deprecated; use `pytensor.tensor.nnet.rewriting` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.tensor.nnet.rewriting import * # noqa: F401 E402 F403 diff --git a/pytensor/tensor/nnet/rewriting.py b/pytensor/tensor/nnet/rewriting.py deleted file mode 100644 index 4b4847782e..0000000000 --- a/pytensor/tensor/nnet/rewriting.py +++ /dev/null @@ -1,605 +0,0 @@ -""" -Optimizations addressing the ops in nnet root directory -""" - -import pytensor -from pytensor import compile -from pytensor.compile import optdb -from pytensor.configdefaults import config -from pytensor.graph.rewriting.basic import ( - MetaNodeRewriterSkip, - WalkingGraphRewriter, - copy_stack_trace, - in2out, - node_rewriter, -) -from pytensor.tensor.nnet.abstract_conv import ( - AbstractConv2d, - AbstractConv2d_gradInputs, - AbstractConv2d_gradWeights, - AbstractConv3d, - AbstractConv3d_gradInputs, - AbstractConv3d_gradWeights, - get_conv_output_shape, -) -from pytensor.tensor.nnet.blocksparse import ( - SparseBlockGemv, - SparseBlockOuter, - sparse_block_gemv_inplace, - sparse_block_outer_inplace, -) - -# Cpu implementation -from pytensor.tensor.nnet.conv import ConvOp, conv2d -from pytensor.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights -from pytensor.tensor.nnet.corr3d import ( - Corr3dMM, - Corr3dMMGradInputs, - Corr3dMMGradWeights, -) -from pytensor.tensor.rewriting.basic import register_specialize_device -from pytensor.tensor.type import TensorType - - -@node_rewriter([SparseBlockGemv], inplace=True) -def local_inplace_sparse_block_gemv(fgraph, node): - """ - SparseBlockGemv(inplace=False) -> SparseBlockGemv(inplace=True) - """ - if isinstance(node.op, SparseBlockGemv) and not node.op.inplace: - new_node = sparse_block_gemv_inplace(*node.inputs) - copy_stack_trace(node.outputs[0], new_node) - return [new_node] - return False - - -compile.optdb.register( - "local_inplace_sparse_block_gemv", - WalkingGraphRewriter( - local_inplace_sparse_block_gemv, - failure_callback=WalkingGraphRewriter.warn_inplace, - ), - "fast_run", - "inplace", - position=60, -) - - -@node_rewriter([SparseBlockOuter], inplace=True) -def local_inplace_sparse_block_outer(fgraph, node): - """ - SparseBlockOuter(inplace=False) -> SparseBlockOuter(inplace=True) - """ - if isinstance(node.op, SparseBlockOuter) and not node.op.inplace: - new_node = sparse_block_outer_inplace(*node.inputs) - copy_stack_trace(node.outputs[0], new_node) - return [new_node] - return False - - -compile.optdb.register( - "local_inplace_sparse_block_outer", - WalkingGraphRewriter( - local_inplace_sparse_block_outer, - failure_callback=WalkingGraphRewriter.warn_inplace, - ), - "fast_run", - "inplace", - position=60, -) - - -# Conv opts -@node_rewriter([AbstractConv2d]) -def local_abstractconv_gemm(fgraph, node): - # If config.blas__ldflags is empty, PyTensor will use - # a NumPy C implementation of [sd]gemm_. 
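# Illustrative aside (not part of the deleted module): the GEMM rewrites below
# replace AbstractConv* nodes with CorrMM / Corr3dMM correlation Ops.  Since
# correlation does not flip the kernel, graphs built with filter_flip=True
# first reverse the kernel's spatial axes.  The slicing trick used for that,
# sketched with NumPy semantics:
#
#     flip = (slice(None),) * (kern.ndim - 2) + (slice(None, None, -1),) * 2
#     kern[flip]   # equivalent to kern[:, :, ::-1, ::-1] for a 4d kernel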
- if config.cxx == "" or node.inputs[0].dtype == "float16": - return - if not isinstance(node.op, AbstractConv2d): - return None - img, kern = node.inputs - if not isinstance(img.type, TensorType) or not isinstance(kern.type, TensorType): - return None - - # need to flip the kernel if necessary - if node.op.filter_flip: - flip = (slice(None),) * (kern.ndim - 2) + (slice(None, None, -1),) * 2 - kern = kern[flip] - rval = CorrMM( - border_mode=node.op.border_mode, - subsample=node.op.subsample, - filter_dilation=node.op.filter_dilation, - num_groups=node.op.num_groups, - unshared=node.op.unshared, - )(img, kern) - copy_stack_trace(node.outputs[0], rval) - - return [rval] - - -@node_rewriter([AbstractConv3d]) -def local_abstractconv3d_gemm(fgraph, node): - # If config.blas__ldflags is empty, PyTensor will use - # a NumPy C implementation of [sd]gemm_. - if config.cxx == "" or node.inputs[0].dtype == "float16": - return - if not isinstance(node.op, AbstractConv3d): - return None - img, kern = node.inputs - if not isinstance(img.type, TensorType) or not isinstance(kern.type, TensorType): - return None - - # need to flip the kernel if necessary - if node.op.filter_flip: - kern = kern[:, :, ::-1, ::-1, ::-1] - rval = Corr3dMM( - border_mode=node.op.border_mode, - subsample=node.op.subsample, - filter_dilation=node.op.filter_dilation, - num_groups=node.op.num_groups, - )(img, kern) - copy_stack_trace(node.outputs[0], rval) - - return [rval] - - -@node_rewriter([AbstractConv2d_gradWeights]) -def local_abstractconv_gradweight_gemm(fgraph, node): - # If config.blas__ldflags is empty, PyTensor will use - # a NumPy C implementation of [sd]gemm_. - if config.cxx == "" or node.inputs[0].dtype == "float16": - return - if not isinstance(node.op, AbstractConv2d_gradWeights): - return None - img, topgrad, shape = node.inputs - if not isinstance(img.type, TensorType) or not isinstance(topgrad.type, TensorType): - return None - - rval = CorrMM_gradWeights( - border_mode=node.op.border_mode, - subsample=node.op.subsample, - filter_dilation=node.op.filter_dilation, - num_groups=node.op.num_groups, - unshared=node.op.unshared, - )(img, topgrad, shape) - copy_stack_trace(node.outputs[0], rval) - - # need to flip the kernel if necessary - if node.op.filter_flip: - flip = (slice(None),) * (rval.ndim - 2) + (slice(None, None, -1),) * 2 - rval = rval[flip] - copy_stack_trace(node.outputs[0], rval) - - return [rval] - - -@node_rewriter([AbstractConv3d_gradWeights]) -def local_abstractconv3d_gradweight_gemm(fgraph, node): - # If config.blas__ldflags is empty, PyTensor will use - # a NumPy C implementation of [sd]gemm_. - if config.cxx == "" or node.inputs[0].dtype == "float16": - return - if not isinstance(node.op, AbstractConv3d_gradWeights): - return None - img, topgrad, shape = node.inputs - if not isinstance(img.type, TensorType) or not isinstance(topgrad.type, TensorType): - return None - - rval = Corr3dMMGradWeights( - border_mode=node.op.border_mode, - subsample=node.op.subsample, - filter_dilation=node.op.filter_dilation, - num_groups=node.op.num_groups, - )(img, topgrad, shape) - copy_stack_trace(node.outputs[0], rval) - - # need to flip the kernel if necessary - if node.op.filter_flip: - rval = rval[:, :, ::-1, ::-1, ::-1] - copy_stack_trace(node.outputs[0], rval) - - return [rval] - - -@node_rewriter([AbstractConv2d_gradInputs]) -def local_abstractconv_gradinputs_gemm(fgraph, node): - # If config.blas__ldflags is empty, PyTensor will use - # a NumPy C implementation of [sd]gemm_. 
- if config.cxx == "" or node.inputs[0].dtype == "float16": - return - if not isinstance(node.op, AbstractConv2d_gradInputs): - return None - kern, topgrad, shape = node.inputs - if not isinstance(kern.type, TensorType) or not isinstance( - topgrad.type, TensorType - ): - return None - - # need to flip the kernel if necessary - if node.op.filter_flip: - flip = (slice(None),) * (kern.ndim - 2) + (slice(None, None, -1),) * 2 - kern = kern[flip] - rval = CorrMM_gradInputs( - border_mode=node.op.border_mode, - subsample=node.op.subsample, - filter_dilation=node.op.filter_dilation, - num_groups=node.op.num_groups, - unshared=node.op.unshared, - )(kern, topgrad, shape) - copy_stack_trace(node.outputs[0], rval) - - return [rval] - - -@node_rewriter([AbstractConv3d_gradInputs]) -def local_abstractconv3d_gradinputs_gemm(fgraph, node): - # If config.blas__ldflags is empty, PyTensor will use - # a NumPy C implementation of [sd]gemm_. - if config.cxx == "" or node.inputs[0].dtype == "float16": - return - if not isinstance(node.op, AbstractConv3d_gradInputs): - return None - kern, topgrad, shape = node.inputs - if not isinstance(kern.type, TensorType) or not isinstance( - topgrad.type, TensorType - ): - return None - - # need to flip the kernel if necessary - if node.op.filter_flip: - kern = kern[:, :, ::-1, ::-1, ::-1] - rval = Corr3dMMGradInputs( - border_mode=node.op.border_mode, - subsample=node.op.subsample, - filter_dilation=node.op.filter_dilation, - num_groups=node.op.num_groups, - )(kern, topgrad, shape) - copy_stack_trace(node.outputs[0], rval) - - return [rval] - - -@node_rewriter([AbstractConv2d]) -def local_conv2d_cpu(fgraph, node): - - if not isinstance(node.op, AbstractConv2d) or node.inputs[0].dtype == "float16": - return None - - img, kern = node.inputs - if not isinstance(img.type, TensorType) or not isinstance(kern.type, TensorType): - return None - if node.op.border_mode not in ("full", "valid"): - return None - if not node.op.filter_flip: - # Not tested yet - return None - if node.op.num_groups > 1 or node.op.unshared: - return None - if node.op.filter_dilation != (1, 1): - return None - - rval = conv2d( - img, - kern, - node.op.imshp, - node.op.kshp, - border_mode=node.op.border_mode, - subsample=node.op.subsample, - ) - - copy_stack_trace(node.outputs[0], rval) - return [rval] - - -@node_rewriter([AbstractConv2d_gradWeights]) -def local_conv2d_gradweight_cpu(fgraph, node): - if ( - not isinstance(node.op, AbstractConv2d_gradWeights) - or node.inputs[0].dtype == "float16" - ): - return None - - img, topgrad, shape = node.inputs - - if not isinstance(img.type, TensorType) or not isinstance(topgrad.type, TensorType): - return None - if node.op.border_mode not in ("full", "valid"): - return None - if not node.op.filter_flip: - # Not tested yet - return - if node.op.num_groups > 1 or node.op.unshared: - return None - - if node.op.border_mode == "valid" and (node.op.subsample != (1, 1)): - return None - - dx, dy = node.op.subsample - if dx not in (1, 2) or dy not in (1, 2): - # Not implemented in the gradient of ConvOp - return None - - if node.op.imshp is None: - op_imshp = (None, None, None, None) - else: - op_imshp = node.op.imshp - - if node.op.kshp is None: - op_kshp = (None, None, None, None) - else: - op_kshp = node.op.kshp - - if None in op_imshp or None in op_kshp: - if (dx, dy) != (1, 1): - # We cannot infer the shapes - return None - - # Determine gradient on kernels - assert len(op_imshp) == 4 and len(op_kshp) == 4 - - outshp = get_conv_output_shape( - op_imshp, - op_kshp, 
- node.op.border_mode, - node.op.subsample, - node.op.filter_dilation, - )[2:] - fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[ - 2: - ] - - newimg = img.dimshuffle((1, 0, 2, 3)) - newtopgrad = topgrad.dimshuffle((1, 0, 2, 3)) - - if node.op.border_mode == "valid": - (img, filters) = (newimg, newtopgrad) - kshp_logical = fulloutshp - kshp_logical_top_aligned = False - imshp_logical = None - (bsize, nkern) = (op_imshp[1], op_kshp[0]) - imshp = (op_imshp[0], op_imshp[2], op_imshp[3]) - kshp = outshp - elif node.op.border_mode == "full": - (img, filters) = (newtopgrad, newimg) - kshp_logical = None - kshp_logical_top_aligned = True - imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1]) - (bsize, nkern) = (op_kshp[0], op_imshp[1]) - imshp = (op_imshp[0], outshp[0], outshp[1]) - kshp = op_imshp[2:] - else: - raise NotImplementedError("Only [full,valid] modes are currently supported.") - - # Flip the kernels - filters = filters[:, :, ::-1, ::-1] - - dw = ConvOp( - imshp, - kshp, - nkern, - bsize, - 1, - 1, - output_mode="valid", - unroll_batch=None, - unroll_kern=None, - unroll_patch=None, - imshp_logical=imshp_logical, - kshp_logical=kshp_logical, - kshp_logical_top_aligned=kshp_logical_top_aligned, - direction_hint="bprop weights", - ) - res = dw(img, filters) - copy_stack_trace(node.outputs[0], res) - - if node.op.border_mode == "valid": - res = res.dimshuffle((1, 0, 2, 3)) - res = res[:, :, ::-1, ::-1] - copy_stack_trace(node.outputs[0], res) - - return [res] - - -@node_rewriter([AbstractConv2d_gradInputs]) -def local_conv2d_gradinputs_cpu(fgraph, node): - if ( - not isinstance(node.op, AbstractConv2d_gradInputs) - or node.inputs[0].dtype == "float16" - ): - return None - - kern, topgrad, shape = node.inputs - - if not isinstance(kern.type, TensorType) or not isinstance( - topgrad.type, TensorType - ): - return None - if node.op.border_mode not in ("full", "valid"): - return None - if not node.op.filter_flip: - # Not tested yet - return None - if node.op.num_groups > 1 or node.op.unshared: - return None - - # Conv 3d implementation, needed when subsample > 2 - if node.op.border_mode == "valid" and node.op.subsample != (1, 1): - # The op don't support that anymore. 
- return False - - # Conv2d Implementation - dx, dy = node.op.subsample - if dx not in (1, 2) or dy not in (1, 2): - # Not implemented in the gradient of ConvOp - return None - - if node.op.imshp is None: - op_imshp = (None, None, None, None) - else: - op_imshp = node.op.imshp - - if node.op.kshp is None: - op_kshp = (None, None, None, None) - else: - op_kshp = node.op.kshp - - if None in op_imshp or None in op_kshp: - if (dx, dy) != (1, 1): - return None - - mode = "valid" - if node.op.border_mode != "full": - mode = "full" - filters = kern.dimshuffle((1, 0, 2, 3)) - filters = filters[:, :, ::-1, ::-1] - - outshp = get_conv_output_shape( - op_imshp, - op_kshp, - node.op.border_mode, - node.op.subsample, - node.op.filter_dilation, - )[2:] - fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[ - 2: - ] - - nkern = op_imshp[1] - imshp = (op_kshp[0], outshp[0], outshp[1]) - imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1]) - din = ConvOp( - imshp, - op_kshp[2:], - nkern, - op_imshp[0], - 1, - 1, - output_mode=mode, - unroll_batch=None, - unroll_kern=None, - unroll_patch=None, - imshp_logical=imshp_logical, - kshp_logical=None, - version=-1, - direction_hint="bprop inputs", - ) - din = din(topgrad, filters) - copy_stack_trace(node.outputs[0], din) - return [din] - - -# Register Cpu Optimization -conv_groupopt = pytensor.graph.rewriting.db.LocalGroupDB() -conv_groupopt.__name__ = "conv_opts" -register_specialize_device(conv_groupopt, "fast_compile", "fast_run") - -# GEMM-based convolution -# It can be disabled by excluding 'conv_gemm'. -conv_groupopt.register( - "local_abstractconv_gemm", - local_abstractconv_gemm, - "conv_gemm", - "fast_compile", - "fast_run", - position=30, -) -conv_groupopt.register( - "local_abstractconv_gradweight_gemm", - local_abstractconv_gradweight_gemm, - "conv_gemm", - "fast_compile", - "fast_run", - position=30, -) -conv_groupopt.register( - "local_abstractconv_gradinputs_gemm", - local_abstractconv_gradinputs_gemm, - "conv_gemm", - "fast_compile", - "fast_run", - position=30, -) -conv_groupopt.register( - "local_abstractconv3d_gemm", - local_abstractconv3d_gemm, - "conv_gemm", - "fast_compile", - "fast_run", - position=30, -) -conv_groupopt.register( - "local_abstractconv3d_gradweight_gemm", - local_abstractconv3d_gradweight_gemm, - "conv_gemm", - "fast_compile", - "fast_run", - position=30, -) -conv_groupopt.register( - "local_abstractconv3d_gradinputs_gemm", - local_abstractconv3d_gradinputs_gemm, - "conv_gemm", - "fast_compile", - "fast_run", - position=30, -) - -# Legacy convolution -conv_groupopt.register( - "local_conv2d_cpu", local_conv2d_cpu, "fast_compile", "fast_run", position=40 -) -conv_groupopt.register( - "local_conv2d_gradweight_cpu", - local_conv2d_gradweight_cpu, - "fast_compile", - "fast_run", - position=40, -) -conv_groupopt.register( - "local_conv2d_gradinputs_cpu", - local_conv2d_gradinputs_cpu, - "fast_compile", - "fast_run", - position=40, -) - - -# Verify that no AbstractConv are present in the graph -@node_rewriter( - [ - AbstractConv2d, - AbstractConv2d_gradWeights, - AbstractConv2d_gradInputs, - AbstractConv3d, - AbstractConv3d_gradWeights, - AbstractConv3d_gradInputs, - ] -) -def local_abstractconv_check(fgraph, node): - if isinstance( - node.op, - ( - AbstractConv2d, - AbstractConv2d_gradWeights, - AbstractConv2d_gradInputs, - AbstractConv3d, - AbstractConv3d_gradWeights, - AbstractConv3d_gradInputs, - ), - ): - raise MetaNodeRewriterSkip( - f"{node.op.__class__.__name__} PyTensor rewriting 
failed: there is no implementation " - "available supporting the requested options. If on CPU, " - "do you have a BLAS library installed PyTensor can link against? " - "On the CPU we do not support float16." - ) - - -optdb.register( - "AbstractConvCheck", - in2out(local_abstractconv_check, name="AbstractConvCheck"), - "fast_compile", - "fast_run", - position=48.7, -) diff --git a/pytensor/tensor/nnet/sigm.py b/pytensor/tensor/nnet/sigm.py deleted file mode 100644 index d4e962d5ad..0000000000 --- a/pytensor/tensor/nnet/sigm.py +++ /dev/null @@ -1,176 +0,0 @@ -""" -These functions implement special cases of exp and log to improve numerical -stability. - -""" - -import pytensor -from pytensor import printing -from pytensor import scalar as aes -from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter -from pytensor.printing import pprint -from pytensor.scalar import sigmoid as scalar_sigmoid -from pytensor.scalar.math import Sigmoid -from pytensor.tensor.basic import constant -from pytensor.tensor.elemwise import Elemwise -from pytensor.tensor.math import clip, sigmoid -from pytensor.tensor.type import TensorType - - -class UltraFastScalarSigmoid(aes.UnaryScalarOp): - """ - This is just speed opt. Not for stability. - - """ - - nfunc_spec = ("scipy.special.expit", 1, 1) - - @staticmethod - def st_impl(x): - x = 0.5 * x - # The if is a tanh approximate. - if x >= 0: - if x < 1.7: - z = 1.5 * x / (1 + x) - elif x < 3: - z = 0.935409070603099 + 0.0458812946797165 * (x - 1.7) - else: - z = 0.99505475368673 - else: - xx = -x - if xx < 1.7: - z = 1.5 * xx / (1 + xx) - elif xx < 3: - z = 0.935409070603099 + 0.0458812946797165 * (xx - 1.7) - else: - z = 0.99505475368673 - z = -z - - return 0.5 * (z + 1.0) - - def impl(self, x): - return UltraFastScalarSigmoid.st_impl(x) - - def c_code(self, node, name, inp, out, sub): - (x,) = inp - (z,) = out - dtype = node.outputs[0].type.dtype_specs()[1] - - return ( - """{ - %(dtype)s x = 0.5 * %(x)s; - // The if is a tanh approximate. - if(x>=0) { - %(z)s = (x<1.7 ? (1.5*x/(1+x)) : - (x<3 ? (0.935409070603099 + 0.0458812946797165*(x-1.7)): - 0.99505475368673)); - } else { - %(dtype)s xx = -x; - %(z)s = -(xx<1.7 ? (1.5*xx/(1+xx)) : - (xx<3 ? (0.935409070603099 + 0.0458812946797165*(xx-1.7)): - 0.99505475368673)); - } - - //%(z)s = 0.5*(ultrafasttanh(0.5*x)+1.); - %(z)s = 0.5*(%(z)s+1.); - }""" - % locals() - ) - - @staticmethod - def c_code_cache_version(): - return (5,) - - -ultra_fast_scalar_sigmoid = UltraFastScalarSigmoid( - aes.upgrade_to_float, name="ultra_fast_scalar_sigmoid" -) -ultra_fast_sigmoid = Elemwise(ultra_fast_scalar_sigmoid, name="ultra_fast_sigmoid") - -ultra_fast_sigmoid_inplace = Elemwise( - UltraFastScalarSigmoid(aes.transfer_type(0)), - inplace_pattern={0: 0}, - name="ultra_fast_sigmoid_inplace", -) - -pprint.assign(ultra_fast_sigmoid, printing.FunctionPrinter(["ultra_fast_sigmoid"])) - - -@node_rewriter(None) -def local_ultra_fast_sigmoid(fgraph, node): - """ - When enabled, change all sigmoid to ultra_fast_sigmoid. - - For example do mode.including('local_ultra_fast_sigmoid') - or use the PyTensor flag optimizer_including=local_ultra_fast_sigmoid. - - This speeds up the sigmoid op by using an approximation. - - This is done after the stabilization and specialize phases - to avoid interacting with them. 
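    For example (a minimal sketch; the graph below is illustrative):

    .. code-block:: python

        import pytensor
        import pytensor.tensor as pt
        from pytensor.compile.mode import get_default_mode

        x = pt.matrix("x")
        y = pt.sigmoid(x)

        # Opt in to the approximation by including this rewrite in the mode.
        mode = get_default_mode().including("local_ultra_fast_sigmoid")
        f = pytensor.function([x], y, mode=mode)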
- - """ - - if isinstance(node.op, Elemwise) and isinstance(node.op.scalar_op, Sigmoid): - if node.op.inplace_pattern: - out = ultra_fast_sigmoid_inplace(node.inputs[0]) - else: - out = ultra_fast_sigmoid(node.inputs[0]) - - copy_stack_trace(node.outputs[0], out) - - def values_eq_approx_remove_low_prec(a, b): - # atol is found by trial/error. - # Other test could fail without good reason. - return TensorType.values_eq_approx(a, b, atol=0.02) - - # Let DebugMode know that there this opt approx the values. - out.tag.values_eq_approx = values_eq_approx_remove_low_prec - return [out] - - -pytensor.compile.optdb["uncanonicalize"].register( - "local_ultra_fast_sigmoid", local_ultra_fast_sigmoid -) - - -def hard_sigmoid(x): - """ - An approximation of sigmoid. - - More approximate and faster than ultra_fast_sigmoid. - - Approx in 3 parts: 0, scaled linear, 1. - - Removing the slope and shift does not make it faster. - - """ - # Use the same dtype as determined by "upgrade_to_float", - # and perform computation in that dtype. - out_dtype = aes.upgrade_to_float(aes.ScalarType(dtype=x.dtype))[0].dtype - slope = constant(0.2, dtype=out_dtype) - shift = constant(0.5, dtype=out_dtype) - x = (x * slope) + shift - x = clip(x, 0, 1) - return x - - -@node_rewriter([sigmoid]) -def local_hard_sigmoid(fgraph, node): - if isinstance(node.op, Elemwise) and node.op.scalar_op == scalar_sigmoid: - out = hard_sigmoid(node.inputs[0]) - copy_stack_trace(node.outputs[0], out) - - def values_eq_approx_remove_low_prec(a, b): - # atol is found by trial/error. - # Other test could fail without good reason. - return TensorType.values_eq_approx(a, b, atol=0.1) - - # Let DebugMode know that there this opt approx the values. - out.tag.values_eq_approx = values_eq_approx_remove_low_prec - return [out] - - -pytensor.compile.optdb["uncanonicalize"].register( - "local_hard_sigmoid", local_hard_sigmoid -) diff --git a/tests/tensor/nnet/__init__.py b/tests/tensor/conv/__init__.py similarity index 100% rename from tests/tensor/nnet/__init__.py rename to tests/tensor/conv/__init__.py diff --git a/pytensor/tensor/nnet/c_code/corr3d_gemm.c b/tests/tensor/conv/c_code/corr3d_gemm.c similarity index 100% rename from pytensor/tensor/nnet/c_code/corr3d_gemm.c rename to tests/tensor/conv/c_code/corr3d_gemm.c diff --git a/pytensor/tensor/nnet/c_code/corr_gemm.c b/tests/tensor/conv/c_code/corr_gemm.c similarity index 100% rename from pytensor/tensor/nnet/c_code/corr_gemm.c rename to tests/tensor/conv/c_code/corr_gemm.c diff --git a/pytensor/tensor/nnet/corr3d.py b/tests/tensor/conv/c_conv3d_corr3d_ref.py similarity index 99% rename from pytensor/tensor/nnet/corr3d.py rename to tests/tensor/conv/c_conv3d_corr3d_ref.py index a7ef7d2905..2b5ddaaa31 100644 --- a/pytensor/tensor/nnet/corr3d.py +++ b/tests/tensor/conv/c_conv3d_corr3d_ref.py @@ -13,7 +13,7 @@ from pytensor.tensor import blas_headers from pytensor.tensor.basic import as_tensor_variable from pytensor.tensor.blas import blas_header_version, ldflags -from pytensor.tensor.nnet.abstract_conv import get_conv_output_shape +from pytensor.tensor.conv.abstract_conv import get_conv_output_shape from pytensor.tensor.type import TensorType diff --git a/pytensor/tensor/nnet/corr.py b/tests/tensor/conv/c_conv_corr_ref.py similarity index 99% rename from pytensor/tensor/nnet/corr.py rename to tests/tensor/conv/c_conv_corr_ref.py index 6051a03b9b..9ef2e30d04 100644 --- a/pytensor/tensor/nnet/corr.py +++ b/tests/tensor/conv/c_conv_corr_ref.py @@ -13,7 +13,7 @@ from pytensor.tensor import blas_headers 
from pytensor.tensor.basic import as_tensor_variable from pytensor.tensor.blas import blas_header_version, ldflags -from pytensor.tensor.nnet.abstract_conv import get_conv_output_shape +from pytensor.tensor.conv.abstract_conv import get_conv_output_shape from pytensor.tensor.type import TensorType diff --git a/tests/tensor/nnet/test_abstract_conv.py b/tests/tensor/conv/test_abstract_conv.py similarity index 88% rename from tests/tensor/nnet/test_abstract_conv.py rename to tests/tensor/conv/test_abstract_conv.py index 87486f97d4..a61e0efbc1 100644 --- a/tests/tensor/nnet/test_abstract_conv.py +++ b/tests/tensor/conv/test_abstract_conv.py @@ -6,9 +6,8 @@ from pytensor.compile.mode import Mode from pytensor.configdefaults import config from pytensor.graph.rewriting.basic import check_stack_trace -from pytensor.tensor.nnet import abstract_conv as conv -from pytensor.tensor.nnet import conv2d_transpose, corr, corr3d -from pytensor.tensor.nnet.abstract_conv import ( +from pytensor.tensor.conv import abstract_conv +from pytensor.tensor.conv.abstract_conv import ( AbstractConv2d, AbstractConv2d_gradInputs, AbstractConv2d_gradWeights, @@ -19,18 +18,13 @@ bilinear_upsampling, causal_conv1d, check_conv_gradinputs_shape, + conv2d_transpose, get_conv_gradinputs_shape, get_conv_gradweights_shape, get_conv_output_shape, separable_conv2d, separable_conv3d, ) -from pytensor.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights -from pytensor.tensor.nnet.corr3d import ( - Corr3dMM, - Corr3dMMGradInputs, - Corr3dMMGradWeights, -) from pytensor.tensor.type import ( TensorType, ftensor4, @@ -41,6 +35,7 @@ tensor5, ) from tests import unittest_tools as utt +from tests.tensor.conv import c_conv3d_corr3d_ref, c_conv_corr_ref def conv2d_corr( @@ -53,7 +48,9 @@ def conv2d_corr( ): if conv_mode == "conv": filters = filters[:, :, ::-1, ::-1] - return corr.CorrMM(border_mode, subsample, filter_dilation)(inputs, filters) + return c_conv_corr_ref.CorrMM(border_mode, subsample, filter_dilation)( + inputs, filters + ) def conv2d_corr_gw( @@ -65,7 +62,7 @@ def conv2d_corr_gw( conv_mode="conv", filter_dilation=(1, 1), ): - rval = corr.CorrMM_gradWeights(border_mode, subsample, filter_dilation)( + rval = c_conv_corr_ref.CorrMM_gradWeights(border_mode, subsample, filter_dilation)( inputs, topgrad, filters_shape[2:] ) if conv_mode == "conv": @@ -84,7 +81,7 @@ def conv2d_corr_gi( ): if conv_mode == "conv": filters = filters[:, :, ::-1, ::-1] - return corr.CorrMM_gradInputs(border_mode, subsample, filter_dilation)( + return c_conv_corr_ref.CorrMM_gradInputs(border_mode, subsample, filter_dilation)( filters, topgrad, inputs_shape[2:] ) @@ -99,7 +96,9 @@ def conv3d_corr( ): if conv_mode == "conv": filters = filters[:, :, ::-1, ::-1, ::-1] - return corr3d.Corr3dMM(border_mode, subsample, filter_dilation)(inputs, filters) + return c_conv3d_corr3d_ref.Corr3dMM(border_mode, subsample, filter_dilation)( + inputs, filters + ) def conv3d_corr_gw( @@ -111,9 +110,9 @@ def conv3d_corr_gw( conv_mode="conv", filter_dilation=(1, 1, 1), ): - rval = corr3d.Corr3dMMGradWeights(border_mode, subsample, filter_dilation)( - inputs, topgrad, filters_shape[2:] - ) + rval = c_conv3d_corr3d_ref.Corr3dMMGradWeights( + border_mode, subsample, filter_dilation + )(inputs, topgrad, filters_shape[2:]) if conv_mode == "conv": rval = rval[:, :, ::-1, ::-1, ::-1] return rval @@ -130,9 +129,9 @@ def conv3d_corr_gi( ): if conv_mode == "conv": filters = filters[:, :, ::-1, ::-1, ::-1] - return corr3d.Corr3dMMGradInputs(border_mode, subsample, 
filter_dilation)( - filters, topgrad, inputs_shape[2:] - ) + return c_conv3d_corr3d_ref.Corr3dMMGradInputs( + border_mode, subsample, filter_dilation + )(filters, topgrad, inputs_shape[2:]) class TestGetConvOutShape: @@ -338,7 +337,7 @@ def test_shape_check_conv2d(self): input = tensor4() filters = tensor4() - out = conv.abstract_conv2d( + out = abstract_conv.abstract_conv2d( input, filters, input_shape=(3, 5, 7, 11), filter_shape=(7, 5, 3, 3) ) f = pytensor.function([input, filters], out) @@ -361,7 +360,7 @@ def test_shape_check_conv3d(self): input = tensor5() filters = tensor5() - out = conv.conv3d( + out = abstract_conv.conv3d( input, filters, input_shape=(3, 5, 7, 11, 13), filter_shape=(7, 5, 3, 3, 3) ) f = pytensor.function([input, filters], out) @@ -383,7 +382,7 @@ def test_shape_check_conv2d_grad_wrt_inputs(self): output_grad = tensor4() filters = tensor4() - out = conv.conv2d_grad_wrt_inputs( + out = abstract_conv.conv2d_grad_wrt_inputs( output_grad, filters, input_shape=(None, None, 7, 11), @@ -403,7 +402,7 @@ def test_shape_check_conv3d_grad_wrt_inputs(self): output_grad = tensor5() filters = tensor5() - out = conv.conv3d_grad_wrt_inputs( + out = abstract_conv.conv3d_grad_wrt_inputs( output_grad, filters, input_shape=(None, None, 7, 11, 13), @@ -422,7 +421,7 @@ def test_shape_check_conv2d_grad_wrt_weights(self): input = tensor4() output_grad = tensor4() - out = conv.conv2d_grad_wrt_weights( + out = abstract_conv.conv2d_grad_wrt_weights( input, output_grad, filter_shape=(None, None, 3, 3), @@ -442,7 +441,7 @@ def test_shape_check_conv3d_grad_wrt_weights(self): input = tensor5() output_grad = tensor5() - out = conv.conv3d_grad_wrt_weights( + out = abstract_conv.conv3d_grad_wrt_weights( input, output_grad, filter_shape=(None, None, 3, 3, 3), @@ -892,8 +891,8 @@ def run_fwd( self, inputs_shape, filters_shape, - conv_fn=conv.abstract_conv2d, - conv_op=conv.AbstractConv2d, + conv_fn=abstract_conv.abstract_conv2d, + conv_op=abstract_conv.AbstractConv2d, ref=conv2d_corr, **kwargs, ): @@ -911,7 +910,7 @@ def run_gradweight( inputs_shape, filters_shape, output_shape, - gradWeights_fn=conv.AbstractConv2d_gradWeights, + gradWeights_fn=abstract_conv.AbstractConv2d_gradWeights, ref=conv2d_corr_gw, **kwargs, ): @@ -929,7 +928,7 @@ def run_gradinput( inputs_shape, filters_shape, output_shape, - gradInputs_fn=conv.AbstractConv2d_gradInputs, + gradInputs_fn=abstract_conv.AbstractConv2d_gradInputs, ref=conv2d_corr_gi, **kwargs, ): @@ -943,96 +942,6 @@ def run_gradinput( ) -@pytest.mark.skipif( - not config.cxx or config.mode == "FAST_COMPILE", - reason="Need blas to test conv2d", -) -class TestCorrConv2d(BaseTestConv2d): - @classmethod - def setup_class(cls): - # This tests can run even when config.blas__ldflags is empty. 
- super().setup_class() - - def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)): - o = self.get_output_shape(i, f, s, b, fd) - self.run_fwd( - inputs_shape=i, - filters_shape=f, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=CorrMM, - check_trace=True, - filter_dilation=fd, - ) - self.run_gradweight( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=CorrMM_gradWeights, - check_trace=True, - filter_dilation=fd, - ) - self.run_gradinput( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=CorrMM_gradInputs, - check_trace=True, - filter_dilation=fd, - ) - - def run_test_case_gi( - self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False - ): - if not expect_error: - self.run_gradinput( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=CorrMM_gradInputs, - check_trace=True, - filter_dilation=fd, - ) - else: - with pytest.raises(ValueError): - self.run_gradinput( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=False, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=CorrMM_gradInputs, - ref=None, - check_trace=True, - filter_dilation=fd, - ) - - @pytest.mark.slow - def test_all(self): - super().test_all() - - @pytest.mark.skipif( config.cxx == "", reason="SciPy and cxx needed", @@ -1252,8 +1161,8 @@ def run_fwd( self, inputs_shape, filters_shape, - conv_fn=conv.conv3d, - conv_op=conv.AbstractConv3d, + conv_fn=abstract_conv.conv3d, + conv_op=abstract_conv.AbstractConv3d, ref=conv3d_corr, **kwargs, ): @@ -1271,7 +1180,7 @@ def run_gradweight( inputs_shape, filters_shape, output_shape, - gradWeights_fn=conv.AbstractConv3d_gradWeights, + gradWeights_fn=abstract_conv.AbstractConv3d_gradWeights, ref=conv3d_corr_gw, **kwargs, ): @@ -1289,7 +1198,7 @@ def run_gradinput( inputs_shape, filters_shape, output_shape, - gradInputs_fn=conv.AbstractConv3d_gradInputs, + gradInputs_fn=abstract_conv.AbstractConv3d_gradInputs, ref=conv3d_corr_gi, **kwargs, ): @@ -1303,94 +1212,6 @@ def run_gradinput( ) -@pytest.mark.skipif( - not config.cxx or config.mode == "FAST_COMPILE", - reason="Need blas to test conv3d", -) -class TestCorrConv3d(BaseTestConv3d): - @classmethod - def setup_class(cls): - # This tests can run even when config.blas__ldflags is empty. - super().setup_class() - - def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)): - o = self.get_output_shape(i, f, s, b, fd) - # This test can run even when config.blas__ldflags is empty. 
- self.run_fwd( - inputs_shape=i, - filters_shape=f, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=Corr3dMM, - check_trace=True, - filter_dilation=fd, - ) - self.run_gradweight( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=Corr3dMMGradWeights, - check_trace=True, - filter_dilation=fd, - ) - self.run_gradinput( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=Corr3dMMGradInputs, - check_trace=True, - filter_dilation=fd, - ) - - def run_test_case_gi( - self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False - ): - # This test can run even when config.blas__ldflags is empty. - if not expect_error: - self.run_gradinput( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=True, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=Corr3dMMGradInputs, - check_trace=True, - filter_dilation=fd, - ) - else: - with pytest.raises(ValueError): - self.run_gradinput( - inputs_shape=i, - filters_shape=f, - output_shape=o, - subsample=s, - verify_grad=False, - provide_shape=provide_shape, - border_mode=b, - filter_flip=flip, - target_op=Corr3dMMGradInputs, - ref=None, - check_trace=True, - filter_dilation=fd, - ) - - def test_constant_shapes(): # Check that the `imshp` and `kshp` parameters of the AbstractConv Ops # are rejected if not constant or None @@ -1451,7 +1272,7 @@ def test_grad_types(self): out_shape = lvector() - output = conv.abstract_conv2d(input, filters) + output = abstract_conv.abstract_conv2d(input, filters) grad_input, grad_filters = pytensor.grad(output.sum(), wrt=(input, filters)) assert grad_input.type == input.type, ( grad_input, @@ -1466,7 +1287,9 @@ def test_grad_types(self): filters.type, ) - grad_filters = conv.AbstractConv2d_gradWeights()(input, topgrad, out_shape) + grad_filters = abstract_conv.AbstractConv2d_gradWeights()( + input, topgrad, out_shape + ) grad_input, grad_topgrad = pytensor.grad( grad_filters.sum(), wrt=(input, topgrad) ) @@ -1484,7 +1307,9 @@ def test_grad_types(self): topgrad.type, ) - grad_input = conv.AbstractConv2d_gradInputs()(filters, topgrad, out_shape) + grad_input = abstract_conv.AbstractConv2d_gradInputs()( + filters, topgrad, out_shape + ) grad_filters, grad_topgrad = pytensor.grad( grad_input.sum(), wrt=(filters, topgrad) ) @@ -1511,7 +1336,7 @@ def test_constant_input(self): out_shape = lvector() # Check the forward Op - output = conv.abstract_conv2d(constant_tensor, filters) + output = abstract_conv.abstract_conv2d(constant_tensor, filters) grad_filters = pytensor.grad(output.sum(), wrt=filters) assert filters.type.is_super(grad_filters.type), ( grad_filters, @@ -1520,7 +1345,7 @@ def test_constant_input(self): filters.type, ) - output = conv.abstract_conv2d(input, constant_tensor) + output = abstract_conv.abstract_conv2d(input, constant_tensor) grad_input = pytensor.grad(output.sum(), wrt=input) assert input.type.is_super(grad_input.type), ( grad_input, @@ -1530,7 +1355,7 @@ def test_constant_input(self): ) # Check grad wrt weights - grad_filters = conv.AbstractConv2d_gradWeights()( + grad_filters = abstract_conv.AbstractConv2d_gradWeights()( constant_tensor, topgrad, out_shape ) grad_topgrad = pytensor.grad(grad_filters.sum(), wrt=topgrad) @@ -1541,7 +1366,7 
@@ def test_constant_input(self): topgrad.type, ) - grad_filters = conv.AbstractConv2d_gradWeights()( + grad_filters = abstract_conv.AbstractConv2d_gradWeights()( input, constant_tensor, out_shape ) grad_input = pytensor.grad(grad_filters.sum(), wrt=input) @@ -1553,7 +1378,7 @@ def test_constant_input(self): ) # Check grad wrt inputs - grad_input = conv.AbstractConv2d_gradInputs()( + grad_input = abstract_conv.AbstractConv2d_gradInputs()( constant_tensor, topgrad, out_shape ) grad_topgrad = pytensor.grad(grad_input.sum(), wrt=topgrad) @@ -1564,7 +1389,7 @@ def test_constant_input(self): topgrad.type, ) - grad_input = conv.AbstractConv2d_gradInputs()( + grad_input = abstract_conv.AbstractConv2d_gradInputs()( filters, constant_tensor, out_shape ) grad_filters = pytensor.grad(grad_input.sum(), wrt=filters) @@ -1923,13 +1748,13 @@ def setup_method(self): self.output_grad = tensor4() self.output_grad_wrt = tensor4() - self.x = tensor4("x", config.floatX) # inputs - self.w = tensor4("w", config.floatX) # filter weights + self.x = tensor4("x", dtype=config.floatX) # inputs + self.w = tensor4("w", dtype=config.floatX) # filter weights def test_conv2d_grad_wrt_inputs(self): # Compares calculated abstract grads wrt inputs with the fwd grads # This method checks the outputs of `conv2_grad_wrt_inputs` against - # the outputs of `pytensor.tensor.nnet.conv` forward grads to make sure the + # the outputs of `pytensor.tensor.conv` forward grads to make sure the # results are the same. for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes): @@ -1942,18 +1767,16 @@ def test_conv2d_grad_wrt_inputs(self): filter_val = self.random_stream.random(fltr_shape).astype( config.floatX ) - out_grad_shape = ( - pytensor.tensor.nnet.abstract_conv.get_conv_output_shape( - image_shape=in_shape, - kernel_shape=fltr_shape, - border_mode=bm, - subsample=ss, - ) + out_grad_shape = abstract_conv.get_conv_output_shape( + image_shape=in_shape, + kernel_shape=fltr_shape, + border_mode=bm, + subsample=ss, ) out_grad_val = self.random_stream.random(out_grad_shape).astype( config.floatX ) - conv_out = pytensor.tensor.nnet.conv2d( + conv_out = abstract_conv.conv2d( self.x, filters=self.w, border_mode=bm, @@ -1971,16 +1794,14 @@ def test_conv2d_grad_wrt_inputs(self): [self.x, self.w, self.output_grad], conv_grad ) - conv_wrt_i_out = ( - pytensor.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( - output_grad=self.output_grad_wrt, - filters=self.w, - border_mode=bm, - subsample=ss, - input_shape=in_shape, - filter_shape=fltr_shape, - filter_flip=ff, - ) + conv_wrt_i_out = abstract_conv.conv2d_grad_wrt_inputs( + output_grad=self.output_grad_wrt, + filters=self.w, + border_mode=bm, + subsample=ss, + input_shape=in_shape, + filter_shape=fltr_shape, + filter_flip=ff, ) f_new = pytensor.function( [self.w, self.output_grad_wrt], conv_wrt_i_out @@ -1995,7 +1816,7 @@ def test_conv2d_grad_wrt_inputs(self): def test_conv2d_grad_wrt_weights(self): # Compares calculated abstract grads wrt weights with the fwd grads # This method checks the outputs of `conv2_grad_wrt_weights` against - # the outputs of `pytensor.tensor.nnet.conv` forward grads to make sure the + # the outputs of `pytensor.tensor.conv` forward grads to make sure the # results are the same. 
for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes): @@ -2008,18 +1829,16 @@ def test_conv2d_grad_wrt_weights(self): filter_val = self.random_stream.random(fltr_shape).astype( config.floatX ) - out_grad_shape = ( - pytensor.tensor.nnet.abstract_conv.get_conv_output_shape( - image_shape=in_shape, - kernel_shape=fltr_shape, - border_mode=bm, - subsample=ss, - ) + out_grad_shape = abstract_conv.get_conv_output_shape( + image_shape=in_shape, + kernel_shape=fltr_shape, + border_mode=bm, + subsample=ss, ) out_grad_val = self.random_stream.random(out_grad_shape).astype( config.floatX ) - conv_out = pytensor.tensor.nnet.conv2d( + conv_out = abstract_conv.conv2d( self.x, filters=self.w, border_mode=bm, @@ -2037,16 +1856,14 @@ def test_conv2d_grad_wrt_weights(self): [self.x, self.w, self.output_grad], conv_grad ) - conv_wrt_w_out = ( - pytensor.tensor.nnet.abstract_conv.conv2d_grad_wrt_weights( - self.x, - output_grad=self.output_grad_wrt, - border_mode=bm, - subsample=ss, - input_shape=in_shape, - filter_shape=fltr_shape, - filter_flip=ff, - ) + conv_wrt_w_out = abstract_conv.conv2d_grad_wrt_weights( + self.x, + output_grad=self.output_grad_wrt, + border_mode=bm, + subsample=ss, + input_shape=in_shape, + filter_shape=fltr_shape, + filter_flip=ff, ) f_new = pytensor.function( [self.x, self.output_grad_wrt], conv_wrt_w_out @@ -2062,12 +1879,12 @@ def test_conv2d_grad_wrt_weights(self): reason="SciPy and cxx needed", ) class TestGroupedConvNoOptim: - conv = pytensor.tensor.nnet.abstract_conv.AbstractConv2d - conv_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights - conv_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs - conv_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d - conv_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights - conv_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs + conv = abstract_conv.AbstractConv2d + conv_gradw = abstract_conv.AbstractConv2d_gradWeights + conv_gradi = abstract_conv.AbstractConv2d_gradInputs + conv_op = abstract_conv.AbstractConv2d + conv_gradw_op = abstract_conv.AbstractConv2d_gradWeights + conv_gradi_op = abstract_conv.AbstractConv2d_gradInputs mode = Mode(optimizer=None) is_dnn = False @@ -2266,12 +2083,12 @@ def conv_gradinputs(filters_val, output_val): reason="SciPy and cxx needed", ) class TestGroupedConv3dNoOptim(TestGroupedConvNoOptim): - conv = pytensor.tensor.nnet.abstract_conv.AbstractConv3d - conv_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradWeights - conv_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradInputs - conv_op = pytensor.tensor.nnet.abstract_conv.AbstractConv3d - conv_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradWeights - conv_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv3d_gradInputs + conv = abstract_conv.AbstractConv3d + conv_gradw = abstract_conv.AbstractConv3d_gradWeights + conv_gradi = abstract_conv.AbstractConv3d_gradInputs + conv_op = abstract_conv.AbstractConv3d + conv_gradw_op = abstract_conv.AbstractConv3d_gradWeights + conv_gradi_op = abstract_conv.AbstractConv3d_gradInputs mode = Mode(optimizer=None) def setup_method(self): @@ -2505,12 +2322,12 @@ def test_interface3d(self): reason="SciPy and cxx needed", ) class TestUnsharedConv: - conv2d = pytensor.tensor.nnet.abstract_conv.AbstractConv2d - conv2d_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights - conv2d_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs - 
conv2d_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d - conv2d_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights - conv2d_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs + conv2d = abstract_conv.AbstractConv2d + conv2d_gradw = abstract_conv.AbstractConv2d_gradWeights + conv2d_gradi = abstract_conv.AbstractConv2d_gradInputs + conv2d_op = abstract_conv.AbstractConv2d + conv2d_gradw_op = abstract_conv.AbstractConv2d_gradWeights + conv2d_gradi_op = abstract_conv.AbstractConv2d_gradInputs mode = Mode(optimizer="None") @@ -2733,12 +2550,12 @@ def conv_gradinputs(filters_val, output_val): class TestAsymmetricPadding: - conv2d = pytensor.tensor.nnet.abstract_conv.AbstractConv2d - conv2d_gradw = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights - conv2d_gradi = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs - conv2d_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d - conv2d_gradw_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights - conv2d_gradi_op = pytensor.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs + conv2d = abstract_conv.AbstractConv2d + conv2d_gradw = abstract_conv.AbstractConv2d_gradWeights + conv2d_gradi = abstract_conv.AbstractConv2d_gradInputs + conv2d_op = abstract_conv.AbstractConv2d + conv2d_gradw_op = abstract_conv.AbstractConv2d_gradWeights + conv2d_gradi_op = abstract_conv.AbstractConv2d_gradInputs mode = Mode(optimizer="None") diff --git a/tests/tensor/nnet/speed_test_conv.py b/tests/tensor/nnet/speed_test_conv.py deleted file mode 100644 index b40fe75b5f..0000000000 --- a/tests/tensor/nnet/speed_test_conv.py +++ /dev/null @@ -1,451 +0,0 @@ -import time - -import numpy as np - -from pytensor import function -from pytensor.compile.mode import Mode -from pytensor.tensor.nnet.conv import ConvOp -from pytensor.tensor.type import TensorType, dmatrix - - -def flip(kern, kshp): - "flip the kernel as scipy.convolv2d do it flipped." 
- flip = np.zeros(kern.shape) - if len(kern.shape) == 2: - kern = kern.reshape(-1) - it = reversed(kern) - for i in range(kshp[0]): - for j in range(kshp[1]): - flip[i, j] = next(it) - elif len(kern.shape) == 3: - kern = kern.reshape(kern.shape[0], -1) - for k in range(kern.shape[0]): - it = reversed(kern[k, :]) - for i in range(kshp[0]): - for j in range(kshp[1]): - flip[k, i, j] = next(it) - elif len(kern.shape) == 4: - kern = kern.reshape(kern.shape[0], kern.shape[1], -1) - for k in range(kern.shape[0]): - for m in range(kern.shape[1]): - it = reversed(kern[k, m, :]) - for i in range(kshp[0]): - for j in range(kshp[1]): - flip[k, m, i, j] = next(it) - else: - raise NotImplementedError() - return flip - - -global_rng = np.random.default_rng(3423489) - -dmatrix4 = TensorType("float64", shape=(None, None, None, None)) - - -def exec_multilayer_conv_nnet_old( - conv_mode, - ss, - bsize, - imshp, - kshps, - nkerns, - unroll_batch=0, - unroll_kern=0, - img=None, - validate=True, - conv_op_py=False, - do_print=True, - repeat=1, - unroll_patch=False, - unroll_patch_size=False, - verbose=0, -): - if img is None: - img = dmatrix() - - # build actual input images - imgval = global_rng.random((bsize, imshp[0], imshp[1], imshp[2])) - - a = dmatrix() - kerns = [a for i in nkerns] - inputs4 = dmatrix4() - kerns4 = dmatrix4() - - # for each layer - ntot = 0 - tctot = 0 - tpytot = 0 - - for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))): - if do_print: - print("************* layer %i ***************" % n_layer) - print(conv_mode, ss, n_layer, kshp, nkern) - - # actual values - w = global_rng.random(np.r_[nkern, imshp[0], kshp]) - w_flip = flip(w, kshp).reshape(w.shape) - - # manual implementation - # check first stage - padimg = imgval - if conv_mode == "full": - padimg_shp = np.array(imshp[1:]) + 2 * (np.array(kshp) - np.array([1, 1])) - padimg = np.zeros(np.r_[bsize, imshp[0], padimg_shp]) - padimg[ - :, :, kshp[0] - 1 : -kshp[0] + 1, kshp[1] - 1 : -kshp[1] + 1 - ] = imgval - - outshp = np.hstack( - (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)) - ) - - time1 = time.perf_counter() - outval = np.zeros(np.r_[bsize, outshp]) - if validate: - # causes an atexit problem - - try: - from scipy.signal.signaltools import _bvalfromboundary, _valfrommode - from scipy.signal.sigtools import _convolve2d - except ImportError: - from scipy.signal._signaltools import _bvalfromboundary, _valfrommode - from scipy.signal._sigtools import _convolve2d - - val = _valfrommode(conv_mode) - bval = _bvalfromboundary("fill") - for b in range(bsize): # loop over batches - for n in range(nkern): # loop over filters - for i in range(imshp[0]): # loop over input feature maps - outval[b, n, ...] 
+= _convolve2d( - imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0 - )[0 :: ss[0], 0 :: ss[1]] - ntot += time.perf_counter() - time1 - - # ConvOp - if unroll_patch and not unroll_patch_size: - conv_op = ConvOp( - dx=ss[0], - dy=ss[1], - output_mode=conv_mode, - unroll_patch=unroll_patch, - verbose=verbose, - )(inputs4, kerns4) - else: - conv_op = ConvOp( - imshp, - kshp, - nkern, - bsize, - ss[0], - ss[1], - conv_mode, - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - verbose=verbose, - )(inputs4, kerns4) - # l1shp = np.hstack((nkern, - # ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) - propup2 = function([inputs4, kerns4], conv_op) - propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py")) - - time1 = time.perf_counter() - for i in range(repeat): - hidval2_ = propup2(imgval, w_flip) - hidval2 = hidval2_ # [:,:,0::ss[0],0::ss[1]] - tctot += time.perf_counter() - time1 - - if conv_op_py: - time1 = time.perf_counter() - for i in range(repeat): - hidval3_ = propup3(imgval, w_flip) - hidval3 = hidval3_ # [:,:,0::ss[0],0::ss[1]] - tpytot += time.perf_counter() - time1 - assert (np.abs(hidval2 - hidval3) < 1e-5).all() - else: - tpytot += 0 - - if validate: - temp = np.abs(outval - hidval2) - assert (temp < 1e-5).all() - if validate and conv_op_py: - temp = np.abs(outval - hidval3) - assert (temp < 1e-5).all() - - imshp = tuple(outshp) - imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) - - return tctot, tpytot, ntot - - -def exec_multilayer_conv_nnet( - conv_mode, - ss, - bsize, - imshp, - kshps, - nkerns, - unroll_batch=0, - unroll_kern=0, - img=None, - do_print=True, - repeat=1, - unroll_patch=False, - unroll_patch_size=False, - verbose=0, -): - if img is None: - img = dmatrix() - - # build actual input images - imgval = global_rng.random((bsize, imshp[0], imshp[1], imshp[2])) - - a = dmatrix() - kerns = [a for i in nkerns] - inputs4 = dmatrix4() - kerns4 = dmatrix4() - - # for each layer - ntot = 0 - tctot = 0 - tpytot = 0 - - for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))): - if do_print: - print("************* layer %i ***************" % n_layer) - print(conv_mode, ss, n_layer, kshp, nkern) - - # actual values - w = global_rng.random(np.r_[nkern, imshp[0], kshp]) - w_flip = flip(w, kshp).reshape(w.shape) - - outshp = np.hstack( - (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)) - ) - - time1 = time.perf_counter() - # outval = np.zeros(np.r_[bsize, outshp]) - - # ConvOp - if unroll_patch and not unroll_patch_size: - conv_op = ConvOp( - dx=ss[0], - dy=ss[1], - output_mode=conv_mode, - unroll_patch=unroll_patch, - verbose=verbose, - )(inputs4, kerns4) - else: - conv_op = ConvOp( - imshp, - kshp, - nkern, - bsize, - ss[0], - ss[1], - conv_mode, - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - verbose=verbose, - )(inputs4, kerns4) - # l1shp = np.hstack((nkern, - # ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) - propup2 = function([inputs4, kerns4], conv_op) - - time1 = time.perf_counter() - for i in range(repeat): - propup2(imgval, w_flip) - tctot += time.perf_counter() - time1 - - imshp = tuple(outshp) - # imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) - - return tctot, tpytot, ntot - - -def speed_multilayer_conv(): - # calculate the speed up of different combination of unroll - # put the parameter to the same you will try. - # validate = False # we don't validate the result to have it much faster! 
- repeat = 3 - verbose = 1 - unroll_batch = [1, 2, 3, 4, 5, 6, 10] # 15, 30, 60 always much slower - unroll_kern = [1, 2, 3, 4, 5, 6, 10] # 15, 30, 60 always much slower - # unroll_batch = [1,4,5] - # unroll_kern = [1,4,5] - # unroll_batch = [1,4] - # unroll_kern = [1,4] - # unroll_patch = [True, False] - bsize = 60 # batch size - imshp_start = (1, 48, 48) # un square shape to test more corner case. - kshps = ([11, 12],) # un square shape to test more corner case. - nkerns = [60] # per output pixel - ssizes = [ - (1, 1), - ] # (1,1)]#(2,2) bugged - convmodes = ["valid", "full"] - # do_convolve2 = False - a = dmatrix() - kerns = [a for i in nkerns] - - assert len(kshps) == len(nkerns) == len(kerns) - timing = np.zeros( - (len(unroll_batch), len(unroll_kern), 3, len(convmodes) * len(ssizes)) - ) - t_b_k = [] - # calculate the timing with unrolling - - print("time unroll batch kern") - best = [] - worst = [] - t_ = [] - for unroll_b, n_b in zip(unroll_batch, range(len(unroll_batch))): - for unroll_k, n_k in zip(unroll_kern, range(len(unroll_kern))): - t_b_k.append(str(unroll_b) + "/" + str(unroll_k)) - if not t_: - tctot, tpytot, ntot = [], [], [] - for conv_mode, n_mode in zip(convmodes, range(len(convmodes))): - for ss, n_ss in zip(ssizes, range(len(ssizes))): - # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate, verbose=verbose,do_print=False) - tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( - conv_mode, - ss, - bsize, - imshp_start, - kshps, - nkerns, - unroll_batch=unroll_b, - unroll_kern=unroll_k, - verbose=verbose, - do_print=False, - repeat=repeat, - ) - tctot += [tctot_] - tpytot += [tpytot_] - ntot += [ntot_] - if unroll_b == 4 and unroll_k == 4: - # print "unroll 4/4",tctot - best = tctot - if unroll_b == 1 and unroll_k == 1: - # print "unroll 1/1",tctot - worst = tctot - timing[n_b, n_k] = [ - tctot, - tpytot, - ntot, - ] # [sum(tctot), sum(tpytot), sum(ntot)] - if not t_: - t = timing[:, :, 0, :] # We select only the c timing. 
- else: - t = t_ - t = np.asarray(t) - # calculate the old timing - print("time old version") - tctot, tpytot, ntot = [], [], [] - tctot_ = [] - if not tctot_: - for conv_mode, n_mode in zip(convmodes, range(len(convmodes))): - for ss, n_ss in zip(ssizes, range(len(ssizes))): - # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate, verbose=verbose,do_print=False) - tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( - conv_mode, - ss, - bsize, - imshp_start, - kshps, - nkerns, - unroll_batch=0, - unroll_kern=0, - verbose=verbose, - do_print=False, - repeat=repeat, - ) - tctot += [tctot_] - tpytot += [tpytot_] - ntot += [ntot_] - else: - tctot = np.asarray(tctot_) - print(f"old code timing {sum(tctot):.3f}s", tctot) - best = np.asarray(best) - worst = np.asarray(worst) - print("timing for unrolled version") - print("unroll_batch/unroll_kern valid_mode full_mode") - for n_b in range(len(unroll_batch)): - for n_k in range(len(unroll_kern)): - print((unroll_batch[n_b], unroll_kern[n_k]) + tuple(t[n_b, n_k]), ",") - # t_detail = t - t = t.sum(axis=2) - print( - f"max {t.max():.3f}s", - "max param(batch unloop size/kernel unloop size)", - t_b_k[t.argmax()], - ) - print( - f"min {t.min():.3f}s", - "min param(batch unloop size/kernel unloop size)", - t_b_k[t.argmin()], - ) - print( - f"speedup vs (1/1){t.max() / t.min():.3f}x, vs old {sum(tctot) / t.min():.3f}x" - ) - print(worst / best, tctot / best) - - # calculate the timing of unroll_patch - print("time unroll_patch") - tctot_patch = [] - tctot_patch_size = [] - for conv_mode, n_mode in zip(convmodes, range(len(convmodes))): - for ss, n_ss in zip(ssizes, range(len(ssizes))): - # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False) - tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( - conv_mode, - ss, - bsize, - imshp_start, - kshps, - nkerns, - unroll_batch=0, - unroll_kern=0, - unroll_patch=True, - verbose=verbose, - do_print=False, - repeat=repeat, - ) - tctot_patch += [tctot_] - # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False,unroll_patch_size=True) - tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( - conv_mode, - ss, - bsize, - imshp_start, - kshps, - nkerns, - unroll_batch=0, - unroll_kern=0, - unroll_patch=True, - verbose=verbose, - do_print=False, - unroll_patch_size=True, - repeat=repeat, - ) - tctot_patch_size += [tctot_] - - t_patch = sum(tctot_patch) - print("unroll_patch without shape time", tctot_patch) - print( - f"speedup vs (1/1){t.max() / t_patch:.3f}x, vs old {sum(tctot) / t_patch:.3f}x" - ) - print(best / tctot_patch, worst / tctot_patch) - t_patch_size = sum(tctot_patch_size) - print("unroll_patch with shape time", tctot_patch_size) - print( - "speedup vs (1/1)%.3fx, vs old %.3fx" - % (t.max() / t_patch_size, sum(tctot) / t_patch_size) - ) - print(best / tctot_patch_size, worst / tctot_patch_size) - return - - -if __name__ == "__main__": - speed_multilayer_conv() diff --git a/tests/tensor/nnet/test_basic.py b/tests/tensor/nnet/test_basic.py deleted file mode 100644 index c6cc9c16eb..0000000000 --- a/tests/tensor/nnet/test_basic.py +++ /dev/null @@ -1,1222 +0,0 @@ -from contextlib import ExitStack as does_not_raise - 
-import numpy as np -import pytest -import scipy.special as sp - -import pytensor -import pytensor.tensor as at -from pytensor.compile.mode import OPT_FAST_RUN, optdb -from pytensor.configdefaults import config -from pytensor.gradient import grad -from pytensor.graph.fg import FunctionGraph -from pytensor.graph.rewriting.basic import check_stack_trace -from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise -from pytensor.tensor.math import ( - Argmax, - add, - argmax, - dot, - exp, - log, - max_and_argmax, - mean, - sigmoid, -) -from pytensor.tensor.math import sum as at_sum -from pytensor.tensor.math import tanh -from pytensor.tensor.nnet.basic import ( - CrossentropyCategorical1Hot, - CrossentropyCategorical1HotGrad, - CrossentropySoftmax1HotWithBiasDx, - CrossentropySoftmaxArgmax1HotWithBias, - Prepend_scalar_constant_to_each_row, - Prepend_scalar_to_each_row, - Softmax, - SoftmaxGrad, - SoftmaxWithBias, - binary_crossentropy, - categorical_crossentropy, - confusion_matrix, - crossentropy_categorical_1hot, - crossentropy_softmax_1hot, - crossentropy_softmax_1hot_with_bias, - crossentropy_softmax_1hot_with_bias_dx, - crossentropy_softmax_argmax_1hot_with_bias, - elu, - h_softmax, - relu, - selu, - sigmoid_binary_crossentropy, - softmax, - softmax_grad_legacy, - softmax_legacy, - softmax_with_bias, - softsign, -) -from pytensor.tensor.shape import shape_padleft -from pytensor.tensor.subtensor import AdvancedSubtensor -from pytensor.tensor.type import ( - dmatrix, - dvector, - fmatrix, - fvector, - ivector, - lvector, - matrix, - scalar, - tensor3, - tensor4, - vector, - vectors, -) -from tests import unittest_tools as utt -from tests.tensor.utils import ( - _good_broadcast_unary_normal_float_no_complex, - check_floatX, - makeBroadcastTester, - upcast_int8_nfunc, -) - - -def softmax_graph(c): - return exp(c) / exp(c).sum(axis=-1, keepdims=True) - - -def valid_axis_tester(Op): - with pytest.raises(TypeError): - Op(1.5) - - x = [tensor3()] * Op.nin - with does_not_raise(): - Op(2)(*x) - - with pytest.raises(ValueError): - Op(3)(*x) - - with does_not_raise(): - Op(-3)(*x) - - with pytest.raises(ValueError): - Op(-4)(*x) - - -class TestSoftmaxWithBias(utt.InferShapeTester): - def test_basic(self): - def f(a, b): - return softmax_with_bias(a, b)[:, 0] - - rng = np.random.default_rng(utt.fetch_seed()) - - utt.verify_grad(f, [rng.random((3, 4)), rng.random(4)]) - - def f(a, b): - return softmax_with_bias(a, b)[:, 1] - - utt.verify_grad(f, [rng.random((3, 4)), rng.random(4)]) - - def f(a, b): - return softmax_with_bias(a, b)[:, 2] - - utt.verify_grad(f, [rng.random((3, 4)), rng.random(4)]) - - def f(a, b): - return softmax_with_bias(a, b)[:, 3] - - utt.verify_grad(f, [rng.random((3, 4)), rng.random(4)]) - - def test_broadcast(self): - """ - Test that we don't raise an error during rewriting for no good reason - as `softmax_with_bias` don't support correctly some/all broadcasted - inputs pattern. 
- """ - initial_W = np.asarray( - [[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1]], - dtype=config.floatX, - ) - W = pytensor.shared(value=initial_W, name="W") - vbias = pytensor.shared(value=0.1, name="vbias") # 0.01 - hid = vector("hid") - f = pytensor.function([hid], softmax_legacy(dot(hid, W.T) + vbias)) - ops = [node.op for node in f.maker.fgraph.toposort()] - assert softmax_with_bias not in ops - assert softmax_legacy in ops - - f([0, 1, 0]) - # print f.maker.fgraph.toposort() - - def test_softmax_with_bias_trace(self): - rng = np.random.default_rng(utt.fetch_seed()) - a = pytensor.shared(rng.standard_normal((3,)).astype(config.floatX)) - b = pytensor.shared(np.float32(rng.standard_normal())) - sm = softmax(a + b) - f = pytensor.function([], sm) - assert check_stack_trace(f, ops_to_check="last") - - def test_infer_shape(self): - admat = matrix() - advec = vector() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((3, 4)).astype(config.floatX) - advec_val = rng.random(4).astype(config.floatX) - self._compile_and_check( - [admat, advec], - [SoftmaxWithBias()(admat, advec)], - [admat_val, advec_val], - SoftmaxWithBias, - ) - - -class TestCrossEntropySoftmax1Hot: - def test_basic(self): - y_idx = [0, 1, 3] - - def f(a, b): - return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0] - - rng = np.random.default_rng(utt.fetch_seed()) - - utt.verify_grad(f, [rng.random((3, 4)), rng.random(4)]) - - y_idx = [0, 1, 3] - - def f(a): - return crossentropy_softmax_1hot(a, y_idx)[0] - - utt.verify_grad(f, [rng.random((3, 4))]) - - def test_vector(self): - y_idx = [3] - - def f(a): - return crossentropy_softmax_1hot(shape_padleft(a), y_idx)[0] - - rng = np.random.default_rng(utt.fetch_seed()) - utt.verify_grad(f, [rng.random((4,))]) - - def test_vectors(self): - y_idx = [3] - - def f(a, b): - return crossentropy_softmax_1hot(shape_padleft(a) + b, y_idx)[0] - - rng = np.random.default_rng(utt.fetch_seed()) - utt.verify_grad(f, [rng.random((4,)), rng.random(4)]) - - -class TestCrossEntropySoftmax1HotWithBiasDx(utt.InferShapeTester): - def test_basic(self): - rng = np.random.default_rng(utt.fetch_seed()) - - def ff(class_dtype): - def f(sm): - # Class indices - y = rng.integers(low=0, high=5, size=10).astype(class_dtype) - return crossentropy_softmax_1hot_with_bias_dx( - rng.random(10), - sm, - y, # Gradient w.r.t. NLL. # Softmax output. - ) - - return f - - # Build a random softmax output whose rows sum to 1. 
- softmax_output = rng.random((10, 5)) - softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) - for dtype in ["uint8", "int8", "uint64", "int64"]: - utt.verify_grad(ff(dtype), [softmax_output]) - - def test_basic_2(self): - rng = np.random.default_rng(utt.fetch_seed()) - softmax_output = rng.random((10, 5)) - softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) - - def f(dy): - return crossentropy_softmax_1hot_with_bias_dx( - dy, softmax_output, rng.integers(low=0, high=5, size=10) - ) - - utt.verify_grad(f, [rng.random(10)]) - - def test_infer_shape(self): - admat = matrix() - advec = vector() - alvec = lvector() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((10, 5)).astype(config.floatX) - admat_val /= admat_val.sum(axis=1).reshape(10, 1) - advec_val = rng.random(10).astype(config.floatX) - alvec_val = rng.integers(low=0, high=5, size=10) - self._compile_and_check( - [advec, admat, alvec], - [CrossentropySoftmax1HotWithBiasDx()(advec, admat, alvec)], - [advec_val, admat_val, alvec_val], - CrossentropySoftmax1HotWithBiasDx, - ) - - def test_neg_idx(self): - admat = matrix() - advec = vector() - alvec = lvector() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((10, 5)).astype(config.floatX) - admat_val /= admat_val.sum(axis=1).reshape(10, 1) - advec_val = rng.random(10).astype(config.floatX) - alvec_val = rng.integers(low=0, high=5, size=10) - alvec_val[1] = -1 - out = CrossentropySoftmax1HotWithBiasDx()(advec, admat, alvec) - f = pytensor.function([advec, admat, alvec], out) - with pytest.raises(ValueError): - f(advec_val, admat_val, alvec_val) - - -class TestCrossEntropySoftmaxArgmax1HotWithBias(utt.InferShapeTester): - def setup_method(self): - self.op = crossentropy_softmax_argmax_1hot_with_bias - super().setup_method() - - def test_grads(self): - n_classes = 5 - n_samples = 3 - - rng = np.random.default_rng(utt.fetch_seed()) - - # First test gradient when getting a gradient on the NLL output. - def grad_on_nll_dtype(dtype): - def grad_on_nll(x, b): - y_idx = rng.integers(low=0, high=n_classes, size=n_samples).astype( - dtype - ) - return self.op(x, b, y_idx=y_idx)[0] - - return grad_on_nll - - for dtype in ["uint8", "int8", "uint64", "int64"]: - utt.verify_grad( - grad_on_nll_dtype(dtype), - [ - rng.random((n_samples, n_classes)), - rng.random(n_classes), - ], - ) - - # Then test gradient when getting a gradient on the softmax output. 
- def grad_on_softmax(x, b): - return self.op( - x, - b, - y_idx=rng.integers(low=0, high=n_classes, size=n_samples), - )[1] - - utt.verify_grad( - grad_on_softmax, - [rng.random((n_samples, n_classes)), rng.random(n_classes)], - ) - - def test_infer_shape(self): - admat = matrix() - advec = vector() - alvec = lvector() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((3, 5)).astype(config.floatX) - advec_val = rng.random(5).astype(config.floatX) - alvec_val = rng.integers(low=0, high=5, size=3) - self._compile_and_check( - [admat, advec, alvec], - CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec), - [admat_val, advec_val, alvec_val], - CrossentropySoftmaxArgmax1HotWithBias, - ) - - def test_neg_idx(self): - admat = matrix() - advec = vector() - alvec = lvector() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((3, 5)).astype(config.floatX) - advec_val = rng.random(5).astype(config.floatX) - alvec_val = rng.integers(low=0, high=5, size=3) - alvec_val[1] = -1 - out = CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec) - f = pytensor.function([admat, advec, alvec], out) - with pytest.raises(ValueError): - f(admat_val, advec_val, alvec_val) - - -class TestPrepend(utt.InferShapeTester): - def test_prepend_constant(self): - x = matrix("x") - y = Prepend_scalar_constant_to_each_row(4.0)(x) - f = pytensor.function([x], y) - rng = np.random.default_rng(utt.fetch_seed()) - m = rng.random((3, 5)).astype(config.floatX) - my = f(m) - assert my.shape == (3, 6) - assert np.all(my[:, 0] == 4.0) - - def test_prepend_basic(self): - """Test basic functionality.""" - x = matrix("x") - y = Prepend_scalar_to_each_row()(5.0, x) - f = pytensor.function([x], y) - m = np.ones((3, 5), dtype="float32") - my = f(m) - assert my.shape == (3, 6) - assert np.all(my[:, 0] == 5.0) - - def test_infer_shape(self): - admat = matrix() - adscal = scalar() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((3, 5)).astype(config.floatX) - adscal_val = np.asarray(rng.random(), dtype=config.floatX).item() - self._compile_and_check( - [admat], - [Prepend_scalar_constant_to_each_row(adscal_val)(admat)], - [admat_val], - Prepend_scalar_constant_to_each_row, - ) - - self._compile_and_check( - [adscal, admat], - [Prepend_scalar_to_each_row()(adscal, admat)], - [adscal_val, admat_val], - Prepend_scalar_to_each_row, - ) - - -class TestCrossEntropyCategorical1HotGrad(utt.InferShapeTester): - def test_infer_shape(self): - advec = vector() - admat = matrix() - alvec = lvector() - rng = np.random.default_rng(utt.fetch_seed()) - advec_val = rng.random(3).astype(config.floatX) - admat_val = rng.random((3, 2)).astype(config.floatX) - alvec_val = [0, 1, 0] - self._compile_and_check( - [advec, admat, alvec], - [CrossentropyCategorical1HotGrad()(advec, admat, alvec)], - [advec_val, admat_val, alvec_val], - CrossentropyCategorical1HotGrad, - ) - - -class TestCrossEntropyCategorical1Hot(utt.InferShapeTester): - def test_input_validation(self): - with pytest.raises(TypeError, match="Matrix.*"): - crossentropy_categorical_1hot(vector(), lvector()) - - with pytest.raises(TypeError, match="Integer.*"): - crossentropy_categorical_1hot(matrix(), vector()) - - def test_grad(self): - x = matrix("x") - one_of_n = lvector("one_of_n") - op = crossentropy_categorical_1hot - xe = op(x, one_of_n) - f = pytensor.function([x, one_of_n], xe) - x_val = np.asarray([[0.4, 0.6, 0.0], [0.1, 0.8, 0.1]], dtype=config.floatX) - xe_val = f(x_val, [0, 1]) - assert np.allclose(xe_val, 
-np.log([0.4, 0.8])) - - def oplike(x): - return op(x, [0, 1]) - - rng = np.random.default_rng(utt.fetch_seed()) - utt.verify_grad(oplike, [x_val], rng=rng) - - def test_infer_shape(self): - admat = matrix() - alvec = lvector() - rng = np.random.default_rng(utt.fetch_seed()) - admat_val = rng.random((3, 2)).astype(config.floatX) - alvec_val = [0, 1, 0] - self._compile_and_check( - [admat, alvec], - [CrossentropyCategorical1Hot()(admat, alvec)], - [admat_val, alvec_val], - CrossentropyCategorical1Hot, - ) - - def test_softmax_rewrites(self): - x = matrix("x") - one_of_n = lvector("one_of_n") - op = crossentropy_categorical_1hot - # xe = op(x, one_of_n) - - fgraph = FunctionGraph([x, one_of_n], [op(softmax_legacy(x), one_of_n)]) - assert fgraph.outputs[0].owner.op == op - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias - - def test_softmax_rewrites_w_bias(self): - x = matrix("x") - b = vector("b") - one_of_n = lvector("one_of_n") - op = crossentropy_categorical_1hot - - fgraph = FunctionGraph([x, b, one_of_n], [op(softmax_legacy(x + b), one_of_n)]) - assert fgraph.outputs[0].owner.op == op - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - assert len(fgraph.toposort()) == 1 - assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias - - def test_softmax_rewrites_w_bias2(self): - x = matrix("x") - b = vector("b") - c = vector("c") - one_of_n = lvector("one_of_n") - op = crossentropy_categorical_1hot - - fgraph = FunctionGraph( - [x, b, c, one_of_n], [op(softmax_legacy(add(x, b, c)), one_of_n)] - ) - assert fgraph.outputs[0].owner.op == op - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - assert len(fgraph.toposort()) == 2 - assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias - - def test_softmax_grad_rewrites(self): - x = matrix("x") - one_of_n = lvector("one_of_n") - op = crossentropy_categorical_1hot - xe = op(softmax_legacy(x), one_of_n) - sum_xe = at_sum(xe) - g_x = grad(sum_xe, x) - fgraph = FunctionGraph([x, one_of_n], [g_x]) - assert check_stack_trace( - fgraph, - ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_legacy], - ) - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = {node.op for node in fgraph.toposort()} - assert crossentropy_softmax_argmax_1hot_with_bias not in ops - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_legacy in ops - assert softmax_grad_legacy not in ops - - def test_get_rid_of_advanced_indexing_version_of_xent(self): - x = matrix("x") - b = vector("b") - y = lvector("y") - - # Basic case - expressions = [ - at_sum(-log(softmax(x)[at.arange(y.shape[0]), y])), - -at_sum(log(softmax(x)[at.arange(y.shape[0]), y])), - -at_sum(log(softmax(x))[at.arange(y.shape[0]), y]), - at_sum(-log(softmax(x))[at.arange(y.shape[0]), y]), - ] - for expr in expressions: - - fgraph = FunctionGraph([x, y], [expr]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 4 - assert crossentropy_softmax_argmax_1hot_with_bias in ops - assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)] - - # Also verify the gradient wrt x - fgraph = FunctionGraph([x, y], [grad(expr, x)]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 2 - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_legacy in ops - assert softmax_grad_legacy not in ops - - # Test that a biased softmax is rewritten 
correctly - bias_expressions = [ - at_sum(-log(softmax(x + b)[at.arange(y.shape[0]), y])), - -at_sum(log(softmax(b + x)[at.arange(y.shape[0]), y])), - -at_sum(log(softmax(x + b))[at.arange(y.shape[0]), y]), - at_sum(-log(softmax(b + x))[at.arange(y.shape[0]), y]), - ] - - for expr in bias_expressions: - fgraph = FunctionGraph([x, b, y], [expr, x]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 2 # [big_op, sum] - assert crossentropy_softmax_argmax_1hot_with_bias in ops - - fgraph = FunctionGraph([x, b, y], [grad(expr, x)]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 2 - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_with_bias in ops - assert softmax_grad_legacy not in ops - - # Test that using "mean" instead of sum works, too - mean_expressions = [ - mean(-log(softmax(x)[at.arange(y.shape[0]), y])), - -mean(log(softmax(x)[at.arange(y.shape[0]), y])), - -mean(log(softmax(x))[at.arange(y.shape[0]), y]), - mean(-log(softmax(x))[at.arange(y.shape[0]), y]), - ] - - for expr in mean_expressions: - - fgraph = FunctionGraph([x, y], [expr]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 6 - assert crossentropy_softmax_argmax_1hot_with_bias in ops - assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)] - - fgraph = FunctionGraph([x, y], [grad(expr, x)]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 5 - # there's an extra dimshuffle in there - # but I can't think of a good rule to get rid of it - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_legacy in ops - assert softmax_grad_legacy not in ops - - mean_bias_expressions = [ - mean(-log(softmax(x + b)[at.arange(y.shape[0]), y])), - -mean(log(softmax(b + x)[at.arange(y.shape[0]), y])), - -mean(log(softmax(x + b))[at.arange(y.shape[0]), y]), - mean(-log(softmax(b + x))[at.arange(y.shape[0]), y]), - ] - - for expr in mean_bias_expressions: - - fgraph = FunctionGraph([x, b, y], [expr]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 4 - assert crossentropy_softmax_argmax_1hot_with_bias in ops - assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)] - - fgraph = FunctionGraph([x, b, y], [grad(expr, x)]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 5 - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_with_bias in ops - assert softmax_grad_legacy not in ops - - def test_xent_thing_int32(self): - x = matrix("x") - y = lvector("y") - yi = at.cast(y, "int32") - expressions = [ - at_sum(-log(softmax(x)[at.arange(yi.shape[0]), yi])), - -at_sum(log(softmax(x)[at.arange(yi.shape[0]), yi])), - -at_sum(log(softmax(x))[at.arange(yi.shape[0]), yi]), - at_sum(-log(softmax(x))[at.arange(yi.shape[0]), yi]), - ] - - for expr in expressions: - fgraph = FunctionGraph([x, y], [expr]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in fgraph.toposort()] - assert len(ops) == 5 - assert crossentropy_softmax_argmax_1hot_with_bias in ops - assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)] - - # Also verify the gradient wrt x - fgraph = FunctionGraph([x, y], [grad(expr, x)]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - ops = [node.op for node in 
fgraph.toposort()] - assert len(ops) == 3 - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_legacy in ops - assert softmax_grad_legacy not in ops - - def test_crossentropy_softmax_1hot_with_bias_dxcale_cost(self): - x = matrix("x") - y = lvector("y") - a = scalar("a") - - def validate_grad_graph(func): - # The graph of the gradient should not have softmaxgrad anymore - has_cx1hotdx = False - has_softmax = False - has_softmaxdx = False - for node in func.maker.fgraph.toposort(): - if node.op == crossentropy_softmax_1hot_with_bias_dx: - has_cx1hotdx = True - if node.op == softmax_legacy: - has_softmax = True - if node.op == softmax_grad_legacy: - has_softmaxdx = True - - assert has_cx1hotdx - assert has_softmax - assert not has_softmaxdx - - # Cases to test - expressions = [ - a * at_sum(-log(softmax(x)[at.arange(y.shape[0]), y])), - -a * at_sum(log(softmax(x)[at.arange(y.shape[0]), y])), - a * (-at_sum(log(softmax(x)[at.arange(y.shape[0]), y]))), - a * at_sum(log(softmax(x)[at.arange(y.shape[0]), y])), - a * at_sum(-log(softmax(x))[at.arange(y.shape[0]), y]), - -a * at_sum(log(softmax(x))[at.arange(y.shape[0]), y]), - a * (-at_sum(log(softmax(x))[at.arange(y.shape[0]), y])), - a * at_sum(log(softmax(x))[at.arange(y.shape[0]), y]), - a * mean(-log(softmax(x)[at.arange(y.shape[0]), y])), - -a * mean(log(softmax(x)[at.arange(y.shape[0]), y])), - a * (-mean(log(softmax(x)[at.arange(y.shape[0]), y]))), - a * mean(log(softmax(x)[at.arange(y.shape[0]), y])), - a * mean(-log(softmax(x))[at.arange(y.shape[0]), y]), - -a * mean(log(softmax(x))[at.arange(y.shape[0]), y]), - a * (-mean(log(softmax(x))[at.arange(y.shape[0]), y])), - a * mean(log(softmax(x))[at.arange(y.shape[0]), y]), - ] - - for expr in expressions: - fgraph = FunctionGraph([x, y, a], [expr]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - assert 5 <= len(fgraph.toposort()) <= 10 - - ops = {node.op for node in fgraph.toposort()} - assert crossentropy_softmax_argmax_1hot_with_bias in ops - assert softmax_legacy not in ops - - # Verify the gradient wrt x - fgraph = FunctionGraph([x, y, a], [grad(expr, x)]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - assert 3 <= len(fgraph.toposort()) <= 6 - - ops = {node.op for node in fgraph.toposort()} - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_legacy in ops - assert softmax_grad_legacy not in ops - - # Verify the gradient when providing output gradient - fgraph = FunctionGraph( - [x, y, a], [grad(expr, x, known_grads={expr: a * x.sum()})] - ) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - assert 6 <= len(fgraph.toposort()) <= 8 - - ops = {node.op for node in fgraph.toposort()} - assert crossentropy_softmax_1hot_with_bias_dx in ops - assert softmax_legacy in ops - assert softmax_grad_legacy not in ops - - -def test_argmax_pushdown(): - x = matrix() - for sm in [softmax_graph, softmax_legacy]: - # test that the max_and_argmax is pushed down if the max is not used - out = max_and_argmax(sm(exp(tanh(sigmoid(x)))), axis=-1)[1] - fgraph = FunctionGraph([x], [out]) - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - # print 'AFTER' - # for node in fgraph.toposort(): - # print node.op - assert len(fgraph.toposort()) == 1 - assert isinstance(fgraph.toposort()[0].op, Argmax) - assert check_stack_trace(fgraph, ops_to_check=Argmax) - x = matrix() - # test that the max_and_argmax is not pushed down if the max is used - out = max_and_argmax(sm(exp(tanh(sigmoid(x)))), axis=-1)[0] - fgraph = FunctionGraph([x], [out]) - - assert hasattr(fgraph.outputs[0].tag, 
"trace") - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - # print 'AFTER' - # for node in fgraph.toposort(): - # print node.op - assert len(fgraph.toposort()) == 3 - assert isinstance(fgraph.toposort()[0].op, Elemwise) - assert isinstance(fgraph.toposort()[1].op, Softmax) - assert isinstance(fgraph.toposort()[2].op, CAReduce) - assert isinstance( - fgraph.toposort()[2].op.scalar_op, pytensor.scalar.ScalarMaximum - ) - - -def test_argmax_pushdown_bias(): - x = matrix() - b = vector() - - out = argmax(softmax_with_bias(x, b), axis=-1) - fgraph = FunctionGraph([x, b], [out]) - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - types_to_check = (DimShuffle, Elemwise, Argmax) - assert len(fgraph.toposort()) == 3 - - for i, type in enumerate(types_to_check): - assert isinstance(fgraph.toposort()[i].op, type) - assert check_stack_trace(fgraph, ops_to_check=types_to_check) - - x = matrix() - b = vector() - out = max_and_argmax(softmax_with_bias(x, b), axis=-1)[0] - fgraph = FunctionGraph([x, b], [out]) - - optdb.query(OPT_FAST_RUN).rewrite(fgraph) - - assert len(fgraph.toposort()) == 2 - assert isinstance(fgraph.toposort()[0].op, SoftmaxWithBias) - assert isinstance(fgraph.toposort()[1].op, CAReduce) - assert isinstance(fgraph.toposort()[1].op.scalar_op, pytensor.scalar.ScalarMaximum) - assert check_stack_trace(fgraph, ops_to_check=(SoftmaxWithBias, CAReduce)) - - -def test_asymptotic_32(): - """Test that our functions behave sensibly when huge values are present.""" - - # TODO: consider adding the rewrite of crossentropy into the current - # mode for the purpose of running this test - - for dtype in "float32", "float64": - if dtype == "float32": - x = fmatrix() - x2 = fvector() - else: - x = dmatrix() - x2 = dvector() - y = lvector() - - c = categorical_crossentropy(softmax(x + x2), y) - f = pytensor.function([x, y, x2], [c.sum(), grad(c.sum(), x)], mode="FAST_RUN") - - xval = np.zeros((5, 5), dtype=dtype).astype(dtype) - x2val = np.zeros(5, dtype=xval.dtype).astype(dtype) - for i in range(100): - cval, gxval = f(xval, np.arange(5), x2val) - xval -= 100.3 * gxval - - assert cval == 0 # no problem going to zero error - - # what about when x gets really big? - - xval = np.zeros((5, 5), dtype=dtype) - x2val = np.zeros(5, dtype=xval.dtype) - for i in range(100): - - cval, gxval = f(xval, np.arange(5), x2val) - xval += 100000.3 * gxval - - assert cval > 61750000 - assert gxval[0, 0] == -1.0 - assert gxval[0, 1] == 0.25 - - -class TestSoftmaxRewrite: - """ - Test that expressions of softmax in terms of exponentiated things - divided by row sums are replaced by softmax expressions. - - `Softmax_grad` isn't that interesting as an Op, but it has the signature - we look for when trying to insert `CrossEntropySoftmax` grad. So, for - now, we add `softmax_grad` to graphs. In the future, we may modify the - `CrossEntropySoftmax` grad to look for the more basic pattern. - - """ - - def setup_method(self): - self.mode = pytensor.compile.mode.get_default_mode() - self.mode = self.mode.including("canonicalize") - - @pytest.mark.parametrize("axis", [None, 0, 1, -1, (0, 1)]) - def test_basic(self, axis): - c = matrix() - if axis is None: - p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x") - elif axis == 0: - p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", 0) - elif axis == (0, 1): - p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x") - else: - p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle(0, "x") - - # test that function contains softmax and no div. 
- f = pytensor.function([c], p_y, mode=self.mode) - - assert check_stack_trace(f, ops_to_check=Softmax) - - f_ops = [n.op for n in f.maker.fgraph.toposort()] - - assert len(f_ops) == 1 - assert isinstance(f_ops[0], Softmax) - - rng = np.random.default_rng(utt.fetch_seed()) - c_val = rng.random((3, 4)).astype(config.floatX) - assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis)) - - @pytest.mark.parametrize("axis", [None, 0, 1, 2, -1, -2, -3, (0, 1, 2)]) - def test_basic_keepdims(self, axis): - c = tensor3() - p_y = exp(c) / exp(c).sum(axis=axis, keepdims=True) - - # test that function contains softmax and no div. - f = pytensor.function([c], p_y, mode=self.mode) - - assert check_stack_trace(f, ops_to_check=Softmax) - - f_ops = [n.op for n in f.maker.fgraph.toposort()] - - assert len(f_ops) == 1 - assert isinstance(f_ops[0], Softmax) - - rng = np.random.default_rng(utt.fetch_seed()) - c_val = rng.random((3, 4, 5)).astype(config.floatX) - assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis)) - - @pytest.mark.skip(reason="Rewrite not enabled for the moment") - def test_grad(self): - c = matrix() - p_y = exp(c) / exp(c).sum(axis=1).dimshuffle(0, "x") - - # test that function contains softmax and softmaxgrad - w = matrix() - - g = pytensor.function([c, w], grad((p_y * w).sum(), c), mode=self.mode) - - g_ops = [n.op for n in g.maker.fgraph.toposort()] - - assert len(g_ops) == 2, g_ops - assert isinstance(g_ops[0], Softmax) - assert isinstance(g_ops[1], SoftmaxGrad) - - rng = np.random.default_rng(utt.fetch_seed()) - g(rng.random((3, 4)), rng.uniform(0.5, 1, (3, 4))) - - def test_transpose_basic(self): - # this should be a transposed softmax - c = matrix() - p_y = exp(c) / exp(c).sum(axis=0) - - # test that function contains softmax and no div. - f = pytensor.function([c], p_y, mode=self.mode) - f_ops = [n.op for n in f.maker.fgraph.toposort()] - assert len(f_ops) == 1 - assert isinstance(f_ops[0], Softmax) - - @pytest.mark.skip(reason="Rewrite not enabled for the moment") - def test_transpose_grad(self): - # this should be a transposed softmax - c = matrix() - p_y = exp(c) / exp(c).sum(axis=0) - - # test that function contains softmax and no div. - g = pytensor.function([c], grad(p_y.sum(), c), mode=self.mode) - g_ops = [n.op for n in g.maker.fgraph.toposort()] - assert len(g_ops) == 2 - assert isinstance(g_ops[0], Softmax) - assert isinstance(g_ops[1], SoftmaxGrad) - - def test_1d_basic(self): - c = vector() - p_y = exp(c) / exp(c).sum() - - # test that function contains softmax and no div. - f = pytensor.function([c], p_y, mode=self.mode) - f_ops = [n.op for n in f.maker.fgraph.toposort()] - assert len(f_ops) == 1 - assert isinstance(f_ops[0], Softmax) - - @pytest.mark.skip(reason="Rewrite not enabled for the moment") - def test_1D_grad(self): - c = vector() - p_y = exp(c) / exp(c).sum() - - # test that function contains softmax and no div. 
- g = pytensor.function([c], grad(p_y.sum(), c), mode=self.mode) - g_ops = [n.op for n in g.maker.fgraph.toposort()] - assert len(g_ops) == 2 - assert isinstance(g_ops[0], Softmax) - assert isinstance(g_ops[1], SoftmaxGrad) - - @pytest.mark.parametrize( - "f", - [ - lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle(0, 1, "x"), - lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle("x", 0, 1, "x"), - lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle("x", 1, 0), - lambda c: exp(c) / exp(c).sum(axis=(0, 1), keepdims=True), - ], - ) - def test_invalid_softmax_expressions(self, f): - # Test that graphs are not rewritten into a softmax when a dimshuffle - # swaps or adds extra dimensions, or when more than one but not all axis - # are summed over (which is not allowed by the Softmax Op but otherwise - # valid) - c = tensor3("c") - out = f(c) - f = pytensor.function([c], out, mode=self.mode) - - f_ops = [n.op for n in f.maker.fgraph.toposort()] - assert len(f_ops) > 1 - assert not any(isinstance(op, Softmax) for op in f_ops) - - -def test_softmax_graph(): - rng = np.random.default_rng(utt.fetch_seed()) - x = pytensor.shared(rng.normal(size=(3, 4))) - - def f(inputs): - y = softmax_graph(x) - return pytensor.grad(None, x, known_grads={y: inputs}) - - utt.verify_grad(f, [rng.random((3, 4))]) - - -def test_grad_softmax_grad(): - rng = np.random.default_rng(utt.fetch_seed()) - x = pytensor.shared(rng.normal(size=(3, 4))) - - def f(inputs): - y = softmax_legacy(x) - return pytensor.grad(None, x, known_grads={y: inputs}) - - utt.verify_grad(f, [rng.random((3, 4))]) - - -def test_relu(): - x = matrix("x") - rng = np.random.default_rng(utt.fetch_seed()) - X = rng.standard_normal((20, 30)).astype(config.floatX) - - # Test the base case, without custom alpha value - y = relu(x).eval({x: X}) - assert np.allclose(y, np.maximum(X, 0)) - - # Test for different constant alpha values (also outside of [0, 1]) - for alpha in 0, 0.3, 1, 2, -0.3, -1, -2: - y = relu(x, alpha).eval({x: X}) - assert np.allclose(y, np.where(X > 0, X, alpha * X)) - - # Test for variable alpha (scalar, vector and matrix) - for alpha in scalar(), vector(), matrix(): - # Create value for alpha (correct ndim and broadcastable against X) - A = np.array( - rng.standard_normal(X.shape[::-1][: alpha.ndim][::-1]), dtype=config.floatX - ) - y = relu(x, alpha).eval({x: X, alpha: A}) - assert np.allclose(y, np.where(X > 0, X, A * X), rtol=3e-5) - - # Test that an alpha of type `ndarray` doesn't generate an upcast - x = matrix("x", dtype="float32") - X = rng.standard_normal((20, 30)).astype("float32") - alpha = np.asarray(0.123, dtype="float32") - - y = relu(x, alpha).eval({x: X}) - assert np.allclose(y, np.where(X > 0, X, alpha * X)) - assert y.dtype == "float32" - - -def test_h_softmax(): - """Tests the output dimensions of the `h_softmax` when a target is provided or not.""" - - input_size = 4 - batch_size = 2 - h_softmax_level1_size = 5 - h_softmax_level2_size = 3 - output_size = h_softmax_level1_size * h_softmax_level2_size - - rng = np.random.default_rng(utt.fetch_seed()) - - # First level of h_softmax - W1 = np.asarray( - rng.normal(size=(input_size, h_softmax_level1_size)), dtype=config.floatX - ) - W1 = pytensor.shared(W1) - b1 = pytensor.shared( - np.asarray(np.zeros((h_softmax_level1_size,)), dtype=config.floatX) - ) - - # Second level of h_softmax - W2 = np.asarray( - rng.normal(size=(h_softmax_level1_size, input_size, h_softmax_level2_size)), - dtype=config.floatX, - ) - W2 = pytensor.shared(W2) - b2 = pytensor.shared( - np.asarray( - 
np.zeros((h_softmax_level1_size, h_softmax_level2_size)), - dtype=config.floatX, - ) - ) - - x = matrix("x") - y = ivector("y") - - # This only computes the output corresponding to the target - y_hat_tg = h_softmax( - x, - batch_size, - output_size, - h_softmax_level1_size, - h_softmax_level2_size, - W1, - b1, - W2, - b2, - y, - ) - - # This computes all the outputs - y_hat_all = h_softmax( - x, - batch_size, - output_size, - h_softmax_level1_size, - h_softmax_level2_size, - W1, - b1, - W2, - b2, - ) - - fun_output_tg = pytensor.function([x, y], y_hat_tg) - fun_output = pytensor.function([x], y_hat_all) - - x_mat = rng.normal(size=(batch_size, input_size)).astype(config.floatX) - y_mat = rng.integers(0, output_size, batch_size).astype("int32") - tg_output = fun_output_tg(x_mat, y_mat) - all_outputs = fun_output(x_mat) - - assert tg_output.shape == (batch_size,) - assert all_outputs.shape == (batch_size, output_size) - - # Verifies that the outputs computed by fun_output_tg are the same as those - # computed by fun_output. - utt.assert_allclose(all_outputs[np.arange(0, batch_size), y_mat], tg_output) - - -def test_elu(): - x = matrix("x") - rng = np.random.default_rng(utt.fetch_seed()) - X = rng.standard_normal((20, 30)).astype(config.floatX) - - # test the base case, without custom alpha value - y = elu(x).eval({x: X}) - utt.assert_allclose(y, np.where(X > 0, X, np.exp(X) - 1)) - - # test for different constant alpha values - for alpha in 1.5, 2, -1, -1.5, -2: - y = elu(x, alpha).eval({x: X}) - utt.assert_allclose(y, np.where(X > 0, X, alpha * (np.exp(X) - 1))) - - -def test_selu(): - alpha = 1.6732632423543772848170429916717 - scale = 1.0507009873554804934193349852946 - - x = matrix("x") - rng = np.random.default_rng(utt.fetch_seed()) - X = rng.standard_normal((20, 30)).astype(config.floatX) - - y = selu(x).eval({x: X}) - utt.assert_allclose(y, np.where(X > 0, scale * X, scale * alpha * (np.exp(X) - 1))) - - -def test_binary_crossentropy_reshape(): - # Reported as https://github.com/Theano/Theano/issues/4086 - a = tensor4("a") - for c in ( - binary_crossentropy(sigmoid(a.reshape((-1, 1))), 1).sum(), - binary_crossentropy(sigmoid(a).reshape((-1, 1)), 1).sum(), - ): - - ga = pytensor.grad(c, a) - # This only works when "specialize" options are included - mode = pytensor.compile.get_default_mode().including("fast_run") - fga = pytensor.function([a], ga, mode=mode) - utt.assert_allclose( - fga(np.array([[[[30.0]]]], dtype=config.floatX)), - np.zeros((1, 1, 1, 1), dtype=config.floatX), - ) - - -TestSoftsign = makeBroadcastTester( - op=softsign, - expected=upcast_int8_nfunc( - lambda inputs: check_floatX(inputs, inputs / (1.0 + np.fabs(inputs))) - ), - good=_good_broadcast_unary_normal_float_no_complex, - name="SoftsignTester", -) - - -class TestSigmoidBinaryCrossentropy: - def test_matches_binary_crossentropy(self): - # Test sigmoid_binary_crossentropy(p, t) == - # binary_crossentropy(sigmoid(p), t). 
- - pred, target = inputs = vectors("pt") - - reference_val = binary_crossentropy(sigmoid(pred), target) - f_reference = pytensor.function(inputs, reference_val) - - test_val = sigmoid_binary_crossentropy(pred, target) - f_test = pytensor.function(inputs, test_val) - - rng = np.random.default_rng(utt.fetch_seed()) - pred, target = rng.standard_normal((2, 50)).astype(config.floatX) - test_inputs = [pred, 1 / (1 + np.exp(-target))] - - utt.assert_allclose(f_reference(*test_inputs), f_test(*test_inputs)) - - def test_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - pred, target = rng.standard_normal((2, 50)).astype(config.floatX) - test_inputs = [pred, 1 / (1 + np.exp(-target))] - - utt.verify_grad(sigmoid_binary_crossentropy, test_inputs) - - -def test_confusion_matrix(): - # Defining numpy implementation of confusion matrix - def numpy_conf_mat(actual, pred): - order = np.union1d(actual, pred) - colA = np.matrix(actual).T - colP = np.matrix(pred).T - oneHotA = colA.__eq__(order).astype("int64") - oneHotP = colP.__eq__(order).astype("int64") - conf_mat = np.dot(oneHotA.T, oneHotP) - conf_mat = np.asarray(conf_mat) - return [conf_mat, order] - - x = vector() - y = vector() - f = pytensor.function([x, y], confusion_matrix(x, y)) - list_inputs = [ - [[0, 1, 2, 1, 0], [0, 0, 2, 1, 2]], - [[2, 0, 2, 2, 0, 1], [0, 0, 2, 2, 0, 2]], - ] - - for case in list_inputs: - a = np.asarray(case[0]) - b = np.asarray(case[1]) - out_exp = numpy_conf_mat(a, b) - outs = f(case[0], case[1]) - for exp_res, out in zip(out_exp, outs): - utt.assert_allclose(exp_res, out) diff --git a/tests/tensor/nnet/test_batchnorm.py b/tests/tensor/nnet/test_batchnorm.py deleted file mode 100644 index 416060aa2b..0000000000 --- a/tests/tensor/nnet/test_batchnorm.py +++ /dev/null @@ -1,685 +0,0 @@ -from collections import OrderedDict - -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor.configdefaults import config -from pytensor.tensor.math import sum as at_sum -from pytensor.tensor.nnet import batchnorm -from pytensor.tensor.shape import specify_broadcastable -from pytensor.tensor.type import ( - TensorType, - matrix, - scalar, - tensor3, - tensor4, - tensor5, - vector, -) -from tests import unittest_tools as utt - - -def test_BNComposite(): - - with config.change_flags(compute_test_value="raise"): - - def bn_ref(x, G, B, M, V): - n = (x - M) / V - return n * G + B - - rng = np.random.default_rng(1234) - X = 1 + rng.random([10, 20]).astype("float32") - B = 1 + rng.random([20]).astype("float32") - G = 1 + rng.random([20]).astype("float32") - M = 1 + rng.random([20]).astype("float32") - V = 1 + rng.random([20]).astype("float32") - - x = matrix("x") - b = vector("b") - g = vector("g") - m = vector("m") - v = vector("v") - - x.tag.test_value = rng.random((2, 2)).astype(pytensor.config.floatX) - b.tag.test_value = rng.random(2).astype(pytensor.config.floatX) - g.tag.test_value = rng.random(2).astype(pytensor.config.floatX) - m.tag.test_value = rng.random(2).astype(pytensor.config.floatX) - v.tag.test_value = rng.random(2).astype(pytensor.config.floatX) - - bn_ref_op = bn_ref(x, g, b, m, v) - f_ref = pytensor.function([x, b, g, m, v], [bn_ref_op]) - res_ref = f_ref(X, G, B, M, V) - for mode in ["low_mem", "high_mem"]: - bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode) - f = pytensor.function([x, b, g, m, v], [bn_op]) - res = f(X, G, B, M, V) - utt.assert_allclose(res_ref, res) - - -def test_batch_normalization(): - def bn_ref(x, G, B, M, V): - n = (x - M) / V - 
return n * G + B - - rng = np.random.default_rng(1234) - X = 1 + rng.random([10, 20]).astype("float32") - B = 1 + rng.random([20]).astype("float32") - G = 1 + rng.random([20]).astype("float32") - M = 1 + rng.random([20]).astype("float32") - V = 1 + rng.random([20]).astype("float32") - - x = matrix("x") - b = vector("b") - g = vector("g") - m = vector("m") - v = vector("v") - - bn_ref_op = bn_ref(x, g, b, m, v) - f_ref = pytensor.function([x, g, b, m, v], [bn_ref_op]) - res_ref = f_ref(X, G, B, M, V) - for mode in ["low_mem", "high_mem"]: - bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode) - f = pytensor.function([x, g, b, m, v], [bn_op]) - res = f(X, G, B, M, V) - utt.assert_allclose(res_ref, res) - - def bn_f(inputs, gamma, beta, mean, std): - return batchnorm.batch_normalization( - inputs, gamma, beta, mean, std, mode=mode - ) - - utt.verify_grad(bn_f, [X, G, B, M, V]) - - bn_ref_op = bn_ref( - x, g, b, x.mean(axis=0, keepdims=True), x.std(axis=0, keepdims=True) - ) - f_ref = pytensor.function([x, b, g], [bn_ref_op]) - res_ref = f_ref(X, G, B) - for mode in ["low_mem", "high_mem"]: - bn_op = batchnorm.batch_normalization( - x, - g, - b, - x.mean(axis=0, keepdims=True), - x.std(axis=0, keepdims=True), - mode=mode, - ) - f = pytensor.function([x, b, g], [bn_op]) - res = f(X, G, B) - utt.assert_allclose(res_ref, res) - - def bn_f(inputs, gamma, beta, mean, std): - return batchnorm.batch_normalization( - inputs, gamma, beta, mean, std, mode=mode - ) - - utt.verify_grad( - bn_f, [X, G, B, X.mean(axis=0)[np.newaxis], X.std(axis=0)[np.newaxis]] - ) - - -def test_bn_feature_maps(): - def bn_ref(x, G, B, M, V): - n = (x - M) / V - return n * G + B - - rng = np.random.default_rng(1234) - X = 1 + rng.random([2, 3, 4, 4]).astype("float32") - B = 1 + rng.random([3]).astype("float32") - G = 1 + rng.random([3]).astype("float32") - M = 1 + rng.random([3]).astype("float32") - V = 1 + rng.random([3]).astype("float32") - - x = tensor4("x") - b = vector("b") - g = vector("g") - m = vector("m") - v = vector("v") - - bn_ref_op = bn_ref( - x, - g.dimshuffle("x", 0, "x", "x"), - b.dimshuffle("x", 0, "x", "x"), - m.dimshuffle("x", 0, "x", "x"), - v.dimshuffle("x", 0, "x", "x"), - ) - f_ref = pytensor.function([x, b, g, m, v], [bn_ref_op]) - res_ref = f_ref(X, G, B, M, V) - - for mode in ["low_mem", "high_mem"]: - bn_op = batchnorm.batch_normalization( - x, - g.dimshuffle("x", 0, "x", "x"), - b.dimshuffle("x", 0, "x", "x"), - m.dimshuffle("x", 0, "x", "x"), - v.dimshuffle("x", 0, "x", "x"), - mode=mode, - ) - f = pytensor.function([x, b, g, m, v], [bn_op]) - res = f(X, G, B, M, V) - utt.assert_allclose(res_ref, res) - - def conv_bn(inputs, gamma, beta, mean, std): - return batchnorm.batch_normalization( - inputs, - gamma.dimshuffle("x", 0, "x", "x"), - beta.dimshuffle("x", 0, "x", "x"), - mean.dimshuffle("x", 0, "x", "x"), - std.dimshuffle("x", 0, "x", "x"), - mode=mode, - ) - - utt.verify_grad(conv_bn, [X, G, B, M, V]) - - -@pytest.mark.slow -def test_batch_normalization_train(): - - for axes in ("per-activation", "spatial", (1, 2, 3, 4)): - for vartype in (tensor5, tensor3, vector): - x, scale, bias, running_mean, running_var = ( - vartype(n) - for n in ("x", "scale", "bias", "running_mean", "running_var") - ) - ndim = x.ndim - eps = 5e-3 # some non-standard value to test if it's used - running_average_factor = 0.3 - - # remove non-existing axes - if isinstance(axes, tuple): - axes = tuple(i for i in axes if i < ndim) - if len(axes) == 0: - continue - - # forward pass - ( - out, - x_mean, - 
x_invstd, - out_running_mean, - out_running_var, - ) = batchnorm.batch_normalization_train( - x, - scale, - bias, - axes, - eps, - running_average_factor, - running_mean, - running_var, - ) - # reference forward pass - if axes == "per-activation": - axes2 = (0,) - elif axes == "spatial": - axes2 = (0,) + tuple(range(2, ndim)) - else: - axes2 = axes - x_mean2 = x.mean(axis=axes2, keepdims=True) - x_var2 = x.var(axis=axes2, keepdims=True) - x_invstd2 = at.reciprocal(at.sqrt(x_var2 + eps)) - scale2 = specify_broadcastable(scale, *axes2) - bias2 = specify_broadcastable(bias, *axes2) - out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2 - m = at.cast(at.prod(x.shape) / at.prod(scale.shape), pytensor.config.floatX) - out_running_mean2 = ( - running_mean * (1 - running_average_factor) - + x_mean2 * running_average_factor - ) - out_running_var2 = ( - running_var * (1 - running_average_factor) - + (m / (m - 1)) * x_var2 * running_average_factor - ) - # backward pass - dy = vartype("dy") - grads = at.grad(None, wrt=[x, scale, bias], known_grads={out: dy}) - # reference backward pass - grads2 = at.grad(None, wrt=[x, scale, bias], known_grads={out2: dy}) - # second-order backward pass - dx = vartype("dinputs") - dscale = vartype("dscale") - dbias = vartype("dbias") - grad_grads = at.grad( - None, - wrt=[x, dy, scale], - known_grads=OrderedDict( - {grads[0]: dx, grads[1]: dscale, grads[2]: dbias} - ), - consider_constant=[ - x, - dy, - scale, - bias, - x_mean, - x_invstd, - running_mean, - running_var, - ], - return_disconnected="zero", - ) - # reference second-order backward pass - grad_grads2 = at.grad( - None, - wrt=[x, dy, scale], - known_grads=OrderedDict( - {grads2[0]: dx, grads2[1]: dscale, grads2[2]: dbias} - ), - consider_constant=[ - x, - dy, - scale, - bias, - x_mean2, - x_var2, - running_mean, - running_var, - ], - return_disconnected="zero", - ) - # compile - f = pytensor.function( - [x, scale, bias, running_mean, running_var, dy, dx, dscale, dbias], - [ - out, - x_mean, - x_invstd, - out_running_mean, - out_running_var, - out2, - x_mean2, - x_invstd2, - out_running_mean2, - out_running_var2, - ] - + grads - + grads2 - + grad_grads - + grad_grads2, - ) - # check if the abstract Ops have been replaced - assert not any( - isinstance( - n.op, - ( - batchnorm.AbstractBatchNormTrain, - batchnorm.AbstractBatchNormInference, - batchnorm.AbstractBatchNormTrainGrad, - ), - ) - for n in f.maker.fgraph.toposort() - ) - # run - for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (2, 3, 5, 5, 5)): - data_shape = data_shape[:ndim] - param_shape = tuple( - 1 if d in axes2 else s for d, s in enumerate(data_shape) - ) - - rng = np.random.default_rng(1234) - - X = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX) - Dy = -1 + 2 * rng.random(data_shape).astype(pytensor.config.floatX) - Scale = rng.random(param_shape).astype(pytensor.config.floatX) - Bias = rng.random(param_shape).astype(pytensor.config.floatX) - Running_mean = rng.random(param_shape).astype(pytensor.config.floatX) - Running_var = rng.random(param_shape).astype(pytensor.config.floatX) - Dx = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX) - Dscale = -1 + 2 * rng.random(param_shape).astype(pytensor.config.floatX) - Dbias = rng.random(param_shape).astype(pytensor.config.floatX) - - outputs = f( - X, Scale, Bias, Running_mean, Running_var, Dy, Dx, Dscale, Dbias - ) - # compare outputs - utt.assert_allclose(outputs[0], outputs[0 + 5]) # out - utt.assert_allclose(outputs[1], outputs[1 + 5]) # mean - 
utt.assert_allclose(outputs[2], outputs[2 + 5]) # invstd - utt.assert_allclose(outputs[3], outputs[3 + 5]) # running_mean - utt.assert_allclose( - np.nan_to_num(outputs[4]), np.nan_to_num(outputs[4 + 5]) - ) # running_var - # compare gradients - utt.assert_allclose(outputs[10], outputs[10 + 3], atol=1e-4) # dx - utt.assert_allclose( - outputs[11], outputs[11 + 3], rtol=2e-4, atol=1e-4 - ) # dscale - utt.assert_allclose(outputs[12], outputs[12 + 3]) # dbias - # compare second-order gradients - utt.assert_allclose(outputs[16], outputs[16 + 3], atol=1e-4) # ddx - utt.assert_allclose(outputs[17], outputs[17 + 3]) # ddy - utt.assert_allclose( - outputs[18], outputs[18 + 3], rtol=3e-4, atol=1e-4 - ) # ddscale - - -@pytest.mark.slow -def test_batch_normalization_train_grad_grad(): - - for axes in ("per-activation", "spatial", (1, 2, 3, 4)): - for vartype in (tensor5, tensor4, tensor3, matrix, vector): - # run these experiments with float64 for sufficient numerical stability - x, dy, scale, x_mean, x_invstd = ( - vartype(n, dtype="float64") - for n in ("x", "dy", "scale", "x_mean", "x_invstd") - ) - ndim = x.ndim - - # reference forward pass - if axes == "per-activation": - axes = (0,) - elif axes == "spatial": - axes = (0,) + tuple(range(2, ndim)) - else: - # remove non-existing axes - axes = tuple(i for i in axes if i < ndim) - if len(axes) == 0: - continue - - def bn_grad_wrt_inputs_f(x, dy, scale, x_mean, x_invstd): - g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(axes)( - x, dy, scale, x_mean, x_invstd - ) - return g_inputs - - def bn_grad_wrt_scale_f(x, dy, scale, x_mean, x_invstd): - g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(axes)( - x, dy, scale, x_mean, x_invstd - ) - return g_scale - - def bn_grad_wrt_bias_f(x, dy, scale, x_mean, x_invstd): - g_inputs, g_scale, g_bias = batchnorm.AbstractBatchNormTrainGrad(axes)( - x, dy, scale, x_mean, x_invstd - ) - return g_bias - - # run - for data_shape in ((4, 3, 3, 3, 3), (4, 3, 1, 1, 1), (2, 3, 5, 3, 2)): - data_shape = data_shape[:ndim] - param_shape = tuple( - 1 if d in axes else s for d, s in enumerate(data_shape) - ) - rng = np.random.default_rng(1234) - # force float64 for sufficient numerical stability - x_val = 4 + 3 * rng.random(data_shape).astype("float64") - dy_val = -1 + 2 * rng.random(data_shape).astype("float64") - scale_val = rng.random(param_shape).astype("float64") - x_mean_val = rng.random(param_shape).astype("float64") - x_invstd_val = rng.random(param_shape).astype("float64") - - utt.verify_grad( - bn_grad_wrt_inputs_f, - [x_val, dy_val, scale_val, x_mean_val, x_invstd_val], - abs_tol=5e-4, - rel_tol=5e-4, - ) - utt.verify_grad( - bn_grad_wrt_scale_f, - [x_val, dy_val, scale_val, x_mean_val, x_invstd_val], - ) - utt.verify_grad( - bn_grad_wrt_bias_f, - [x_val, dy_val, scale_val, x_mean_val, x_invstd_val], - ) - - -def test_batch_normalization_train_without_running_averages(): - # compile and run batch_normalization_train without running averages - - x, scale, bias, dy = ( - tensor4("x"), - tensor4("scale"), - tensor4("bias"), - tensor4("dy"), - ) - data_shape = (5, 10, 30, 25) - param_shape = (1, 10, 30, 25) - - # forward pass - out, x_mean, x_invstd = batchnorm.batch_normalization_train( - x, scale, bias, "per-activation" - ) - # backward pass - grads = at.grad(None, wrt=[x, scale, bias], known_grads={out: dy}) - # compile - f = pytensor.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads) - # check if the abstract Ops have been replaced - assert not any( - isinstance( - 
n.op, - ( - batchnorm.AbstractBatchNormTrain, - batchnorm.AbstractBatchNormInference, - batchnorm.AbstractBatchNormTrainGrad, - ), - ) - for n in f.maker.fgraph.toposort() - ) - # run - rng = np.random.default_rng(1234) - X = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX) - Dy = -1 + 2 * rng.random(data_shape).astype(pytensor.config.floatX) - Scale = rng.random(param_shape).astype(pytensor.config.floatX) - Bias = rng.random(param_shape).astype(pytensor.config.floatX) - f(X, Scale, Bias, Dy) - - -def test_batch_normalization_train_broadcast(): - for axes in ("per-activation", "spatial", (1, 2, 3, 4)): - for vartype in (tensor5, tensor4, tensor3, matrix, vector): - x = vartype("x") - ndim = x.ndim - eps = 5e-3 # some non-standard value to test if it's used - running_average_factor = 0.3 - - # remove non-existing axes - if isinstance(axes, tuple): - axes = tuple(i for i in axes if i < ndim) - if len(axes) == 0: - continue - - # convert axes to explicit list - if axes == "per-activation": - axes2 = (0,) - elif axes == "spatial": - axes2 = (0,) + tuple(range(2, ndim)) - else: - axes2 = axes - - # compute axes for parameter tensors - non_bc_axes = tuple(i for i in range(ndim) if i not in axes2) - params_dimshuffle = ["x"] * ndim - for i, axis in enumerate(non_bc_axes): - params_dimshuffle[axis] = i - - # construct non-broadcasted parameter variables - param_type = TensorType(x.dtype, shape=(None,) * len(non_bc_axes)) - scale, bias, running_mean, running_var = ( - param_type(n) for n in ("scale", "bias", "running_mean", "running_var") - ) - - # broadcast parameter variables - scale_bc = scale.dimshuffle(params_dimshuffle) - bias_bc = bias.dimshuffle(params_dimshuffle) - running_mean_bc = running_mean.dimshuffle(params_dimshuffle) - running_var_bc = running_var.dimshuffle(params_dimshuffle) - - # batch_normalization_train with original, non-broadcasted variables - train_non_bc = batchnorm.batch_normalization_train( - x, - scale, - bias, - axes, - eps, - running_average_factor, - running_mean, - running_var, - ) - # batch_normalization_train with broadcasted variables - train_bc = batchnorm.batch_normalization_train( - x, - scale_bc, - bias_bc, - axes, - eps, - running_average_factor, - running_mean_bc, - running_var_bc, - ) - train_bc = tuple( - [train_bc[0]] + [r.dimshuffle(non_bc_axes) for r in train_bc[1:]] # out - ) - - # batch_normalization_test with original, non-broadcasted variables - test_non_bc = batchnorm.batch_normalization_test( - x, scale, bias, running_mean, running_var, axes, eps - ) - # batch_normalization_test with broadcasted variables - test_bc = batchnorm.batch_normalization_test( - x, scale_bc, bias_bc, running_mean_bc, running_var_bc, axes, eps - ) - - # subtract the results of the non-broadcasted and broadcasted calls - results_non_bc = train_non_bc + (test_non_bc,) - results_bc = train_bc + (test_bc,) - results = [abs(r - r_bc) for (r, r_bc) in zip(results_non_bc, results_bc)] - - # compile to compute all differences - f = pytensor.function( - [x, scale, bias, running_mean, running_var], at_sum(sum(results)) - ) - - # the paired ops are exactly the same, so the optimizer should have - # collapsed the sum of differences to a constant zero - nodes = f.maker.fgraph.toposort() - if pytensor.config.mode != "FAST_COMPILE": - assert len(nodes) == 1 - assert isinstance(nodes[0].op, pytensor.compile.DeepCopyOp) - inputs = [ - np.asarray(np.random.random((4,) * n), x.dtype) - for n in [ - x.ndim, - scale.ndim, - bias.ndim, - running_mean.ndim, - running_var.ndim, - ] 
- ] - assert 0.0 == f(*inputs) - - -@pytest.mark.slow -def test_batch_normalization_test(): - for axes in ("per-activation", "spatial", (1, 2, 3, 4)): - for vartype in (tensor5, tensor3, vector): - x, scale, bias, mean, var = ( - vartype(n) for n in ("x", "scale", "bias", "mean", "var") - ) - ndim = x.ndim - eps = 5e-3 # some non-standard value to test if it's used - - # remove non-existing axes - if isinstance(axes, tuple): - axes = tuple(i for i in axes if i < ndim) - if len(axes) == 0: - continue - - # forward pass - out = batchnorm.batch_normalization_test( - x, scale, bias, mean, var, axes, eps - ) - # reference forward pass - if axes == "per-activation": - axes2 = (0,) - elif axes == "spatial": - axes2 = (0,) + tuple(range(2, ndim)) - else: - axes2 = axes - scale2, bias2, mean2, var2 = ( - specify_broadcastable(t, *axes2) for t in (scale, bias, mean, var) - ) - out2 = (x - mean2) * (scale2 / at.sqrt(var2 + eps)) + bias2 - # backward pass - dy = vartype("dy") - grads = at.grad( - None, wrt=[x, scale, bias, mean, var], known_grads={out: dy} - ) - # reference backward pass - grads2 = at.grad( - None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy} - ) - # compile - f = pytensor.function( - [x, scale, bias, mean, var, dy], [out, out2] + grads + grads2 - ) - # check if the abstract Ops have been replaced - assert not any( - isinstance( - n.op, - ( - batchnorm.AbstractBatchNormTrain, - batchnorm.AbstractBatchNormInference, - batchnorm.AbstractBatchNormTrainGrad, - ), - ) - for n in f.maker.fgraph.toposort() - ) - # run - for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)): - data_shape = data_shape[:ndim] - param_shape = tuple( - 1 if d in axes2 else s for d, s in enumerate(data_shape) - ) - rng = np.random.default_rng(1234) - X = 4 + 3 * rng.random(data_shape).astype(pytensor.config.floatX) - Dy = -1 + 2 * rng.random(data_shape).astype(pytensor.config.floatX) - Scale = rng.random(param_shape).astype(pytensor.config.floatX) - Bias = rng.random(param_shape).astype(pytensor.config.floatX) - Mean = rng.random(param_shape).astype(pytensor.config.floatX) - Var = rng.random(param_shape).astype(pytensor.config.floatX) - outputs = f(X, Scale, Bias, Mean, Var, Dy) - # compare outputs - utt.assert_allclose(outputs[0], outputs[1]) # out - # compare gradients - utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5) # dx - utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5) # dscale - utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias - utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean - utt.assert_allclose( - outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5 - ) # dvar - - -def test_batch_normalization_broadcastable(): - # check if the broadcastable pattern is preserved by the optimizations - x, dy, scale, bias, mean, var = ( - scalar(n).dimshuffle(["x"] * 5) - for n in ("x", "dy", "scale", "bias", "mean", "var") - ) - - # forward pass - out_train, x_mean, x_invstd = batchnorm.batch_normalization_train( - x, scale, bias, "spatial" - ) - out_test = batchnorm.batch_normalization_test(x, scale, bias, mean, var, "spatial") - # backward pass - grads_train = at.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy}) - grads_test = at.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy}) - # compile - f = pytensor.function( - [x, scale, bias, mean, var, dy], - [out_train, x_mean, x_invstd, out_test] + grads_train + grads_test, - ) - assert not any( - isinstance( - n.op, - ( - batchnorm.AbstractBatchNormTrain, - 
batchnorm.AbstractBatchNormInference, - batchnorm.AbstractBatchNormTrainGrad, - ), - ) - for n in f.maker.fgraph.toposort() - ) diff --git a/tests/tensor/nnet/test_blocksparse.py b/tests/tensor/nnet/test_blocksparse.py deleted file mode 100644 index dca1a3f7cd..0000000000 --- a/tests/tensor/nnet/test_blocksparse.py +++ /dev/null @@ -1,338 +0,0 @@ -""" - Tests for block sparse dot -""" -import numpy as np - -import pytensor -import pytensor.tensor as at -import tests.unittest_tools as utt -from pytensor.tensor.elemwise import DimShuffle -from pytensor.tensor.nnet.blocksparse import ( - SparseBlockGemv, - SparseBlockOuter, - sparse_block_dot, - sparse_block_gemv, - sparse_block_outer, -) -from pytensor.tensor.type import fmatrix, ftensor3, ftensor4, imatrix - - -class TestBlockSparseGemvAndOuter(utt.InferShapeTester): - def setup_method(self): - - mode = None - if pytensor.config.mode == "FAST_COMPILE": - mode = "FAST_RUN" - self.mode = pytensor.compile.get_mode(mode).excluding("constant_folding") - self.gemv_op = sparse_block_gemv - self.outer_op = sparse_block_outer - self.gemv_class = SparseBlockGemv - self.outer_class = SparseBlockOuter - super().setup_method() - - @staticmethod - def gemv_data(): - - nInputBlock = 8 - nOutputBlock = 7 - inputSize = 6 - outputSize = 5 - inputWindowSize = 4 - outputWindowSize = 3 - batchSize = 2 - - rng = np.random.default_rng(230920) - - input = rng.standard_normal((batchSize, inputWindowSize, inputSize)).astype( - "float32" - ) - inputIndice = np.vstack( - rng.permutation(nInputBlock)[:inputWindowSize] for _ in range(batchSize) - ).astype("int32") - outputIndice = np.vstack( - rng.permutation(nOutputBlock)[:outputWindowSize] for _ in range(batchSize) - ).astype("int32") - weight = rng.standard_normal( - (nInputBlock, nOutputBlock, inputSize, outputSize) - ).astype("float32") - bias = rng.standard_normal((nOutputBlock, outputSize)).astype("float32") - - return weight, input, inputIndice, bias, outputIndice - - @staticmethod - def outer_data(): - nInputBlock = 8 - nOutputBlock = 7 - xSize = 6 - ySize = 5 - xWindowSize = 4 - yWindowSize = 3 - batchSize = 2 - - rng = np.random.default_rng(230920) - - o = rng.standard_normal((nInputBlock, nOutputBlock, xSize, ySize)).astype( - "float32" - ) - x = rng.standard_normal((batchSize, xWindowSize, xSize)).astype("float32") - y = rng.standard_normal((batchSize, yWindowSize, ySize)).astype("float32") - xIdx = np.vstack( - rng.integers(0, nInputBlock, size=xWindowSize) for _ in range(batchSize) - ).astype("int32") - yIdx = np.vstack( - rng.integers(0, nOutputBlock, size=yWindowSize) for _ in range(batchSize) - ).astype("int32") - - return o, x, y, xIdx, yIdx - - @staticmethod - def gemv_numpy(o, W, h, iIdx, oIdx): - for b in range(o.shape[0]): - for j in range(o.shape[1]): - outputIdx = oIdx[b, j] - for i in range(h.shape[1]): - inputIdx = iIdx[b, i] - w = W[inputIdx, outputIdx] - o[b, j, :] += np.dot(h[b, i], w) - return o - - @staticmethod - def gemv_numpy2(o, W, h, iIdx, oIdx): - """ - Other implementation - """ - from numpy import ix_ - - for b in range(o.shape[0]): - w = W[ix_(iIdx[b], oIdx[b])].swapaxes(1, 2) - w = w.reshape((w.shape[0] * w.shape[1], w.shape[2] * w.shape[3])) - o[b] += np.dot(h[b].ravel(), w).reshape(o.shape[1:]) - return o - - @staticmethod - def gemv_numpy3(o, W, h, iIdx, oIdx): - """ - Other implementation - """ - from numpy import ix_ - - for b in range(o.shape[0]): - w = W[ix_(iIdx[b], oIdx[b])] - # The next three lines do the same operation. 
The last one is the - # fastest - # o[b] += (h[b][:, None, :, None] * w).sum(axis=(0, 2)) - # o[b] += np.tensordot(h[b], w, [(0,1),(0,2)]) - o[b] += np.einsum("ik,ijkl", h[b], w) - return o - - @staticmethod - def outer_numpy(o, x, y, xIdx, yIdx): - for b in range(x.shape[0]): - for i in range(xIdx.shape[1]): - for j in range(yIdx.shape[1]): - o[xIdx[b, i], yIdx[b, j]] += np.outer(x[b, i, :], y[b, j, :]) - return o - - def test_sparseblockdot(self): - # Compares the numpy version of sparseblockgemv to sparse_block_dot. - - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = imatrix() - oIdx = imatrix() - - o = sparse_block_dot(W, h, iIdx, b, oIdx) - - f = pytensor.function([W, h, iIdx, b, oIdx], o, mode=self.mode) - - W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data() - - th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val) - - ref_out = self.gemv_numpy( - b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val - ) - - utt.assert_allclose(ref_out, th_out) - - def test_sparseblockgemv(self): - # Compares the numpy and pytensor versions of sparseblockgemv. - - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = imatrix() - oIdx = imatrix() - - o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx) - - f = pytensor.function([W, h, iIdx, b, oIdx], o, mode=self.mode) - - W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data() - - th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val) - ref_out = self.gemv_numpy( - b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val - ) - - utt.assert_allclose(ref_out, th_out) - - def test_sparseblockgemvF(self): - # Test the fortran order for W (which can happen in the grad for some - # graphs). - - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = imatrix() - oIdx = imatrix() - - o = self.gemv_op( - b.take(oIdx, axis=0), - DimShuffle((False, False, False, False), (0, 1, 3, 2))( - at.as_tensor_variable(W) - ), - h, - iIdx, - oIdx, - ) - - f = pytensor.function([W, h, iIdx, b, oIdx], o, mode=self.mode) - - W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data() - - th_out = f(np.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val) - ref_out = self.gemv_numpy( - b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val - ) - - utt.assert_allclose(ref_out, th_out) - - def test_sparseblockgemv_grad(self): - - W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data() - - iIdx = at.constant(iIdx_val) - oIdx = at.constant(oIdx_val) - - def metaop(b, h, W): - return sparse_block_dot(W, h, iIdx, b, oIdx) - - def op(b, h, W): - return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx) - - eps = 3e-3 - utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode, eps=eps) - utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode, eps=eps) - - def test_sparseblockgemv_grad_1(self): - # Test that we correctly handle cases where dimensions are 1. 
- rng = np.random.default_rng(230920) - - h_val = rng.standard_normal((1, 1, 1)).astype("float32") - iIdx_val = rng.permutation(1)[:1][None, :] - oIdx_val = rng.permutation(1)[:1][None, :] - W_val = rng.standard_normal((1, 1, 1, 1)).astype("float32") - b_val = rng.standard_normal((1, 1)).astype("float32") - - iIdx = at.constant(iIdx_val) - oIdx = at.constant(oIdx_val) - - def metaop(b, h, W): - return sparse_block_dot(W, h, iIdx, b, oIdx) - - def op(b, h, W): - return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx) - - utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode) - utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode) - - def test_sparseblockgemv_grad_shape(self): - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = imatrix() - oIdx = imatrix() - - o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx) - go = pytensor.grad(o.sum(), [b, W, h]) - - f = pytensor.function([W, h, iIdx, b, oIdx], go, mode=self.mode) - - W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data() - - # just make sure that it runs correctly and all the shapes are ok. - b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val) - - assert b_g.shape == b_val.shape - assert h_g.shape == h_val.shape - assert W_g.shape == W_val.shape - - def test_sparseblockouter(self): - o = ftensor4() - x = ftensor3() - y = ftensor3() - xIdx = imatrix() - yIdx = imatrix() - - out = self.outer_op(o, x, y, xIdx, yIdx) - - f = pytensor.function( - [o, x, y, xIdx, yIdx], out, on_unused_input="warn", mode=self.mode - ) - - ( - o_val, - x_val, - y_val, - xIdx_val, - yIdx_val, - ) = self.outer_data() - - th_out = f(o_val, x_val, y_val, xIdx_val, yIdx_val) - ref_out = self.outer_numpy(o_val, x_val, y_val, xIdx_val, yIdx_val) - - utt.assert_allclose(ref_out, th_out) - - def test_dot_infershape(self): - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = imatrix() - oIdx = imatrix() - - self._compile_and_check( - [W, h, iIdx, b, oIdx], - [sparse_block_dot(W, h, iIdx, b, oIdx)], - self.gemv_data(), - self.gemv_class, - ) - - def test_gemv_infershape(self): - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = imatrix() - oIdx = imatrix() - - self._compile_and_check( - [W, h, iIdx, b, oIdx], - [self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)], - self.gemv_data(), - self.gemv_class, - ) - - def test_outer_infershape(self): - o = ftensor4() - x = ftensor3() - y = ftensor3() - xIdx = imatrix() - yIdx = imatrix() - - self._compile_and_check( - [o, x, y, xIdx, yIdx], - [self.outer_op(o, x, y, xIdx, yIdx)], - self.outer_data(), - self.outer_class, - ) diff --git a/tests/tensor/nnet/test_conv.py b/tests/tensor/nnet/test_conv.py deleted file mode 100644 index cae13390a8..0000000000 --- a/tests/tensor/nnet/test_conv.py +++ /dev/null @@ -1,784 +0,0 @@ -import time - -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor.compile.mode import Mode -from pytensor.tensor.exceptions import NotScalarConstantError -from pytensor.tensor.math import _allclose, exp -from pytensor.tensor.nnet import conv, conv2d -from pytensor.tensor.type import dmatrix, dtensor3, dtensor4, dvector, scalar, tensor4 -from tests import unittest_tools as utt - - -@pytest.mark.skipif( - pytensor.config.cxx == "", - reason="conv2d tests need SciPy or a c++ compiler", -) -class TestConv2D(utt.InferShapeTester): - # This class contains tests for the legacy 2d convolution, - # but will also be inherited from for other implementations - mode = None - dtype = pytensor.config.floatX - # This will be 
set to the appropriate function in the inherited classes. - # The call to `staticmethod` is necessary to prevent Python from passing - # `self` as the first argument. - conv2d = staticmethod(conv2d) - - def setup_method(self): - self.input = tensor4("input", dtype=self.dtype) - self.input.name = "default_V" - self.filters = tensor4("filters", dtype=self.dtype) - self.filters.name = "default_filters" - super().setup_method() - - def validate( - self, - image_shape, - filter_shape, - border_mode="valid", - subsample=(1, 1), - N_image_shape=None, - N_filter_shape=None, - input=None, - filters=None, - unroll_batch=None, - unroll_kern=None, - unroll_patch=None, - verify_grad=True, - should_raise=False, - ): - """ - :param image_shape: The constant shape info passed to conv2d. - :param filter_shape: The constant shape info passed to conv2d. - - :param N_image_shape: None(default to image_shape) or tuple of - 4 elements with the shape of the input image - - :param N_filter_shape: None(default to filter_shape) or tuple - of 4 elements with the shape of the - input filter - - """ - if N_image_shape is None: - N_image_shape = [ - at.get_scalar_constant_value(at.as_tensor_variable(x)) - for x in image_shape - ] - if N_filter_shape is None: - N_filter_shape = [ - at.get_scalar_constant_value(at.as_tensor_variable(x)) - for x in filter_shape - ] - - if input is None: - input = self.input - if not filters: - filters = self.filters - - # PYTENSOR IMPLEMENTATION - - # we create a symbolic function so that verify_grad can work - def sym_conv2d(input, filters): - # define pytensor graph and function - input.name = "input" - filters.name = "filters" - with pytest.warns(DeprecationWarning): - rval = conv.conv2d( - input, - filters, - image_shape, - filter_shape, - border_mode, - subsample, - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - rval.name = "conv_output" - return rval - - output = sym_conv2d(input, filters) - output.name = f"conv2d({input.name},{filters.name})" - pytensor_conv = pytensor.function([input, filters], output, mode=self.mode) - - # initialize input and compute result - image_data = np.random.random(N_image_shape).astype(self.dtype) - filter_data = np.random.random(N_filter_shape).astype(self.dtype) - try: - pytensor_output = pytensor_conv(image_data, filter_data) - except ValueError: - if not should_raise: - raise - return - else: - if should_raise: - raise Exception("ConvOp should have generated an error") - - # REFERENCE IMPLEMENTATION - s = 1.0 - orig_image_data = image_data - if border_mode != "full": - s = -1.0 - out_shape2d = ( - np.array(N_image_shape[-2:]) + s * np.array(N_filter_shape[-2:]) - s - ) - out_shape2d = np.ceil(out_shape2d / np.array(subsample)) - # avoid numpy deprecation - out_shape2d = out_shape2d.astype("int32") - out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d) - ref_output = np.zeros(out_shape) - - # loop over output feature maps - ref_output.fill(0) - if border_mode == "full": - image_data2 = np.zeros( - ( - N_image_shape[0], - N_image_shape[1], - N_image_shape[2] + 2 * N_filter_shape[2] - 2, - N_image_shape[3] + 2 * N_filter_shape[3] - 2, - ) - ) - image_data2[ - :, - :, - N_filter_shape[2] - 1 : N_filter_shape[2] - 1 + N_image_shape[2], - N_filter_shape[3] - 1 : N_filter_shape[3] - 1 + N_image_shape[3], - ] = image_data - image_data = image_data2 - N_image_shape = image_data.shape - for bb in range(N_image_shape[0]): - for nn in range(N_filter_shape[0]): - for im0 in range(N_image_shape[1]): - 
filter2d = filter_data[nn, im0, :, :] - image2d = image_data[bb, im0, :, :] - for row in range(ref_output.shape[2]): - irow = row * subsample[0] # image row - for col in range(ref_output.shape[3]): - icol = col * subsample[1] # image col - ref_output[bb, nn, row, col] += ( - image2d[ - irow : irow + N_filter_shape[2], - icol : icol + N_filter_shape[3], - ] - * filter2d[::-1, ::-1] - ).sum() - - assert _allclose(pytensor_output, ref_output) - - # TEST GRADIENT - if verify_grad: - utt.verify_grad(sym_conv2d, [orig_image_data, filter_data]) - - def test_basic1(self): - # Tests that basic convolutions work for odd and even - # dimensions of image and filter shapes, as well as rectangular - # images and filters. - - self.validate((2, 2, 3, 3), (2, 2, 2, 2), "valid", verify_grad=False) - - def test_basic(self): - # Tests that basic convolutions work for odd and even - # dimensions of image and filter shapes, as well as rectangular - # images and filters. - - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", verify_grad=False) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid") - self.validate((3, 2, 7, 5), (5, 2, 3, 2), "valid", verify_grad=False) - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "full", verify_grad=False) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full") - # test filter same size as input - - def test_uint_image_shape_datatype(self): - # Tests for uint datatype in image_shape. - - self.validate((2, 2, 3, np.uint8(3)), (3, 2, 3, 3), "valid", verify_grad=False) - self.validate((np.uint16(2), 2, 3, 3), (3, 2, 3, 3), "valid", verify_grad=False) - self.validate((2, np.uint32(2), 3, 3), (3, 2, 3, 3), "valid", verify_grad=False) - - def test_uint_filter_shape_datatype(self): - # Tests for uint datatype in filter_shape - - self.validate((3, 2, 3, 3), (2, 2, 3, np.uint8(3)), "valid", verify_grad=False) - self.validate((3, 2, 3, 3), (np.uint16(2), 2, 3, 3), "valid", verify_grad=False) - self.validate((3, 2, 3, 3), (2, np.uint32(2), 3, 3), "valid", verify_grad=False) - - def test_img_kernel_same_shape(self): - self.validate((3, 2, 3, 3), (4, 2, 3, 3), "full") - self.validate((3, 2, 3, 3), (4, 2, 3, 3), "valid") - - def test_unroll_patch_true(self): - # Test basic convs with True. - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", unroll_patch=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", unroll_patch=True) - self.validate( - (3, 2, 3, 3), (4, 2, 3, 3), "valid", unroll_patch=True, verify_grad=False - ) - - def test_unroll_patch_false(self): - # Test basic convs with False. - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", unroll_patch=False) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", unroll_patch=False) - self.validate( - (3, 2, 3, 3), (4, 2, 3, 3), "valid", unroll_patch=False, verify_grad=False - ) - - def test_unroll_patch_true_fail(self): - # Test basic convs with True. - - self.validate( - (3, 2, 7, 5), - (5, 2, 2, 3), - "valid", - unroll_patch=True, - N_image_shape=(1, 3, 3, 3), - N_filter_shape=(6, 3, 2, 2), - should_raise=True, - ) - self.validate( - (3, 2, 7, 5), - (5, 2, 2, 3), - "full", - unroll_patch=True, - N_image_shape=(1, 3, 3, 3), - N_filter_shape=(6, 3, 2, 2), - should_raise=True, - ) - self.validate( - (3, 2, 3, 3), - (4, 2, 3, 3), - "valid", - unroll_patch=True, - N_image_shape=(1, 3, 3, 3), - N_filter_shape=(6, 3, 2, 2), - should_raise=True, - ) - - def test_unroll_special(self): - # (unroll_kern, unroll_batch) in (0,1),(1,0) is special case. 
- - self.validate((6, 2, 3, 3), (3, 2, 2, 2), "valid", unroll_batch=1) - - def test_unroll_batch(self): - # Test mini-batch unrolling for various legal values. - - # mini-batch of size 6 is multiple of 2 and 3. Should work. - self.validate( - (6, 2, 3, 3), (3, 2, 2, 2), "valid", unroll_batch=2, verify_grad=False - ) - self.validate( - (6, 2, 3, 3), (3, 2, 2, 2), "valid", unroll_batch=3, verify_grad=False - ) - - def test_unroll_kern(self): - # Test kernel unrolling for various legal values. - - # 6 filters is a multiple of 2 and 3. Should work. - self.validate( - (2, 3, 3, 3), (6, 3, 2, 2), "valid", unroll_kern=2, verify_grad=False - ) - self.validate( - (2, 3, 3, 3), (6, 3, 2, 2), "valid", unroll_kern=3, verify_grad=False - ) - - def test_unroll_batch_kern(self): - # Test mini-batch unrolling with kernel unrolling for various - # legal values. - - # mini-batch of size 6 is multiple of 2 and 3. Should work. - self.validate( - (6, 2, 3, 3), - (3, 2, 2, 2), - "valid", - unroll_batch=2, - unroll_kern=3, - verify_grad=False, - ) - self.validate( - (6, 2, 3, 3), - (3, 2, 2, 2), - "valid", - unroll_batch=3, - unroll_kern=3, - verify_grad=False, - ) - # 6 filters is a multiple of 2 and 3. Should work. - self.validate( - (2, 3, 3, 3), - (6, 3, 2, 2), - "valid", - unroll_batch=2, - unroll_kern=2, - verify_grad=False, - ) - self.validate( - (2, 3, 3, 3), - (6, 3, 2, 2), - "valid", - unroll_batch=2, - unroll_kern=3, - verify_grad=False, - ) - - def test_unroll_batch_kern_fail(self): - # Test mini-batch unrolling with kernel unrolling for various - # legal values, but pass bad input. All those test must - # generate errors - - # mini-batch of size 6 is multiple of 2 and 3. Should work. - self.validate( - (6, 2, 3, 3), - (3, 2, 2, 2), - "valid", - unroll_batch=2, - unroll_kern=3, - N_image_shape=(7, 2, 3, 3), - N_filter_shape=(3, 2, 2, 2), - should_raise=True, - ) - self.validate( - (6, 2, 3, 3), - (3, 2, 2, 2), - "valid", - unroll_batch=3, - unroll_kern=3, - N_image_shape=(6, 2, 3, 3), - N_filter_shape=(4, 2, 2, 2), - should_raise=True, - ) - self.validate( - (2, 3, 3, 3), - (6, 3, 2, 2), - "valid", - unroll_batch=2, - unroll_kern=2, - N_image_shape=(1, 3, 3, 3), - N_filter_shape=(6, 3, 2, 2), - should_raise=True, - ) - self.validate( - (2, 3, 3, 3), - (6, 3, 2, 2), - "valid", - unroll_batch=2, - unroll_kern=3, - N_image_shape=(2, 3, 3, 3), - N_filter_shape=(5, 3, 2, 2), - should_raise=True, - ) - - def test_subsample(self): - # Tests convolution where subsampling != (1,1) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", subsample=(2, 2)) - - # Fails as of 2012-07-11 - with pytest.raises(NotImplementedError): - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "full", subsample=(3, 3)) - - # Fails as of 2017-08-10 - with pytest.raises(NotImplementedError): - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", subsample=(2, 2)) - with pytest.raises(NotImplementedError): - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", subsample=(2, 1)) - with pytest.raises(NotImplementedError): - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "valid", subsample=(3, 3)) - - def test_shape_Constant_tensor(self): - # Tests convolution where the {image,filter}_shape is a Constant tensor. 
- - as_t = at.as_tensor_variable - self.validate((as_t(3), as_t(2), as_t(7), as_t(5)), (5, 2, 2, 3), "valid") - self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), "valid") - self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), "valid") - self.validate((3, 2, 7, 5), (as_t(5), as_t(2), as_t(2), as_t(3)), "valid") - self.validate((3, 2, 7, 5), as_t([5, 2, 2, 3]), "valid") - self.validate((3, 2, 7, 5), as_t((5, 2, 2, 3)), "valid") - self.validate(as_t([3, 2, 7, 5]), as_t([5, 2, 2, 3]), "full") - - def test_invalid_filter_shape(self): - # Tests scenario where filter_shape[1] != input_shape[1] - - with pytest.raises(AssertionError): - self.validate((3, 2, 8, 8), (4, 3, 5, 5), "valid") - - def test_invalid_input_shape(self): - # Tests that when the shape given at build time is not the same as - # run time we raise an error - - for unroll_batch in [None, 1, 3]: - for unroll_kern in [None, 2, 4]: - for unroll_patch in [None, True, False]: - for mode in ["valid", "full"]: - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_image_shape=(2, 2, 8, 8), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_image_shape=(3, 1, 8, 8), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_image_shape=(3, 2, 7, 8), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_image_shape=(3, 2, 8, 7), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_filter_shape=(3, 2, 5, 5), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_filter_shape=(4, 1, 5, 5), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_filter_shape=(4, 2, 6, 5), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - with pytest.raises(ValueError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, 5), - mode, - N_filter_shape=(4, 2, 5, 6), - unroll_batch=unroll_batch, - unroll_kern=unroll_kern, - unroll_patch=unroll_patch, - ) - - def test_missing_info(self): - # Test convolutions for various pieces of missing info. 
- - self.validate( - None, None, N_image_shape=(3, 2, 8, 8), N_filter_shape=(4, 2, 5, 5) - ) - self.validate( - (3, 2, None, None), - None, - N_image_shape=(3, 2, 8, 8), - N_filter_shape=(4, 2, 5, 5), - ) - self.validate( - (None, 2, None, None), - (None, 2, 5, 5), - N_image_shape=(3, 2, 8, 8), - N_filter_shape=(4, 2, 5, 5), - ) - self.validate( - (3, 2, 8, 8), - (4, 2, None, 5), - N_image_shape=(3, 2, 8, 8), - N_filter_shape=(4, 2, 5, 5), - ) - self.validate( - (3, 2, 8, 8), - (4, 2, 5, None), - N_image_shape=(3, 2, 8, 8), - N_filter_shape=(4, 2, 5, 5), - ) - - def test_wrong_info(self): - # Test convolutions when we don't give a constant as shape information - - i = pytensor.scalar.basic.int32() - with pytest.raises(NotScalarConstantError): - self.validate( - (3, 2, 8, i), - (4, 2, 5, 5), - N_image_shape=(3, 2, 8, 8), - N_filter_shape=(4, 2, 5, 5), - ) - with pytest.raises(NotScalarConstantError): - self.validate( - (3, 2, 8, 8), - (4, 2, 5, i), - N_image_shape=(3, 2, 8, 8), - N_filter_shape=(4, 2, 5, 5), - ) - - def test_full_mode(self): - # Tests basic convolution in full mode and case where filter - # is larger than the input image. - - self.validate((3, 2, 5, 5), (4, 2, 8, 8), "full") - - def f(): - self.validate((3, 2, 5, 5), (4, 2, 8, 8), "valid") - - with pytest.raises(Exception): - f() - - def test_wrong_input(self): - # Make sure errors are raised when image and kernel are not 4D tensors - - with pytest.raises(Exception): - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dmatrix()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=dvector()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dtensor3()) - - def test_gcc_crash(self): - # gcc 4.3.0 20080428 (Red Hat 4.3.0-8) - # - # crashed in this following case. I changed the c code to don't hit - # gcc bug. 
So it should not crash anymore - - self.validate((1, 10, 213, 129), (46, 10, 212, 1), "valid", verify_grad=False) - - def speed(self): - n_calls = 20000 - print("n_calls", n_calls) - for border_mode in ["valid", "full"]: - print() - print(border_mode) - for openmp in [False, True]: - print("OpenMP", openmp) - image_shapes = [ - (1, 5, 6, 6), - (10, 5, 6, 6) - # (10, 10, 16, 16), - # (10, 10, 32, 32)] - ] - print("image_shape", image_shapes) - for image_shape in image_shapes: - filter_shapes = [(1, 5, 4, 4), (2, 5, 4, 4), (5, 5, 4, 4)] - print("filter_shapes", filter_shapes) - for filter_shape in filter_shapes: - - input = pytensor.shared(np.random.random(image_shape)) - filters = pytensor.shared(np.random.random(filter_shape)) - - with pytest.warns(DeprecationWarning): - output = conv.conv2d( - input, - filters, - image_shape, - filter_shape, - border_mode, - unroll_patch=True, - openmp=openmp, - ) - mode = Mode( - linker=pytensor.link.vm.VMLinker( - allow_gc=False, use_cloop=True - ) - ) - pytensor_conv = pytensor.function([], output, mode=mode) - t1 = time.perf_counter() - pytensor_conv.vm(n_calls=n_calls) - t2 = time.perf_counter() - print(t2 - t1, end=" ") - print() - - def test_infer_shape(self): - # Note: infer_shape is incomplete and thus input and filter shapes - # must be provided explicitly - - rng = np.random.default_rng(280284) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - adtens = dtensor4() - bdtens = dtensor4() - aivec_val = [4, 5, 6, 3] - bivec_val = [7, 5, 3, 2] - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [ - conv.conv2d( - adtens, bdtens, aivec_val, bivec_val, border_mode="valid" - ) - ], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - aivec_val = [6, 2, 8, 3] - bivec_val = [4, 2, 5, 3] - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [ - conv.conv2d( - adtens, bdtens, aivec_val, bivec_val, border_mode="valid" - ) - ], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - aivec_val = [3, 6, 7, 5] - bivec_val = [5, 6, 3, 2] - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [ - conv.conv2d( - adtens, bdtens, aivec_val, bivec_val, border_mode="valid" - ) - ], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - aivec_val = [3, 6, 7, 5] - bivec_val = [5, 6, 2, 3] - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [ - conv.conv2d( 
- adtens, bdtens, aivec_val, bivec_val, border_mode="valid" - ) - ], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - aivec_val = [5, 2, 4, 3] - bivec_val = [6, 2, 4, 3] - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [ - conv.conv2d( - adtens, bdtens, aivec_val, bivec_val, border_mode="valid" - ) - ], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - with pytest.warns(DeprecationWarning): - self._compile_and_check( - [adtens, bdtens], - [conv.conv2d(adtens, bdtens, aivec_val, bivec_val, border_mode="full")], - [adtens_val, bdtens_val], - conv.ConvOp, - excluding=["conv_gemm"], - ) - - -# Test that broadcasting of gradients works correctly when using the -# nnet.conv2d() interface. This was reported in #3763, and uses the example -# code from that ticket. -def test_broadcast_grad(): - x1 = tensor4("x") - sigma = scalar("sigma") - window_radius = 3 - - filter_1d = at.arange(-window_radius, window_radius + 1) - filter_1d = filter_1d.astype(pytensor.config.floatX) - filter_1d = exp(-0.5 * filter_1d**2 / sigma**2) - filter_1d = filter_1d / filter_1d.sum() - - filter_W = filter_1d.dimshuffle(["x", "x", 0, "x"]) - - y = conv2d(x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None]) - # TODO FIXME: Make this a real test and `assert` something - pytensor.grad(y.sum(), sigma) diff --git a/tests/tensor/nnet/test_conv3d2d.py b/tests/tensor/nnet/test_conv3d2d.py deleted file mode 100644 index 6bd2822086..0000000000 --- a/tests/tensor/nnet/test_conv3d2d.py +++ /dev/null @@ -1,237 +0,0 @@ -import numpy as np -import pytest - -import pytensor - - -try: - from scipy import ndimage -except ImportError: - ndimage = None - -import tests.unittest_tools as utt -from pytensor.compile.sharedvalue import shared -from pytensor.graph.rewriting.basic import check_stack_trace -from pytensor.tensor.nnet.conv3d2d import ( - DiagonalSubtensor, - IncDiagonalSubtensor, - conv3d, - get_diagonal_subtensor_view, -) - - -def test_get_diagonal_subtensor_view(wrap=lambda a: a): - x = np.arange(20).reshape(5, 4).astype("float32") - x = wrap(x) - xv01 = get_diagonal_subtensor_view(x, 0, 1) - - # test that it works in 2d - assert np.array_equal(np.asarray(xv01), [[12, 9, 6, 3], [16, 13, 10, 7]]) - - x = np.arange(24).reshape(4, 3, 2) - xv01 = get_diagonal_subtensor_view(x, 0, 1) - xv02 = get_diagonal_subtensor_view(x, 0, 2) - xv12 = get_diagonal_subtensor_view(x, 1, 2) - - # print 'x', x - # print 'xv01', xv01 - # print 'xv02', xv02 - assert np.array_equal( - np.asarray(xv01), [[[12, 13], [8, 9], [4, 5]], [[18, 19], [14, 15], [10, 11]]] - ) - - assert np.array_equal( - np.asarray(xv02), - [ - [[6, 1], [8, 3], [10, 5]], - [[12, 7], [14, 9], [16, 11]], - [[18, 13], [20, 15], [22, 17]], - ], - ) - - # diagonal views of each leading matrix is the same - # as the slices out of the diagonal view of the entire 3d tensor - for xi, xvi in zip(x, xv12): - assert np.array_equal(xvi, get_diagonal_subtensor_view(xi, 0, 1)) - - -def pyconv3d(signals, filters, border_mode="valid"): - Ns, Ts, C, Hs, Ws = signals.shape - Nf, Tf, C, Hf, Wf = filters.shape - - # if border_mode is not 'valid', the signals need zero-padding - if border_mode == 
"full": - Tpad = Tf - 1 - Hpad = Hf - 1 - Wpad = Wf - 1 - elif border_mode == "half": - Tpad = Tf // 2 - Hpad = Hf // 2 - Wpad = Wf // 2 - else: - Tpad = 0 - Hpad = 0 - Wpad = 0 - - if Tpad > 0 or Hpad > 0 or Wpad > 0: - # zero-pad signals - signals_padded = np.zeros( - (Ns, Ts + 2 * Tpad, C, Hs + 2 * Hpad, Ws + 2 * Wpad), "float32" - ) - signals_padded[ - :, Tpad : (Ts + Tpad), :, Hpad : (Hs + Hpad), Wpad : (Ws + Wpad) - ] = signals - Ns, Ts, C, Hs, Ws = signals_padded.shape - signals = signals_padded - - Tf2 = Tf // 2 - Hf2 = Hf // 2 - Wf2 = Wf // 2 - - rval = np.zeros((Ns, Ts - Tf + 1, Nf, Hs - Hf + 1, Ws - Wf + 1)) - for ns in range(Ns): - for nf in range(Nf): - for c in range(C): - s_i = signals[ns, :, c, :, :] - f_i = filters[nf, :, c, :, :] - r_i = rval[ns, :, nf, :, :] - o_i = ndimage.convolve(s_i, f_i, mode="constant", cval=1) - o_i_sh0 = o_i.shape[0] - # print s_i.shape, f_i.shape, r_i.shape, o_i.shape - r_i += o_i[Tf2 : o_i_sh0 - Tf2, Hf2:-Hf2, Wf2:-Wf2] - return rval - - -def check_diagonal_subtensor_view_traces(fn): - assert check_stack_trace(fn, ops_to_check=(DiagonalSubtensor, IncDiagonalSubtensor)) - - -@pytest.mark.skipif( - ndimage is None or not pytensor.config.cxx, - reason="conv3d2d tests need SciPy and a c++ compiler", -) -@pytest.mark.parametrize("border_mode", ("valid", "full", "half")) -def test_conv3d(border_mode): - if pytensor.config.mode == "FAST_COMPILE": - mode = pytensor.compile.mode.get_mode("FAST_RUN") - else: - mode = pytensor.compile.mode.get_default_mode() - - Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32 - Nf, Tf, C, Hf, Wf = 32, 5, 3, 5, 5 - - signals = ( - np.arange(Ns * Ts * C * Hs * Ws).reshape(Ns, Ts, C, Hs, Ws).astype("float32") - ) - filters = ( - np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32") - ) - - # t0 = time.perf_counter() - pyres = pyconv3d(signals, filters, border_mode) - # print(time.perf_counter() - t0) - - s_signals = shared(signals) - s_filters = shared(filters) - s_output = shared(signals * 0) - - out = conv3d( - s_signals, - s_filters, - signals_shape=signals.shape, - filters_shape=filters.shape, - border_mode=border_mode, - ) - - newconv3d = pytensor.function([], [], updates={s_output: out}, mode=mode) - - check_diagonal_subtensor_view_traces(newconv3d) - # t0 = time.perf_counter() - newconv3d() - # print(time.perf_counter() - t0) - utt.assert_allclose(pyres, s_output.get_value(borrow=True)) - gsignals, gfilters = pytensor.grad(out.sum(), [s_signals, s_filters]) - gnewconv3d = pytensor.function( - [], - [], - updates=[(s_filters, gfilters), (s_signals, gsignals)], - mode=mode, - name="grad", - ) - check_diagonal_subtensor_view_traces(gnewconv3d) - - # t0 = time.perf_counter() - gnewconv3d() - # print("grad", time.perf_counter() - t0) - - Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5 - Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2 - - rng = np.random.default_rng(280284) - - signals = rng.random((Ns, Ts, C, Hs, Ws)).astype("float32") - filters = rng.random((Nf, Tf, C, Hf, Wf)).astype("float32") - utt.verify_grad( - lambda s, f: conv3d(s, f, border_mode=border_mode), - [signals, filters], - eps=1e-1, - mode=mode, - ) - - # Additional Test that covers the case of patched implementation for filter with Tf=1 - Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32 - Nf, Tf, C, Hf, Wf = 32, 1, 3, 5, 5 - - signals = ( - np.arange(Ns * Ts * C * Hs * Ws).reshape(Ns, Ts, C, Hs, Ws).astype("float32") - ) - filters = ( - np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32") - ) - - # t0 = time.perf_counter() - pyres = pyconv3d(signals, 
filters, border_mode) - # print(time.perf_counter() - t0) - - s_signals = shared(signals) - s_filters = shared(filters) - s_output = shared(signals * 0) - - out = conv3d( - s_signals, - s_filters, - signals_shape=signals.shape, - filters_shape=filters.shape, - border_mode=border_mode, - ) - - newconv3d = pytensor.function([], [], updates={s_output: out}, mode=mode) - - # t0 = time.perf_counter() - newconv3d() - # print(time.perf_counter() - t0) - utt.assert_allclose(pyres, s_output.get_value(borrow=True)) - gsignals, gfilters = pytensor.grad(out.sum(), [s_signals, s_filters]) - gnewconv3d = pytensor.function( - [], - [], - updates=[(s_filters, gfilters), (s_signals, gsignals)], - mode=mode, - name="grad", - ) - - # t0 = time.perf_counter() - gnewconv3d() - # print("grad", time.perf_counter() - t0) - - Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5 - Nf, Tf, C, Hf, Wf = 4, 1, 3, 2, 2 - - signals = rng.random((Ns, Ts, C, Hs, Ws)).astype("float32") - filters = rng.random((Nf, Tf, C, Hf, Wf)).astype("float32") - utt.verify_grad( - lambda s, f: conv3d(s, f, border_mode=border_mode), - [signals, filters], - eps=1e-1, - mode=mode, - ) diff --git a/tests/tensor/nnet/test_corr.py b/tests/tensor/nnet/test_corr.py deleted file mode 100644 index 7d35a78b4b..0000000000 --- a/tests/tensor/nnet/test_corr.py +++ /dev/null @@ -1,582 +0,0 @@ -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor.tensor.nnet import corr -from pytensor.tensor.type import dmatrix, dtensor3, dtensor4, dvector, tensor4 -from tests import unittest_tools as utt -from tests.tensor.nnet.test_abstract_conv import ( - TestAsymmetricPadding, - TestCausalConv, - TestGroupedConvNoOptim, - TestUnsharedConv, -) - - -@pytest.mark.skipif( - pytensor.config.cxx == "", - reason="SciPy and cxx needed", -) -class TestCorr2D(utt.InferShapeTester): - if pytensor.config.mode == "FAST_COMPILE": - mode = pytensor.compile.get_mode("FAST_RUN") - else: - mode = None - dtype = pytensor.config.floatX - - def setup_method(self): - self.input = tensor4("input", dtype=self.dtype) - self.input.name = "default_V" - self.filters = tensor4("filters", dtype=self.dtype) - self.filters.name = "default_filters" - # This tests can run even when pytensor.config.blas__ldflags is empty. - super().setup_method() - - def validate( - self, - image_shape, - filter_shape, - border_mode="valid", - subsample=(1, 1), - input=None, - filters=None, - verify_grad=True, - non_contiguous=False, - filter_dilation=(1, 1), - ): - """ - :param image_shape: The constant shape info passed to corrMM. - :param filter_shape: The constant shape info passed to corrMM. 
- """ - if not pytensor.config.cxx: - pytest.skip("Need cxx to test conv2d") - N_image_shape = [ - at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in image_shape - ] - N_filter_shape = [ - at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in filter_shape - ] - - if input is None: - input = self.input - if filters is None: - filters = self.filters - - # PYTENSOR IMPLEMENTATION - - # we create a symbolic function so that verify_grad can work - def sym_CorrMM(input, filters): - # define pytensor graph and function - input.name = "input" - filters.name = "filters" - rval = corr.CorrMM(border_mode, subsample, filter_dilation)(input, filters) - rval.name = "corr_output" - return rval - - output = sym_CorrMM(input, filters) - output.name = f"CorrMM()({input.name},{filters.name})" - pytensor_corr = pytensor.function([input, filters], output, mode=self.mode) - - # initialize input and compute result - image_data = np.random.random(N_image_shape).astype(self.dtype) - filter_data = np.random.random(N_filter_shape).astype(self.dtype) - if non_contiguous: - image_data = np.transpose(image_data, axes=(0, 1, 3, 2)) - image_data = image_data.copy() - image_data = np.transpose(image_data, axes=(0, 1, 3, 2)) - filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2)) - filter_data = filter_data.copy() - filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2)) - assert not image_data.flags["CONTIGUOUS"] - assert not filter_data.flags["CONTIGUOUS"] - - pytensor_output = pytensor_corr(image_data, filter_data) - - # REFERENCE IMPLEMENTATION - # Testing correlation, not convolution. Reverse filters. - filter_data_corr = np.array(filter_data[:, :, ::-1, ::-1], copy=True, order="C") - orig_image_data = image_data - img_shape2d = np.array(N_image_shape[-2:]) - fil_shape2d = np.array(N_filter_shape[-2:]) - dil_shape2d = np.array(filter_dilation) - dil_fil_shape2d = (fil_shape2d - 1) * dil_shape2d + 1 - subsample2d = np.array(subsample) - if border_mode == "full": - padHW = dil_fil_shape2d - 1 - elif border_mode == "valid": - padHW = np.array([0, 0]) - elif border_mode == "half": - padHW = np.floor(dil_fil_shape2d / 2).astype("int32") - elif isinstance(border_mode, tuple): - padHW = np.array(border_mode) - elif isinstance(border_mode, int): - padHW = np.array([border_mode, border_mode]) - else: - raise NotImplementedError(f"Unsupported border_mode {border_mode}") - out_shape2d = ( - np.floor((img_shape2d + 2 * (padHW) - dil_fil_shape2d) / subsample2d) + 1 - ) - # avoid numpy deprecation - out_shape2d = out_shape2d.astype("int32") - out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d) - ref_output = np.zeros(out_shape) - - # loop over output feature maps - ref_output.fill(0) - image_data2 = np.zeros( - ( - N_image_shape[0], - N_image_shape[1], - N_image_shape[2] + 2 * padHW[0], - N_image_shape[3] + 2 * padHW[1], - ) - ) - image_data2[ - :, - :, - padHW[0] : padHW[0] + N_image_shape[2], - padHW[1] : padHW[1] + N_image_shape[3], - ] = image_data - image_data = image_data2 - N_image_shape = image_data.shape - for bb in range(N_image_shape[0]): - for nn in range(N_filter_shape[0]): - for im0 in range(N_image_shape[1]): - filter2d = filter_data_corr[nn, im0, :, :] - image2d = image_data[bb, im0, :, :] - for row in range(ref_output.shape[2]): - irow = row * subsample[0] # image row - for col in range(ref_output.shape[3]): - icol = col * subsample[1] # image col - ref_output[bb, nn, row, col] += ( - image2d[ - irow : irow - + dil_fil_shape2d[0] : filter_dilation[0], - icol : icol 
- + dil_fil_shape2d[1] : filter_dilation[1], - ] - * filter2d[::-1, ::-1] - ).sum() - - utt.assert_allclose(ref_output, pytensor_output) - - # TEST GRADIENT - if verify_grad: - utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data], mode=self.mode) - - @pytest.mark.slow - def test_basic(self): - # Tests that basic correlations work for odd and even - # dimensions of image and filter shapes, as well as rectangular - # images and filters. - - border_modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), (3, 3), 1] - img_shapes = [ - (2, 2, 3, 3), - (3, 2, 8, 8), - (3, 2, 7, 5), - (3, 2, 7, 5), - (3, 2, 8, 8), - (3, 2, 7, 5), - ] - fil_shapes = [ - (2, 2, 2, 2), - (4, 2, 5, 5), - (5, 2, 2, 3), - (5, 2, 3, 2), - (4, 2, 5, 5), - (5, 2, 2, 3), - ] - - for border_mode in border_modes: - for img, fil in zip(img_shapes, fil_shapes): - self.validate(img, fil, border_mode, verify_grad=False) - - # Very slow on with 'full' or 'half' - self.validate((1, 10, 213, 129), (46, 10, 212, 1), "valid", verify_grad=False) - - def test_img_kernel_same_shape(self): - self.validate((3, 2, 3, 3), (4, 2, 3, 3), "full") - self.validate((3, 2, 3, 3), (4, 2, 3, 3), "valid") - self.validate((3, 2, 3, 3), (4, 2, 3, 3), "half") - self.validate((3, 2, 3, 3), (4, 2, 3, 3), (1, 1)) - self.validate((3, 2, 3, 3), (4, 2, 3, 3), 1) - - @pytest.mark.slow - def test_subsample(self): - # Tests correlation where subsampling != (1,1) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", subsample=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", subsample=(2, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "valid", subsample=(3, 3)) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", subsample=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", subsample=(2, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "full", subsample=(3, 3)) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", subsample=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", subsample=(2, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "half", subsample=(3, 3)) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), subsample=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), subsample=(2, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), subsample=(3, 3)) - - self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3)) - - def test_filter_dilation(self): - # Tests correlation where filter dilation != (1,1) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", filter_dilation=(2, 2)) - self.validate((3, 2, 14, 10), (5, 2, 2, 3), "valid", filter_dilation=(3, 1)) - self.validate((1, 1, 14, 14), (1, 1, 3, 3), "valid", filter_dilation=(2, 3)) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", filter_dilation=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", filter_dilation=(3, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "full", filter_dilation=(2, 3)) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", filter_dilation=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", filter_dilation=(3, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), "half", filter_dilation=(2, 3)) - - self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), filter_dilation=(2, 2)) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), filter_dilation=(2, 1)) - self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), filter_dilation=(1, 2)) - - self.validate( - (1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3), filter_dilation=(2, 2) - ) - - @pytest.mark.slow - def test_shape_Constant_tensor(self): - # Tests correlation where the 
{image,filter}_shape is a Constant tensor. - - as_t = at.as_tensor_variable - border_modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), (3, 3), 1] - - for border_mode in border_modes: - self.validate( - (as_t(3), as_t(2), as_t(7), as_t(5)), (5, 2, 2, 3), border_mode - ) - self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), border_mode) - self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), border_mode) - self.validate((3, 2, 7, 5), (as_t(5), as_t(2), as_t(2), as_t(3)), "valid") - self.validate((3, 2, 7, 5), as_t([5, 2, 2, 3]), border_mode) - self.validate(as_t([3, 2, 7, 5]), as_t([5, 2, 2, 3]), border_mode) - - def test_invalid_filter_shape(self): - # Tests scenario where filter_shape[1] != input_shape[1] - - with pytest.raises(ValueError): - self.validate((3, 2, 8, 8), (4, 3, 5, 5), "valid") - - def test_full_mode(self): - # Tests basic correlation in full mode and case where filter - # is larger than the input image. - - self.validate((3, 2, 5, 5), (4, 2, 8, 8), "full") - - def f(): - self.validate((3, 2, 5, 5), (4, 2, 8, 8), "valid") - - with pytest.raises(Exception): - f() - - def test_wrong_input(self): - # Make sure errors are raised when image and kernel are not 4D tensors - - with pytest.raises(Exception): - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dmatrix()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", filters=dvector()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", input=dtensor3()) - - @pytest.mark.skipif(not pytensor.config.cxx, reason="Need cxx for this test") - def test_dtype_upcast(self): - # Checks dtype upcast for CorrMM methods. - - rng = np.random.default_rng(280284) - - def rand(shape, dtype="float64"): - r = np.asarray(rng.random(shape), dtype=dtype) - return r * 2 - 1 - - ops = [corr.CorrMM, corr.CorrMM_gradWeights, corr.CorrMM_gradInputs] - a_shapes = [[4, 5, 6, 3], [1, 5, 6, 3], [1, 5, 6, 3]] - b_shapes = [[7, 5, 3, 2], [1, 5, 3, 1], [7, 1, 3, 1]] - dtypes = ["float32", "float64"] - - for op, a_shape, b_shape in zip(ops, a_shapes, b_shapes): - for a_dtype in dtypes: - for b_dtype in dtypes: - c_dtype = pytensor.scalar.upcast(a_dtype, b_dtype) - a_tens = tensor4(dtype=a_dtype) - b_tens = tensor4(dtype=b_dtype) - a_tens_val = rand(a_shape, dtype=a_dtype) - b_tens_val = rand(b_shape, dtype=b_dtype) - - c_tens = op()(a_tens, b_tens) - f = pytensor.function([a_tens, b_tens], c_tens, mode=self.mode) - assert f(a_tens_val, b_tens_val).dtype == c_dtype - - @pytest.mark.slow - @pytest.mark.skipif( - pytensor.config.cxx == "", - reason="SciPy and cxx needed", - ) - def test_infer_shape_forward(self): - - rng = np.random.default_rng(280284) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - corrMM = corr.CorrMM - - adtens = dtensor4() - bdtens = dtensor4() - aivec_vals = [ - [4, 5, 6, 3], - [6, 2, 8, 3], - [3, 6, 7, 5], - [3, 6, 7, 5], - [5, 2, 4, 3], - ] - bivec_vals = [ - [7, 5, 3, 2], - [4, 2, 5, 3], - [5, 6, 3, 2], - [5, 6, 2, 3], - [6, 2, 4, 3], - ] - modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), 1] - subsamples = [(1, 1), (2, 1), (1, 2)] - - for aivec_val, bivec_val in zip(aivec_vals, bivec_vals): - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - for mode in modes: - for subsample in subsamples: - # CorrMM - cdtens = corrMM(border_mode=mode, subsample=subsample)( - adtens, bdtens - ) - self._compile_and_check( - [adtens, bdtens], - [cdtens], - [adtens_val, bdtens_val], - corrMM, - warn=False, - ) - - 
@pytest.mark.slow - @pytest.mark.skipif( - pytensor.config.mode == "FAST_COMPILE" or pytensor.config.cxx == "", - reason="SciPy and cxx needed", - ) - def test_infer_shape_gradW(self): - - rng = np.random.default_rng(280284) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - corrMM = corr.CorrMM - gradW = corr.CorrMM_gradWeights - - adtens = dtensor4() - bdtens = dtensor4() - aivec_vals = [ - [1, 5, 6, 3], - [8, 2, 7, 3], - [1, 6, 9, 4], - [9, 6, 8, 5], - [9, 1, 6, 8], - ] - bivec_vals = [ - [7, 5, 3, 1], - [4, 2, 5, 3], - [12, 6, 3, 2], - [5, 6, 1, 3], - [11, 1, 3, 3], - ] - modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), 1] - subsamples = [(1, 1), (2, 1), (1, 2)] - - for aivec_val, bivec_val in zip(aivec_vals, bivec_vals): - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - for mode in modes: - for subsample in subsamples: - # CorrMM - cdtens = corrMM(border_mode=mode, subsample=subsample)( - adtens, bdtens - ) - f = pytensor.function([adtens, bdtens], cdtens) - cdtens_val = f(adtens_val, bdtens_val) - # CorrMM_gradWeights - shape = ( - pytensor.shared(bivec_val[2]), - pytensor.shared(bivec_val[3]), - ) - bdtens_g = gradW(border_mode=mode, subsample=subsample)( - adtens, cdtens, shape=shape - ) - self._compile_and_check( - [adtens, cdtens], - [bdtens_g], - [adtens_val, cdtens_val], - gradW, - warn=False, - ) - - @pytest.mark.slow - @pytest.mark.skipif( - pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx, - reason="Need cxx for this test", - ) - def test_infer_shape_gradI(self): - - rng = np.random.default_rng(280284) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - corrMM = corr.CorrMM - gradI = corr.CorrMM_gradInputs - - adtens = dtensor4() - bdtens = dtensor4() - aivec_vals = [ - [1, 5, 6, 3], - [8, 2, 7, 3], - [1, 6, 9, 4], - [9, 6, 8, 5], - [9, 1, 6, 8], - ] - bivec_vals = [ - [7, 5, 3, 1], - [4, 2, 5, 3], - [12, 6, 3, 2], - [5, 6, 1, 3], - [7, 1, 3, 4], - ] - modes = ["valid", "full", "half", (1, 1), (2, 1), (1, 2), 1] - subsamples = [(1, 1), (2, 1), (1, 2)] - - for aivec_val, bivec_val in zip(aivec_vals, bivec_vals): - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - for mode in modes: - for subsample in subsamples: - # CorrMM - cdtens = corrMM(border_mode=mode, subsample=subsample)( - adtens, bdtens - ) - f = pytensor.function([adtens, bdtens], cdtens) - cdtens_val = f(adtens_val, bdtens_val) - # CorrMM_gradInputs - shape = ( - pytensor.shared(aivec_val[2]), - pytensor.shared(aivec_val[3]), - ) - adtens_g = gradI(border_mode=mode, subsample=subsample)( - bdtens, cdtens, shape=shape - ) - self._compile_and_check( - [bdtens, cdtens], - [adtens_g], - [bdtens_val, cdtens_val], - gradI, - warn=False, - ) - - def test_non_contiguous(self): - self.validate((2, 2, 3, 3), (2, 2, 2, 2), "valid", non_contiguous=True) - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "valid", non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "valid", non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 3, 2), "valid", non_contiguous=True) - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "full", non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "full", non_contiguous=True) - self.validate((3, 2, 8, 8), (4, 2, 5, 5), "half", non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), "half", non_contiguous=True) - self.validate((3, 2, 8, 8), (4, 2, 5, 5), (1, 1), non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 2), 
non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), non_contiguous=True) - self.validate((3, 2, 7, 5), (5, 2, 2, 3), 2, non_contiguous=True) - - -class TestGroupCorr2d(TestGroupedConvNoOptim): - mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray") - conv_op = corr.CorrMM - conv_gradw_op = corr.CorrMM_gradWeights - conv_gradi_op = corr.CorrMM_gradInputs - - def test_graph(self): - # define common values first - groups = 3 - rng = np.random.default_rng(280284) - bottom = rng.random((3, 6, 5, 5)).astype(pytensor.config.floatX) - kern = rng.random((9, 2, 3, 3)).astype(pytensor.config.floatX) - bottom_sym = tensor4("bottom") - kern_sym = tensor4("kern") - - # grouped convolution graph - conv_group = self.conv(num_groups=groups)(bottom_sym, kern_sym) - gconv_func = pytensor.function( - [bottom_sym, kern_sym], conv_group, mode=self.mode - ) - - # Graph for the normal hard way - kern_offset = kern_sym.shape[0] // groups - bottom_offset = bottom_sym.shape[1] // groups - split_conv_output = [ - self.conv()( - bottom_sym[:, i * bottom_offset : (i + 1) * bottom_offset, :, :], - kern_sym[i * kern_offset : (i + 1) * kern_offset, :, :, :], - ) - for i in range(groups) - ] - concatenated_output = at.concatenate(split_conv_output, axis=1) - conv_func = pytensor.function( - [bottom_sym, kern_sym], concatenated_output, mode=self.mode - ) - - # calculate outputs for each graph - gconv_output = gconv_func(bottom, kern) - conv_output = conv_func(bottom, kern) - - # compare values - utt.assert_allclose(gconv_output, conv_output) - - -class TestUnsharedCorr2d(TestUnsharedConv): - if pytensor.config.mode == "FAST_COMPILE": - mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray") - else: - mode = None - conv2d_op = corr.CorrMM - conv2d_gradw_op = corr.CorrMM_gradWeights - conv2d_gradi_op = corr.CorrMM_gradInputs - - -class TestAsymmetricCorr(TestAsymmetricPadding): - if pytensor.config.mode == "FAST_COMPILE": - mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray") - else: - mode = None - conv2d_op = corr.CorrMM - conv2d_gradw_op = corr.CorrMM_gradWeights - conv2d_gradi_op = corr.CorrMM_gradInputs - - -class TestCausalCorr(TestCausalConv): - if pytensor.config.mode == "FAST_COMPILE": - mode = pytensor.compile.get_mode("FAST_RUN").excluding("gpuarray") - else: - mode = None diff --git a/tests/tensor/nnet/test_corr3d.py b/tests/tensor/nnet/test_corr3d.py deleted file mode 100644 index e26c18e799..0000000000 --- a/tests/tensor/nnet/test_corr3d.py +++ /dev/null @@ -1,562 +0,0 @@ -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor.tensor.nnet import corr3d -from pytensor.tensor.type import dmatrix, dtensor3, dtensor4, dtensor5, tensor5, vector -from tests import unittest_tools as utt -from tests.tensor.nnet.test_abstract_conv import TestGroupedConv3dNoOptim - - -@pytest.mark.skipif( - pytensor.config.cxx == "", - reason="SciPy and cxx needed", -) -class TestCorr3D(utt.InferShapeTester): - if pytensor.config.mode == "FAST_COMPILE": - mode = pytensor.compile.get_mode("FAST_RUN") - else: - mode = None - dtype = pytensor.config.floatX - - def setup_method(self): - self.input = tensor5("input", dtype=self.dtype) - self.input.name = "default_V" - self.filters = tensor5("filters", dtype=self.dtype) - self.filters.name = "default_filters" - # This tests can run even when pytensor.config.blas__ldflags is empty. 
- super().setup_method() - - def validate( - self, - image_shape, - filter_shape, - border_mode="valid", - subsample=(1, 1, 1), - input=None, - filters=None, - verify_grad=True, - non_contiguous=False, - filter_dilation=(1, 1, 1), - ): - """ - :param image_shape: The constant shape info passed to corr3dMM. - :param filter_shape: The constant shape info passed to corr3dMM. - """ - if not pytensor.config.cxx: - pytest.skip("Need cxx for this test") - - N_image_shape = [ - at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in image_shape - ] - N_filter_shape = [ - at.get_scalar_constant_value(at.as_tensor_variable(x)) for x in filter_shape - ] - - if input is None: - input = self.input - if filters is None: - filters = self.filters - - # PYTENSOR IMPLEMENTATION - - # we create a symbolic function so that verify_grad can work - def sym_Corr3dMM(input, filters): - # define pytensor graph and function - input.name = "input" - filters.name = "filters" - rval = corr3d.Corr3dMM(border_mode, subsample, filter_dilation)( - input, filters - ) - rval.name = "corr_output" - return rval - - output = sym_Corr3dMM(input, filters) - output.name = f"Corr3dMM()({input.name},{filters.name})" - pytensor_corr = pytensor.function([input, filters], output, mode=self.mode) - - # initialize input and compute result - rng = np.random.default_rng(28483) - - image_data = rng.random(N_image_shape).astype(self.dtype) - filter_data = rng.random(N_filter_shape).astype(self.dtype) - image_data /= 10 - filter_data /= 10 - if non_contiguous: - image_data = np.transpose(image_data, axes=(0, 1, 4, 3, 2)) - image_data = image_data.copy() - image_data = np.transpose(image_data, axes=(0, 1, 4, 3, 2)) - filter_data = np.transpose(filter_data, axes=(0, 1, 4, 3, 2)) - filter_data = filter_data.copy() - filter_data = np.transpose(filter_data, axes=(0, 1, 4, 3, 2)) - assert not image_data.flags["CONTIGUOUS"] - assert not filter_data.flags["CONTIGUOUS"] - - pytensor_output = pytensor_corr(image_data, filter_data) - - # REFERENCE IMPLEMENTATION - # Testing correlation, not convolution. Reverse filters. 
- filter_data_corr = np.array( - filter_data[:, :, ::-1, ::-1, ::-1], copy=True, order="C" - ) - orig_image_data = image_data - img_shape3d = np.array(N_image_shape[-3:]) - fil_shape3d = np.array(N_filter_shape[-3:]) - dil_shape3d = np.array(filter_dilation) - dil_fil_shape3d = (fil_shape3d - 1) * dil_shape3d + 1 - subsample3d = np.array(subsample) - if border_mode == "full": - padHWD = dil_fil_shape3d - 1 - elif border_mode == "valid": - padHWD = np.array([0, 0, 0]) - elif border_mode == "half": - padHWD = np.floor(dil_fil_shape3d / 2).astype("int32") - elif isinstance(border_mode, tuple): - padHWD = np.array(border_mode) - elif isinstance(border_mode, int): - padHWD = np.array([border_mode, border_mode, border_mode]) - else: - raise NotImplementedError(f"Unsupported border_mode {border_mode}") - out_shape3d = ( - np.floor((img_shape3d + 2 * (padHWD) - dil_fil_shape3d) / subsample3d) + 1 - ) - # avoid numpy deprecation - out_shape3d = out_shape3d.astype("int32") - out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape3d) - ref_output = np.zeros(out_shape) - - # loop over output feature maps - ref_output.fill(0) - image_data2 = np.zeros( - ( - N_image_shape[0], - N_image_shape[1], - N_image_shape[2] + 2 * padHWD[0], - N_image_shape[3] + 2 * padHWD[1], - N_image_shape[4] + 2 * padHWD[2], - ) - ) - image_data2[ - :, - :, - padHWD[0] : padHWD[0] + N_image_shape[2], - padHWD[1] : padHWD[1] + N_image_shape[3], - padHWD[2] : padHWD[2] + N_image_shape[4], - ] = image_data - image_data = image_data2 - N_image_shape = image_data.shape - for bb in range(N_image_shape[0]): - for nn in range(N_filter_shape[0]): - for im0 in range(N_image_shape[1]): - filter3d = filter_data_corr[nn, im0, :, :, :] - image3d = image_data[bb, im0, :, :, :] - for row in range(ref_output.shape[2]): - irow = row * subsample[0] # image row - for col in range(ref_output.shape[3]): - icol = col * subsample[1] # image col - for slc in range(ref_output.shape[4]): - islc = slc * subsample[2] # image slice - ref_output[bb, nn, row, col, slc] += ( - image3d[ - irow : irow - + dil_fil_shape3d[0] : filter_dilation[0], - icol : icol - + dil_fil_shape3d[1] : filter_dilation[1], - islc : islc - + dil_fil_shape3d[2] : filter_dilation[2], - ] - * filter3d[::-1, ::-1, ::-1] - ).sum() - - utt.assert_allclose(pytensor_output, ref_output) - - # TEST GRADIENT - if verify_grad: - utt.verify_grad( - sym_Corr3dMM, [orig_image_data, filter_data], mode=self.mode - ) - - @pytest.mark.slow - def test_basic(self): - # Tests that basic correlations work for odd and even - # dimensions of image and filter shapes, as well as rectangular - # images and filters. 
- border_modes = [ - "valid", - "full", - "half", - (1, 1, 1), - (2, 1, 1), - (1, 2, 1), - (1, 1, 2), - (3, 3, 3), - 1, - ] - img_shapes = [ - (2, 2, 3, 3, 3), - (3, 2, 8, 8, 8), - (3, 2, 7, 5, 5), - (3, 2, 7, 5, 5), - (1, 2, 8, 8, 8), - (1, 2, 7, 5, 5), - ] - fil_shapes = [ - (2, 2, 2, 2, 2), - (1, 2, 5, 5, 5), - (2, 2, 2, 3, 2), - (2, 2, 3, 2, 2), - (1, 2, 5, 5, 5), - (1, 2, 2, 3, 3), - ] - - for border_mode in border_modes: - for img, fil in zip(img_shapes, fil_shapes): - self.validate(img, fil, border_mode, verify_grad=False) - - # Very slow on with 'full' or 'half' - self.validate((1, 2, 53, 29, 11), (13, 2, 12, 1, 1), "valid", verify_grad=False) - - def test_img_kernel_same_shape(self): - self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), "full") - self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), "valid") - self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), "half") - self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), (1, 1, 1)) - self.validate((3, 2, 3, 3, 3), (1, 2, 3, 3, 3), 1) - - @pytest.mark.slow - def test_subsample(self): - # Tests correlation where subsampling != (1,1,1) - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "valid", subsample=(2, 2, 2)) - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "valid", subsample=(2, 1, 1)) - self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "valid", subsample=(3, 3, 3)) - - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", subsample=(2, 2, 2)) - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", subsample=(2, 1, 1)) - self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "full", subsample=(3, 3, 3)) - - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", subsample=(2, 2, 2)) - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", subsample=(2, 1, 1)) - self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "half", subsample=(3, 3, 3)) - - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (1, 1, 1), subsample=(2, 2, 2)) - self.validate((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (2, 1, 1), subsample=(2, 1, 1)) - self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 2, 2), subsample=(3, 3, 3)) - - self.validate((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), 1, subsample=(3, 3, 3)) - - # Tests correlation where filter dilation != (1,1,1) - @pytest.mark.parametrize( - "image_shape, filter_shape, border_mode, filter_dilation", - [ - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "valid", (2, 2, 2)), - ((3, 2, 14, 10, 10), (2, 2, 2, 3, 3), "valid", (3, 1, 1)), - ((1, 1, 14, 14, 14), (1, 1, 3, 3, 3), "valid", (2, 3, 3)), - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", (2, 2, 2)), - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "full", (3, 1, 1)), - ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "full", (2, 3, 3)), - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", (2, 2, 2)), - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), "half", (3, 1, 1)), - ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), "half", (2, 3, 3)), - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (1, 1, 1), (2, 2, 2)), - ((3, 2, 7, 5, 5), (2, 2, 2, 3, 3), (2, 1, 1), (2, 1, 1)), - ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 2, 1), (1, 2, 1)), - ((1, 1, 6, 6, 6), (1, 1, 3, 3, 3), (1, 1, 2), (1, 1, 2)), - ], - ) - def test_filter_dilation( - self, image_shape, filter_shape, border_mode, filter_dilation - ): - self.validate( - image_shape, filter_shape, border_mode, filter_dilation=filter_dilation - ) - - def test_filter_dilation_subsample(self): - self.validate( - (1, 1, 6, 6, 6), - (1, 1, 3, 3, 3), - 1, - subsample=(3, 3, 3), - filter_dilation=(2, 2, 2), - ) - - @pytest.mark.parametrize( - "border_mode", - [ - "valid", - "full", - "half", - (1, 1, 1), - (2, 1, 1), - (1, 2, 1), - (1, 1, 2), - (3, 3, 
3), - 1, - ], - ) - def test_shape_Constant_tensor(self, border_mode): - # Tests correlation where the {image,filter}_shape is a Constant tensor - as_t = at.as_tensor_variable - self.validate( - (as_t(3), as_t(2), as_t(7), as_t(5), as_t(5)), (5, 2, 2, 3, 3), border_mode - ) - self.validate(as_t([3, 2, 7, 5, 5]), (5, 2, 2, 3, 3), border_mode) - self.validate(as_t((3, 2, 7, 5, 5)), (5, 2, 2, 3, 3), border_mode) - self.validate( - (3, 2, 7, 5, 5), (as_t(5), as_t(2), as_t(2), as_t(3), as_t(3)), "valid" - ) - self.validate((3, 2, 7, 5, 5), as_t([5, 2, 2, 3, 3]), border_mode) - self.validate(as_t([3, 2, 7, 5, 5]), as_t([5, 2, 2, 3, 3]), border_mode) - - def test_invalid_filter_shape(self): - # Tests scenario where filter_shape[1] != input_shape[1] - with pytest.raises(ValueError): - self.validate((3, 2, 8, 8, 8), (4, 3, 5, 5, 8), "valid") - - def test_full_mode(self): - # Tests basic correlation in full mode and case where filter - # is larger than the input image. - self.validate((3, 1, 4, 4, 4), (2, 1, 5, 5, 5), "full") - - def f(): - self.validate((3, 2, 5, 5, 5), (4, 2, 8, 8, 8), "valid") - - with pytest.raises(Exception): - f() - - def test_wrong_input(self): - # Make sure errors are raised when image and kernel are not 5D tensors - with pytest.raises(Exception): - self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=dmatrix()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=vector()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=dtensor3()) - with pytest.raises(Exception): - self.validate((3, 2, 8, 8, 8), (4, 2, 5, 5, 5), "valid", input=dtensor4()) - - @pytest.mark.skipif(not pytensor.config.cxx, reason="Need cxx for this test") - def test_dtype_upcast(self): - # Checks dtype upcast for Corr3dMM methods. 
- - rng = np.random.default_rng(28483) - - def rand(shape, dtype="float64"): - r = np.asarray(rng.random(shape), dtype=dtype) - return r * 2 - 1 - - ops = [corr3d.Corr3dMM, corr3d.Corr3dMMGradWeights, corr3d.Corr3dMMGradInputs] - a_shapes = [[4, 5, 6, 3, 3], [1, 5, 6, 3, 3], [1, 5, 6, 3, 3]] - b_shapes = [[7, 5, 3, 2, 2], [1, 5, 3, 1, 1], [7, 1, 3, 1, 1]] - dtypes = ["float32", "float64"] - - for op, a_shape, b_shape in zip(ops, a_shapes, b_shapes): - for a_dtype in dtypes: - for b_dtype in dtypes: - c_dtype = pytensor.scalar.upcast(a_dtype, b_dtype) - a_tens = tensor5(dtype=a_dtype) - b_tens = tensor5(dtype=b_dtype) - a_tens_val = rand(a_shape, dtype=a_dtype) - b_tens_val = rand(b_shape, dtype=b_dtype) - - c_tens = op()(a_tens, b_tens) - f = pytensor.function([a_tens, b_tens], c_tens, mode=self.mode) - assert f(a_tens_val, b_tens_val).dtype == c_dtype - - @pytest.mark.slow - @pytest.mark.skipif( - pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx, - reason="Need cxx for this test", - ) - def test_infer_shape_forward(self): - - rng = np.random.default_rng(28483) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - corr3dMM = corr3d.Corr3dMM - - adtens = dtensor5() - bdtens = dtensor5() - aivec_vals = [ - [4, 5, 6, 3, 3], - [6, 2, 8, 3, 3], - [3, 6, 7, 5, 5], - [3, 6, 7, 5, 5], - [5, 2, 4, 3, 3], - ] - bivec_vals = [ - [7, 5, 3, 2, 2], - [4, 2, 5, 3, 3], - [5, 6, 3, 2, 2], - [5, 6, 2, 3, 3], - [6, 2, 4, 3, 3], - ] - modes = ["valid", "full", "half", (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1] - subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)] - - for aivec_val, bivec_val in zip(aivec_vals, bivec_vals): - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - for mode in modes: - for subsample in subsamples: - # Corr3dMM - cdtens = corr3dMM(border_mode=mode, subsample=subsample)( - adtens, bdtens - ) - self._compile_and_check( - [adtens, bdtens], - [cdtens], - [adtens_val, bdtens_val], - corr3dMM, - warn=False, - ) - - @pytest.mark.slow - @pytest.mark.skipif( - pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx, - reason="Need cxx for this test", - ) - def test_infer_shape_gradW(self): - - rng = np.random.default_rng(28483) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - corr3dMM = corr3d.Corr3dMM - gradW = corr3d.Corr3dMMGradWeights - - adtens = dtensor5() - bdtens = dtensor5() - aivec_vals = [ - [1, 5, 6, 3, 3], - [8, 2, 7, 3, 3], - [1, 6, 9, 4, 4], - [9, 6, 8, 5, 5], - [9, 1, 6, 8, 8], - ] - bivec_vals = [ - [7, 5, 3, 1, 1], - [4, 2, 5, 3, 3], - [12, 6, 3, 2, 2], - [5, 6, 1, 3, 3], - [11, 1, 3, 3, 3], - ] - modes = ["valid", "full", "half", (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1] - subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)] - - for aivec_val, bivec_val in zip(aivec_vals, bivec_vals): - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - for mode in modes: - for subsample in subsamples: - # Corr3dMM - cdtens = corr3dMM(border_mode=mode, subsample=subsample)( - adtens, bdtens - ) - f = pytensor.function([adtens, bdtens], cdtens) - cdtens_val = f(adtens_val, bdtens_val) - # Corr3dMM_gradWeights - shape = ( - pytensor.shared(bivec_val[2]), - pytensor.shared(bivec_val[3]), - pytensor.shared(bivec_val[4]), - ) - bdtens_g = gradW(border_mode=mode, subsample=subsample)( - adtens, cdtens, shape=shape - ) - self._compile_and_check( - [adtens, cdtens], - [bdtens_g], - [adtens_val, cdtens_val], - gradW, - warn=False, - ) - 
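For context on the dtype-upcast check in the tests above: the output dtype of the Corr3dMM ops is expected to follow PyTensor's scalar promotion rule, via the same helper the deleted test calls as `pytensor.scalar.upcast`. A minimal sketch of that rule, assuming `upcast` is importable from `pytensor.scalar` (an assumption based on how the test accesses it):

from pytensor.scalar import upcast

# NumPy-style promotion: mixing float32 and float64 inputs yields float64 outputs.
assert upcast("float32", "float32") == "float32"
assert upcast("float32", "float64") == "float64"
assert upcast("float64", "float32") == "float64"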
- @pytest.mark.slow - @pytest.mark.skipif( - pytensor.config.mode == "FAST_COMPILE" or not pytensor.config.cxx, - reason="Need cxx for this test", - ) - def test_infer_shape_gradI(self): - - rng = np.random.default_rng(28483) - - def rand(*shape): - r = np.asarray(rng.random(shape), dtype="float64") - return r * 2 - 1 - - corr3dMM = corr3d.Corr3dMM - gradI = corr3d.Corr3dMMGradInputs - - adtens = dtensor5() - bdtens = dtensor5() - aivec_vals = [ - [1, 5, 6, 3, 3], - [8, 2, 7, 3, 3], - [1, 6, 9, 4, 4], - [9, 6, 8, 5, 5], - [9, 1, 6, 8, 8], - ] - bivec_vals = [ - [7, 5, 3, 1, 1], - [4, 2, 5, 3, 3], - [12, 6, 3, 2, 2], - [5, 6, 1, 3, 3], - [7, 1, 3, 4, 4], - ] - modes = ["valid", "full", "half", (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), 1] - subsamples = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2)] - - for aivec_val, bivec_val in zip(aivec_vals, bivec_vals): - adtens_val = rand(*aivec_val) - bdtens_val = rand(*bivec_val) - for mode in modes: - for subsample in subsamples: - # Corr3dMM - cdtens = corr3dMM(border_mode=mode, subsample=subsample)( - adtens, bdtens - ) - f = pytensor.function([adtens, bdtens], cdtens) - cdtens_val = f(adtens_val, bdtens_val) - # Corr3dMM_gradInputs - shape = ( - pytensor.shared(aivec_val[2]), - pytensor.shared(aivec_val[3]), - pytensor.shared(aivec_val[4]), - ) - adtens_g = gradI(border_mode=mode, subsample=subsample)( - bdtens, cdtens, shape=shape - ) - self._compile_and_check( - [bdtens, cdtens], - [adtens_g], - [bdtens_val, cdtens_val], - gradI, - warn=False, - ) - - def test_non_contiguous(self): - self.validate((2, 2, 3, 3, 3), (2, 2, 2, 2, 2), "valid", non_contiguous=True) - self.validate((3, 2, 8, 8, 8), (2, 2, 5, 5, 5), "valid", non_contiguous=True) - self.validate((3, 2, 7, 5, 5), (3, 2, 2, 3, 3), "valid", non_contiguous=True) - self.validate((3, 2, 7, 5, 5), (3, 2, 3, 2, 2), "valid", non_contiguous=True) - self.validate((3, 1, 8, 8, 8), (2, 1, 5, 5, 5), "full", non_contiguous=True) - self.validate((3, 1, 8, 8, 8), (2, 1, 5, 5, 5), "half", non_contiguous=True) - self.validate((3, 1, 8, 8, 8), (2, 1, 5, 5, 5), (1, 1, 1), non_contiguous=True) - self.validate((3, 1, 7, 5, 5), (2, 1, 2, 3, 3), (1, 1, 2), non_contiguous=True) - self.validate((3, 1, 7, 5, 5), (2, 1, 2, 3, 3), (1, 2, 1), non_contiguous=True) - self.validate((3, 1, 7, 5, 5), (2, 1, 2, 3, 3), (2, 1, 1), non_contiguous=True) - - -class TestGroupCorr3d(TestGroupedConv3dNoOptim): - mode = pytensor.compile.get_mode("FAST_RUN") - conv_op = corr3d.Corr3dMM - conv_gradw_op = corr3d.Corr3dMMGradWeights - conv_gradi_op = corr3d.Corr3dMMGradInputs - flip_filter = True - is_dnn = False diff --git a/tests/tensor/nnet/test_ctc.py b/tests/tensor/nnet/test_ctc.py deleted file mode 100644 index b4402dcc4d..0000000000 --- a/tests/tensor/nnet/test_ctc.py +++ /dev/null @@ -1,187 +0,0 @@ -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor.tensor.nnet.ctc import ( - ConnectionistTemporalClassification, - ctc, - ctc_available, -) -from tests import unittest_tools as utt - - -def setup_torch_case(): - # Test obtained from Torch tutorial at: - # https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md - - # Layout, from slowest to fastest changing dimension, is (time, batchSize, inputLayerSize) - activations = np.asarray( - [ - [[0, 0, 0, 0, 0], [1, 2, 3, 4, 5], [-5, -4, -3, -2, -1]], - [[0, 0, 0, 0, 0], [6, 7, 8, 9, 10], [-10, -9, -8, -7, -6]], - [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]], - ], - 
dtype=np.float32, - ) - # Duration of each sequence - activation_times = np.asarray([1, 3, 3], dtype=np.int32) - # Labels for each sequence - labels = np.asarray([[1, -1], [3, 3], [2, 3]], dtype=np.int32) - - expected_costs = np.asarray( - [1.609437943, 7.355742931, 4.938849926], dtype=np.float32 - ) - - grads = [ - [ - [0.2, -0.8, 0.2, 0.2, 0.2], - [0.01165623125, 0.03168492019, 0.08612854034, -0.7658783197, 0.636408627], - [-0.02115798369, 0.03168492019, -0.8810571432, 0.2341216654, 0.636408627], - ], - [ - [0, 0, 0, 0, 0], - [-0.9883437753, 0.03168492019, 0.08612854034, 0.2341216654, 0.636408627], - [-0.02115798369, 0.03168492019, -0.1891518533, -0.4577836394, 0.636408627], - ], - [ - [0, 0, 0, 0, 0], - [0.01165623125, 0.03168492019, 0.08612854034, -0.7658783197, 0.636408627], - [-0.02115798369, 0.03168492019, 0.08612854034, -0.7330639958, 0.636408627], - ], - ] - expected_gradients = np.asarray(grads, dtype=np.float32) - - return [activations, labels, activation_times, expected_costs, expected_gradients] - - -def setup_ctc_case(): - activations = np.asarray( - [ - [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]], - [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]], - ], - dtype=np.float32, - ) - - activation_times = np.asarray([2, 2], dtype=np.int32) - - labels = np.asarray([[1, 2], [1, 2]], dtype=np.int32) - - expected_costs = np.asarray([2.962858438, 3.053659201], dtype=np.float32) - - grads = [ - [ - [0.177031219, -0.7081246376, 0.177031219, 0.177031219, 0.177031219], - [0.177031219, -0.8229685426, 0.291875124, 0.177031219, 0.177031219], - ], - [ - [0.291875124, 0.177031219, -0.8229685426, 0.177031219, 0.177031219], - [0.1786672771, 0.1786672771, -0.7334594727, 0.1974578798, 0.1786672771], - ], - ] - - expected_gradients = np.asarray(grads, dtype=np.float32) - - return [activations, labels, activation_times, expected_costs, expected_gradients] - - -def setup_grad_case(): - activations = np.asarray( - [ - [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]], - [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]], - ], - dtype=np.float32, - ) - - activation_times = np.asarray([2, 2], dtype=np.int32) - - labels = np.asarray([[1, 2], [1, 2]], dtype=np.int32) - - return [activations, labels, activation_times] - - -@pytest.mark.skipif( - not ctc_available(), reason="Optional library warp-ctc not available" -) -@pytest.mark.skipif( - pytensor.config.mode == "FAST_COMPILE" or pytensor.config.cxx == "", - reason="We need a c compiler", -) -class TestCTC: - """ - Test Baidu CTC wrapper implementation. - - Expected values for costs and gradients are obtained through an external - C implementation, that uses the library directly. 
- """ - - def run_ctc( - self, activations, labels, input_length, expected_costs, expected_grads - ): - # Create symbolic variables - t_activations = pytensor.shared(activations, name="activations") - t_activation_times = pytensor.shared(input_length, name="activation_times") - t_labels = pytensor.shared(labels, name="labels") - - t_cost = ctc(t_activations, t_labels, t_activation_times) - # Symbolic gradient of CTC cost - t_grad = at.grad(at.mean(t_cost), t_activations) - # Compile symbolic functions - train = pytensor.function([], [t_cost, t_grad]) - - cost, grad = train() - - utt.assert_allclose(expected_grads / cost.shape[0], grad) - utt.assert_allclose(expected_costs, cost) - - self.check_grads_disabled(t_activations, t_labels, t_activation_times) - - def check_grads_disabled(self, activations, labels, input_length): - """ - Check if optimization to disable gradients is working - """ - ctc_cost = ctc(activations, labels, input_length) - ctc_function = pytensor.function([], [ctc_cost]) - for node in ctc_function.maker.fgraph.apply_nodes: - if isinstance(node.op, ConnectionistTemporalClassification): - assert node.op.compute_grad is False - - def test_torch_case(self): - ( - activations, - labels, - input_length, - expected_costs, - expected_grads, - ) = setup_torch_case() - self.run_ctc(activations, labels, input_length, expected_costs, expected_grads) - - def test_ctc(self): - ( - activations, - labels, - input_length, - expected_costs, - expected_grads, - ) = setup_ctc_case() - self.run_ctc(activations, labels, input_length, expected_costs, expected_grads) - - def test_verify_grad(self): - def ctc_op_functor(labels, in_lengths): - def wrapper(acts): - # Create auxiliary symbolic variables - t_activation_times = pytensor.shared( - in_lengths, name="activation_times" - ) - t_labels = pytensor.shared(labels, name="labels") - return ctc(acts, t_labels, t_activation_times) - - return wrapper - - activations, labels, activation_times = setup_grad_case() - - ctc_op = ctc_op_functor(labels, activation_times) - - utt.verify_grad(ctc_op, [activations]) diff --git a/tests/tensor/nnet/test_neighbours.py b/tests/tensor/nnet/test_neighbours.py deleted file mode 100644 index e4f7452f56..0000000000 --- a/tests/tensor/nnet/test_neighbours.py +++ /dev/null @@ -1,661 +0,0 @@ -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor import function, shared -from pytensor.configdefaults import config -from pytensor.tensor import nnet -from pytensor.tensor.nnet.neighbours import Images2Neibs, images2neibs, neibs2images -from pytensor.tensor.type import dtensor4, ftensor4, ivector, matrix, tensor4 -from tests import unittest_tools - - -mode_without_gpu = pytensor.compile.mode.get_default_mode().excluding("gpu") - - -class TestImages2Neibs(unittest_tools.InferShapeTester): - mode = mode_without_gpu - op = Images2Neibs - dtypes = ["int64", "float32", "float64"] - - def test_neibs(self): - for shape, pshape in [ - ((10, 7, 18, 18), (2, 2)), - ((10, 7, 6, 18), (3, 2)), - ((5, 7, 66, 66), (33, 33)), - ((5, 7, 68, 66), (34, 33)), - ]: - for border in ["valid", "ignore_borders"]: - for dtype in self.dtypes: - images = shared( - np.arange(np.prod(shape), dtype=dtype).reshape(shape) - ) - neib_shape = at.as_tensor_variable(pshape) - - f = function( - [], - images2neibs(images, neib_shape, mode=border), - mode=self.mode, - ) - - # print images.get_value(borrow=True) - neibs = f() - # print neibs - g = function( - [], - neibs2images(neibs, neib_shape, images.shape), - 
mode=self.mode, - ) - assert any( - isinstance(node.op, self.op) - for node in f.maker.fgraph.toposort() - ) - - # print g() - assert np.allclose(images.get_value(borrow=True), g()) - - def test_neibs_manual(self): - shape = (2, 3, 4, 4) - for dtype in self.dtypes: - images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape)) - neib_shape = at.as_tensor_variable((2, 2)) - - for border in ["valid", "ignore_borders"]: - f = function( - [], images2neibs(images, neib_shape, mode=border), mode=self.mode - ) - assert any( - isinstance(node.op, self.op) for node in f.maker.fgraph.toposort() - ) - - # print images.get_value(borrow=True) - neibs = f() - # print neibs - assert np.allclose( - neibs, - [ - [0, 1, 4, 5], - [2, 3, 6, 7], - [8, 9, 12, 13], - [10, 11, 14, 15], - [16, 17, 20, 21], - [18, 19, 22, 23], - [24, 25, 28, 29], - [26, 27, 30, 31], - [32, 33, 36, 37], - [34, 35, 38, 39], - [40, 41, 44, 45], - [42, 43, 46, 47], - [48, 49, 52, 53], - [50, 51, 54, 55], - [56, 57, 60, 61], - [58, 59, 62, 63], - [64, 65, 68, 69], - [66, 67, 70, 71], - [72, 73, 76, 77], - [74, 75, 78, 79], - [80, 81, 84, 85], - [82, 83, 86, 87], - [88, 89, 92, 93], - [90, 91, 94, 95], - ], - ) - g = function( - [], neibs2images(neibs, neib_shape, images.shape), mode=self.mode - ) - - assert np.allclose(images.get_value(borrow=True), g()) - - def test_neibs_manual_step(self): - shape = (2, 3, 5, 5) - for dtype in self.dtypes: - images = shared( - np.asarray(np.arange(np.prod(shape)).reshape(shape), dtype=dtype) - ) - neib_shape = at.as_tensor_variable((3, 3)) - neib_step = at.as_tensor_variable((2, 2)) - for border in ["valid", "ignore_borders"]: - f = function( - [], - images2neibs(images, neib_shape, neib_step, mode=border), - mode=self.mode, - ) - - neibs = f() - assert self.op in [type(node.op) for node in f.maker.fgraph.toposort()] - - assert np.allclose( - neibs, - [ - [0, 1, 2, 5, 6, 7, 10, 11, 12], - [2, 3, 4, 7, 8, 9, 12, 13, 14], - [10, 11, 12, 15, 16, 17, 20, 21, 22], - [12, 13, 14, 17, 18, 19, 22, 23, 24], - [25, 26, 27, 30, 31, 32, 35, 36, 37], - [27, 28, 29, 32, 33, 34, 37, 38, 39], - [35, 36, 37, 40, 41, 42, 45, 46, 47], - [37, 38, 39, 42, 43, 44, 47, 48, 49], - [50, 51, 52, 55, 56, 57, 60, 61, 62], - [52, 53, 54, 57, 58, 59, 62, 63, 64], - [60, 61, 62, 65, 66, 67, 70, 71, 72], - [62, 63, 64, 67, 68, 69, 72, 73, 74], - [75, 76, 77, 80, 81, 82, 85, 86, 87], - [77, 78, 79, 82, 83, 84, 87, 88, 89], - [85, 86, 87, 90, 91, 92, 95, 96, 97], - [87, 88, 89, 92, 93, 94, 97, 98, 99], - [100, 101, 102, 105, 106, 107, 110, 111, 112], - [102, 103, 104, 107, 108, 109, 112, 113, 114], - [110, 111, 112, 115, 116, 117, 120, 121, 122], - [112, 113, 114, 117, 118, 119, 122, 123, 124], - [125, 126, 127, 130, 131, 132, 135, 136, 137], - [127, 128, 129, 132, 133, 134, 137, 138, 139], - [135, 136, 137, 140, 141, 142, 145, 146, 147], - [137, 138, 139, 142, 143, 144, 147, 148, 149], - ], - ) - - # neibs2images do not seam to support step != neib_shape - # g = function([], neibs2images(neibs, neib_shape, images.shape), - # mode=self.mode) - - # print g() - # assert numpy.allclose(images.get_value(borrow=True), g()) - - @config.change_flags(compute_test_value="off") - def test_neibs_bad_shape(self): - shape = (2, 3, 10, 10) - for dtype in self.dtypes: - images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape)) - - for neib_shape in [(3, 2), (2, 3)]: - neib_shape = at.as_tensor_variable(neib_shape) - f = function([], images2neibs(images, neib_shape), mode=self.mode) - with pytest.raises(TypeError): - f() - - # 
Test that ignore border work in that case. - f = function( - [], - images2neibs(images, neib_shape, mode="ignore_borders"), - mode=self.mode, - ) - assert self.op in [type(node.op) for node in f.maker.fgraph.toposort()] - f() - - def test_neibs_wrap_centered_step_manual(self): - - expected1 = [ - [24, 20, 21, 4, 0, 1, 9, 5, 6], - [21, 22, 23, 1, 2, 3, 6, 7, 8], - [23, 24, 20, 3, 4, 0, 8, 9, 5], - [9, 5, 6, 14, 10, 11, 19, 15, 16], - [6, 7, 8, 11, 12, 13, 16, 17, 18], - [8, 9, 5, 13, 14, 10, 18, 19, 15], - [19, 15, 16, 24, 20, 21, 4, 0, 1], - [16, 17, 18, 21, 22, 23, 1, 2, 3], - [18, 19, 15, 23, 24, 20, 3, 4, 0], - ] - expected2 = [ - [24, 20, 21, 4, 0, 1, 9, 5, 6], - [22, 23, 24, 2, 3, 4, 7, 8, 9], - [14, 10, 11, 19, 15, 16, 24, 20, 21], - [12, 13, 14, 17, 18, 19, 22, 23, 24], - ] - expected3 = [ - [19, 15, 16, 24, 20, 21, 4, 0, 1, 9, 5, 6, 14, 10, 11], - [17, 18, 19, 22, 23, 24, 2, 3, 4, 7, 8, 9, 12, 13, 14], - [9, 5, 6, 14, 10, 11, 19, 15, 16, 24, 20, 21, 4, 0, 1], - [7, 8, 9, 12, 13, 14, 17, 18, 19, 22, 23, 24, 2, 3, 4], - ] - expected4 = [ - [23, 24, 20, 21, 22, 3, 4, 0, 1, 2, 8, 9, 5, 6, 7], - [21, 22, 23, 24, 20, 1, 2, 3, 4, 0, 6, 7, 8, 9, 5], - [13, 14, 10, 11, 12, 18, 19, 15, 16, 17, 23, 24, 20, 21, 22], - [11, 12, 13, 14, 10, 16, 17, 18, 19, 15, 21, 22, 23, 24, 20], - ] - expected5 = [ - [24, 20, 21, 4, 0, 1, 9, 5, 6], - [22, 23, 24, 2, 3, 4, 7, 8, 9], - [9, 5, 6, 14, 10, 11, 19, 15, 16], - [7, 8, 9, 12, 13, 14, 17, 18, 19], - [19, 15, 16, 24, 20, 21, 4, 0, 1], - [17, 18, 19, 22, 23, 24, 2, 3, 4], - ] - expected6 = [ - [24, 20, 21, 4, 0, 1, 9, 5, 6], - [21, 22, 23, 1, 2, 3, 6, 7, 8], - [23, 24, 20, 3, 4, 0, 8, 9, 5], - [14, 10, 11, 19, 15, 16, 24, 20, 21], - [11, 12, 13, 16, 17, 18, 21, 22, 23], - [13, 14, 10, 18, 19, 15, 23, 24, 20], - ] - - # TODO test discontinuous image - - for shp_idx, (shape, neib_shape, neib_step, expected) in enumerate( - [ - [(7, 8, 5, 5), (3, 3), (2, 2), expected1], - [(7, 8, 5, 5), (3, 3), (3, 3), expected2], - [(7, 8, 5, 5), (5, 3), (3, 3), expected3], - [(7, 8, 5, 5), (3, 5), (3, 3), expected4], - [(80, 90, 5, 5), (3, 3), (2, 3), expected5], - [(1025, 9, 5, 5), (3, 3), (3, 2), expected6], - [(1, 1, 5, 1035), (3, 3), (3, 3), None], - [(1, 1, 1045, 5), (3, 3), (3, 3), None], - ] - ): - - for dtype in self.dtypes: - - images = shared( - np.asarray(np.arange(np.prod(shape)).reshape(shape), dtype=dtype) - ) - neib_shape = at.as_tensor_variable(neib_shape) - neib_step = at.as_tensor_variable(neib_step) - expected = np.asarray(expected) - - f = function( - [], - images2neibs(images, neib_shape, neib_step, mode="wrap_centered"), - mode=self.mode, - ) - neibs = f() - - if expected.size > 1: - for i in range(shape[0] * shape[1]): - assert np.allclose( - neibs[ - i * expected.shape[0] : (i + 1) * expected.shape[0], : - ], - expected + 25 * i, - ), "wrap_centered" - - assert self.op in [type(node.op) for node in f.maker.fgraph.toposort()] - - # g = function([], neibs2images(neibs, neib_shape, images.shape), mode=self.mode) - # TODO: why this is commented? 
- # assert numpy.allclose(images.get_value(borrow=True), g()) - - @pytest.mark.slow - def test_neibs_half_step_by_valid(self): - neib_shapes = ((3, 3), (3, 5), (5, 3)) - for shp_idx, (shape, neib_step) in enumerate( - [ - [(7, 8, 5, 5), (1, 1)], - [(7, 8, 5, 5), (2, 2)], - [(7, 8, 5, 5), (4, 4)], - [(7, 8, 5, 5), (1, 4)], - [(7, 8, 5, 5), (4, 1)], - [(80, 90, 5, 5), (1, 2)], - [(1025, 9, 5, 5), (2, 1)], - [(1, 1, 5, 1037), (2, 4)], - [(1, 1, 1045, 5), (4, 2)], - ] - ): - for neib_shape in neib_shapes: - for dtype in self.dtypes: - x = pytensor.shared(np.random.standard_normal(shape).astype(dtype)) - extra = (neib_shape[0] // 2, neib_shape[1] // 2) - padded_shape = ( - x.shape[0], - x.shape[1], - x.shape[2] + 2 * extra[0], - x.shape[3] + 2 * extra[1], - ) - padded_x = at.zeros(padded_shape) - padded_x = at.set_subtensor( - padded_x[:, :, extra[0] : -extra[0], extra[1] : -extra[1]], x - ) - x_using_valid = images2neibs( - padded_x, neib_shape, neib_step, mode="valid" - ) - x_using_half = images2neibs(x, neib_shape, neib_step, mode="half") - f_valid = pytensor.function([], x_using_valid, mode="FAST_RUN") - f_half = pytensor.function([], x_using_half, mode=self.mode) - unittest_tools.assert_allclose(f_valid(), f_half()) - - @pytest.mark.slow - def test_neibs_full_step_by_valid(self): - for shp_idx, (shape, neib_step, neib_shapes) in enumerate( - [ - [(7, 8, 5, 5), (1, 1), ((3, 3), (3, 5), (5, 3))], - [(7, 8, 5, 5), (2, 2), ((3, 3), (3, 5), (5, 3))], - [(7, 8, 6, 6), (3, 3), ((2, 2), (2, 5), (5, 2))], - [(7, 8, 6, 6), (1, 3), ((2, 2), (2, 5), (5, 2))], - [(7, 8, 6, 6), (3, 1), ((2, 2), (2, 5), (5, 2))], - [(80, 90, 5, 5), (1, 2), ((3, 3), (3, 5), (5, 3))], - [(1025, 9, 5, 5), (2, 1), ((3, 3), (3, 5), (5, 3))], - [(1, 1, 11, 1037), (2, 3), ((3, 3), (5, 3))], - [(1, 1, 1043, 11), (3, 2), ((3, 3), (3, 5))], - ] - ): - for neib_shape in neib_shapes: - for dtype in self.dtypes: - x = pytensor.shared(np.random.standard_normal(shape).astype(dtype)) - extra = (neib_shape[0] - 1, neib_shape[1] - 1) - padded_shape = ( - x.shape[0], - x.shape[1], - x.shape[2] + 2 * extra[0], - x.shape[3] + 2 * extra[1], - ) - padded_x = at.zeros(padded_shape) - padded_x = at.set_subtensor( - padded_x[:, :, extra[0] : -extra[0], extra[1] : -extra[1]], x - ) - x_using_valid = images2neibs( - padded_x, neib_shape, neib_step, mode="valid" - ) - x_using_full = images2neibs(x, neib_shape, neib_step, mode="full") - f_valid = pytensor.function([], x_using_valid, mode="FAST_RUN") - f_full = pytensor.function([], x_using_full, mode=self.mode) - unittest_tools.assert_allclose(f_valid(), f_full()) - - @config.change_flags(compute_test_value="off") - def test_neibs_bad_shape_wrap_centered(self): - shape = (2, 3, 10, 10) - - for dtype in self.dtypes: - images = shared(np.arange(np.prod(shape), dtype=dtype).reshape(shape)) - - for neib_shape in [(3, 2), (2, 3)]: - neib_shape = at.as_tensor_variable(neib_shape) - - f = function( - [], - images2neibs(images, neib_shape, mode="wrap_centered"), - mode=self.mode, - ) - with pytest.raises(TypeError): - f() - - for shape in [(2, 3, 2, 3), (2, 3, 3, 2)]: - images = shared(np.arange(np.prod(shape)).reshape(shape)) - neib_shape = at.as_tensor_variable((3, 3)) - f = function( - [], - images2neibs(images, neib_shape, mode="wrap_centered"), - mode=self.mode, - ) - with pytest.raises(TypeError): - f() - - # Test a valid shapes - shape = (2, 3, 3, 3) - images = shared(np.arange(np.prod(shape)).reshape(shape)) - neib_shape = at.as_tensor_variable((3, 3)) - - f = function( - [], - images2neibs(images, 
neib_shape, mode="wrap_centered"), - mode=self.mode, - ) - f() - - def test_grad_wrap_centered(self): - # It is not implemented for now. So test that we raise an error. - shape = (2, 3, 6, 6) - images_val = np.random.random(shape).astype("float32") - - def fn(images): - return images2neibs(images, (3, 3), mode="wrap_centered") - - with pytest.raises(TypeError): - unittest_tools.verify_grad(fn, [images_val], mode=self.mode) - - def test_grad_half(self): - # It is not implemented for now. So test that we raise an error. - shape = (2, 3, 6, 6) - rng = np.random.default_rng(28483) - images_val = rng.random(shape).astype("float32") - - def fn(images): - return images2neibs(images, (3, 3), mode="half") - - with pytest.raises(TypeError): - unittest_tools.verify_grad(fn, [images_val], mode=self.mode) - - def test_grad_full(self): - # It is not implemented for now. So test that we raise an error. - shape = (2, 3, 6, 6) - rng = np.random.default_rng(28483) - images_val = rng.random(shape).astype("float32") - - def fn(images): - return images2neibs(images, (3, 3), mode="full") - - with pytest.raises(TypeError): - unittest_tools.verify_grad(fn, [images_val], mode=self.mode) - - def test_grad_valid(self): - shape = (2, 3, 6, 6) - rng = np.random.default_rng(28483) - images_val = rng.random(shape).astype("float32") - - def fn(images): - return images2neibs(images, (2, 2)) - - unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1) - - def fn(images): - return images2neibs(images, (3, 2), (1, 2)) - - unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1) - - def fn(images): - return images2neibs(images, (1, 2), (5, 2)) - - unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1) - - def test_grad_ignore_border(self): - shape = (2, 3, 5, 5) - rng = np.random.default_rng(28483) - images_val = rng.random(shape).astype("float32") - - def fn(images): - return images2neibs(images, (2, 2), mode="ignore_borders") - - unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1) - - def test_neibs2images_grad(self): - # say we had images of size (2, 3, 10, 10) - # then we extracted 2x2 neighbors on this, we get (2 * 3 * 5 * 5, 4) - rng = np.random.default_rng(28483) - neibs_val = rng.random((150, 4)) - - def fn(neibs): - return neibs2images(neibs, (2, 2), (2, 3, 10, 10)) - - unittest_tools.verify_grad(fn, [neibs_val], mode=self.mode, eps=0.1) - - def test_neibs_valid_with_inconsistent_borders(self): - shape = (2, 3, 5, 5) - images = dtensor4() - images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape) - - f = pytensor.function( - [images], - at.sqr(images2neibs(images, (2, 2), mode="valid")), - mode=self.mode, - ) - with pytest.raises(TypeError): - f(images_val) - - def test_neibs_half_with_inconsistent_borders(self): - shape = (2, 3, 5, 5) - images = dtensor4() - images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape) - - f = pytensor.function( - [images], at.sqr(images2neibs(images, (2, 2), mode="half")), mode=self.mode - ) - with pytest.raises(TypeError): - f(images_val) - - def test_neibs_full_with_inconsistent_borders(self): - shape = (2, 3, 5, 5) - images = dtensor4() - images_val = np.arange(np.prod(shape), dtype="float32").reshape(shape) - - f = pytensor.function( - [images], at.sqr(images2neibs(images, (2, 2), mode="full")), mode=self.mode - ) - with pytest.raises(TypeError): - f(images_val) - - def test_can_not_infer_nb_dim(self): - # Was reported in gh-5613. 
Test that we do not crash - # or that we crash in a few other case found while - # investigating that case - - img = tensor4("img") - patches = nnet.neighbours.images2neibs(img, [16, 16]) - extractPatches = pytensor.function([img], patches, mode=self.mode) - - patsRecovery = matrix("patsRecovery") - original_size = ivector("original_size") - - for mode in ["valid", "ignore_borders"]: - out = neibs2images(patsRecovery, (16, 16), original_size, mode=mode) - f = pytensor.function([patsRecovery, original_size], out, mode=self.mode) - - im_val = np.ones((1, 3, 320, 320), dtype=np.float32) - neibs = extractPatches(im_val) - - # TODO FIXME: Make this a real test and `assert` something - f(neibs, im_val.shape) - - # Wrong number of dimensions - with pytest.raises(ValueError): - f(neibs, (1, 1, 3, 320, 320)) - # End up with a step of 0 - # This can lead to division by zero in DebugMode - with pytest.raises((ValueError, ZeroDivisionError)): - f(neibs, (3, 320, 320, 1)) - - def speed_neibs(self): - shape = (100, 40, 18, 18) - images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape)) - neib_shape = at.as_tensor_variable((3, 3)) - - f = function([], images2neibs(images, neib_shape), mode=self.mode) - - for i in range(1000): - f() - - def speed_neibs_wrap_centered(self): - shape = (100, 40, 18, 18) - images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape)) - neib_shape = at.as_tensor_variable((3, 3)) - - f = function( - [], images2neibs(images, neib_shape, mode="wrap_centered"), mode=self.mode - ) - - for i in range(1000): - f() - - def speed_neibs_half(self): - shape = (100, 40, 18, 18) - images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape)) - neib_shape = at.as_tensor_variable((3, 3)) - - f = function([], images2neibs(images, neib_shape, mode="half"), mode=self.mode) - - for i in range(1000): - f() - - def speed_neibs_full(self): - shape = (100, 40, 18, 18) - images = shared(np.arange(np.prod(shape), dtype="float32").reshape(shape)) - neib_shape = at.as_tensor_variable((3, 3)) - - f = function([], images2neibs(images, neib_shape, mode="full"), mode=self.mode) - - for i in range(1000): - f() - - def test_infer_shape(self): - shape = (100, 40, 6, 3) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 1), mode="valid")], - [images], - Images2Neibs, - ) - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 3), mode="valid")], - [images], - Images2Neibs, - ) - shape = (100, 40, 5, 4) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 1), mode="ignore_borders")], - [images], - Images2Neibs, - ) - shape = (100, 40, 5, 3) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 3), mode="ignore_borders")], - [images], - Images2Neibs, - ) - - shape = (100, 40, 6, 7) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 2), mode="ignore_borders")], - [images], - Images2Neibs, - ) - shape = (100, 40, 5, 10) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(3, 3), mode="wrap_centered")], - [images], - Images2Neibs, - ) - shape = (100, 40, 6, 4) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 
1), mode="half")], - [images], - Images2Neibs, - ) - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 3), mode="half")], - [images], - Images2Neibs, - ) - shape = (100, 40, 6, 5) - images = np.ones(shape).astype("float32") - x = ftensor4() - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 1), mode="full")], - [images], - Images2Neibs, - ) - self._compile_and_check( - [x], - [images2neibs(x, neib_shape=(2, 3), mode="full")], - [images], - Images2Neibs, - ) diff --git a/tests/tensor/nnet/test_rewriting.py b/tests/tensor/nnet/test_rewriting.py deleted file mode 100644 index 1bbd47b6eb..0000000000 --- a/tests/tensor/nnet/test_rewriting.py +++ /dev/null @@ -1,57 +0,0 @@ -import pytensor -from pytensor.graph.rewriting.basic import check_stack_trace -from pytensor.tensor.nnet.blocksparse import ( - sparse_block_dot, - sparse_block_gemv, - sparse_block_gemv_inplace, - sparse_block_outer, - sparse_block_outer_inplace, -) -from pytensor.tensor.type import fmatrix, ftensor3, ftensor4, lmatrix -from tests.unittest_tools import assertFailure_fast - - -def test_blocksparse_inplace_gemv_opt(): - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = lmatrix() - oIdx = lmatrix() - - o = sparse_block_dot(W, h, iIdx, b, oIdx) - - f = pytensor.function([W, h, iIdx, b, oIdx], o) - - if pytensor.config.mode == "FAST_COMPILE": - assert not f.maker.fgraph.toposort()[-1].op.inplace - assert check_stack_trace(f, ops_to_check=[sparse_block_gemv]) - else: - assert f.maker.fgraph.toposort()[-1].op.inplace - assert check_stack_trace(f, ops_to_check=[sparse_block_gemv_inplace]) - - -if pytensor.config.mode != "FAST_COMPILE": - test_blocksparse_inplace_gemv_opt = assertFailure_fast( - test_blocksparse_inplace_gemv_opt - ) - - -def test_blocksparse_inplace_outer_opt(): - b = fmatrix() - W = ftensor4() - h = ftensor3() - iIdx = lmatrix() - oIdx = lmatrix() - - o = sparse_block_dot(W, h, iIdx, b, oIdx) - - f = pytensor.function( - [W, h, iIdx, b, oIdx], [o, pytensor.gradient.grad(o.sum(), wrt=W)] - ) - - if pytensor.config.mode == "FAST_COMPILE": - assert not f.maker.fgraph.toposort()[-1].op.inplace - assert check_stack_trace(f, ops_to_check=sparse_block_outer) - else: - assert f.maker.fgraph.toposort()[-1].op.inplace - assert check_stack_trace(f, ops_to_check=sparse_block_outer_inplace) diff --git a/tests/tensor/nnet/test_sigm.py b/tests/tensor/nnet/test_sigm.py deleted file mode 100644 index cdc3f899e6..0000000000 --- a/tests/tensor/nnet/test_sigm.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -import pytest - -import pytensor -from pytensor.compile.mode import get_default_mode, get_mode -from pytensor.configdefaults import config -from pytensor.graph.rewriting.basic import check_stack_trace -from pytensor.scalar.basic import Composite -from pytensor.tensor.elemwise import Elemwise -from pytensor.tensor.inplace import sigmoid_inplace -from pytensor.tensor.math import clip, sigmoid -from pytensor.tensor.nnet.sigm import ( - hard_sigmoid, - ultra_fast_scalar_sigmoid, - ultra_fast_sigmoid, - ultra_fast_sigmoid_inplace, -) -from pytensor.tensor.type import matrix -from tests.tensor.utils import ( - _good_broadcast_unary_normal_no_complex, - check_floatX, - copymod, - makeBroadcastTester, - upcast_int8_nfunc, -) - - -TestUltraFastSigmoidBroadcast = makeBroadcastTester( - op=ultra_fast_sigmoid, - expected=upcast_int8_nfunc( - lambda inputs: check_floatX(inputs, 1 / (1 + np.exp(-inputs))) - ), - good=copymod( - _good_broadcast_unary_normal_no_complex, without=["uint16"] - ), # numpy 
fucnting overflows with uint16. - # grad=_grad_broadcast_unary_normal, - name="UltraFastSigmoidTester", - # This is an approx of the sigmoid. That is why we raise eps - eps=5e-2, -) - -TestHardSigmoidBroadcast = makeBroadcastTester( - op=hard_sigmoid, - expected=upcast_int8_nfunc( - lambda inputs: check_floatX(inputs, 1 / (1 + np.exp(-inputs))) - ), - good=copymod( - _good_broadcast_unary_normal_no_complex, without=["uint16"] - ), # numpy fucnting overflows with uint16. - # grad=_grad_broadcast_unary_normal, - name="HardSigmoidTester", - # This is an approx of the sigmoid. That is why we raise eps - eps=1e-1, -) - - -class TestSpecialSigmoidOpts: - def get_mode(self, excluding=None): - """ - Return appropriate mode for the tests. - - :param excluding: List of optimizations to exclude. - - :return: The current default mode unless the `config.mode` option is - set to 'FAST_COMPILE' (in which case it is replaced by the 'FAST_RUN' - mode), without the optimizations specified in `excluding`. - """ - if excluding is None: - excluding = [] - m = config.mode - if m == "FAST_COMPILE": - mode = get_mode("FAST_RUN") - else: - mode = get_default_mode() - if excluding: - return mode.excluding(*excluding) - else: - return mode - - def test_local_ultra_fast_sigmoid(self): - x = matrix("x") - s = sigmoid(x) - - mode = self.get_mode("local_ultra_fast_sigmoid") - f = pytensor.function([x], s, mode=mode) - assert check_stack_trace(f, ops_to_check=sigmoid) - topo = f.maker.fgraph.toposort() - assert len(topo) == 1 - assert topo[0].op == sigmoid - - mode = self.get_mode().including("local_ultra_fast_sigmoid") - f = pytensor.function([x], s, mode=mode) - assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid) - topo = f.maker.fgraph.toposort() - assert topo[0].op == ultra_fast_sigmoid - assert len(topo) == 1 - - s = sigmoid_inplace(x) - f = pytensor.function([x], s, mode=mode, accept_inplace=True) - assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid_inplace) - topo = f.maker.fgraph.toposort() - assert topo[0].op == ultra_fast_sigmoid_inplace - assert len(topo) == 1 - - @pytest.mark.skipif(config.cxx == "", reason="Needs a C compiler.") - def test_composite_c_code(self): - """Make sure this `Op`'s `c_code` works within a `Composite`.""" - x = matrix("x") - mode = get_mode("FAST_RUN").including("local_ultra_fast_sigmoid") - f = pytensor.function([x], sigmoid(x) + sigmoid(x + 1), mode=mode) - topo = f.maker.fgraph.toposort() - - assert isinstance(topo[0].op, Elemwise) - assert isinstance(topo[0].op.scalar_op, Composite) - assert ultra_fast_scalar_sigmoid in { - node.op for node in topo[0].op.scalar_op.fgraph.toposort() - } - assert len(topo) == 1 - - def test_local_hard_sigmoid(self): - x = matrix("x") - s = sigmoid(x) - - mode = self.get_mode("local_hard_sigmoid") - f = pytensor.function([x], s, mode=mode) - assert check_stack_trace(f, ops_to_check=sigmoid) - topo = f.maker.fgraph.toposort() - assert topo[0].op == sigmoid - assert len(topo) == 1 - - mode = self.get_mode().including("local_hard_sigmoid") - f = pytensor.function([x], s, mode=mode) - topo = f.maker.fgraph.toposort() - assert not any(n.op == sigmoid for n in topo) - f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]]) - - mode2 = mode.excluding("fusion").excluding("inplace") - f2 = pytensor.function([x], s, mode=mode2) - assert check_stack_trace(f2, ops_to_check=clip) diff --git a/tests/tensor/test_misc.py b/tests/tensor/test_misc.py deleted file mode 100644 index 545b277381..0000000000 --- a/tests/tensor/test_misc.py +++ /dev/null @@ -1,73 +0,0 @@ 
-import copy - -import numpy as np - -from pytensor.compile.function import function -from pytensor.compile.io import Out -from pytensor.tensor.math import dot -from pytensor.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias -from pytensor.tensor.type import dmatrix, dvector, ivector, matrix - - -def test_bug_2009_07_17_borrowed_output(): - # Regression test for a bug where output was borrowed by mistake. - a = dmatrix() - b = dmatrix() - # The output should *NOT* be borrowed. - g = function([a, b], Out(dot(a, b), borrow=False)) - - x = np.zeros((1, 2)) - y = np.ones((2, 5)) - - z = g(x, y) - # print(z) # Should be zero. - x.fill(1) - # print(g(x, y)) # Should be non-zero. - # print(z) # Should still be zero. - assert np.linalg.norm(z) == 0 - - # The code above was supposed to fail when it was written (or, more - # accurately, on the next revision, i.e. when it was merged with the - # rest of the code, i.e. on revision cac9c9e9f08e). - # However, for some reason, it does not fail anymore when at this revision. - # Thus, a new test (below) was added that exhibits the same issue. Note - # that it may better be moved into the test_nnet.py test file if it turns - # out the bug was caused by 'crossentropy_softmax_argmax_1hot_with_bias', - # and was not a more general issue. - test_output_activation_no_bias = dmatrix() - test_b2 = dvector() - test_target = ivector() - nll_softmax_argmax = crossentropy_softmax_argmax_1hot_with_bias( - test_output_activation_no_bias, test_b2, test_target - ) - output = nll_softmax_argmax[1] - g = function( - [test_output_activation_no_bias, test_b2, test_target], - Out(output, borrow=False), - ) - - a = np.zeros((1, 5)) - b = np.ones(5) - c = np.zeros(1, dtype=np.int32) - - z = g(a, b, c) - z_backup = copy.copy(z) - id_z = id(z) - # print(f"Output z after first call: {z}") - a[0, 0] = 1 - id_other = id(g(a, b, c)) - # print(f"Output z after second call: {z}") - # Ensure that calling the function again returns a pointer towards a new - # array. - assert id_z != id_other - # Just to be 100% sure, ensure that z was not altered. - assert (z == z_backup).all() - - -def test_deepcopied_type_filter(): - a = copy.deepcopy(matrix()) - - # The following should run cleanly. - # As of commit 731e2d2fa68487733320d341d08b454a50c90d12 - # it was failing. 
- a.type.filter(np.ones((2, 2), dtype=a.dtype), strict=True) From 26f0d55d8fc5b165b03dcc874b85433d97e5233d Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 00:30:31 +0300 Subject: [PATCH 09/43] remove deprecated module pytensor.assert_op --- pytensor/assert_op.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 pytensor/assert_op.py diff --git a/pytensor/assert_op.py b/pytensor/assert_op.py deleted file mode 100644 index a3eca39218..0000000000 --- a/pytensor/assert_op.py +++ /dev/null @@ -1,11 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.assert_op` is deprecated " - "and its `Op`s have been moved to `pytensor.raise_op`", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.raise_op import Assert, assert_op # noqa: F401 E402 From 1ad1cd164996588fa180b05244a792f7493e6fb7 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 00:37:50 +0300 Subject: [PATCH 10/43] remove functions that are not referenced anywhere --- pytensor/utils.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/pytensor/utils.py b/pytensor/utils.py index 8b78cec4ae..5af7f673c6 100644 --- a/pytensor/utils.py +++ b/pytensor/utils.py @@ -16,7 +16,6 @@ __all__ = [ - "cmp", "get_unbound_function", "maybe_add_to_os_environ_pathlist", "DefaultOrderedDict", @@ -106,22 +105,6 @@ def exc_message(e): return msg -def cmp(x, y): - """Return -1 if x < y, 0 if x == y, 1 if x > y.""" - return (x > y) - (x < y) - - -def key_to_cmp(key): - """ - comparator function based on "key" function - """ - - def key_cmp(a, b): - return cmp(key(a), key(b)) - - return key_cmp - - def get_unbound_function(unbound): # Op.make_thunk isn't bound, so don't have a __func__ attr. # But bound method, have a __func__ method that point to the From 5a1faa582288d0b2d644d5ac6cd257bbdd4d4e6f Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 01:03:16 +0300 Subject: [PATCH 11/43] remove deprecated config api --- pytensor/configdefaults.py | 32 ++--------- pytensor/configparser.py | 106 ------------------------------------- tests/test_config.py | 46 ---------------- 3 files changed, 3 insertions(+), 181 deletions(-) diff --git a/pytensor/configdefaults.py b/pytensor/configdefaults.py index d6afff0fd5..6efc6c3ff1 100644 --- a/pytensor/configdefaults.py +++ b/pytensor/configdefaults.py @@ -11,7 +11,6 @@ from setuptools._distutils.spawn import find_executable import pytensor -import pytensor.configparser from pytensor.configparser import ( BoolParam, ConfigParam, @@ -20,6 +19,7 @@ FloatParam, IntParam, StrParam, + _create_default_config, ) from pytensor.utils import ( LOCAL_BITWIDTH, @@ -1195,27 +1195,6 @@ def add_vm_configvars(): ) -def add_deprecated_configvars(): - - # TODO: remove this? - config.add( - "unittests__rseed", - "Seed to use for randomized unit tests. " - "Special value 'random' means using a seed of None.", - StrParam(666, validate=_good_seem_param), - in_c_key=False, - ) - - config.add( - "warn__round", - "Warn when using `tensor.round` with the default mode. " - "Round changed its default from `half_away_from_zero` to " - "`half_to_even` to have the same default as NumPy.", - BoolParam(_warn_default("0.9")), - in_c_key=False, - ) - - def add_scan_configvars(): config.add( "scan__allow_gc", @@ -1444,12 +1423,8 @@ def add_caching_dir_configvars(): ) # Eventually, the instance of `PyTensorConfigParser` should be created right here, -# where it is also populated with settings. 
But for a transition period, it -# remains as `configparser._config`, while everybody accessing it through -# `configparser.config` is flooded with deprecation warnings. These warnings -# instruct one to use `pytensor.config`, which is an alias for -# `pytensor.configdefaults.config`. -config = pytensor.configparser._config +# where it is also populated with settings. +config = _create_default_config() # The functions below register config variables into the config instance above. add_basic_configvars() @@ -1467,7 +1442,6 @@ def add_caching_dir_configvars(): # that module, which introduces a circular dependency! add_metaopt_configvars() add_vm_configvars() -add_deprecated_configvars() add_numba_configvars() # TODO: `gcc_version_str` is used by other modules.. Should it become an immutable config var? diff --git a/pytensor/configparser.py b/pytensor/configparser.py index 5f9716589b..1dfc3e31e7 100644 --- a/pytensor/configparser.py +++ b/pytensor/configparser.py @@ -65,27 +65,6 @@ def __exit__(self, *args): v.__set__(self._root, self.old_vals[k]) -class _SectionRedirect: - """Functions as a mock property on the PyTensorConfigParser. - - It redirects attribute access (to config subsectinos) to the - new config variable properties that use "__" in their name. - """ - - def __init__(self, root, section_name): - self._root = root - self._section_name = section_name - super().__init__() - - def __getattr__(self, attr): - warnings.warn( - f"Accessing section '{attr}' through old .-based API. " - f"This will be removed. Use 'config.{self._section_name}__{attr}' instead.", - DeprecationWarning, - ) - return getattr(self._root, f"{self._section_name}__{attr}") - - class PyTensorConfigParser: """Object that holds configuration settings.""" @@ -189,18 +168,6 @@ def add(self, name, doc, configparam, in_c_key=True): # the ConfigParam implements __get__/__set__, enabling us to create a property: setattr(self.__class__, name, configparam) - # The old API used dots for accessing a hierarchy of sections. - # The following code adds redirects that spill DeprecationWarnings - # while allowing backwards-compatible access to dot-based subsections. - # Because the subsectioning is recursive, redirects must be added for - # all levels. For example: ".test", ".test.subsection". - sections = name.split("__") - for s in range(1, len(sections)): - section_name = "__".join(sections[:s]) - if not hasattr(self, section_name): - redirect = _SectionRedirect(self, section_name) - setattr(self.__class__, section_name, redirect) - def fetch_val_for_key(self, key, delete_key=False): """Return the overriding config value for a key. A successful search returns a string value. @@ -565,76 +532,3 @@ def _create_default_config(): pytensor_raw_cfg=pytensor_raw_cfg, ) return config - - -class _ConfigProxy: - """Like _SectionRedirect this class enables backwards-compatible access to the - config settings, but raises DeprecationWarnings with instructions to use `pytensor.config`. 
- """ - - def __init__(self, actual): - _ConfigProxy._actual = actual - - def __getattr__(self, attr): - if attr == "_actual": - return _ConfigProxy._actual - warnings.warn( - "`pytensor.configparser.config` is deprecated; use `pytensor.config` instead.", - DeprecationWarning, - stacklevel=2, - ) - return getattr(self._actual, attr) - - def __setattr__(self, attr, value): - if attr == "_actual": - return setattr(_ConfigProxy._actual, attr, value) - warnings.warn( - "`pytensor.configparser.config` is deprecated; use `pytensor.config` instead.", - DeprecationWarning, - stacklevel=2, - ) - return setattr(self._actual, attr, value) - - -# Create the actual instance of the config. This one should eventually move to -# `configdefaults`: -_config = _create_default_config() - -# The old API often imported the default config object from `configparser`. -# These imports/accesses should be replaced with `pytensor.config`, so this wraps -# it with warnings: -config = _ConfigProxy(_config) - -DEPRECATED_NAMES = [ - ( - "change_flags", - "`change_flags` is deprecated; use `pytensor.config.change_flags` instead.", - _config.change_flags, - ), - ( - "_change_flags", - "`_change_flags` is deprecated; use `pytensor.config.change_flags` instead.", - _config.change_flags, - ), - ( - "_config_print", - "`_config_print` is deprecated; use `pytensor.config.config_print` instead.", - _config.config_print, - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/tests/test_config.py b/tests/test_config.py index 33f4f32857..a30192fa47 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -18,52 +18,6 @@ def _create_test_config(): ) -def test_api_deprecation_warning(): - # accessing through configdefaults.config is the new best practice - with pytest.warns(None): - root = configdefaults.config - assert isinstance(str(root), str) - - # accessing through configparser.config is discouraged - root = configparser.config - with pytest.warns(DeprecationWarning, match="instead"): - root.add( - "test_deprecationwarning", - "A config var from a test case.", - configparser.StrParam("test_default"), - ) - with pytest.warns(DeprecationWarning, match="instead"): - with root.change_flags(test_deprecationwarning="new_value"): - pass - - -def test_api_redirect(): - root = _create_test_config() - # one section level - root.add( - "test__section_redirect", - "A config var from a test case.", - configparser.StrParam("test_default"), - ) - assert hasattr(root, "test__section_redirect") - assert root.test__section_redirect == "test_default" - assert hasattr(root, "test") - assert isinstance(root.test, configparser._SectionRedirect) - with pytest.warns(DeprecationWarning): - assert root.test.section_redirect == "test_default" - - # two section levels - root.add( - "test__subsection__redirect", - "A config var from a test case.", - configparser.StrParam("test_default2"), - ) - assert hasattr(root, "test__subsection__redirect") - assert root.test__subsection__redirect == "test_default2" - with pytest.warns(DeprecationWarning): - assert root.test.subsection.redirect == "test_default2" - - def test_invalid_default(): # Ensure an invalid default value found in the 
PyTensor code only causes # a crash if it is not overridden by the user. From b272a3835d7fc88ffa0f424219f867ce2e30cdea Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 10:16:06 +0300 Subject: [PATCH 12/43] remove pytensor.tensor.signal module as it was marked deprecated --- pytensor/tensor/signal/__init__.py | 0 pytensor/tensor/signal/conv.py | 126 -- pytensor/tensor/signal/pool.py | 2567 ---------------------------- tests/tensor/signal/test_pool.py | 1408 --------------- tests/test_rop.py | 60 - 5 files changed, 4161 deletions(-) delete mode 100644 pytensor/tensor/signal/__init__.py delete mode 100644 pytensor/tensor/signal/conv.py delete mode 100644 pytensor/tensor/signal/pool.py delete mode 100644 tests/tensor/signal/test_pool.py diff --git a/pytensor/tensor/signal/__init__.py b/pytensor/tensor/signal/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/pytensor/tensor/signal/conv.py b/pytensor/tensor/signal/conv.py deleted file mode 100644 index f6fc59c2de..0000000000 --- a/pytensor/tensor/signal/conv.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -Contains a wrapper function for tensor.nnet.ConvOp, which can be used to perform -generic 2D convolution. - -""" -import logging -import warnings - -from pytensor import tensor as at -from pytensor.tensor.nnet import conv -from pytensor.tensor.shape import reshape - - -warnings.warn( - "The module `pytensor.tensor.signal` is deprecated and will " - "be removed from PyTensor in version 2.9.0.", - DeprecationWarning, - stacklevel=2, -) - - -__docformat__ = "restructuredtext en" - - -_logger = logging.getLogger("pytensor.tensor.signal.conv") - - -def conv2d( - input, - filters, - image_shape=None, - filter_shape=None, - border_mode="valid", - subsample=(1, 1), - **kargs, -): - """ - signal.conv.conv2d performs a basic 2D convolution of the input with the - given filters. The input parameter can be a single 2D image or a 3D tensor, - containing a set of images. Similarly, filters can be a single 2D filter or - a 3D tensor, corresponding to a set of 2D filters. - - Shape parameters are optional and will result in faster execution. - - Parameters - ---------- - input : Symbolic pytensor tensor for images to be filtered. - Dimensions: ([num_images], image height, image width) - filters : Symbolic pytensor tensor for convolution filter(s). - Dimensions: ([num_filters], filter height, filter width) - border_mode: {'valid', 'full'} - See scipy.signal.convolve2d. - subsample - Factor by which to subsample output. - image_shape : tuple of length 2 or 3 - ([num_images,] image height, image width). - filter_shape : tuple of length 2 or 3 - ([num_filters,] filter height, filter width). - kwargs - See pytensor.tensor.nnet.conv.conv2d. - - Returns - ------- - symbolic 2D,3D or 4D tensor - Tensor of filtered images, with shape - ([number images,] [number filters,] image height, image width). 
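For a single 2-D image and a single 2-D filter, the operation documented above behaves like SciPy's `convolve2d`, which the `border_mode` description already references. A rough stand-alone sketch of that case, offered purely as an illustration and not as an official replacement recommended by this patch:

    import numpy as np
    from scipy.signal import convolve2d

    rng = np.random.default_rng(0)
    img = rng.random((8, 8))    # (image height, image width)
    kern = rng.random((3, 3))   # (filter height, filter width)

    # border_mode="valid": keep only positions where the filter fits entirely.
    out = convolve2d(img, kern, mode="valid")
    assert out.shape == (6, 6)  # (8 - 3 + 1, 8 - 3 + 1)
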
- - """ - assert input.ndim in (2, 3) - assert filters.ndim in (2, 3) - - # use shape information if it is given to us ### - if filter_shape and image_shape: - if input.ndim == 3: - bsize = image_shape[0] - else: - bsize = 1 - imshp = (1,) + tuple(image_shape[-2:]) - - if filters.ndim == 3: - nkern = filter_shape[0] - else: - nkern = 1 - kshp = filter_shape[-2:] - else: - nkern, kshp = None, None - bsize, imshp = None, None - - # reshape tensors to 4D, for compatibility with ConvOp ### - if input.ndim == 3: - sym_bsize = input.shape[0] - else: - sym_bsize = 1 - - if filters.ndim == 3: - sym_nkern = filters.shape[0] - else: - sym_nkern = 1 - - new_input_shape = at.join(0, at.stack([sym_bsize, 1]), input.shape[-2:]) - input4D = reshape(input, new_input_shape, ndim=4) - - new_filter_shape = at.join(0, at.stack([sym_nkern, 1]), filters.shape[-2:]) - filters4D = reshape(filters, new_filter_shape, ndim=4) - - # perform actual convolution ### - op = conv.ConvOp( - output_mode=border_mode, - dx=subsample[0], - dy=subsample[1], - imshp=imshp, - kshp=kshp, - nkern=nkern, - bsize=bsize, - **kargs, - ) - - output = op(input4D, filters4D) - - # flatten to 3D tensor if convolving with single filter or single image - if input.ndim == 2 and filters.ndim == 2: - output = at.flatten(output.T, ndim=2).T - elif input.ndim == 2 or filters.ndim == 2: - output = at.flatten(output.T, ndim=3).T - - return output diff --git a/pytensor/tensor/signal/pool.py b/pytensor/tensor/signal/pool.py deleted file mode 100644 index b7f8a24d6d..0000000000 --- a/pytensor/tensor/signal/pool.py +++ /dev/null @@ -1,2567 +0,0 @@ -""" -Ops for downsampling images. -Planned: -Pool, DownsampleAvg, DownsampleSoftmax. -""" -import itertools -import warnings - -import numpy as np - -import pytensor.tensor.basic as at -import pytensor.tensor.math as tm -from pytensor.gradient import DisconnectedType -from pytensor.graph.basic import Apply, Constant, Variable -from pytensor.graph.utils import MethodNotDefined -from pytensor.link.c.op import OpenMPOp -from pytensor.link.c.params_type import ParamsType -from pytensor.scalar import bool as bool_t -from pytensor.tensor.type import TensorType, int_dtypes - - -warnings.warn( - "The module `pytensor.tensor.signal` is deprecated and will " - "be removed from PyTensor in version 2.8.5.", - DeprecationWarning, - stacklevel=2, -) - - -def max_pool_2d_same_size(input, patch_size): - """ - Takes as input a 4-D tensor. It sets all non maximum values - of non-overlapping patches of size (patch_size[0],patch_size[1]) to zero, - keeping only the maximum values. The output has the same dimensions as - the input. - - Parameters - ---------- - input : 4-D pytensor tensor of input images - Input images. Max pooling will be done over the 2 last dimensions. - patch_size : tuple of length 2 or pytensor vector of ints of size 2. - Size of the patch (patch height, patch width). - (2,2) will retain only one non-zero value per patch of 4 values. - - """ - output = Pool(True)(input, patch_size) - outs = MaxPoolGrad(True)(input, output, output, patch_size) - return outs - - -def pool_2d( - input, - ws=None, - ignore_border=None, - stride=None, - pad=(0, 0), - mode="max", - ds=None, - st=None, - padding=None, -): - """Downscale the input by a specified factor - - Takes as input a N-D tensor, where N >= 2. 
It downscales the input image by - the specified factor, by keeping only the maximum value of non-overlapping - patches of size (ws[0],ws[1]) - - Parameters - ---------- - input : N-D pytensor tensor of input images - Input images. Max pooling will be done over the 2 last dimensions. - ws : tuple of length 2 or pytensor vector of ints of size 2. - Factor by which to downscale (vertical ws, horizontal ws). - (2,2) will halve the image in each dimension. - ignore_border : bool (default None, will print a warning and set to False) - When True, (5,5) input with ws=(2,2) will generate a (2,2) output. - (3,3) otherwise. - stride : tuple of two ints or pytensor vector of ints of size 2. - Stride size, which is the number of shifts over rows/cols to get the - next pool region. If stride is None, it is considered equal to ws - (no overlap on pooling regions), eg: stride=(1,1) will shifts over - one row and one col for every iteration. - pad : tuple of two ints or pytensor vector of ints of size 2. - (pad_h, pad_w), pad zeros to extend beyond four borders of the - images, pad_h is the size of the top and bottom margins, and - pad_w is the size of the left and right margins. - mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'} - Operation executed on each window. `max` and `sum` always exclude - the padding in the computation. `average` gives you the choice to - include or exclude it. - ds - *deprecated*, use parameter ws instead. - st - *deprecated*, use parameter stride instead. - padding - *deprecated*, use parameter pad instead. - - """ - # check for deprecated parameter names - if ds is not None: - if ws is not None: - raise ValueError( - "You can't provide a tuple value to both 'ws' and 'ds'." - " Please provide a value only to 'ws'." - ) - else: - warnings.warn( - "The 'ds' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'ws'.", - category=DeprecationWarning, - stacklevel=2, - ) - ws = ds - elif ds is None and ws is None: - raise ValueError("You must provide a tuple value for the window size.") - - if st is not None: - if stride is not None: - raise ValueError( - "You can't provide a tuple value to both 'st and 'stride'." - " Please provide a value only to 'stride'." - ) - else: - warnings.warn( - "The 'st' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'stride'.", - category=DeprecationWarning, - stacklevel=2, - ) - stride = st - - if padding is not None: - if pad not in {None, (0, 0)}: - raise ValueError( - "You can't provide a tuple value to both 'padding' and pad." - " Please provide a value only to pad." - ) - else: - warnings.warn( - "The 'padding' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'pad'.", - category=DeprecationWarning, - stacklevel=2, - ) - pad = padding - - if input.ndim < 2: - raise NotImplementedError("pool_2d requires a dimension >= 2") - if ignore_border is None: - warnings.warn( - "pool_2d() will have the parameter ignore_border" - " default value changed to True (currently" - " False). 
To have consistent behavior with all PyTensor" - " version, explicitly add the parameter ignore_border=True.", - category=DeprecationWarning, - stacklevel=2, - ) - ignore_border = False - op = Pool(ignore_border, ndim=2, mode=mode) - output = op(input, ws, stride, pad) - return output - - -def pool_3d( - input, - ws=None, - ignore_border=None, - stride=None, - pad=(0, 0, 0), - mode="max", - ds=None, - st=None, - padding=None, -): - """Downscale the input by a specified factor - - Takes as input a N-D tensor, where N >= 3. It downscales the input image by - the specified factor, by keeping only the maximum value of non-overlapping - patches of size (ws[0],ws[1],ws[2]) - - Parameters - ---------- - input : N-D pytensor tensor of input images - Input images. Max pooling will be done over the 3 last dimensions. - ws : tuple of length 3 or pytensor vector of ints of size 3 - Factor by which to downscale (vertical ws, horizontal ws, depth ws). - (2,2,2) will halve the image in each dimension. - ignore_border : bool (default None, will print a warning and set to False) - When True, (5,5,5) input with ws=(2,2,2) will generate a (2,2,2) output. - (3,3,3) otherwise. - st : tuple of three ints or pytensor vector of ints of size 3 - Stride size, which is the number of shifts over rows/cols/slices to get - the next pool region. If st is None, it is considered equal to ws - (no overlap on pooling regions). - pad : tuple of two ints or pytensor vector of ints of size 3 - (pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the - images, pad_h is the size of the top and bottom margins, - pad_w is the size of the left and right margins, and pad_d is the size - of the front and back margins - mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'} - Operation executed on each window. `max` and `sum` always exclude - the padding in the computation. `average` gives you the choice to - include or exclude it. - ds - *deprecated*, use parameter ws instead. - st - *deprecated*, use parameter st instead. - padding - *deprecated*, use parameter pad instead. - - """ - # check for deprecated parameter names - if ds is not None: - if ws is not None: - raise ValueError( - "You can't provide a tuple value to both 'ws' and 'ds'." - " Please provide a value only to 'ws'." - ) - else: - warnings.warn( - "The 'ds' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'ws'.", - category=DeprecationWarning, - stacklevel=2, - ) - ws = ds - elif ds is None and ws is None: - raise ValueError("You must provide a tuple value for the window size.") - - if st is not None: - if stride is not None: - raise ValueError( - "You can't provide a tuple value to both 'st and 'stride'." - " Please provide a value only to 'stride'." - ) - else: - warnings.warn( - "The 'st' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'stride'.", - category=DeprecationWarning, - stacklevel=2, - ) - stride = st - - if padding is not None: - if pad not in {None, (0, 0, 0)}: - raise ValueError( - "You can't provide a tuple value to both 'padding' and pad." - " Please provide a value only to pad." 
- ) - else: - warnings.warn( - "The 'padding' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'pad'.", - category=DeprecationWarning, - stacklevel=2, - ) - pad = padding - - if input.ndim < 3: - raise NotImplementedError("pool_3d requires a dimension >= 3") - if ignore_border is None: - warnings.warn( - "pool_3d() will have the parameter ignore_border" - " default value changed to True (currently" - " False). To have consistent behavior with all PyTensor" - " version, explicitly add the parameter ignore_border=True.", - category=DeprecationWarning, - stacklevel=2, - ) - ignore_border = False - op = Pool(ignore_border, ndim=3, mode=mode) - output = op(input, ws, stride, pad) - return output - - -class Pool(OpenMPOp): - """ - sum or average over different patches. - - Parameters - ---------- - ws : list or tuple of N ints - Downsample factor over rows, columns etc. - ws indicates the size of the pooling region. - ignore_border : bool - If ws doesn't divide imgshape, do we include an extra row/col/slice - of partial downsampling (False) or ignore it (True). - stride : list or tuple of N ints or None - Stride size, which is the number of shifts over rows/cols/slices to get the - next pool region. If stride is None, it is considered equal to ws - (no overlap on pooling regions). - pad : tuple of N ints or None - For each downsampling dimension, this specifies the number of zeros to - add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the - size of the top and bottom margins, pad_w specifies the size of the left and - right margins. No padding is added if pad is None. - mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'} - ('average_inc_pad' excludes the padding from the count, - 'average_exc_pad' include it) - ndim : int - The number of pooling dimensions N. - The default is 2. - ds - *deprecated*, use parameter ws instead. - st - *deprecated*, use parameter st instead. - padding - *deprecated*, use parameter pad instead. - - - """ - - __props__ = ("ignore_border", "mode", "ndim") - params_type = ParamsType( - ignore_border=bool_t, - ) - - @staticmethod - def out_shape( - imgshape, - ws=None, - ignore_border=False, - stride=None, - pad=None, - ndim=2, - ds=None, - st=None, - padding=None, - ): - """ - Return the shape of the output from this op, for input of given - shape and flags. - - Parameters - ---------- - imgshape : tuple, list, or similar of integer or scalar PyTensor variable - The shape of a tensor of images. The last N elements are - interpreted as the number of rows, and the number of cols. - ws : list or tuple of N ints - Downsample factor over rows and column. - ws indicates the pool region size. - ignore_border : bool - If ws doesn't divide imgshape, do we include an extra row/col/slice - of partial downsampling (False) or ignore it (True). - stride : list or tuple of N ints or None - Stride size, which is the number of shifts over rows/cols/slices to get the - next pool region. If stride is None, it is considered equal to ws - (no overlap on pooling regions). - pad : tuple of N ints or None - For each downsampling dimension, this specifies the number of zeros to - add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the - size of the top and bottom margins, pad_w specifies the size of the left and - right margins. No padding is added if pad is None. - ndim : int - The number of pooling dimensions N. - The default is 2. - ds - *deprecated*, use parameter ws instead. 
- st - *deprecated*, use parameter st instead. - padding - *deprecated*, use parameter pad instead. - - Returns - ------- - list - The shape of the output from this op, for input of given shape. - This will have the same length as imgshape, but with last N - elements reduced as per the downsampling & ignore_border flags. - - """ - # check for deprecated parameter names - if ds is not None: - if ws is not None: - raise ValueError( - "You can't provide a tuple value to both 'ws' and 'ds'." - " Please provide a value only to 'ws'." - ) - else: - warnings.warn( - "The 'ds' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'ws'.", - category=DeprecationWarning, - stacklevel=2, - ) - ws = ds - elif ds is None and ws is None: - raise ValueError("You must provide a tuple value for the window size.") - - if st is not None: - if stride is not None: - raise ValueError( - "You can't provide a tuple value to both 'st and 'stride'." - " Please provide a value only to 'stride'." - ) - else: - warnings.warn( - "The 'st' parameter is not going to exist" - " anymore as it is going to be replaced by the parameter" - " 'stride'.", - category=DeprecationWarning, - stacklevel=2, - ) - stride = st - - if padding is not None: - zero_pad = (0,) * ndim - if pad not in {None, zero_pad}: - raise ValueError( - "You can't provide a tuple value to both 'padding' and pad." - " Please provide a value only to pad." - ) - else: - warnings.warn( - "The 'padding' parameter is not going to" - " exist anymore as it is going to be replaced by the" - " parameter 'pad'.", - category=DeprecationWarning, - stacklevel=2, - ) - pad = padding - - if ndim is None: - ndim = 2 - assert ndim > 0 - if len(imgshape) < ndim: - raise TypeError(f"imgshape must have at least {ndim} dimensions") - - if stride is None: - stride = ws - if pad is None: - pad = (0,) * ndim - patch_shape = tuple( - at.extract_constant(imgshape[-ndim + i]) + pad[i] * 2 for i in range(ndim) - ) - - def compute_out(v, downsample, stride): - if ignore_border: - if downsample == stride: - return v // stride - else: - out = (v - downsample) // stride + 1 - if isinstance(out, Variable): - return tm.maximum(out, 0) - else: - return np.maximum(out, 0) - else: - if isinstance(v, Variable): - return at.switch( - tm.ge(stride, downsample), - (v - 1) // stride + 1, - tm.maximum(0, (v - 1 - downsample) // stride + 1) + 1, - ) - elif stride >= downsample: - return (v - 1) // stride + 1 - else: - return max(0, (v - 1 - downsample + stride) // stride) + 1 - - out_shape = [compute_out(patch_shape[i], ws[i], stride[i]) for i in range(ndim)] - - rval = list(imgshape[:-ndim]) + out_shape - return rval - - def __init__(self, ignore_border=False, mode="max", ndim=2, openmp=None): - super().__init__(openmp=openmp) - self.ndim = ndim - self.ignore_border = ignore_border - if mode == "max_deterministic": - # It seems max pool algo is already deterministic in CPU. - mode = "max" - if mode not in ("max", "average_inc_pad", "average_exc_pad", "sum"): - raise ValueError( - "Pool mode parameter only support 'max', 'sum'," - f" 'average_inc_pad' and 'average_exc_pad'. 
Got {mode}" - ) - self.mode = mode - - def prepare_node(self, node, storage_map, compute_map, impl): - if len(node.inputs) == 1: - # Old interface - self.ndim = len(node.op.ds) - self.mode = node.op.mode - ws = at.constant(node.op.ds) - st = at.constant(node.op.st) - pad = at.constant(node.op.padding) - node.inputs.append(ws) - node.inputs.append(st) - node.inputs.append(pad) - if isinstance(ws, Constant): - storage_map[ws] = [ws.data] - compute_map[ws] = [True] - else: - storage_map[ws] = [None] - compute_map[ws] = [False] - if isinstance(st, Constant): - storage_map[st] = [st.data] - compute_map[st] = [True] - else: - storage_map[st] = [None] - compute_map[st] = [False] - if isinstance(pad, Constant): - storage_map[pad] = [pad.data] - compute_map[pad] = [True] - else: - storage_map[pad] = [None] - compute_map[pad] = [False] - - def make_node(self, x, ws, stride=None, pad=None): - # TODO: consider restricting the dtype? - x = at.as_tensor_variable(x) - nd = self.ndim - if stride is None: - stride = ws - if pad is None: - pad = (0,) * nd - elif isinstance(pad, (tuple, list)): - if max(pad) != 0 and not self.ignore_border: - raise NotImplementedError("padding works only with ignore_border=True") - if isinstance(ws, (tuple, list)): - if any(pad[i] >= ws[i] for i in range(nd)): - raise NotImplementedError("padding must be smaller than strides") - ws = at.as_tensor_variable(ws) - stride = at.as_tensor_variable(stride) - pad = at.as_tensor_variable(pad) - assert ws.ndim == 1 - assert stride.ndim == 1 - assert pad.ndim == 1 - if x.type.ndim < nd: - raise TypeError() - if ws.dtype not in int_dtypes: - raise TypeError("Pool downsample parameters must be ints.") - if stride.dtype not in int_dtypes: - raise TypeError("Stride parameters must be ints.") - if pad.dtype not in int_dtypes: - raise TypeError("Padding parameters must be ints.") - # If the input shape are broadcastable we can have 0 in the output shape - out_shape = tuple( - 1 if s == 1 else None for s in x.type.shape[:-nd] + (None,) * nd - ) - out = TensorType(x.dtype, shape=out_shape) - return Apply(self, [x, ws, stride, pad], [out()]) - - def perform(self, node, inp, out, params): - x, ws, stride, pad = inp - (z,) = out - nd = self.ndim - assert ws.shape == stride.shape == pad.shape == (nd,) - if len(x.shape) < nd: - raise NotImplementedError( - f"Pool requires input with {nd} or more dimensions" - ) - z_shape = self.out_shape(x.shape, ws, params.ignore_border, stride, pad, nd) - if not params.ignore_border: - assert all(z > 0 for z in z_shape[-nd:]) - if (z[0] is None) or (z[0].shape != z_shape): - z[0] = np.empty(z_shape, dtype=x.dtype) - zz = z[0] - # size of pooling output - pool_out_shp = zz.shape[-nd:] - img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd)) - inc_pad = self.mode == "average_inc_pad" - - # pad the image - if max(pad) != 0: - y = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype) - y[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] = x - else: - y = x - func = np.max - if self.mode == "sum": - func = np.sum - elif self.mode != "max": - func = np.average - - # precompute the region boundaries for each dimension - region_slices = [[] for i in range(nd)] - for i in range(nd): - for j in range(pool_out_shp[i]): - start = j * stride[i] - end = min(start + ws[i], img_shp[i]) - if not inc_pad: - start = max(start, pad[i]) - end = min(end, img_shp[i] - pad[i]) - region_slices[i].append(slice(start, end)) - - # iterate over non-pooling dimensions - for k in 
np.ndindex(*x.shape[:-nd]): - zzk = zz[k] - yk = y[k] - # iterate over pooling regions - for r in np.ndindex(*pool_out_shp): - zzk[r] = func(yk[[region_slices[i][r[i]] for i in range(nd)]]) - - def infer_shape(self, fgraph, node, in_shapes): - ws, stride, pad = [node.inputs[1], node.inputs[2], node.inputs[3]] - shp = self.out_shape( - in_shapes[0], ws, self.ignore_border, stride, pad, self.ndim - ) - return [shp] - - def L_op(self, inputs, outputs, grads): - x, ws, stride, pad = inputs - (gz,) = grads - disc = [DisconnectedType()() for i in inputs[1:]] - if self.mode == "max": - return [ - MaxPoolGrad(ndim=self.ndim, ignore_border=self.ignore_border)( - x, outputs[0], gz, ws=ws, stride=stride, pad=pad - ) - ] + disc - else: - return [ - AveragePoolGrad( - ndim=self.ndim, ignore_border=self.ignore_border, mode=self.mode - )(x, gz, ws=ws, stride=stride, pad=pad) - ] + disc - - def connection_pattern(self, node): - return [[1], [0], [0], [0]] - - def R_op(self, inputs, eval_points): - if self.mode != "max": - # Rop for average or sum is simply pooling evaluated at eval point - eval_inputs = [eval_points[0]] + inputs[1:] - return [self(*eval_inputs)] - - # R_op can receive None as eval_points. - # That mean there is no diferientiable path through that input - # If this imply that you cannot compute some outputs, - # return None for those. - if eval_points[0] is None: - return [None] - z = self(*inputs) - x, ws, stride, pad = inputs - return [ - DownsampleFactorMaxGradGrad(self.ignore_border, self.mode, self.ndim)( - x, z, eval_points[0], ws, stride, pad - ) - ] - - def c_headers(self, **kwargs): - headers = [""] - headers += super().c_headers(**kwargs) - return headers - - def c_code(self, node, name, inp, out, sub): - if self.mode not in ("max", "sum", "average_exc_pad", "average_inc_pad"): - raise MethodNotDefined() - x, ws, stride, pad = inp - (z,) = out - nd = self.ndim - total_ndim = node.inputs[0].ndim - non_pool_ndim = total_ndim - nd - fail = sub["fail"] - params = sub["params"] - if self.openmp: - # run in parallel over each pooling block - omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector) schedule(static)" - else: - omp_parallel = "" - ccode = """ - int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); - if(PyArray_NDIM(%(x)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_DIM(%(ws)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(stride)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(pad)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s"); - %(fail)s; - } - npy_intp z[%(nd)s]; // shape of the output - npy_intp r[%(nd)s]; // shape of the padded_input - npy_intp ws[%(nd)s]; - npy_intp st[%(nd)s]; - npy_intp pd[%(nd)s]; - int nonzero_padding; - nonzero_padding = 0; - for (int i=0; i<%(nd)s; i++) - { - ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i)); - st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i)); - pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i)); - r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i]; - if (pd[i]>0) - nonzero_padding = 1; - } - if (!%(params)s->ignore_border && nonzero_padding) - { - PyErr_SetString(PyExc_ValueError, - "padding must be zero when ignore border is False"); - %(fail)s; - } - if 
(%(params)s->ignore_border) - { - for (int i=0; i<%(nd)s; i++) - { - // '/' in C is different from '/' in python - if (r[i] - ws[i] < 0) - { - z[i] = 0; - } - else - { - z[i] = (r[i] - ws[i]) / st[i] + 1; - } - } - } - else - { - for (int i=0; i<%(nd)s; i++) - { - // decide how many rows/cols the output has - if (st[i] >= ws[i]) - { - z[i] = (r[i] - 1) / st[i] + 1; - } - else - { - z[i] = std::max((npy_intp)0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1; - } - assert(z[i] > 0); - } - } - // memory allocation of z if necessary - int mem_nec; - mem_nec = 0; - if ((!%(z)s) || *PyArray_DIMS(%(z)s)!=%(total_ndim)s) - { - mem_nec = 1; - } - if (!mem_nec) - { - for (int i=0; i<%(non_pool_ndim)s; i++) - { - if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(x)s)[i]) - { - mem_nec = 1; - break; - } - } - } - if (!mem_nec) - { - for (int i=0; i<%(nd)s; i++) - { - if (PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i] != z[i]) - { - mem_nec = 1; - break; - } - } - } - if (mem_nec) - { - if (%(z)s) Py_XDECREF(%(z)s); - npy_intp dims[%(total_ndim)s]; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - dims[i] = PyArray_DIMS(%(x)s)[i]; - } - for (int i=0; i<%(nd)s; i++) - { - dims[%(non_pool_ndim)s + i] = z[i]; - } - //TODO: zeros not necessary - %(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, dims, typenum,0); - } - // initialize temp var for the value in a region - dtype_%(x)s collector; - npy_intp z_prod; - // do not run if any z[i] is zero - z_prod = 1; - for (int i=0; i<%(nd)s; i++) - { - z_prod *= z[i]; - } - if (z_prod) - { - // will be used to hold start and end index of a region - npy_intp r_st[%(nd)s]; - npy_intp r_end[%(nd)s]; - // index for iterating over the pooling regions - npy_intp r_idx[%(nd)s]; - // placeholder for PyArray indexing (output) - npy_intp o_idx[%(total_ndim)s]; - // placeholder for PyArray indexing (input) - npy_intp i_idx[%(total_ndim)s]; - // loop over non-pooling dimensions - npy_intp non_pooling_prod = 1; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - non_pooling_prod *= PyArray_DIMS(%(x)s)[i]; - } - %(omp_parallel)s - // first loop over non-pooling dimensions - for (npy_intp t=0; t (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s]; - // from padded_img space to img space - r_st[%(i)s] -= pd[%(i)s]; - r_end[%(i)s] -= pd[%(i)s]; - // handle the case where no padding, ignore border is True - if (%(params)s->ignore_border) - { - r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s]; - } - // use the index to find the correct position in the output - o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s]; - """ % dict( - i=i, non_pool_ndim=non_pool_ndim, params=sub["params"] - ) - - ccode += """ - // get a pointer to the correct position in the output - dtype_%(z)s * z; - if (%(total_ndim)s == 4) - z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, o_idx[0], o_idx[1], o_idx[2], o_idx[3]))); - else - z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s, o_idx))); - """ - - if self.mode == "max": - for i in range(nd): - ccode += """ - // set the first index of dimension %(i)s - i_idx[%(non_pool_ndim)s + %(i)s] = r_st[%(i)s]; - """ % dict( - i=i, non_pool_ndim=non_pool_ndim - ) - ccode += """ - // use the first element as the initial value of collector - if (%(total_ndim)s == 4) - collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0]; - else - collector = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0]; - """ - for i in range(nd): - ccode += """ - // go through the pooled region in the unpadded input - for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s collector) ? 
a : collector; - """ - for i in range(nd): - ccode += """ - } // for loop over region - """ - ccode += """ - z[0] = collector; - """ - elif self.mode in ("sum", "average_exc_pad", "average_inc_pad"): - ccode += """ - // initialize the sum at zero - collector = ((dtype_%(x)s)(0)); - """ - for i in range(nd): - ccode += """ - // go through the pooled region in the unpadded input - for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s= downsample: - return (v - 1) // stride + 1 - else: - return max(0, (v - 1 - downsample) // stride + 1) + 1 - - out_shape = [compute_out(patch_shape[i], ws[i], stride[i]) for i in range(ndim)] - - rval = list(imgshape[:-ndim]) + out_shape - return rval - - def __init__(self, ignore_border, mode="max", ndim=2, openmp=None): - self.ndim = ndim - self.ignore_border = ignore_border - if mode == "max_deterministic": - # It seems max pool grad algo is already deterministic in CPU. - mode = "max" - if mode not in ("max", "sum", "average_inc_pad", "average_exc_pad"): - raise ValueError( - "Pool mode parameter only support 'max', 'sum'," - " 'average_inc_pad' and 'average_exc_pad'. Got {mode}" - ) - self.mode = mode - super().__init__(openmp=openmp) - - def prepare_node(self, node, storage_map, compute_map, impl): - if len(node.inputs) < 5: # 5 for AveragePoolGrad, 6 for MaxPoolGrad - # Old interface - self.ndim = len(node.op.ds) - self.mode = node.op.mode - ws = at.constant(node.op.ds) - st = at.constant(node.op.st) - pad = at.constant(node.op.padding) - node.inputs.append(ws) - node.inputs.append(st) - node.inputs.append(pad) - if isinstance(ws, Constant): - storage_map[ws] = [ws.data] - compute_map[ws] = [True] - else: - storage_map[ws] = [None] - compute_map[ws] = [False] - if isinstance(st, Constant): - storage_map[st] = [st.data] - compute_map[st] = [True] - else: - storage_map[st] = [None] - compute_map[st] = [False] - if isinstance(pad, Constant): - storage_map[pad] = [pad.data] - compute_map[pad] = [True] - else: - storage_map[pad] = [None] - compute_map[pad] = [False] - - def infer_shape(self, fgraph, node, in_shapes): - return [in_shapes[0]] - - -class MaxPoolGrad(PoolGrad): - # params_type ignore_border don't change c code - - def __init__(self, ignore_border, ndim=2, openmp=None): - PoolGrad.__init__(self, ignore_border, mode="max", ndim=ndim, openmp=openmp) - - def make_node(self, x, maxout, gz, ws, stride=None, pad=None): - # make_node should only be called by the grad function of - # Pool, so these asserts should not fail. 
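The gradient routing that `MaxPoolGrad.perform` implements further down can be summarised with a tiny NumPy sketch (one 2x2 window, values chosen to show the tie handling; this is an illustration, not code from the patch):

    import numpy as np

    x = np.array([[1., 3.],
                  [2., 3.]])        # a single 2x2 pooling window
    maxout = x.max()                # forward max-pool result for this window
    gz = 5.0                        # gradient arriving at the pooled output
    gx = np.where(x == maxout, gz, 0.0)
    print(gx)
    # [[0. 5.]
    #  [0. 5.]]  -- every position equal to the max receives the full
    #              incoming gradient, so tied positions each get gz.
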
- x = at.as_tensor_variable(x) - maxout = at.as_tensor_variable(maxout) - gz = at.as_tensor_variable(gz) - nd = self.ndim - if stride is None: - stride = ws - if pad is None: - pad = (0,) * nd - ws = at.as_tensor_variable(ws) - stride = at.as_tensor_variable(stride) - pad = at.as_tensor_variable(pad) - assert isinstance(x, Variable) and x.ndim >= nd - assert isinstance(maxout, Variable) and maxout.ndim >= nd - assert isinstance(gz, Variable) and gz.ndim >= nd - assert isinstance(ws, Variable) and ws.ndim == 1 - assert isinstance(stride, Variable) and stride.ndim == 1 - assert isinstance(pad, Variable) and pad.ndim == 1 - assert x.ndim == maxout.ndim == gz.ndim >= nd - if ws.dtype not in int_dtypes: - raise TypeError("Pool downsample parameters must be ints.") - if stride.dtype not in int_dtypes: - raise TypeError("Stride parameters must be ints.") - if pad.dtype not in int_dtypes: - raise TypeError("Padding parameters must be ints.") - return Apply(self, [x, maxout, gz, ws, stride, pad], [x.type()]) - - def perform(self, node, inp, out): - assert self.mode == "max" - x, maxout, gz, ws, stride, pad = inp - (gx_stg,) = out - nd = self.ndim - assert ws.shape == stride.shape == pad.shape == (nd,) - if len(x.shape) < nd: - raise NotImplementedError( - f"MaxPoolGrad requires input with {nd} or more dimensions" - ) - pool_out_shp = maxout.shape[-nd:] - img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd)) - - # pad the image - if max(pad) != 0: - y = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype) - y[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] = x - else: - y = x - gx = np.zeros_like(y) - - # precompute the region boundaries for each dimension - region_ranges = [[] for i in range(nd)] - for i in range(nd): - for j in range(pool_out_shp[i]): - start = max(j * stride[i], pad[i]) - end = min(start + ws[i], img_shp[i]) - region_ranges[i].append(range(start, end)) - - # iterate over non-pooling dimensions - for k in np.ndindex(*x.shape[:-nd]): - gxk = gx[k] - gzk = gz[k] - yk = y[k] - maxoutk = maxout[k] - # iterate over pooling regions - for r in np.ndindex(*pool_out_shp): - maxout_value = maxoutk[r] - # iterate inside region - for c in itertools.product( - *[region_ranges[i][r[i]] for i in range(nd)] - ): - if maxout_value == yk[c]: - gxk[c] += gzk[r] - - # unpad the image - gx = gx[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] - gx_stg[0] = gx - - def grad(self, inp, grads): - x, maxout, gz, ws, stride, pad = inp - (ggx,) = grads - return [ - at.zeros_like(x), - at.zeros_like(maxout), - DownsampleFactorMaxGradGrad( - ndim=self.ndim, ignore_border=self.ignore_border - )(x, maxout, ggx, ws, stride, pad), - ] + [DisconnectedType()() for i in inp[3:]] - - def connection_pattern(self, node): - return [[1], [1], [1], [0], [0], [0]] - - def c_code(self, node, name, inp, out, sub): - assert self.mode == "max" - x, z, gz, ws, stride, pad = inp - (gx,) = out - nd = self.ndim - total_ndim = node.inputs[0].ndim - non_pool_ndim = total_ndim - nd - fail = sub["fail"] - - if self.openmp: - # run in parallel over each pooling block - omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)" - else: - omp_parallel = "" - - ccode = """ - // sanity checks - int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); - int z_typenum = PyArray_ObjectType((PyObject*)%(z)s, 0); - int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 
0); - if ((x_typenum != z_typenum) || (x_typenum != gz_typenum)) - { - PyErr_SetString(PyExc_ValueError, "input types must all match"); - %(fail)s; - } - if(PyArray_NDIM(%(x)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_NDIM(%(z)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "z must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_DIM(%(ws)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(stride)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(pad)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s"); - %(fail)s; - } - npy_intp z[%(nd)s]; // shape of the output - npy_intp r[%(nd)s]; // shape of the padded_input - npy_intp ws[%(nd)s]; - npy_intp st[%(nd)s]; - npy_intp pd[%(nd)s]; - int nonzero_padding; - nonzero_padding = 0; - for (int i=0; i<%(nd)s; i++) - { - ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i)); - st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i)); - pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i)); - z[i] = PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i]; - r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i]; - if (pd[i]>0) - nonzero_padding = 1; - } - // allocating memory for output, if necessary - int mem_nec; - mem_nec = 0; - if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s) - || *PyArray_DIMS(%(gx)s)!=%(total_ndim)s) - { - mem_nec = 1; - } - if (!mem_nec) - { - for (int i=0; i<%(total_ndim)s; i++) - { - if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i]) - { - mem_nec = 1; - break; - } - } - } - if (mem_nec) - { - Py_XDECREF(%(gx)s); - %(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0); - } - else { - PyArray_FILLWBYTE(%(gx)s, 0); - } - dtype_%(z)s maximum; // temp var for maximum value in a region - npy_intp z_prod; - // do not run if any z[i] is zero - z_prod = 1; - for (int i=0; i<%(nd)s; i++) - { - z_prod *= z[i]; - } - if (z_prod) - { - // will be used to hold start and end index of a region - npy_intp r_st[%(nd)s]; - npy_intp r_end[%(nd)s]; - // index for iterating over the pooling regions - npy_intp r_idx[%(nd)s]; - // placeholder for PyArray indexing (output) - npy_intp o_idx[%(total_ndim)s]; - // placeholder for PyArray indexing (input) - npy_intp i_idx[%(total_ndim)s]; - // loop over non-pooling dimensions - npy_intp non_pooling_prod = 1; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - non_pooling_prod *= PyArray_DIMS(%(x)s)[i]; - } - %(omp_parallel)s - // first loop over non-pooling dimensions - for (npy_intp t=0; t (r[%(i)s] - pd[%(i)s]) ? 
r[%(i)s] - pd[%(i)s] : r_end[%(i)s]; - // from padded_img space to img space - r_st[%(i)s] -= pd[%(i)s]; - r_end[%(i)s] -= pd[%(i)s]; - // use the index to find the correct position in the output - o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s]; - """ % dict( - i=i, non_pool_ndim=non_pool_ndim - ) - - ccode += """ - dtype_%(gz)s * gz; - if (%(total_ndim)s == 4) - { - // the maximum value - maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0]; - // the gradient corresponding to this maximum value in z - gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3]))); - } - else - { - // the maximum value - maximum = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)))[0]; - // the gradient corresponding to this maximum value in z - gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx))); - } - """ - for i in range(nd): - ccode += """ - // go through the pooled region in the unpadded input - for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s= nd - assert isinstance(gz, Variable) and gz.ndim >= nd - assert isinstance(ws, Variable) and ws.ndim == 1 - assert isinstance(stride, Variable) and stride.ndim == 1 - assert x.ndim == gz.ndim >= nd - assert isinstance(pad, Variable) and pad.ndim == 1 - if ws.dtype not in int_dtypes: - raise TypeError("Pool downsample parameters must be ints.") - if stride.dtype not in int_dtypes: - raise TypeError("Stride parameters must be ints.") - if pad.dtype not in int_dtypes: - raise TypeError("Padding parameters must be ints.") - return Apply(self, [x, gz, ws, stride, pad], [x.type()]) - - def perform(self, node, inp, out): - x, gz, ws, stride, pad = inp - (gx_stg,) = out - nd = self.ndim - assert ws.shape == stride.shape == pad.shape == (nd,) - if len(x.shape) < nd: - raise NotImplementedError( - f"AveragePoolGrad requires input with {nd} or more dimensions" - ) - if self.mode == "average_exc_pad" and max(pad) != 0: - raise NotImplementedError() - z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad, nd) - if (gx_stg[0] is None) or (gx_stg[0].shape != z_shape): - gx_stg[0] = np.empty(z_shape, dtype=x.dtype) - zz = gx_stg[0] - # size of pooling output - pool_out_shp = zz.shape[-nd:] - img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd)) - inc_pad = self.mode == "average_inc_pad" - sum_mode = self.mode == "sum" - - # initialize the padded output - gx = np.zeros((x.shape[:-nd] + img_shp), dtype=x.dtype) - - # precompute the region boundaries and sizes for each dimension - region_slices = [[] for i in range(nd)] - region_sizes = [[] for i in range(nd)] - for i in range(nd): - for j in range(pool_out_shp[i]): - if sum_mode or inc_pad: - start = j * stride[i] - else: - start = max(j * stride[i], pad[i]) - end = min(start + ws[i], img_shp[i]) - region_slices[i].append(slice(start, end)) - region_sizes[i].append(end - start) - - # iterate over non-pooling dimensions - region_slice = [None] * nd - for k in np.ndindex(*x.shape[:-nd]): - gzk = gz[k] - gxk = gx[k] - # iterate over pooling regions - for r in np.ndindex(*pool_out_shp): - region_size = 1 - for i in range(nd): - region_slice[i] = region_slices[i][r[i]] - region_size *= region_sizes[i][r[i]] - if sum_mode: - val = gzk[r] - else: - # divide by region size - val = gzk[r] / region_size - gxk[region_slice] += val - - # unpad the image - gx = gx[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] - gx_stg[0] = gx - - def grad(self, inp, grads): - x, gz, ws, stride, pad = inp - (ggx,) = grads - 
return [ - at.zeros_like(x), - Pool(ignore_border=self.ignore_border, ndim=self.ndim, mode=self.mode)( - ggx, ws, stride, pad - ), - ] + [DisconnectedType()() for i in inp[2:]] - - def connection_pattern(self, node): - return [[1], [1], [0], [0], [0]] - - def c_code(self, node, name, inp, out, sub): - x, gz, ws, stride, pad = inp - (gx,) = out - nd = self.ndim - total_ndim = node.inputs[0].ndim - non_pool_ndim = total_ndim - nd - fail = sub["fail"] - inc_pad = int(self.mode == "average_inc_pad") - sum_mode = int(self.mode == "sum") - if self.openmp: - # run in parallel over each pooling block - omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_pad_width, r_idx, i_idx, o_idx) schedule(static)" - else: - omp_parallel = "" - - ccode = """ - // sanity checks - int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); - int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0); - if (x_typenum != gz_typenum) - { - PyErr_SetString(PyExc_ValueError, "input types must all match"); - %(fail)s; - } - if(PyArray_NDIM(%(x)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_DIM(%(ws)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(stride)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(pad)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s"); - %(fail)s; - } - npy_intp z[%(nd)s]; // shape of the output - npy_intp r[%(nd)s]; // shape of the padded_input - npy_intp ws[%(nd)s]; - npy_intp st[%(nd)s]; - npy_intp pd[%(nd)s]; - int nonzero_padding; - nonzero_padding = 0; - for (int i=0; i<%(nd)s; i++) - { - ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i)); - st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i)); - pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i)); - z[i] = PyArray_DIMS(%(gz)s)[%(non_pool_ndim)s + i]; - r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i]; - if (pd[i]>0) - nonzero_padding = 1; - } - if (!%(inc_pad)s && !%(sum_mode)s && nonzero_padding) - { - PyErr_SetString(PyExc_ValueError, - "padding must be zero for average_exc_pad"); - %(fail)s; - } - // allocating memory for output, if necessary - int mem_nec; - mem_nec = 0; - if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s) - || *PyArray_DIMS(%(gx)s)!=%(total_ndim)s) - { - mem_nec = 1; - } - if (!mem_nec) - { - for (int i=0; i<%(total_ndim)s; i++) - { - if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i]) - { - mem_nec = 1; - break; - } - } - } - if (mem_nec) - { - Py_XDECREF(%(gx)s); - %(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0); - } - else { - PyArray_FILLWBYTE(%(gx)s, 0); - } - npy_intp z_prod; - // do not run if any z[i] is zero - z_prod = 1; - for (int i=0; i<%(nd)s; i++) - { - z_prod *= z[i]; - } - if (z_prod) - { - // will be used to hold start and end index of a region - npy_intp r_st[%(nd)s]; - npy_intp r_end[%(nd)s]; - // padded region size - npy_intp r_pad_width[%(nd)s]; - // index for iterating over the pooling regions - npy_intp r_idx[%(nd)s]; - // placeholder for PyArray indexing (output) - npy_intp o_idx[%(total_ndim)s]; - // placeholder for PyArray indexing (input) - npy_intp i_idx[%(total_ndim)s]; - // loop over non-pooling 
dimensions - npy_intp non_pooling_prod = 1; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - non_pooling_prod *= PyArray_DIMS(%(x)s)[i]; - } - %(omp_parallel)s - // first loop over non-pooling dimensions - for (npy_intp t=0; t r[%(i)s] ? r[%(i)s] : r_end[%(i)s]; - r_pad_width[%(i)s] = r_end[%(i)s] - r_st[%(i)s]; - // from padded_img space to img space - r_st[%(i)s] = r_st[%(i)s] - pd[%(i)s] > 0 ? r_st[%(i)s] - pd[%(i)s] : 0; - r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] - pd[%(i)s] ? r[%(i)s] - 2 * pd[%(i)s] : r_end[%(i)s] - pd[%(i)s]; - - // use the index to find the correct position in the output - o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s]; - """ % dict( - i=i, sum_mode=sum_mode, inc_pad=inc_pad, non_pool_ndim=non_pool_ndim - ) - - ccode += """ - dtype_%(gz)s * gz; - dtype_%(gz)s val; - if (%(total_ndim)s == 4) - { - // the gradient for this region - gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3]))); - } - else - { - // the gradient for this region - gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx))); - } - // compute the contribution - if (%(sum_mode)s) - { - val = gz[0]; - } - else - { - val = gz[0] / (%(region_size)s); - } - """ - region_size = " * ".join(f"r_pad_width[{i}]" for i in range(nd)) - for i in range(nd): - ccode += """ - // go through the pooled region in the unpadded input - for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s= ws[i] for i in range(nd)): - raise NotImplementedError("padding must be smaller than strides") - ws = at.as_tensor_variable(ws) - stride = at.as_tensor_variable(stride) - pad = at.as_tensor_variable(pad) - assert ws.ndim == 1 - assert stride.ndim == 1 - assert pad.ndim == 1 - assert x.ndim == maxout.ndim == gz.ndim >= nd - if ws.dtype not in int_dtypes: - raise TypeError("Pool downsample parameters must be ints.") - if stride.dtype not in int_dtypes: - raise TypeError("Stride parameters must be ints.") - if pad.dtype not in int_dtypes: - raise TypeError("Padding parameters must be ints.") - return Apply(self, [x, maxout, gz, ws, stride, pad], [x.type()]) - - def perform(self, node, inp, out): - x, maxout, ggx, ws, stride, pad = inp - (z,) = out - nd = self.ndim - assert ws.shape == stride.shape == pad.shape == (nd,) - if len(x.shape) < nd: - raise NotImplementedError( - "DownsampleFactorMaxGradGrad requires input " - "with {} or more dimensions".format(nd) - ) - if (z[0] is None) or (z[0].shape != maxout.shape): - z[0] = np.zeros(maxout.shape, dtype=x.dtype) - ggz = z[0] # grad wrt maxout_grad has the same shape as maxout - # size of pooling output - pool_out_shp = ggz.shape[-nd:] - img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd)) - - # pad the image and its gradients - if max(pad) > 0: - y_padded = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype) - y_padded[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] = x - ggx_padded = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype) - ggx_padded[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] = ggx - - else: - y_padded = x - ggx_padded = ggx - - # precompute the region boundaries for each dimension - region_ranges = [[] for i in range(nd)] - for i in range(nd): - for j in range(pool_out_shp[i]): - start = j * stride[i] - end = min(start + ws[i], img_shp[i]) - region_ranges[i].append(range(start, end)) - - # iterate over non-pooling dimensions - for k in np.ndindex(*x.shape[:-nd]): - ggxk = ggx_padded[k] - ggzk = ggz[k] - yk = y_padded[k] - maxoutk 
= maxout[k] - # iterate over pooling regions - for r in np.ndindex(*pool_out_shp): - # iterate inside region - maxout_value = maxoutk[r] - for c in itertools.product( - *[region_ranges[i][r[i]] for i in range(nd)] - ): - if maxout_value == yk[c]: - ggzk[r] += ggxk[c] - - def infer_shape(self, fgraph, node, in_shapes): - return [in_shapes[1]] - - def grad(self, inp, grads): - x, maxout, ggx, ws, stride, pad = inp - (gz,) = grads - return [ - at.zeros_like(x), - at.zeros_like(maxout), - MaxPoolGrad(ignore_border=self.ignore_border, ndim=self.ndim)( - x, maxout, gz, ws, stride, pad - ), - DisconnectedType()(), - DisconnectedType()(), - DisconnectedType()(), - ] - - def connection_pattern(self, node): - return [[1], [1], [1], [0], [0], [0]] - - def c_code(self, node, name, inp, out, sub): - if self.mode != "max": - raise MethodNotDefined() - x, maxout, ggx, ws, stride, pad = inp - (z,) = out # the grad of grad - nd = self.ndim - total_ndim = node.inputs[0].ndim - non_pool_ndim = total_ndim - nd - fail = sub["fail"] - - if self.openmp: - # run in parallel over each pooling block - omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)" - else: - omp_parallel = "" - ccode = """ - int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0); - npy_intp z[%(nd)s]; // shape of the output - npy_intp r[%(nd)s]; // shape of the padded_input - npy_intp ws[%(nd)s]; - npy_intp st[%(nd)s]; - npy_intp pd[%(nd)s]; - if(PyArray_DIM(%(ws)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(stride)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(pad)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s"); - %(fail)s; - } - for (int i=0; i<%(nd)s; i++) - { - ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i)); - st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i)); - pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i)); - z[i] = PyArray_DIMS(%(maxout)s)[%(non_pool_ndim)s + i]; - r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i]; - } - // allocating memory for output, if necessary - int mem_nec; - mem_nec = 0; - if ((!%(z)s) || !PyArray_ISCONTIGUOUS(%(z)s) - || *PyArray_DIMS(%(z)s)!=%(total_ndim)s) - { - mem_nec = 1; - } - if (!mem_nec) - { - for (int i=0; i<%(total_ndim)s; i++) - { - if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(maxout)s)[i]) - { - mem_nec = 1; - break; - } - } - } - if (mem_nec) - { - Py_XDECREF(%(z)s); - %(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(maxout)s), z_typenum,0); - } - else { - PyArray_FILLWBYTE(%(z)s, 0); - } - dtype_%(maxout)s maximum; // temp var for maximum value in a region - // will be used to hold start and end index of a region - npy_intp r_st[%(nd)s]; - npy_intp r_end[%(nd)s]; - // index for iterating over the pooling regions - npy_intp r_idx[%(nd)s]; - // placeholder for PyArray indexing (output) - npy_intp o_idx[%(total_ndim)s]; - // placeholder for PyArray indexing (input) - npy_intp i_idx[%(total_ndim)s]; - // loop over non-pooling dimensions - npy_intp non_pooling_prod; - non_pooling_prod = 1; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - non_pooling_prod *= PyArray_DIMS(%(x)s)[i]; - } - %(omp_parallel)s - // first loop over non-pooling dimensions - for (npy_intp t=0; t (r[%(i)s] - pd[%(i)s]) ? 
r[%(i)s] - pd[%(i)s] : r_end[%(i)s]; - // from padded_img space to img space - r_st[%(i)s] -= pd[%(i)s]; - r_end[%(i)s] -= pd[%(i)s]; - // use the index to find the correct position in the output - o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s]; - """ % dict( - i=i, non_pool_ndim=non_pool_ndim - ) - - ccode += """ - dtype_%(z)s * z; - if (%(total_ndim)s == 4) - { - // the maximum value - maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0]; - // z at this position - z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3]))); - } - else - { - // the maximum value - maximum = ((dtype_%(maxout)s*)(PyArray_GetPtr(%(maxout)s,o_idx)))[0]; - // z at this position - z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx))); - } - """ - for i in range(nd): - ccode += """ - // go through the pooled region in the unpadded input - for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s= ws[i] for i in range(nd)): - raise NotImplementedError("padding must be smaller than strides") - ws = at.as_tensor_variable(ws) - stride = at.as_tensor_variable(stride) - pad = at.as_tensor_variable(pad) - assert ws.ndim == 1 - assert stride.ndim == 1 - assert pad.ndim == 1 - if x.type.ndim < nd: - raise TypeError() - if not ws.dtype.startswith("int"): - raise TypeError("Pool downsample parameters must be ints.") - if not stride.dtype.startswith("int"): - raise TypeError("Stride parameters must be ints.") - if not pad.dtype.startswith("int"): - raise TypeError("Padding parameters must be ints.") - # If the input shape are broadcastable we can have 0 in the output shape - out_shape = tuple( - 1 if s == 1 else None for s in x.type.shape[:-nd] + (None,) * nd - ) - out = TensorType(eval_point.dtype, shape=out_shape) - return Apply(self, [x, eval_point, ws, stride, pad], [out()]) - - def perform(self, node, inp, out, params): - x, ex, ws, stride, pad = inp - (z,) = out - nd = self.ndim - assert ws.shape == stride.shape == pad.shape == (nd,) - if len(x.shape) < nd: - raise NotImplementedError( - f"Pool requires input with {nd} or more dimensions" - ) - z_shape = Pool.out_shape(x.shape, ws, params.ignore_border, stride, pad, nd) - if not self.ignore_border: - assert all(z > 0 for z in z_shape[-nd:]) - if (z[0] is None) or (z[0].shape != z_shape): - z[0] = np.empty(z_shape, dtype=x.dtype) - zz = z[0] - # size of pooling output - pool_out_shp = zz.shape[-nd:] - img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd)) - inc_pad = self.mode == "average_inc_pad" - - # pad the image and the eval point - if max(pad) != 0: - y = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype) - y[ - (slice(None),) * (len(x.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] = x - ey = np.zeros(ex.shape[:-nd] + img_shp, dtype=ex.dtype) - ey[ - (slice(None),) * (len(ex.shape) - nd) - + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd)) - ] = ex - else: - y = x - ey = ex - - # precompute the region boundaries for each dimension - region_slices = [[] for i in range(nd)] - for i in range(nd): - for j in range(pool_out_shp[i]): - start = j * stride[i] - end = min(start + ws[i], img_shp[i]) - if not inc_pad: - start = max(start, pad[i]) - end = min(end, img_shp[i] - pad[i]) - region_slices[i].append(slice(start, end)) - - # iterate over non-pooling dimensions - for k in np.ndindex(*x.shape[:-nd]): - zzk = zz[k] - yk = y[k] - eyk = ey[k] - # iterate over pooling regions - for r in np.ndindex(*pool_out_shp): - # current slice in padded input - ykslice = 
yk[[region_slices[i][r[i]] for i in range(nd)]] - # current slice in eval points - eykslice = eyk[[region_slices[i][r[i]] for i in range(nd)]] - # indices of maximum - idx = np.unravel_index(np.argmax(ykslice), ykslice.shape) - zzk[r] = eykslice[idx] - - def c_headers(self, **kwargs): - headers = [""] - headers += super().c_headers(**kwargs) - return headers - - def c_code(self, node, name, inp, out, sub): - if self.mode != "max": - raise MethodNotDefined() - x, ex, ws, stride, pad = inp - (z,) = out - nd = self.ndim - total_ndim = node.inputs[0].ndim - non_pool_ndim = total_ndim - nd - fail = sub["fail"] - params = sub["params"] - - if self.openmp: - # run in parallel over each pooling block - omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector, eval_collector) schedule(static)" - else: - omp_parallel = "" - ccode = """ - int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); - if(PyArray_NDIM(%(x)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_NDIM(%(ex)s)!=%(total_ndim)s) - { - PyErr_SetString(PyExc_ValueError, "eval_point must be a %(total_ndim)sD ndarray"); - %(fail)s; - } - if(PyArray_DIM(%(ws)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(stride)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s"); - %(fail)s; - } - if(PyArray_DIM(%(pad)s, 0)!=%(nd)s) - { - PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s"); - %(fail)s; - } - npy_intp z[%(nd)s]; // shape of the output - npy_intp r[%(nd)s]; // shape of the padded_input - npy_intp ws[%(nd)s]; - npy_intp st[%(nd)s]; - npy_intp pd[%(nd)s]; - int nonzero_padding; - nonzero_padding = 0; - for (int i=0; i<%(nd)s; i++) - { - ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i)); - st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i)); - pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i)); - r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i]; - if (pd[i]>0) - nonzero_padding = 1; - } - if (!%(params)s->ignore_border && nonzero_padding) - { - PyErr_SetString(PyExc_ValueError, - "padding must be zero when ignore border is False"); - %(fail)s; - } - if (%(params)s->ignore_border) - { - for (int i=0; i<%(nd)s; i++) - { - // '/' in C is different from '/' in python - if (r[i] - ws[i] < 0) - { - z[i] = 0; - } - else - { - z[i] = (r[i] - ws[i]) / st[i] + 1; - } - } - } - else - { - for (int i=0; i<%(nd)s; i++) - { - // decide how many rows/cols the output has - if (st[i] >= ws[i]) - { - z[i] = (r[i] - 1) / st[i] + 1; - } - else - { - z[i] = std::max((npy_intp)0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1; - } - assert(z[i] > 0); - } - } - // memory allocation of z if necessary - int mem_nec; - mem_nec = 0; - if ((!%(z)s) || *PyArray_DIMS(%(z)s)!=%(total_ndim)s) - { - mem_nec = 1; - } - if (!mem_nec) - { - for (int i=0; i<%(non_pool_ndim)s; i++) - { - if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(x)s)[i]) - { - mem_nec = 1; - break; - } - } - } - if (!mem_nec) - { - for (int i=0; i<%(nd)s; i++) - { - if (PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i] != z[i]) - { - mem_nec = 1; - break; - } - } - } - if (mem_nec) - { - if (%(z)s) Py_XDECREF(%(z)s); - npy_intp dims[%(total_ndim)s]; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - dims[i] = PyArray_DIMS(%(x)s)[i]; - } - for (int i=0; i<%(nd)s; i++) - { - dims[%(non_pool_ndim)s + i] = z[i]; - } - //TODO: zeros not necessary - 
%(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, dims, typenum,0); - } - // initialize temp var for the value in a region - dtype_%(x)s collector; - dtype_%(ex)s eval_collector; - npy_intp z_prod; - // do not run if any z[i] is zero - z_prod = 1; - for (int i=0; i<%(nd)s; i++) - { - z_prod *= z[i]; - } - if (z_prod) - { - // will be used to hold start and end index of a region - npy_intp r_st[%(nd)s]; - npy_intp r_end[%(nd)s]; - // index for iterating over the pooling regions - npy_intp r_idx[%(nd)s]; - // placeholder for PyArray indexing (output) - npy_intp o_idx[%(total_ndim)s]; - // placeholder for PyArray indexing (input) - npy_intp i_idx[%(total_ndim)s]; - // loop over non-pooling dimensions - npy_intp non_pooling_prod = 1; - for (int i=0; i<%(non_pool_ndim)s; i++) - { - non_pooling_prod *= PyArray_DIMS(%(x)s)[i]; - } - %(omp_parallel)s - // first loop over non-pooling dimensions - for (npy_intp t=0; t (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s]; - // from padded_img space to img space - r_st[%(i)s] -= pd[%(i)s]; - r_end[%(i)s] -= pd[%(i)s]; - // handle the case where no padding, ignore border is True - if (%(params)s->ignore_border) - { - r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s]; - } - // use the index to find the correct position in the output - o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s]; - """ % dict( - i=i, params=sub["params"], non_pool_ndim=non_pool_ndim - ) - - ccode += """ - // get a pointer to the correct position in the output - dtype_%(z)s * z; - if (%(total_ndim)s == 4) - z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, o_idx[0], o_idx[1], o_idx[2], o_idx[3]))); - else - z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s, o_idx))); - """ - - for i in range(nd): - ccode += """ - // set the first index of dimension %(i)s - i_idx[%(non_pool_ndim)s + %(i)s] = r_st[%(i)s]; - """ % dict( - i=i, non_pool_ndim=non_pool_ndim - ) - ccode += """ - // use the first element as the initial value of collector - if (%(total_ndim)s == 4) { - collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0]; - eval_collector = ((dtype_%(ex)s*)(PyArray_GETPTR4(%(ex)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0]; - } else { - collector = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0]; - eval_collector = ((dtype_%(ex)s*)(PyArray_GetPtr(%(ex)s,i_idx)))[0]; - } - """ - for i in range(nd): - ccode += """ - // go through the pooled region in the unpadded input - for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s collector) { - collector = a; - eval_collector = ea; - } - """ - for i in range(nd): - ccode += """ - } // for loop over region - """ - ccode += """ - z[0] = eval_collector; - """ - for i in range(nd): - ccode += """ - } // loop over pooling dimension - """ - - ccode += """ - } // for loop over non-pooling dimensions - } // if z_prod - """ - return ccode % locals() - - def c_code_cache_version(self): - return (2, self.openmp) diff --git a/tests/tensor/signal/test_pool.py b/tests/tensor/signal/test_pool.py deleted file mode 100644 index e7ee421e9b..0000000000 --- a/tests/tensor/signal/test_pool.py +++ /dev/null @@ -1,1408 +0,0 @@ -import builtins -from itertools import product - -import numpy as np -import pytest - -import pytensor -import pytensor.tensor as at -from pytensor import function -from pytensor.tensor.math import sum as at_sum -from pytensor.tensor.signal.pool import ( - AveragePoolGrad, - DownsampleFactorMaxGradGrad, - MaxPoolGrad, - Pool, - PoolGrad, - max_pool_2d_same_size, - pool_2d, - pool_3d, -) -from pytensor.tensor.type 
import ( - TensorType, - dmatrix, - dtensor3, - dtensor4, - fmatrix, - ftensor3, - ftensor4, - ivector, - tensor, - tensor4, - vector, -) -from tests import unittest_tools as utt - - -class TestDownsampleFactorMax(utt.InferShapeTester): - def test_out_shape(self): - assert Pool.out_shape((9, 8, 6), (2, 2)) == [9, 4, 3] - assert Pool.out_shape((8, 6), (2, 2)) == [4, 3] - - @staticmethod - def numpy_max_pool_2d(input, ws, ignore_border=False, mode="max"): - """Helper function, implementing pool_2d in pure numpy""" - if len(input.shape) < 2: - raise NotImplementedError( - f"input should have at least 2 dim, shape is {input.shape}" - ) - xi = 0 - yi = 0 - if not ignore_border: - if input.shape[-2] % ws[0]: - xi += 1 - if input.shape[-1] % ws[1]: - yi += 1 - out_shp = list(input.shape[:-2]) - out_shp.append(input.shape[-2] // ws[0] + xi) - out_shp.append(input.shape[-1] // ws[1] + yi) - output_val = np.zeros(out_shp) - func = np.max - if mode == "sum": - func = np.sum - elif mode != "max": - func = np.average - - for k in np.ndindex(*input.shape[:-2]): - for i in range(output_val.shape[-2]): - ii = i * ws[0] - for j in range(output_val.shape[-1]): - jj = j * ws[1] - patch = input[k][ii : ii + ws[0], jj : jj + ws[1]] - output_val[k][i, j] = func(patch) - return output_val - - @staticmethod - def numpy_max_pool_nd(input, ws, ignore_border=False, mode="max"): - """Helper function, implementing pool_nd in pure numpy""" - if len(input.shape) < len(ws): - raise NotImplementedError( - f"input should have at least {ws} dim, shape is {input.shape}" - ) - nd = len(ws) - si = [0] * nd - if not ignore_border: - for i in range(nd): - if input.shape[-nd + i] % ws[i]: - si[i] += 1 - out_shp = list(input.shape[:-nd]) - for i in range(nd): - out_shp.append(input.shape[-nd + i] // ws[i] + si[i]) - output_val = np.zeros(out_shp) - func = np.max - if mode == "sum": - func = np.sum - elif mode != "max": - func = np.average - - for l in np.ndindex(*input.shape[:-nd]): - for r in np.ndindex(*output_val.shape[-nd:]): - patch = input[l][ - tuple(slice(r[i] * ws[i], (r[i] + 1) * ws[i]) for i in range(nd)) - ] - output_val[l][r] = func(patch) - return output_val - - @staticmethod - def numpy_max_pool_2d_stride_pad( - x, ws, ignore_border=True, stride=None, pad=(0, 0), mode="max" - ): - assert ignore_border - pad_h = pad[0] - pad_w = pad[1] - h = x.shape[-2] - w = x.shape[-1] - assert ws[0] > pad_h - assert ws[1] > pad_w - - def pad_img(x): - y = np.zeros( - ( - x.shape[0], - x.shape[1], - x.shape[2] + pad_h * 2, - x.shape[3] + pad_w * 2, - ), - dtype=x.dtype, - ) - y[:, :, pad_h : (x.shape[2] + pad_h), pad_w : (x.shape[3] + pad_w)] = x - - return y - - img_rows = h + 2 * pad_h - img_cols = w + 2 * pad_w - out_r = (img_rows - ws[0]) // stride[0] + 1 - out_c = (img_cols - ws[1]) // stride[1] + 1 - out_shp = list(x.shape[:-2]) - out_shp.append(out_r) - out_shp.append(out_c) - ws0, ws1 = ws - stride0, stride1 = stride - output_val = np.zeros(out_shp) - y = pad_img(x) - func = np.max - if mode == "sum": - func = np.sum - elif mode != "max": - func = np.average - inc_pad = mode == "average_inc_pad" - - for k in np.ndindex(*x.shape[:-2]): - for i in range(output_val.shape[-2]): - ii_stride = i * stride[0] - ii_end = builtins.min(ii_stride + ws[0], img_rows) - if not inc_pad: - ii_stride = builtins.max(ii_stride, pad_h) - ii_end = builtins.min(ii_end, h + pad_h) - for j in range(output_val.shape[-1]): - jj_stride = j * stride[1] - jj_end = builtins.min(jj_stride + ws[1], img_cols) - if not inc_pad: - jj_stride = 
builtins.max(jj_stride, pad_w) - jj_end = builtins.min(jj_end, w + pad_w) - patch = y[k][ii_stride:ii_end, jj_stride:jj_end] - output_val[k][i, j] = func(patch) - return output_val - - @staticmethod - def numpy_max_pool_nd_stride_pad( - input, ws, ignore_border=True, stride=None, pad=None, mode="max" - ): - assert ignore_border - nd = len(ws) - if pad is None: - pad = (0,) * nd - if stride is None: - stride = (0,) * nd - assert len(pad) == len(ws) == len(stride) - assert all(ws[i] > pad[i] for i in range(nd)) - - def pad_img(x): - # initialize padded input - y = np.zeros( - x.shape[0:-nd] - + tuple(x.shape[-nd + i] + pad[i] * 2 for i in range(nd)), - dtype=x.dtype, - ) - # place the unpadded input in the center - block = (slice(None),) * (len(x.shape) - nd) + tuple( - slice(pad[i], x.shape[-nd + i] + pad[i]) for i in range(nd) - ) - y[block] = x - return y - - pad_img_shp = list(input.shape[:-nd]) - out_shp = list(input.shape[:-nd]) - for i in range(nd): - padded_size = input.shape[-nd + i] + 2 * pad[i] - pad_img_shp.append(padded_size) - out_shp.append((padded_size - ws[i]) // stride[i] + 1) - output_val = np.zeros(out_shp) - padded_input = pad_img(input) - func = np.max - if mode == "sum": - func = np.sum - elif mode != "max": - func = np.average - inc_pad = mode == "average_inc_pad" - - for l in np.ndindex(*input.shape[:-nd]): - for r in np.ndindex(*output_val.shape[-nd:]): - region = [] - for i in range(nd): - r_stride = r[i] * stride[i] - r_end = builtins.min(r_stride + ws[i], pad_img_shp[-nd + i]) - if not inc_pad: - r_stride = builtins.max(r_stride, pad[i]) - r_end = builtins.min(r_end, input.shape[-nd + i] + pad[i]) - region.append(slice(r_stride, r_end)) - patch = padded_input[l][tuple(region)] - output_val[l][r] = func(patch) - return output_val - - @staticmethod - def numpy_max_pool_2d_stride( - input, ws, ignore_border=False, stride=None, mode="max" - ): - """Helper function, implementing pool_2d in pure numpy - this function provides stride input to indicate the stide size - for the pooling regions. 
if not indicated, stride == ws.""" - if len(input.shape) < 2: - raise NotImplementedError( - f"input should have at least 2 dim, shape is {input.shape}" - ) - - if stride is None: - stride = ws - img_rows = input.shape[-2] - img_cols = input.shape[-1] - - out_r = 0 - out_c = 0 - if img_rows - ws[0] >= 0: - out_r = (img_rows - ws[0]) // stride[0] + 1 - if img_cols - ws[1] >= 0: - out_c = (img_cols - ws[1]) // stride[1] + 1 - - if not ignore_border: - if out_r > 0: - if img_rows - ((out_r - 1) * stride[0] + ws[0]) > 0: - rr = img_rows - out_r * stride[0] - if rr > 0: - out_r += 1 - else: - if img_rows > 0: - out_r += 1 - if out_c > 0: - if img_cols - ((out_c - 1) * stride[1] + ws[1]) > 0: - cr = img_cols - out_c * stride[1] - if cr > 0: - out_c += 1 - else: - if img_cols > 0: - out_c += 1 - - out_shp = list(input.shape[:-2]) - out_shp.append(out_r) - out_shp.append(out_c) - - func = np.max - if mode == "sum": - func = np.sum - elif mode != "max": - func = np.average - - output_val = np.zeros(out_shp) - for k in np.ndindex(*input.shape[:-2]): - for i in range(output_val.shape[-2]): - ii_stride = i * stride[0] - ii_end = builtins.min(ii_stride + ws[0], img_rows) - for j in range(output_val.shape[-1]): - jj_stride = j * stride[1] - jj_end = builtins.min(jj_stride + ws[1], img_cols) - patch = input[k][ii_stride:ii_end, jj_stride:jj_end] - output_val[k][i, j] = func(patch) - return output_val - - @staticmethod - def numpy_max_pool_nd_stride( - input, ws, ignore_border=False, stride=None, mode="max" - ): - """Helper function, implementing pooling in pure numpy - this function provides stride input to indicate the stide size - for the pooling regions. if not indicated, stride == ws.""" - nd = len(ws) - if stride is None: - stride = ws - assert len(stride) == len(ws) - - out_shp = list(input.shape[:-nd]) - for i in range(nd): - out = 0 - if input.shape[-nd + i] - ws[i] >= 0: - out = (input.shape[-nd + i] - ws[i]) // stride[i] + 1 - if not ignore_border: - if out > 0: - if input.shape[-nd + i] - ((out - 1) * stride[i] + ws[i]) > 0: - if input.shape[-nd + i] - out * stride[i] > 0: - out += 1 - else: - if input.shape[-nd + i] > 0: - out += 1 - out_shp.append(out) - - func = np.max - if mode == "sum": - func = np.sum - elif mode != "max": - func = np.average - - output_val = np.zeros(out_shp) - for l in np.ndindex(*input.shape[:-nd]): - for r in np.ndindex(*output_val.shape[-nd:]): - region = [] - for i in range(nd): - r_stride = r[i] * stride[i] - r_end = builtins.min(r_stride + ws[i], input.shape[-nd + i]) - region.append(slice(r_stride, r_end)) - patch = input[l][tuple(region)] - output_val[l][r] = func(patch) - return output_val - - def test_DownsampleFactorMax(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, input size - examples = ( - ((2,), (16,)), - ( - (2,), - ( - 4, - 16, - ), - ), - ( - (2,), - ( - 4, - 2, - 16, - ), - ), - ((1, 1), (4, 2, 16, 16)), - ((2, 2), (4, 2, 16, 16)), - ((3, 3), (4, 2, 16, 16)), - ((3, 2), (4, 2, 16, 16)), - ((3, 2, 2), (3, 2, 16, 16, 16)), - ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)), - ) - - for example, ignore_border, mode in product( - examples, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - (maxpoolshp, inputsize) = example - imval = rng.random(inputsize) - images = pytensor.shared(imval) - - # Pure Numpy computation - numpy_output_val = self.numpy_max_pool_nd( - imval, maxpoolshp, ignore_border, mode=mode - ) - - # The pool_2d or pool_3d helper methods - if len(maxpoolshp) == 2: - output = pool_2d(images, maxpoolshp, 
ignore_border, mode=mode) - f = function( - [], - [ - output, - ], - ) - output_val = f() - utt.assert_allclose(output_val, numpy_output_val) - elif len(maxpoolshp) == 3: - output = pool_3d(images, maxpoolshp, ignore_border, mode=mode) - f = function( - [], - [ - output, - ], - ) - output_val = f() - utt.assert_allclose(output_val, numpy_output_val) - - # Pool op - maxpool_op = Pool( - ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode - )(images, maxpoolshp) - - output_shape = Pool.out_shape( - imval.shape, - maxpoolshp, - ndim=len(maxpoolshp), - ignore_border=ignore_border, - ) - utt.assert_allclose(np.asarray(output_shape), numpy_output_val.shape) - f = function([], maxpool_op) - output_val = f() - utt.assert_allclose(output_val, numpy_output_val) - - def test_DownsampleFactorMaxStride(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, stride, ignore_border, input, output sizes - examples = ( - ((1, 1), (1, 1), True, (4, 10, 16, 16), (4, 10, 16, 16)), - ((1, 1), (5, 7), True, (4, 10, 16, 16), (4, 10, 4, 3)), - ((1, 1), (1, 1), False, (4, 10, 16, 16), (4, 10, 16, 16)), - ((1, 1), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)), - ((3, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 14, 14)), - ((3, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 5, 5)), - ((3, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)), - ((3, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 14, 14)), - ((3, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)), - ((3, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)), - ((5, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 12, 14)), - ((5, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 4, 5)), - ((5, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)), - ((5, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 12, 14)), - ((5, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 5, 6)), - ((5, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)), - ((16, 16), (1, 1), True, (4, 10, 16, 16), (4, 10, 1, 1)), - ((16, 16), (5, 7), True, (4, 10, 16, 16), (4, 10, 1, 1)), - ((16, 16), (1, 1), False, (4, 10, 16, 16), (4, 10, 1, 1)), - ((16, 16), (5, 7), False, (4, 10, 16, 16), (4, 10, 1, 1)), - ((3,), (5,), True, (16,), (3,)), - ( - (3,), - (5,), - True, - ( - 2, - 16, - ), - ( - 2, - 3, - ), - ), - ( - (5,), - (3,), - True, - ( - 2, - 3, - 16, - ), - ( - 2, - 3, - 4, - ), - ), - ((5, 1, 3), (3, 3, 3), True, (2, 16, 16, 16), (2, 4, 6, 5)), - ((5, 1, 3), (3, 3, 3), True, (4, 2, 16, 16, 16), (4, 2, 4, 6, 5)), - ) - - for example, mode in product( - examples, ["max", "sum", "average_inc_pad", "average_exc_pad"] - ): - (maxpoolshp, stride, ignore_border, inputshp, outputshp) = example - # generate random images - imval = rng.random(inputshp) - images = pytensor.shared(imval) - # Pool op - numpy_output_val = self.numpy_max_pool_nd_stride( - imval, maxpoolshp, ignore_border, stride, mode - ) - assert ( - numpy_output_val.shape == outputshp - ), f"outshape is {outputshp}, calculated shape is {numpy_output_val.shape}" - maxpool_op = Pool( - ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode - )(images, maxpoolshp, stride) - f = function([], maxpool_op) - output_val = f() - utt.assert_allclose(output_val, numpy_output_val) - - def test_DownsampleFactorMaxStrideExtra(self): - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9)) - stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1), (2, 3), (10, 10), (1, 1)) - imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5), (8, 5), (8, 5), (8, 5)) - outputshps = ( - (4, 10, 4, 7), - (4, 10, 5, 8), - 
(4, 10, 2, 3), - (4, 10, 3, 4), - (4, 10, 2, 3), - (4, 10, 2, 3), - (4, 10, 4, 1), - (4, 10, 4, 1), - (4, 10, 3, 2), - (4, 10, 4, 2), - (4, 10, 1, 0), - (4, 10, 1, 1), - (4, 10, 0, 0), - (4, 10, 1, 1), - ) - images = dtensor4() - for indx in np.arange(len(maxpoolshps)): - imvsize = imvsizs[indx] - imval = rng.random((4, 10, imvsize[0], imvsize[1])) - stride = stridesizes[indx] - maxpoolshp = maxpoolshps[indx] - for ignore_border, mode in product( - [True, False], ["max", "sum", "average_inc_pad", "average_exc_pad"] - ): - indx_out = indx * 2 - if not ignore_border: - indx_out += 1 - outputshp = outputshps[indx_out] - # Pool op - numpy_output_val = self.numpy_max_pool_2d_stride( - imval, maxpoolshp, ignore_border, stride, mode - ) - assert ( - numpy_output_val.shape == outputshp - ), "outshape is {}, calculated shape is {}".format( - outputshp, - numpy_output_val.shape, - ) - maxpool_op = Pool( - ignore_border=ignore_border, ndim=len(maxpoolshp), mode=mode - )(images, maxpoolshp, stride) - f = function([images], maxpool_op) - output_val = f(imval) - utt.assert_allclose(output_val, numpy_output_val) - - def test_DownsampleFactorMaxPaddingStride(self): - ignore_border = True # padding does not support ignore_border=False - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, stride, pad, input sizes - examples = ( - ((3,), (2,), (2,), (5,)), - ((3,), (2,), (2,), (4, 5)), - ((3,), (2,), (2,), (4, 2, 5, 5)), - ((3, 3), (2, 2), (2, 2), (4, 2, 5, 5)), - ((4, 4), (2, 2), (1, 2), (4, 2, 5, 5)), - ((3, 4), (1, 1), (2, 1), (4, 2, 5, 6)), - ((4, 3), (1, 2), (0, 0), (4, 2, 6, 5)), - ((2, 2), (2, 2), (1, 1), (4, 2, 5, 5)), - ((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 6, 6, 5)), - ((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 2, 6, 5, 5)), - ) - for example, mode in product( - examples, ["max", "sum", "average_inc_pad", "average_exc_pad"] - ): - (maxpoolshp, stridesize, padsize, inputsize) = example - imval = rng.random(inputsize) - 0.5 - images = pytensor.shared(imval) - - numpy_output_val = self.numpy_max_pool_nd_stride_pad( - imval, maxpoolshp, ignore_border, stridesize, padsize, mode - ) - maxpool_op = Pool( - ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode - )(images, maxpoolshp, stridesize, padsize) - f = function([], maxpool_op) - output_val = f() - utt.assert_allclose(output_val, numpy_output_val) - - def test_DownsampleFactorMaxPaddingStride_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, stride, pad, input sizes - examples = ( - ((10,), (5,), (3,), (2,)), - ((10,), (5,), (3,), (2, 2)), - ((10,), (5,), (3,), (1, 1, 2)), - ((10, 10), (5, 3), (3, 2), (1, 1, 2, 2)), - ((10, 5), (3, 5), (2, 3), (1, 1, 2, 1)), - ((5, 5), (3, 3), (3, 3), (1, 1, 2, 2)), - ((5, 5, 5), (3, 3, 3), (3, 3, 3), (1, 1, 2, 2, 2)), - ) - # average_inc_pad and average_exc_pad do not - # support grad with padding - for mode in ["max", "sum"]: - for example in examples: - (maxpoolshp, stridesize, padsize, inputsize) = example - imval = rng.random(inputsize) * 10.0 - - def mp(input): - return Pool( - ndim=len(maxpoolshp), - ignore_border=True, - mode=mode, - )(input, maxpoolshp, stridesize, padsize) - - utt.verify_grad(mp, [imval], rng=rng) - - def test_DownsampleFactorMax_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, input sizes - examples = ( - ((2,), (3,)), - ((2,), (2, 3)), - ((2,), (2, 3, 3)), - ((1, 1), (2, 3, 3, 4)), - ((3, 2), (2, 3, 3, 4)), - ((2, 3), (2, 3, 3, 4)), - ((1, 1, 1), (2, 3, 3)), - ((3, 2, 2), (2, 3, 3, 4)), - ((2, 2, 3), (2, 3, 3, 4, 4)), - ) - - for 
example, ignore_border, mode in product( - examples, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - (maxpoolshp, inputsize) = example - imval = rng.random(inputsize) * 10.0 - - # more variance means numeric gradient will be more accurate - def mp(input): - return Pool( - ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode - )(input, maxpoolshp) - - utt.verify_grad(mp, [imval], rng=rng) - - # pool, stride, input sizes - pool_grad_stride_examples = ( - ((1,), (1,), (16,)), - ((1,), (3,), (1, 16)), - ((1,), (5,), (1, 2, 16)), - ((2,), (1,), (16,)), - ((2,), (3,), (1, 16)), - ((2,), (5,), (1, 2, 16)), - ((1, 1), (1, 1), (1, 2, 16, 16)), - ((1, 1), (3, 3), (1, 2, 16, 16)), - ((1, 1), (5, 7), (1, 2, 16, 16)), - ((3, 3), (1, 1), (1, 2, 16, 16)), - ((3, 3), (3, 3), (1, 2, 16, 16)), - ((3, 3), (5, 7), (1, 2, 16, 16)), - ((5, 3), (1, 1), (1, 2, 16, 16)), - ((5, 3), (3, 3), (1, 2, 16, 16)), - ((5, 3), (5, 7), (1, 2, 16, 16)), - ((5, 1, 2), (1, 1, 1), (16, 3, 16)), - ((5, 1, 2), (3, 1, 2), (1, 16, 3, 16)), - ((5, 1, 2), (5, 1, 4), (1, 2, 16, 3, 16)), - ((5, 3), (3, 2), (1, 2, 16, 16)), - ((5, 3), (7, 5), (1, 2, 16, 16)), - ((5, 3), (10, 6), (1, 2, 16, 16)), - ((5, 5), (1, 1), (1, 2, 8, 5)), - ((3, 2), (2, 3), (1, 2, 8, 5)), - ((7, 7), (10, 10), (1, 2, 8, 5)), - ((9, 9), (1, 1), (1, 2, 8, 5)), - ) - - @pytest.mark.slow - @pytest.mark.parametrize( - "example, ignore_border, mode", - product( - pool_grad_stride_examples, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ), - ) - def test_DownsampleFactorMax_grad_stride(self, example, ignore_border, mode): - # checks the gradient for the case that stride is used - rng = np.random.default_rng(utt.fetch_seed()) - - (maxpoolshp, stridesize, inputsize) = example - imval = rng.random(inputsize) - - def mp(input): - return Pool(ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode)( - input, maxpoolshp, stridesize - ) - - utt.verify_grad(mp, [imval], rng=rng) - - def test_DownsampleFactorMaxGrad_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, input sizes - examples = ( - ((2,), (2,)), - ((2,), (2, 3)), - ((1, 1), (2, 3, 3, 4)), - ((3, 2), (2, 3, 3, 4)), - ((2, 3), (2, 3, 3, 4)), - ((1, 1, 1), (2, 3, 3, 4)), - ((3, 2, 2), (2, 3, 3, 4)), - ((2, 3, 2), (2, 3, 3, 4)), - ((2, 2, 3), (2, 3, 3, 4)), - ((2, 2, 3), (2, 1, 3, 3, 4)), - ) - - for (maxpoolshp, inputsize) in examples: - imval = rng.random(inputsize) * 10.0 - # more variance means numeric gradient will be more accurate - for ignore_border in [True, False]: - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - # The shape of the gradient will be the shape of the output - grad_shape = Pool.out_shape( - imval.shape, - maxpoolshp, - ndim=len(maxpoolshp), - ignore_border=ignore_border, - ) - grad_val = rng.random(grad_shape) * 10.0 - - def mp(input, grad): - out = Pool(ndim=len(maxpoolshp), ignore_border=ignore_border)( - input, maxpoolshp - ) - grad_op = MaxPoolGrad( - ndim=len(maxpoolshp), ignore_border=ignore_border - ) - return grad_op(input, out, grad, maxpoolshp) - - utt.verify_grad(mp, [imval, grad_val], rng=rng) - - def test_AveragePoolGrad_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # avgpool, input sizes - examples = ( - ((2,), (2,)), - ((2,), (2, 3)), - ((1, 1), (2, 3, 3, 4)), - ((3, 2), (2, 3, 3, 4)), - ((2, 3), (2, 3, 3, 4)), - ((3, 2, 2), (2, 3, 3, 4)), - ((2, 2, 3), (2, 3, 3, 4)), - ) - - for (avgpoolshp, inputsize) in examples: - imval = 
rng.random(inputsize) * 10.0 - # more variance means numeric gradient will be more accurate - for ignore_border in [True, False]: - for mode in ["sum", "average_inc_pad", "average_exc_pad"]: - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - # The shape of the gradient will be the shape of the output - grad_shape = Pool.out_shape( - imval.shape, - avgpoolshp, - ndim=len(avgpoolshp), - ignore_border=ignore_border, - ) - grad_val = rng.random(grad_shape) * 10.0 - - def mp(input, grad): - grad_op = AveragePoolGrad( - ndim=len(avgpoolshp), ignore_border=ignore_border, mode=mode - ) - return grad_op(input, grad, avgpoolshp) - - utt.verify_grad(mp, [imval, grad_val], rng=rng) - - @pytest.mark.parametrize( - "example, ignore_border", product(pool_grad_stride_examples, [True, False]) - ) - def test_DownsampleFactorMaxGrad_grad_stride(self, example, ignore_border): - # checks the gradient of the gradient for - # the case that stride is used - rng = np.random.default_rng(utt.fetch_seed()) - (maxpoolshp, stride, inputsize) = example - imval = rng.random(inputsize) - grad_shape = Pool.out_shape( - imval.shape, - maxpoolshp, - ndim=len(maxpoolshp), - ignore_border=ignore_border, - stride=stride, - ) - - # skip the grad verification when the output is empty - if np.prod(grad_shape) != 0: - grad_val = rng.random(grad_shape) - - def mp(input, grad): - out = Pool(ndim=len(maxpoolshp), ignore_border=ignore_border)( - input, maxpoolshp, stride - ) - grad_op = MaxPoolGrad(ndim=len(maxpoolshp), ignore_border=ignore_border) - return grad_op(input, out, grad, maxpoolshp, stride) - - utt.verify_grad(mp, [imval, grad_val], rng=rng) - - @pytest.mark.slow - @pytest.mark.parametrize( - "example, ignore_border, mode", - product( - pool_grad_stride_examples, - [True, False], - ["sum", "average_inc_pad", "average_exc_pad"], - ), - ) - def test_AveragePoolGrad_grad_stride(self, example, ignore_border, mode): - # checks the gradient of the gradient for - # the case that stride is used - rng = np.random.default_rng(utt.fetch_seed()) - (avgpoolshp, stride, inputsize) = example - imval = rng.random(inputsize) - grad_shape = Pool.out_shape( - imval.shape, - avgpoolshp, - ndim=len(avgpoolshp), - ignore_border=ignore_border, - stride=stride, - ) - - # skip the grad verification when the output is empty - if np.prod(grad_shape) != 0: - grad_val = rng.random(grad_shape) - - def mp(input, grad): - grad_op = AveragePoolGrad( - ndim=len(avgpoolshp), ignore_border=ignore_border, mode=mode - ) - return grad_op(input, grad, avgpoolshp, stride) - - utt.verify_grad(mp, [imval, grad_val], rng=rng) - - def test_DownsampleFactorMaxPaddingStride_grad_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, stride, pad, input sizes - examples = ( - ((3,), (2,), (2,), (10,)), - ( - (3,), - (2,), - (2,), - ( - 2, - 10, - ), - ), - ( - (3,), - (2,), - (2,), - ( - 2, - 1, - 10, - ), - ), - ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)), - ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)), - ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)), - ((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)), - ) - - for (maxpoolshp, stridesize, padsize, inputsize) in examples: - imval = rng.random(inputsize) * 10.0 - - grad_shape = Pool.out_shape( - imval.shape, - maxpoolshp, - ndim=len(maxpoolshp), - stride=stridesize, - ignore_border=True, - pad=padsize, - ) - grad_val = rng.random(grad_shape) * 10.0 - - def mp(input, grad): - out = Pool( - ndim=len(maxpoolshp), - ignore_border=True, - )(input, maxpoolshp, stridesize, 
padsize) - grad_op = MaxPoolGrad(ndim=len(maxpoolshp), ignore_border=True) - return grad_op(input, out, grad, maxpoolshp, stridesize, padsize) - - utt.verify_grad(mp, [imval, grad_val], rng=rng) - - def test_AveragePoolPaddingStride_grad_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # avgpool, stride, pad, input sizes - examples = ( - ((3,), (2,), (2,), (10,)), - ( - (3,), - (2,), - (2,), - ( - 2, - 10, - ), - ), - ( - (3,), - (2,), - (2,), - ( - 2, - 1, - 10, - ), - ), - ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)), - ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)), - ((5, 3, 2), (3, 2, 1), (2, 2, 2), (1, 1, 10, 5, 5)), - ) - - for (avgpoolshp, stridesize, padsize, inputsize) in examples: - imval = rng.random(inputsize) * 10.0 - - # 'average_exc_pad' with non-zero padding is not implemented - for mode in ["sum", "average_inc_pad"]: - grad_shape = Pool.out_shape( - imval.shape, - avgpoolshp, - ndim=len(avgpoolshp), - stride=stridesize, - ignore_border=True, - pad=padsize, - ) - grad_val = rng.random(grad_shape) * 10.0 - - def mp(input, grad): - grad_op = AveragePoolGrad( - ndim=len(avgpoolshp), ignore_border=True, mode=mode - ) - return grad_op(input, grad, avgpoolshp, stridesize, padsize) - - utt.verify_grad(mp, [imval, grad_val], rng=rng) - - def test_DownsampleFactorMax_hessian(self): - # Example provided by Frans Cronje, see - # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J - x_vec = vector("x") - z = at.dot(x_vec.dimshuffle(0, "x"), x_vec.dimshuffle("x", 0)) - y = pool_2d(input=z, ws=(2, 2), ignore_border=True) - C = at.exp(at_sum(y)) - - grad_hess = pytensor.gradient.hessian(cost=C, wrt=x_vec) - fn_hess = function(inputs=[x_vec], outputs=grad_hess) - - # The value has been manually computed from the theoretical gradient, - # and confirmed by the implementation. 
- - assert np.allclose(fn_hess([1, 2]), [[0.0, 0.0], [0.0, 982.7667]]) - - def test_DownsampleFactorMaxGradGrad_grad(self): - rng = np.random.default_rng(utt.fetch_seed()) - # maxpool, stride, pad, input sizes - examples = ( - ((3,), (2,), (2,), (10,)), - ( - (3,), - (2,), - (2,), - ( - 2, - 10, - ), - ), - ( - (3,), - (2,), - (2,), - ( - 2, - 1, - 10, - ), - ), - ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)), - ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)), - ((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)), - ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)), - ((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)), - ) - - for (maxpoolshp, stridesize, padsize, inputsize) in examples: - imval1 = rng.random(inputsize) * 10.0 - imval2 = rng.random(inputsize) * 10.0 - - def mp(input1, input2): - op1 = Pool(ndim=len(maxpoolshp), ignore_border=True) - pooled_out = op1(input1, maxpoolshp, stridesize, padsize) - op2 = DownsampleFactorMaxGradGrad( - ndim=len(maxpoolshp), ignore_border=True - ) - out = op2(input1, pooled_out, input2, maxpoolshp, stridesize, padsize) - return out - - utt.verify_grad(mp, [imval1, imval2], rng=rng) - - def test_max_pool_2d_2D(self): - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = ((1, 1), (3, 2)) - imval = rng.random((4, 5)) - images = dmatrix() - - for maxpoolshp, ignore_border, mode in product( - maxpoolshps, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - numpy_output_val = self.numpy_max_pool_2d( - imval, maxpoolshp, ignore_border, mode=mode - ) - output = pool_2d(images, maxpoolshp, ignore_border, mode=mode) - output_val = function([images], output)(imval) - utt.assert_allclose(output_val, numpy_output_val) - - def mp(input): - return pool_2d(input, maxpoolshp, ignore_border, mode=mode) - - utt.verify_grad(mp, [imval], rng=rng) - - def test_max_pool_3d_3D(self): - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = ((1, 1, 1), (3, 2, 1)) - imval = rng.random((4, 5, 6)) - images = dtensor3() - - for maxpoolshp, ignore_border, mode in product( - maxpoolshps, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - numpy_output_val = self.numpy_max_pool_nd( - imval, maxpoolshp, ignore_border, mode=mode - ) - output = pool_3d(images, maxpoolshp, ignore_border, mode=mode) - output_val = function([images], output)(imval) - utt.assert_allclose(output_val, numpy_output_val) - - def mp(input): - return pool_3d(input, maxpoolshp, ignore_border, mode=mode) - - utt.verify_grad(mp, [imval], rng=rng) - - def test_max_pool_3d_3D_deprecated_interface(self): - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = ((1, 1, 1), (3, 2, 1)) - imval = rng.random((4, 5, 6)) - images = dtensor3() - - for maxpoolshp, ignore_border, mode in product( - maxpoolshps, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - numpy_output_val = self.numpy_max_pool_nd( - imval, maxpoolshp, ignore_border, mode=mode - ) - output = pool_3d( - input=images, - ds=maxpoolshp, - ignore_border=ignore_border, - st=maxpoolshp, - padding=(0, 0, 0), - mode=mode, - ) - output_val = function([images], output)(imval) - utt.assert_allclose(output_val, numpy_output_val) - - def mp(input): - return pool_3d(input, maxpoolshp, ignore_border, mode=mode) - - def 
test_max_pool_2d_2D_same_size(self): - rng = np.random.default_rng(utt.fetch_seed()) - test_input_array = np.array( - [[[[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]]] - ).astype(pytensor.config.floatX) - test_answer_array = np.array( - [[[[0.0, 0.0, 0.0, 0.0], [0.0, 6.0, 0.0, 8.0]]]] - ).astype(pytensor.config.floatX) - input = tensor4(name="input") - patch_size = (2, 2) - op = max_pool_2d_same_size(input, patch_size) - op_output = function([input], op)(test_input_array) - utt.assert_allclose(op_output, test_answer_array) - - def mp(input): - return max_pool_2d_same_size(input, patch_size) - - utt.verify_grad(mp, [test_input_array], rng=rng) - - def test_max_pool_2d_3D(self): - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = [(1, 2)] - imval = rng.random((2, 3, 4)) - images = dtensor3() - - for maxpoolshp, ignore_border, mode in product( - maxpoolshps, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - numpy_output_val = self.numpy_max_pool_2d( - imval, maxpoolshp, ignore_border, mode - ) - output = pool_2d(images, maxpoolshp, ignore_border, mode=mode) - output_val = function([images], output)(imval) - utt.assert_allclose(output_val, numpy_output_val) - - # removed as already tested in test_max_pool_2d_2D - # This make test in debug mode too slow. - # def mp(input): - # return pool_2d(input, maxpoolshp, ignore_border) - # utt.verify_grad(mp, [imval], rng=rng) - - def test_max_pool_2d_6D(self): - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = [(3, 2)] - imval = rng.random((2, 1, 1, 1, 3, 4)) - images = TensorType("float64", shape=(None,) * 6)() - - for maxpoolshp, ignore_border, mode in product( - maxpoolshps, - [True, False], - ["max", "sum", "average_inc_pad", "average_exc_pad"], - ): - # print 'maxpoolshp =', maxpoolshp - # print 'ignore_border =', ignore_border - numpy_output_val = self.numpy_max_pool_2d( - imval, maxpoolshp, ignore_border, mode=mode - ) - output = pool_2d(images, maxpoolshp, ignore_border, mode=mode) - output_val = function([images], output)(imval) - utt.assert_allclose(output_val, numpy_output_val) - - # removed as already tested in test_max_pool_2d_2D - # This make test in debug mode too slow. 
- # def mp(input): - # return pool_2d(input, maxpoolshp, ignore_border) - # utt.verify_grad(mp, [imval], rng=rng) - - def test_infer_shape(self): - image = dtensor4() - maxout = dtensor4() - gz = dtensor4() - rng = np.random.default_rng(utt.fetch_seed()) - maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3), (3, 2)) - - image_val = rng.random((4, 6, 7, 9)) - out_shapes = [ - [ - [[4, 6, 7, 9], [4, 6, 7, 9]], - [[4, 6, 3, 4], [4, 6, 4, 5]], - [[4, 6, 2, 3], [4, 6, 3, 3]], - [[4, 6, 3, 3], [4, 6, 4, 3]], - [[4, 6, 2, 4], [4, 6, 3, 5]], - ], - [ - [None, None], - [[4, 6, 4, 5], None], - [[4, 6, 3, 3], None], - [[4, 6, 4, 3], None], - [[4, 6, 3, 5], None], - ], - [ - [None, None], - [None, None], - [[4, 6, 3, 4], None], - [[4, 6, 4, 4], None], - [None, None], - ], - ] - - for i, maxpoolshp in enumerate(maxpoolshps): - for j, ignore_border in enumerate([True, False]): - for k, pad in enumerate([(0, 0), (1, 1), (1, 2)]): - if out_shapes[k][i][j] is None: - continue - # checking shapes generated by Pool - self._compile_and_check( - [image], - [Pool(ignore_border=ignore_border)(image, maxpoolshp, pad=pad)], - [image_val], - Pool, - ) - - # checking shapes generated by MaxPoolGrad - maxout_val = rng.random(out_shapes[k][i][j]) - gz_val = rng.random(out_shapes[k][i][j]) - self._compile_and_check( - [image, maxout, gz], - [ - MaxPoolGrad(ignore_border=ignore_border)( - image, maxout, gz, maxpoolshp, pad=pad - ) - ], - [image_val, maxout_val, gz_val], - MaxPoolGrad, - warn=False, - ) - # checking with broadcastable input - image = tensor(dtype="float64", shape=(None, None, 1, 1)) - image_val = rng.random((4, 6, 1, 1)) - self._compile_and_check( - [image], - [Pool(ignore_border=True)(image, (2, 2), pad=(0, 0))], - [image_val], - Pool, - ) - - def test_pooling_with_tensor_vars(self): - x = ftensor4() - window_size = ivector() - stride = ivector() - padding = ivector() - data = np.random.normal(0, 1, (1, 1, 5, 5)).astype("float32") - - # checking variable params vs fixed params - for ignore_border in [True, False]: - for mode in ["max", "sum", "average_inc_pad", "average_exc_pad"]: - y = pool_2d(x, window_size, ignore_border, stride, padding, mode) - dx = pytensor.gradient.grad(y.sum(), x) - var_fct = pytensor.function([x, window_size, stride, padding], [y, dx]) - for ws in (4, 2, 5): - for st in (2, 3): - for pad in (0, 1): - if ( - pad > st - or st > ws - or (pad != 0 and not ignore_border) - or (mode == "average_exc_pad" and pad != 0) - ): - continue - y = pool_2d( - x, (ws, ws), ignore_border, (st, st), (pad, pad), mode - ) - dx = pytensor.gradient.grad(y.sum(), x) - fix_fct = pytensor.function([x], [y, dx]) - var_y, var_dx = var_fct( - data, (ws, ws), (st, st), (pad, pad) - ) - fix_y, fix_dx = fix_fct(data) - utt.assert_allclose(var_y, fix_y) - utt.assert_allclose(var_dx, fix_dx) - - def test_pooling_with_tensor_vars_deprecated_interface(self): - x = ftensor4() - window_size = ivector() - stride = ivector() - padding = ivector() - data = np.random.normal(0, 1, (1, 1, 5, 5)).astype("float32") - - # checking variable params vs fixed params - for ignore_border in [True, False]: - for mode in ["max", "sum", "average_inc_pad", "average_exc_pad"]: - y = pool_2d( - input=x, - ds=window_size, - ignore_border=ignore_border, - st=stride, - padding=padding, - mode=mode, - ) - dx = pytensor.gradient.grad(y.sum(), x) - var_fct = pytensor.function([x, window_size, stride, padding], [y, dx]) - ws = 5 - st = 3 - pad = 1 - if ( - pad > st - or st > ws - or (pad != 0 and not ignore_border) - or (mode == "average_exc_pad" 
and pad != 0) - ): - continue - y = pool_2d( - input=x, - ds=(ws, ws), - ignore_border=ignore_border, - st=(st, st), - padding=(pad, pad), - mode=mode, - ) - dx = pytensor.gradient.grad(y.sum(), x) - fix_fct = pytensor.function([x], [y, dx]) - var_y, var_dx = var_fct(data, (ws, ws), (st, st), (pad, pad)) - fix_y, fix_dx = fix_fct(data) - utt.assert_allclose(var_y, fix_y) - utt.assert_allclose(var_dx, fix_dx) - - @staticmethod - def checks_helper(func, x, ws, stride, pad): - with pytest.raises( - ValueError, match=r"You can't provide a tuple value to both 'ws' and 'ds'." - ): - func(x, ds=ws, ws=ws) - - with pytest.raises( - ValueError, match="You must provide a tuple value for the window size." - ): - func(x) - - with pytest.raises( - ValueError, - match=r"You can't provide a tuple value to both 'st and 'stride'.", - ): - func(x, ws=ws, st=stride, stride=stride) - - with pytest.raises( - ValueError, - match=r"You can't provide a tuple value to both 'padding' and pad.", - ): - func(x, ws=ws, pad=pad, padding=pad) - - def test_pool_2d_checks(self): - x = fmatrix() - - self.checks_helper(pool_2d, x, ws=(1, 1), stride=(1, 1), pad=(1, 1)) - - with pytest.raises( - NotImplementedError, match="pool_2d requires a dimension >= 2" - ): - pool_2d(input=vector(), ws=(1, 1)) - - with pytest.deprecated_call(): - out = pool_2d(input=x, ws=(1, 1)) - assert not out.owner.op.ignore_border - - def test_pool_3d_checks(self): - x = ftensor3() - - self.checks_helper(pool_3d, x, ws=(1, 1, 1), stride=(1, 1, 1), pad=(1, 1, 1)) - - with pytest.raises( - NotImplementedError, match="pool_3d requires a dimension >= 3" - ): - pool_3d(input=fmatrix(), ws=(1, 1, 1)) - - with pytest.deprecated_call(): - out = pool_3d(input=x, ws=(1, 1, 1)) - assert not out.owner.op.ignore_border - - @pytest.mark.parametrize("func", [Pool.out_shape, PoolGrad.out_shape]) - def test_Pool_out_shape_checks(self, func): - x = (10, 10) - - self.checks_helper(func, x, ws=(1, 1), stride=(1, 1), pad=(1, 1)) - - with pytest.raises(TypeError, match="imgshape must have at least 3 dimensions"): - func(x, (2, 2), ndim=3) - - def test_Pool_make_node_checks(self): - x = fmatrix() - - with pytest.raises( - NotImplementedError, match="padding works only with ignore_border=True" - ): - op = Pool(ignore_border=False, ndim=2) - op(x, (1, 1), pad=(1, 1)) - - with pytest.raises( - NotImplementedError, match="padding must be smaller than strides" - ): - op = Pool(ignore_border=True, ndim=2) - op(x, (1, 1), pad=(2, 2)) - - with pytest.raises(TypeError): - op = Pool(ignore_border=True, ndim=3) - op(x, (1, 1)) - - op = Pool(ignore_border=True, ndim=2) - with pytest.raises(TypeError, match="Pool downsample parameters must be ints."): - op(x, (1.0, 1.0)) - - with pytest.raises(TypeError, match="Stride parameters must be ints."): - op(x, (1, 1), stride=(1.0, 1.0)) - - with pytest.raises(TypeError, match="Padding parameters must be ints."): - op(x, (2, 2), pad=(1.0, 1.0)) - - def test_MaxPoolGrad_make_node_checks(self): - x = fmatrix() - op = MaxPoolGrad(ignore_border=True, ndim=2) - with pytest.raises(TypeError, match="Pool downsample parameters must be ints."): - op(x, maxout=[[1, 1], [1, 1]], gz=[[1, 1], [1, 1]], ws=(1.0, 1, 0)) - - with pytest.raises(TypeError, match="Stride parameters must be ints."): - op( - x, - maxout=[[1, 1], [1, 1]], - gz=[[1, 1], [1, 1]], - ws=(1, 1), - stride=(1.0, 1.0), - ) - - with pytest.raises(TypeError, match="Padding parameters must be ints."): - op( - x, - maxout=[[1, 1], [1, 1]], - gz=[[1, 1], [1, 1]], - ws=(1, 1), - pad=(1.0, 1.0), 
- ) diff --git a/tests/test_rop.py b/tests/test_rop.py index 178fdc7286..3b5b754eaf 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -12,8 +12,6 @@ """ -import itertools - import numpy as np import pytest @@ -26,7 +24,6 @@ from pytensor.tensor.math import argmax, dot from pytensor.tensor.math import max as at_max from pytensor.tensor.shape import unbroadcast -from pytensor.tensor.signal.pool import Pool from pytensor.tensor.type import matrix, vector from tests import unittest_tools as utt @@ -248,63 +245,6 @@ def test_unbroadcast(self): unbroadcast(self.x[:4].dimshuffle("x", 0), 0).sum(axis=1), (1,) ) - @pytest.mark.slow - def test_downsample(self): - rng = np.random.default_rng(utt.fetch_seed()) - # ws, shp - examples = ( - ((2,), (16,)), - ( - (2,), - ( - 4, - 16, - ), - ), - ( - (2,), - ( - 4, - 2, - 16, - ), - ), - ((1, 1), (4, 2, 16, 16)), - ((2, 2), (4, 2, 16, 16)), - ((3, 3), (4, 2, 16, 16)), - ((3, 2), (4, 2, 16, 16)), - ((3, 2, 2), (3, 2, 16, 16, 16)), - ((2, 3, 2), (3, 2, 16, 16, 16)), - ((2, 2, 3), (3, 2, 16, 16, 16)), - ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)), - ) - - for example, ignore_border in itertools.product(examples, [True, False]): - (ws, shp) = example - vx = rng.random(shp) - vex = rng.random(shp) - - x = pytensor.shared(vx) - ex = pytensor.shared(vex) - - maxpool_op = Pool(ignore_border, ndim=len(ws)) - a_pooled = maxpool_op(x, ws).flatten() - yv = Rop(a_pooled, x, ex) - mode = None - if pytensor.config.mode == "FAST_COMPILE": - mode = "FAST_RUN" - rop_f = function([], yv, on_unused_input="ignore", mode=mode) - sy, _ = pytensor.scan( - lambda i, y, x, v: (grad(y[i], x) * v).sum(), - sequences=at.arange(a_pooled.shape[0]), - non_sequences=[a_pooled, x, ex], - mode=mode, - ) - scan_f = function([], sy, on_unused_input="ignore", mode=mode) - v1 = rop_f() - v2 = scan_f() - assert np.allclose(v1, v2), f"Rop mismatch: {v1} {v2}" - def test_join(self): tv = np.asarray(self.rng.uniform(size=(10,)), pytensor.config.floatX) t = pytensor.shared(tv) From 1b8ea45ac17051c40507ad6c5501c5b0713a7b89 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 13:18:56 +0300 Subject: [PATCH 13/43] remove ConsiderConstant --- pytensor/gradient.py | 63 ------------------------------------- pytensor/tensor/__init__.py | 2 +- tests/test_gradient.py | 42 ------------------------- 3 files changed, 1 insertion(+), 106 deletions(-) diff --git a/pytensor/gradient.py b/pytensor/gradient.py index 74cd02bd97..29106dec70 100644 --- a/pytensor/gradient.py +++ b/pytensor/gradient.py @@ -2108,44 +2108,6 @@ def _is_zero(x): return "yes" -class ConsiderConstant(ViewOp): - def grad(self, args, g_outs): - return [g_out.zeros_like(g_out) for g_out in g_outs] - - -consider_constant_ = ConsiderConstant() - - -def consider_constant(x): - """Consider an expression constant when computing gradients. - - DEPRECATED: use `zero_grad` or `disconnected_grad` instead. - - The expression itself is unaffected, but when its gradient is - computed, or the gradient of another expression that this - expression is a subexpression of, it will not be backpropagated - through. In other words, the gradient of the expression is - truncated to 0. - - :param x: A PyTensor expression whose gradient should be truncated. - - :return: The expression is returned unmodified, but its gradient - is now truncated to 0. - - .. versionadded:: 0.7 - """ - warnings.warn( - ( - "`ConsiderConstant` is deprecated; use `zero_grad` or " - "`disconnected_grad` instead." 
- ), - category=DeprecationWarning, - stacklevel=3, - ) - - return ConsiderConstant()(x) - - class ZeroGrad(ViewOp): def grad(self, args, g_outs): return [g_out.zeros_like(g_out) for g_out in g_outs] @@ -2352,28 +2314,3 @@ def grad_scale(x, multiplier): 0.416... """ return GradScale(multiplier)(x) - - -DEPRECATED_NAMES = [ - ( - "consider_constant_", - "`consider_constant_` is deprecated; use `zero_grad` or `disconnected_grad` instead.", - ConsiderConstant(), - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/pytensor/tensor/__init__.py b/pytensor/tensor/__init__.py index 4598acd440..2e84db13e9 100644 --- a/pytensor/tensor/__init__.py +++ b/pytensor/tensor/__init__.py @@ -99,7 +99,7 @@ def _get_vector_length_Constant(op: Union[Op, Variable], var: Constant) -> int: import pytensor.tensor.exceptions # noqa -from pytensor.gradient import consider_constant, grad, hessian, jacobian # noqa +from pytensor.gradient import grad, hessian, jacobian # noqa # adds shared-variable constructors from pytensor.tensor import sharedvar # noqa diff --git a/tests/test_gradient.py b/tests/test_gradient.py index a456f58388..739289c3c4 100644 --- a/tests/test_gradient.py +++ b/tests/test_gradient.py @@ -766,48 +766,6 @@ def test_subgraph_grad(): assert np.sum(np.abs(true_grad - pgrad)) < 0.00001 -class TestConsiderConstant: - def test_op_removed(self): - from pytensor.gradient import ConsiderConstant, consider_constant - - x = matrix("x") - - with pytest.deprecated_call(): - y = x * consider_constant(x) - - f = pytensor.function([x], y) - - assert ConsiderConstant not in [ - type(node.op) for node in f.maker.fgraph.toposort() - ] - - def test_grad(self): - from pytensor.gradient import consider_constant - - rng = np.random.default_rng(seed=utt.fetch_seed()) - - a = np.asarray(rng.standard_normal((5, 5)), dtype=config.floatX) - - x = matrix("x") - - with pytest.deprecated_call(): - expressions_gradients = [ - (x * consider_constant(x), x), - (x * consider_constant(exp(x)), exp(x)), - (consider_constant(x), at.constant(0.0)), - (x**2 * consider_constant(x), 2 * x**2), - ] - - for expr, expr_grad in expressions_gradients: - g = grad(expr.sum(), x) - # gradient according to pytensor - f = pytensor.function([x], g, on_unused_input="ignore") - # desired gradient - f2 = pytensor.function([x], expr_grad, on_unused_input="ignore") - - assert np.allclose(f(a), f2(a)) - - class TestZeroGrad: def setup_method(self): self.rng = np.random.default_rng(seed=utt.fetch_seed()) From cad47ee551c08a95220403d13a7b07665e93de49 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 13:27:42 +0300 Subject: [PATCH 14/43] remove deprecated stack interface --- pytensor/tensor/basic.py | 52 ++++++++------------------------------ tests/tensor/test_basic.py | 6 +++-- 2 files changed, 14 insertions(+), 44 deletions(-) diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py index 95734cec4b..1b35771c53 100644 --- a/pytensor/tensor/basic.py +++ b/pytensor/tensor/basic.py @@ -7,12 +7,9 @@ import builtins import warnings -from collections.abc import Sequence from functools import partial from numbers import Number -from typing import TYPE_CHECKING, 
Optional -from typing import Sequence as TypeSequence -from typing import Tuple, Union +from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union from typing import cast as type_cast import numpy as np @@ -1337,8 +1334,8 @@ def identity_like(x, dtype: Optional[Union[str, np.generic, np.dtype]] = None): def infer_static_shape( - shape: Union[Variable, TypeSequence[Union[Variable, int]]] -) -> Tuple[TypeSequence["TensorLike"], TypeSequence[Optional[int]]]: + shape: Union[Variable, Sequence[Union[Variable, int]]] +) -> Tuple[Sequence["TensorLike"], Sequence[Optional[int]]]: """Infer the static shapes implied by the potentially symbolic elements in `shape`. `shape` will be validated and constant folded. As a result, this function @@ -2538,19 +2535,16 @@ def roll(x, shift, axis=None): ) -def stack(*tensors, **kwargs): +def stack(tensors: Sequence[TensorVariable], axis: int = 0): """Stack tensors in sequence on given axis (default is 0). Take a sequence of tensors and stack them on given axis to make a single tensor. The size in dimension `axis` of the result will be equal to the number of tensors passed. - Note: The interface stack(*tensors) is deprecated, you should use - stack(tensors, axis=0) instead. - Parameters ---------- - tensors : list or tuple of tensors + tensors : Sequence[TensorVariable] A list of tensors to be stacked. axis : int The index of the new axis. Default value is 0. @@ -2585,35 +2579,9 @@ def stack(*tensors, **kwargs): >>> rval.shape # 3 tensors are stacked on axis -2 (2, 2, 2, 3, 2) """ - # ---> Remove this when moving to the new interface: - if not tensors and not kwargs: - raise ValueError("No tensor arguments provided") - - if not kwargs and not isinstance(tensors[0], (list, tuple)): - warnings.warn( - "stack(*tensors) interface is deprecated, use" - " stack(tensors, axis=0) instead.", - DeprecationWarning, - stacklevel=3, - ) - axis = 0 - elif "tensors" in kwargs: - tensors = kwargs["tensors"] - if "axis" in kwargs: - axis = kwargs["axis"] - else: - axis = 0 - else: - if len(tensors) == 2: - axis = tensors[1] - elif "axis" in kwargs: - axis = kwargs["axis"] - else: - axis = 0 - tensors = tensors[0] - # <--- Until here. - - if len(tensors) == 0: + if not isinstance(tensors, Sequence): + raise TypeError("First argument should be Sequence[TensorVariable]") + elif len(tensors) == 0: raise ValueError("No tensor arguments provided") # If all tensors are scalars of the same type, call make_vector. @@ -3662,8 +3630,8 @@ def swapaxes(y, axis1, axis2): def moveaxis( a: Union[np.ndarray, TensorVariable], - source: Union[int, TypeSequence[int]], - destination: Union[int, TypeSequence[int]], + source: Union[int, Sequence[int]], + destination: Union[int, Sequence[int]], ) -> TensorVariable: """Move axes of a TensorVariable to new positions. 
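
Note on the change above: with the variadic `stack(*tensors)` interface removed, callers must pass an explicit sequence (and, optionally, an axis). A minimal migration sketch follows; the variable names `a`, `b`, and `s` are illustrative only and do not come from this patch:

    import pytensor.tensor as at

    a = at.scalar("a")
    b = at.scalar("b")

    # Removed interface: at.stack(a, b) now raises
    # TypeError("First argument should be Sequence[TensorVariable]").
    s = at.stack([a, b], axis=0)  # pass a sequence plus an explicit axis
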
diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index 6e60f77fe7..6139089878 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -1332,8 +1332,10 @@ def test_stack_new_interface(self): with pytest.raises(IndexError): stack([a, b], -4) - # Testing depreciation warning - with pytest.warns(DeprecationWarning): + # Testing depreciation warning is now an informative error + with pytest.raises( + TypeError, match=r"First argument should be Sequence\[TensorVariable\]" + ): s = stack(a, b) def test_stack_hessian(self): From 091933cee35ffd5185173b55eb92490ffaf7a9ce Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 13:34:20 +0300 Subject: [PATCH 15/43] remove deprecated printing api --- pytensor/printing.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pytensor/printing.py b/pytensor/printing.py index d8c65b8493..e7f9738426 100644 --- a/pytensor/printing.py +++ b/pytensor/printing.py @@ -4,7 +4,6 @@ import logging import os import sys -import warnings from abc import ABC, abstractmethod from contextlib import contextmanager from copy import copy @@ -120,7 +119,6 @@ def debugprint( print_destroy_map: bool = False, print_view_map: bool = False, print_fgraph_inputs: bool = False, - ids: Optional[IDTypesType] = None, ) -> Union[str, TextIO]: r"""Print a graph as text. @@ -193,14 +191,6 @@ def debugprint( else: _file = file - if ids is not None: - warnings.warn( - "`ids` is deprecated; use `id_type` instead.", - DeprecationWarning, - stacklevel=2, - ) - id_type = ids - if done is None: done = dict() From 1f2e88980276c9354132e2918e95fadfd6dd2e31 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 13:38:06 +0300 Subject: [PATCH 16/43] remove outdated deprecations --- pytensor/scalar/basic.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/pytensor/scalar/basic.py b/pytensor/scalar/basic.py index 7a3cd4a640..f428f2528b 100644 --- a/pytensor/scalar/basic.py +++ b/pytensor/scalar/basic.py @@ -4449,26 +4449,3 @@ def handle_composite(node, mapping): Compositef32.special[Composite] = handle_composite - - -DEPRECATED_NAMES = [ - ("Inv", "`Inv` is deprecated; use `Reciprocal` instead.", Reciprocal), - ("inv", "`inv` is deprecated; use `reciprocal` instead.", reciprocal), - ("Scalar", "`Scalar` is deprecated; use `ScalarType` instead.", ScalarType), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") From 629e0bd2de6435ad891acbe3f526156d9cd50245 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 13:49:03 +0300 Subject: [PATCH 17/43] remove deprecated module sparse.opt --- pytensor/sparse/opt.py | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 pytensor/sparse/opt.py diff --git a/pytensor/sparse/opt.py b/pytensor/sparse/opt.py deleted file mode 100644 index fe713fe644..0000000000 --- a/pytensor/sparse/opt.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.sparse.opt` is deprecated; use `pytensor.sparse.rewriting` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.sparse.rewriting import * # noqa: F401 E402 F403 From 90ec896811de06ba7bb9efe7c959ee0c824f03c5 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 14:18:48 +0300 Subject: [PATCH 18/43] remove deprecated module scan.opt --- pytensor/scan/opt.py | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 pytensor/scan/opt.py diff --git a/pytensor/scan/opt.py b/pytensor/scan/opt.py deleted file mode 100644 index fe381d0f44..0000000000 --- a/pytensor/scan/opt.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.scan.opt` is deprecated; use `pytensor.scan.rewriting` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.scan.rewriting import * # noqa: F401 E402 F403 From d096c41fc8f2ca592ebc2a52c1330cdf71297fd4 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 14:27:06 +0300 Subject: [PATCH 19/43] remove deprecation exception from scan argumens and replace it with an informative ValueError --- pytensor/scan/basic.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pytensor/scan/basic.py b/pytensor/scan/basic.py index 66e80e6fb3..678a3eea96 100644 --- a/pytensor/scan/basic.py +++ b/pytensor/scan/basic.py @@ -502,16 +502,6 @@ def wrap_into_list(x): # wrap outputs info in a dictionary if they are not already in one for i in range(n_outs): if outs_info[i] is not None: - if isinstance(outs_info[i], dict): - if outs_info[i].get("return_steps", None) is not None: - raise DeprecationWarning( - "Using `return_steps` has been deprecated. " - "Simply select the entries you need using a " - "subtensor. Scan will optimize memory " - "consumption, so do not worry about that." 
- ) - # END - if not isinstance(outs_info[i], dict): # by default any output has a tap value of -1 outs_info[i] = dict([("initial", outs_info[i]), ("taps", [-1])]) @@ -551,6 +541,11 @@ def wrap_into_list(x): ("All the tap values must be smaller than 0."), outs_info[i], ) + _unexpected_keys = set(outs_info[i]) - {"initial", "taps", "inplace"} + if _unexpected_keys: + raise ValueError( + f"These keys were unexpected in Scan outputs_info[{i}]: {_unexpected_keys}" + ) else: # if a None is provided as the output info we replace it # with an empty OrdereDict() to simplify handling From b1a1dfcc2cbcd162f61443a4a33daf826ecf2e22 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 14:40:02 +0300 Subject: [PATCH 20/43] remove uutdated deprecations from pytensor/tensor/math.py --- pytensor/tensor/math.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index 0d475009fb..25deba7a72 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -3133,28 +3133,3 @@ def matmul(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None "logaddexp", "logsumexp", ] - -DEPRECATED_NAMES = [ - ("abs_", "`abs_` is deprecated; use `abs` instead.", abs), - ("inv", "`inv` is deprecated; use `reciprocal` instead.", reciprocal), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") - - -def __dir__(): - return sorted(__all__ + [names[0] for names in DEPRECATED_NAMES]) From 65cd5888b60c355946230c6023ca5a875d0b9612 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 14:44:02 +0300 Subject: [PATCH 21/43] remove uutdated deprecations from pytensor/tensor/slinalg.py --- doc/library/tensor/slinalg.rst | 4 ---- pytensor/tensor/slinalg.py | 38 ---------------------------------- 2 files changed, 42 deletions(-) diff --git a/doc/library/tensor/slinalg.rst b/doc/library/tensor/slinalg.rst index 324dca8875..b6a6768ff0 100644 --- a/doc/library/tensor/slinalg.rst +++ b/doc/library/tensor/slinalg.rst @@ -20,8 +20,4 @@ API .. automodule:: pytensor.tensor.slinalg :members: - :exclude-members: solve, solve_lower_triangular, solve_upper_triangular -.. autofunction:: solve(a, b) -.. autofunction:: solve_lower_triangular(a, b) -.. autofunction:: solve_upper_triangular(a, b) diff --git a/pytensor/tensor/slinalg.py b/pytensor/tensor/slinalg.py index e967360e80..b268c7522e 100644 --- a/pytensor/tensor/slinalg.py +++ b/pytensor/tensor/slinalg.py @@ -835,41 +835,3 @@ def solve_continuous_lyapunov(A: "TensorLike", Q: "TensorLike") -> TensorVariabl "kron", "expm", ] - -DEPRECATED_NAMES = [ - ( - "solve_lower_triangular", - "`solve_lower_triangular` is deprecated; use `solve` instead.", - SolveTriangular(lower=True), - ), - ( - "solve_upper_triangular", - "`solve_upper_triangular` is deprecated; use `solve` instead.", - SolveTriangular(lower=False), - ), - ( - "solve_symmetric", - "`solve_symmetric` is deprecated; use `solve` instead.", - Solve(assume_a="sym"), - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") - - -def __dir__(): - return sorted(__all__ + [names[0] for names in DEPRECATED_NAMES]) From eca127dcfe5694653371df3c29e9aafc14a39b6d Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:06:22 +0300 Subject: [PATCH 22/43] remove outdated deprecations from pytensor/tensor/rewriting/basic.py --- pytensor/tensor/rewriting/basic.py | 31 ------------------------------ 1 file changed, 31 deletions(-) diff --git a/pytensor/tensor/rewriting/basic.py b/pytensor/tensor/rewriting/basic.py index 87639afccc..c39c20cc72 100644 --- a/pytensor/tensor/rewriting/basic.py +++ b/pytensor/tensor/rewriting/basic.py @@ -1267,35 +1267,4 @@ def local_useless_topk(fgraph, node): return {old_output: new_output} -def import_ShapeFeature(): - from pytensor.tensor.rewriting.shape import ShapeFeature - - return ShapeFeature - - -DEPRECATED_NAMES = { - "ShapeFeature": ( - "`ShapeFeature` is now located in `pytensor.tensor.rewriting.shape`.", - import_ShapeFeature, - ), -} - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - from warnings import warn - - res = DEPRECATED_NAMES.get(name) - if res: - msg, fn = res - warn(msg, DeprecationWarning, stacklevel=2) - return fn() - - raise AttributeError(f"module {__name__} has no attribute {name}") - - register_canonicalize(RemovalNodeRewriter(tensor_copy), name="remove_tensor_copy") From 3e6efb4fd13342210329da5fa432d712b5c1d69f Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:07:52 +0300 Subject: [PATCH 23/43] remove outdated deprecations from pytensor/graph/rewriting/basic.py --- pytensor/graph/rewriting/basic.py | 141 ------------------------------ 1 file changed, 141 deletions(-) diff --git a/pytensor/graph/rewriting/basic.py b/pytensor/graph/rewriting/basic.py index ae246ce8cd..6d93a3c694 100644 --- a/pytensor/graph/rewriting/basic.py +++ b/pytensor/graph/rewriting/basic.py @@ -110,14 +110,6 @@ def apply(self, fgraph): """ raise NotImplementedError() - def optimize(self, *args, **kwargs): - warnings.warn( - "`GraphRewriter.optimize` is deprecated; use `GraphRewriter.rewrite` instead.", - DeprecationWarning, - stacklevel=2, - ) - self.rewrite(*args, **kwargs) - def rewrite(self, fgraph, *args, **kwargs): """ @@ -2306,14 +2298,6 @@ def __init__( def get_node_rewriters(self): yield from self.node_tracker.get_rewriters() - def get_local_optimizers(self): - warnings.warn( - "`get_local_optimizers` is deprecated; use `get_node_rewriters` instead.", - DeprecationWarning, - stacklevel=2, - ) - yield from self.get_node_rewriters() - def add_requirements(self, fgraph): super().add_requirements(fgraph) for rewriter in self.get_node_rewriters(): @@ -3137,128 +3121,3 @@ def add_requirements(self, fgraph): def apply(self, fgraph): pass - - -DEPRECATED_NAMES = [ - ( - "LocalMetaOptimizerSkipAssertionError", - "`LocalMetaOptimizerSkipAssertionError` is deprecated: use `MetaNodeRewriterSkip` instead.", - MetaNodeRewriterSkip, - ), - ( - "GlobalOptimizer", - "`GlobalOptimizer` is deprecated: use `GraphRewriter` instead.", - GraphRewriter, - ), - ( - "LocalOptimizer", - "`LocalOptimizer` is deprecated: use `NodeRewriter` instead.", - NodeRewriter, - ), - ( - "local_optimizer", - 
"`local_optimizer` is deprecated: use `node_rewriter` instead.", - node_rewriter, - ), - ( - "pre_greedy_local_optimizer", - "`pre_greedy_local_optimizer` is deprecated: use `pre_greedy_node_rewriter` instead.", - pre_greedy_node_rewriter, - ), - ( - "FromFunctionOptimizer", - "`FromFunctionOptimizer` is deprecated: use `FromFunctionGraphRewriter` instead.", - FromFunctionGraphRewriter, - ), - ( - "optimizer", - "`optimizer` is deprecated: use `graph_rewriter` instead.", - graph_rewriter, - ), - ( - "inplace_optimizer", - "`inplace_optimizer` is deprecated: use `graph_rewriter` instead.", - graph_rewriter, - ), - ( - "LocalMetaOptimizer", - "`LocalMetaOptimizer` is deprecated: use `MetaNodeRewriter` instead.", - MetaNodeRewriter, - ), - ( - "SeqOptimizer", - "`SeqOptimizer` is deprecated: use `SequentialGraphRewriter` instead.", - SequentialGraphRewriter, - ), - ( - "FromFunctionLocalOptimizer", - "`FromFunctionLocalOptimizer` is deprecated: use `FromFunctionNodeRewriter` instead.", - FromFunctionNodeRewriter, - ), - ( - "LocalOptTracker", - "`LocalOptTracker` is deprecated: use `OpToRewriterTracker` instead.", - OpToRewriterTracker, - ), - ( - "LocalOptGroup", - "`LocalOptGroup` is deprecated: use `SequentialNodeRewriter` instead.", - SequentialNodeRewriter, - ), - ( - "OpSub", - "`OpSub` is deprecated: use `SubstitutionNodeRewriter` instead.", - SubstitutionNodeRewriter, - ), - ( - "OpRemove", - "`OpRemove` is deprecated: use `RemovalNodeRewriter` instead.", - RemovalNodeRewriter, - ), - ( - "PatternSub", - "`PatternSub` is deprecated: use `PatternNodeRewriter` instead.", - PatternNodeRewriter, - ), - ( - "NavigatorOptimizer", - "`NavigatorOptimizer` is deprecated: use `NodeProcessingGraphRewriter` instead.", - NodeProcessingGraphRewriter, - ), - ( - "TopoOptimizer", - "`TopoOptimizer` is deprecated: use `WalkingGraphRewriter` instead.", - WalkingGraphRewriter, - ), - ( - "topogroup_optimizer", - "`topogroup_optimizer` is deprecated: use `walking_rewriter` instead.", - walking_rewriter, - ), - ( - "OpKeyOptimizer", - "`OpKeyOptimizer` is deprecated: use `OpKeyGraphRewriter` instead.", - OpKeyGraphRewriter, - ), - ( - "EquilibriumOptimizer", - "`EquilibriumOptimizer` is deprecated: use `EquilibriumGraphRewriter` instead.", - EquilibriumGraphRewriter, - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") From 0c8e968756680137ebe9b1e60f67a699bf902f3c Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:10:07 +0300 Subject: [PATCH 24/43] remove outdated deprecations from pytensor/graph/rewriting/db.py --- pytensor/graph/rewriting/db.py | 40 ---------------------------------- 1 file changed, 40 deletions(-) diff --git a/pytensor/graph/rewriting/db.py b/pytensor/graph/rewriting/db.py index 45d13577ca..e9605acb79 100644 --- a/pytensor/graph/rewriting/db.py +++ b/pytensor/graph/rewriting/db.py @@ -538,43 +538,3 @@ def __init__(self, db): def query(self, *tags, **kwtags): return self.db.query(*tags, **kwtags) - - -DEPRECATED_NAMES = [ - ( - "DB", - "`DB` is deprecated; use `RewriteDatabase` instead.", - RewriteDatabase, - ), - ( - "Query", - "`Query` is deprecated; use `RewriteDatabaseQuery` instead.", - RewriteDatabaseQuery, - ), - ( - "OptimizationDatabase", - "`OptimizationDatabase` is deprecated; use `RewriteDatabase` instead.", - RewriteDatabase, - ), - ( - "OptimizationQuery", - "`OptimizationQuery` is deprecated; use `RewriteDatabaseQuery` instead.", - RewriteDatabaseQuery, - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. - - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") From 8de0ed5850d7a318d545a3b657c2cd5ea72d59f8 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:17:33 +0300 Subject: [PATCH 25/43] remove outdated deprecations from pytensor/graph/rewriting/utils.py --- pytensor/graph/rewriting/utils.py | 35 ------------------------------- 1 file changed, 35 deletions(-) diff --git a/pytensor/graph/rewriting/utils.py b/pytensor/graph/rewriting/utils.py index e0d46e42bf..8bf8de87bb 100644 --- a/pytensor/graph/rewriting/utils.py +++ b/pytensor/graph/rewriting/utils.py @@ -1,5 +1,4 @@ import copy -import warnings from typing import TYPE_CHECKING, Generator, Optional, Sequence, Union, cast import pytensor @@ -23,7 +22,6 @@ def rewrite_graph( include: Sequence[str] = ("canonicalize",), custom_rewrite: Optional["GraphRewriter"] = None, clone: bool = False, - custom_opt: Optional["GraphRewriter"] = None, **kwargs, ) -> Union[Variable, Sequence[Variable], FunctionGraph]: """Easily apply rewrites to a graph. @@ -62,14 +60,6 @@ def rewrite_graph( query_rewrites = optdb.query(RewriteDatabaseQuery(include=include, **kwargs)) _ = query_rewrites.rewrite(fgraph) - if custom_opt is not None: - warnings.warn( - "`custom_opt` is deprecated; use `custom_rewrite` instead.", - DeprecationWarning, - stacklevel=2, - ) - custom_rewrite = custom_opt - if custom_rewrite: custom_rewrite.rewrite(fgraph) @@ -248,28 +238,3 @@ def get_clients_at_depth( else: assert var.owner is not None yield var.owner - - -DEPRECATED_NAMES = [ - ( - "optimize_graph", - "`optimize_graph` is deprecated: use `rewrite_graph` instead.", - rewrite_graph, - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") From 9c6653ecf5e1f510c321031ac9babf37ef10892b Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:19:59 +0300 Subject: [PATCH 26/43] remove deprecated module pytensor/tensor/random/opt.py --- pytensor/tensor/random/opt.py | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 pytensor/tensor/random/opt.py diff --git a/pytensor/tensor/random/opt.py b/pytensor/tensor/random/opt.py deleted file mode 100644 index d8ce2b7b87..0000000000 --- a/pytensor/tensor/random/opt.py +++ /dev/null @@ -1,10 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.tensor.random.opt` is deprecated; use `pytensor.tensor.random.rewriting` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pytensor.tensor.random.rewriting import * # noqa: F401 E402 F403 From f6ec71b258c68deae8caff8641c845f95dc14f72 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:21:26 +0300 Subject: [PATCH 27/43] remove deprecated module pytensor/link/jax/jax_dispatch.py --- pytensor/link/jax/jax_dispatch.py | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 pytensor/link/jax/jax_dispatch.py diff --git a/pytensor/link/jax/jax_dispatch.py b/pytensor/link/jax/jax_dispatch.py deleted file mode 100644 index 6dc40f6acb..0000000000 --- a/pytensor/link/jax/jax_dispatch.py +++ /dev/null @@ -1,9 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.link.jax.jax_dispatch` is deprecated " - "and has been renamed to `pytensor.link.jax.dispatch`", - DeprecationWarning, - stacklevel=2, -) From 0b92f87489c12b6d0f38e4794c0424810284bd0f Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:22:05 +0300 Subject: [PATCH 28/43] remove deprecated module pytensor/link/jax/jax_linker.py --- pytensor/link/jax/jax_linker.py | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 pytensor/link/jax/jax_linker.py diff --git a/pytensor/link/jax/jax_linker.py b/pytensor/link/jax/jax_linker.py deleted file mode 100644 index 0405f031a2..0000000000 --- a/pytensor/link/jax/jax_linker.py +++ /dev/null @@ -1,9 +0,0 @@ -import warnings - - -warnings.warn( - "The module `pytensor.link.jax.jax_linker` is deprecated " - "and has been renamed to `pytensor.link.jax.linker`", - DeprecationWarning, - stacklevel=2, -) From efe448c6d13843bafb6bcb3a8bb2f09b55cbe496 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:23:13 +0300 Subject: [PATCH 29/43] remove deprecated pytensoor.change_flags --- pytensor/__init__.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pytensor/__init__.py b/pytensor/__init__.py index 2319b98698..8cd4bcd972 100644 --- a/pytensor/__init__.py +++ b/pytensor/__init__.py @@ -171,27 +171,3 @@ def get_scalar_constant_value(v): # imports were executed, we can warn about remaining flags provided by the user # through PYTENSOR_FLAGS. config.warn_unused_flags() - -DEPRECATED_NAMES = [ - ( - "change_flags", - "`pytensor.change_flags` is deprecated: use `pytensor.config.change_flags` instead.", - config.change_flags, - ), -] - - -def __getattr__(name): - """Intercept module-level attribute access of deprecated symbols. - - Adapted from https://stackoverflow.com/a/55139609/3006474. 
- - """ - from warnings import warn - - for old_name, msg, old_object in DEPRECATED_NAMES: - if name == old_name: - warn(msg, DeprecationWarning, stacklevel=2) - return old_object - - raise AttributeError(f"module {__name__} has no attribute {name}") From bac999daef84e652ab43548158b2a44acf83f024 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:24:41 +0300 Subject: [PATCH 30/43] remove test for a scipy deprecated submodule --- tests/tensor/test_math_scipy.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/tensor/test_math_scipy.py b/tests/tensor/test_math_scipy.py index 7e7d200fd3..6d1b0ad576 100644 --- a/tests/tensor/test_math_scipy.py +++ b/tests/tensor/test_math_scipy.py @@ -724,12 +724,6 @@ def expected_log1mexp(x): inplace=True, ) - -def test_deprecated_module(): - with pytest.warns(DeprecationWarning): - import pytensor.scalar.basic_scipy # noqa: F401 - - _good_broadcast_ternary_betainc = dict( normal=( random_ranged(0, 1000, (2, 3)), From b007acaa546f8885e59b6595fc832c900d6d0254 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 15:33:29 +0300 Subject: [PATCH 31/43] remove deprecation utility, favor pydeprecate (https://pypi.org/project/pyDeprecate/) --- environment.yml | 2 ++ pytensor/utils.py | 44 +------------------------------------------- 2 files changed, 3 insertions(+), 43 deletions(-) diff --git a/environment.yml b/environment.yml index ddcc6f41a2..639eedc617 100644 --- a/environment.yml +++ b/environment.yml @@ -16,6 +16,7 @@ dependencies: - logical-unification - miniKanren - cons + - pydeprecate # Intel BLAS - mkl - mkl-service @@ -49,3 +50,4 @@ dependencies: # optional - sympy - cython + diff --git a/pytensor/utils.py b/pytensor/utils.py index 5af7f673c6..fdbba34c34 100644 --- a/pytensor/utils.py +++ b/pytensor/utils.py @@ -1,17 +1,14 @@ """Utility functions that only depend on the standard library.""" import hashlib -import inspect import logging import os import struct import subprocess import sys -import traceback -import warnings from collections import OrderedDict from collections.abc import Callable -from functools import partial, wraps +from functools import partial from typing import List, Set @@ -19,7 +16,6 @@ "get_unbound_function", "maybe_add_to_os_environ_pathlist", "DefaultOrderedDict", - "deprecated", "subprocess_Popen", "call_subprocess_Popen", "output_subprocess_Popen", @@ -140,44 +136,6 @@ def maybe_add_to_os_environ_pathlist(var, newpath): pass -def deprecated(message: str = ""): - """ - This is a decorator which can be used to mark functions - as deprecated. It will result in a warning being emitted - when the function is used first time and filter is set for show DeprecationWarning. - - Taken from https://stackoverflow.com/a/40899499/4473230 - """ - - def decorator_wrapper(func): - @wraps(func) - def function_wrapper(*args, **kwargs): - nonlocal message - - current_call_source = "|".join( - traceback.format_stack(inspect.currentframe()) - ) - if current_call_source not in function_wrapper.last_call_source: - - if not message: - message = f"Function {func.__name__} is deprecated." - - warnings.warn( - message, - category=DeprecationWarning, - stacklevel=2, - ) - function_wrapper.last_call_source.add(current_call_source) - - return func(*args, **kwargs) - - function_wrapper.last_call_source = set() - - return function_wrapper - - return decorator_wrapper - - def subprocess_Popen(command, **params): """ Utility function to work around windows behavior that open windows. 
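Patch 31 above swaps the hand-rolled `deprecated` decorator out of `pytensor/utils.py` in favor of the pyDeprecate package added to `environment.yml`. As a rough sketch of the intended migration path — not part of the diff, using hypothetical function names and assuming pyDeprecate's documented `deprecated(target=..., deprecated_in=..., remove_in=...)` decorator — a replacement shim could look like this:

    # Sketch only: relies on pyDeprecate (https://pypi.org/project/pyDeprecate/);
    # `stack_tensors` and `stack_tensors_old` are hypothetical names, not PyTensor API.
    from deprecate import deprecated


    def stack_tensors(*tensors):
        """The new, preferred implementation."""
        return list(tensors)


    @deprecated(target=stack_tensors, deprecated_in="2.9.0", remove_in="3.0.0")
    def stack_tensors_old(*tensors):
        """Deprecated alias; pyDeprecate forwards calls to ``stack_tensors``."""


    # Calling the old name emits a deprecation warning and returns the result of
    # the new implementation, so the wrapper body can stay empty.
    stack_tensors_old(1, 2, 3)

Unlike the removed in-tree helper, which only wrapped the original function in a warning, the pyDeprecate decorator also redirects the call to its `target`, so deprecated aliases no longer need to duplicate any implementation.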
From ac2694bd90d8e56969075f8b7ae80e1cb927bc3d Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 19:38:19 +0300 Subject: [PATCH 32/43] manual curation of docs to eliminate 90% of warnings --- doc/conf.py | 3 ++- doc/environment.yml | 2 +- doc/extending/type.rst | 4 +-- doc/extending/using_params.rst | 1 - doc/library/compile/io.rst | 4 +-- doc/library/compile/opfromgraph.rst | 4 +-- doc/library/graph/index.rst | 1 - doc/library/graph/params_type.rst | 16 ------------ doc/library/index.rst | 1 - doc/library/sandbox/index.rst | 1 - doc/library/tensor/basic.rst | 7 ++---- doc/sandbox/sparse.rst | 2 +- doc/troubleshooting.rst | 6 ++--- doc/tutorial/profiling.rst | 7 +++--- environment.yml | 2 +- pytensor/graph/op.py | 2 ++ pytensor/graph/rewriting/basic.py | 2 +- pytensor/misc/pkl_utils.py | 3 ++- pytensor/tensor/elemwise.py | 2 +- pytensor/tensor/math.py | 8 +++--- pytensor/tensor/rewriting/math.py | 2 +- pytensor/tensor/var.py | 38 ++++++++++++++--------------- 22 files changed, 50 insertions(+), 68 deletions(-) delete mode 100644 doc/library/graph/params_type.rst diff --git a/doc/conf.py b/doc/conf.py index 2d186b1968..79f2fd3d47 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -113,7 +113,8 @@ # html4_writer added to Fix colon & whitespace misalignment # https://github.com/readthedocs/sphinx_rtd_theme/issues/766#issuecomment-513852197 -html4_writer = True +# https://github.com/readthedocs/sphinx_rtd_theme/issues/766#issuecomment-629666319 +# html4_writer = False html_logo = "images/pytensor_logo.svg" html_theme = "pymc_sphinx_theme" diff --git a/doc/environment.yml b/doc/environment.yml index 7d7b34c5ac..09378ea657 100644 --- a/doc/environment.yml +++ b/doc/environment.yml @@ -9,7 +9,7 @@ dependencies: - numpy - scipy - six - - sphinx>=3 + - sphinx>=5.1.0 - mock - pillow - pip diff --git a/doc/extending/type.rst b/doc/extending/type.rst index ab306e8016..d9542038b0 100644 --- a/doc/extending/type.rst +++ b/doc/extending/type.rst @@ -324,8 +324,8 @@ For certain mechanisms, you can register functions and other such things to plus your type into pytensor's mechanisms. These are optional but will allow people to use you type with familiar interfaces. -`transfer` -~~~~~~~~~~ +**`transfer`** + To plug in additional options for the transfer target, define a function which takes an PyTensor variable and a target argument and diff --git a/doc/extending/using_params.rst b/doc/extending/using_params.rst index 500c463aed..9aae7ef983 100644 --- a/doc/extending/using_params.rst +++ b/doc/extending/using_params.rst @@ -76,7 +76,6 @@ attribute :attr:`params_type` to an instance of your params Type. If you want to have multiple parameters, PyTensor provides the convenient class :class:`pytensor.link.c.params_type.ParamsType` that allows to bundle many parameters into one object that will be available in both Python (as a Python object) and C code (as a struct). - See :ref:`ParamsType tutorial and API documentation ` for more infos. For example if we decide to use an int as the params the following would be appropriate: diff --git a/doc/library/compile/io.rst b/doc/library/compile/io.rst index 165838622b..406ac89548 100644 --- a/doc/library/compile/io.rst +++ b/doc/library/compile/io.rst @@ -6,9 +6,9 @@ .. _function_inputs: -=========================================== +============================================ :mod:`io` - defines pytensor.function [TODO] -=========================================== +============================================ .. 
module:: pytensor.compile.io :platform: Unix, Windows diff --git a/doc/library/compile/opfromgraph.rst b/doc/library/compile/opfromgraph.rst index f468c85a8f..e2407f3fba 100644 --- a/doc/library/compile/opfromgraph.rst +++ b/doc/library/compile/opfromgraph.rst @@ -2,9 +2,9 @@ .. _opfromgraph: -============ +============= `OpFromGraph` -============ +============= This page describes :class:`pytensor.compile.builders.OpFromGraph `, an `Op` constructor that allows one to diff --git a/doc/library/graph/index.rst b/doc/library/graph/index.rst index 7aea11f794..1328d193fd 100644 --- a/doc/library/graph/index.rst +++ b/doc/library/graph/index.rst @@ -18,5 +18,4 @@ features op type - params_type utils diff --git a/doc/library/graph/params_type.rst b/doc/library/graph/params_type.rst deleted file mode 100644 index c0233ff9e1..0000000000 --- a/doc/library/graph/params_type.rst +++ /dev/null @@ -1,16 +0,0 @@ -.. _libdoc_graph_params_type: - -======================================================================= -:mod:`pytensor.graph.params_type` -- Wrapper class for :class:`Op` params -======================================================================= - ---------- -Reference ---------- - -.. automodule:: pytensor.graph.params_type - :platform: Unix, Windows - :synopsis: Wrapper class for op params - :members: - :member-order: bysource -.. moduleauthor:: LISA diff --git a/doc/library/index.rst b/doc/library/index.rst index 3eb3f5b7f7..6a05a5a7bf 100644 --- a/doc/library/index.rst +++ b/doc/library/index.rst @@ -19,7 +19,6 @@ Modules config d3viz/index graph/index - gpuarray/index gradient misc/pkl_utils printing diff --git a/doc/library/sandbox/index.rst b/doc/library/sandbox/index.rst index f8f742de3a..b4012cd9df 100644 --- a/doc/library/sandbox/index.rst +++ b/doc/library/sandbox/index.rst @@ -14,4 +14,3 @@ :maxdepth: 1 linalg - neighbours diff --git a/doc/library/tensor/basic.rst b/doc/library/tensor/basic.rst index 5ad7601100..a386de0032 100644 --- a/doc/library/tensor/basic.rst +++ b/doc/library/tensor/basic.rst @@ -557,6 +557,7 @@ them perfectly, but a `dscalar` otherwise. .. method:: astype(dtype) .. method:: take(indices, axis=None, mode='raise') .. method:: copy() + :noindex: Return a new symbolic variable that is a copy of the variable. Does not copy the tag. @@ -667,11 +668,7 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`. >>> pytensor.tensor.shape_padaxis(tensor, axis=-1) InplaceDimShuffle{0,1,2,x}.0 -.. autofunction:: unbroadcast(x, *axes) - -.. autofunction:: addbroadcast(x, *axes) - -.. autofunction:: patternbroadcast(x, broadcastable) +.. autofunction:: specify_shape(x, shape) .. function:: flatten(x, ndim=1) diff --git a/doc/sandbox/sparse.rst b/doc/sandbox/sparse.rst index 04bc0d781f..27ccb8c449 100644 --- a/doc/sandbox/sparse.rst +++ b/doc/sandbox/sparse.rst @@ -120,7 +120,7 @@ Misc The sparse equivalent of `dmatrix` is `csc_matrix` and `csr_matrix`. :class:`~pytensor.sparse.basic.Dot` vs. :class:`~pytensor.sparse.basic.StructuredDot` ---------------------------------------------------------------------------------- +------------------------------------------------------------------------------------- Often when you use a sparse matrix it is because there is a meaning to the structure of non-zeros. The gradient on terms outside that structure diff --git a/doc/troubleshooting.rst b/doc/troubleshooting.rst index b5e1f2c809..26d2a1ffc2 100644 --- a/doc/troubleshooting.rst +++ b/doc/troubleshooting.rst @@ -16,7 +16,7 @@ Here are Linux troubleshooting instructions. 
There is a specific `MacOS`_ sectio .. _network_error_proxy: Why do I get a network error when I install PyTensor -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you are behind a proxy, you must do some extra configuration steps before starting the installation. You must set the environment @@ -69,7 +69,7 @@ large enough. .. _float64_output: pytensor.function returns a float64 when the inputs are float32 and int{32, 64} -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It should be noted that using float32 and int{32, 64} together inside a function would provide float64 as output. @@ -80,7 +80,7 @@ To help you find where float64 are created, see the .. _test_pytensor: How to test that PyTensor works properly -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ An easy way to check something that could be wrong is by making sure ``PYTENSOR_FLAGS`` have the desired values as well as the ``~/.pytensorrc`` diff --git a/doc/tutorial/profiling.rst b/doc/tutorial/profiling.rst index 96f70709ac..bbd9c83ec8 100644 --- a/doc/tutorial/profiling.rst +++ b/doc/tutorial/profiling.rst @@ -27,9 +27,10 @@ of the following two options: 2. Pass the argument :attr:`profile=True` to the function :func:`pytensor.function ` and then call :attr:`f.profile.summary()` for a single function. - - Use this option when you want to profile not all the - functions but only one or more specific function(s). - - You can also combine the profile results of many functions: + + - Use this option when you want to profile not all the + functions but only one or more specific function(s). + - You can also combine the profile results of many functions: .. doctest:: :hide: diff --git a/environment.yml b/environment.yml index 639eedc617..10214afdb8 100644 --- a/environment.yml +++ b/environment.yml @@ -31,7 +31,7 @@ dependencies: - pytest-cov - pytest-xdist # For building docs - - sphinx>=1.3 + - sphinx>=5.1.0 - sphinx_rtd_theme - pygments - pydot diff --git a/pytensor/graph/op.py b/pytensor/graph/op.py index b45c3a0169..10f1057e37 100644 --- a/pytensor/graph/op.py +++ b/pytensor/graph/op.py @@ -356,6 +356,8 @@ def grad( grads The gradients with respect to each `Variable` in `inputs`. + References + ---------- .. [1] Giles, Mike. 2008. “An Extended Collection of Matrix Derivative Results for Forward and Reverse Mode Automatic Differentiation.” """ diff --git a/pytensor/graph/rewriting/basic.py b/pytensor/graph/rewriting/basic.py index 6d93a3c694..1af3ff743f 100644 --- a/pytensor/graph/rewriting/basic.py +++ b/pytensor/graph/rewriting/basic.py @@ -168,7 +168,7 @@ def transform( - ``False`` to indicate that this rewrite cannot be applied to `node` - A list of `Variable`\s to use in place of the `node`'s current outputs - A ``dict`` mapping old `Variable`\s to `Variable`\s, or the key - ``"remove"`` mapping to a list of `Variable`\s to be removed. + ``"remove"`` mapping to a list of `Variable`\s to be removed. 
Parameters ---------- diff --git a/pytensor/misc/pkl_utils.py b/pytensor/misc/pkl_utils.py index 322619345c..84a4ca93e4 100644 --- a/pytensor/misc/pkl_utils.py +++ b/pytensor/misc/pkl_utils.py @@ -47,6 +47,8 @@ class StripPickler(Pickler): Example ------- + ..code-block:: python + fn_args = dict(inputs=inputs, outputs=outputs, updates=updates) @@ -54,7 +56,6 @@ class StripPickler(Pickler): with open(dest_pkl, 'wb') as f: strip_pickler = StripPickler(f, protocol=-1) strip_pickler.dump(fn_args) - """ def __init__(self, file, protocol=0, extra_tag_to_remove=None): diff --git a/pytensor/tensor/elemwise.py b/pytensor/tensor/elemwise.py index 0db7871892..4ac1ffdd33 100644 --- a/pytensor/tensor/elemwise.py +++ b/pytensor/tensor/elemwise.py @@ -1758,7 +1758,7 @@ def construct(symbol): ) if getattr(symbol, "__doc__"): - rval.__doc__ = symbol.__doc__ + "\n" + rval.__doc__ + rval.__doc__ = symbol.__doc__ + "\n\n " + rval.__doc__ # for the meaning of this see the ./epydoc script # it makes epydoc display rval as if it were a function, not an object diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index 25deba7a72..d448a32028 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -1442,18 +1442,18 @@ def betainc(a, b, x): @scalar_elemwise def real(z): - """Return real component of complex-valued tensor `z`""" + """Return real component of complex-valued tensor `z`.""" -_tensor_py_operators.real = property(real) +_tensor_py_operators.real = property(real, doc=real.__doc__) @scalar_elemwise def imag(z): - """Return imaginary component of complex-valued tensor `z`""" + """Return imaginary component of complex-valued tensor `z`.""" -_tensor_py_operators.imag = property(imag) +_tensor_py_operators.imag = property(imag, doc=imag.__doc__) @scalar_elemwise diff --git a/pytensor/tensor/rewriting/math.py b/pytensor/tensor/rewriting/math.py index 644b9f56c0..590625445f 100644 --- a/pytensor/tensor/rewriting/math.py +++ b/pytensor/tensor/rewriting/math.py @@ -1,4 +1,4 @@ -r"""Rewrites for the `Op`\s in `pytensor.tensor.math`.""" +r"""Rewrites for the `Op`\s in :mod:`pytensor.tensor.math`.""" import itertools import operator diff --git a/pytensor/tensor/var.py b/pytensor/tensor/var.py index 9d3ce67e80..1d74955237 100644 --- a/pytensor/tensor/var.py +++ b/pytensor/tensor/var.py @@ -654,13 +654,13 @@ def __rdot__(right, left): __rmatmul__ = __rdot__ def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): - """See `pytensor.tensor.math.sum`.""" + """See :func:`pytensor.tensor.math.sum`.""" return at.math.sum( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): - """See `pytensor.tensor.math.prod`.""" + """See :func:`pytensor.tensor.math.prod`.""" return at.math.prod( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) @@ -681,73 +681,73 @@ def norm(self, L, axis=None, keepdims=False): return y def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): - """See `pytensor.tensor.math.mean`.""" + """See :func:`pytensor.tensor.math.mean`.""" return at.math.mean( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) def var(self, axis=None, ddof=0, keepdims=False, corrected=False): - """See `pytensor.tensor.math.var`.""" + """See :func:`pytensor.tensor.math.var`.""" return at.math.var( self, axis=axis, ddof=ddof, keepdims=keepdims, corrected=corrected ) def std(self, axis=None, ddof=0, keepdims=False, corrected=False): - """See 
`pytensor.tensor.math.std`.""" + """See :func:`pytensor.tensor.math.std`.""" return at.math.std( self, axis=axis, ddof=ddof, keepdims=keepdims, corrected=corrected ) def min(self, axis=None, keepdims=False): - """See `pytensor.tensor.math.min`.""" + """See :func:`pytensor.tensor.math.min`.""" return at.math.min(self, axis, keepdims=keepdims) def max(self, axis=None, keepdims=False): - """See `pytensor.tensor.math.max`.""" + """See :func:`pytensor.tensor.math.max`.""" return at.math.max(self, axis, keepdims=keepdims) def argmin(self, axis=None, keepdims=False): - """See `pytensor.tensor.math.argmin`.""" + """See :func:`pytensor.tensor.math.argmin`.""" return at.math.argmin(self, axis, keepdims=keepdims) def argmax(self, axis=None, keepdims=False): - """See `pytensor.tensor.math.argmax`.""" + """See :func:`pytensor.tensor.math.argmax`.""" return at.math.argmax(self, axis, keepdims=keepdims) def nonzero(self, return_matrix=False): - """See `pytensor.tensor.basic.nonzero`.""" + """See :func:`pytensor.tensor.basic.nonzero`.""" return at.nonzero(self, return_matrix=return_matrix) def nonzero_values(self): - """See `pytensor.tensor.basic.nonzero_values`.""" + """See :func:`pytensor.tensor.basic.nonzero_values`.""" return at.nonzero_values(self) def sort(self, axis=-1, kind="quicksort", order=None): - """See `pytensor.tensor.sort.sort`.""" + """See :func:`pytensor.tensor.sort.sort`.""" return at.sort(self, axis, kind, order) def argsort(self, axis=-1, kind="quicksort", order=None): - """See `pytensor.tensor.sort.argsort`.""" + """See :func:`pytensor.tensor.sort.argsort`.""" from pytensor.tensor.sort import argsort return argsort(self, axis, kind, order) def clip(self, a_min, a_max): - "See `pytensor.tensor.math.clip`." + "See :func:`pytensor.tensor.math.clip`." return at.math.clip(self, a_min, a_max) def conj(self): - """See `pytensor.tensor.math.conj`.""" + """See :func:`pytensor.tensor.math.conj`.""" return at.math.conj(self) conjugate = conj def repeat(self, repeats, axis=None): - """See `pytensor.tensor.basic.repeat`.""" + """See :func:`pytensor.tensor.basic.repeat`.""" return at.extra_ops.repeat(self, repeats, axis) def round(self, mode=None): - """See `pytensor.tensor.math.round`.""" + """See :func:`pytensor.tensor.math.round`.""" return at.math.round(self, mode) def trace(self): @@ -775,12 +775,12 @@ def searchsorted(self, v, side="left", sorter=None): return at.extra_ops.searchsorted(self, v, side, sorter) def ptp(self, axis=None): - """See `pytensor.tensor.math.ptp`.""" + """See :func:`pytensor.tensor.math.ptp`.""" return at.math.ptp(self, axis) def swapaxes(self, axis1, axis2): - """See `pytensor.tensor.basic.swapaxes`. + """See :func:`pytensor.tensor.basic.swapaxes`. If a matrix is provided with the right axes, its transpose will be returned. 
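Most of patch 32's Python changes convert bare backtick references in docstrings, which Sphinx interprets through the default role, into explicit `:func:` cross-references, so the built API documentation links to the target function instead of leaving a plain literal. A minimal before/after sketch — illustration only, with a hypothetical `total` function rather than lines taken from the diff:

    # Before: the default-role reference is not guaranteed to resolve to a link
    # in the Sphinx-built docs.
    def total(x, axis=None):
        """See `pytensor.tensor.math.sum`."""


    # After: the explicit Python-function role asks Sphinx to cross-reference
    # the documented callable directly.
    def total(x, axis=None):
        """See :func:`pytensor.tensor.math.sum`."""

The rest of the patch is largely mechanical reStructuredText cleanup: section underlines are lengthened to match their titles and obsolete toctree entries are dropped, which accounts for most of the eliminated Sphinx build warnings.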
From 4c598f84502b13c54ced8feafe4a0ddce7ef172d Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 19:44:21 +0300 Subject: [PATCH 33/43] remove sympy support --- environment.yml | 1 - pytensor/scalar/basic_sympy.py | 117 ------------------------------- tests/scalar/test_basic_sympy.py | 41 ----------- 3 files changed, 159 deletions(-) delete mode 100644 pytensor/scalar/basic_sympy.py delete mode 100644 tests/scalar/test_basic_sympy.py diff --git a/environment.yml b/environment.yml index 10214afdb8..c4c9539920 100644 --- a/environment.yml +++ b/environment.yml @@ -48,6 +48,5 @@ dependencies: - packaging - typing_extensions # optional - - sympy - cython diff --git a/pytensor/scalar/basic_sympy.py b/pytensor/scalar/basic_sympy.py deleted file mode 100644 index c7a8cdd2f5..0000000000 --- a/pytensor/scalar/basic_sympy.py +++ /dev/null @@ -1,117 +0,0 @@ -import itertools as it - -from pytensor.scalar.basic import Apply, ScalarOp, as_scalar, float32, float64, int64 - - -imported_sympy = False -try: - from sympy.utilities.codegen import codegen, get_default_datatype - - imported_sympy = True -except ImportError: - pass - -names = (f"sympy_func_{int(i)}" for i in it.count(0)) - - -def include_line(line): - return "#include" in line - - -def sympy_dtype(expr): - return get_default_datatype(expr).cname - - -def pytensor_dtype(expr): - return {"double": float64, "float": float32, "int": int64}[sympy_dtype(expr)] - - -class SymPyCCode(ScalarOp): - """ - An Operator that wraps SymPy's C code generation. - - Examples - -------- - >>> from sympy.abc import x, y # SymPy Variables - >>> from pytensor.scalar.basic_sympy import SymPyCCode - >>> op = SymPyCCode([x, y], x + y) - - >>> from pytensor.scalar.basic import floats - >>> xt, yt = floats('xy') # PyTensor variables - >>> zt = op(xt, yt) - - >>> import pytensor - >>> f = pytensor.function([xt, yt], zt) - >>> f(1.0, 2.0) - 3.0 - - """ - - def __init__(self, inputs, expr, name=None): - self.name = name or next(names) - self.inputs = inputs - self.expr = expr - - def _sympy_c_code(self): - [(c_name, c_code), (h_name, c_header)] = codegen( - (self.name, self.expr), - "C", - "project_name", - header=False, - argument_sequence=self.inputs, - ) - return c_code - - def c_support_code(self, **kwargs): - c_code = self._sympy_c_code() - return "\n".join([x for x in c_code.split("\n") if not include_line(x)]) - - def c_headers(self, **kwargs): - c_code = self._sympy_c_code() - return [ - line.replace("#include", "").strip() - for line in c_code.split("\n") - if include_line(line) and "project_name" not in line - ] - - def c_code(self, node, name, input_names, output_names, sub): - (y,) = output_names - xs = ", ".join(input_names) - f = self.name - return f"{y} = {f}({xs});" - - def output_types_preference(self, *inputs): - return [pytensor_dtype(self.expr)] - - def make_node(self, *inputs): - # TODO: assert input types are correct use get_default_datatype - - if len(inputs) != len(self.inputs): - raise TypeError( - "Wrong number of inputs for %s.make_node (got %i(%s), expected %i)" - % (self, len(inputs), str(inputs), self.nin) - ) - - inputs = [as_scalar(input) for input in inputs] - outputs = [t() for t in self.output_types([input.type for input in inputs])] - return Apply(self, inputs, outputs) - - def perform(self, node, inputs, output_storage): - raise NotImplementedError() - - def grad(self, inputs, output_grads): - return [ - SymPyCCode( - self.inputs, self.expr.diff(inp), name=self.name + f"_grad_{int(i)}" - )(*inputs) - for i, inp in 
enumerate(self.inputs) - ] - - def _info(self): - return type(self), self.name, tuple(self.inputs), self.expr - - def __eq__(self, other): - return type(self) == type(other) and self._info() == other._info() - - def __hash__(self): - return hash(self._info()) diff --git a/tests/scalar/test_basic_sympy.py b/tests/scalar/test_basic_sympy.py deleted file mode 100644 index 2cfb477a36..0000000000 --- a/tests/scalar/test_basic_sympy.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest - -import pytensor -from pytensor.graph.fg import FunctionGraph -from pytensor.link.c.basic import CLinker -from pytensor.scalar.basic import floats -from pytensor.scalar.basic_sympy import SymPyCCode -from tests.link.test_link import make_function - - -sympy = pytest.importorskip("sympy") - - -xs = sympy.Symbol("x") -ys = sympy.Symbol("y") - -xt, yt = floats("xy") - - -@pytest.mark.skipif(not pytensor.config.cxx, reason="Need cxx for this test") -def test_SymPyCCode(): - op = SymPyCCode([xs, ys], xs + ys) - e = op(xt, yt) - g = FunctionGraph([xt, yt], [e]) - fn = make_function(CLinker().accept(g)) - assert fn(1.0, 2.0) == 3.0 - - -def test_grad(): - op = SymPyCCode([xs], xs**2) - zt = op(xt) - ztprime = pytensor.grad(zt, xt) - assert ztprime.owner.op.expr == 2 * xs - - -def test_multivar_grad(): - op = SymPyCCode([xs, ys], xs**2 + ys**3) - zt = op(xt, yt) - dzdx, dzdy = pytensor.grad(zt, [xt, yt]) - assert dzdx.owner.op.expr == 2 * xs - assert dzdy.owner.op.expr == 3 * ys**2 From 05a8db29fe8f6c36037c883b8774139796f7d4ce Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 21:02:21 +0300 Subject: [PATCH 34/43] remove gpu related script pytensor/misc/latence_gpu_transfert.py --- pytensor/misc/latence_gpu_transfert.py | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 pytensor/misc/latence_gpu_transfert.py diff --git a/pytensor/misc/latence_gpu_transfert.py b/pytensor/misc/latence_gpu_transfert.py deleted file mode 100644 index 433a849643..0000000000 --- a/pytensor/misc/latence_gpu_transfert.py +++ /dev/null @@ -1,24 +0,0 @@ -import time - -import numpy as np - -import pytensor - - -y = pytensor.tensor.type.fvector() -x = pytensor.shared(np.zeros(1, dtype="float32")) -f1 = pytensor.function([y], updates={x: y}) -f2 = pytensor.function([], x.transfer("cpu")) -print(f1.maker.fgraph.toposort()) -print(f2.maker.fgraph.toposort()) -for i in (1, 10, 100, 1000, 10000, 100000, 1000000, 10000000): - o = np.zeros(i, dtype="float32") - t0 = time.perf_counter() - f1(o) - t1 = time.perf_counter() - tf1 = t1 - t0 - t0 = time.perf_counter() - f2() - t1 = time.perf_counter() - - print("%8i %6.1f ns %7.1f ns" % (i, tf1 * 1e6, (t1 - t0) * 1e6)) From 1aef5214c6d14bad98a1996705d9eb36bc2257f0 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 14 Dec 2022 21:23:48 +0300 Subject: [PATCH 35/43] remove unused internal argument --- pytensor/compile/function/types.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pytensor/compile/function/types.py b/pytensor/compile/function/types.py index 3b14d5841c..073db7816d 100644 --- a/pytensor/compile/function/types.py +++ b/pytensor/compile/function/types.py @@ -1158,7 +1158,7 @@ def _constructor_Function(maker, input_storage, inputs_data, trust_input=False): if not config.unpickle_function: return None - f = maker.create(input_storage, trustme=True) + f = maker.create(input_storage) assert len(f.input_storage) == len(inputs_data) for container, x in zip(f.input_storage, inputs_data): assert ( @@ -1574,7 +1574,7 @@ def __init__( 
for i in self.inputs ] - def create(self, input_storage=None, trustme=False, storage_map=None): + def create(self, input_storage=None, storage_map=None): """ Create a function. @@ -1584,9 +1584,6 @@ def create(self, input_storage=None, trustme=False, storage_map=None): A list matching the inputs list and providing default values if the default for an input is None, then that input is a required input. For an input with an update, the default acts as initialization. - trustme - Disables some exceptions, used internally. - """ if input_storage is None: From a84cee79e024ff0fcbd865a90cebf3c1897ab2b5 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 15 Dec 2022 00:27:30 +0300 Subject: [PATCH 36/43] remove pytensor/sandbox/solve.py as it is marked deprecated --- pytensor/sandbox/solve.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 pytensor/sandbox/solve.py diff --git a/pytensor/sandbox/solve.py b/pytensor/sandbox/solve.py deleted file mode 100644 index 4b3a026e83..0000000000 --- a/pytensor/sandbox/solve.py +++ /dev/null @@ -1,11 +0,0 @@ -import warnings - - -from pytensor.tensor.slinalg import solve # noqa - -message = ( - "The module pytensor.sandbox.solve will soon be deprecated.\n" - "Please use tensor.slinalg.solve instead." -) - -warnings.warn(message) From 040054a9382bf0544f87a5b601d76407acd8137d Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 15 Dec 2022 00:32:19 +0300 Subject: [PATCH 37/43] remove pytensor/sandbox/fourier.py as it is marked deprecated --- pytensor/sandbox/fourier.py | 141 ------------------------------------ 1 file changed, 141 deletions(-) delete mode 100644 pytensor/sandbox/fourier.py diff --git a/pytensor/sandbox/fourier.py b/pytensor/sandbox/fourier.py deleted file mode 100644 index c5b2f0f1fd..0000000000 --- a/pytensor/sandbox/fourier.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Provides Ops for FFT and DCT. - -""" - - -# This module will soon be deprecated. -import warnings - -import numpy as np -import numpy.fft - -from pytensor.graph.basic import Apply -from pytensor.graph.op import Op -from pytensor.link.c.type import generic -from pytensor.tensor.basic import as_tensor -from pytensor.tensor.type import zmatrix - - -message = ( - "The module pytensor.sandbox.fourier will soon be deprecated." - " Please use pytensor.tensor.fft, which supports gradients." -) -warnings.warn(message) - - -class GradTodo(Op): - # TODO : need description for class - __props__ = () - - def make_node(self, x): - return Apply(self, [x], [x.type()]) - - def perform(self, node, inputs, outputs): - raise NotImplementedError("TODO") - - -grad_todo = GradTodo() - - -class FFT(Op): - # TODO : need description for parameters - """ - Fast Fourier Transform. - - .. TODO: - The current implementation just works for matrix inputs, and permits - taking a 1D FFT over either rows or columns. Add support for N-D FFTs - as provided by either numpy or FFTW directly. - - .. TODO: - Give the C code that uses FFTW. - - .. TODO: - Unit tests. - - """ - - default_output = 0 - # don't return the plan object in the 'buf' output - - half = False - """Only return the first half (positive-valued) of the frequency - components.""" - __props__ = ("half", "inverse") - - def __init__(self, half=False, inverse=False): - self.half = half - self.inverse = inverse - - def make_node(self, frames, n, axis): - """ - Compute an n-point fft of frames along given axis. 
- - """ - _frames = as_tensor(frames, ndim=2) - _n = as_tensor(n, ndim=0) - _axis = as_tensor(axis, ndim=0) - if self.half and _frames.type.dtype.startswith("complex"): - raise TypeError("Argument to HalfFFT must not be complex", frames) - spectrogram = zmatrix() - buf = generic() - # The `buf` output is present for future work - # when we call FFTW directly and re-use the 'plan' that FFTW creates. - # In that case, buf would store a CObject encapsulating the plan. - rval = Apply(self, [_frames, _n, _axis], [spectrogram, buf]) - return rval - - def perform(self, node, inp, out): - frames, n, axis = inp - spectrogram, buf = out - if self.inverse: - fft_fn = numpy.fft.ifft - else: - fft_fn = numpy.fft.fft - - fft = fft_fn(frames, int(n), int(axis)) - if self.half: - M, N = fft.shape - if axis == 0: - if M % 2: - raise ValueError("halfFFT on odd-length vectors is undefined") - spectrogram[0] = fft[0 : M / 2, :] - elif axis == 1: - if N % 2: - raise ValueError("halfFFT on odd-length vectors is undefined") - spectrogram[0] = fft[:, 0 : N / 2] - else: - raise NotImplementedError() - else: - spectrogram[0] = fft - - def grad(self, inp, out): - frames, n, axis = inp - g_spectrogram, g_buf = out - return [grad_todo(frames), None, None] - - -fft = FFT(half=False, inverse=False) -half_fft = FFT(half=True, inverse=False) -ifft = FFT(half=False, inverse=True) -half_ifft = FFT(half=True, inverse=True) - - -def dct_matrix(rows, cols, unitary=True): - # TODO : need description for parameters - """ - Return a (rows x cols) matrix implementing a discrete cosine transform. - - This algorithm is adapted from Dan Ellis' Rastmat spec2cep.m, lines 15-20. - - """ - rval = np.zeros((rows, cols)) - col_range = np.arange(cols) - scale = np.sqrt(2.0 / cols) - for i in range(rows): - rval[i] = np.cos(i * (col_range * 2 + 1) / (2.0 * cols) * np.pi) * scale - - if unitary: - rval[0] *= np.sqrt(0.5) - return rval From 17adb0a0b191e72a89cb2efcd0506177e439b3bb Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 15 Dec 2022 00:34:56 +0300 Subject: [PATCH 38/43] refactor deprecated api --- pytensor/tensor/basic.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py index 1b35771c53..c6b24e2324 100644 --- a/pytensor/tensor/basic.py +++ b/pytensor/tensor/basic.py @@ -2670,22 +2670,22 @@ def vertical_stack(*args): return concatenate(_args, axis=0) -def is_flat(var, ndim=None, outdim=None): +def is_flat(var, ndim=1): """ Verifies the dimensionality of the var is equal to - outdim. This method is usually called after flatten method on a - variable, where the first outdim-1 dimension size(s) of the variable + ndim. This method is usually called after flatten method on a + variable, where the first ndim-1 dimension size(s) of the variable is kept intact, and the last dimension size of the variable is made equal to the multiplication of its remaining dimension size(s), such that - the variable would end up with as many dimension as outdim. + the variable would end up with as many dimension as ndim. Parameters ---------- - var : pytensor.tensor.var.TensorVariable - the pytensor var on which the dimensionality is checked. + var : pytensor.tensor.var.TensorVariable + the pytensor var on which the dimensionality is checked. - outdim : int - the expected dimensionality of var. + ndim : int + the expected dimensionality of var. 
Returns ------- @@ -2693,13 +2693,6 @@ def is_flat(var, ndim=None, outdim=None): the comparison result of var's dim and the expected outdim. """ - if outdim is None and ndim is None: - ndim = 1 - elif outdim is not None and ndim is not None: - raise ValueError("You should only specify ndim") - elif outdim is not None: - warnings.warn("outdim` is deprecated; use `ndim` instead.") - ndim = outdim return var.ndim == ndim From 393cafd8ff54cf939edc5896e7ee2276beedf152 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 15 Dec 2022 00:39:15 +0300 Subject: [PATCH 39/43] remove deprecated type from pytensor/sparse/type.py --- pytensor/sparse/type.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pytensor/sparse/type.py b/pytensor/sparse/type.py index b471af7c52..a38b80179a 100644 --- a/pytensor/sparse/type.py +++ b/pytensor/sparse/type.py @@ -251,6 +251,3 @@ def is_super(self, otype): """, 1, ) - -# This is a deprecated alias used for (temporary) backward-compatibility -SparseType = SparseTensorType From 64b356e1e656636b2c531f131d61ff7782ccb5c7 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 15 Dec 2022 12:13:50 +0300 Subject: [PATCH 40/43] bring back deprecated configvals --- pytensor/configdefaults.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pytensor/configdefaults.py b/pytensor/configdefaults.py index 6efc6c3ff1..c20d4bdcff 100644 --- a/pytensor/configdefaults.py +++ b/pytensor/configdefaults.py @@ -1195,6 +1195,27 @@ def add_vm_configvars(): ) +def add_deprecated_configvars(): + + # TODO: remove this? Agree + config.add( + "unittests__rseed", + "Seed to use for randomized unit tests. " + "Special value 'random' means using a seed of None.", + StrParam(666, validate=_good_seem_param), + in_c_key=False, + ) + + config.add( + "warn__round", + "Warn when using `tensor.round` with the default mode. " + "Round changed its default from `half_away_from_zero` to " + "`half_to_even` to have the same default as NumPy.", + BoolParam(_warn_default("0.9")), + in_c_key=False, + ) + + def add_scan_configvars(): config.add( "scan__allow_gc", @@ -1441,6 +1462,7 @@ def add_caching_dir_configvars(): # Blas-related config are a special pain-point, because their addition depends on a lot of stuff from # that module, which introduces a circular dependency! 
add_metaopt_configvars() +add_deprecated_configvars() add_vm_configvars() add_numba_configvars() From 0398c036224149db9137b495c3a3342bfa6d4edd Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 15 Dec 2022 14:40:00 +0300 Subject: [PATCH 41/43] remove deleted files from mypy script and improve the structure --- scripts/mypy-failing.txt | 36 +++++++++++++++++++++++++ scripts/run_mypy.py | 58 ++++++++++------------------------------ 2 files changed, 50 insertions(+), 44 deletions(-) create mode 100644 scripts/mypy-failing.txt diff --git a/scripts/mypy-failing.txt b/scripts/mypy-failing.txt new file mode 100644 index 0000000000..307dcb5572 --- /dev/null +++ b/scripts/mypy-failing.txt @@ -0,0 +1,36 @@ +pytensor/compile/builders.py +pytensor/compile/compilelock.py +pytensor/compile/debugmode.py +pytensor/compile/function/pfunc.py +pytensor/compile/function/types.py +pytensor/compile/mode.py +pytensor/compile/sharedvalue.py +pytensor/graph/rewriting/basic.py +pytensor/ifelse.py +pytensor/link/basic.py +pytensor/link/numba/dispatch/elemwise.py +pytensor/link/numba/dispatch/random.py +pytensor/link/numba/dispatch/scan.py +pytensor/printing.py +pytensor/raise_op.py +pytensor/scalar/basic.py +pytensor/sparse/basic.py +pytensor/sparse/type.py +pytensor/tensor/basic.py +pytensor/tensor/blas.py +pytensor/tensor/blas_c.py +pytensor/tensor/blas_headers.py +pytensor/tensor/elemwise.py +pytensor/tensor/extra_ops.py +pytensor/tensor/math.py +pytensor/tensor/random/basic.py +pytensor/tensor/random/op.py +pytensor/tensor/random/utils.py +pytensor/tensor/rewriting/basic.py +pytensor/tensor/rewriting/elemwise.py +pytensor/tensor/shape.py +pytensor/tensor/slinalg.py +pytensor/tensor/subtensor.py +pytensor/tensor/type.py +pytensor/tensor/type_other.py +pytensor/tensor/var.py \ No newline at end of file diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index 886511db39..64c36cdb85 100644 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -20,48 +20,11 @@ DP_ROOT = pathlib.Path(__file__).absolute().parent.parent -FAILING = """ -pytensor/compile/builders.py -pytensor/compile/compilelock.py -pytensor/compile/debugmode.py -pytensor/compile/function/pfunc.py -pytensor/compile/function/types.py -pytensor/compile/mode.py -pytensor/compile/sharedvalue.py -pytensor/graph/rewriting/basic.py -pytensor/ifelse.py -pytensor/link/basic.py -pytensor/link/numba/dispatch/elemwise.py -pytensor/link/numba/dispatch/random.py -pytensor/link/numba/dispatch/scan.py -pytensor/printing.py -pytensor/raise_op.py -pytensor/sandbox/rng_mrg.py -pytensor/scalar/basic.py -pytensor/sparse/basic.py -pytensor/sparse/type.py -pytensor/tensor/basic.py -pytensor/tensor/blas.py -pytensor/tensor/blas_c.py -pytensor/tensor/blas_headers.py -pytensor/tensor/elemwise.py -pytensor/tensor/extra_ops.py -pytensor/tensor/math.py -pytensor/tensor/nnet/abstract_conv.py -pytensor/tensor/nnet/ctc.py -pytensor/tensor/nnet/neighbours.py -pytensor/tensor/random/basic.py -pytensor/tensor/random/op.py -pytensor/tensor/random/utils.py -pytensor/tensor/rewriting/basic.py -pytensor/tensor/rewriting/elemwise.py -pytensor/tensor/shape.py -pytensor/tensor/slinalg.py -pytensor/tensor/subtensor.py -pytensor/tensor/type.py -pytensor/tensor/type_other.py -pytensor/tensor/var.py -""" +FAILING = [ + line.strip() + for line in (DP_ROOT / "scripts" / "mypy-failing.txt").read_text().splitlines() + if line.strip() +] def enforce_pep561(module_name): @@ -130,7 +93,7 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]): + "\n".join(sorted(map(str, failing - 
all_files))) ) passing = all_files - failing - expected_failing = set(FAILING.strip().split("\n")) - {""} + expected_failing = set(FAILING) unexpected_failing = failing - expected_failing unexpected_passing = passing.intersection(expected_failing) @@ -177,7 +140,14 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]): help="How to group verbose output. One of {file|errorcode|message}.", ) args, _ = parser.parse_known_args() - + missing = list() + for path in FAILING: + if not os.path.exists(path): + missing.append(path) + if missing: + print("These files are missing but still kept in FAILING") + print("\n".join(missing)) + sys.exit(1) cp = subprocess.run( ["mypy", "--show-error-codes", "pytensor"], capture_output=True, From 61a6e4afb6c3ed52e0121ab1c67826efd3253f45 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 16 Dec 2022 10:00:45 +0300 Subject: [PATCH 42/43] remove toposort from pytensor/graph/utils.py it was not referenced anywhere --- pytensor/graph/utils.py | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/pytensor/graph/utils.py b/pytensor/graph/utils.py index 92e4af193f..dd79a48e69 100644 --- a/pytensor/graph/utils.py +++ b/pytensor/graph/utils.py @@ -379,44 +379,3 @@ def clear(self): def __repr__(self): return f"AssocList({self._dict}, {self._list})" - - -def toposort(prereqs_d): - """ - Sorts prereqs_d.keys() topologically. - - prereqs_d[x] contains all the elements that must come before x - in the ordering. - - """ - - # all1 = set(prereqs_d.keys()) - # all2 = set() - # for x, y in prereqs_d.items(): - # all2.update(y) - # print all1.difference(all2) - - seq = [] - done = set() - postreqs_d = {} - for x, prereqs in prereqs_d.items(): - for prereq in prereqs: - postreqs_d.setdefault(prereq, set()).add(x) - next = {k for k in prereqs_d if not prereqs_d[k]} - while next: - bases = next - next = set() - for x in bases: - done.add(x) - seq.append(x) - for x in bases: - for postreq in postreqs_d.get(x, []): - if not prereqs_d[postreq].difference(done): - next.add(postreq) - if len(prereqs_d) != len(seq): - raise Exception( - "Cannot sort topologically: there might be cycles, " - "prereqs_d does not have a key for each element or " - "some orderings contain invalid elements." - ) - return seq From 2c413f36339ca54f8b4982f2ef5bbc141d5fc291 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 16 Dec 2022 10:01:36 +0300 Subject: [PATCH 43/43] remove D from pytensor/graph/utils.py it was not referenced anywhere --- pytensor/graph/utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pytensor/graph/utils.py b/pytensor/graph/utils.py index dd79a48e69..9b4b815bb9 100644 --- a/pytensor/graph/utils.py +++ b/pytensor/graph/utils.py @@ -304,11 +304,6 @@ def __setattr__(self, attr, obj): return object.__setattr__(self, attr, obj) -class D: - def __init__(self, **d): - self.__dict__.update(d) - - class AssocList: """An associative list.