-
-
-Theano is the deep-learning library PyMC3 uses to construct probability distributions and then access the gradient in order to implement cutting edge inference algorithms. More advanced models may be built by understanding this layer.
+
+
+Aesara is the library PyMC3 uses to construct probability distributions and then access the gradient in order to implement cutting edge inference algorithms. More advanced models may be built by understanding this layer.
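A minimal sketch of how this layer is used once the rename below is applied; the model, the call to pm.aesaraf.gradient, and the evaluated value are illustrative assumptions, not part of the patch itself:

import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal("x", mu=0.0, sigma=1.0)

# model.logpt is the joint log-probability as an Aesara scalar;
# pymc3.aesaraf.gradient (moved from pymc3.theanof below) flattens
# d(logp)/d(free variables) into a single vector via the Aesara backend.
dlogp = pm.aesaraf.gradient(model.logpt, model.free_RVs)
print(dlogp.eval({x: 0.5}))  # approx. [-0.5] for a standard normal at x = 0.5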
diff --git a/pymc3/__init__.py b/pymc3/__init__.py
index 1e51deeb64..8f33feef09 100644
--- a/pymc3/__init__.py
+++ b/pymc3/__init__.py
@@ -29,16 +29,17 @@
def __set_compiler_flags():
- # Workarounds for Theano compiler problems on various platforms
- import theano
+ # Workarounds for Aesara compiler problems on various platforms
+ import aesara
- current = theano.config.gcc__cxxflags
- theano.config.gcc__cxxflags = f"{current} -Wno-c++11-narrowing"
+ current = aesara.config.gcc__cxxflags
+ aesara.config.gcc__cxxflags = f"{current} -Wno-c++11-narrowing"
__set_compiler_flags()
from pymc3 import gp, ode, sampling
+from pymc3.aesaraf import *
from pymc3.backends import load_trace, save_trace
from pymc3.backends.tracetab import *
from pymc3.blocking import *
@@ -63,7 +64,6 @@ def __set_compiler_flags():
from pymc3.smc import *
from pymc3.step_methods import *
from pymc3.tests import test
-from pymc3.theanof import *
from pymc3.tuning import *
from pymc3.variational import *
from pymc3.vartypes import *
diff --git a/pymc3/theanof.py b/pymc3/aesaraf.py
similarity index 79%
rename from pymc3/theanof.py
rename to pymc3/aesaraf.py
index c40311da6e..87b370e55f 100644
--- a/pymc3/theanof.py
+++ b/pymc3/aesaraf.py
@@ -12,14 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
-import theano
-from theano import scalar
-from theano import tensor as tt
-from theano.graph.basic import Apply, graph_inputs
-from theano.graph.op import Op
-from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream
+from aesara import scalar
+from aesara import tensor as aet
+from aesara.gradient import grad
+from aesara.graph.basic import Apply, graph_inputs
+from aesara.graph.op import Op
+from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream
+from aesara.tensor.elemwise import Elemwise
+from aesara.tensor.var import TensorVariable
from pymc3.blocking import ArrayOrdering
from pymc3.data import GeneratorAdapter
@@ -39,34 +42,34 @@
"join_nonshared_inputs",
"make_shared_replacements",
"generator",
- "set_tt_rng",
- "tt_rng",
+ "set_aet_rng",
+ "aet_rng",
"take_along_axis",
]
def inputvars(a):
"""
- Get the inputs into a theano variables
+    Get the inputs into an aesara variable
Parameters
----------
- a: theano variable
+ a: aesara variable
Returns
-------
r: list of tensor variables that are inputs
"""
- return [v for v in graph_inputs(makeiter(a)) if isinstance(v, tt.TensorVariable)]
+ return [v for v in graph_inputs(makeiter(a)) if isinstance(v, TensorVariable)]
def cont_inputs(f):
"""
- Get the continuous inputs into a theano variables
+    Get the continuous inputs into an aesara variable
Parameters
----------
- a: theano variable
+ a: aesara variable
Returns
-------
@@ -77,13 +80,13 @@ def cont_inputs(f):
def floatX(X):
"""
- Convert a theano tensor or numpy array to theano.config.floatX type.
+    Convert an aesara tensor or numpy array to aesara.config.floatX type.
"""
try:
- return X.astype(theano.config.floatX)
+ return X.astype(aesara.config.floatX)
except AttributeError:
# Scalar passed
- return np.asarray(X, dtype=theano.config.floatX)
+ return np.asarray(X, dtype=aesara.config.floatX)
_conversion_map = {"float64": "int32", "float32": "int16", "float16": "int8", "float8": "int8"}
@@ -91,9 +94,9 @@ def floatX(X):
def intX(X):
"""
- Convert a theano tensor or numpy array to theano.tensor.int32 type.
+    Convert an aesara tensor or numpy array to aesara.tensor.int32 type.
"""
- intX = _conversion_map[theano.config.floatX]
+ intX = _conversion_map[aesara.config.floatX]
try:
return X.astype(intX)
except AttributeError:
@@ -111,16 +114,16 @@ def smartfloatX(x):
"""
-Theano derivative functions
+Aesara derivative functions
"""
def gradient1(f, v):
"""flat gradient of f wrt v"""
- return tt.flatten(tt.grad(f, v, disconnected_inputs="warn"))
+ return aet.flatten(grad(f, v, disconnected_inputs="warn"))
-empty_gradient = tt.zeros(0, dtype="float32")
+empty_gradient = aet.zeros(0, dtype="float32")
def gradient(f, vars=None):
@@ -128,20 +131,20 @@ def gradient(f, vars=None):
vars = cont_inputs(f)
if vars:
- return tt.concatenate([gradient1(f, v) for v in vars], axis=0)
+ return aet.concatenate([gradient1(f, v) for v in vars], axis=0)
else:
return empty_gradient
def jacobian1(f, v):
"""jacobian of f wrt v"""
- f = tt.flatten(f)
- idx = tt.arange(f.shape[0], dtype="int32")
+ f = aet.flatten(f)
+ idx = aet.arange(f.shape[0], dtype="int32")
def grad_i(i):
return gradient1(f[i], v)
- return theano.map(grad_i, idx)[0]
+ return aesara.map(grad_i, idx)[0]
def jacobian(f, vars=None):
@@ -149,43 +152,43 @@ def jacobian(f, vars=None):
vars = cont_inputs(f)
if vars:
- return tt.concatenate([jacobian1(f, v) for v in vars], axis=1)
+ return aet.concatenate([jacobian1(f, v) for v in vars], axis=1)
else:
return empty_gradient
def jacobian_diag(f, x):
- idx = tt.arange(f.shape[0], dtype="int32")
+ idx = aet.arange(f.shape[0], dtype="int32")
def grad_ii(i):
- return theano.grad(f[i], x)[i]
+ return grad(f[i], x)[i]
- return theano.scan(grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag")[0]
+ return aesara.scan(grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag")[0]
-@theano.config.change_flags(compute_test_value="ignore")
+@aesara.config.change_flags(compute_test_value="ignore")
def hessian(f, vars=None):
return -jacobian(gradient(f, vars), vars)
-@theano.config.change_flags(compute_test_value="ignore")
+@aesara.config.change_flags(compute_test_value="ignore")
def hessian_diag1(f, v):
g = gradient1(f, v)
- idx = tt.arange(g.shape[0], dtype="int32")
+ idx = aet.arange(g.shape[0], dtype="int32")
def hess_ii(i):
return gradient1(g[i], v)[i]
- return theano.map(hess_ii, idx)[0]
+ return aesara.map(hess_ii, idx)[0]
-@theano.config.change_flags(compute_test_value="ignore")
+@aesara.config.change_flags(compute_test_value="ignore")
def hessian_diag(f, vars=None):
if vars is None:
vars = cont_inputs(f)
if vars:
- return -tt.concatenate([hessian_diag1(f, v) for v in vars], axis=0)
+ return -aet.concatenate([hessian_diag1(f, v) for v in vars], axis=0)
else:
return empty_gradient
@@ -235,16 +238,16 @@ def make_shared_replacements(vars, model):
Dict of variable -> new shared variable
"""
othervars = set(model.vars) - set(vars)
- return {var: theano.shared(var.tag.test_value, var.name + "_shared") for var in othervars}
+ return {var: aesara.shared(var.tag.test_value, var.name + "_shared") for var in othervars}
def join_nonshared_inputs(xs, vars, shared, make_shared=False):
"""
- Takes a list of theano Variables and joins their non shared inputs into a single input.
+    Takes a list of aesara Variables and joins their non-shared inputs into a single input.
Parameters
----------
- xs: list of theano tensors
+ xs: list of aesara tensors
vars: list of variables to join
Returns
@@ -256,13 +259,13 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False):
if not vars:
raise ValueError("Empty list of variables.")
- joined = tt.concatenate([var.ravel() for var in vars])
+ joined = aet.concatenate([var.ravel() for var in vars])
if not make_shared:
tensor_type = joined.type
inarray = tensor_type("inarray")
else:
- inarray = theano.shared(joined.tag.test_value, "inarray")
+ inarray = aesara.shared(joined.tag.test_value, "inarray")
ordering = ArrayOrdering(vars)
inarray.tag.test_value = joined.tag.test_value
@@ -275,7 +278,7 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False):
replace.update(shared)
- xs_special = [theano.clone(x, replace, strict=False) for x in xs]
+ xs_special = [aesara.clone_replace(x, replace, strict=False) for x in xs]
return xs_special, inarray
@@ -303,16 +306,16 @@ def __call__(self, input):
input: TensorVariable
"""
(oldinput,) = inputvars(self.tensor)
- return theano.clone(self.tensor, {oldinput: input}, strict=False)
+ return aesara.clone_replace(self.tensor, {oldinput: input}, strict=False)
scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity")
-identity = tt.Elemwise(scalar_identity, name="identity")
+identity = Elemwise(scalar_identity, name="identity")
class GeneratorOp(Op):
"""
- Generator Op is designed for storing python generators inside theano graph.
+    Generator Op is designed for storing python generators inside an aesara graph.
__call__ creates TensorVariable
It has 2 new methods
@@ -351,7 +354,7 @@ def perform(self, node, inputs, output_storage, params=None):
def do_constant_folding(self, fgraph, node):
return False
- __call__ = theano.config.change_flags(compute_test_value="off")(Op.__call__)
+ __call__ = aesara.config.change_flags(compute_test_value="off")(Op.__call__)
def set_gen(self, gen):
if not isinstance(gen, GeneratorAdapter):
@@ -394,10 +397,10 @@ def generator(gen, default=None):
return GeneratorOp(gen, default)()
-_tt_rng = RandomStream()
+_aet_rng = RandomStream()
-def tt_rng(random_seed=None):
+def aet_rng(random_seed=None):
"""
Get the package-level random number generator or new with specified seed.
@@ -405,36 +408,36 @@ def tt_rng(random_seed=None):
----------
random_seed: int
If not None
- returns *new* theano random generator without replacing package global one
+        returns a *new* aesara random generator without replacing the package global one
Returns
-------
- `theano.tensor.random.utils.RandomStream` instance
- `theano.tensor.random.utils.RandomStream`
- instance passed to the most recent call of `set_tt_rng`
+ `aesara.tensor.random.utils.RandomStream` instance
+ `aesara.tensor.random.utils.RandomStream`
+ instance passed to the most recent call of `set_aet_rng`
"""
if random_seed is None:
- return _tt_rng
+ return _aet_rng
else:
ret = RandomStream(random_seed)
return ret
-def set_tt_rng(new_rng):
+def set_aet_rng(new_rng):
"""
Set the package-level random number generator.
Parameters
----------
- new_rng: `theano.tensor.random.utils.RandomStream` instance
+ new_rng: `aesara.tensor.random.utils.RandomStream` instance
The random number generator to use.
"""
# pylint: disable=global-statement
- global _tt_rng
+ global _aet_rng
# pylint: enable=global-statement
if isinstance(new_rng, int):
new_rng = RandomStream(new_rng)
- _tt_rng = new_rng
+ _aet_rng = new_rng
def floatX_array(x):
@@ -443,7 +446,7 @@ def floatX_array(x):
def ix_(*args):
"""
- Theano np.ix_ analog
+ Aesara np.ix_ analog
See numpy.lib.index_tricks.ix_ for reference
"""
@@ -452,7 +455,7 @@ def ix_(*args):
for k, new in enumerate(args):
if new is None:
out.append(slice(None))
- new = tt.as_tensor(new)
+ new = aet.as_tensor(new)
if new.ndim != 1:
raise ValueError("Cross index must be 1 dimensional")
new = new.reshape((1,) * k + (new.size,) + (1,) * (nd - k - 1))
@@ -482,7 +485,7 @@ def _make_along_axis_idx(arr_shape, indices, axis):
fancy_index.append(indices)
else:
ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim + 1 :]
- fancy_index.append(tt.arange(n).reshape(ind_shape))
+ fancy_index.append(aet.arange(n).reshape(ind_shape))
return tuple(fancy_index)
@@ -497,8 +500,8 @@ def take_along_axis(arr, indices, axis=0):
Functions returning an index along an axis, like argsort and argpartition,
produce suitable indices for this function.
"""
- arr = tt.as_tensor_variable(arr)
- indices = tt.as_tensor_variable(indices)
+ arr = aet.as_tensor_variable(arr)
+ indices = aet.as_tensor_variable(indices)
# normalize inputs
if axis is None:
arr = arr.flatten()
diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py
index 8b52c3e09c..37631b656c 100644
--- a/pymc3/backends/base.py
+++ b/pymc3/backends/base.py
@@ -23,8 +23,8 @@
from abc import ABC
from typing import List
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
from pymc3.backends.report import SamplerReport, merge_reports
from pymc3.model import modelcontext
@@ -434,7 +434,7 @@ def add_values(self, vals, overwrite=False) -> None:
for idx, chain in enumerate(chains.values()):
if new_var:
- dummy = tt.as_tensor_variable([], k)
+ dummy = aet.as_tensor_variable([], k)
chain.vars.append(dummy)
chain.samples[k] = v[idx]
diff --git a/pymc3/blocking.py b/pymc3/blocking.py
index 3669627350..4c07b4b47c 100644
--- a/pymc3/blocking.py
+++ b/pymc3/blocking.py
@@ -125,13 +125,13 @@ def mapf(self, f):
class ListArrayOrdering:
"""
- An ordering for a list to an array space. Takes also non theano.tensors.
+    An ordering for a list to an array space. Also takes non-aesara tensors.
Modified from pymc3 blocking.
Parameters
----------
list_arrays: list
- :class:`numpy.ndarray` or :class:`theano.tensor.Tensor`
+ :class:`numpy.ndarray` or :class:`aesara.tensor.Tensor`
intype: str
defining the input type 'tensor' or 'numpy'
"""
diff --git a/pymc3/data.py b/pymc3/data.py
index 4cdb793aa3..89760c1448 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -21,12 +21,14 @@
from copy import copy
from typing import Any, Dict, List
+import aesara
+import aesara.tensor as aet
import numpy as np
import pandas as pd
-import theano
-import theano.tensor as tt
-from theano.graph.basic import Apply
+from aesara.graph.basic import Apply
+from aesara.tensor.type import TensorType
+from aesara.tensor.var import TensorVariable
import pymc3 as pm
@@ -61,7 +63,7 @@ def get_data(filename):
return io.BytesIO(content)
-class GenTensorVariable(tt.TensorVariable):
+class GenTensorVariable(TensorVariable):
def __init__(self, op, type, name=None):
super().__init__(type=type, name=name)
self.op = op
@@ -96,7 +98,7 @@ def __init__(self, generator):
# make pickling potentially possible
self._yielded_test_value = False
self.gen = generator
- self.tensortype = tt.TensorType(self.test_value.dtype, ((False,) * self.test_value.ndim))
+ self.tensortype = TensorType(self.test_value.dtype, ((False,) * self.test_value.ndim))
# python3 generator
def __next__(self):
@@ -119,7 +121,7 @@ def __hash__(self):
return hash(id(self))
-class Minibatch(tt.TensorVariable):
+class Minibatch(TensorVariable):
"""Multidimensional minibatch that is pure TensorVariable
Parameters
@@ -143,7 +145,7 @@ class Minibatch(tt.TensorVariable):
you can use it to change source of
minibatches programmatically
in_memory_size: ``int`` or ``List[int|slice|Ellipsis]``
- data size for storing in ``theano.shared``
+ data size for storing in ``aesara.shared``
Attributes
----------
@@ -231,11 +233,11 @@ class Minibatch(tt.TensorVariable):
To be more concrete about how we create a minibatch, here is a demo:
1. create a shared variable
- >>> shared = theano.shared(data)
+ >>> shared = aesara.shared(data)
2. take a random slice of size 10:
- >>> ridx = pm.tt_rng().uniform(size=(10,), low=0, high=data.shape[0]-1e-10).astype('int64')
+ >>> ridx = pm.aet_rng().uniform(size=(10,), low=0, high=data.shape[0]-1e-10).astype('int64')
3) take the resulting slice:
@@ -255,7 +257,7 @@ class Minibatch(tt.TensorVariable):
Then you should create a `dict` with replacements:
>>> replacements = {x: testdata}
- >>> rnode = theano.clone(node, replacements)
+ >>> rnode = aesara.clone_replace(node, replacements)
>>> assert (testdata ** 2 == rnode.eval()).all()
*FIXME: In the following, what is the **reason** to replace the Minibatch variable with
@@ -266,7 +268,7 @@ class Minibatch(tt.TensorVariable):
For example
>>> replacements = {x.minibatch: x.shared}
- >>> rnode = theano.clone(node, replacements)
+ >>> rnode = aesara.clone_replace(node, replacements)
For more complex slices some more code is needed that can seem not so clear
@@ -296,7 +298,7 @@ class Minibatch(tt.TensorVariable):
RNG = collections.defaultdict(list) # type: Dict[str, List[Any]]
- @theano.config.change_flags(compute_test_value="raise")
+ @aesara.config.change_flags(compute_test_value="raise")
def __init__(
self,
data,
@@ -313,23 +315,23 @@ def __init__(
else:
data = np.asarray(data, dtype)
in_memory_slc = self.make_static_slices(in_memory_size)
- self.shared = theano.shared(data[in_memory_slc])
+ self.shared = aesara.shared(data[in_memory_slc])
self.update_shared_f = update_shared_f
self.random_slc = self.make_random_slices(self.shared.shape, batch_size, random_seed)
minibatch = self.shared[self.random_slc]
if broadcastable is None:
broadcastable = (False,) * minibatch.ndim
- minibatch = tt.patternbroadcast(minibatch, broadcastable)
+ minibatch = aet.patternbroadcast(minibatch, broadcastable)
self.minibatch = minibatch
super().__init__(self.minibatch.type, None, None, name=name)
- Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
+ Apply(aesara.compile.view_op, inputs=[self.minibatch], outputs=[self])
self.tag.test_value = copy(self.minibatch.tag.test_value)
def rslice(self, total, size, seed):
if size is None:
return slice(None)
elif isinstance(size, int):
- rng = pm.tt_rng(seed)
+ rng = pm.aet_rng(seed)
Minibatch.RNG[id(self)].append(rng)
return rng.uniform(size=(size,), low=0.0, high=pm.floatX(total) - 1e-16).astype("int64")
else:
@@ -401,7 +403,7 @@ def check(t):
)
if len(end) > 0:
shp_mid = shape[sep : -len(end)]
- mid = [tt.arange(s) for s in shp_mid]
+ mid = [aet.arange(s) for s in shp_mid]
else:
mid = []
else:
@@ -419,17 +421,17 @@ def check(t):
shp_end = np.asarray([])
shp_begin = shape[: len(begin)]
slc_begin = [
- self.rslice(shp_begin[i], t[0], t[1]) if t is not None else tt.arange(shp_begin[i])
+ self.rslice(shp_begin[i], t[0], t[1]) if t is not None else aet.arange(shp_begin[i])
for i, t in enumerate(begin)
]
slc_end = [
- self.rslice(shp_end[i], t[0], t[1]) if t is not None else tt.arange(shp_end[i])
+ self.rslice(shp_end[i], t[0], t[1]) if t is not None else aet.arange(shp_end[i])
for i, t in enumerate(end)
]
slc = slc_begin + mid + slc_end
else:
raise TypeError("Unrecognized size type, %r" % batch_size)
- return pm.theanof.ix_(*slc)
+ return pm.aesaraf.ix_(*slc)
def update_shared(self):
if self.update_shared_f is None:
@@ -460,7 +462,7 @@ def align_minibatches(batches=None):
class Data:
- """Data container class that wraps the theano ``SharedVariable`` class
+ """Data container class that wraps the aesara ``SharedVariable`` class
and lets the model be aware of its inputs and outputs.
Parameters
@@ -524,7 +526,7 @@ def __new__(self, name, value, *, dims=None, export_index_as_coords=False):
# `pm.model.pandas_to_array` takes care of parameter `value` and
# transforms it to something digestible for pymc3
- shared_object = theano.shared(pm.model.pandas_to_array(value), name)
+ shared_object = aesara.shared(pm.model.pandas_to_array(value), name)
if isinstance(dims, str):
dims = (dims,)
diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py
index 074a575eba..6443414734 100644
--- a/pymc3/distributions/bound.py
+++ b/pymc3/distributions/bound.py
@@ -14,9 +14,10 @@
from numbers import Real
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
+from pymc3.aesaraf import floatX
from pymc3.distributions import transforms
from pymc3.distributions.dist_math import bound
from pymc3.distributions.distribution import (
@@ -26,7 +27,6 @@
draw_values,
generate_samples,
)
-from pymc3.theanof import floatX
__all__ = ["Bound"]
@@ -207,9 +207,9 @@ class _ContinuousBounded(_Bounded, Continuous):
def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs):
if lower is not None:
- lower = tt.as_tensor_variable(floatX(lower))
+ lower = aet.as_tensor_variable(floatX(lower))
if upper is not None:
- upper = tt.as_tensor_variable(floatX(upper))
+ upper = aet.as_tensor_variable(floatX(upper))
if transform == "infer":
if lower is None and upper is None:
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 234ed935f2..4d5310ecfe 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -19,13 +19,14 @@
"""
import warnings
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
from scipy import stats
from scipy.interpolate import InterpolatedUnivariateSpline
from scipy.special import expit
+from pymc3.aesaraf import floatX
from pymc3.distributions import transforms
from pymc3.distributions.dist_math import (
SplineWrapper,
@@ -44,7 +45,6 @@
from pymc3.distributions.distribution import Continuous, draw_values, generate_samples
from pymc3.distributions.special import log_i0
from pymc3.math import invlogit, log1mexp, log1pexp, logdiffexp, logit
-from pymc3.theanof import floatX
__all__ = [
"Uniform",
@@ -101,8 +101,8 @@ class BoundedContinuous(Continuous):
def __init__(self, transform="auto", lower=None, upper=None, *args, **kwargs):
- lower = tt.as_tensor_variable(lower) if lower is not None else None
- upper = tt.as_tensor_variable(upper) if upper is not None else None
+ lower = aet.as_tensor_variable(lower) if lower is not None else None
+ upper = aet.as_tensor_variable(upper) if upper is not None else None
if transform == "auto":
if lower is None and upper is None:
@@ -223,8 +223,8 @@ class Uniform(BoundedContinuous):
"""
def __init__(self, lower=0, upper=1, *args, **kwargs):
- self.lower = lower = tt.as_tensor_variable(floatX(lower))
- self.upper = upper = tt.as_tensor_variable(floatX(upper))
+ self.lower = lower = aet.as_tensor_variable(floatX(lower))
+ self.upper = upper = aet.as_tensor_variable(floatX(upper))
self.mean = (upper + lower) / 2.0
self.median = self.mean
@@ -268,7 +268,7 @@ def logp(self, value):
"""
lower = self.lower
upper = self.upper
- return bound(-tt.log(upper - lower), value >= lower, value <= upper)
+ return bound(-aet.log(upper - lower), value >= lower, value <= upper)
def logcdf(self, value):
"""
@@ -277,9 +277,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -288,12 +288,12 @@ def logcdf(self, value):
lower = self.lower
upper = self.upper
- return tt.switch(
- tt.lt(value, lower) | tt.lt(upper, lower),
+ return aet.switch(
+ aet.lt(value, lower) | aet.lt(upper, lower),
-np.inf,
- tt.switch(
- tt.lt(value, upper),
- tt.log(value - lower) - tt.log(upper - lower),
+ aet.switch(
+ aet.lt(value, upper),
+ aet.log(value - lower) - aet.log(upper - lower),
0,
),
)
@@ -331,13 +331,13 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
- return tt.zeros_like(value)
+ return aet.zeros_like(value)
def logcdf(self, value):
"""
@@ -346,16 +346,16 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
TensorVariable
"""
- return tt.switch(
- tt.eq(value, -np.inf), -np.inf, tt.switch(tt.eq(value, np.inf), 0, tt.log(0.5))
+ return aet.switch(
+ aet.eq(value, -np.inf), -np.inf, aet.switch(aet.eq(value, np.inf), 0, aet.log(0.5))
)
@@ -388,13 +388,13 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
- return bound(tt.zeros_like(value), value > 0)
+ return bound(aet.zeros_like(value), value > 0)
def logcdf(self, value):
"""
@@ -403,15 +403,17 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
TensorVariable
"""
- return tt.switch(tt.lt(value, np.inf), -np.inf, tt.switch(tt.eq(value, np.inf), 0, -np.inf))
+ return aet.switch(
+ aet.lt(value, np.inf), -np.inf, aet.switch(aet.eq(value, np.inf), 0, -np.inf)
+ )
class Normal(Continuous):
@@ -481,10 +483,10 @@ def __init__(self, mu=0, sigma=None, tau=None, sd=None, **kwargs):
if sd is not None:
sigma = sd
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.sigma = self.sd = tt.as_tensor_variable(sigma)
- self.tau = tt.as_tensor_variable(tau)
+ self.sigma = self.sd = aet.as_tensor_variable(sigma)
+ self.tau = aet.as_tensor_variable(tau)
- self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu))
+ self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(floatX(mu))
self.variance = 1.0 / self.tau
assert_negative_support(sigma, "sigma", "Normal")
@@ -522,7 +524,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -532,7 +534,7 @@ def logp(self, value):
tau = self.tau
mu = self.mu
- return bound((-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, sigma > 0)
+ return bound((-tau * (value - mu) ** 2 + aet.log(tau / np.pi / 2.0)) / 2.0, sigma > 0)
def _distr_parameters_for_repr(self):
return ["mu", "sigma"]
@@ -544,9 +546,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -647,21 +649,21 @@ def __init__(
if sd is not None:
sigma = sd
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.sigma = self.sd = tt.as_tensor_variable(sigma)
- self.tau = tt.as_tensor_variable(tau)
- self.lower_check = tt.as_tensor_variable(floatX(lower)) if lower is not None else lower
- self.upper_check = tt.as_tensor_variable(floatX(upper)) if upper is not None else upper
+ self.sigma = self.sd = aet.as_tensor_variable(sigma)
+ self.tau = aet.as_tensor_variable(tau)
+ self.lower_check = aet.as_tensor_variable(floatX(lower)) if lower is not None else lower
+ self.upper_check = aet.as_tensor_variable(floatX(upper)) if upper is not None else upper
self.lower = (
- tt.as_tensor_variable(floatX(lower))
+ aet.as_tensor_variable(floatX(lower))
if lower is not None
- else tt.as_tensor_variable(-np.inf)
+ else aet.as_tensor_variable(-np.inf)
)
self.upper = (
- tt.as_tensor_variable(floatX(upper))
+ aet.as_tensor_variable(floatX(upper))
if upper is not None
- else tt.as_tensor_variable(np.inf)
+ else aet.as_tensor_variable(np.inf)
)
- self.mu = tt.as_tensor_variable(floatX(mu))
+ self.mu = aet.as_tensor_variable(floatX(mu))
if self.lower_check is None and self.upper_check is None:
self._defaultval = mu
@@ -732,7 +734,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -763,7 +765,7 @@ def _normalization(self):
lsf_a = normal_lccdf(mu, sigma, self.lower)
lsf_b = normal_lccdf(mu, sigma, self.upper)
- return tt.switch(self.lower > 0, logdiffexp(lsf_a, lsf_b), logdiffexp(lcdf_b, lcdf_a))
+ return aet.switch(self.lower > 0, logdiffexp(lsf_a, lsf_b), logdiffexp(lcdf_b, lcdf_a))
if self.lower_check is not None:
return normal_lccdf(mu, sigma, self.lower)
@@ -843,10 +845,10 @@ def __init__(self, sigma=None, tau=None, sd=None, *args, **kwargs):
super().__init__(*args, **kwargs)
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
- self.tau = tau = tt.as_tensor_variable(tau)
+ self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma)
+ self.tau = tau = aet.as_tensor_variable(tau)
- self.mean = tt.sqrt(2 / (np.pi * self.tau))
+ self.mean = aet.sqrt(2 / (np.pi * self.tau))
self.variance = (1.0 - 2 / np.pi) / self.tau
assert_negative_support(tau, "tau", "HalfNormal")
@@ -882,7 +884,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -891,7 +893,7 @@ def logp(self, value):
tau = self.tau
sigma = self.sigma
return bound(
- -0.5 * tau * value ** 2 + 0.5 * tt.log(tau * 2.0 / np.pi),
+ -0.5 * tau * value ** 2 + 0.5 * aet.log(tau * 2.0 / np.pi),
value >= 0,
tau > 0,
sigma > 0,
@@ -907,9 +909,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -918,7 +920,7 @@ def logcdf(self, value):
sigma = self.sigma
z = zvalue(value, mu=0, sigma=sigma)
return bound(
- tt.log1p(-tt.erfc(z / tt.sqrt(2.0))),
+ aet.log1p(-aet.erfc(z / aet.sqrt(2.0))),
0 <= value,
0 < sigma,
)
@@ -1005,14 +1007,14 @@ class Wald(PositiveContinuous):
def __init__(self, mu=None, lam=None, phi=None, alpha=0.0, *args, **kwargs):
super().__init__(*args, **kwargs)
mu, lam, phi = self.get_mu_lam_phi(mu, lam, phi)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.lam = lam = tt.as_tensor_variable(floatX(lam))
- self.phi = phi = tt.as_tensor_variable(floatX(phi))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.lam = lam = aet.as_tensor_variable(floatX(lam))
+ self.phi = phi = aet.as_tensor_variable(floatX(phi))
self.mean = self.mu + self.alpha
self.mode = (
- self.mu * (tt.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2) - 1.5 * self.mu / self.lam)
+ self.mu * (aet.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2) - 1.5 * self.mu / self.lam)
+ self.alpha
)
self.variance = (self.mu ** 3) / self.lam
@@ -1080,7 +1082,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1113,9 +1115,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -1129,29 +1131,29 @@ def logcdf(self, value):
value -= alpha
q = value / mu
l = lam * mu
- r = tt.sqrt(value * lam)
+ r = aet.sqrt(value * lam)
a = normal_lcdf(0, 1, (q - 1.0) / r)
b = 2.0 / l + normal_lcdf(0, 1, -(q + 1.0) / r)
left_limit = (
- tt.lt(value, 0)
- | (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf))
- | (tt.lt(value, mu) & tt.eq(lam, 0))
+ aet.lt(value, 0)
+ | (aet.eq(value, 0) & aet.gt(mu, 0) & aet.lt(lam, np.inf))
+ | (aet.lt(value, mu) & aet.eq(lam, 0))
)
right_limit = (
- tt.eq(value, np.inf)
- | (tt.eq(lam, 0) & tt.gt(value, mu))
- | (tt.gt(value, 0) & tt.eq(lam, np.inf))
+ aet.eq(value, np.inf)
+ | (aet.eq(lam, 0) & aet.gt(value, mu))
+ | (aet.gt(value, 0) & aet.eq(lam, np.inf))
)
- degenerate_dist = (tt.lt(mu, np.inf) & tt.eq(mu, value) & tt.eq(lam, 0)) | (
- tt.eq(value, 0) & tt.eq(lam, np.inf)
+ degenerate_dist = (aet.lt(mu, np.inf) & aet.eq(mu, value) & aet.eq(lam, 0)) | (
+ aet.eq(value, 0) & aet.eq(lam, np.inf)
)
return bound(
- tt.switch(
+ aet.switch(
~(right_limit | degenerate_dist),
- a + tt.log1p(tt.exp(b - a)),
+ a + aet.log1p(aet.exp(b - a)),
0,
),
~left_limit,
@@ -1229,8 +1231,8 @@ def __init__(self, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, *
if sd is not None:
sigma = sd
alpha, beta = self.get_alpha_beta(alpha, beta, mu, sigma)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.beta = beta = tt.as_tensor_variable(floatX(beta))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.beta = beta = aet.as_tensor_variable(floatX(beta))
self.mean = self.alpha / (self.alpha + self.beta)
self.variance = (
@@ -1283,7 +1285,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1292,11 +1294,11 @@ def logp(self, value):
alpha = self.alpha
beta = self.beta
- logval = tt.log(value)
- log1pval = tt.log1p(-value)
+ logval = aet.log(value)
+ log1pval = aet.log1p(-value)
logp = (
- tt.switch(tt.eq(alpha, 1), 0, (alpha - 1) * logval)
- + tt.switch(tt.eq(beta, 1), 0, (beta - 1) * log1pval)
+ aet.switch(aet.eq(alpha, 1), 0, (alpha - 1) * logval)
+ + aet.switch(aet.eq(beta, 1), 0, (beta - 1) * log1pval)
- betaln(alpha, beta)
)
@@ -1326,9 +1328,9 @@ def logcdf(self, value):
b = self.beta
return bound(
- tt.switch(
- tt.lt(value, 1),
- tt.log(incomplete_beta(a, b, value)),
+ aet.switch(
+ aet.lt(value, 1),
+ aet.log(incomplete_beta(a, b, value)),
0,
),
0 <= value,
@@ -1385,15 +1387,15 @@ class Kumaraswamy(UnitContinuous):
def __init__(self, a, b, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.a = a = tt.as_tensor_variable(floatX(a))
- self.b = b = tt.as_tensor_variable(floatX(b))
+ self.a = a = aet.as_tensor_variable(floatX(a))
+ self.b = b = aet.as_tensor_variable(floatX(b))
- ln_mean = tt.log(b) + tt.gammaln(1 + 1 / a) + tt.gammaln(b) - tt.gammaln(1 + 1 / a + b)
- self.mean = tt.exp(ln_mean)
+ ln_mean = aet.log(b) + aet.gammaln(1 + 1 / a) + aet.gammaln(b) - aet.gammaln(1 + 1 / a + b)
+ self.mean = aet.exp(ln_mean)
ln_2nd_raw_moment = (
- tt.log(b) + tt.gammaln(1 + 2 / a) + tt.gammaln(b) - tt.gammaln(1 + 2 / a + b)
+ aet.log(b) + aet.gammaln(1 + 2 / a) + aet.gammaln(b) - aet.gammaln(1 + 2 / a + b)
)
- self.variance = tt.exp(ln_2nd_raw_moment) - self.mean ** 2
+ self.variance = aet.exp(ln_2nd_raw_moment) - self.mean ** 2
assert_negative_support(a, "a", "Kumaraswamy")
assert_negative_support(b, "b", "Kumaraswamy")
@@ -1430,7 +1432,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1439,7 +1441,9 @@ def logp(self, value):
a = self.a
b = self.b
- logp = tt.log(a) + tt.log(b) + (a - 1) * tt.log(value) + (b - 1) * tt.log(1 - value ** a)
+ logp = (
+ aet.log(a) + aet.log(b) + (a - 1) * aet.log(value) + (b - 1) * aet.log(1 - value ** a)
+ )
return bound(logp, value >= 0, value <= 1, a > 0, b > 0)
@@ -1483,10 +1487,10 @@ class Exponential(PositiveContinuous):
def __init__(self, lam, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.lam = lam = tt.as_tensor_variable(floatX(lam))
+ self.lam = lam = aet.as_tensor_variable(floatX(lam))
self.mean = 1.0 / self.lam
- self.median = self.mean * tt.log(2)
- self.mode = tt.zeros_like(self.lam)
+ self.median = self.mean * aet.log(2)
+ self.mode = aet.zeros_like(self.lam)
self.variance = self.lam ** -2
@@ -1522,14 +1526,14 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
lam = self.lam
- return bound(tt.log(lam) - lam * value, value >= 0, lam > 0)
+ return bound(aet.log(lam) - lam * value, value >= 0, lam > 0)
def logcdf(self, value):
r"""
@@ -1538,15 +1542,15 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
TensorVariable
"""
- value = floatX(tt.as_tensor(value))
+ value = floatX(aet.as_tensor(value))
lam = self.lam
a = lam * value
return bound(
@@ -1600,8 +1604,8 @@ class Laplace(Continuous):
def __init__(self, mu, b, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.b = b = tt.as_tensor_variable(floatX(b))
- self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu))
+ self.b = b = aet.as_tensor_variable(floatX(b))
+ self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(floatX(mu))
self.variance = 2 * self.b ** 2
@@ -1635,7 +1639,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1644,7 +1648,7 @@ def logp(self, value):
mu = self.mu
b = self.b
- return -tt.log(2 * b) - abs(value - mu) / b
+ return -aet.log(2 * b) - abs(value - mu) / b
def logcdf(self, value):
"""
@@ -1653,9 +1657,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -1665,13 +1669,13 @@ def logcdf(self, value):
b = self.b
y = (value - a) / b
return bound(
- tt.switch(
- tt.le(value, a),
- tt.log(0.5) + y,
- tt.switch(
- tt.gt(y, 1),
- tt.log1p(-0.5 * tt.exp(-y)),
- tt.log(1 - 0.5 * tt.exp(-y)),
+ aet.switch(
+ aet.le(value, a),
+ aet.log(0.5) + y,
+ aet.switch(
+ aet.gt(y, 1),
+ aet.log1p(-0.5 * aet.exp(-y)),
+ aet.log(1 - 0.5 * aet.exp(-y)),
),
),
0 < b,
@@ -1715,9 +1719,9 @@ class AsymmetricLaplace(Continuous):
"""
def __init__(self, b, kappa, mu=0, *args, **kwargs):
- self.b = tt.as_tensor_variable(floatX(b))
- self.kappa = tt.as_tensor_variable(floatX(kappa))
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
+ self.b = aet.as_tensor_variable(floatX(b))
+ self.kappa = aet.as_tensor_variable(floatX(kappa))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
self.mean = self.mu - (self.kappa - 1 / self.kappa) / b
self.variance = (1 + self.kappa ** 4) / (self.kappa ** 2 * self.b ** 2)
@@ -1763,7 +1767,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1771,8 +1775,8 @@ def logp(self, value):
"""
value = value - self.mu
return bound(
- tt.log(self.b / (self.kappa + (self.kappa ** -1)))
- + (-value * self.b * tt.sgn(value) * (self.kappa ** tt.sgn(value))),
+ aet.log(self.b / (self.kappa + (self.kappa ** -1)))
+ + (-value * self.b * aet.sgn(value) * (self.kappa ** aet.sgn(value))),
0 < self.b,
0 < self.kappa,
)
@@ -1847,14 +1851,14 @@ def __init__(self, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs):
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.tau = tau = tt.as_tensor_variable(tau)
- self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.tau = tau = aet.as_tensor_variable(tau)
+ self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma)
- self.mean = tt.exp(self.mu + 1.0 / (2 * self.tau))
- self.median = tt.exp(self.mu)
- self.mode = tt.exp(self.mu - 1.0 / self.tau)
- self.variance = (tt.exp(1.0 / self.tau) - 1) * tt.exp(2 * self.mu + 1.0 / self.tau)
+ self.mean = aet.exp(self.mu + 1.0 / (2 * self.tau))
+ self.median = aet.exp(self.mu)
+ self.mode = aet.exp(self.mu - 1.0 / self.tau)
+ self.variance = (aet.exp(1.0 / self.tau) - 1) * aet.exp(2 * self.mu + 1.0 / self.tau)
assert_negative_support(tau, "tau", "Lognormal")
assert_negative_support(sigma, "sigma", "Lognormal")
@@ -1891,7 +1895,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1900,9 +1904,9 @@ def logp(self, value):
mu = self.mu
tau = self.tau
return bound(
- -0.5 * tau * (tt.log(value) - mu) ** 2
- + 0.5 * tt.log(tau / (2.0 * np.pi))
- - tt.log(value),
+ -0.5 * tau * (aet.log(value) - mu) ** 2
+ + 0.5 * aet.log(tau / (2.0 * np.pi))
+ - aet.log(value),
tau > 0,
)
@@ -1916,9 +1920,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -1929,7 +1933,7 @@ def logcdf(self, value):
tau = self.tau
return bound(
- normal_lcdf(mu, sigma, tt.log(value)),
+ normal_lcdf(mu, sigma, aet.log(value)),
0 < value,
0 < tau,
)
@@ -2002,13 +2006,13 @@ def __init__(self, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs):
super().__init__(*args, **kwargs)
if sd is not None:
sigma = sd
- self.nu = nu = tt.as_tensor_variable(floatX(nu))
+ self.nu = nu = aet.as_tensor_variable(floatX(nu))
lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
- self.lam = lam = tt.as_tensor_variable(lam)
- self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
- self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
+ self.lam = lam = aet.as_tensor_variable(lam)
+ self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma)
+ self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(mu)
- self.variance = tt.switch((nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf)
+ self.variance = aet.switch((nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf)
assert_negative_support(lam, "lam (sigma)", "StudentT")
assert_negative_support(nu, "nu", "StudentT")
@@ -2043,7 +2047,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2056,9 +2060,9 @@ def logp(self, value):
return bound(
gammaln((nu + 1.0) / 2.0)
- + 0.5 * tt.log(lam / (nu * np.pi))
+ + 0.5 * aet.log(lam / (nu * np.pi))
- gammaln(nu / 2.0)
- - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu) ** 2 / nu),
+ - (nu + 1.0) / 2.0 * aet.log1p(lam * (value - mu) ** 2 / nu),
lam > 0,
nu > 0,
sigma > 0,
@@ -2092,11 +2096,11 @@ def logcdf(self, value):
sigma = self.sigma
lam = self.lam
t = (value - mu) / sigma
- sqrt_t2_nu = tt.sqrt(t ** 2 + nu)
+ sqrt_t2_nu = aet.sqrt(t ** 2 + nu)
x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu)
return bound(
- tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)),
+ aet.log(incomplete_beta(nu / 2.0, nu / 2.0, x)),
0 < nu,
0 < sigma,
0 < lam,
@@ -2149,13 +2153,13 @@ class Pareto(Continuous):
"""
def __init__(self, alpha, m, transform="lowerbound", *args, **kwargs):
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.m = m = tt.as_tensor_variable(floatX(m))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.m = m = aet.as_tensor_variable(floatX(m))
- self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf)
+ self.mean = aet.switch(aet.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf)
self.median = m * 2.0 ** (1.0 / alpha)
- self.variance = tt.switch(
- tt.gt(alpha, 2), (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2), np.inf
+ self.variance = aet.switch(
+ aet.gt(alpha, 2), (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2), np.inf
)
assert_negative_support(alpha, "alpha", "Pareto")
@@ -2197,7 +2201,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2206,7 +2210,7 @@ def logp(self, value):
alpha = self.alpha
m = self.m
return bound(
- tt.log(alpha) + logpow(m, alpha) - logpow(value, alpha + 1),
+ aet.log(alpha) + logpow(m, alpha) - logpow(value, alpha + 1),
value >= m,
alpha > 0,
m > 0,
@@ -2222,9 +2226,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -2234,10 +2238,10 @@ def logcdf(self, value):
alpha = self.alpha
arg = (m / value) ** alpha
return bound(
- tt.switch(
- tt.le(arg, 1e-5),
- tt.log1p(-arg),
- tt.log(1 - arg),
+ aet.switch(
+ aet.le(arg, 1e-5),
+ aet.log1p(-arg),
+ aet.log(1 - arg),
),
m <= value,
0 < alpha,
@@ -2292,8 +2296,8 @@ class Cauchy(Continuous):
def __init__(self, alpha, beta, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.median = self.mode = self.alpha = tt.as_tensor_variable(floatX(alpha))
- self.beta = tt.as_tensor_variable(floatX(beta))
+ self.median = self.mode = self.alpha = aet.as_tensor_variable(floatX(alpha))
+ self.beta = aet.as_tensor_variable(floatX(beta))
assert_negative_support(beta, "beta", "Cauchy")
@@ -2329,7 +2333,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2338,7 +2342,7 @@ def logp(self, value):
alpha = self.alpha
beta = self.beta
return bound(
- -tt.log(np.pi) - tt.log(beta) - tt.log1p(((value - alpha) / beta) ** 2), beta > 0
+ -aet.log(np.pi) - aet.log(beta) - aet.log1p(((value - alpha) / beta) ** 2), beta > 0
)
def logcdf(self, value):
@@ -2348,9 +2352,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -2359,7 +2363,7 @@ def logcdf(self, value):
alpha = self.alpha
beta = self.beta
return bound(
- tt.log(0.5 + tt.arctan((value - alpha) / beta) / np.pi),
+ aet.log(0.5 + aet.arctan((value - alpha) / beta) / np.pi),
0 < beta,
)
@@ -2404,8 +2408,8 @@ class HalfCauchy(PositiveContinuous):
def __init__(self, beta, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.mode = tt.as_tensor_variable(0)
- self.median = self.beta = tt.as_tensor_variable(floatX(beta))
+ self.mode = aet.as_tensor_variable(0)
+ self.median = self.beta = aet.as_tensor_variable(floatX(beta))
assert_negative_support(beta, "beta", "HalfCauchy")
@@ -2441,7 +2445,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2449,7 +2453,7 @@ def logp(self, value):
"""
beta = self.beta
return bound(
- tt.log(2) - tt.log(np.pi) - tt.log(beta) - tt.log1p((value / beta) ** 2),
+ aet.log(2) - aet.log(np.pi) - aet.log(beta) - aet.log1p((value / beta) ** 2),
value >= 0,
beta > 0,
)
@@ -2461,9 +2465,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -2471,7 +2475,7 @@ def logcdf(self, value):
"""
beta = self.beta
return bound(
- tt.log(2 * tt.arctan(value / beta) / np.pi),
+ aet.log(2 * aet.arctan(value / beta) / np.pi),
0 <= value,
0 < beta,
)
@@ -2541,10 +2545,10 @@ def __init__(self, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, *
sigma = sd
alpha, beta = self.get_alpha_beta(alpha, beta, mu, sigma)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.beta = beta = tt.as_tensor_variable(floatX(beta))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.beta = beta = aet.as_tensor_variable(floatX(beta))
self.mean = alpha / beta
- self.mode = tt.maximum((alpha - 1) / beta, 0)
+ self.mode = aet.maximum((alpha - 1) / beta, 0)
self.variance = alpha / beta ** 2
assert_negative_support(alpha, "alpha", "Gamma")
@@ -2595,7 +2599,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2617,9 +2621,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -2628,12 +2632,12 @@ def logcdf(self, value):
alpha = self.alpha
beta = self.beta
# Avoid C-assertion when the gammainc function is called with invalid values (#4340)
- safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha)
- safe_beta = tt.switch(tt.lt(beta, 0), 0, beta)
- safe_value = tt.switch(tt.lt(value, 0), 0, value)
+ safe_alpha = aet.switch(aet.lt(alpha, 0), 0, alpha)
+ safe_beta = aet.switch(aet.lt(beta, 0), 0, beta)
+ safe_value = aet.switch(aet.lt(value, 0), 0, value)
return bound(
- tt.log(tt.gammainc(safe_alpha, safe_beta * safe_value)),
+ aet.log(aet.gammainc(safe_alpha, safe_beta * safe_value)),
0 <= value,
0 < alpha,
0 < beta,
@@ -2698,13 +2702,13 @@ def __init__(self, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, *
sigma = sd
alpha, beta = InverseGamma._get_alpha_beta(alpha, beta, mu, sigma)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.beta = beta = tt.as_tensor_variable(floatX(beta))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.beta = beta = aet.as_tensor_variable(floatX(beta))
self.mean = self._calculate_mean()
self.mode = beta / (alpha + 1.0)
- self.variance = tt.switch(
- tt.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf
+ self.variance = aet.switch(
+ aet.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf
)
assert_negative_support(alpha, "alpha", "InverseGamma")
assert_negative_support(beta, "beta", "InverseGamma")
@@ -2766,7 +2770,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2791,9 +2795,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -2802,12 +2806,12 @@ def logcdf(self, value):
alpha = self.alpha
beta = self.beta
# Avoid C-assertion when the gammaincc function is called with invalid values (#4340)
- safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha)
- safe_beta = tt.switch(tt.lt(beta, 0), 0, beta)
- safe_value = tt.switch(tt.lt(value, 0), 0, value)
+ safe_alpha = aet.switch(aet.lt(alpha, 0), 0, alpha)
+ safe_beta = aet.switch(aet.lt(beta, 0), 0, beta)
+ safe_value = aet.switch(aet.lt(value, 0), 0, value)
return bound(
- tt.log(tt.gammaincc(safe_alpha, safe_beta / safe_value)),
+ aet.log(aet.gammaincc(safe_alpha, safe_beta / safe_value)),
0 <= value,
0 < alpha,
0 < beta,
@@ -2853,7 +2857,7 @@ class ChiSquared(Gamma):
"""
def __init__(self, nu, *args, **kwargs):
- self.nu = nu = tt.as_tensor_variable(floatX(nu))
+ self.nu = nu = aet.as_tensor_variable(floatX(nu))
super().__init__(alpha=nu / 2.0, beta=0.5, *args, **kwargs)
@@ -2903,12 +2907,12 @@ class Weibull(PositiveContinuous):
def __init__(self, alpha, beta, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.beta = beta = tt.as_tensor_variable(floatX(beta))
- self.mean = beta * tt.exp(gammaln(1 + 1.0 / alpha))
- self.median = beta * tt.exp(gammaln(tt.log(2))) ** (1.0 / alpha)
- self.variance = beta ** 2 * tt.exp(gammaln(1 + 2.0 / alpha)) - self.mean ** 2
- self.mode = tt.switch(
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.beta = beta = aet.as_tensor_variable(floatX(beta))
+ self.mean = beta * aet.exp(gammaln(1 + 1.0 / alpha))
+ self.median = beta * aet.exp(gammaln(aet.log(2))) ** (1.0 / alpha)
+ self.variance = beta ** 2 * aet.exp(gammaln(1 + 2.0 / alpha)) - self.mean ** 2
+ self.mode = aet.switch(
alpha >= 1, beta * ((alpha - 1) / alpha) ** (1 / alpha), 0
) # Reference: https://en.wikipedia.org/wiki/Weibull_distribution
@@ -2947,7 +2951,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2956,9 +2960,9 @@ def logp(self, value):
alpha = self.alpha
beta = self.beta
return bound(
- tt.log(alpha)
- - tt.log(beta)
- + (alpha - 1) * tt.log(value / beta)
+ aet.log(alpha)
+ - aet.log(beta)
+ + (alpha - 1) * aet.log(value / beta)
- (value / beta) ** alpha,
value >= 0,
alpha > 0,
@@ -2972,9 +2976,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -3053,12 +3057,12 @@ def __init__(self, nu=1, sigma=None, lam=None, sd=None, *args, **kwargs):
if sd is not None:
sigma = sd
- self.mode = tt.as_tensor_variable(0)
+ self.mode = aet.as_tensor_variable(0)
lam, sigma = get_tau_sigma(lam, sigma)
- self.median = tt.as_tensor_variable(sigma)
- self.sigma = self.sd = tt.as_tensor_variable(sigma)
- self.lam = tt.as_tensor_variable(lam)
- self.nu = nu = tt.as_tensor_variable(floatX(nu))
+ self.median = aet.as_tensor_variable(sigma)
+ self.sigma = self.sd = aet.as_tensor_variable(sigma)
+ self.lam = aet.as_tensor_variable(lam)
+ self.nu = nu = aet.as_tensor_variable(floatX(nu))
assert_negative_support(sigma, "sigma", "HalfStudentT")
assert_negative_support(lam, "lam", "HalfStudentT")
@@ -3094,7 +3098,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3105,11 +3109,11 @@ def logp(self, value):
lam = self.lam
return bound(
- tt.log(2)
+ aet.log(2)
+ gammaln((nu + 1.0) / 2.0)
- gammaln(nu / 2.0)
- - 0.5 * tt.log(nu * np.pi * sigma ** 2)
- - (nu + 1.0) / 2.0 * tt.log1p(value ** 2 / (nu * sigma ** 2)),
+ - 0.5 * aet.log(nu * np.pi * sigma ** 2)
+ - (nu + 1.0) / 2.0 * aet.log1p(value ** 2 / (nu * sigma ** 2)),
sigma > 0,
lam > 0,
nu > 0,
@@ -3191,9 +3195,9 @@ def __init__(self, mu=0.0, sigma=None, nu=None, sd=None, *args, **kwargs):
if sd is not None:
sigma = sd
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma))
- self.nu = nu = tt.as_tensor_variable(floatX(nu))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.sigma = self.sd = sigma = aet.as_tensor_variable(floatX(sigma))
+ self.nu = nu = aet.as_tensor_variable(floatX(nu))
self.mean = mu + nu
self.variance = (sigma ** 2) + (nu ** 2)
@@ -3234,7 +3238,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3246,10 +3250,10 @@ def logp(self, value):
# Algorithm is adapted from dexGAUS.R from gamlss
return bound(
- tt.switch(
- tt.gt(nu, 0.05 * sigma),
+ aet.switch(
+ aet.gt(nu, 0.05 * sigma),
(
- -tt.log(nu)
+ -aet.log(nu)
+ (mu - value) / nu
+ 0.5 * (sigma / nu) ** 2
+ normal_lcdf(mu + (sigma ** 2) / nu, sigma, value)
@@ -3273,9 +3277,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -3287,8 +3291,8 @@ def logcdf(self, value):
# Algorithm is adapted from pexGAUS.R from gamlss
return bound(
- tt.switch(
- tt.gt(nu, 0.05 * sigma),
+ aet.switch(
+ aet.gt(nu, 0.05 * sigma),
logdiffexp(
normal_lcdf(mu, sigma, value),
(
@@ -3355,8 +3359,8 @@ def __init__(self, mu=0.0, kappa=None, transform="circular", *args, **kwargs):
if transform == "circular":
transform = transforms.Circular()
super().__init__(transform=transform, *args, **kwargs)
- self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.kappa = kappa = tt.as_tensor_variable(floatX(kappa))
+ self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.kappa = kappa = aet.as_tensor_variable(floatX(kappa))
assert_negative_support(kappa, "kappa", "VonMises")
@@ -3390,7 +3394,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3399,7 +3403,7 @@ def logp(self, value):
mu = self.mu
kappa = self.kappa
return bound(
- kappa * tt.cos(mu - value) - (tt.log(2 * np.pi) + log_i0(kappa)),
+ kappa * aet.cos(mu - value) - (aet.log(2 * np.pi) + log_i0(kappa)),
kappa > 0,
value >= -np.pi,
value <= np.pi,
@@ -3474,11 +3478,11 @@ def __init__(self, mu=0.0, sigma=None, tau=None, alpha=1, sd=None, *args, **kwar
sigma = sd
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.tau = tt.as_tensor_variable(tau)
- self.sigma = self.sd = tt.as_tensor_variable(sigma)
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.tau = aet.as_tensor_variable(tau)
+ self.sigma = self.sd = aet.as_tensor_variable(sigma)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
self.mean = mu + self.sigma * (2 / np.pi) ** 0.5 * alpha / (1 + alpha ** 2) ** 0.5
self.variance = self.sigma ** 2 * (1 - (2 * alpha ** 2) / ((1 + alpha ** 2) * np.pi))
@@ -3518,7 +3522,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3529,8 +3533,8 @@ def logp(self, value):
mu = self.mu
alpha = self.alpha
return bound(
- tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2)))
- + (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0,
+ aet.log(1 + aet.erf(((value - mu) * aet.sqrt(tau) * alpha) / aet.sqrt(2)))
+ + (-tau * (value - mu) ** 2 + aet.log(tau / np.pi / 2.0)) / 2.0,
tau > 0,
sigma > 0,
)
@@ -3594,9 +3598,9 @@ class Triangular(BoundedContinuous):
"""
def __init__(self, lower=0, upper=1, c=0.5, *args, **kwargs):
- self.median = self.mean = self.c = c = tt.as_tensor_variable(floatX(c))
- self.lower = lower = tt.as_tensor_variable(floatX(lower))
- self.upper = upper = tt.as_tensor_variable(floatX(upper))
+ self.median = self.mean = self.c = c = aet.as_tensor_variable(floatX(c))
+ self.lower = lower = aet.as_tensor_variable(floatX(lower))
+ self.upper = upper = aet.as_tensor_variable(floatX(upper))
super().__init__(lower=lower, upper=upper, *args, **kwargs)
@@ -3639,7 +3643,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3649,10 +3653,10 @@ def logp(self, value):
lower = self.lower
upper = self.upper
return bound(
- tt.switch(
- tt.lt(value, c),
- tt.log(2 * (value - lower) / ((upper - lower) * (c - lower))),
- tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))),
+ aet.switch(
+ aet.lt(value, c),
+ aet.log(2 * (value - lower) / ((upper - lower) * (c - lower))),
+ aet.log(2 * (upper - value) / ((upper - lower) * (upper - c))),
),
lower <= value,
value <= upper,
@@ -3665,9 +3669,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -3677,15 +3681,15 @@ def logcdf(self, value):
lower = self.lower
upper = self.upper
return bound(
- tt.switch(
- tt.le(value, lower),
+ aet.switch(
+ aet.le(value, lower),
-np.inf,
- tt.switch(
- tt.le(value, c),
- tt.log(((value - lower) ** 2) / ((upper - lower) * (c - lower))),
- tt.switch(
- tt.lt(value, upper),
- tt.log1p(-((upper - value) ** 2) / ((upper - lower) * (upper - c))),
+ aet.switch(
+ aet.le(value, c),
+ aet.log(((value - lower) ** 2) / ((upper - lower) * (c - lower))),
+ aet.switch(
+ aet.lt(value, upper),
+ aet.log1p(-((upper - value) ** 2) / ((upper - lower) * (upper - c))),
0,
),
),
@@ -3743,13 +3747,13 @@ class Gumbel(Continuous):
"""
def __init__(self, mu=0, beta=1.0, **kwargs):
- self.mu = tt.as_tensor_variable(floatX(mu))
- self.beta = tt.as_tensor_variable(floatX(beta))
+ self.mu = aet.as_tensor_variable(floatX(mu))
+ self.beta = aet.as_tensor_variable(floatX(beta))
assert_negative_support(beta, "beta", "Gumbel")
self.mean = self.mu + self.beta * np.euler_gamma
- self.median = self.mu - self.beta * tt.log(tt.log(2))
+ self.median = self.mu - self.beta * aet.log(aet.log(2))
self.mode = self.mu
self.variance = (np.pi ** 2 / 6.0) * self.beta ** 2
@@ -3785,7 +3789,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3795,7 +3799,7 @@ def logp(self, value):
beta = self.beta
scaled = (value - mu) / beta
return bound(
- -scaled - tt.exp(-scaled) - tt.log(self.beta),
+ -scaled - aet.exp(-scaled) - aet.log(self.beta),
0 < beta,
)
@@ -3806,9 +3810,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -3818,7 +3822,7 @@ def logcdf(self, value):
mu = self.mu
return bound(
- -tt.exp(-(value - mu) / beta),
+ -aet.exp(-(value - mu) / beta),
0 < beta,
)
@@ -3888,18 +3892,18 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs):
sigma = sd
nu, b, sigma = self.get_nu_b(nu, b, sigma)
- self.nu = nu = tt.as_tensor_variable(floatX(nu))
- self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma))
- self.b = b = tt.as_tensor_variable(floatX(b))
+ self.nu = nu = aet.as_tensor_variable(floatX(nu))
+ self.sigma = self.sd = sigma = aet.as_tensor_variable(floatX(sigma))
+ self.b = b = aet.as_tensor_variable(floatX(b))
nu_sigma_ratio = -(nu ** 2) / (2 * sigma ** 2)
self.mean = (
sigma
* np.sqrt(np.pi / 2)
- * tt.exp(nu_sigma_ratio / 2)
+ * aet.exp(nu_sigma_ratio / 2)
* (
- (1 - nu_sigma_ratio) * tt.i0(-nu_sigma_ratio / 2)
- - nu_sigma_ratio * tt.i1(-nu_sigma_ratio / 2)
+ (1 - nu_sigma_ratio) * aet.i0(-nu_sigma_ratio / 2)
+ - nu_sigma_ratio * aet.i1(-nu_sigma_ratio / 2)
)
)
self.variance = (
@@ -3907,10 +3911,10 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs):
+ nu ** 2
- (np.pi * sigma ** 2 / 2)
* (
- tt.exp(nu_sigma_ratio / 2)
+ aet.exp(nu_sigma_ratio / 2)
* (
- (1 - nu_sigma_ratio) * tt.i0(-nu_sigma_ratio / 2)
- - nu_sigma_ratio * tt.i1(-nu_sigma_ratio / 2)
+ (1 - nu_sigma_ratio) * aet.i0(-nu_sigma_ratio / 2)
+ - nu_sigma_ratio * aet.i1(-nu_sigma_ratio / 2)
)
)
** 2
@@ -3963,7 +3967,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -3974,7 +3978,7 @@ def logp(self, value):
b = self.b
x = value / sigma
return bound(
- tt.log(x * tt.exp((-(x - b) * (x - b)) / 2) * i0e(x * b) / sigma),
+ aet.log(x * aet.exp((-(x - b) * (x - b)) / 2) * i0e(x * b) / sigma),
sigma >= 0,
nu >= 0,
value > 0,
@@ -4030,8 +4034,8 @@ class Logistic(Continuous):
def __init__(self, mu=0.0, s=1.0, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.mu = tt.as_tensor_variable(floatX(mu))
- self.s = tt.as_tensor_variable(floatX(s))
+ self.mu = aet.as_tensor_variable(floatX(mu))
+ self.s = aet.as_tensor_variable(floatX(s))
self.mean = self.mode = mu
self.variance = s ** 2 * np.pi ** 2 / 3.0
@@ -4067,7 +4071,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -4077,7 +4081,7 @@ def logp(self, value):
s = self.s
return bound(
- -(value - mu) / s - tt.log(s) - 2 * tt.log1p(tt.exp(-(value - mu) / s)),
+ -(value - mu) / s - aet.log(s) - 2 * aet.log1p(aet.exp(-(value - mu) / s)),
s > 0,
)
@@ -4088,9 +4092,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -4151,10 +4155,10 @@ class LogitNormal(UnitContinuous):
def __init__(self, mu=0, sigma=None, tau=None, sd=None, **kwargs):
if sd is not None:
sigma = sd
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.sigma = self.sd = tt.as_tensor_variable(sigma)
- self.tau = tau = tt.as_tensor_variable(tau)
+ self.sigma = self.sd = aet.as_tensor_variable(sigma)
+ self.tau = tau = aet.as_tensor_variable(tau)
self.median = invlogit(mu)
assert_negative_support(sigma, "sigma", "LogitNormal")
@@ -4192,7 +4196,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -4202,8 +4206,8 @@ def logp(self, value):
tau = self.tau
return bound(
-0.5 * tau * (logit(value) - mu) ** 2
- + 0.5 * tt.log(tau / (2.0 * np.pi))
- - tt.log(value * (1 - value)),
+ + 0.5 * aet.log(tau / (2.0 * np.pi))
+ - aet.log(value * (1 - value)),
value > 0,
value < 1,
tau > 0,
@@ -4242,15 +4246,15 @@ class Interpolated(BoundedContinuous):
"""
def __init__(self, x_points, pdf_points, *args, **kwargs):
- self.lower = lower = tt.as_tensor_variable(x_points[0])
- self.upper = upper = tt.as_tensor_variable(x_points[-1])
+ self.lower = lower = aet.as_tensor_variable(x_points[0])
+ self.upper = upper = aet.as_tensor_variable(x_points[-1])
super().__init__(lower=lower, upper=upper, *args, **kwargs)
interp = InterpolatedUnivariateSpline(x_points, pdf_points, k=1, ext="zeros")
Z = interp.integral(x_points[0], x_points[-1])
- self.Z = tt.as_tensor_variable(Z)
+ self.Z = aet.as_tensor_variable(Z)
self.interp_op = SplineWrapper(interp)
self.x_points = x_points
self.pdf_points = pdf_points / Z
@@ -4301,13 +4305,13 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
- return tt.log(self.interp_op(value) / self.Z)
+ return aet.log(self.interp_op(value) / self.Z)
def _distr_parameters_for_repr(self):
return []
@@ -4361,13 +4365,13 @@ class Moyal(Continuous):
"""
def __init__(self, mu=0, sigma=1.0, *args, **kwargs):
- self.mu = tt.as_tensor_variable(floatX(mu))
- self.sigma = tt.as_tensor_variable(floatX(sigma))
+ self.mu = aet.as_tensor_variable(floatX(mu))
+ self.sigma = aet.as_tensor_variable(floatX(sigma))
assert_negative_support(sigma, "sigma", "Moyal")
- self.mean = self.mu + self.sigma * (np.euler_gamma + tt.log(2))
- self.median = self.mu - self.sigma * tt.log(2 * tt.erfcinv(1 / 2) ** 2)
+ self.mean = self.mu + self.sigma * (np.euler_gamma + aet.log(2))
+ self.median = self.mu - self.sigma * aet.log(2 * aet.erfcinv(1 / 2) ** 2)
self.mode = self.mu
self.variance = (np.pi ** 2 / 2.0) * self.sigma ** 2
@@ -4403,7 +4407,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -4413,7 +4417,11 @@ def logp(self, value):
sigma = self.sigma
scaled = (value - mu) / sigma
return bound(
- (-(1 / 2) * (scaled + tt.exp(-scaled)) - tt.log(sigma) - (1 / 2) * tt.log(2 * np.pi)),
+ (
+ -(1 / 2) * (scaled + aet.exp(-scaled))
+ - aet.log(sigma)
+ - (1 / 2) * aet.log(2 * np.pi)
+ ),
0 < sigma,
)
@@ -4424,9 +4432,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -4437,6 +4445,6 @@ def logcdf(self, value):
scaled = (value - mu) / sigma
return bound(
- tt.log(tt.erfc(tt.exp(-scaled / 2) * (2 ** -0.5))),
+ aet.log(aet.erfc(aet.exp(-scaled / 2) * (2 ** -0.5))),
0 < sigma,
)
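
For downstream model code the continuous-distribution renames above are the same mechanical alias swap. A hedged sketch of a user model before and after, with hypothetical variable names and assuming only this branch of PyMC3 and the distributions already shown in this diff:

    # Before: import theano.tensor as tt;  mu = tt.exp(raw_mu)
    import aesara.tensor as aet
    import pymc3 as pm

    with pm.Model():
        raw_mu = pm.Normal("raw_mu", 0.0, 1.0)
        # Tensor math on random variables now goes through aesara.tensor.
        mu = aet.exp(raw_mu)
        pm.Gumbel("obs", mu=mu, beta=1.0, observed=[0.1, 0.4, 1.2])
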
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 0bac6fd6b2..06cd504f40 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -14,11 +14,12 @@
import warnings
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
from scipy import stats
+from pymc3.aesaraf import floatX, intX, take_along_axis
from pymc3.distributions.dist_math import (
betaln,
binomln,
@@ -34,7 +35,6 @@
from pymc3.distributions.distribution import Discrete, draw_values, generate_samples
from pymc3.distributions.shape_utils import broadcast_distribution_samples
from pymc3.math import log1mexp, log1pexp, logaddexp, logit, logsumexp, sigmoid, tround
-from pymc3.theanof import floatX, intX, take_along_axis
__all__ = [
"Binomial",
@@ -100,9 +100,9 @@ class Binomial(Discrete):
def __init__(self, n, p, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.n = n = tt.as_tensor_variable(intX(n))
- self.p = p = tt.as_tensor_variable(floatX(p))
- self.mode = tt.cast(tround(n * p), self.dtype)
+ self.n = n = aet.as_tensor_variable(intX(n))
+ self.p = p = aet.as_tensor_variable(floatX(p))
+ self.mode = aet.cast(tround(n * p), self.dtype)
def random(self, point=None, size=None):
r"""
@@ -132,7 +132,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -171,12 +171,12 @@ def logcdf(self, value):
n = self.n
p = self.p
- value = tt.floor(value)
+ value = aet.floor(value)
return bound(
- tt.switch(
- tt.lt(value, n),
- tt.log(incomplete_beta(n - value, value + 1, 1 - p)),
+ aet.switch(
+ aet.lt(value, n),
+ aet.log(incomplete_beta(n - value, value + 1, 1 - p)),
0,
),
0 <= value,
@@ -243,10 +243,10 @@ def BetaBinom(a, b, n, x):
def __init__(self, alpha, beta, n, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.beta = beta = tt.as_tensor_variable(floatX(beta))
- self.n = n = tt.as_tensor_variable(intX(n))
- self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8")
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.beta = beta = aet.as_tensor_variable(floatX(beta))
+ self.n = n = aet.as_tensor_variable(intX(n))
+ self.mode = aet.cast(tround(alpha / (alpha + beta)), "int8")
def _random(self, alpha, beta, n, size=None):
size = size or ()
@@ -300,7 +300,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -340,12 +340,12 @@ def logcdf(self, value):
alpha = self.alpha
beta = self.beta
n = self.n
- safe_lower = tt.switch(tt.lt(value, 0), value, 0)
+ safe_lower = aet.switch(aet.lt(value, 0), value, 0)
return bound(
- tt.switch(
- tt.lt(value, n),
- logsumexp(self.logp(tt.arange(safe_lower, value + 1)), keepdims=False),
+ aet.switch(
+ aet.lt(value, n),
+ logsumexp(self.logp(aet.arange(safe_lower, value + 1)), keepdims=False),
0,
),
0 <= value,
@@ -401,14 +401,14 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs):
raise ValueError("Specify one of p and logit_p")
if p is not None:
self._is_logit = False
- self.p = p = tt.as_tensor_variable(floatX(p))
+ self.p = p = aet.as_tensor_variable(floatX(p))
self._logit_p = logit(p)
else:
self._is_logit = True
- self.p = tt.nnet.sigmoid(floatX(logit_p))
- self._logit_p = tt.as_tensor_variable(logit_p)
+ self.p = aet.nnet.sigmoid(floatX(logit_p))
+ self._logit_p = aet.as_tensor_variable(logit_p)
- self.mode = tt.cast(tround(self.p), "int8")
+ self.mode = aet.cast(tround(self.p), "int8")
def random(self, point=None, size=None):
r"""
@@ -438,19 +438,23 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
if self._is_logit:
- lp = tt.switch(value, self._logit_p, -self._logit_p)
+ lp = aet.switch(value, self._logit_p, -self._logit_p)
return -log1pexp(-lp)
else:
p = self.p
return bound(
- tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1
+ aet.switch(value, aet.log(p), aet.log(1 - p)),
+ value >= 0,
+ value <= 1,
+ p >= 0,
+ p <= 1,
)
def logcdf(self, value):
@@ -460,9 +464,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -471,9 +475,9 @@ def logcdf(self, value):
p = self.p
return bound(
- tt.switch(
- tt.lt(value, 1),
- tt.log1p(-p),
+ aet.switch(
+ aet.lt(value, 1),
+ aet.log1p(-p),
0,
),
0 <= value,
@@ -527,8 +531,8 @@ def DiscreteWeibull(q, b, x):
def __init__(self, q, beta, *args, **kwargs):
super().__init__(*args, defaults=("median",), **kwargs)
- self.q = tt.as_tensor_variable(floatX(q))
- self.beta = tt.as_tensor_variable(floatX(beta))
+ self.q = aet.as_tensor_variable(floatX(q))
+ self.beta = aet.as_tensor_variable(floatX(beta))
self.median = self._ppf(0.5)
@@ -540,7 +544,7 @@ def _ppf(self, p):
q = self.q
beta = self.beta
- return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype("int64")
+ return (aet.ceil(aet.power(aet.log(1 - p) / aet.log(q), 1.0 / beta)) - 1).astype("int64")
def _random(self, q, beta, size=None):
p = np.random.uniform(size=size)
@@ -576,7 +580,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -585,7 +589,9 @@ def logp(self, value):
q = self.q
beta = self.beta
return bound(
- tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
+ aet.log(
+ aet.power(q, aet.power(value, beta)) - aet.power(q, aet.power(value + 1, beta))
+ ),
0 <= value,
0 < q,
q < 1,
@@ -599,9 +605,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -611,7 +617,7 @@ def logcdf(self, value):
beta = self.beta
return bound(
- tt.log1p(-tt.power(q, tt.power(value + 1, beta))),
+ aet.log1p(-aet.power(q, aet.power(value + 1, beta))),
0 <= value,
0 < q,
q < 1,
@@ -665,8 +671,8 @@ class Poisson(Discrete):
def __init__(self, mu, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.mode = intX(tt.floor(mu))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.mode = intX(aet.floor(mu))
def random(self, point=None, size=None):
r"""
@@ -696,7 +702,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -705,7 +711,7 @@ def logp(self, value):
mu = self.mu
log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0)
# Return zero when mu and value are both zero
- return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob)
+ return aet.switch(aet.eq(mu, 0) * aet.eq(value, 0), 0, log_prob)
def logcdf(self, value):
"""
@@ -714,22 +720,22 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
TensorVariable
"""
mu = self.mu
- value = tt.floor(value)
+ value = aet.floor(value)
# Avoid C-assertion when the gammaincc function is called with invalid values (#4340)
- safe_mu = tt.switch(tt.lt(mu, 0), 0, mu)
- safe_value = tt.switch(tt.lt(value, 0), 0, value)
+ safe_mu = aet.switch(aet.lt(mu, 0), 0, mu)
+ safe_value = aet.switch(aet.lt(value, 0), 0, value)
return bound(
- tt.log(tt.gammaincc(safe_value + 1, safe_mu)),
+ aet.log(aet.gammaincc(safe_value + 1, safe_mu)),
0 <= value,
0 <= mu,
)
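
The Poisson logcdf above leans on the identity P(X <= k) = Q(k + 1, mu), where Q is the regularized upper incomplete gamma function that gammaincc computes. A quick NumPy/SciPy cross-check, independent of the Aesara graph:

    import numpy as np
    from scipy import special, stats

    mu, k = 3.7, 5
    # Regularized upper incomplete gamma Q(k + 1, mu) equals the Poisson CDF at k.
    lhs = stats.poisson.cdf(k, mu)
    rhs = special.gammaincc(k + 1, mu)
    assert np.isclose(lhs, rhs)
    print(np.log(rhs))  # should agree with pm.Poisson.dist(mu).logcdf(k).eval()
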
@@ -800,16 +806,16 @@ def NegBinom(a, m, x):
def __init__(self, mu=None, alpha=None, p=None, n=None, *args, **kwargs):
super().__init__(*args, **kwargs)
mu, alpha = self.get_mu_alpha(mu, alpha, p, n)
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.mode = intX(tt.floor(mu))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.mode = intX(aet.floor(mu))
def get_mu_alpha(self, mu=None, alpha=None, p=None, n=None):
self._param_type = ["mu", "alpha"]
if alpha is None:
if n is not None:
self._param_type[1] = "n"
- self.n = tt.as_tensor_variable(intX(n))
+ self.n = aet.as_tensor_variable(intX(n))
alpha = n
else:
raise ValueError("Incompatible parametrization. Must specify either alpha or n.")
@@ -819,7 +825,7 @@ def get_mu_alpha(self, mu=None, alpha=None, p=None, n=None):
if mu is None:
if p is not None:
self._param_type[0] = "p"
- self.p = tt.as_tensor_variable(floatX(p))
+ self.p = aet.as_tensor_variable(floatX(p))
mu = alpha * (1 - p) / p
else:
raise ValueError("Incompatible parametrization. Must specify either mu or p.")
@@ -870,7 +876,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -888,7 +894,7 @@ def logp(self, value):
)
# Return Poisson when alpha gets very large.
- return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom)
+ return aet.switch(aet.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom)
def logcdf(self, value):
"""
@@ -915,7 +921,7 @@ def logcdf(self, value):
p = alpha / (self.mu + alpha)
return bound(
- tt.log(incomplete_beta(alpha, tt.floor(value) + 1, p)),
+ aet.log(incomplete_beta(alpha, aet.floor(value) + 1, p)),
0 <= value,
0 < alpha,
0 <= p,
@@ -965,7 +971,7 @@ class Geometric(Discrete):
def __init__(self, p, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.p = p = tt.as_tensor_variable(floatX(p))
+ self.p = p = aet.as_tensor_variable(floatX(p))
self.mode = 1
def random(self, point=None, size=None):
@@ -996,14 +1002,14 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
p = self.p
- return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
+ return bound(aet.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
def logcdf(self, value):
"""
@@ -1012,9 +1018,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -1023,7 +1029,7 @@ def logcdf(self, value):
p = self.p
return bound(
- log1mexp(-tt.log1p(-p) * value),
+ log1mexp(-aet.log1p(-p) * value),
0 <= value,
0 <= p,
p <= 1,
@@ -1081,7 +1087,7 @@ def __init__(self, N, k, n, *args, **kwargs):
self.N = intX(N)
self.k = intX(k)
self.n = intX(n)
- self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))
+ self.mode = intX(aet.floor((n + 1) * (k + 1) / (N + 2)))
def random(self, point=None, size=None):
r"""
@@ -1120,7 +1126,7 @@ def logp(self, value):
----------
value : numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1140,8 +1146,8 @@ def logp(self, value):
- betaln(tot + 1, 1)
)
# value in [max(0, n - N + k), min(k, n)]
- lower = tt.switch(tt.gt(n - N + k, 0), n - N + k, 0)
- upper = tt.switch(tt.lt(k, n), k, n)
+ lower = aet.switch(aet.gt(n - N + k, 0), n - N + k, 0)
+ upper = aet.switch(aet.lt(k, n), k, n)
return bound(result, lower <= value, value <= upper)
def logcdf(self, value):
@@ -1168,12 +1174,12 @@ def logcdf(self, value):
N = self.N
n = self.n
k = self.k
- safe_lower = tt.switch(tt.lt(value, 0), value, 0)
+ safe_lower = aet.switch(aet.lt(value, 0), value, 0)
return bound(
- tt.switch(
- tt.lt(value, n),
- logsumexp(self.logp(tt.arange(safe_lower, value + 1)), keepdims=False),
+ aet.switch(
+ aet.lt(value, n),
+ logsumexp(self.logp(aet.arange(safe_lower, value + 1)), keepdims=False),
0,
),
0 <= value,
@@ -1226,9 +1232,9 @@ class DiscreteUniform(Discrete):
def __init__(self, lower, upper, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.lower = intX(tt.floor(lower))
- self.upper = intX(tt.floor(upper))
- self.mode = tt.maximum(intX(tt.floor((upper + lower) / 2.0)), self.lower)
+ self.lower = intX(aet.floor(lower))
+ self.upper = intX(aet.floor(upper))
+ self.mode = aet.maximum(intX(aet.floor((upper + lower) / 2.0)), self.lower)
def _random(self, lower, upper, size=None):
# This way seems to be the only way to deal with lower and upper
@@ -1264,7 +1270,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1272,7 +1278,7 @@ def logp(self, value):
"""
upper = self.upper
lower = self.lower
- return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper)
+ return bound(-aet.log(upper - lower + 1), lower <= value, value <= upper)
def logcdf(self, value):
"""
@@ -1281,9 +1287,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -1293,9 +1299,10 @@ def logcdf(self, value):
lower = self.lower
return bound(
- tt.switch(
- tt.lt(value, upper),
- tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1),
+ aet.switch(
+ aet.lt(value, upper),
+ aet.log(aet.minimum(aet.floor(value), upper) - lower + 1)
+ - aet.log(upper - lower + 1),
0,
),
lower <= value,
@@ -1341,17 +1348,17 @@ class Categorical(Discrete):
def __init__(self, p, *args, **kwargs):
super().__init__(*args, **kwargs)
try:
- self.k = tt.shape(p)[-1].tag.test_value
+ self.k = aet.shape(p)[-1].tag.test_value
except AttributeError:
- self.k = tt.shape(p)[-1]
- p = tt.as_tensor_variable(floatX(p))
+ self.k = aet.shape(p)[-1]
+ p = aet.as_tensor_variable(floatX(p))
# From #2082, it may be dangerous to automatically rescale p at this
# point without checking for positiveness
self.p = p
- self.mode = tt.argmax(p, axis=-1)
+ self.mode = aet.argmax(p, axis=-1)
if self.mode.ndim == 1:
- self.mode = tt.squeeze(self.mode)
+ self.mode = aet.squeeze(self.mode)
def random(self, point=None, size=None):
r"""
@@ -1389,7 +1396,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1399,27 +1406,27 @@ def logp(self, value):
k = self.k
# Clip values before using them for indexing
- value_clip = tt.clip(value, 0, k - 1)
+ value_clip = aet.clip(value, 0, k - 1)
- p = p_ / tt.sum(p_, axis=-1, keepdims=True)
+ p = p_ / aet.sum(p_, axis=-1, keepdims=True)
if p.ndim > 1:
if p.ndim > value_clip.ndim:
- value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
+ value_clip = aet.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
elif p.ndim < value_clip.ndim:
- p = tt.shape_padleft(p, value_clip.ndim - p_.ndim)
+ p = aet.shape_padleft(p, value_clip.ndim - p_.ndim)
pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
- a = tt.log(
+ a = aet.log(
take_along_axis(
p.dimshuffle(pattern),
value_clip,
)
)
else:
- a = tt.log(p[value_clip])
+ a = aet.log(p[value_clip])
return bound(
- a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1)
+ a, value >= 0, value <= (k - 1), aet.all(p_ >= 0, axis=-1), aet.all(p <= 1, axis=-1)
)
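
The Categorical logp above picks one probability per observation with take_along_axis (now imported from pymc3.aesaraf). Ignoring the dimshuffle bookkeeping, the NumPy analogue of that indexing step, shown purely for illustration:

    import numpy as np

    p = np.array([[0.2, 0.5, 0.3],
                  [0.1, 0.1, 0.8]])   # one row of class probabilities per observation
    value = np.array([1, 2])          # observed category index per row

    # Pick p[i, value[i]] for every row, then take the log, as Categorical.logp does.
    logp = np.log(np.take_along_axis(p, value[:, None], axis=-1)).squeeze(-1)
    print(logp)  # [log(0.5), log(0.8)]
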
@@ -1439,7 +1446,7 @@ def __init__(self, c, *args, **kwargs):
DeprecationWarning,
)
super().__init__(*args, **kwargs)
- self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c)
+ self.mean = self.median = self.mode = self.c = c = aet.as_tensor_variable(c)
def random(self, point=None, size=None):
r"""
@@ -1474,14 +1481,14 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
TensorVariable
"""
c = self.c
- return bound(0, tt.eq(value, c))
+ return bound(0, aet.eq(value, c))
ConstantDist = Constant
@@ -1539,8 +1546,8 @@ class ZeroInflatedPoisson(Discrete):
def __init__(self, psi, theta, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.theta = theta = tt.as_tensor_variable(floatX(theta))
- self.psi = tt.as_tensor_variable(floatX(psi))
+ self.theta = theta = aet.as_tensor_variable(floatX(theta))
+ self.psi = aet.as_tensor_variable(floatX(psi))
self.pois = Poisson.dist(theta)
self.mode = self.pois.mode
@@ -1574,7 +1581,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1583,10 +1590,10 @@ def logp(self, value):
psi = self.psi
theta = self.theta
- logp_val = tt.switch(
- tt.gt(value, 0),
- tt.log(psi) + self.pois.logp(value),
- logaddexp(tt.log1p(-psi), tt.log(psi) - theta),
+ logp_val = aet.switch(
+ aet.gt(value, 0),
+ aet.log(psi) + self.pois.logp(value),
+ logaddexp(aet.log1p(-psi), aet.log(psi) - theta),
)
return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta)
@@ -1598,9 +1605,9 @@ def logcdf(self, value):
Parameters
----------
- value: numeric or np.ndarray or theano.tensor
+ value: numeric or np.ndarray or aesara.tensor
Value(s) for which log CDF is calculated. If the log CDF for multiple
- values are desired the values must be provided in a numpy array or theano tensor.
+ values are desired the values must be provided in a numpy array or aesara tensor.
Returns
-------
@@ -1609,7 +1616,7 @@ def logcdf(self, value):
psi = self.psi
return bound(
- logaddexp(tt.log1p(-psi), tt.log(psi) + self.pois.logcdf(value)),
+ logaddexp(aet.log1p(-psi), aet.log(psi) + self.pois.logcdf(value)),
0 <= value,
0 <= psi,
psi <= 1,
@@ -1669,9 +1676,9 @@ class ZeroInflatedBinomial(Discrete):
def __init__(self, psi, n, p, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.n = n = tt.as_tensor_variable(intX(n))
- self.p = p = tt.as_tensor_variable(floatX(p))
- self.psi = psi = tt.as_tensor_variable(floatX(psi))
+ self.n = n = aet.as_tensor_variable(intX(n))
+ self.p = p = aet.as_tensor_variable(floatX(p))
+ self.psi = psi = aet.as_tensor_variable(floatX(psi))
self.bin = Binomial.dist(n, p)
self.mode = self.bin.mode
@@ -1705,7 +1712,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1715,10 +1722,10 @@ def logp(self, value):
p = self.p
n = self.n
- logp_val = tt.switch(
- tt.gt(value, 0),
- tt.log(psi) + self.bin.logp(value),
- logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)),
+ logp_val = aet.switch(
+ aet.gt(value, 0),
+ aet.log(psi) + self.bin.logp(value),
+ logaddexp(aet.log1p(-psi), aet.log(psi) + n * aet.log1p(-p)),
)
return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1)
@@ -1746,7 +1753,7 @@ def logcdf(self, value):
psi = self.psi
return bound(
- logaddexp(tt.log1p(-psi), tt.log(psi) + self.bin.logcdf(value)),
+ logaddexp(aet.log1p(-psi), aet.log(psi) + self.bin.logcdf(value)),
0 <= value,
0 <= psi,
psi <= 1,
@@ -1823,9 +1830,9 @@ def ZeroInfNegBinom(a, m, psi, x):
def __init__(self, psi, mu, alpha, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.mu = mu = tt.as_tensor_variable(floatX(mu))
- self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
- self.psi = psi = tt.as_tensor_variable(floatX(psi))
+ self.mu = mu = aet.as_tensor_variable(floatX(mu))
+ self.alpha = alpha = aet.as_tensor_variable(floatX(alpha))
+ self.psi = psi = aet.as_tensor_variable(floatX(psi))
self.nb = NegativeBinomial.dist(mu, alpha)
self.mode = self.nb.mode
@@ -1872,7 +1879,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -1882,12 +1889,12 @@ def logp(self, value):
mu = self.mu
psi = self.psi
- logp_other = tt.log(psi) + self.nb.logp(value)
+ logp_other = aet.log(psi) + self.nb.logp(value)
logp_0 = logaddexp(
- tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))
+ aet.log1p(-psi), aet.log(psi) + alpha * (aet.log(alpha) - aet.log(alpha + mu))
)
- logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0)
+ logp_val = aet.switch(aet.gt(value, 0), logp_other, logp_0)
return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0)
@@ -1913,7 +1920,7 @@ def logcdf(self, value):
psi = self.psi
return bound(
- logaddexp(tt.log1p(-psi), tt.log(psi) + self.nb.logcdf(value)),
+ logaddexp(aet.log1p(-psi), aet.log(psi) + self.nb.logcdf(value)),
0 <= value,
0 <= psi,
psi <= 1,
@@ -1987,15 +1994,15 @@ class OrderedLogistic(Categorical):
"""
def __init__(self, eta, cutpoints, *args, **kwargs):
- self.eta = tt.as_tensor_variable(floatX(eta))
- self.cutpoints = tt.as_tensor_variable(cutpoints)
+ self.eta = aet.as_tensor_variable(floatX(eta))
+ self.cutpoints = aet.as_tensor_variable(cutpoints)
- pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta))
- p_cum = tt.concatenate(
+ pa = sigmoid(self.cutpoints - aet.shape_padright(self.eta))
+ p_cum = aet.concatenate(
[
- tt.zeros_like(tt.shape_padright(pa[..., 0])),
+ aet.zeros_like(aet.shape_padright(pa[..., 0])),
pa,
- tt.ones_like(tt.shape_padright(pa[..., 0])),
+ aet.ones_like(aet.shape_padright(pa[..., 0])),
],
axis=-1,
)
@@ -2076,23 +2083,23 @@ class OrderedProbit(Categorical):
def __init__(self, eta, cutpoints, *args, **kwargs):
- self.eta = tt.as_tensor_variable(floatX(eta))
- self.cutpoints = tt.as_tensor_variable(cutpoints)
+ self.eta = aet.as_tensor_variable(floatX(eta))
+ self.cutpoints = aet.as_tensor_variable(cutpoints)
- probits = tt.shape_padright(self.eta) - self.cutpoints
- _log_p = tt.concatenate(
+ probits = aet.shape_padright(self.eta) - self.cutpoints
+ _log_p = aet.concatenate(
[
- tt.shape_padright(normal_lccdf(0, 1, probits[..., 0])),
+ aet.shape_padright(normal_lccdf(0, 1, probits[..., 0])),
log_diff_normal_cdf(0, 1, probits[..., :-1], probits[..., 1:]),
- tt.shape_padright(normal_lcdf(0, 1, probits[..., -1])),
+ aet.shape_padright(normal_lcdf(0, 1, probits[..., -1])),
],
axis=-1,
)
- _log_p = tt.as_tensor_variable(floatX(_log_p))
+ _log_p = aet.as_tensor_variable(floatX(_log_p))
self._log_p = _log_p
- self.mode = tt.argmax(_log_p, axis=-1)
- p = tt.exp(_log_p)
+ self.mode = aet.argmax(_log_p, axis=-1)
+ p = aet.exp(_log_p)
super().__init__(p=p, *args, **kwargs)
@@ -2104,7 +2111,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired the values must be provided in a numpy array or aesara tensor
Returns
-------
@@ -2114,13 +2121,13 @@ def logp(self, value):
k = self.k
# Clip values before using them for indexing
- value_clip = tt.clip(value, 0, k - 1)
+ value_clip = aet.clip(value, 0, k - 1)
if logp.ndim > 1:
if logp.ndim > value_clip.ndim:
- value_clip = tt.shape_padleft(value_clip, logp.ndim - value_clip.ndim)
+ value_clip = aet.shape_padleft(value_clip, logp.ndim - value_clip.ndim)
elif logp.ndim < value_clip.ndim:
- logp = tt.shape_padleft(logp, value_clip.ndim - logp.ndim)
+ logp = aet.shape_padleft(logp, value_clip.ndim - logp.ndim)
pattern = (logp.ndim - 1,) + tuple(range(logp.ndim - 1))
a = take_along_axis(
logp.dimshuffle(pattern),
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 7087772227..e154e016f3 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -19,24 +19,25 @@
"""
import platform
+import aesara
+import aesara.tensor as aet
import numpy as np
import scipy.linalg
import scipy.stats
-import theano
-import theano.tensor as tt
-from theano import scan
-from theano.compile.builders import OpFromGraph
-from theano.graph.basic import Apply
-from theano.graph.op import Op
-from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex
-from theano.scan import until
-from theano.tensor.slinalg import Cholesky
+from aesara import scan
+from aesara.compile.builders import OpFromGraph
+from aesara.graph.basic import Apply
+from aesara.graph.op import Op
+from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex
+from aesara.scan import until
+from aesara.tensor.elemwise import Elemwise
+from aesara.tensor.slinalg import Cholesky, Solve
+from pymc3.aesaraf import floatX
from pymc3.distributions.shape_utils import to_tuple
from pymc3.distributions.special import gammaln
from pymc3.model import modelcontext
-from pymc3.theanof import floatX
f = floatX
c = -0.5 * np.log(2.0 * np.pi)
@@ -86,7 +87,7 @@ def bound(logp, *conditions, **kwargs):
else:
alltrue = alltrue_scalar
- return tt.switch(alltrue(conditions), logp, -np.inf)
+ return aet.switch(alltrue(conditions), logp, -np.inf)
def alltrue_elemwise(vals):
@@ -97,7 +98,7 @@ def alltrue_elemwise(vals):
def alltrue_scalar(vals):
- return tt.all([tt.all(1 * val) for val in vals])
+ return aet.all([aet.all(1 * val) for val in vals])
def logpow(x, m):
@@ -105,7 +106,7 @@ def logpow(x, m):
Calculates log(x**m) since m*log(x) will fail when m, x = 0.
"""
# return m * log(x)
- return tt.switch(tt.eq(x, 0), tt.switch(tt.eq(m, 0), 0.0, -np.inf), m * tt.log(x))
+ return aet.switch(aet.eq(x, 0), aet.switch(aet.eq(m, 0), 0.0, -np.inf), m * aet.log(x))
def factln(n):
@@ -124,25 +125,25 @@ def std_cdf(x):
"""
Calculates the standard normal cumulative distribution function.
"""
- return 0.5 + 0.5 * tt.erf(x / tt.sqrt(2.0))
+ return 0.5 + 0.5 * aet.erf(x / aet.sqrt(2.0))
def normal_lcdf(mu, sigma, x):
"""Compute the log of the cumulative density function of the normal."""
z = (x - mu) / sigma
- return tt.switch(
- tt.lt(z, -1.0),
- tt.log(tt.erfcx(-z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0,
- tt.log1p(-tt.erfc(z / tt.sqrt(2.0)) / 2.0),
+ return aet.switch(
+ aet.lt(z, -1.0),
+ aet.log(aet.erfcx(-z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
+ aet.log1p(-aet.erfc(z / aet.sqrt(2.0)) / 2.0),
)
def normal_lccdf(mu, sigma, x):
z = (x - mu) / sigma
- return tt.switch(
- tt.gt(z, 1.0),
- tt.log(tt.erfcx(z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0,
- tt.log1p(-tt.erfc(-z / tt.sqrt(2.0)) / 2.0),
+ return aet.switch(
+ aet.gt(z, 1.0),
+ aet.log(aet.erfcx(z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
+ aet.log1p(-aet.erfc(-z / aet.sqrt(2.0)) / 2.0),
)
@@ -167,37 +168,38 @@ def log_diff_normal_cdf(mu, sigma, x, y):
log (\\Phi(x) - \\Phi(y))
"""
- x = (x - mu) / sigma / tt.sqrt(2.0)
- y = (y - mu) / sigma / tt.sqrt(2.0)
+ x = (x - mu) / sigma / aet.sqrt(2.0)
+ y = (y - mu) / sigma / aet.sqrt(2.0)
# To stabilize the computation, consider these three regions:
# 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) =1 - e^{-y^2} erfcx(y)
# 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
# 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
- return tt.log(0.5) + tt.switch(
- tt.gt(y, 0),
- -tt.square(y) + tt.log(tt.erfcx(y) - tt.exp(tt.square(y) - tt.square(x)) * tt.erfcx(x)),
- tt.switch(
- tt.lt(x, 0), # 0 > x > y
- -tt.square(x)
- + tt.log(tt.erfcx(-x) - tt.exp(tt.square(x) - tt.square(y)) * tt.erfcx(-y)),
- tt.log(tt.erf(x) - tt.erf(y)), # x >0 > y
+ return aet.log(0.5) + aet.switch(
+ aet.gt(y, 0),
+ -aet.square(y)
+ + aet.log(aet.erfcx(y) - aet.exp(aet.square(y) - aet.square(x)) * aet.erfcx(x)),
+ aet.switch(
+ aet.lt(x, 0), # 0 > x > y
+ -aet.square(x)
+ + aet.log(aet.erfcx(-x) - aet.exp(aet.square(x) - aet.square(y)) * aet.erfcx(-y)),
+ aet.log(aet.erf(x) - aet.erf(y)), # x >0 > y
),
)
def sigma2rho(sigma):
"""
- `sigma -> rho` theano converter
+ `sigma -> rho` aesara converter
:math:`mu + sigma*e = mu + log(1+exp(rho))*e`"""
- return tt.log(tt.exp(tt.abs_(sigma)) - 1.0)
+ return aet.log(aet.exp(aet.abs_(sigma)) - 1.0)
def rho2sigma(rho):
"""
- `rho -> sigma` theano converter
+ `rho -> sigma` aesara converter
:math:`mu + sigma*e = mu + log(1+exp(rho))*e`"""
- return tt.nnet.softplus(rho)
+ return aet.nnet.softplus(rho)
rho2sd = rho2sigma
@@ -240,13 +242,13 @@ def log_normal(x, mean, **kwargs):
if sigma is not None:
std = sigma
elif w is not None:
- std = tt.exp(w)
+ std = aet.exp(w)
elif rho is not None:
std = rho2sigma(rho)
else:
std = tau ** (-1)
std += f(eps)
- return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2.0 * std ** 2)
+ return f(c) - aet.log(aet.abs_(std)) - (x - mean) ** 2 / (2.0 * std ** 2)
def MvNormalLogp():
@@ -256,34 +258,34 @@ def MvNormalLogp():
Parameters
----------
- cov: tt.matrix
+ cov: aet.matrix
The covariance matrix.
- delta: tt.matrix
+ delta: aet.matrix
Array of deviations from the mean.
"""
- cov = tt.matrix("cov")
+ cov = aet.matrix("cov")
cov.tag.test_value = floatX(np.eye(3))
- delta = tt.matrix("delta")
+ delta = aet.matrix("delta")
delta.tag.test_value = floatX(np.zeros((2, 3)))
- solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
- solve_upper = tt.slinalg.Solve(A_structure="upper_triangular")
+ solve_lower = Solve(A_structure="lower_triangular")
+ solve_upper = Solve(A_structure="upper_triangular")
cholesky = Cholesky(lower=True, on_error="nan")
n, k = delta.shape
n, k = f(n), f(k)
chol_cov = cholesky(cov)
- diag = tt.nlinalg.diag(chol_cov)
- ok = tt.all(diag > 0)
+ diag = aet.nlinalg.diag(chol_cov)
+ ok = aet.all(diag > 0)
- chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
+ chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
delta_trans = solve_lower(chol_cov, delta.T).T
- result = n * k * tt.log(f(2) * np.pi)
- result += f(2) * n * tt.sum(tt.log(diag))
+ result = n * k * aet.log(f(2) * np.pi)
+ result += f(2) * n * aet.sum(aet.log(diag))
result += (delta_trans ** f(2)).sum()
result = f(-0.5) * result
- logp = tt.switch(ok, result, -np.inf)
+ logp = aet.switch(ok, result, -np.inf)
def dlogp(inputs, gradients):
(g_logp,) = gradients
@@ -293,21 +295,21 @@ def dlogp(inputs, gradients):
n, k = delta.shape
chol_cov = cholesky(cov)
- diag = tt.nlinalg.diag(chol_cov)
- ok = tt.all(diag > 0)
+ diag = aet.nlinalg.diag(chol_cov)
+ ok = aet.all(diag > 0)
- chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
+ chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
delta_trans = solve_lower(chol_cov, delta.T).T
- inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
+ inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
g_cov = solve_upper(chol_cov.T, inner)
g_cov = solve_upper(chol_cov.T, g_cov.T)
tau_delta = solve_upper(chol_cov.T, delta_trans.T)
g_delta = tau_delta.T
- g_cov = tt.switch(ok, g_cov, -np.nan)
- g_delta = tt.switch(ok, g_delta, -np.nan)
+ g_cov = aet.switch(ok, g_cov, -np.nan)
+ g_delta = aet.switch(ok, g_delta, -np.nan)
return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
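
The MvNormalLogp graph above evaluates -0.5 * (n*k*log(2*pi) + n*log|cov| + quad), where quad sums delta_i^T cov^{-1} delta_i over the rows of delta, i.e. the joint log-density of those rows under a zero-mean multivariate normal. A NumPy/SciPy check of that algebra, independent of the Op and its custom gradient:

    import numpy as np
    from scipy import linalg, stats

    rng = np.random.default_rng(0)
    cov = np.array([[2.0, 0.3], [0.3, 1.0]])
    delta = rng.normal(size=(5, 2))           # n = 5 deviations, k = 2 dimensions

    n, k = delta.shape
    chol = np.linalg.cholesky(cov)
    delta_trans = linalg.solve_triangular(chol, delta.T, lower=True).T

    quad = (delta_trans ** 2).sum()           # sum_i delta_i^T cov^{-1} delta_i
    logdet = 2 * np.log(np.diag(chol)).sum()  # log|cov|
    logp = -0.5 * (n * k * np.log(2 * np.pi) + n * logdet + quad)

    reference = stats.multivariate_normal(np.zeros(k), cov).logpdf(delta).sum()
    assert np.isclose(logp, reference)
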
@@ -316,7 +318,7 @@ def dlogp(inputs, gradients):
class SplineWrapper(Op):
"""
- Creates a theano operation from scipy.interpolate.UnivariateSpline
+ Creates an aesara operation from scipy.interpolate.UnivariateSpline
"""
__props__ = ("spline",)
@@ -325,7 +327,7 @@ def __init__(self, spline):
self.spline = spline
def make_node(self, x):
- x = tt.as_tensor_variable(x)
+ x = aet.as_tensor_variable(x)
return Apply(self, [x], [x.type()])
@property
@@ -363,7 +365,7 @@ def impl(self, x):
i1e_scalar = I1e(upgrade_to_float_no_complex, name="i1e")
-i1e = tt.Elemwise(i1e_scalar, name="Elemwise{i1e,no_inplace}")
+i1e = Elemwise(i1e_scalar, name="Elemwise{i1e,no_inplace}")
class I0e(UnaryScalarOp):
@@ -379,11 +381,11 @@ def impl(self, x):
def grad(self, inp, grads):
(x,) = inp
(gz,) = grads
- return (gz * (i1e_scalar(x) - theano.scalar.sgn(x) * i0e_scalar(x)),)
+ return (gz * (i1e_scalar(x) - aesara.scalar.sgn(x) * i0e_scalar(x)),)
i0e_scalar = I0e(upgrade_to_float_no_complex, name="i0e")
-i0e = tt.Elemwise(i0e_scalar, name="Elemwise{i0e,no_inplace}")
+i0e = Elemwise(i0e_scalar, name="Elemwise{i0e,no_inplace}")
def random_choice(*args, **kwargs):
@@ -437,13 +439,13 @@ def incomplete_beta_cfe(a, b, x, small):
based on Cephes library by Steve Moshier (incbet.c).
small: Choose element-wise which continued fraction expansion to use.
"""
- BIG = tt.constant(4.503599627370496e15, dtype="float64")
- BIGINV = tt.constant(2.22044604925031308085e-16, dtype="float64")
- THRESH = tt.constant(3.0 * np.MachAr().eps, dtype="float64")
+ BIG = aet.constant(4.503599627370496e15, dtype="float64")
+ BIGINV = aet.constant(2.22044604925031308085e-16, dtype="float64")
+ THRESH = aet.constant(3.0 * np.MachAr().eps, dtype="float64")
- zero = tt.constant(0.0, dtype="float64")
- one = tt.constant(1.0, dtype="float64")
- two = tt.constant(2.0, dtype="float64")
+ zero = aet.constant(0.0, dtype="float64")
+ one = aet.constant(1.0, dtype="float64")
+ two = aet.constant(2.0, dtype="float64")
r = one
k1 = a
@@ -452,11 +454,11 @@ def incomplete_beta_cfe(a, b, x, small):
k5 = one
k8 = a + two
- k2 = tt.switch(small, a + b, b - one)
- k6 = tt.switch(small, b - one, a + b)
- k7 = tt.switch(small, k4, a + one)
- k26update = tt.switch(small, one, -one)
- x = tt.switch(small, x, x / (one - x))
+ k2 = aet.switch(small, a + b, b - one)
+ k6 = aet.switch(small, b - one, a + b)
+ k7 = aet.switch(small, k4, a + one)
+ k26update = aet.switch(small, one, -one)
+ x = aet.switch(small, x, x / (one - x))
pkm2 = zero
qkm2 = one
@@ -482,7 +484,7 @@ def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
qkm1 = qk
old_r = r
- r = tt.switch(tt.eq(qk, zero), r, pk / qk)
+ r = aet.switch(aet.eq(qk, zero), r, pk / qk)
k1 += one
k2 += k26update
@@ -493,30 +495,32 @@ def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
k7 += two
k8 += two
- big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG)
- biginv_cond = tt.or_(tt.lt(tt.abs_(qk), BIGINV), tt.lt(tt.abs_(pk), BIGINV))
+ big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
+ biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV), aet.lt(aet.abs_(pk), BIGINV))
- pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2)
- pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1)
- qkm2 = tt.switch(big_cond, qkm2 * BIGINV, qkm2)
- qkm1 = tt.switch(big_cond, qkm1 * BIGINV, qkm1)
+ pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
+ pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
+ qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
+ qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)
- pkm2 = tt.switch(biginv_cond, pkm2 * BIG, pkm2)
- pkm1 = tt.switch(biginv_cond, pkm1 * BIG, pkm1)
- qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2)
- qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1)
+ pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
+ pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
+ qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
+ qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)
return (
(pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
- until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))),
+ until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
)
(pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
_step,
- sequences=[tt.arange(0, 300)],
+ sequences=[aet.arange(0, 300)],
outputs_info=[
e
- for e in tt.cast((pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64")
+ for e in aet.cast(
+ (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64"
+ )
],
)
@@ -528,28 +532,28 @@ def incomplete_beta_ps(a, b, value):
Use when b*x is small and value not too close to 1.
Based on Cephes library by Steve Moshier (incbet.c)
"""
- one = tt.constant(1, dtype="float64")
+ one = aet.constant(1, dtype="float64")
ai = one / a
u = (one - b) * value
t1 = u / (a + one)
t = u
threshold = np.MachAr().eps * ai
- s = tt.constant(0, dtype="float64")
+ s = aet.constant(0, dtype="float64")
def _step(i, t, s):
t *= (i - b) * value / i
step = t / (a + i)
s += step
- return ((t, s), until(tt.abs_(step) < threshold))
+ return ((t, s), until(aet.abs_(step) < threshold))
(t, s), _ = scan(
- _step, sequences=[tt.arange(2, 302)], outputs_info=[e for e in tt.cast((t, s), "float64")]
+ _step, sequences=[aet.arange(2, 302)], outputs_info=[e for e in aet.cast((t, s), "float64")]
)
s = s[-1] + t1 + ai
- t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * tt.log(value) + tt.log(s)
- return tt.exp(t)
+ t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * aet.log(value) + aet.log(s)
+ return aet.exp(t)
def incomplete_beta(a, b, value):
@@ -557,37 +561,37 @@ def incomplete_beta(a, b, value):
Power series and continued fraction expansions chosen for best numerical
convergence across the board based on inputs.
"""
- machep = tt.constant(np.MachAr().eps, dtype="float64")
- one = tt.constant(1, dtype="float64")
+ machep = aet.constant(np.MachAr().eps, dtype="float64")
+ one = aet.constant(1, dtype="float64")
w = one - value
ps = incomplete_beta_ps(a, b, value)
- flip = tt.gt(value, (a / (a + b)))
+ flip = aet.gt(value, (a / (a + b)))
aa, bb = a, b
- a = tt.switch(flip, bb, aa)
- b = tt.switch(flip, aa, bb)
- xc = tt.switch(flip, value, w)
- x = tt.switch(flip, w, value)
+ a = aet.switch(flip, bb, aa)
+ b = aet.switch(flip, aa, bb)
+ xc = aet.switch(flip, value, w)
+ x = aet.switch(flip, w, value)
tps = incomplete_beta_ps(a, b, x)
- tps = tt.switch(tt.le(tps, machep), one - machep, one - tps)
+ tps = aet.switch(aet.le(tps, machep), one - machep, one - tps)
# Choose which continued fraction expansion for best convergence.
- small = tt.lt(x * (a + b - 2.0) - (a - one), 0.0)
+ small = aet.lt(x * (a + b - 2.0) - (a - one), 0.0)
cfe = incomplete_beta_cfe(a, b, x, small)
- w = tt.switch(small, cfe, cfe / xc)
+ w = aet.switch(small, cfe, cfe / xc)
# Direct incomplete beta accounting for flipped a, b.
- t = tt.exp(
- a * tt.log(x) + b * tt.log(xc) + gammaln(a + b) - gammaln(a) - gammaln(b) + tt.log(w / a)
+ t = aet.exp(
+ a * aet.log(x) + b * aet.log(xc) + gammaln(a + b) - gammaln(a) - gammaln(b) + aet.log(w / a)
)
- t = tt.switch(flip, tt.switch(tt.le(t, machep), one - machep, one - t), t)
- return tt.switch(
- tt.and_(flip, tt.and_(tt.le((b * x), one), tt.le(x, 0.95))),
+ t = aet.switch(flip, aet.switch(aet.le(t, machep), one - machep, one - t), t)
+ return aet.switch(
+ aet.and_(flip, aet.and_(aet.le((b * x), one), aet.le(x, 0.95))),
tps,
- tt.switch(tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), ps, t),
+ aet.switch(aet.and_(aet.le(b * value, one), aet.le(value, 0.95)), ps, t),
)
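
The hunks above only swap the `tt` namespace for `aet`; the Cephes-style incomplete-beta math itself is unchanged. As a quick sanity check outside the diff (a minimal sketch, assuming these helpers still live in pymc3/distributions/dist_math.py and that eager evaluation with `.eval()` is acceptable for a spot check), the result should agree with SciPy's regularized incomplete beta function:

import numpy as np
from scipy import special
from pymc3.distributions.dist_math import incomplete_beta

a, b, x = 2.5, 3.0, 0.4
# incomplete_beta builds an Aesara graph; .eval() forces eager evaluation.
approx = incomplete_beta(a, b, x).eval()
np.testing.assert_allclose(approx, special.betainc(a, b, x), rtol=1e-6)
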
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index c24a9d9df6..d0ef10b236 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -27,12 +27,16 @@
if TYPE_CHECKING:
from typing import Optional, Callable
+import aesara
+import aesara.graph.basic
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.graph.basic
-import theano.tensor as tt
-from theano import function
+from aesara import function
+from aesara.compile.sharedvalue import SharedVariable
+from aesara.graph.basic import Constant
+from aesara.tensor.type import TensorType as AesaraTensorType
+from aesara.tensor.var import TensorVariable
from pymc3.distributions.shape_utils import (
broadcast_dist_samples_shape,
@@ -49,7 +53,7 @@
build_named_node_tree,
)
from pymc3.util import get_repr_for_variable, get_var_name
-from pymc3.vartypes import string_types, theano_constant
+from pymc3.vartypes import string_types
__all__ = [
"DensityDist",
@@ -164,13 +168,13 @@ def getattr_value(self, val):
if isinstance(val, string_types):
val = getattr(self, val)
- if isinstance(val, tt.TensorVariable):
+ if isinstance(val, TensorVariable):
return val.tag.test_value
- if isinstance(val, tt.sharedvar.SharedVariable):
+ if isinstance(val, SharedVariable):
return val.get_value()
- if isinstance(val, theano_constant):
+ if isinstance(val, Constant):
return val.value
return val
@@ -264,7 +268,7 @@ def logp_sum(self, *args, **kwargs):
Subclasses can use this to improve the speed of logp evaluations
if only the sum of the logp values is needed.
"""
- return tt.sum(self.logp(*args, **kwargs))
+ return aet.sum(self.logp(*args, **kwargs))
__latex__ = _repr_latex_
@@ -272,7 +276,7 @@ def logp_sum(self, *args, **kwargs):
def TensorType(dtype, shape, broadcastable=None):
if broadcastable is None:
broadcastable = np.atleast_1d(shape) == 1
- return tt.TensorType(str(dtype), broadcastable)
+ return AesaraTensorType(str(dtype), broadcastable)
class NoDistribution(Distribution):
@@ -311,7 +315,7 @@ def logp(self, x):
-------
TensorVariable
"""
- return tt.zeros_like(x)
+ return aet.zeros_like(x)
def _distr_parameters_for_repr(self):
return []
@@ -322,7 +326,7 @@ class Discrete(Distribution):
def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs):
if dtype is None:
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
dtype = "int16"
else:
dtype = "int64"
@@ -340,7 +344,7 @@ class Continuous(Distribution):
def __init__(self, shape=(), dtype=None, defaults=("median", "mean", "mode"), *args, **kwargs):
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)
@@ -371,7 +375,7 @@ def __init__(
logp: callable
A callable that has the following signature ``logp(value)`` and
- returns a theano tensor that represents the distribution's log
+ returns an Aesara tensor that represents the distribution's log
probability density.
shape: tuple (Optional): defaults to `()`
The shape of the distribution. The default value indicates a scalar.
@@ -526,7 +530,7 @@ def __init__(
"""
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
super().__init__(shape, dtype, testval, *args, **kwargs)
self.logp = logp
if type(self.logp) == types.MethodType:
@@ -608,7 +612,7 @@ def random(self, point=None, size=None, **kwargs):
"DensityDist random method cannot "
"adapt to shape changes in the distribution's "
"shape, which sometimes are necessary for sampling "
- "when the model uses pymc3.Data or theano shared "
+ "when the model uses pymc3.Data or aesara shared "
"tensors, or when the DensityDist has observed "
"values.\n"
"This check can be disabled by passing "
@@ -673,9 +677,7 @@ def __init__(self):
def is_fast_drawable(var):
- return isinstance(
- var, (numbers.Number, np.ndarray, theano_constant, tt.sharedvar.SharedVariable)
- )
+ return isinstance(var, (numbers.Number, np.ndarray, Constant, SharedVariable))
def draw_values(params, point=None, size=None):
@@ -690,7 +692,7 @@ def draw_values(params, point=None, size=None):
c) parameter can be fixed using tag.test_value (last resort)
3) The parameter is a tensor variable/constant. Can be evaluated using
- theano.function, but a variable may contain nodes which
+ aesara.function, but a variable may contain nodes which
a) are named parameters in the point
b) are RVs with a random method
@@ -756,20 +758,19 @@ def draw_values(params, point=None, size=None):
if (next_, size) in drawn:
# If the node already has a givens value, skip it
continue
- elif isinstance(next_, (theano_constant, tt.sharedvar.SharedVariable)):
- # If the node is a theano.tensor.TensorConstant or a
- # theano.tensor.sharedvar.SharedVariable, its value will be
- # available automatically in _compile_theano_function so
- # we can skip it. Furthermore, if this node was treated as a
- # TensorVariable that should be compiled by theano in
- # _compile_theano_function, it would raise a `TypeError:
- # ('Constants not allowed in param list', ...)` for
- # TensorConstant, and a `TypeError: Cannot use a shared
- # variable (...) as explicit input` for SharedVariable.
- # ObservedRV and MultiObservedRV instances are ViewOPs
- # of TensorConstants or SharedVariables, we must add them
- # to the stack or risk evaluating deterministics with the
- # wrong values (issue #3354)
+ elif isinstance(next_, (Constant, SharedVariable)):
+ # If the node is an aesara.tensor.TensorConstant or a
+ # SharedVariable, its value will be available automatically in
+ # _compile_aesara_function so we can skip it. Furthermore, if
+ # this node was treated as a TensorVariable that should be
+ # compiled by aesara in _compile_aesara_function, it would
+ # raise a `TypeError: ('Constants not allowed in param list',
+ # ...)` for TensorConstant, and a `TypeError: Cannot use a
+ # shared variable (...) as explicit input` for SharedVariable.
+ # ObservedRV and MultiObservedRV instances are ViewOPs of
+ # TensorConstants or SharedVariables, so we must add them to the
+ # stack or risk evaluating deterministics with the wrong values
+ # (issue #3354)
stack.extend(
[
node
@@ -791,7 +792,7 @@ def draw_values(params, point=None, size=None):
value = _draw_value(next_, point=point, givens=temp_givens, size=size)
givens[next_.name] = (next_, value)
drawn[(next_, size)] = value
- except theano.graph.fg.MissingInputError:
+ except aesara.graph.fg.MissingInputError:
# The node failed, so we must add the node's parents to
# the stack of nodes to try to draw from. We exclude the
# nodes in the `params` list.
@@ -834,17 +835,17 @@ def draw_values(params, point=None, size=None):
value = _draw_value(param, point=point, givens=givens.values(), size=size)
evaluated[param_idx] = drawn[(param, size)] = value
givens[param.name] = (param, value)
- except theano.graph.fg.MissingInputError:
+ except aesara.graph.fg.MissingInputError:
missing_inputs.add(param_idx)
return [evaluated[j] for j in params] # set the order back
@memoize
-def _compile_theano_function(param, vars, givens=None):
- """Compile theano function for a given parameter and input variables.
+def _compile_aesara_function(param, vars, givens=None):
+ """Compile aesara function for a given parameter and input variables.
- This function is memoized to avoid repeating costly theano compilations
+ This function is memoized to avoid repeating costly aesara compilations
when repeatedly drawing values, which is done when generating posterior
predictive samples.
@@ -852,11 +853,11 @@ def _compile_theano_function(param, vars, givens=None):
----------
param: Model variable from which to draw value
vars: Children variables of `param`
- givens: Variables to be replaced in the Theano graph
+ givens: Variables to be replaced in the Aesara graph
Returns
-------
- A compiled theano function that takes the values of `vars` as input
+ A compiled aesara function that takes the values of `vars` as input
positional args
"""
f = function(
@@ -867,32 +868,32 @@ def _compile_theano_function(param, vars, givens=None):
on_unused_input="ignore",
allow_input_downcast=True,
)
- return vectorize_theano_function(f, inputs=vars, output=param)
+ return vectorize_aesara_function(f, inputs=vars, output=param)
-def vectorize_theano_function(f, inputs, output):
- """Takes a compiled theano function and wraps it with a vectorized version.
- Theano compiled functions expect inputs and outputs of a fixed number of
+def vectorize_aesara_function(f, inputs, output):
+ """Takes a compiled aesara function and wraps it with a vectorized version.
+ Aesara compiled functions expect inputs and outputs of a fixed number of
dimensions. In our context, these usually come from deterministics which
are compiled against a given RV, with its core shape. If we draw i.i.d.
samples from said RV, we would not be able to compute the deterministic
over the i.i.d sampled dimensions (i.e. those that are not the core
- dimensions of the RV). To deal with this problem, we wrap the theano
+ dimensions of the RV). To deal with this problem, we wrap the aesara
compiled function with numpy.vectorize, providing the correct signature
for the core dimensions. The extra dimensions, will be interpreted as
i.i.d. sampled axis and will be broadcast following the usual rules.
Parameters
----------
- f: theano compiled function
- inputs: list of theano variables used as inputs for the function
- givens: theano variable which is the output of the function
+ f: aesara compiled function
+ inputs: list of aesara variables used as inputs for the function
+ output: aesara variable which is the output of the function
Notes
-----
- If inputs is an empty list (theano function with no inputs needed), then
+ If inputs is an empty list (aesara function with no inputs needed), then
the same `f` is returned.
- Only functions that return a single theano variable's value can be
+ Only functions that return a single aesara variable's value can be
vectorized.
Returns
@@ -928,27 +929,27 @@ def _draw_value(param, point=None, givens=None, size=None):
Parameters
----------
- param: number, array like, theano variable or pymc3 random variable
+ param: number, array like, aesara variable or pymc3 random variable
The value or distribution. Constants or shared variables
- will be converted to an array and returned. Theano variables
+ will be converted to an array and returned. Aesara variables
are evaluated. If `param` is a pymc3 random variables, draw
a new value from it and return that, unless a value is specified
in `point`.
point: dict, optional
A dictionary from pymc3 variable names to their values.
givens: dict, optional
- A dictionary from theano variables to their values. These values
- are used to evaluate `param` if it is a theano variable.
+ A dictionary from aesara variables to their values. These values
+ are used to evaluate `param` if it is an Aesara variable.
size: int, optional
Number of samples
"""
if isinstance(param, (numbers.Number, np.ndarray)):
return param
- elif isinstance(param, theano_constant):
+ elif isinstance(param, Constant):
return param.value
- elif isinstance(param, tt.sharedvar.SharedVariable):
+ elif isinstance(param, SharedVariable):
return param.get_value()
- elif isinstance(param, (tt.TensorVariable, MultiObservedRV)):
+ elif isinstance(param, (TensorVariable, MultiObservedRV)):
if point and hasattr(param, "model") and param.name in point:
return point[param.name]
elif hasattr(param, "random") and param.random is not None:
@@ -971,7 +972,7 @@ def _draw_value(param, point=None, givens=None, size=None):
return dist_tmp.random(point=point, size=size)
except (ValueError, TypeError):
# reset shape to account for shape changes
- # with theano.shared inputs
+ # with aesara.shared inputs
dist_tmp.shape = np.array([])
# We want to draw values to infer the dist_shape,
# we don't want to store these drawn values to the context
@@ -995,14 +996,14 @@ def _draw_value(param, point=None, givens=None, size=None):
variables = values = []
# We only truly care if the ancestors of param that were given
# value have the matching dshape and val.shape
- param_ancestors = set(theano.graph.basic.ancestors([param], blockers=list(variables)))
+ param_ancestors = set(aesara.graph.basic.ancestors([param], blockers=list(variables)))
inputs = [(var, val) for var, val in zip(variables, values) if var in param_ancestors]
if inputs:
input_vars, input_vals = list(zip(*inputs))
else:
input_vars = []
input_vals = []
- func = _compile_theano_function(param, input_vars)
+ func = _compile_aesara_function(param, input_vars)
output = func(*input_vals)
return output
raise ValueError("Unexpected type in draw_value: %s" % type(param))
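
`vectorize_aesara_function` keeps the pre-rename behaviour: the compiled function only knows the core shape of its inputs, so it is wrapped with `numpy.vectorize` and a signature describing those core dimensions, letting any extra i.i.d. sample axes broadcast. A minimal sketch of that mechanism, with a hypothetical stand-in for the compiled function (not part of the diff):

import numpy as np

def core_fn(x):
    # Stand-in for a compiled function defined over a length-3 core vector.
    return x.sum()

vec_fn = np.vectorize(core_fn, signature="(n)->()")
draws = np.random.randn(500, 3)  # 500 i.i.d. draws of the (3,)-shaped core input
out = vec_fn(draws)              # the leading sample axis broadcasts; out.shape == (500,)
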
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 756269d330..f423f298de 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -14,10 +14,11 @@
from collections.abc import Iterable
+import aesara
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.tensor as tt
+from pymc3.aesaraf import _conversion_map, take_along_axis
from pymc3.distributions.continuous import Normal, get_tau_sigma
from pymc3.distributions.dist_math import bound, random_choice
from pymc3.distributions.distribution import (
@@ -34,7 +35,6 @@
to_tuple,
)
from pymc3.math import logsumexp
-from pymc3.theanof import _conversion_map, take_along_axis
__all__ = ["Mixture", "NormalMixture", "MixtureSameFamily"]
@@ -143,15 +143,15 @@ def __init__(self, w, comp_dists, *args, **kwargs):
)
shape = kwargs.pop("shape", ())
- self.w = w = tt.as_tensor_variable(w)
+ self.w = w = aet.as_tensor_variable(w)
self.comp_dists = comp_dists
defaults = kwargs.pop("defaults", [])
if all_discrete(comp_dists):
- default_dtype = _conversion_map[theano.config.floatX]
+ default_dtype = _conversion_map[aesara.config.floatX]
else:
- default_dtype = theano.config.floatX
+ default_dtype = aesara.config.floatX
try:
self.mean = (w * self._comp_means()).sum(axis=-1)
@@ -166,9 +166,9 @@ def __init__(self, w, comp_dists, *args, **kwargs):
if isinstance(comp_dists, Distribution):
comp_mode_logps = comp_dists.logp(comp_dists.mode)
else:
- comp_mode_logps = tt.stack([cd.logp(cd.mode) for cd in comp_dists])
+ comp_mode_logps = aet.stack([cd.logp(cd.mode) for cd in comp_dists])
- mode_idx = tt.argmax(tt.log(w) + comp_mode_logps, axis=-1)
+ mode_idx = aet.argmax(aet.log(w) + comp_mode_logps, axis=-1)
self.mode = self._comp_modes()[mode_idx]
if "mode" not in defaults:
@@ -253,7 +253,7 @@ def _comp_logp(self, value):
val_shape = tuple(value.shape.eval())
except AttributeError:
val_shape = value.shape
- except theano.graph.fg.MissingInputError:
+ except aesara.graph.fg.MissingInputError:
val_shape = None
try:
self_shape = tuple(self.shape)
@@ -292,26 +292,30 @@ def _comp_logp(self, value):
if ndim <= 1:
ndim = len(comp_dists.shape) - 1
if ndim < len(comp_dists.shape):
- value_ = tt.shape_padright(value, len(comp_dists.shape) - ndim)
+ value_ = aet.shape_padright(value, len(comp_dists.shape) - ndim)
else:
value_ = value
return comp_dists.logp(value_)
else:
- return tt.squeeze(
- tt.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=-1)
+ return aet.squeeze(
+ aet.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=-1)
)
def _comp_means(self):
try:
- return tt.as_tensor_variable(self.comp_dists.mean)
+ return aet.as_tensor_variable(self.comp_dists.mean)
except AttributeError:
- return tt.squeeze(tt.stack([comp_dist.mean for comp_dist in self.comp_dists], axis=-1))
+ return aet.squeeze(
+ aet.stack([comp_dist.mean for comp_dist in self.comp_dists], axis=-1)
+ )
def _comp_modes(self):
try:
- return tt.as_tensor_variable(self.comp_dists.mode)
+ return aet.as_tensor_variable(self.comp_dists.mode)
except AttributeError:
- return tt.squeeze(tt.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=-1))
+ return aet.squeeze(
+ aet.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=-1)
+ )
def _comp_samples(self, point=None, size=None, comp_dist_shapes=None, broadcast_shape=None):
if self.comp_is_distribution:
@@ -418,7 +422,7 @@ def logp(self, value):
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired, the values must be provided in a numpy array or Aesara tensor
Returns
-------
@@ -427,10 +431,10 @@ def logp(self, value):
w = self.w
return bound(
- logsumexp(tt.log(w) + self._comp_logp(value), axis=-1, keepdims=False),
+ logsumexp(aet.log(w) + self._comp_logp(value), axis=-1, keepdims=False),
w >= 0,
w <= 1,
- tt.allclose(w.sum(axis=-1), 1),
+ aet.allclose(w.sum(axis=-1), 1),
broadcast_conditions=False,
)
@@ -632,8 +636,8 @@ def __init__(self, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, *
sigma = sd
_, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.mu = mu = tt.as_tensor_variable(mu)
- self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
+ self.mu = mu = aet.as_tensor_variable(mu)
+ self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma)
super().__init__(w, Normal.dist(mu, sigma=sigma, shape=comp_shape), *args, **kwargs)
@@ -675,7 +679,7 @@ class MixtureSameFamily(Distribution):
"""
def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
- self.w = tt.as_tensor_variable(w)
+ self.w = aet.as_tensor_variable(w)
if not isinstance(comp_dists, Distribution):
raise TypeError(
"The MixtureSameFamily distribution only accepts Distribution "
@@ -697,19 +701,19 @@ def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
# Compute the mode so we don't always have to pass a testval
defaults = kwargs.pop("defaults", [])
event_shape = self.comp_dists.shape[mixture_axis + 1 :]
- _w = tt.shape_padleft(
- tt.shape_padright(w, len(event_shape)),
+ _w = aet.shape_padleft(
+ aet.shape_padright(w, len(event_shape)),
len(self.comp_dists.shape) - w.ndim - len(event_shape),
)
mode = take_along_axis(
self.comp_dists.mode,
- tt.argmax(_w, keepdims=True),
+ aet.argmax(_w, keepdims=True),
axis=mixture_axis,
)
self.mode = mode[(..., 0) + (slice(None),) * len(event_shape)]
if not all_discrete(comp_dists):
- mean = tt.as_tensor_variable(self.comp_dists.mean)
+ mean = aet.as_tensor_variable(self.comp_dists.mean)
self.mean = (_w * mean).sum(axis=mixture_axis)
if "mean" not in defaults:
defaults.append("mean")
@@ -725,7 +729,7 @@ def logp(self, value):
----------
value : numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
- values are desired the values must be provided in a numpy array or theano tensor
+ values are desired, the values must be provided in a numpy array or Aesara tensor
Returns
-------
@@ -742,7 +746,7 @@ def logp(self, value):
# We first have to pad the shape of w to the right with ones
# so that it can broadcast with the event_shape.
- w = tt.shape_padright(w, len(event_shape))
+ w = aet.shape_padright(w, len(event_shape))
# Second, we have to add the mixture_axis to the value tensor
# To insert the mixture axis at the correct location, we use the
@@ -751,14 +755,14 @@ def logp(self, value):
# than the ones present in the comp_dists.
comp_dists_ndim = len(comp_dists.shape)
- value = tt.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim)
+ value = aet.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim)
comp_logp = comp_dists.logp(value)
return bound(
- logsumexp(tt.log(w) + comp_logp, axis=mixture_axis, keepdims=False),
+ logsumexp(aet.log(w) + comp_logp, axis=mixture_axis, keepdims=False),
w >= 0,
w <= 1,
- tt.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1),
+ aet.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1),
broadcast_conditions=False,
)
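
The mixture log-probability itself is untouched by the rename: it is still the weighted log-sum-exp over component log-densities, logp(x) = logsumexp_k(log w_k + logp_k(x)). A small sketch of that identity in plain numpy/scipy, outside the diff:

import numpy as np
from scipy import stats
from scipy.special import logsumexp

w = np.array([0.3, 0.7])                      # mixture weights, sum to 1
x = 1.25
comp_logp = np.array([stats.norm(0.0, 1.0).logpdf(x),
                      stats.norm(3.0, 2.0).logpdf(x)])
mix_logp = logsumexp(np.log(w) + comp_logp)   # log sum_k w_k * p_k(x)
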
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 3fcdb8dbda..c23b9f191b 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -17,20 +17,27 @@
import warnings
+import aesara
+import aesara.tensor as aet
import numpy as np
import scipy
-import theano
-import theano.tensor as tt
+from aesara.graph.basic import Apply
+from aesara.graph.op import Op, get_test_value
+from aesara.graph.utils import TestValueError
+from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace
+from aesara.tensor.slinalg import (
+ Cholesky,
+ Solve,
+ solve_lower_triangular,
+ solve_upper_triangular,
+)
+from aesara.tensor.type import TensorType
from scipy import linalg, stats
-from theano.graph.basic import Apply
-from theano.graph.op import Op, get_test_value
-from theano.graph.utils import TestValueError
-from theano.tensor.nlinalg import det, eigh, matrix_inverse, trace
-from theano.tensor.slinalg import Cholesky
import pymc3 as pm
+from pymc3.aesaraf import floatX, intX
from pymc3.distributions import transforms
from pymc3.distributions.continuous import ChiSquared, Normal
from pymc3.distributions.dist_math import bound, factln, logpow
@@ -46,7 +53,6 @@
from pymc3.exceptions import ShapeError
from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker
from pymc3.model import Deterministic
-from pymc3.theanof import floatX, intX
__all__ = [
"MvNormal",
@@ -75,8 +81,8 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **
raise ValueError(
"Incompatible parameterization. Specify exactly one of tau, cov, or chol."
)
- self.mu = mu = tt.as_tensor_variable(mu)
- self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
+ self.mu = mu = aet.as_tensor_variable(mu)
+ self.solve_lower = Solve(A_structure="lower_triangular")
# Step methods and advi do not catch LinAlgErrors at the
# moment. We work around that by using a cholesky op
# that returns a nan as first entry instead of raising
@@ -86,7 +92,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **
if cov is not None:
self.k = cov.shape[0]
self._cov_type = "cov"
- cov = tt.as_tensor_variable(cov)
+ cov = aet.as_tensor_variable(cov)
if cov.ndim != 2:
raise ValueError("cov must be two dimensional.")
self.chol_cov = cholesky(cov)
@@ -95,7 +101,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **
elif tau is not None:
self.k = tau.shape[0]
self._cov_type = "tau"
- tau = tt.as_tensor_variable(tau)
+ tau = aet.as_tensor_variable(tau)
if tau.ndim != 2:
raise ValueError("tau must be two dimensional.")
self.chol_tau = cholesky(tau)
@@ -106,7 +112,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **
self._cov_type = "chol"
if chol.ndim != 2:
raise ValueError("chol must be two dimensional.")
- self.chol_cov = tt.as_tensor_variable(chol)
+ self.chol_cov = aet.as_tensor_variable(chol)
self._n = self.chol_cov.shape[-1]
def _quaddist(self, value):
@@ -137,16 +143,16 @@ def _quaddist(self, value):
def _quaddist_chol(self, delta):
chol_cov = self.chol_cov
- diag = tt.nlinalg.diag(chol_cov)
+ diag = aet.nlinalg.diag(chol_cov)
# Check if the covariance matrix is positive definite.
- ok = tt.all(diag > 0)
+ ok = aet.all(diag > 0)
# If not, replace the diagonal. We return -inf later, but
# need to prevent solve_lower from throwing an exception.
- chol_cov = tt.switch(ok, chol_cov, 1)
+ chol_cov = aet.switch(ok, chol_cov, 1)
delta_trans = self.solve_lower(chol_cov, delta.T).T
quaddist = (delta_trans ** 2).sum(axis=-1)
- logdet = tt.sum(tt.log(diag))
+ logdet = aet.sum(aet.log(diag))
return quaddist, logdet, ok
def _quaddist_cov(self, delta):
@@ -154,16 +160,16 @@ def _quaddist_cov(self, delta):
def _quaddist_tau(self, delta):
chol_tau = self.chol_tau
- diag = tt.nlinalg.diag(chol_tau)
+ diag = aet.nlinalg.diag(chol_tau)
# Check if the precision matrix is positive definite.
- ok = tt.all(diag > 0)
+ ok = aet.all(diag > 0)
# If not, replace the diagonal. We return -inf later, but
# need to prevent solve_lower from throwing an exception.
- chol_tau = tt.switch(ok, chol_tau, 1)
+ chol_tau = aet.switch(ok, chol_tau, 1)
- delta_trans = tt.dot(delta, chol_tau)
+ delta_trans = aet.dot(delta, chol_tau)
quaddist = (delta_trans ** 2).sum(axis=-1)
- logdet = -tt.sum(tt.log(diag))
+ logdet = -aet.sum(aet.log(diag))
return quaddist, logdet, ok
def _cov_param_for_repr(self):
@@ -235,7 +241,7 @@ class MvNormal(_QuadFormBase):
chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=3, eta=2,
sd_dist=sd_dist, compute_corr=True)
vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=(5, 3))
- vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T)
+ vals = pm.Deterministic('vals', aet.dot(chol, vals_raw.T).T)
"""
def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs):
@@ -362,7 +368,7 @@ def __init__(
raise ValueError("Specify only one of cov and Sigma")
cov = Sigma
super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs)
- self.nu = nu = tt.as_tensor_variable(nu)
+ self.nu = nu = aet.as_tensor_variable(nu)
self.mean = self.median = self.mode = self.mu = self.mu
def random(self, point=None, size=None):
@@ -423,7 +429,7 @@ def logp(self, value):
- gammaln(self.nu / 2.0)
- 0.5 * k * floatX(np.log(self.nu * np.pi))
)
- inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu)
+ inner = -(self.nu + k) / 2.0 * aet.log1p(quaddist / self.nu)
return bound(norm + inner - logdet, ok)
def _distr_parameters_for_repr(self):
@@ -472,10 +478,10 @@ def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs):
super().__init__(transform=transform, *args, **kwargs)
- self.a = a = tt.as_tensor_variable(a)
- self.mean = a / tt.sum(a)
+ self.a = a = aet.as_tensor_variable(a)
+ self.mean = a / aet.sum(a)
- self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan)
+ self.mode = aet.switch(aet.all(a > 1), (a - 1) / aet.sum(a - 1), np.nan)
def random(self, point=None, size=None):
"""
@@ -519,10 +525,10 @@ def logp(self, value):
# only defined for sum(value) == 1
return bound(
- tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)),
- tt.all(value >= 0),
- tt.all(value <= 1),
- tt.all(a > 0),
+ aet.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(aet.sum(a, axis=-1)),
+ aet.all(value >= 0),
+ aet.all(value <= 1),
+ aet.all(a > 0),
broadcast_conditions=False,
)
@@ -566,21 +572,21 @@ class Multinomial(Discrete):
def __init__(self, n, p, *args, **kwargs):
super().__init__(*args, **kwargs)
- p = p / tt.sum(p, axis=-1, keepdims=True)
+ p = p / aet.sum(p, axis=-1, keepdims=True)
if len(self.shape) > 1:
- self.n = tt.shape_padright(n)
- self.p = p if p.ndim > 1 else tt.shape_padleft(p)
+ self.n = aet.shape_padright(n)
+ self.p = p if p.ndim > 1 else aet.shape_padleft(p)
else:
# n is a scalar, p is a 1d array
- self.n = tt.as_tensor_variable(n)
- self.p = tt.as_tensor_variable(p)
+ self.n = aet.as_tensor_variable(n)
+ self.p = aet.as_tensor_variable(p)
self.mean = self.n * self.p
- mode = tt.cast(tt.round(self.mean), "int32")
- diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
- inc_bool_arr = tt.abs_(diff) > 0
- mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
+ mode = aet.cast(aet.round(self.mean), "int32")
+ diff = self.n - aet.sum(mode, axis=-1, keepdims=True)
+ inc_bool_arr = aet.abs_(diff) > 0
+ mode = aet.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
self.mode = mode
def _random(self, n, p, size=None, raw_size=None):
@@ -663,12 +669,12 @@ def logp(self, x):
p = self.p
return bound(
- factln(n) + tt.sum(-factln(x) + logpow(p, x), axis=-1, keepdims=True),
- tt.all(x >= 0),
- tt.all(tt.eq(tt.sum(x, axis=-1, keepdims=True), n)),
- tt.all(p <= 1),
- tt.all(tt.eq(tt.sum(p, axis=-1), 1)),
- tt.all(tt.ge(n, 0)),
+ factln(n) + aet.sum(-factln(x) + logpow(p, x), axis=-1, keepdims=True),
+ aet.all(x >= 0),
+ aet.all(aet.eq(aet.sum(x, axis=-1, keepdims=True), n)),
+ aet.all(p <= 1),
+ aet.all(aet.eq(aet.sum(p, axis=-1), 1)),
+ aet.all(aet.ge(n, 0)),
broadcast_conditions=False,
)
@@ -714,22 +720,22 @@ def __init__(self, n, a, shape, *args, **kwargs):
n = intX(n)
a = floatX(a)
if len(self.shape) > 1:
- self.n = tt.shape_padright(n)
- self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
+ self.n = aet.shape_padright(n)
+ self.a = aet.as_tensor_variable(a) if a.ndim > 1 else aet.shape_padleft(a)
else:
# n is a scalar, p is a 1d array
- self.n = tt.as_tensor_variable(n)
- self.a = tt.as_tensor_variable(a)
+ self.n = aet.as_tensor_variable(n)
+ self.a = aet.as_tensor_variable(a)
p = self.a / self.a.sum(-1, keepdims=True)
self.mean = self.n * p
# Mode is only an approximation. Exact computation requires a complex
# iterative algorithm as described in https://doi.org/10.1016/j.spl.2009.09.013
- mode = tt.cast(tt.round(self.mean), "int32")
- diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
- inc_bool_arr = tt.abs_(diff) > 0
- mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
+ mode = aet.cast(aet.round(self.mean), "int32")
+ diff = self.n - aet.sum(mode, axis=-1, keepdims=True)
+ inc_bool_arr = aet.abs_(diff) > 0
+ mode = aet.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
self._defaultval = mode
def _random(self, n, a, size=None):
@@ -816,10 +822,10 @@ def logp(self, value):
# and that each observation value_i sums to n_i.
return bound(
result,
- tt.all(tt.ge(value, 0)),
- tt.all(tt.gt(a, 0)),
- tt.all(tt.ge(n, 0)),
- tt.all(tt.eq(value.sum(axis=-1, keepdims=True), n)),
+ aet.all(aet.ge(value, 0)),
+ aet.all(aet.gt(a, 0)),
+ aet.all(aet.ge(n, 0)),
+ aet.all(aet.eq(value.sum(axis=-1, keepdims=True), n)),
broadcast_conditions=False,
)
@@ -847,9 +853,9 @@ class PosDefMatrix(Op):
# Compulsory if itypes and otypes are not defined
def make_node(self, x):
- x = tt.as_tensor_variable(x)
+ x = aet.as_tensor_variable(x)
assert x.ndim == 2
- o = tt.TensorType(dtype="int8", broadcastable=[])()
+ o = TensorType(dtype="int8", broadcastable=[])()
return Apply(self, [x], [o])
# Python implementation:
@@ -868,7 +874,7 @@ def infer_shape(self, fgraph, node, shapes):
def grad(self, inp, grads):
(x,) = inp
- return [x.zeros_like(theano.config.floatX)]
+ return [x.zeros_like(aesara.config.floatX)]
def __str__(self):
return "MatrixIsPositiveDefinite"
@@ -925,11 +931,11 @@ def __init__(self, nu, V, *args, **kwargs):
"https://github.com/pymc-devs/pymc3/issues/538.",
UserWarning,
)
- self.nu = nu = tt.as_tensor_variable(nu)
- self.p = p = tt.as_tensor_variable(V.shape[0])
- self.V = V = tt.as_tensor_variable(V)
+ self.nu = nu = aet.as_tensor_variable(nu)
+ self.p = p = aet.as_tensor_variable(V.shape[0])
+ self.V = V = aet.as_tensor_variable(V)
self.mean = nu * V
- self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan)
+ self.mode = aet.switch(aet.ge(nu, p + 1), (nu - p - 1) * V, np.nan)
def random(self, point=None, size=None):
"""
@@ -975,15 +981,15 @@ def logp(self, X):
return bound(
(
- (nu - p - 1) * tt.log(IXI)
+ (nu - p - 1) * aet.log(IXI)
- trace(matrix_inverse(V).dot(X))
- - nu * p * tt.log(2)
- - nu * tt.log(IVI)
+ - nu * p * aet.log(2)
+ - nu * aet.log(IVI)
- 2 * multigammaln(nu / 2.0, p)
)
/ 2,
matrix_pos_def(X),
- tt.eq(X, X.T),
+ aet.eq(X, X.T),
nu > (p - 1),
broadcast_conditions=False,
)
@@ -1053,44 +1059,44 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
diag_testval = None
tril_testval = None
- c = tt.sqrt(
+ c = aet.sqrt(
ChiSquared("%s_c" % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval)
)
pm._log.info("Added new variable %s_c to model diagonal of Wishart." % name)
z = Normal("%s_z" % name, 0.0, 1.0, shape=n_tril, testval=tril_testval)
pm._log.info("Added new variable %s_z to model off-diagonals of Wishart." % name)
# Construct A matrix
- A = tt.zeros(S.shape, dtype=np.float32)
- A = tt.set_subtensor(A[diag_idx], c)
- A = tt.set_subtensor(A[tril_idx], z)
+ A = aet.zeros(S.shape, dtype=np.float32)
+ A = aet.set_subtensor(A[diag_idx], c)
+ A = aet.set_subtensor(A[tril_idx], z)
# L * A * A.T * L.T ~ Wishart(L*L.T, nu)
if return_cholesky:
- return Deterministic(name, tt.dot(L, A))
+ return Deterministic(name, aet.dot(L, A))
else:
- return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T))
+ return Deterministic(name, aet.dot(aet.dot(aet.dot(L, A), A.T), L.T))
def _lkj_normalizing_constant(eta, n):
if eta == 1:
- result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum()
+ result = gammaln(2.0 * aet.arange(1, int((n - 1) / 2) + 1)).sum()
if n % 2 == 1:
result += (
- 0.25 * (n ** 2 - 1) * tt.log(np.pi)
- - 0.25 * (n - 1) ** 2 * tt.log(2.0)
+ 0.25 * (n ** 2 - 1) * aet.log(np.pi)
+ - 0.25 * (n - 1) ** 2 * aet.log(2.0)
- (n - 1) * gammaln(int((n + 1) / 2))
)
else:
result += (
- 0.25 * n * (n - 2) * tt.log(np.pi)
- + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0)
+ 0.25 * n * (n - 2) * aet.log(np.pi)
+ + 0.25 * (3 * n ** 2 - 4 * n) * aet.log(2.0)
+ n * gammaln(n / 2)
- (n - 1) * gammaln(n)
)
else:
result = -(n - 1) * gammaln(eta + 0.5 * (n - 1))
- k = tt.arange(1, n)
- result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum()
+ k = aet.arange(1, n)
+ result += (0.5 * k * aet.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum()
return result
@@ -1100,8 +1106,8 @@ class _LKJCholeskyCov(Continuous):
"""
def __init__(self, eta, n, sd_dist, *args, **kwargs):
- self.n = tt.as_tensor_variable(n)
- self.eta = tt.as_tensor_variable(eta)
+ self.n = aet.as_tensor_variable(n)
+ self.eta = aet.as_tensor_variable(eta)
if "transform" in kwargs and kwargs["transform"] is not None:
raise ValueError("Invalid parameter: transform.")
@@ -1143,22 +1149,22 @@ def logp(self, x):
eta = self.eta
diag_idxs = self.diag_idxs
- cumsum = tt.cumsum(x ** 2)
- variance = tt.zeros(n)
- variance = tt.inc_subtensor(variance[0], x[0] ** 2)
- variance = tt.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
- sd_vals = tt.sqrt(variance)
+ cumsum = aet.cumsum(x ** 2)
+ variance = aet.zeros(n)
+ variance = aet.inc_subtensor(variance[0], x[0] ** 2)
+ variance = aet.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
+ sd_vals = aet.sqrt(variance)
logp_sd = self.sd_dist.logp(sd_vals).sum()
corr_diag = x[diag_idxs] / sd_vals
- logp_lkj = (2 * eta - 3 + n - tt.arange(n)) * tt.log(corr_diag)
- logp_lkj = tt.sum(logp_lkj)
+ logp_lkj = (2 * eta - 3 + n - aet.arange(n)) * aet.log(corr_diag)
+ logp_lkj = aet.sum(logp_lkj)
# Compute the log det jacobian of the second transformation
# described in the docstring.
- idx = tt.arange(n)
- det_invjac = tt.log(corr_diag) - idx * tt.log(sd_vals)
+ idx = aet.arange(n)
+ det_invjac = aet.log(corr_diag) - idx * aet.log(sd_vals)
det_invjac = det_invjac.sum()
norm = _lkj_normalizing_constant(eta, n)
@@ -1348,10 +1354,10 @@ def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=Tru
# Or transform an uncorrelated normal:
vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=10)
- vals = tt.dot(chol, vals_raw)
+ vals = aet.dot(chol, vals_raw)
# Or compute the covariance matrix
- cov = tt.dot(chol, chol.T)
+ cov = aet.dot(chol, chol.T)
**Implementation** In the unconstrained space all values of the cholesky factor
are stored untransformed, except for the diagonal entries, where
@@ -1411,9 +1417,9 @@ def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=Tru
else:
chol = pm.expand_packed_triangular(n, packed_chol, lower=True)
# compute covariance matrix
- cov = tt.dot(chol, chol.T)
+ cov = aet.dot(chol, chol.T)
# extract standard deviations and rho
- stds = tt.sqrt(tt.diag(cov))
+ stds = aet.sqrt(aet.diag(cov))
inv_stds = 1 / stds
corr = inv_stds[None, :] * cov * inv_stds[:, None]
if store_in_trace:
@@ -1562,14 +1568,14 @@ def logp(self, x):
eta = self.eta
X = x[self.tri_index]
- X = tt.fill_diagonal(X, 1)
+ X = aet.fill_diagonal(X, 1)
result = _lkj_normalizing_constant(eta, n)
- result += (eta - 1.0) * tt.log(det(X))
+ result += (eta - 1.0) * aet.log(det(X))
return bound(
result,
- tt.all(X <= 1),
- tt.all(X >= -1),
+ aet.all(X <= 1),
+ aet.all(X >= -1),
matrix_pos_def(X),
eta > 0,
broadcast_conditions=False,
@@ -1662,7 +1668,7 @@ class MatrixNormal(Continuous):
# Setup left covariance matrix
scale = pm.Lognormal('scale', mu=np.log(true_scale), sigma=0.5)
- rowcov = tt.nlinalg.diag([scale**(2*i) for i in range(m)])
+ rowcov = aet.nlinalg.diag([scale**(2*i) for i in range(m)])
vals = pm.MatrixNormal('vals', mu=mu, colchol=colchol, rowcov=rowcov,
observed=data, shape=(m, n))
@@ -1687,10 +1693,10 @@ def __init__(
assert len(shape) == 2, "shape must have length 2: mxn"
self.shape = shape
super().__init__(shape=shape, *args, **kwargs)
- self.mu = tt.as_tensor_variable(mu)
+ self.mu = aet.as_tensor_variable(mu)
self.mean = self.median = self.mode = self.mu
- self.solve_lower = tt.slinalg.solve_lower_triangular
- self.solve_upper = tt.slinalg.solve_upper_triangular
+ self.solve_lower = solve_lower_triangular
+ self.solve_upper = solve_upper_triangular
def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
cholesky = Cholesky(lower=True, on_error="raise")
@@ -1705,7 +1711,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
if rowcov is not None:
self.m = rowcov.shape[0]
self._rowcov_type = "cov"
- rowcov = tt.as_tensor_variable(rowcov)
+ rowcov = aet.as_tensor_variable(rowcov)
if rowcov.ndim != 2:
raise ValueError("rowcov must be two dimensional.")
self.rowchol_cov = cholesky(rowcov)
@@ -1714,7 +1720,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
raise ValueError("rowtau not supported at this time")
self.m = rowtau.shape[0]
self._rowcov_type = "tau"
- rowtau = tt.as_tensor_variable(rowtau)
+ rowtau = aet.as_tensor_variable(rowtau)
if rowtau.ndim != 2:
raise ValueError("rowtau must be two dimensional.")
self.rowchol_tau = cholesky(rowtau)
@@ -1724,7 +1730,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
self._rowcov_type = "chol"
if rowchol.ndim != 2:
raise ValueError("rowchol must be two dimensional.")
- self.rowchol_cov = tt.as_tensor_variable(rowchol)
+ self.rowchol_cov = aet.as_tensor_variable(rowchol)
# Among-column matrices
if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1:
@@ -1736,7 +1742,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
if colcov is not None:
self.n = colcov.shape[0]
self._colcov_type = "cov"
- colcov = tt.as_tensor_variable(colcov)
+ colcov = aet.as_tensor_variable(colcov)
if colcov.ndim != 2:
raise ValueError("colcov must be two dimensional.")
self.colchol_cov = cholesky(colcov)
@@ -1745,7 +1751,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
raise ValueError("coltau not supported at this time")
self.n = coltau.shape[0]
self._colcov_type = "tau"
- coltau = tt.as_tensor_variable(coltau)
+ coltau = aet.as_tensor_variable(coltau)
if coltau.ndim != 2:
raise ValueError("coltau must be two dimensional.")
self.colchol_tau = cholesky(coltau)
@@ -1755,7 +1761,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
self._colcov_type = "chol"
if colchol.ndim != 2:
raise ValueError("colchol must be two dimensional.")
- self.colchol_cov = tt.as_tensor_variable(colchol)
+ self.colchol_cov = aet.as_tensor_variable(colchol)
def random(self, point=None, size=None):
"""
@@ -1802,15 +1808,15 @@ def _trquaddist(self, value):
# Find exponent piece by piece
right_quaddist = self.solve_lower(rowchol_cov, delta)
- quaddist = tt.nlinalg.matrix_dot(right_quaddist.T, right_quaddist)
+ quaddist = aet.nlinalg.matrix_dot(right_quaddist.T, right_quaddist)
quaddist = self.solve_lower(colchol_cov, quaddist)
quaddist = self.solve_upper(colchol_cov.T, quaddist)
- trquaddist = tt.nlinalg.trace(quaddist)
+ trquaddist = aet.nlinalg.trace(quaddist)
- coldiag = tt.nlinalg.diag(colchol_cov)
- rowdiag = tt.nlinalg.diag(rowchol_cov)
- half_collogdet = tt.sum(tt.log(coldiag)) # logdet(M) = 2*Tr(log(L))
- half_rowlogdet = tt.sum(tt.log(rowdiag)) # Using Cholesky: M = L L^T
+ coldiag = aet.nlinalg.diag(colchol_cov)
+ rowdiag = aet.nlinalg.diag(rowchol_cov)
+ half_collogdet = aet.sum(aet.log(coldiag)) # logdet(M) = 2*Tr(log(L))
+ half_rowlogdet = aet.sum(aet.log(rowdiag)) # Using Cholesky: M = L L^T
return trquaddist, half_collogdet, half_rowlogdet
def logp(self, value):
@@ -1869,7 +1875,7 @@ class KroneckerNormal(Continuous):
:math:`[(v_1, Q_1), (v_2, Q_2), ...]` such that
:math:`K_i = Q_i \text{diag}(v_i) Q_i'`. For example::
- v_i, Q_i = tt.nlinalg.eigh(K_i)
+ v_i, Q_i = aet.nlinalg.eigh(K_i)
sigma: scalar, variable
Standard deviation of the Gaussian white noise.
@@ -1930,7 +1936,7 @@ class KroneckerNormal(Continuous):
def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs):
self._setup(covs, chols, evds, sigma)
super().__init__(*args, **kwargs)
- self.mu = tt.as_tensor_variable(mu)
+ self.mu = aet.as_tensor_variable(mu)
self.mean = self.median = self.mode = self.mu
def _setup(self, covs, chols, evds, sigma):
@@ -1952,21 +1958,21 @@ def _setup(self, covs, chols, evds, sigma):
else:
# Otherwise use cholesky as usual
self.chols = list(map(self.cholesky, self.covs))
- self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
- self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols])
- self.N = tt.prod(self.sizes)
+ self.chol_diags = list(map(aet.nlinalg.diag, self.chols))
+ self.sizes = aet.as_tensor_variable([chol.shape[0] for chol in self.chols])
+ self.N = aet.prod(self.sizes)
elif chols is not None:
self._cov_type = "chol"
if self.is_noisy: # A strange case...
# Noise requires eigendecomposition
- covs = [tt.dot(chol, chol.T) for chol in chols]
+ covs = [aet.dot(chol, chol.T) for chol in chols]
eigh_map = map(eigh, covs)
self._setup_evd(eigh_map)
else:
self.chols = chols
- self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
- self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols])
- self.N = tt.prod(self.sizes)
+ self.chol_diags = list(map(aet.nlinalg.diag, self.chols))
+ self.sizes = aet.as_tensor_variable([chol.shape[0] for chol in self.chols])
+ self.N = aet.prod(self.sizes)
else:
self._cov_type = "evd"
self._setup_evd(evds)
@@ -1974,10 +1980,10 @@ def _setup(self, covs, chols, evds, sigma):
def _setup_evd(self, eigh_iterable):
self._isEVD = True
eigs_sep, Qs = zip(*eigh_iterable) # Unzip
- self.Qs = list(map(tt.as_tensor_variable, Qs))
- self.QTs = list(map(tt.transpose, self.Qs))
+ self.Qs = list(map(aet.as_tensor_variable, Qs))
+ self.QTs = list(map(aet.transpose, self.Qs))
- self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep))
+ self.eigs_sep = list(map(aet.as_tensor_variable, eigs_sep))
self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs
if self.is_noisy:
self.eigs += self.sigma ** 2
@@ -1989,28 +1995,28 @@ def _setup_random(self):
if self._cov_type == "cov":
cov = kronecker(*self.covs)
if self.is_noisy:
- cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+ cov = cov + self.sigma ** 2 * aet.identity_like(cov)
self.mv_params["cov"] = cov
elif self._cov_type == "chol":
if self.is_noisy:
covs = []
for eig, Q in zip(self.eigs_sep, self.Qs):
- cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T))
+ cov_i = aet.dot(Q, aet.dot(aet.diag(eig), Q.T))
covs.append(cov_i)
cov = kronecker(*covs)
if self.is_noisy:
- cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+ cov = cov + self.sigma ** 2 * aet.identity_like(cov)
self.mv_params["chol"] = self.cholesky(cov)
else:
self.mv_params["chol"] = kronecker(*self.chols)
elif self._cov_type == "evd":
covs = []
for eig, Q in zip(self.eigs_sep, self.Qs):
- cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T))
+ cov_i = aet.dot(Q, aet.dot(aet.diag(eig), Q.T))
covs.append(cov_i)
cov = kronecker(*covs)
if self.is_noisy:
- cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+ cov = cov + self.sigma ** 2 * aet.identity_like(cov)
self.mv_params["cov"] = cov
def random(self, point=None, size=None):
@@ -2050,16 +2056,16 @@ def _quaddist(self, value):
delta = value - self.mu
if self._isEVD:
sqrt_quad = kron_dot(self.QTs, delta.T)
- sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None])
- logdet = tt.sum(tt.log(self.eigs))
+ sqrt_quad = sqrt_quad / aet.sqrt(self.eigs[:, None])
+ logdet = aet.sum(aet.log(self.eigs))
else:
sqrt_quad = kron_solve_lower(self.chols, delta.T)
logdet = 0
for chol_size, chol_diag in zip(self.sizes, self.chol_diags):
- logchol = tt.log(chol_diag) * self.N / chol_size
- logdet += tt.sum(2 * logchol)
+ logchol = aet.log(chol_diag) * self.N / chol_size
+ logdet += aet.sum(2 * logchol)
# Square each sample
- quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T)
+ quad = aet.batched_dot(sqrt_quad.T, sqrt_quad.T)
if onedim:
quad = quad[0]
return quad, logdet
@@ -2079,7 +2085,7 @@ def logp(self, value):
TensorVariable
"""
quad, logdet = self._quaddist(value)
- return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0
+ return -(quad + logdet + self.N * aet.log(2 * np.pi)) / 2.0
def _distr_parameters_for_repr(self):
return ["mu"]
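
As in the other files, only the namespaces change here; `_quaddist_chol` still computes the Gaussian quadratic form by a lower-triangular solve against the Cholesky factor and reads half the log-determinant off its diagonal. A plain numpy/scipy sketch of that computation for a single 3-dimensional observation (not part of the diff):

import numpy as np
from scipy.linalg import cholesky, solve_triangular
from scipy.stats import multivariate_normal

mu = np.zeros(3)
cov = np.array([[2.0, 0.5, 0.0],
                [0.5, 1.0, 0.3],
                [0.0, 0.3, 1.5]])
value = np.array([0.2, -1.0, 0.7])

chol = cholesky(cov, lower=True)
delta_trans = solve_triangular(chol, value - mu, lower=True)
quaddist = (delta_trans ** 2).sum()        # (x - mu)' Sigma^{-1} (x - mu)
logdet = np.log(np.diag(chol)).sum()       # equals 0.5 * log|Sigma|
logp = -0.5 * (quaddist + 3 * np.log(2 * np.pi)) - logdet
np.testing.assert_allclose(logp, multivariate_normal(mu, cov).logpdf(value))
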
diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py
index 31aa3e40f5..1125ae9357 100644
--- a/pymc3/distributions/posterior_predictive.py
+++ b/pymc3/distributions/posterior_predictive.py
@@ -9,18 +9,20 @@
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any, Callable, Dict, List, cast, overload
+import aesara.graph.basic
+import aesara.graph.fg
import numpy as np
-import theano.graph.basic
-import theano.graph.fg
-import theano.tensor as tt
+from aesara.compile.sharedvalue import SharedVariable
+from aesara.graph.basic import Constant
+from aesara.tensor.var import TensorVariable
from arviz import InferenceData
from typing_extensions import Literal, Protocol
from xarray import Dataset
from pymc3.backends.base import MultiTrace
from pymc3.distributions.distribution import (
- _compile_theano_function,
+ _compile_aesara_function,
_DrawValuesContext,
_DrawValuesContextBlocker,
is_fast_drawable,
@@ -35,7 +37,6 @@
modelcontext,
)
from pymc3.util import chains_and_samples, dataset_to_point_list, get_var_name
-from pymc3.vartypes import theano_constant
# Failing tests:
# test_mixture_random_shape::test_mixture_random_shape
@@ -375,13 +376,13 @@ def draw_values(self) -> list[np.ndarray]:
if (next_, samples) in drawn:
# If the node already has a givens value, skip it
continue
- elif isinstance(next_, (theano_constant, tt.sharedvar.SharedVariable)):
- # If the node is a theano.tensor.TensorConstant or a
- # theano.tensor.sharedvar.SharedVariable, its value will be
- # available automatically in _compile_theano_function so
+ elif isinstance(next_, (Constant, SharedVariable)):
+ # If the node is an aesara.tensor.TensorConstant or a
+ # aesara.tensor.sharedvar.SharedVariable, its value will be
+ # available automatically in _compile_aesara_function so
# we can skip it. Furthermore, if this node was treated as a
- # TensorVariable that should be compiled by theano in
- # _compile_theano_function, it would raise a `TypeError:
+ # TensorVariable that should be compiled by aesara in
+ # _compile_aesara_function, it would raise a `TypeError:
# ('Constants not allowed in param list', ...)` for
# TensorConstant, and a `TypeError: Cannot use a shared
# variable (...) as explicit input` for SharedVariable.
@@ -411,7 +412,7 @@ def draw_values(self) -> list[np.ndarray]:
assert isinstance(value, np.ndarray)
givens[next_.name] = (next_, value)
drawn[(next_, samples)] = value
- except theano.graph.fg.MissingInputError:
+ except aesara.graph.fg.MissingInputError:
# The node failed, so we must add the node's parents to
# the stack of nodes to try to draw from. We exclude the
# nodes in the `params` list.
@@ -456,7 +457,7 @@ def draw_values(self) -> list[np.ndarray]:
assert isinstance(value, np.ndarray)
self.evaluated[param_idx] = drawn[(param, samples)] = value
givens[param.name] = (param, value)
- except theano.graph.fg.MissingInputError:
+ except aesara.graph.fg.MissingInputError:
missing_inputs.add(param_idx)
return [self.evaluated[j] for j in params]
@@ -527,9 +528,9 @@ def draw_value(self, param, trace: _TraceDict | None = None, givens=None):
Parameters
----------
- param: number, array like, theano variable or pymc3 random variable
+ param: number, array like, aesara variable or pymc3 random variable
The value or distribution. Constants or shared variables
- will be converted to an array and returned. Theano variables
+ will be converted to an array and returned. Aesara variables
are evaluated. If `param` is a pymc3 random variable, draw
values from it and return that (as ``np.ndarray``), unless a
value is specified in the ``trace``.
@@ -537,8 +538,8 @@ def draw_value(self, param, trace: _TraceDict | None = None, givens=None):
A dictionary from pymc3 variable names to samples of their values
used to provide context for evaluating ``param``.
givens: dict, optional
- A dictionary from theano variables to their values. These values
- are used to evaluate ``param`` if it is a theano variable.
+ A dictionary from aesara variables to their values. These values
+ are used to evaluate ``param`` if it is an Aesara variable.
"""
samples = self.samples
@@ -569,11 +570,11 @@ def random_sample(
if isinstance(param, (numbers.Number, np.ndarray)):
return param
- elif isinstance(param, theano_constant):
+ elif isinstance(param, Constant):
return param.value
- elif isinstance(param, tt.sharedvar.SharedVariable):
+ elif isinstance(param, SharedVariable):
return param.get_value()
- elif isinstance(param, (tt.TensorVariable, MultiObservedRV)):
+ elif isinstance(param, (TensorVariable, MultiObservedRV)):
if hasattr(param, "model") and trace and param.name in trace.varnames:
return trace[param.name]
elif hasattr(param, "random") and param.random is not None:
@@ -605,7 +606,7 @@ def random_sample(
)
except (ValueError, TypeError):
# reset shape to account for shape changes
- # with theano.shared inputs
+ # with aesara.shared inputs
dist_tmp.shape = ()
# We want to draw values to infer the dist_shape,
# we don't want to store these drawn values to the context
@@ -651,7 +652,7 @@ def random_sample(
# We only truly care if the ancestors of param that were given
# value have the matching dshape and val.shape
param_ancestors = set(
- theano.graph.basic.ancestors([param], blockers=list(variables))
+ aesara.graph.basic.ancestors([param], blockers=list(variables))
)
inputs = [
(var, val) for var, val in zip(variables, values) if var in param_ancestors
@@ -661,7 +662,7 @@ def random_sample(
else:
input_vars = []
input_vals = []
- func = _compile_theano_function(param, input_vars)
+ func = _compile_aesara_function(param, input_vars)
if not input_vars:
assert input_vals == [] # AFAICT if there are now vars, there can't be vals
output = func(*input_vals)
@@ -685,7 +686,7 @@ def _param_shape(var_desig, model: Model) -> tuple[int, ...]:
if hasattr(v, "observations"):
try:
# To get shape of _observed_ data container `pm.Data`
- # (wrapper for theano.SharedVariable) we evaluate it.
+ # (wrapper for SharedVariable) we evaluate it.
shape = tuple(v.observations.shape.eval())
except AttributeError:
shape = v.observations.shape
diff --git a/pymc3/distributions/special.py b/pymc3/distributions/special.py
index ba4662b2df..8b218fea78 100644
--- a/pymc3/distributions/special.py
+++ b/pymc3/distributions/special.py
@@ -12,16 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
-from theano import scalar
-from theano.scalar.basic_scipy import GammaLn, Psi
+from aesara import scalar
+from aesara.scalar.basic_scipy import GammaLn, Psi
+from aesara.tensor.elemwise import Elemwise
__all__ = ["gammaln", "multigammaln", "psi", "log_i0"]
scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln")
-gammaln = tt.Elemwise(scalar_gammaln, name="gammaln")
+gammaln = Elemwise(scalar_gammaln, name="gammaln")
def multigammaln(a, p):
@@ -33,17 +34,17 @@ def multigammaln(a, p):
p: int
degrees of freedom. p > 0
"""
- i = tt.arange(1, p + 1)
- return p * (p - 1) * tt.log(np.pi) / 4.0 + tt.sum(gammaln(a + (1.0 - i) / 2.0), axis=0)
+ i = aet.arange(1, p + 1)
+ return p * (p - 1) * aet.log(np.pi) / 4.0 + aet.sum(gammaln(a + (1.0 - i) / 2.0), axis=0)
def log_i0(x):
"""
Calculates the logarithm of the 0 order modified Bessel function of the first kind""
"""
- return tt.switch(
- tt.lt(x, 5),
- tt.log1p(
+ return aet.switch(
+ aet.lt(x, 5),
+ aet.log1p(
x ** 2.0 / 4.0
+ x ** 4.0 / 64.0
+ x ** 6.0 / 2304.0
@@ -52,8 +53,8 @@ def log_i0(x):
+ x ** 12.0 / 2123366400.0
),
x
- - 0.5 * tt.log(2.0 * np.pi * x)
- + tt.log1p(
+ - 0.5 * aet.log(2.0 * np.pi * x)
+ + aet.log1p(
1.0 / (8.0 * x)
+ 9.0 / (128.0 * x ** 2.0)
+ 225.0 / (3072.0 * x ** 3.0)
@@ -63,4 +64,4 @@ def log_i0(x):
scalar_psi = Psi(scalar.upgrade_to_float, name="scalar_psi")
-psi = tt.Elemwise(scalar_psi, name="psi")
+psi = Elemwise(scalar_psi, name="psi")
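
Again only the graph namespaces change; `gammaln`, `psi`, and `multigammaln` keep their definitions as element-wise scalar ops. A quick spot check against SciPy, outside the diff and assuming eager `.eval()` is acceptable:

import numpy as np
from scipy import special
from pymc3.distributions.special import multigammaln

a, p = 4.5, 3
np.testing.assert_allclose(multigammaln(a, p).eval(), special.multigammaln(a, p), rtol=1e-8)
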
diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py
index e3e1aa15bc..ecd693df2f 100644
--- a/pymc3/distributions/timeseries.py
+++ b/pymc3/distributions/timeseries.py
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
+from aesara import scan
from scipy import stats
-from theano import scan
from pymc3.distributions import distribution, multivariate
from pymc3.distributions.continuous import Flat, Normal, get_tau_sigma
@@ -47,10 +47,10 @@ class AR1(distribution.Continuous):
def __init__(self, k, tau_e, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.k = k = tt.as_tensor_variable(k)
- self.tau_e = tau_e = tt.as_tensor_variable(tau_e)
+ self.k = k = aet.as_tensor_variable(k)
+ self.tau_e = tau_e = aet.as_tensor_variable(tau_e)
self.tau = tau_e * (1 - k ** 2)
- self.mode = tt.as_tensor_variable(0.0)
+ self.mode = aet.as_tensor_variable(0.0)
def logp(self, x):
"""
@@ -74,7 +74,7 @@ def logp(self, x):
boundary = Normal.dist(0.0, tau=tau).logp
innov_like = Normal.dist(k * x_im1, tau=tau_e).logp(x_i)
- return boundary(x[0]) + tt.sum(innov_like)
+ return boundary(x[0]) + aet.sum(innov_like)
class AR(distribution.Continuous):
@@ -116,10 +116,10 @@ def __init__(
sigma = sd
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.sigma = self.sd = tt.as_tensor_variable(sigma)
- self.tau = tt.as_tensor_variable(tau)
+ self.sigma = self.sd = aet.as_tensor_variable(sigma)
+ self.tau = aet.as_tensor_variable(tau)
- self.mean = tt.as_tensor_variable(0.0)
+ self.mean = aet.as_tensor_variable(0.0)
if isinstance(rho, list):
p = len(rho)
@@ -140,7 +140,7 @@ def __init__(
self.p = p
self.constant = constant
- self.rho = rho = tt.as_tensor_variable(rho)
+ self.rho = rho = aet.as_tensor_variable(rho)
self.init = init
def logp(self, value):
@@ -157,7 +157,7 @@ def logp(self, value):
TensorVariable
"""
if self.constant:
- x = tt.add(
+ x = aet.add(
*[self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)]
)
eps = value[self.p :] - self.rho[0] - x
@@ -165,7 +165,7 @@ def logp(self, value):
if self.p == 1:
x = self.rho * value[:-1]
else:
- x = tt.add(
+ x = aet.add(
*[self.rho[i] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)]
)
eps = value[self.p :] - x
@@ -173,7 +173,7 @@ def logp(self, value):
innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
init_like = self.init.logp(value[: self.p])
- return tt.sum(innov_like) + tt.sum(init_like)
+ return aet.sum(innov_like) + aet.sum(init_like)
class GaussianRandomWalk(distribution.Continuous):
@@ -181,7 +181,7 @@ class GaussianRandomWalk(distribution.Continuous):
Note that this is mainly a user-friendly wrapper to enable an easier specification
of GRW. You are not restricted to use only Normal innovations but can use any
- distribution: just use `theano.tensor.cumsum()` to create the random walk behavior.
+ distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior.
Parameters
----------
@@ -209,12 +209,12 @@ def __init__(self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *arg
if sd is not None:
sigma = sd
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.tau = tt.as_tensor_variable(tau)
- sigma = tt.as_tensor_variable(sigma)
+ self.tau = aet.as_tensor_variable(tau)
+ sigma = aet.as_tensor_variable(sigma)
self.sigma = self.sd = sigma
- self.mu = tt.as_tensor_variable(mu)
+ self.mu = aet.as_tensor_variable(mu)
self.init = init
- self.mean = tt.as_tensor_variable(0.0)
+ self.mean = aet.as_tensor_variable(0.0)
def _mu_and_sigma(self, mu, sigma):
"""Helper to get mu and sigma if they are high dimensional."""
@@ -242,7 +242,7 @@ def logp(self, x):
x_i = x[1:]
mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
- return self.init.logp(x[0]) + tt.sum(innov_like)
+ return self.init.logp(x[0]) + aet.sum(innov_like)
return self.init.logp(x)
def random(self, point=None, size=None):
@@ -323,17 +323,17 @@ class GARCH11(distribution.Continuous):
def __init__(self, omega, alpha_1, beta_1, initial_vol, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.omega = omega = tt.as_tensor_variable(omega)
- self.alpha_1 = alpha_1 = tt.as_tensor_variable(alpha_1)
- self.beta_1 = beta_1 = tt.as_tensor_variable(beta_1)
- self.initial_vol = tt.as_tensor_variable(initial_vol)
- self.mean = tt.as_tensor_variable(0.0)
+ self.omega = omega = aet.as_tensor_variable(omega)
+ self.alpha_1 = alpha_1 = aet.as_tensor_variable(alpha_1)
+ self.beta_1 = beta_1 = aet.as_tensor_variable(beta_1)
+ self.initial_vol = aet.as_tensor_variable(initial_vol)
+ self.mean = aet.as_tensor_variable(0.0)
def get_volatility(self, x):
x = x[:-1]
def volatility_update(x, vol, w, a, b):
- return tt.sqrt(w + a * tt.square(x) + b * tt.square(vol))
+ return aet.sqrt(w + a * aet.square(x) + b * aet.square(vol))
vol, _ = scan(
fn=volatility_update,
@@ -341,7 +341,7 @@ def volatility_update(x, vol, w, a, b):
outputs_info=[self.initial_vol],
non_sequences=[self.omega, self.alpha_1, self.beta_1],
)
- return tt.concatenate([[self.initial_vol], vol])
+ return aet.concatenate([[self.initial_vol], vol])
def logp(self, x):
"""
@@ -357,7 +357,7 @@ def logp(self, x):
TensorVariable
"""
vol = self.get_volatility(x)
- return tt.sum(Normal.dist(0.0, sigma=vol).logp(x))
+ return aet.sum(Normal.dist(0.0, sigma=vol).logp(x))
def _distr_parameters_for_repr(self):
return ["omega", "alpha_1", "beta_1"]
@@ -379,7 +379,7 @@ class EulerMaruyama(distribution.Continuous):
def __init__(self, dt, sde_fn, sde_pars, *args, **kwds):
super().__init__(*args, **kwds)
- self.dt = dt = tt.as_tensor_variable(dt)
+ self.dt = dt = aet.as_tensor_variable(dt)
self.sde_fn = sde_fn
self.sde_pars = sde_pars
@@ -399,8 +399,8 @@ def logp(self, x):
xt = x[:-1]
f, g = self.sde_fn(x[:-1], *self.sde_pars)
mu = xt + self.dt * f
- sd = tt.sqrt(self.dt) * g
- return tt.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:]))
+ sd = aet.sqrt(self.dt) * g
+ return aet.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:]))
def _distr_parameters_for_repr(self):
return ["dt"]
@@ -437,7 +437,7 @@ def __init__(
self.init = init
self.innovArgs = (mu, cov, tau, chol, lower)
self.innov = multivariate.MvNormal.dist(*self.innovArgs, shape=self.shape)
- self.mean = tt.as_tensor_variable(0.0)
+ self.mean = aet.as_tensor_variable(0.0)
def logp(self, x):
"""
@@ -551,7 +551,7 @@ class MvStudentTRandomWalk(MvGaussianRandomWalk):
def __init__(self, nu, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.nu = tt.as_tensor_variable(nu)
+ self.nu = aet.as_tensor_variable(nu)
self.innov = multivariate.MvStudentT.dist(self.nu, None, *self.innovArgs)
def _distr_parameters_for_repr(self):
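Illustrative sketch (not taken from the patch) of the scan-based recursion that GARCH11.get_volatility keeps after the rename, just spelled with the aesara imports; the symbolic inputs below are hypothetical stand-ins for the distribution's parameters.

import aesara
import aesara.tensor as aet
import numpy as np
from aesara import scan

x = aet.dvector("x")
omega = aet.dscalar("omega")
alpha_1 = aet.dscalar("alpha_1")
beta_1 = aet.dscalar("beta_1")
initial_vol = aet.dscalar("initial_vol")

def volatility_update(x_t, vol, w, a, b):
    # sigma_t = sqrt(w + a * x_{t-1}^2 + b * sigma_{t-1}^2)
    return aet.sqrt(w + a * aet.square(x_t) + b * aet.square(vol))

vol, _ = scan(
    fn=volatility_update,
    sequences=[x[:-1]],
    outputs_info=[initial_vol],
    non_sequences=[omega, alpha_1, beta_1],
)
get_vol = aesara.function(
    [x, omega, alpha_1, beta_1, initial_vol], aet.concatenate([[initial_vol], vol])
)
print(get_vol(np.random.randn(5), 0.1, 0.2, 0.7, 1.0))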
diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py
index 880301182c..b17e7b27f4 100644
--- a/pymc3/distributions/transforms.py
+++ b/pymc3/distributions/transforms.py
@@ -14,16 +14,18 @@
import warnings
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
+from aesara.tensor.subtensor import advanced_set_subtensor1
+from aesara.tensor.type import TensorType
from scipy.special import logit as nplogit
+from pymc3.aesaraf import floatX, gradient
from pymc3.distributions import distribution
from pymc3.distributions.distribution import draw_values
from pymc3.math import invlogit, logit, logsumexp
from pymc3.model import FreeRV
-from pymc3.theanof import floatX, gradient
__all__ = [
"Transform",
@@ -131,8 +133,8 @@ def __str__(self):
class ElemwiseTransform(Transform):
def jacobian_det(self, x):
- grad = tt.reshape(gradient(tt.sum(self.backward(x)), [x]), x.shape)
- return tt.log(tt.abs_(grad))
+ grad = aet.reshape(gradient(aet.sum(self.backward(x)), [x]), x.shape)
+ return aet.log(aet.abs_(grad))
class TransformedDistribution(distribution.Distribution):
@@ -159,7 +161,7 @@ def __init__(self, dist, transform, *args, **kwargs):
if transform.name == "stickbreaking":
b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False))
# force the last dim not broadcastable
- self.type = tt.TensorType(v.dtype, b)
+ self.type = TensorType(v.dtype, b)
def logp(self, x):
"""
@@ -212,10 +214,10 @@ class Log(ElemwiseTransform):
name = "log"
def backward(self, x):
- return tt.exp(x)
+ return aet.exp(x)
def forward(self, x):
- return tt.log(x)
+ return aet.log(x)
def forward_val(self, x, point=None):
return np.log(x)
@@ -231,7 +233,7 @@ class LogExpM1(ElemwiseTransform):
name = "log_exp_m1"
def backward(self, x):
- return tt.nnet.softplus(x)
+ return aet.nnet.softplus(x)
def forward(self, x):
"""Inverse operation of softplus.
@@ -239,13 +241,13 @@ def forward(self, x):
y = Log(Exp(x) - 1)
= Log(1 - Exp(-x)) + x
"""
- return tt.log(1.0 - tt.exp(-x)) + x
+ return aet.log(1.0 - aet.exp(-x)) + x
def forward_val(self, x, point=None):
return np.log(1.0 - np.exp(-x)) + x
def jacobian_det(self, x):
- return -tt.nnet.softplus(-x)
+ return -aet.nnet.softplus(-x)
log_exp_m1 = LogExpM1()
@@ -273,18 +275,18 @@ class Interval(ElemwiseTransform):
name = "interval"
def __init__(self, a, b):
- self.a = tt.as_tensor_variable(a)
- self.b = tt.as_tensor_variable(b)
+ self.a = aet.as_tensor_variable(a)
+ self.b = aet.as_tensor_variable(b)
def backward(self, x):
a, b = self.a, self.b
- sigmoid_x = tt.nnet.sigmoid(x)
+ sigmoid_x = aet.nnet.sigmoid(x)
r = sigmoid_x * b + (1 - sigmoid_x) * a
return r
def forward(self, x):
a, b = self.a, self.b
- return tt.log(x - a) - tt.log(b - x)
+ return aet.log(x - a) - aet.log(b - x)
def forward_val(self, x, point=None):
# 2017-06-19
@@ -294,8 +296,8 @@ def forward_val(self, x, point=None):
return floatX(np.log(x - a) - np.log(b - x))
def jacobian_det(self, x):
- s = tt.nnet.softplus(-x)
- return tt.log(self.b - self.a) - 2 * s - x
+ s = aet.nnet.softplus(-x)
+ return aet.log(self.b - self.a) - 2 * s - x
interval = Interval
@@ -307,16 +309,16 @@ class LowerBound(ElemwiseTransform):
name = "lowerbound"
def __init__(self, a):
- self.a = tt.as_tensor_variable(a)
+ self.a = aet.as_tensor_variable(a)
def backward(self, x):
a = self.a
- r = tt.exp(x) + a
+ r = aet.exp(x) + a
return r
def forward(self, x):
a = self.a
- return tt.log(x - a)
+ return aet.log(x - a)
def forward_val(self, x, point=None):
# 2017-06-19
@@ -342,16 +344,16 @@ class UpperBound(ElemwiseTransform):
name = "upperbound"
def __init__(self, b):
- self.b = tt.as_tensor_variable(b)
+ self.b = aet.as_tensor_variable(b)
def backward(self, x):
b = self.b
- r = b - tt.exp(x)
+ r = b - aet.exp(x)
return r
def forward(self, x):
b = self.b
- return tt.log(b - x)
+ return aet.log(b - x)
def forward_val(self, x, point=None):
# 2017-06-19
@@ -375,15 +377,15 @@ class Ordered(Transform):
name = "ordered"
def backward(self, y):
- x = tt.zeros(y.shape)
- x = tt.inc_subtensor(x[..., 0], y[..., 0])
- x = tt.inc_subtensor(x[..., 1:], tt.exp(y[..., 1:]))
- return tt.cumsum(x, axis=-1)
+ x = aet.zeros(y.shape)
+ x = aet.inc_subtensor(x[..., 0], y[..., 0])
+ x = aet.inc_subtensor(x[..., 1:], aet.exp(y[..., 1:]))
+ return aet.cumsum(x, axis=-1)
def forward(self, x):
- y = tt.zeros(x.shape)
- y = tt.inc_subtensor(y[..., 0], x[..., 0])
- y = tt.inc_subtensor(y[..., 1:], tt.log(x[..., 1:] - x[..., :-1]))
+ y = aet.zeros(x.shape)
+ y = aet.inc_subtensor(y[..., 0], x[..., 0])
+ y = aet.inc_subtensor(y[..., 1:], aet.log(x[..., 1:] - x[..., :-1]))
return y
def forward_val(self, x, point=None):
@@ -393,7 +395,7 @@ def forward_val(self, x, point=None):
return y
def jacobian_det(self, y):
- return tt.sum(y[..., 1:], axis=-1)
+ return aet.sum(y[..., 1:], axis=-1)
ordered = Ordered()
@@ -412,8 +414,8 @@ class SumTo1(Transform):
name = "sumto1"
def backward(self, y):
- remaining = 1 - tt.sum(y[..., :], axis=-1, keepdims=True)
- return tt.concatenate([y[..., :], remaining], axis=-1)
+ remaining = 1 - aet.sum(y[..., :], axis=-1, keepdims=True)
+ return aet.concatenate([y[..., :], remaining], axis=-1)
def forward(self, x):
return x[..., :-1]
@@ -422,8 +424,8 @@ def forward_val(self, x, point=None):
return x[..., :-1]
def jacobian_det(self, x):
- y = tt.zeros(x.shape)
- return tt.sum(y, axis=-1)
+ y = aet.zeros(x.shape)
+ return aet.sum(y, axis=-1)
sum_to_1 = SumTo1()
@@ -450,8 +452,8 @@ def __init__(self, eps=None):
def forward(self, x_):
x = x_.T
n = x.shape[0]
- lx = tt.log(x)
- shift = tt.sum(lx, 0, keepdims=True) / n
+ lx = aet.log(x)
+ shift = aet.sum(lx, 0, keepdims=True) / n
y = lx[:-1] - shift
return floatX(y.T)
@@ -465,20 +467,20 @@ def forward_val(self, x_, point=None):
def backward(self, y_):
y = y_.T
- y = tt.concatenate([y, -tt.sum(y, 0, keepdims=True)])
+ y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)])
# "softmax" with vector support and no deprication warning:
- e_y = tt.exp(y - tt.max(y, 0, keepdims=True))
- x = e_y / tt.sum(e_y, 0, keepdims=True)
+ e_y = aet.exp(y - aet.max(y, 0, keepdims=True))
+ x = e_y / aet.sum(e_y, 0, keepdims=True)
return floatX(x.T)
def jacobian_det(self, y_):
y = y_.T
Km1 = y.shape[0] + 1
- sy = tt.sum(y, 0, keepdims=True)
- r = tt.concatenate([y + sy, tt.zeros(sy.shape)])
+ sy = aet.sum(y, 0, keepdims=True)
+ r = aet.concatenate([y + sy, aet.zeros(sy.shape)])
sr = logsumexp(r, 0, keepdims=True)
- d = tt.log(Km1) + (Km1 * sy) - (Km1 * sr)
- return tt.sum(d, 0).T
+ d = aet.log(Km1) + (Km1 * sy) - (Km1 * sr)
+ return aet.sum(d, 0).T
stick_breaking = StickBreaking()
@@ -490,16 +492,16 @@ class Circular(ElemwiseTransform):
name = "circular"
def backward(self, y):
- return tt.arctan2(tt.sin(y), tt.cos(y))
+ return aet.arctan2(aet.sin(y), aet.cos(y))
def forward(self, x):
- return tt.as_tensor_variable(x)
+ return aet.as_tensor_variable(x)
def forward_val(self, x, point=None):
return x
def jacobian_det(self, x):
- return tt.zeros(x.shape)
+ return aet.zeros(x.shape)
circular = Circular()
@@ -512,17 +514,17 @@ def __init__(self, n):
self.diag_idxs = np.arange(1, n + 1).cumsum() - 1
def backward(self, x):
- return tt.advanced_set_subtensor1(x, tt.exp(x[self.diag_idxs]), self.diag_idxs)
+ return advanced_set_subtensor1(x, aet.exp(x[self.diag_idxs]), self.diag_idxs)
def forward(self, y):
- return tt.advanced_set_subtensor1(y, tt.log(y[self.diag_idxs]), self.diag_idxs)
+ return advanced_set_subtensor1(y, aet.log(y[self.diag_idxs]), self.diag_idxs)
def forward_val(self, y, point=None):
y[..., self.diag_idxs] = np.log(y[..., self.diag_idxs])
return y
def jacobian_det(self, y):
- return tt.sum(y[self.diag_idxs])
+ return aet.sum(y[self.diag_idxs])
class Chain(Transform):
@@ -549,7 +551,7 @@ def backward(self, y):
return x
def jacobian_det(self, y):
- y = tt.as_tensor_variable(y)
+ y = aet.as_tensor_variable(y)
det_list = []
ndim0 = y.ndim
for transf in reversed(self.transform_list):
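A small sanity sketch (not part of the patch) of the Interval transform's forward/backward pair written directly against the renamed aet namespace, showing that a value in (a, b) round-trips through the unconstrained space; a, b and x are hypothetical.

import aesara
import aesara.tensor as aet

a, b = 0.0, 1.0
x = aet.dscalar("x")

y = aet.log(x - a) - aet.log(b - x)           # forward: (a, b) -> R
sigmoid_y = aet.nnet.sigmoid(y)
x_back = sigmoid_y * b + (1 - sigmoid_y) * a  # backward: R -> (a, b)

f = aesara.function([x], [y, x_back])
print(f(0.3))  # second value is ~0.3 again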
diff --git a/pymc3/glm/families.py b/pymc3/glm/families.py
index 23ca136cf8..57232e28d1 100644
--- a/pymc3/glm/families.py
+++ b/pymc3/glm/families.py
@@ -16,8 +16,8 @@
from copy import copy
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
from pymc3 import distributions as pm_dists
from pymc3.model import modelcontext
@@ -36,9 +36,9 @@ def __call__(self, x):
identity = Identity()
-logit = tt.nnet.sigmoid
-inverse = tt.inv
-exp = tt.exp
+logit = aet.nnet.sigmoid
+inverse = aet.inv
+exp = aet.exp
class Family:
@@ -80,7 +80,7 @@ def create_likelihood(self, name, y_est, y_data, model=None):
Parameters
----------
- y_est: theano.tensor
+ y_est: aesara.tensor
Estimate of dependent variable
y_data: array
Observed dependent variable
diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py
index 81c916c118..9ec2a2b731 100644
--- a/pymc3/glm/linear.py
+++ b/pymc3/glm/linear.py
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
from pymc3.distributions import Flat, Normal
from pymc3.glm import families
@@ -39,7 +39,7 @@ class LinearComponent(Model):
use `Regressor` key for defining default prior for all regressors
defaults to Normal.dist(mu=0, tau=1.0E-6)
vars: dict - random variables instead of creating new ones
- offset: scalar, or numpy/theano array with the same shape as y
+ offset: scalar, or numpy/aesara array with the same shape as y
this can be used to specify an a priori known component to be
included in the linear predictor during fitting.
"""
@@ -73,7 +73,7 @@ def __init__(
x, labels = any_to_tensor_and_labels(x, labels)
# now we have x, shape and labels
if intercept:
- x = tt.concatenate([tt.ones((x.shape[0], 1), x.dtype), x], axis=1)
+ x = aet.concatenate([aet.ones((x.shape[0], 1), x.dtype), x], axis=1)
labels = ["Intercept"] + labels
coeffs = list()
for name in labels:
@@ -94,7 +94,7 @@ def __init__(
),
)
coeffs.append(v)
- self.coeffs = tt.stack(coeffs, axis=0)
+ self.coeffs = aet.stack(coeffs, axis=0)
self.y_est = x.dot(self.coeffs) + offset
@classmethod
@@ -149,7 +149,7 @@ class GLM(LinearComponent):
init: dict - test_vals for coefficients
vars: dict - random variables instead of creating new ones
family: pymc3..families object
- offset: scalar, or numpy/theano array with the same shape as y
+ offset: scalar, or numpy/aesara array with the same shape as y
this can be used to specify an a priori known component to be
included in the linear predictor during fitting.
"""
diff --git a/pymc3/glm/utils.py b/pymc3/glm/utils.py
index 889284b317..6431892581 100644
--- a/pymc3/glm/utils.py
+++ b/pymc3/glm/utils.py
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
import pandas as pd
-import theano.tensor as tt
+
+from aesara.graph.basic import Variable
def any_to_tensor_and_labels(x, labels=None):
@@ -33,7 +35,7 @@ def any_to_tensor_and_labels(x, labels=None):
Parameters
----------
- x: np.ndarray | pd.DataFrame | tt.Variable | dict | list
+ x: np.ndarray | pd.DataFrame | Variable | dict | list
labels: list - names for columns of output tensor
Returns
@@ -76,13 +78,13 @@ def any_to_tensor_and_labels(x, labels=None):
for k, v in x.items():
res.append(v)
labels.append(k)
- x = tt.stack(res, axis=1)
+ x = aet.stack(res, axis=1)
if x.ndim == 1:
x = x[:, None]
# case when it can appear to be some
# array like value like lists of lists
# numpy deals with it
- elif not isinstance(x, tt.Variable):
+ elif not isinstance(x, Variable):
x = np.asarray(x)
if x.ndim == 0:
raise ValueError("Cannot use scalars")
@@ -92,7 +94,7 @@ def any_to_tensor_and_labels(x, labels=None):
# but user passes labels trusting seems
# to be a good option
elif labels is not None:
- x = tt.as_tensor_variable(x)
+ x = aet.as_tensor_variable(x)
if x.ndim == 0:
raise ValueError("Cannot use scalars")
elif x.ndim == 1:
@@ -100,15 +102,15 @@ def any_to_tensor_and_labels(x, labels=None):
else: # trust input
pass
# we should check that we can extract labels
- if labels is None and not isinstance(x, tt.Variable):
+ if labels is None and not isinstance(x, Variable):
labels = ["x%d" % i for i in range(x.shape[1])]
- # for theano variables we should have labels from user
+ # for aesara variables we should have labels from user
elif labels is None:
raise ValueError("Please provide labels as " "we cannot infer shape of input")
else: # trust labels, user knows what he is doing
pass
# it's time to check shapes if we can
- if not isinstance(x, tt.Variable):
+ if not isinstance(x, Variable):
if not len(labels) == x.shape[1]:
raise ValueError(
"Please provide full list "
@@ -126,8 +128,8 @@ def any_to_tensor_and_labels(x, labels=None):
elif not isinstance(labels, list):
labels = list(labels)
# as output we need tensor
- if not isinstance(x, tt.Variable):
- x = tt.as_tensor_variable(x)
+ if not isinstance(x, Variable):
+ x = aet.as_tensor_variable(x)
# finally check dimensions
if x.ndim == 0:
raise ValueError("Cannot use scalars")
diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py
index 7a01a9eec5..4a02827a5d 100644
--- a/pymc3/gp/cov.py
+++ b/pymc3/gp/cov.py
@@ -18,9 +18,12 @@
from numbers import Number
from operator import add, mul
+import aesara
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.tensor as tt
+
+from aesara.tensor.sharedvar import TensorSharedVariable
+from aesara.tensor.var import TensorConstant, TensorVariable
__all__ = [
"Constant",
@@ -96,9 +99,9 @@ def _slice(self, X, Xs):
" the number of columns to use. Ignore otherwise.",
UserWarning,
)
- X = tt.as_tensor_variable(X[:, self.active_dims])
+ X = aet.as_tensor_variable(X[:, self.active_dims])
if Xs is not None:
- Xs = tt.as_tensor_variable(Xs[:, self.active_dims])
+ Xs = aet.as_tensor_variable(Xs[:, self.active_dims])
return X, Xs
def __add__(self, other):
@@ -115,10 +118,10 @@ def __rmul__(self, other):
def __pow__(self, other):
if (
- isinstance(other, theano.compile.SharedVariable)
+ isinstance(other, aesara.compile.SharedVariable)
and other.get_value().squeeze().shape == ()
):
- other = tt.squeeze(other)
+ other = aet.squeeze(other)
return Exponentiated(self, other)
elif isinstance(other, Number):
return Exponentiated(self, other)
@@ -179,13 +182,13 @@ def merge_factors(self, X, Xs=None, diag=False):
elif isinstance(
factor,
(
- tt.TensorConstant,
- tt.TensorVariable,
- tt.sharedvar.TensorSharedVariable,
+ TensorConstant,
+ TensorVariable,
+ TensorSharedVariable,
),
):
if factor.ndim == 2 and diag:
- factor_list.append(tt.diag(factor))
+ factor_list.append(aet.diag(factor))
else:
factor_list.append(factor)
else:
@@ -264,13 +267,13 @@ def __init__(self, c):
self.c = c
def diag(self, X):
- return tt.alloc(self.c, X.shape[0])
+ return aet.alloc(self.c, X.shape[0])
def full(self, X, Xs=None):
if Xs is None:
- return tt.alloc(self.c, X.shape[0], X.shape[0])
+ return aet.alloc(self.c, X.shape[0], X.shape[0])
else:
- return tt.alloc(self.c, X.shape[0], Xs.shape[0])
+ return aet.alloc(self.c, X.shape[0], Xs.shape[0])
class WhiteNoise(Covariance):
@@ -287,13 +290,13 @@ def __init__(self, sigma):
self.sigma = sigma
def diag(self, X):
- return tt.alloc(tt.square(self.sigma), X.shape[0])
+ return aet.alloc(aet.square(self.sigma), X.shape[0])
def full(self, X, Xs=None):
if Xs is None:
- return tt.diag(self.diag(X))
+ return aet.diag(self.diag(X))
else:
- return tt.alloc(0.0, X.shape[0], Xs.shape[0])
+ return aet.alloc(0.0, X.shape[0], Xs.shape[0])
class Circular(Covariance):
@@ -330,25 +333,25 @@ class Circular(Covariance):
def __init__(self, input_dim, period, tau=4, active_dims=None):
super().__init__(input_dim, active_dims)
- self.c = tt.as_tensor_variable(period / 2)
+ self.c = aet.as_tensor_variable(period / 2)
self.tau = tau
def dist(self, X, Xs):
if Xs is None:
- Xs = tt.transpose(X)
+ Xs = aet.transpose(X)
else:
- Xs = tt.transpose(Xs)
- return tt.abs_((X - Xs + self.c) % (self.c * 2) - self.c)
+ Xs = aet.transpose(Xs)
+ return aet.abs_((X - Xs + self.c) % (self.c * 2) - self.c)
def weinland(self, t):
- return (1 + self.tau * t / self.c) * tt.clip(1 - t / self.c, 0, np.inf) ** self.tau
+ return (1 + self.tau * t / self.c) * aet.clip(1 - t / self.c, 0, np.inf) ** self.tau
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
return self.weinland(self.dist(X, Xs))
def diag(self, X):
- return tt.alloc(1.0, X.shape[0])
+ return aet.alloc(1.0, X.shape[0])
class Stationary(Covariance):
@@ -371,29 +374,29 @@ def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None):
ls = 1.0 / np.asarray(ls_inv)
else:
ls = 1.0 / ls_inv
- self.ls = tt.as_tensor_variable(ls)
+ self.ls = aet.as_tensor_variable(ls)
def square_dist(self, X, Xs):
- X = tt.mul(X, 1.0 / self.ls)
- X2 = tt.sum(tt.square(X), 1)
+ X = aet.mul(X, 1.0 / self.ls)
+ X2 = aet.sum(aet.square(X), 1)
if Xs is None:
- sqd = -2.0 * tt.dot(X, tt.transpose(X)) + (
- tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))
+ sqd = -2.0 * aet.dot(X, aet.transpose(X)) + (
+ aet.reshape(X2, (-1, 1)) + aet.reshape(X2, (1, -1))
)
else:
- Xs = tt.mul(Xs, 1.0 / self.ls)
- Xs2 = tt.sum(tt.square(Xs), 1)
- sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + (
- tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))
+ Xs = aet.mul(Xs, 1.0 / self.ls)
+ Xs2 = aet.sum(aet.square(Xs), 1)
+ sqd = -2.0 * aet.dot(X, aet.transpose(Xs)) + (
+ aet.reshape(X2, (-1, 1)) + aet.reshape(Xs2, (1, -1))
)
- return tt.clip(sqd, 0.0, np.inf)
+ return aet.clip(sqd, 0.0, np.inf)
def euclidean_dist(self, X, Xs):
r2 = self.square_dist(X, Xs)
- return tt.sqrt(r2 + 1e-12)
+ return aet.sqrt(r2 + 1e-12)
def diag(self, X):
- return tt.alloc(1.0, X.shape[0])
+ return aet.alloc(1.0, X.shape[0])
def full(self, X, Xs=None):
raise NotImplementedError
@@ -429,8 +432,8 @@ def full(self, X, Xs=None):
f1 = X.dimshuffle(0, "x", 1)
f2 = Xs.dimshuffle("x", 0, 1)
r = np.pi * (f1 - f2) / self.period
- r = tt.sum(tt.square(tt.sin(r) / self.ls), 2)
- return tt.exp(-0.5 * r)
+ r = aet.sum(aet.square(aet.sin(r) / self.ls), 2)
+ return aet.exp(-0.5 * r)
class ExpQuad(Stationary):
@@ -445,7 +448,7 @@ class ExpQuad(Stationary):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- return tt.exp(-0.5 * self.square_dist(X, Xs))
+ return aet.exp(-0.5 * self.square_dist(X, Xs))
class RatQuad(Stationary):
@@ -463,7 +466,7 @@ def __init__(self, input_dim, alpha, ls=None, ls_inv=None, active_dims=None):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- return tt.power(
+ return aet.power(
(1.0 + 0.5 * self.square_dist(X, Xs) * (1.0 / self.alpha)),
-1.0 * self.alpha,
)
@@ -483,7 +486,9 @@ class Matern52(Stationary):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
r = self.euclidean_dist(X, Xs)
- return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) * tt.exp(-1.0 * np.sqrt(5.0) * r)
+ return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * aet.square(r)) * aet.exp(
+ -1.0 * np.sqrt(5.0) * r
+ )
class Matern32(Stationary):
@@ -499,7 +504,7 @@ class Matern32(Stationary):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
r = self.euclidean_dist(X, Xs)
- return (1.0 + np.sqrt(3.0) * r) * tt.exp(-np.sqrt(3.0) * r)
+ return (1.0 + np.sqrt(3.0) * r) * aet.exp(-np.sqrt(3.0) * r)
class Matern12(Stationary):
@@ -512,7 +517,7 @@ class Matern12(Stationary):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
r = self.euclidean_dist(X, Xs)
- return tt.exp(-r)
+ return aet.exp(-r)
class Exponential(Stationary):
@@ -526,7 +531,7 @@ class Exponential(Stationary):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- return tt.exp(-0.5 * self.euclidean_dist(X, Xs))
+ return aet.exp(-0.5 * self.euclidean_dist(X, Xs))
class Cosine(Stationary):
@@ -539,7 +544,7 @@ class Cosine(Stationary):
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- return tt.cos(2.0 * np.pi * self.euclidean_dist(X, Xs))
+ return aet.cos(2.0 * np.pi * self.euclidean_dist(X, Xs))
class Linear(Covariance):
@@ -556,20 +561,20 @@ def __init__(self, input_dim, c, active_dims=None):
def _common(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- Xc = tt.sub(X, self.c)
+ Xc = aet.sub(X, self.c)
return X, Xc, Xs
def full(self, X, Xs=None):
X, Xc, Xs = self._common(X, Xs)
if Xs is None:
- return tt.dot(Xc, tt.transpose(Xc))
+ return aet.dot(Xc, aet.transpose(Xc))
else:
- Xsc = tt.sub(Xs, self.c)
- return tt.dot(Xc, tt.transpose(Xsc))
+ Xsc = aet.sub(Xs, self.c)
+ return aet.dot(Xc, aet.transpose(Xsc))
def diag(self, X):
X, Xc, _ = self._common(X, None)
- return tt.sum(tt.square(Xc), 1)
+ return aet.sum(aet.square(Xc), 1)
class Polynomial(Linear):
@@ -587,17 +592,17 @@ def __init__(self, input_dim, c, d, offset, active_dims=None):
def full(self, X, Xs=None):
linear = super().full(X, Xs)
- return tt.power(linear + self.offset, self.d)
+ return aet.power(linear + self.offset, self.d)
def diag(self, X):
linear = super().diag(X)
- return tt.power(linear + self.offset, self.d)
+ return aet.power(linear + self.offset, self.d)
class WarpedInput(Covariance):
r"""
Warp the inputs of any kernel using an arbitrary function
- defined using Theano.
+ defined using Aesara.
.. math::
k(x, x') = k(w(x), w(x'))
@@ -606,7 +611,7 @@ class WarpedInput(Covariance):
----------
cov_func: Covariance
warp_func: callable
- Theano function of X and additional optional arguments.
+ Aesara function of X and additional optional arguments.
args: optional, tuple or list of scalars or PyMC3 variables
Additional inputs (besides X or Xs) to warp_func.
"""
@@ -636,7 +641,7 @@ def diag(self, X):
class Gibbs(Covariance):
r"""
The Gibbs kernel. Use an arbitrary lengthscale function defined
- using Theano. Only tested in one dimension.
+ using Aesara. Only tested in one dimension.
.. math::
k(x, x') = \sqrt{\frac{2\ell(x)\ell(x')}{\ell^2(x) + \ell^2(x')}}
@@ -646,7 +651,7 @@ class Gibbs(Covariance):
Parameters
----------
lengthscale_func: callable
- Theano function of X and additional optional arguments.
+ Aesara function of X and additional optional arguments.
args: optional, tuple or list of scalars or PyMC3 variables
Additional inputs (besides X or Xs) to lengthscale_func.
"""
@@ -665,39 +670,39 @@ def __init__(self, input_dim, lengthscale_func, args=None, active_dims=None):
self.args = args
def square_dist(self, X, Xs=None):
- X2 = tt.sum(tt.square(X), 1)
+ X2 = aet.sum(aet.square(X), 1)
if Xs is None:
- sqd = -2.0 * tt.dot(X, tt.transpose(X)) + (
- tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))
+ sqd = -2.0 * aet.dot(X, aet.transpose(X)) + (
+ aet.reshape(X2, (-1, 1)) + aet.reshape(X2, (1, -1))
)
else:
- Xs2 = tt.sum(tt.square(Xs), 1)
- sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + (
- tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))
+ Xs2 = aet.sum(aet.square(Xs), 1)
+ sqd = -2.0 * aet.dot(X, aet.transpose(Xs)) + (
+ aet.reshape(X2, (-1, 1)) + aet.reshape(Xs2, (1, -1))
)
- return tt.clip(sqd, 0.0, np.inf)
+ return aet.clip(sqd, 0.0, np.inf)
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- rx = self.lfunc(tt.as_tensor_variable(X), self.args)
+ rx = self.lfunc(aet.as_tensor_variable(X), self.args)
if Xs is None:
- rz = self.lfunc(tt.as_tensor_variable(X), self.args)
+ rz = self.lfunc(aet.as_tensor_variable(X), self.args)
r2 = self.square_dist(X, X)
else:
- rz = self.lfunc(tt.as_tensor_variable(Xs), self.args)
+ rz = self.lfunc(aet.as_tensor_variable(Xs), self.args)
r2 = self.square_dist(X, Xs)
- rx2 = tt.reshape(tt.square(rx), (-1, 1))
- rz2 = tt.reshape(tt.square(rz), (1, -1))
- return tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) * tt.exp(-1.0 * r2 / (rx2 + rz2))
+ rx2 = aet.reshape(aet.square(rx), (-1, 1))
+ rz2 = aet.reshape(aet.square(rz), (1, -1))
+ return aet.sqrt((2.0 * aet.outer(rx, rz)) / (rx2 + rz2)) * aet.exp(-1.0 * r2 / (rx2 + rz2))
def diag(self, X):
- return tt.alloc(1.0, X.shape[0])
+ return aet.alloc(1.0, X.shape[0])
class ScaledCov(Covariance):
r"""
Construct a kernel by multiplying a base kernel with a scaling
- function defined using Theano. The scaling function is
+ function defined using Aesara. The scaling function is
non-negative, and can be parameterized.
.. math::
@@ -708,7 +713,7 @@ class ScaledCov(Covariance):
cov_func: Covariance
Base kernel or covariance function
scaling_func: callable
- Theano function of X and additional optional arguments.
+ Aesara function of X and additional optional arguments.
args: optional, tuple or list of scalars or PyMC3 variables
Additional inputs (besides X or Xs) to scaling_func.
"""
@@ -726,17 +731,17 @@ def __init__(self, input_dim, cov_func, scaling_func, args=None, active_dims=Non
def diag(self, X):
X, _ = self._slice(X, None)
cov_diag = self.cov_func(X, diag=True)
- scf_diag = tt.square(tt.flatten(self.scaling_func(X, self.args)))
+ scf_diag = aet.square(aet.flatten(self.scaling_func(X, self.args)))
return cov_diag * scf_diag
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
scf_x = self.scaling_func(X, self.args)
if Xs is None:
- return tt.outer(scf_x, scf_x) * self.cov_func(X)
+ return aet.outer(scf_x, scf_x) * self.cov_func(X)
else:
scf_xs = self.scaling_func(Xs, self.args)
- return tt.outer(scf_x, scf_xs) * self.cov_func(X, Xs)
+ return aet.outer(scf_x, scf_xs) * self.cov_func(X, Xs)
class Coregion(Covariance):
@@ -780,27 +785,27 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None):
if make_B and B is not None:
raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion")
if make_B:
- self.W = tt.as_tensor_variable(W)
- self.kappa = tt.as_tensor_variable(kappa)
- self.B = tt.dot(self.W, self.W.T) + tt.diag(self.kappa)
+ self.W = aet.as_tensor_variable(W)
+ self.kappa = aet.as_tensor_variable(kappa)
+ self.B = aet.dot(self.W, self.W.T) + aet.diag(self.kappa)
elif B is not None:
- self.B = tt.as_tensor_variable(B)
+ self.B = aet.as_tensor_variable(B)
else:
raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion")
def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
- index = tt.cast(X, "int32")
+ index = aet.cast(X, "int32")
if Xs is None:
index2 = index.T
else:
- index2 = tt.cast(Xs, "int32").T
+ index2 = aet.cast(Xs, "int32").T
return self.B[index, index2]
def diag(self, X):
X, _ = self._slice(X, None)
- index = tt.cast(X, "int32")
- return tt.diag(self.B)[index.ravel()]
+ index = aet.cast(X, "int32")
+ return aet.diag(self.B)[index.ravel()]
def handle_args(func, args):
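Sketch (illustrative, not from the patch) of Stationary.square_dist spelled out standalone: it is the usual ||x_i - x_j||^2 expansion with per-column lengthscales, unchanged apart from the aet namespace; the shapes and names below are hypothetical.

import aesara
import aesara.tensor as aet
import numpy as np

X = aet.dmatrix("X")    # (n, d) inputs
ls = aet.dvector("ls")  # one lengthscale per column

Xl = aet.mul(X, 1.0 / ls)
X2 = aet.sum(aet.square(Xl), 1)
sqd = -2.0 * aet.dot(Xl, aet.transpose(Xl)) + (
    aet.reshape(X2, (-1, 1)) + aet.reshape(X2, (1, -1))
)
square_dist = aesara.function([X, ls], aet.clip(sqd, 0.0, np.inf))
print(square_dist(np.random.randn(4, 2), np.ones(2)).shape)  # (4, 4)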
diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py
index 654bf536cf..43a52b2d16 100644
--- a/pymc3/gp/gp.py
+++ b/pymc3/gp/gp.py
@@ -15,10 +15,10 @@
import functools
import warnings
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
-from theano.tensor.nlinalg import eigh
+from aesara.tensor.nlinalg import eigh
import pymc3 as pm
@@ -195,9 +195,9 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total):
L = cholesky(stabilize(Kxx))
A = solve_lower(L, Kxs)
v = solve_lower(L, f - mean_total(X))
- mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v)
+ mu = self.mean_func(Xnew) + aet.dot(aet.transpose(A), v)
Kss = self.cov_func(Xnew)
- cov = Kss - tt.dot(tt.transpose(A), A)
+ cov = Kss - aet.dot(aet.transpose(A), A)
return mu, cov
def conditional(self, name, Xnew, given=None, **kwargs):
@@ -281,7 +281,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs):
if reparameterize:
chi2 = pm.ChiSquared(name + "_chi2_", self.nu)
v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=shape, **kwargs)
- f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v)))
+ f = pm.Deterministic(name, (aet.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v)))
else:
f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape, **kwargs)
return f
@@ -318,10 +318,10 @@ def _build_conditional(self, Xnew, X, f):
Kss = self.cov_func(Xnew)
L = cholesky(stabilize(Kxx))
A = solve_lower(L, Kxs)
- cov = Kss - tt.dot(tt.transpose(A), A)
+ cov = Kss - aet.dot(aet.transpose(A), A)
v = solve_lower(L, f - self.mean_func(X))
- mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v)
- beta = tt.dot(v, v)
+ mu = self.mean_func(Xnew) + aet.dot(aet.transpose(A), v)
+ beta = aet.dot(v, v)
nu2 = self.nu + X.shape[0]
covT = (self.nu + beta - 2) / (nu2 - 2) * cov
return nu2, mu, covT
@@ -476,16 +476,16 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea
L = cholesky(stabilize(Kxx) + Knx)
A = solve_lower(L, Kxs)
v = solve_lower(L, rxx)
- mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v)
+ mu = self.mean_func(Xnew) + aet.dot(aet.transpose(A), v)
if diag:
Kss = self.cov_func(Xnew, diag=True)
- var = Kss - tt.sum(tt.square(A), 0)
+ var = Kss - aet.sum(aet.square(A), 0)
if pred_noise:
var += noise(Xnew, diag=True)
return mu, var
else:
Kss = self.cov_func(Xnew)
- cov = Kss - tt.dot(tt.transpose(A), A)
+ cov = Kss - aet.dot(aet.transpose(A), A)
if pred_noise:
cov += noise(Xnew)
return mu, cov if pred_noise else stabilize(cov)
@@ -664,32 +664,32 @@ def __add__(self, other):
# in marginal_likelihood instead of lambda. This makes pickling
# possible.
def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
- sigma2 = tt.square(sigma)
+ sigma2 = aet.square(sigma)
Kuu = self.cov_func(Xu)
Kuf = self.cov_func(Xu, X)
Luu = cholesky(stabilize(Kuu))
A = solve_lower(Luu, Kuf)
- Qffd = tt.sum(A * A, 0)
+ Qffd = aet.sum(A * A, 0)
if self.approx == "FITC":
Kffd = self.cov_func(X, diag=True)
- Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
+ Lamd = aet.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
trace = 0.0
elif self.approx == "VFE":
- Lamd = tt.ones_like(Qffd) * sigma2
+ Lamd = aet.ones_like(Qffd) * sigma2
trace = (1.0 / (2.0 * sigma2)) * (
- tt.sum(self.cov_func(X, diag=True)) - tt.sum(tt.sum(A * A, 0))
+ aet.sum(self.cov_func(X, diag=True)) - aet.sum(aet.sum(A * A, 0))
)
else: # DTC
- Lamd = tt.ones_like(Qffd) * sigma2
+ Lamd = aet.ones_like(Qffd) * sigma2
trace = 0.0
A_l = A / Lamd
- L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
+ L_B = cholesky(aet.eye(Xu.shape[0]) + aet.dot(A_l, aet.transpose(A)))
r = y - self.mean_func(X)
r_l = r / Lamd
- c = solve_lower(L_B, tt.dot(A, r_l))
- constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
- logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
- quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
+ c = solve_lower(L_B, aet.dot(A, r_l))
+ constant = 0.5 * X.shape[0] * aet.log(2.0 * np.pi)
+ logdet = 0.5 * aet.sum(aet.log(Lamd)) + aet.sum(aet.log(aet.diag(L_B)))
+ quadratic = 0.5 * (aet.dot(r, r_l) - aet.dot(c, c))
return -1.0 * (constant + logdet + quadratic + trace)
def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs):
@@ -743,36 +743,38 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw
return pm.DensityDist(name, logp, shape=shape, **kwargs)
def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
- sigma2 = tt.square(sigma)
+ sigma2 = aet.square(sigma)
Kuu = cov_total(Xu)
Kuf = cov_total(Xu, X)
Luu = cholesky(stabilize(Kuu))
A = solve_lower(Luu, Kuf)
- Qffd = tt.sum(A * A, 0)
+ Qffd = aet.sum(A * A, 0)
if self.approx == "FITC":
Kffd = cov_total(X, diag=True)
- Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
+ Lamd = aet.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
else: # VFE or DTC
- Lamd = tt.ones_like(Qffd) * sigma2
+ Lamd = aet.ones_like(Qffd) * sigma2
A_l = A / Lamd
- L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
+ L_B = cholesky(aet.eye(Xu.shape[0]) + aet.dot(A_l, aet.transpose(A)))
r = y - mean_total(X)
r_l = r / Lamd
- c = solve_lower(L_B, tt.dot(A, r_l))
+ c = solve_lower(L_B, aet.dot(A, r_l))
Kus = self.cov_func(Xu, Xnew)
As = solve_lower(Luu, Kus)
- mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c))
+ mu = self.mean_func(Xnew) + aet.dot(aet.transpose(As), solve_upper(aet.transpose(L_B), c))
C = solve_lower(L_B, As)
if diag:
Kss = self.cov_func(Xnew, diag=True)
- var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
+ var = Kss - aet.sum(aet.square(As), 0) + aet.sum(aet.square(C), 0)
if pred_noise:
var += sigma2
return mu, var
else:
- cov = self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) + tt.dot(tt.transpose(C), C)
+ cov = (
+ self.cov_func(Xnew) - aet.dot(aet.transpose(As), As) + aet.dot(aet.transpose(C), C)
+ )
if pred_noise:
- cov += sigma2 * tt.identity_like(cov)
+ cov += sigma2 * aet.identity_like(cov)
return mu, cov if pred_noise else stabilize(cov)
def _get_given_vals(self, given):
@@ -891,7 +893,7 @@ def _build_prior(self, name, Xs, **kwargs):
chols = [cholesky(stabilize(cov(X))) for cov, X in zip(self.cov_funcs, Xs)]
# remove reparameterization option
v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=self.N, **kwargs)
- f = pm.Deterministic(name, mu + tt.flatten(kron_dot(chols, v)))
+ f = pm.Deterministic(name, mu + aet.flatten(kron_dot(chols, v)))
return f
def prior(self, name, Xs, **kwargs):
@@ -925,15 +927,15 @@ def _build_conditional(self, Xnew):
delta = f - self.mean_func(X)
covs = [stabilize(cov(Xi)) for cov, Xi in zip(self.cov_funcs, Xs)]
chols = [cholesky(cov) for cov in covs]
- cholTs = [tt.transpose(chol) for chol in chols]
+ cholTs = [aet.transpose(chol) for chol in chols]
Kss = self.cov_func(Xnew)
Kxs = self.cov_func(X, Xnew)
- Ksx = tt.transpose(Kxs)
+ Ksx = aet.transpose(Kxs)
alpha = kron_solve_lower(chols, delta)
alpha = kron_solve_upper(cholTs, alpha)
- mu = tt.dot(Ksx, alpha).ravel() + self.mean_func(Xnew)
+ mu = aet.dot(Ksx, alpha).ravel() + self.mean_func(Xnew)
A = kron_solve_lower(chols, Kxs)
- cov = stabilize(Kss - tt.dot(tt.transpose(A), A))
+ cov = stabilize(Kss - aet.dot(aet.transpose(A), A))
return mu, cov
def conditional(self, name, Xnew, **kwargs):
@@ -1103,7 +1105,7 @@ def _build_conditional(self, Xnew, pred_noise, diag):
delta = y - self.mean_func(X)
Kns = [f(x) for f, x in zip(self.cov_funcs, Xs)]
eigs_sep, Qs = zip(*map(eigh, Kns)) # Unzip
- QTs = list(map(tt.transpose, Qs))
+ QTs = list(map(aet.transpose, Qs))
eigs = kron_diag(*eigs_sep) # Combine separate eigs
if sigma is not None:
eigs += sigma ** 2
@@ -1117,21 +1119,21 @@ def _build_conditional(self, Xnew, pred_noise, diag):
alpha = kron_dot(QTs, delta)
alpha = alpha / eigs[:, None]
alpha = kron_dot(Qs, alpha)
- mu = tt.dot(Kmn, alpha).ravel() + self.mean_func(Xnew)
+ mu = aet.dot(Kmn, alpha).ravel() + self.mean_func(Xnew)
# Build conditional cov
A = kron_dot(QTs, Knm)
- A = A / tt.sqrt(eigs[:, None])
+ A = A / aet.sqrt(eigs[:, None])
if diag:
- Asq = tt.sum(tt.square(A), 0)
+ Asq = aet.sum(aet.square(A), 0)
cov = Km - Asq
if pred_noise:
cov += sigma
else:
- Asq = tt.dot(A.T, A)
+ Asq = aet.dot(A.T, A)
cov = Km - Asq
if pred_noise:
- cov += sigma * tt.identity_like(cov)
+ cov += sigma * aet.identity_like(cov)
return mu, cov
def conditional(self, name, Xnew, pred_noise=False, **kwargs):
diff --git a/pymc3/gp/mean.py b/pymc3/gp/mean.py
index d2e93fdfe5..47d38d9897 100644
--- a/pymc3/gp/mean.py
+++ b/pymc3/gp/mean.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import theano.tensor as tt
+import aesara.tensor as aet
__all__ = ["Zero", "Constant", "Linear"]
@@ -46,7 +46,7 @@ class Zero(Mean):
"""
def __call__(self, X):
- return tt.alloc(0.0, X.shape[0])
+ return aet.alloc(0.0, X.shape[0])
class Constant(Mean):
@@ -64,7 +64,7 @@ def __init__(self, c=0):
self.c = c
def __call__(self, X):
- return tt.alloc(1.0, X.shape[0]) * self.c
+ return aet.alloc(1.0, X.shape[0]) * self.c
class Linear(Mean):
@@ -85,7 +85,7 @@ def __init__(self, coeffs, intercept=0):
self.A = coeffs
def __call__(self, X):
- return tt.squeeze(tt.dot(X, self.A) + self.b)
+ return aet.squeeze(aet.dot(X, self.A) + self.b)
class Add(Mean):
@@ -95,7 +95,7 @@ def __init__(self, first_mean, second_mean):
self.m2 = second_mean
def __call__(self, X):
- return tt.add(self.m1(X), self.m2(X))
+ return aet.add(self.m1(X), self.m2(X))
class Prod(Mean):
@@ -105,4 +105,4 @@ def __init__(self, first_mean, second_mean):
self.m2 = second_mean
def __call__(self, X):
- return tt.mul(self.m1(X), self.m2(X))
+ return aet.mul(self.m1(X), self.m2(X))
diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py
index 698c300564..1ad05f6eab 100644
--- a/pymc3/gp/util.py
+++ b/pymc3/gp/util.py
@@ -14,16 +14,16 @@
import warnings
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
-import theano.tensor.slinalg # pylint: disable=unused-import
+from aesara.tensor.slinalg import Solve, cholesky # pylint: disable=unused-import
+from aesara.tensor.var import TensorConstant
from scipy.cluster.vq import kmeans
-cholesky = tt.slinalg.cholesky
-solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
-solve_upper = tt.slinalg.Solve(A_structure="upper_triangular")
-solve = tt.slinalg.Solve(A_structure="general")
+solve_lower = Solve(A_structure="lower_triangular")
+solve_upper = Solve(A_structure="upper_triangular")
+solve = Solve(A_structure="general")
def infer_shape(X, n_points=None):
@@ -37,12 +37,12 @@ def infer_shape(X, n_points=None):
def stabilize(K):
""" adds small diagonal to a covariance matrix """
- return K + 1e-6 * tt.identity_like(K)
+ return K + 1e-6 * aet.identity_like(K)
def kmeans_inducing_points(n_inducing, X):
# first whiten X
- if isinstance(X, tt.TensorConstant):
+ if isinstance(X, TensorConstant):
X = X.value
elif isinstance(X, (np.ndarray, tuple, list)):
X = np.asarray(X)
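Sketch (not part of the patch) of how the new slinalg imports in gp/util.py are used: a Cholesky factor of a stabilized kernel matrix followed by a lower-triangular Solve, matching how solve_lower is constructed above; K, y and A are hypothetical.

import aesara
import aesara.tensor as aet
import numpy as np
from aesara.tensor.slinalg import Solve, cholesky

solve_lower = Solve(A_structure="lower_triangular")

K = aet.dmatrix("K")
y = aet.dvector("y")
L = cholesky(K + 1e-6 * aet.identity_like(K))  # stabilize(K)
v = solve_lower(L, y)
f = aesara.function([K, y], v)

A = np.random.randn(3, 3)
print(f(A @ A.T, np.ones(3)))  # solves L v = y for the stabilized kernel A A^T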
diff --git a/pymc3/math.py b/pymc3/math.py
index aff54d13b7..b90b85e09e 100644
--- a/pymc3/math.py
+++ b/pymc3/math.py
@@ -16,20 +16,19 @@
from functools import partial, reduce
+import aesara
+import aesara.sparse
+import aesara.tensor as aet
+import aesara.tensor.slinalg # pylint: disable=unused-import
import numpy as np
import scipy as sp
import scipy.sparse # pylint: disable=unused-import
-import theano
-import theano.sparse
-import theano.tensor as tt
-import theano.tensor.slinalg # pylint: disable=unused-import
-from scipy.linalg import block_diag as scipy_block_diag
-from theano.graph.basic import Apply
-from theano.graph.op import Op
+from aesara.graph.basic import Apply
+from aesara.graph.op import Op
# pylint: disable=unused-import
-from theano.tensor import (
+from aesara.tensor import (
abs_,
and_,
ceil,
@@ -71,10 +70,11 @@
where,
zeros_like,
)
-from theano.tensor.nlinalg import det, extract_diag, matrix_dot, matrix_inverse, trace
-from theano.tensor.nnet import sigmoid
+from aesara.tensor.nlinalg import det, extract_diag, matrix_dot, matrix_inverse, trace
+from aesara.tensor.nnet import sigmoid
+from scipy.linalg import block_diag as scipy_block_diag
-from pymc3.theanof import floatX, ix_, largest_common_dtype
+from pymc3.aesaraf import floatX, ix_, largest_common_dtype
# pylint: enable=unused-import
@@ -93,7 +93,7 @@ def kronecker(*Ks):
np.ndarray :
Block matrix Kronecker product of the argument matrices.
"""
- return reduce(tt.slinalg.kron, Ks)
+ return reduce(aet.slinalg.kron, Ks)
def cartesian(*arrays):
@@ -140,17 +140,17 @@ def kron_vector_op(v):
raise ValueError(f"m must have ndim <= 2, not {m.ndim}")
res = kron_vector_op(m)
res_shape = res.shape
- return tt.reshape(res, (res_shape[1], res_shape[0])).T
+ return aet.reshape(res, (res_shape[1], res_shape[0])).T
# Define kronecker functions that work on 1D and 2D arrays
-kron_dot = partial(kron_matrix_op, op=tt.dot)
-kron_solve_lower = partial(kron_matrix_op, op=tt.slinalg.solve_lower_triangular)
-kron_solve_upper = partial(kron_matrix_op, op=tt.slinalg.solve_upper_triangular)
+kron_dot = partial(kron_matrix_op, op=aet.dot)
+kron_solve_lower = partial(kron_matrix_op, op=aet.slinalg.solve_lower_triangular)
+kron_solve_upper = partial(kron_matrix_op, op=aet.slinalg.solve_upper_triangular)
def flat_outer(a, b):
- return tt.outer(a, b).ravel()
+ return aet.outer(a, b).ravel()
def kron_diag(*diags):
@@ -166,24 +166,24 @@ def kron_diag(*diags):
def tround(*args, **kwargs):
"""
- Temporary function to silence round warning in Theano. Please remove
+ Temporary function to silence round warning in Aesara. Please remove
when the warning disappears.
"""
kwargs["mode"] = "half_to_even"
- return tt.round(*args, **kwargs)
+ return aet.round(*args, **kwargs)
def logsumexp(x, axis=None, keepdims=True):
# Adapted from https://github.com/Theano/Theano/issues/1563
- x_max = tt.max(x, axis=axis, keepdims=True)
- x_max = tt.switch(tt.isinf(x_max), 0, x_max)
- res = tt.log(tt.sum(tt.exp(x - x_max), axis=axis, keepdims=True)) + x_max
+ x_max = aet.max(x, axis=axis, keepdims=True)
+ x_max = aet.switch(aet.isinf(x_max), 0, x_max)
+ res = aet.log(aet.sum(aet.exp(x - x_max), axis=axis, keepdims=True)) + x_max
return res if keepdims else res.squeeze()
def logaddexp(a, b):
diff = b - a
- return tt.switch(diff > 0, b + tt.log1p(tt.exp(-diff)), a + tt.log1p(tt.exp(diff)))
+ return aet.switch(diff > 0, b + aet.log1p(aet.exp(-diff)), a + aet.log1p(aet.exp(diff)))
def logdiffexp(a, b):
@@ -198,7 +198,7 @@ def logdiffexp_numpy(a, b):
def invlogit(x, eps=sys.float_info.epsilon):
"""The inverse of the logit function, 1 / (1 + exp(-x))."""
- return (1.0 - 2.0 * eps) / (1.0 + tt.exp(-x)) + eps
+ return (1.0 - 2.0 * eps) / (1.0 + aet.exp(-x)) + eps
def logbern(log_p):
@@ -208,7 +208,7 @@ def logbern(log_p):
def logit(p):
- return tt.log(p / (floatX(1) - p))
+ return aet.log(p / (floatX(1) - p))
def log1pexp(x):
@@ -216,7 +216,7 @@ def log1pexp(x):
This function is numerically more stable than the naive approach.
"""
- return tt.nnet.softplus(x)
+ return aet.nnet.softplus(x)
def log1mexp(x):
@@ -234,7 +234,9 @@ def log1mexp(x):
package"
"""
- return tt.switch(tt.lt(x, 0.6931471805599453), tt.log(-tt.expm1(-x)), tt.log1p(-tt.exp(-x)))
+ return aet.switch(
+ aet.lt(x, 0.6931471805599453), aet.log(-aet.expm1(-x)), aet.log1p(-aet.exp(-x))
+ )
def log1mexp_numpy(x):
@@ -253,7 +255,7 @@ def log1mexp_numpy(x):
def flatten_list(tensors):
- return tt.concatenate([var.ravel() for var in tensors])
+ return aet.concatenate([var.ravel() for var in tensors])
class LogDet(Op):
@@ -268,8 +270,8 @@ class LogDet(Op):
"""
def make_node(self, x):
- x = theano.tensor.as_tensor_variable(x)
- o = theano.tensor.scalar(dtype=x.dtype)
+ x = aesara.tensor.as_tensor_variable(x)
+ o = aesara.tensor.scalar(dtype=x.dtype)
return Apply(self, [x], [o])
def perform(self, node, inputs, outputs, params=None):
@@ -319,7 +321,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
----------
n: int
The number of rows of the triangular matrix.
- packed: theano.vector
+ packed: aesara.vector
The matrix in packed format.
lower: bool, default=True
If true, assume that the matrix is lower triangular.
@@ -338,13 +340,13 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
diag_idxs = np.arange(2, n + 2)[::-1].cumsum() - n - 1
return packed[diag_idxs]
elif lower:
- out = tt.zeros((n, n), dtype=theano.config.floatX)
+ out = aet.zeros((n, n), dtype=aesara.config.floatX)
idxs = np.tril_indices(n)
- return tt.set_subtensor(out[idxs], packed)
+ return aet.set_subtensor(out[idxs], packed)
elif not lower:
- out = tt.zeros((n, n), dtype=theano.config.floatX)
+ out = aet.zeros((n, n), dtype=aesara.config.floatX)
idxs = np.triu_indices(n)
- return tt.set_subtensor(out[idxs], packed)
+ return aet.set_subtensor(out[idxs], packed)
class BatchedDiag(Op):
@@ -355,11 +357,11 @@ class BatchedDiag(Op):
__props__ = ()
def make_node(self, diag):
- diag = tt.as_tensor_variable(diag)
+ diag = aet.as_tensor_variable(diag)
if diag.type.ndim != 2:
raise TypeError("data argument must be a matrix", diag.type)
- return Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)])
+ return Apply(self, [diag], [aet.tensor3(dtype=diag.dtype)])
def perform(self, node, ins, outs, params=None):
(C,) = ins
@@ -375,7 +377,7 @@ def perform(self, node, ins, outs, params=None):
def grad(self, inputs, gout):
(gz,) = gout
- idx = tt.arange(gz.shape[-1])
+ idx = aet.arange(gz.shape[-1])
return [gz[..., idx, idx]]
def infer_shape(self, fgraph, nodes, shapes):
@@ -383,14 +385,14 @@ def infer_shape(self, fgraph, nodes, shapes):
def batched_diag(C):
- C = tt.as_tensor(C)
+ C = aet.as_tensor(C)
dim = C.shape[-1]
if C.ndim == 2:
# diag -> matrices
return BatchedDiag()(C)
elif C.ndim == 3:
# matrices -> diag
- idx = tt.arange(dim)
+ idx = aet.arange(dim)
return C[..., idx, idx]
else:
raise ValueError("Input should be 2 or 3 dimensional")
@@ -408,13 +410,13 @@ def __init__(self, sparse=False, format="csr"):
def make_node(self, *matrices):
if not matrices:
raise ValueError("no matrices to allocate")
- matrices = list(map(tt.as_tensor, matrices))
+ matrices = list(map(aet.as_tensor, matrices))
if any(mat.type.ndim != 2 for mat in matrices):
raise TypeError("all data arguments must be matrices")
if self.sparse:
- out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices))
+ out_type = aesara.sparse.matrix(self.format, dtype=largest_common_dtype(matrices))
else:
- out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices))
+ out_type = aesara.tensor.matrix(dtype=largest_common_dtype(matrices))
return Apply(self, matrices, [out_type])
def perform(self, node, inputs, output_storage, params=None):
@@ -425,13 +427,13 @@ def perform(self, node, inputs, output_storage, params=None):
output_storage[0][0] = scipy_block_diag(*inputs).astype(dtype)
def grad(self, inputs, gout):
- shapes = tt.stack([i.shape for i in inputs])
+ shapes = aet.stack([i.shape for i in inputs])
index_end = shapes.cumsum(0)
index_begin = index_end - shapes
slices = [
ix_(
- tt.arange(index_begin[i, 0], index_end[i, 0]),
- tt.arange(index_begin[i, 1], index_end[i, 1]),
+ aet.arange(index_begin[i, 0], index_end[i, 0]),
+ aet.arange(index_begin[i, 1], index_end[i, 1]),
)
for i in range(len(inputs))
]
@@ -439,7 +441,7 @@ def grad(self, inputs, gout):
def infer_shape(self, fgraph, nodes, shapes):
first, second = zip(*shapes)
- return [(tt.add(*first), tt.add(*second))]
+ return [(aet.add(*first), aet.add(*second))]
def block_diagonal(matrices, sparse=False, format="csr"):
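Numeric sketch (illustrative, not from the patch): compiling the logsumexp expression defined above and comparing it against NumPy is a quick way to confirm the renamed ops are drop-in replacements; the test values are arbitrary.

import aesara
import aesara.tensor as aet
import numpy as np

x = aet.dvector("x")
x_max = aet.max(x, axis=None, keepdims=True)
x_max = aet.switch(aet.isinf(x_max), 0, x_max)
res = aet.log(aet.sum(aet.exp(x - x_max), axis=None, keepdims=True)) + x_max
logsumexp = aesara.function([x], res)

vals = np.array([1.0, 2.0, 3.0])
print(logsumexp(vals), np.log(np.sum(np.exp(vals))))  # should agree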
diff --git a/pymc3/model.py b/pymc3/model.py
index 349affcfa0..a5a0a635c8 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -20,25 +20,27 @@
from sys import modules
from typing import TYPE_CHECKING, Any, List, Optional, Type, TypeVar, Union, cast
+import aesara
+import aesara.graph.basic
+import aesara.sparse as sparse
+import aesara.tensor as aet
import numpy as np
import scipy.sparse as sps
-import theano
-import theano.graph.basic
-import theano.sparse as sparse
-import theano.tensor as tt
+from aesara.compile.sharedvalue import SharedVariable
+from aesara.gradient import grad
+from aesara.graph.basic import Apply, Variable
+from aesara.tensor.type import TensorType as AesaraTensorType
+from aesara.tensor.var import TensorVariable
from pandas import Series
-from theano.compile import SharedVariable
-from theano.graph.basic import Apply
-from theano.tensor.var import TensorVariable
import pymc3 as pm
+from pymc3.aesaraf import floatX, generator, gradient, hessian, inputvars
from pymc3.blocking import ArrayOrdering, DictToArrayBijection
from pymc3.exceptions import ImputationWarning
from pymc3.math import flatten_list
from pymc3.memoize import WithMemoization, memoize
-from pymc3.theanof import floatX, generator, gradient, hessian, inputvars
from pymc3.util import get_transformed_name, get_var_name
from pymc3.vartypes import continuous_types, discrete_types, isgenerator, typefilter
@@ -59,13 +61,13 @@
class PyMC3Variable(TensorVariable):
- """Class to wrap Theano TensorVariable for custom behavior."""
+ """Class to wrap Aesara TensorVariable for custom behavior."""
# Implement matrix multiplication infix operator: X @ w
- __matmul__ = tt.dot
+ __matmul__ = aet.dot
def __rmatmul__(self, other):
- return tt.dot(other, self)
+ return aet.dot(other, self)
def _str_repr(self, name=None, dist=None, formatting="plain"):
if getattr(self, "distribution", None) is None:
@@ -143,28 +145,28 @@ def incorporate_methods(source, destination, methods, wrapper=None, override=Fal
def get_named_nodes_and_relations(graph):
- """Get the named nodes in a theano graph (i.e., nodes whose name
+ """Get the named nodes in a aesara graph (i.e., nodes whose name
attribute is not None) along with their relationships (i.e., the
node's named parents, and named children, while skipping unnamed
intermediate nodes)
Parameters
----------
- graph: a theano node
+ graph: an aesara node
Returns:
--------
leaf_dict: Dict[str, node]
A dictionary of name:node pairs, of the named nodes that
- have no named ancestors in the provided theano graph.
+ have no named ancestors in the provided aesara graph.
descendents: Dict[node, Set[node]]
- Each key is a theano named node, and the corresponding value
- is the set of theano named nodes that are descendents with no
+ Each key is an aesara named node, and the corresponding value
+ is the set of aesara named nodes that are descendents with no
intervening named nodes in the supplied ``graph``.
ancestors: Dict[node, Set[node]]
A dictionary of node:set([ancestors]) pairs. Each key
- is a theano named node, and the corresponding value is the set
- of theano named nodes that are ancestors with no intervening named
+ is an aesara named node, and the corresponding value is the set
+ of aesara named nodes that are ancestors with no intervening named
nodes in the supplied ``graph``.
"""
@@ -222,28 +224,28 @@ def _get_named_nodes_and_relations(graph, descendent, descendents, ancestors):
def build_named_node_tree(graphs):
"""Build the combined descence/ancestry tree of named nodes (i.e., nodes
- whose name attribute is not None) in a list (or iterable) of theano graphs.
+ whose name attribute is not None) in a list (or iterable) of aesara graphs.
The relationship tree does not include unnamed intermediate nodes present
in the supplied graphs.
Parameters
----------
- graphs - iterable of theano graphs
+ graphs - iterable of aesara graphs
Returns:
--------
leaf_dict: Dict[str, node]
A dictionary of name:node pairs, of the named nodes that
- have no named ancestors in the provided theano graphs.
+ have no named ancestors in the provided aesara graphs.
descendents: Dict[node, Set[node]]
A dictionary of node:set([parents]) pairs. Each key is
- a theano named node, and the corresponding value is the set of
- theano named nodes that are descendents with no intervening named
+ an aesara named node, and the corresponding value is the set of
+ aesara named nodes that are descendents with no intervening named
nodes in the supplied ``graphs``.
ancestors: Dict[node, Set[node]]
A dictionary of node:set([ancestors]) pairs. Each key
- is a theano named node, and the corresponding value is the set
- of theano named nodes that are ancestors with no intervening named
+ is an aesara named node, and the corresponding value is the set
+ of aesara named nodes that are ancestors with no intervening named
nodes in the supplied ``graphs``.
"""
@@ -282,16 +284,16 @@ def __new__(cls, name, bases, dct, **kargs): # pylint: disable=unused-argument
def __enter__(self):
self.__class__.context_class.get_contexts().append(self)
- # self._theano_config is set in Model.__new__
+ # self._aesara_config is set in Model.__new__
self._config_context = None
- if hasattr(self, "_theano_config"):
- self._config_context = theano.config.change_flags(**self._theano_config)
+ if hasattr(self, "_aesara_config"):
+ self._config_context = aesara.config.change_flags(**self._aesara_config)
self._config_context.__enter__()
return self
def __exit__(self, typ, value, traceback): # pylint: disable=unused-argument
self.__class__.context_class.get_contexts().pop()
- # self._theano_config is set in Model.__new__
+ # self._aesara_config is set in Model.__new__
if self._config_context:
self._config_context.__exit__(typ, value, traceback)
@@ -468,7 +470,7 @@ def fastd2logp_nojac(self, vars=None):
@property
def logpt(self):
- """Theano scalar of log-probability of the model"""
+ """Aesara scalar of log-probability of the model"""
if getattr(self, "total_size", None) is not None:
logp = self.logp_sum_unscaledt * self.scaling
else:
@@ -479,11 +481,11 @@ def logpt(self):
@property
def logp_nojact(self):
- """Theano scalar of log-probability, excluding jacobian terms."""
+ """Aesara scalar of log-probability, excluding jacobian terms."""
if getattr(self, "total_size", None) is not None:
- logp = tt.sum(self.logp_nojac_unscaledt) * self.scaling
+ logp = aet.sum(self.logp_nojac_unscaledt) * self.scaling
else:
- logp = tt.sum(self.logp_nojac_unscaledt)
+ logp = aet.sum(self.logp_nojac_unscaledt)
if self.name is not None:
logp.name = "__logp_%s" % self.name
return logp
@@ -578,20 +580,20 @@ def tree_contains(self, item):
class ValueGradFunction:
- """Create a theano function that computes a value and its gradient.
+    """Create an Aesara function that computes a value and its gradient.
Parameters
----------
- costs: list of theano variables
- We compute the weighted sum of the specified theano values, and the gradient
+ costs: list of aesara variables
+ We compute the weighted sum of the specified aesara values, and the gradient
of that sum. The weights can be specified with `ValueGradFunction.set_weights`.
- grad_vars: list of named theano variables or None
+ grad_vars: list of named aesara variables or None
The arguments with respect to which the gradient is computed.
- extra_vars: list of named theano variables or None
+ extra_vars: list of named aesara variables or None
Other arguments of the function that are assumed constant. They
are stored in shared variables and can be set using
`set_extra_values`.
- dtype: str, default=theano.config.floatX
+ dtype: str, default=aesara.config.floatX
The dtype of the arrays.
casting: {'no', 'equiv', 'save', 'same_kind', 'unsafe'}, default='no'
Casting rule for casting `grad_args` to the array dtype.
@@ -601,14 +603,14 @@ class ValueGradFunction:
compute_grads: bool, default=True
If False, return only the logp, not the gradient.
kwargs
- Extra arguments are passed on to `theano.function`.
+ Extra arguments are passed on to `aesara.function`.
Attributes
----------
size: int
The number of elements in the parameter array.
- profile: theano profiling object or None
- The profiling object of the theano function that computes value and
+ profile: aesara profiling object or None
+ The profiling object of the aesara function that computes value and
gradient. This is None unless `profile=True` was set in the
kwargs.
"""
@@ -640,14 +642,14 @@ def __init__(
self._extra_var_names = {var.name for var in extra_vars}
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
self.dtype = dtype
self._n_costs = len(costs)
if self._n_costs == 0:
raise ValueError("At least one cost is required.")
weights = np.ones(self._n_costs - 1, dtype=self.dtype)
- self._weights = theano.shared(weights, "__weights")
+ self._weights = aesara.shared(weights, "__weights")
cost = costs[0]
for i, val in enumerate(costs[1:]):
@@ -674,7 +676,7 @@ def __init__(
givens = []
self._extra_vars_shared = {}
for var in extra_vars:
- shared = theano.shared(var.tag.test_value, var.name + "_shared__")
+ shared = aesara.shared(var.tag.test_value, var.name + "_shared__")
# test TensorType compatibility
if hasattr(var.tag.test_value, "shape"):
testtype = TensorType(var.dtype, var.tag.test_value.shape)
@@ -689,15 +691,15 @@ def __init__(
)
if compute_grads:
- grad = tt.grad(self._cost_joined, self._vars_joined)
- grad.name = "__grad"
- outputs = [self._cost_joined, grad]
+ grad_out = grad(self._cost_joined, self._vars_joined)
+ grad_out.name = "__grad"
+ outputs = [self._cost_joined, grad_out]
else:
outputs = self._cost_joined
inputs = [self._vars_joined]
- self._theano_function = theano.function(inputs, outputs, givens=givens, **kwargs)
+ self._aesara_function = aesara.function(inputs, outputs, givens=givens, **kwargs)
def set_weights(self, values):
if values.shape != (self._n_costs - 1,):
@@ -732,7 +734,7 @@ def __call__(self, array, grad_out=None, extra_vars=None):
else:
out = grad_out
- output = self._theano_function(array)
+ output = self._aesara_function(array)
if grad_out is None:
return output
else:
@@ -741,8 +743,8 @@ def __call__(self, array, grad_out=None, extra_vars=None):
@property
def profile(self):
- """Profiling information of the underlying theano function."""
- return self._theano_function.profile
+ """Profiling information of the underlying aesara function."""
+ return self._aesara_function.profile
def dict_to_array(self, point):
"""Convert a dictionary with values for grad_vars to an array."""
@@ -774,7 +776,7 @@ def array_to_full_dict(self, array):
return point
def _build_joined(self, cost, args, vmap):
- args_joined = tt.vector("__args_joined")
+ args_joined = aet.vector("__args_joined")
args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype)
joined_slices = {}
@@ -784,7 +786,7 @@ def _build_joined(self, cost, args, vmap):
joined_slices[vmap.var] = sliced
replace = {var: joined_slices[var.name] for var in args}
- return args_joined, theano.clone(cost, replace=replace)
+ return args_joined, aesara.clone_replace(cost, replace=replace)
class Model(Factor, WithMemoization, metaclass=ContextMeta):
@@ -806,10 +808,10 @@ class Model(Factor, WithMemoization, metaclass=ContextMeta):
defined within instance will be passed to the parent instance.
So that 'nested' model contributes to the variables and
likelihood factors of parent model.
- theano_config: dict
- A dictionary of theano config values that should be set
+ aesara_config: dict
+ A dictionary of aesara config values that should be set
temporarily in the model context. See the documentation
- of theano for a complete list. Set config key
+ of aesara for a complete list. Set config key
``compute_test_value`` to `raise` if it is None.
check_bounds: bool
Ensure that input parameters to distributions are in a valid
@@ -854,7 +856,7 @@ def __init__(self, mean=0, sigma=1, name='', model=None):
Deterministic('v3_sq', self.v3 ** 2)
# Potentials too
- Potential('p1', tt.constant(1))
+ Potential('p1', aet.constant(1))
# After defining a class CustomModel you can use it in several
# ways
@@ -896,13 +898,13 @@ def __new__(cls, *args, **kwargs):
instance._parent = kwargs.get("model")
else:
instance._parent = cls.get_context(error_if_none=False)
- theano_config = kwargs.get("theano_config", None)
- if theano_config is None or "compute_test_value" not in theano_config:
- theano_config = {"compute_test_value": "raise"}
- instance._theano_config = theano_config
+ aesara_config = kwargs.get("aesara_config", None)
+ if aesara_config is None or "compute_test_value" not in aesara_config:
+ aesara_config = {"compute_test_value": "raise"}
+ instance._aesara_config = aesara_config
return instance
- def __init__(self, name="", model=None, theano_config=None, coords=None, check_bounds=True):
+ def __init__(self, name="", model=None, aesara_config=None, coords=None, check_bounds=True):
self.name = name
self.coords = {}
self.RV_dims = {}
@@ -970,7 +972,7 @@ def dlogp_array(self):
return self.bijection.mapf(self.fastdlogp(vars))
def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs):
- """Compile a theano function that computes logp and gradient.
+        """Compile an Aesara function that computes logp and gradient.
Parameters
----------
@@ -990,10 +992,10 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs):
if tempered:
with self:
- free_RVs_logp = tt.sum(
- [tt.sum(var.logpt) for var in self.free_RVs + self.potentials]
+ free_RVs_logp = aet.sum(
+ [aet.sum(var.logpt) for var in self.free_RVs + self.potentials]
)
- observed_RVs_logp = tt.sum([tt.sum(var.logpt) for var in self.observed_RVs])
+ observed_RVs_logp = aet.sum([aet.sum(var.logpt) for var in self.observed_RVs])
costs = [free_RVs_logp, observed_RVs_logp]
else:
@@ -1004,10 +1006,10 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs):
@property
def logpt(self):
- """Theano scalar of log-probability of the model"""
+ """Aesara scalar of log-probability of the model"""
with self:
factors = [var.logpt for var in self.basic_RVs] + self.potentials
- logp = tt.sum([tt.sum(factor) for factor in factors])
+ logp = aet.sum([aet.sum(factor) for factor in factors])
if self.name:
logp.name = "__logp_%s" % self.name
else:
@@ -1016,14 +1018,14 @@ def logpt(self):
@property
def logp_nojact(self):
- """Theano scalar of log-probability of the model but without the jacobian
+ """Aesara scalar of log-probability of the model but without the jacobian
if a transformed Random Variable is present.
Note that If there is no transformed variable in the model, logp_nojact
will be the same as logpt as there is no need for Jacobian correction.
"""
with self:
factors = [var.logp_nojact for var in self.basic_RVs] + self.potentials
- logp = tt.sum([tt.sum(factor) for factor in factors])
+ logp = aet.sum([aet.sum(factor) for factor in factors])
if self.name:
logp.name = "__logp_nojac_%s" % self.name
else:
@@ -1032,18 +1034,18 @@ def logp_nojact(self):
@property
def varlogpt(self):
- """Theano scalar of log-probability of the unobserved random variables
+ """Aesara scalar of log-probability of the unobserved random variables
(excluding deterministic)."""
with self:
factors = [var.logpt for var in self.free_RVs]
- return tt.sum(factors)
+ return aet.sum(factors)
@property
def datalogpt(self):
with self:
factors = [var.logpt for var in self.observed_RVs]
- factors += [tt.sum(factor) for factor in self.potentials]
- return tt.sum(factors)
+ factors += [aet.sum(factor) for factor in self.potentials]
+ return aet.sum(factors)
@property
def vars(self):
@@ -1237,20 +1239,20 @@ def __getitem__(self, key):
raise e
def makefn(self, outs, mode=None, *args, **kwargs):
- """Compiles a Theano function which returns ``outs`` and takes the variable
+        """Compiles an Aesara function which returns ``outs`` and takes the variable
ancestors of ``outs`` as inputs.
Parameters
----------
- outs: Theano variable or iterable of Theano variables
- mode: Theano compilation mode
+ outs: Aesara variable or iterable of Aesara variables
+ mode: Aesara compilation mode
Returns
-------
- Compiled Theano function
+ Compiled Aesara function
"""
with self:
- return theano.function(
+ return aesara.function(
self.vars,
outs,
allow_input_downcast=True,
@@ -1262,43 +1264,43 @@ def makefn(self, outs, mode=None, *args, **kwargs):
)
def fn(self, outs, mode=None, *args, **kwargs):
- """Compiles a Theano function which returns the values of ``outs``
+        """Compiles an Aesara function which returns the values of ``outs``
and takes values of model vars as arguments.
Parameters
----------
- outs: Theano variable or iterable of Theano variables
- mode: Theano compilation mode
+ outs: Aesara variable or iterable of Aesara variables
+ mode: Aesara compilation mode
Returns
-------
- Compiled Theano function
+ Compiled Aesara function
"""
return LoosePointFunc(self.makefn(outs, mode, *args, **kwargs), self)
def fastfn(self, outs, mode=None, *args, **kwargs):
- """Compiles a Theano function which returns ``outs`` and takes values
+        """Compiles an Aesara function which returns ``outs`` and takes values
of model vars as a dict as an argument.
Parameters
----------
- outs: Theano variable or iterable of Theano variables
- mode: Theano compilation mode
+ outs: Aesara variable or iterable of Aesara variables
+ mode: Aesara compilation mode
Returns
-------
- Compiled Theano function as point function.
+ Compiled Aesara function as point function.
"""
f = self.makefn(outs, mode, *args, **kwargs)
return FastPointFunc(f)
def profile(self, outs, n=1000, point=None, profile=True, *args, **kwargs):
- """Compiles and profiles a Theano function which returns ``outs`` and
+        """Compiles and profiles an Aesara function which returns ``outs`` and
takes values of model vars as a dict as an argument.
Parameters
----------
- outs: Theano variable or iterable of Theano variables
+ outs: Aesara variable or iterable of Aesara variables
n: int, default 1000
Number of iterations to run
point: point
@@ -1335,7 +1337,7 @@ def flatten(self, vars=None, order=None, inputvar=None):
if None, then all model.free_RVs are used for flattening input
order: ArrayOrdering
Optional, use predefined ordering
- inputvar: tt.vector
+ inputvar: aet.vector
Optional, use predefined inputvar
Returns
@@ -1347,8 +1349,8 @@ def flatten(self, vars=None, order=None, inputvar=None):
if order is None:
order = ArrayOrdering(vars)
if inputvar is None:
- inputvar = tt.vector("flat_view", dtype=theano.config.floatX)
- if theano.config.compute_test_value != "off":
+ inputvar = aet.vector("flat_view", dtype=aesara.config.floatX)
+ if aesara.config.compute_test_value != "off":
if vars:
inputvar.tag.test_value = flatten_list(vars).tag.test_value
else:
@@ -1482,34 +1484,34 @@ def set_data(new_data, model=None):
def fn(outs, mode=None, model=None, *args, **kwargs):
- """Compiles a Theano function which returns the values of ``outs`` and
+    """Compiles an Aesara function which returns the values of ``outs`` and
takes values of model vars as arguments.
Parameters
----------
- outs: Theano variable or iterable of Theano variables
- mode: Theano compilation mode
+ outs: Aesara variable or iterable of Aesara variables
+ mode: Aesara compilation mode
Returns
-------
- Compiled Theano function
+ Compiled Aesara function
"""
model = modelcontext(model)
return model.fn(outs, mode, *args, **kwargs)
def fastfn(outs, mode=None, model=None):
- """Compiles a Theano function which returns ``outs`` and takes values of model
+    """Compiles an Aesara function which returns ``outs`` and takes values of model
vars as a dict as an argument.
Parameters
----------
- outs: Theano variable or iterable of Theano variables
- mode: Theano compilation mode
+ outs: Aesara variable or iterable of Aesara variables
+ mode: Aesara compilation mode
Returns
-------
- Compiled Theano function as point function.
+ Compiled Aesara function as point function.
"""
model = modelcontext(model)
return model.fastfn(outs, mode)
@@ -1619,12 +1621,12 @@ def _get_scaling(total_size, shape, ndim):
begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None]
end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None]
coefs = begin_coef + end_coef
- coef = tt.prod(coefs)
+ coef = aet.prod(coefs)
else:
raise TypeError(
"Unrecognized `total_size` type, expected int or list of ints, got %r" % total_size
)
- return tt.as_tensor(floatX(coef))
+ return aet.as_tensor(floatX(coef))
class FreeRV(Factor, PyMC3Variable):
@@ -1648,8 +1650,8 @@ def __init__(
"""
Parameters
----------
- type: theano type (optional)
- owner: theano owner (optional)
+ type: aesara type (optional)
+ owner: aesara owner (optional)
name: str
distribution: Distribution
model: Model
@@ -1692,7 +1694,7 @@ def init_value(self):
def pandas_to_array(data):
"""Convert a pandas object to a NumPy array.
- XXX: When `data` is a generator, this will return a Theano tensor!
+    XXX: When `data` is a generator, this will return an Aesara tensor!
"""
if hasattr(data, "to_numpy") and hasattr(data, "isnull"):
@@ -1720,7 +1722,7 @@ def pandas_to_array(data):
else:
# no masking required
ret = data
- elif isinstance(data, theano.graph.basic.Variable):
+ elif isinstance(data, Variable):
ret = data
elif sps.issparse(data):
ret = data
@@ -1762,9 +1764,9 @@ def as_tensor(data, name, model, distribution):
parent_dist=distribution,
)
missing_values = FreeRV(name=name + "_missing", distribution=fakedist, model=model)
- constant = tt.as_tensor_variable(data.filled())
+ constant = aet.as_tensor_variable(data.filled())
- dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], missing_values)
+ dataTensor = aet.set_subtensor(constant[data.mask.nonzero()], missing_values)
dataTensor.missing_values = missing_values
return dataTensor
elif sps.issparse(data):
@@ -1772,7 +1774,7 @@ def as_tensor(data, name, model, distribution):
data.missing_values = None
return data
else:
- data = tt.as_tensor_variable(data, name=name)
+ data = aet.as_tensor_variable(data, name=name)
data.missing_values = None
return data
@@ -1796,8 +1798,8 @@ def __init__(
"""
Parameters
----------
- type: theano type (optional)
- owner: theano owner (optional)
+ type: aesara type (optional)
+ owner: aesara owner (optional)
name: str
distribution: Distribution
model: Model
@@ -1806,12 +1808,12 @@ def __init__(
"""
from pymc3.distributions import TensorType
- if hasattr(data, "type") and isinstance(data.type, tt.TensorType):
+ if hasattr(data, "type") and isinstance(data.type, AesaraTensorType):
type = data.type
if type is None:
data = pandas_to_array(data)
- if isinstance(data, theano.graph.basic.Variable):
+ if isinstance(data, Variable):
type = data.type
else:
type = TensorType(distribution.dtype, data.shape)
@@ -1834,8 +1836,8 @@ def __init__(
self.distribution = distribution
# make this RV a view on the combined missing/nonmissing array
- Apply(theano.compile.view_op, inputs=[data], outputs=[self])
- self.tag.test_value = theano.compile.view_op(data).tag.test_value.astype(self.dtype)
+ Apply(aesara.compile.view_op, inputs=[data], outputs=[self])
+ self.tag.test_value = aesara.compile.view_op(data).tag.test_value.astype(self.dtype)
self.scaling = _get_scaling(total_size, data.shape, data.ndim)
@property
@@ -1853,8 +1855,8 @@ def __init__(self, name, data, distribution, total_size=None, model=None):
"""
Parameters
----------
- type: theano type (optional)
- owner: theano owner (optional)
+ type: aesara type (optional)
+ owner: aesara owner (optional)
name: str
distribution: Distribution
model: Model
@@ -1893,7 +1895,7 @@ def __ne__(self, other):
def _walk_up_rv(rv, formatting="plain"):
- """Walk up theano graph to get inputs for deterministic RV."""
+    """Walk up the Aesara graph to get inputs for a deterministic RV."""
all_rvs = []
parents = list(itertools.chain(*[j.inputs for j in rv.get_parents()]))
if parents:
@@ -1906,7 +1908,7 @@ def _walk_up_rv(rv, formatting="plain"):
return all_rvs
-class DeterministicWrapper(tt.TensorVariable):
+class DeterministicWrapper(TensorVariable):
def _str_repr(self, formatting="plain"):
if "latex" in formatting:
if formatting == "latex_with_params":
@@ -1935,7 +1937,7 @@ def Deterministic(name, var, model=None, dims=None):
Parameters
----------
name: str
- var: theano variables
+ var: aesara variables
Returns
-------
@@ -1956,7 +1958,7 @@ def Potential(name, var, model=None):
Parameters
----------
name: str
- var: theano variables
+ var: aesara variables
Returns
-------
@@ -1974,8 +1976,8 @@ class TransformedRV(PyMC3Variable):
Parameters
----------
- type: theano type (optional)
- owner: theano owner (optional)
+ type: aesara type (optional)
+ owner: aesara owner (optional)
name: str
distribution: Distribution
model: Model
@@ -2014,7 +2016,7 @@ def __init__(
normalRV = transform.backward(self.transformed)
- Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self])
+ Apply(aesara.compile.view_op, inputs=[normalRV], outputs=[self])
self.tag.test_value = normalRV.tag.test_value
self.scaling = _get_scaling(total_size, self.shape, self.ndim)
incorporate_methods(
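
Note on the `pymc3/model.py` changes above: a minimal sketch of how the renamed `aesara_config` keyword and the `aet`-based expressions would be used from model code. The model below is illustrative only and assumes the rest of the PyMC3 API is unchanged.

```python
import aesara.tensor as aet
import pymc3 as pm

# `theano_config` becomes `aesara_config`; the flag shown is just an example.
with pm.Model(aesara_config={"compute_test_value": "raise"}) as model:
    x = pm.Normal("x", mu=0.0, sigma=1.0)
    # Symbolic helpers now come from aesara.tensor (imported as `aet`).
    pm.Deterministic("x_sq", x ** 2)
    pm.Potential("p1", aet.constant(1))
```
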
diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py
index cd3feb3070..433dcfa54f 100644
--- a/pymc3/model_graph.py
+++ b/pymc3/model_graph.py
@@ -13,19 +13,19 @@
# limitations under the License.
from collections import deque
-from typing import Dict, Iterator, Optional, Set
+from typing import Dict, Iterator, NewType, Optional, Set
-VarName = str
-
-from theano.compile import SharedVariable
-from theano.graph.basic import walk
-from theano.tensor import Tensor
+from aesara.compile import SharedVariable
+from aesara.graph.basic import walk
+from aesara.tensor.var import TensorVariable
import pymc3 as pm
from pymc3.model import ObservedRV
from pymc3.util import get_default_varnames, get_var_name
+VarName = NewType("VarName", str)
+
class ModelGraph:
def __init__(self, model):
@@ -46,17 +46,17 @@ def get_deterministics(self, var):
deterministics.append(v)
return deterministics
- def _get_ancestors(self, var: Tensor, func) -> Set[Tensor]:
+ def _get_ancestors(self, var: TensorVariable, func) -> Set[TensorVariable]:
"""Get all ancestors of a function, doing some accounting for deterministics."""
# this contains all of the variables in the model EXCEPT var...
vars = set(self.var_list)
vars.remove(var)
- blockers = set() # type: Set[Tensor]
- retval = set() # type: Set[Tensor]
+ blockers = set() # type: Set[TensorVariable]
+ retval = set() # type: Set[TensorVariable]
- def _expand(node) -> Optional[Iterator[Tensor]]:
+ def _expand(node) -> Optional[Iterator[TensorVariable]]:
if node in blockers:
return None
elif node in vars:
@@ -87,7 +87,7 @@ def _filter_parents(self, var, parents) -> Set[VarName]:
raise AssertionError("Do not know what to do with {}".format(get_var_name(p)))
return keep
- def get_parents(self, var: Tensor) -> Set[VarName]:
+ def get_parents(self, var: TensorVariable) -> Set[VarName]:
"""Get the named nodes that are direct inputs to the var"""
if hasattr(var, "transformed"):
func = var.transformed.logpt
@@ -167,7 +167,7 @@ def get_plates(self):
if hasattr(v, "observations"):
try:
# To get shape of _observed_ data container `pm.Data`
- # (wrapper for theano.SharedVariable) we evaluate it.
+ # (wrapper for aesara.SharedVariable) we evaluate it.
shape = tuple(v.observations.shape.eval())
except AttributeError:
shape = v.observations.shape
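
The `VarName = NewType("VarName", str)` change in `model_graph.py` is a purely typing-level refinement; a short sketch of what it means in practice (the names below are only for illustration):

```python
from typing import NewType

VarName = NewType("VarName", str)

name = VarName("mu")          # static checkers now treat this as a VarName
assert isinstance(name, str)  # at runtime it is still an ordinary str
```
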
diff --git a/pymc3/ode/ode.py b/pymc3/ode/ode.py
index 2eba398404..5563bf898c 100644
--- a/pymc3/ode/ode.py
+++ b/pymc3/ode/ode.py
@@ -14,19 +14,20 @@
import logging
+import aesara
+import aesara.tensor as aet
import numpy as np
import scipy
-import theano
-import theano.tensor as tt
-from theano.graph.basic import Apply
-from theano.graph.op import Op, get_test_value
+from aesara.graph.basic import Apply
+from aesara.graph.op import Op, get_test_value
+from aesara.tensor.type import TensorType
from pymc3.exceptions import DtypeError, ShapeError
from pymc3.ode import utils
_log = logging.getLogger("pymc3")
-floatX = theano.config.floatX
+floatX = aesara.config.floatX
class DifferentialEquation(Op):
@@ -65,12 +66,12 @@ def odefunc(y, t, p):
ode_model = DifferentialEquation(func=odefunc, times=times, n_states=1, n_theta=1, t0=0)
"""
_itypes = [
- tt.TensorType(floatX, (False,)), # y0 as 1D floatX vector
- tt.TensorType(floatX, (False,)), # theta as 1D floatX vector
+ TensorType(floatX, (False,)), # y0 as 1D floatX vector
+ TensorType(floatX, (False,)), # theta as 1D floatX vector
]
_otypes = [
- tt.TensorType(floatX, (False, False)), # model states as floatX of shape (T, S)
- tt.TensorType(
+ TensorType(floatX, (False, False)), # model states as floatX of shape (T, S)
+ TensorType(
floatX, (False, False, False)
), # sensitivities as floatX of shape (T, S, len(y0) + len(theta))
]
@@ -153,8 +154,8 @@ def __call__(self, y0, theta, return_sens=False, **kwargs):
)
# convert inputs to tensors (and check their types)
- y0 = tt.cast(tt.unbroadcast(tt.as_tensor_variable(y0), 0), floatX)
- theta = tt.cast(tt.unbroadcast(tt.as_tensor_variable(theta), 0), floatX)
+ y0 = aet.cast(aet.unbroadcast(aet.as_tensor_variable(y0), 0), floatX)
+ theta = aet.cast(aet.unbroadcast(aet.as_tensor_variable(theta), 0), floatX)
inputs = [y0, theta]
for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)):
if not input_val.type == itype:
@@ -165,7 +166,7 @@ def __call__(self, y0, theta, return_sens=False, **kwargs):
# use default implementation to prepare symbolic outputs (via make_node)
states, sens = super().__call__(y0, theta, **kwargs)
- if theano.config.compute_test_value != "off":
+ if aesara.config.compute_test_value != "off":
# compute test values from input test values
test_states, test_sens = self._simulate(
y0=get_test_value(y0), theta=get_test_value(theta)
@@ -234,8 +235,8 @@ def grad(self, inputs, output_grads):
# for each parameter, multiply sensitivities with the output gradient and sum the result
# sens is (n_times, n_states, n_p)
# ograds is (n_times, n_states)
- grads = [tt.sum(sens[:, :, p] * ograds) for p in range(self.n_p)]
+ grads = [aet.sum(sens[:, :, p] * ograds) for p in range(self.n_p)]
# return separate gradient tensors for y0 and theta inputs
- result = tt.stack(grads[: self.n_states]), tt.stack(grads[self.n_states :])
+ result = aet.stack(grads[: self.n_states]), aet.stack(grads[self.n_states :])
return result
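
In `ode.py`, `TensorType` is now imported from `aesara.tensor.type` rather than accessed as `tt.TensorType`; a small sketch of declaring the same 1-d float vector type used for `_itypes` above (variable names are placeholders):

```python
import aesara
from aesara.tensor.type import TensorType

floatX = aesara.config.floatX

# (False,) marks a 1-d tensor with no fixed broadcastable dimension.
vector_type = TensorType(floatX, (False,))
y0 = vector_type("y0")  # a symbolic vector of that type
```
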
diff --git a/pymc3/ode/utils.py b/pymc3/ode/utils.py
index 141c5503f1..474ed901ba 100644
--- a/pymc3/ode/utils.py
+++ b/pymc3/ode/utils.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.tensor as tt
def make_sens_ic(n_states, n_theta, floatX):
@@ -83,20 +83,20 @@ def augment_system(ode_func, n_states, n_theta):
"""
# Present state of the system
- t_y = tt.vector("y", dtype="float64")
+ t_y = aet.vector("y", dtype="float64")
t_y.tag.test_value = np.ones((n_states,), dtype="float64")
# Parameter(s). Should be vector to allow for generalization to multiparameter
# systems of ODEs. Is m dimensional because it includes all initial conditions as well as ode parameters
- t_p = tt.vector("p", dtype="float64")
+ t_p = aet.vector("p", dtype="float64")
t_p.tag.test_value = np.ones((n_states + n_theta,), dtype="float64")
# Time. Allow for non-autonomous systems of ODEs to be analyzed
- t_t = tt.scalar("t", dtype="float64")
+ t_t = aet.scalar("t", dtype="float64")
t_t.tag.test_value = 2.459
# Present state of the gradients:
# Will always be 0 unless the parameter is the initial condition
# Entry i,j is partial of y[i] wrt to p[j]
- dydp_vec = tt.vector("dydp", dtype="float64")
+ dydp_vec = aet.vector("dydp", dtype="float64")
dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, "float64")
dydp = dydp_vec.reshape((n_states, n_states + n_theta))
@@ -106,19 +106,19 @@ def augment_system(ode_func, n_states, n_theta):
# Stack the results of the ode_func into a single tensor variable
if not isinstance(yhat, (list, tuple)):
yhat = (yhat,)
- t_yhat = tt.stack(yhat, axis=0)
+ t_yhat = aet.stack(yhat, axis=0)
# Now compute gradients
- J = tt.jacobian(t_yhat, t_y)
+ J = aet.jacobian(t_yhat, t_y)
- Jdfdy = tt.dot(J, dydp)
+ Jdfdy = aet.dot(J, dydp)
- grad_f = tt.jacobian(t_yhat, t_p)
+ grad_f = aet.jacobian(t_yhat, t_p)
# This is the time derivative of dydp
ddt_dydp = (Jdfdy + grad_f).flatten()
- system = theano.function(
+ system = aesara.function(
inputs=[t_y, t_t, t_p, dydp_vec], outputs=[t_yhat, ddt_dydp], on_unused_input="ignore"
)
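
A tiny analogue of `augment_system` with the renamed entry points, showing `aet.jacobian` and `aesara.function` together; the toy right-hand side here is made up purely for illustration:

```python
import aesara
import aesara.tensor as aet
import numpy as np

y = aet.vector("y", dtype="float64")
y.tag.test_value = np.ones((2,), dtype="float64")

# Toy "ODE right-hand side" and its Jacobian with respect to y.
f = aet.stack([y[0] * y[1], y[0] + y[1]], axis=0)
J = aet.jacobian(f, y)

system = aesara.function(inputs=[y], outputs=[f, J])
```
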
diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py
index bdfe1a274b..4cd39921b2 100644
--- a/pymc3/parallel_sampling.py
+++ b/pymc3/parallel_sampling.py
@@ -27,7 +27,7 @@
from fastprogress.fastprogress import progress_bar
-from pymc3 import theanof
+from pymc3 import aesaraf
from pymc3.exceptions import SamplingError
logger = logging.getLogger("pymc3")
@@ -99,7 +99,7 @@ def __init__(
self._step_method_is_pickled = step_method_is_pickled
self._shared_point = shared_point
self._seed = seed
- self._tt_seed = seed + 1
+ self._aet_seed = seed + 1
self._draws = draws
self._tune = tune
self._pickle_backend = pickle_backend
@@ -170,7 +170,7 @@ def _recv_msg(self):
def _start_loop(self):
np.random.seed(self._seed)
- theanof.set_tt_rng(self._tt_seed)
+ aesaraf.set_aet_rng(self._aet_seed)
draw = 0
tuning = True
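
The per-worker seeding in `parallel_sampling.py` now goes through `pymc3.aesaraf`; a sketch of the equivalent call from user code (the seed value is arbitrary):

```python
import numpy as np

from pymc3 import aesaraf

seed = 123
np.random.seed(seed)           # NumPy-side RNG
aesaraf.set_aet_rng(seed + 1)  # Aesara-side random stream, as in the worker loop
```
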
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 481d20ff03..98a2e8f3e8 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -25,10 +25,10 @@
from copy import copy, deepcopy
from typing import Any, Dict, Iterable, List, Optional, Set, Union, cast
+import aesara.gradient as tg
import arviz
import numpy as np
import packaging
-import theano.gradient as tg
import xarray
from arviz import InferenceData
diff --git a/pymc3/sampling_jax.py b/pymc3/sampling_jax.py
index 522bca7b12..4f10414caf 100644
--- a/pymc3/sampling_jax.py
+++ b/pymc3/sampling_jax.py
@@ -9,13 +9,13 @@
xla_flags = re.sub(r"xla_force_host_platform_device_count=.+\s", "", xla_flags).split()
os.environ["XLA_FLAGS"] = " ".join(["--xla_force_host_platform_device_count={}".format(100)])
+import aesara.graph.fg
import arviz as az
import jax
import numpy as np
import pandas as pd
-import theano.graph.fg
-from theano.link.jax.jax_dispatch import jax_funcify
+from aesara.link.jax.jax_dispatch import jax_funcify
import pymc3 as pm
@@ -24,9 +24,9 @@
warnings.warn("This module is experimental.")
# Disable C compilation by default
-# theano.config.cxx = ""
+# aesara.config.cxx = ""
# This will make the JAX Linker the default
-# theano.config.mode = "JAX"
+# aesara.config.mode = "JAX"
def sample_tfp_nuts(
@@ -47,7 +47,7 @@ def sample_tfp_nuts(
seed = jax.random.PRNGKey(random_seed)
- fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
+ fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
fns = jax_funcify(fgraph)
logp_fn_jax = fns[0]
@@ -133,7 +133,7 @@ def sample_numpyro_nuts(
seed = jax.random.PRNGKey(random_seed)
- fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
+ fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
fns = jax_funcify(fgraph)
logp_fn_jax = fns[0]
@@ -199,7 +199,7 @@ def _transform_samples(samples, model, keep_untransformed=False):
ops_to_compute = [x for x in model.unobserved_RVs if x.name in names_to_compute]
# Create function graph for these:
- fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, ops_to_compute)
+ fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, ops_to_compute)
# Jaxify, which returns a list of functions, one for each op
jax_fns = jax_funcify(fgraph)
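
For orientation, a sketch of the FunctionGraph-to-JAX pattern these samplers rely on, mirroring the calls shown in this file; the toy model is only illustrative:

```python
import aesara.graph.fg
from aesara.link.jax.jax_dispatch import jax_funcify

import pymc3 as pm

with pm.Model() as model:
    pm.Normal("x", 0.0, 1.0)

# Wrap the logp graph and convert it into a JAX-callable function.
fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
logp_fn_jax = jax_funcify(fgraph)[0]
```
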
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index 2e7e369ad3..25f278d188 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -14,22 +14,22 @@
from collections import OrderedDict
+import aesara.tensor as aet
import numpy as np
-import theano.tensor as tt
+from aesara import function as aesara_function
from scipy.special import logsumexp
from scipy.stats import multivariate_normal
-from theano import function as theano_function
-from pymc3.backends.ndarray import NDArray
-from pymc3.model import Point, modelcontext
-from pymc3.sampling import sample_prior_predictive
-from pymc3.theanof import (
+from pymc3.aesaraf import (
floatX,
inputvars,
join_nonshared_inputs,
make_shared_replacements,
)
+from pymc3.backends.ndarray import NDArray
+from pymc3.model import Point, modelcontext
+from pymc3.sampling import sample_prior_predictive
class SMC:
@@ -111,8 +111,8 @@ def setup_kernel(self):
if self.kernel == "abc":
factors = [var.logpt for var in self.model.free_RVs]
- factors += [tt.sum(factor) for factor in self.model.potentials]
- self.prior_logp_func = logp_forw([tt.sum(factors)], self.variables, shared)
+ factors += [aet.sum(factor) for factor in self.model.potentials]
+ self.prior_logp_func = logp_forw([aet.sum(factors)], self.variables, shared)
simulator = self.model.observed_RVs[0]
distance = simulator.distribution.distance
sum_stat = simulator.distribution.sum_stat
@@ -271,7 +271,7 @@ def posterior_to_trace(self):
def logp_forw(out_vars, vars, shared):
- """Compile Theano function of the model and the input and output variables.
+    """Compile an Aesara function of the model and the input and output variables.
Parameters
----------
@@ -280,10 +280,10 @@ def logp_forw(out_vars, vars, shared):
vars: List
containing :class:`pymc3.Distribution` for the input variables
shared: List
- containing :class:`theano.tensor.Tensor` for depended shared data
+        containing :class:`aesara.tensor.Tensor` for dependent shared data
"""
out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared)
- f = theano_function([inarray0], out_list[0])
+ f = aesara_function([inarray0], out_list[0])
f.trust_input = True
return f
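
A condensed sketch of the compile step in `logp_forw`/`setup_kernel` using the relocated helpers from `pymc3.aesaraf`; `compile_prior_logp`, `model`, and `vars` are placeholder names:

```python
import aesara.tensor as aet
from aesara import function as aesara_function

from pymc3.aesaraf import join_nonshared_inputs, make_shared_replacements


def compile_prior_logp(model, vars):
    shared = make_shared_replacements(vars, model)
    factors = [var.logpt for var in vars]
    out_list, inarray0 = join_nonshared_inputs([aet.sum(factors)], vars, shared)
    f = aesara_function([inarray0], out_list[0])
    f.trust_input = True
    return f
```
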
diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py
index c3e1cf6f8b..7992153f71 100644
--- a/pymc3/step_methods/arraystep.py
+++ b/pymc3/step_methods/arraystep.py
@@ -19,10 +19,10 @@
from numpy.random import uniform
+from pymc3.aesaraf import inputvars
from pymc3.blocking import ArrayOrdering, DictToArrayBijection
from pymc3.model import PyMC3Variable, modelcontext
from pymc3.step_methods.compound import CompoundStep
-from pymc3.theanof import inputvars
from pymc3.util import get_var_name
__all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"]
@@ -137,7 +137,7 @@ class ArrayStep(BlockedStep):
----------
vars: list
List of variables for sampler.
- fs: list of logp theano functions
+ fs: list of logp aesara functions
allvars: Boolean (default False)
blocked: Boolean (default True)
"""
@@ -177,7 +177,7 @@ def __init__(self, vars, shared, blocked=True):
Parameters
----------
vars: list of sampling variables
- shared: dict of theano variable -> shared variable
+ shared: dict of aesara variable -> shared variable
blocked: Boolean (default True)
"""
self.vars = vars
@@ -212,7 +212,7 @@ def __init__(self, vars, shared, blocked=True):
Parameters
----------
vars: list of sampling variables
- shared: dict of theano variable -> shared variable
+ shared: dict of aesara variable -> shared variable
blocked: Boolean (default True)
"""
self.population = None
@@ -244,14 +244,14 @@ def link_population(self, population, chain_index):
class GradientSharedStep(BlockedStep):
def __init__(
- self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **theano_kwargs
+ self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **aesara_kwargs
):
model = modelcontext(model)
self.vars = vars
self.blocked = blocked
if logp_dlogp_func is None:
- func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
+ func = model.logp_dlogp_function(vars, dtype=dtype, **aesara_kwargs)
else:
func = logp_dlogp_func
@@ -263,8 +263,8 @@ def __init__(
except ValueError:
if logp_dlogp_func is not None:
raise
- theano_kwargs.update(mode="FAST_COMPILE")
- func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
+ aesara_kwargs.update(mode="FAST_COMPILE")
+ func = model.logp_dlogp_function(vars, dtype=dtype, **aesara_kwargs)
self._logp_dlogp_func = func
diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py
index f1c1bb40d3..0a8d432644 100644
--- a/pymc3/step_methods/elliptical_slice.py
+++ b/pymc3/step_methods/elliptical_slice.py
@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
import numpy.random as nr
-import theano.tensor as tt
+from pymc3.aesaraf import inputvars
from pymc3.distributions import draw_values
from pymc3.model import modelcontext
from pymc3.step_methods.arraystep import ArrayStep, Competence
-from pymc3.theanof import inputvars
__all__ = ["EllipticalSlice"]
@@ -44,7 +44,7 @@ def get_chol(cov, chol):
raise ValueError("Must pass exactly one of cov or chol")
if cov is not None:
- chol = tt.slinalg.cholesky(cov)
+ chol = aet.slinalg.cholesky(cov)
return chol
@@ -86,7 +86,7 @@ class EllipticalSlice(ArrayStep):
def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs):
self.model = modelcontext(model)
chol = get_chol(prior_cov, prior_chol)
- self.prior_chol = tt.as_tensor_variable(chol)
+ self.prior_chol = aet.as_tensor_variable(chol)
if vars is None:
vars = self.model.cont_vars
diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py
index 2646a8a9e8..f109d49b26 100644
--- a/pymc3/step_methods/gibbs.py
+++ b/pymc3/step_methods/gibbs.py
@@ -19,6 +19,8 @@
"""
from warnings import warn
+from aesara.graph.basic import graph_inputs
+from aesara.tensor import add
from numpy import (
arange,
array,
@@ -31,8 +33,6 @@
searchsorted,
)
from numpy.random import uniform
-from theano.graph.basic import graph_inputs
-from theano.tensor import add
from pymc3.distributions.discrete import Categorical
from pymc3.model import modelcontext
diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py
index 323503fe49..7228b8a9c6 100644
--- a/pymc3/step_methods/hmc/base_hmc.py
+++ b/pymc3/step_methods/hmc/base_hmc.py
@@ -19,13 +19,13 @@
import numpy as np
+from pymc3.aesaraf import floatX, inputvars
from pymc3.backends.report import SamplerWarning, WarningType
from pymc3.exceptions import SamplingError
from pymc3.model import Point, modelcontext
from pymc3.step_methods import arraystep, step_sizes
from pymc3.step_methods.hmc import integration
from pymc3.step_methods.hmc.quadpotential import QuadPotentialDiagAdapt, quad_potential
-from pymc3.theanof import floatX, inputvars
from pymc3.tuning import guess_scaling
logger = logging.getLogger("pymc3")
@@ -57,13 +57,13 @@ def __init__(
t0=10,
adapt_step_size=True,
step_rand=None,
- **theano_kwargs
+ **aesara_kwargs
):
"""Set up Hamiltonian samplers with common structures.
Parameters
----------
- vars: list of theano variables
+ vars: list of aesara variables
scaling: array_like, ndim = {1,2}
Scaling for momentum distribution. 1d arrays interpreted matrix
diagonal.
@@ -77,7 +77,7 @@ def __init__(
potential: Potential, optional
An object that represents the Hamiltonian with methods `velocity`,
`energy`, and `random` methods.
- **theano_kwargs: passed to theano functions
+ **aesara_kwargs: passed to aesara functions
"""
self._model = modelcontext(model)
@@ -85,7 +85,7 @@ def __init__(
vars = self._model.cont_vars
vars = inputvars(vars)
- super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs)
+ super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **aesara_kwargs)
self.adapt_step_size = adapt_step_size
self.Emax = Emax
diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py
index 613160c27e..522a40d94f 100644
--- a/pymc3/step_methods/hmc/hmc.py
+++ b/pymc3/step_methods/hmc/hmc.py
@@ -59,7 +59,7 @@ def __init__(self, vars=None, path_length=2.0, max_steps=1024, **kwargs):
Parameters
----------
- vars: list of theano variables
+ vars: list of aesara variables
path_length: float, default=2
total length to travel
step_rand: function float -> float, default=unif
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index 4a00ec9873..8d7b9a69ad 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -16,13 +16,13 @@
import numpy as np
+from pymc3.aesaraf import floatX
from pymc3.backends.report import SamplerWarning, WarningType
from pymc3.distributions import BART
from pymc3.math import logbern, logdiffexp_numpy
from pymc3.step_methods.arraystep import Competence
from pymc3.step_methods.hmc.base_hmc import BaseHMC, DivergenceInfo, HMCStepData
from pymc3.step_methods.hmc.integration import IntegrationError
-from pymc3.theanof import floatX
from pymc3.vartypes import continuous_types
__all__ = ["NUTS"]
@@ -114,7 +114,7 @@ def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs)
Parameters
----------
- vars: list of Theano variables, default all continuous vars
+ vars: list of Aesara variables, default all continuous vars
Emax: float, default 1000
Maximum energy change allowed during leapfrog steps. Larger
deviations will abort the integration.
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index 4c2e6acc7a..f77f1f9988 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -14,14 +14,14 @@
import warnings
+import aesara
import numpy as np
import scipy.linalg
-import theano
from numpy.random import normal
from scipy.sparse import issparse
-from pymc3.theanof import floatX
+from pymc3.aesaraf import floatX
__all__ = [
"quad_potential",
@@ -170,7 +170,7 @@ def __init__(
)
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
if initial_diag is None:
initial_diag = np.ones(n, dtype=dtype)
@@ -189,7 +189,7 @@ def __init__(
def reset(self):
self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True)
- self._var_theano = theano.shared(self._var)
+ self._var_aesara = aesara.shared(self._var)
self._stds = np.sqrt(self._initial_diag)
self._inv_stds = floatX(1.0) / self._stds
self._foreground_var = _WeightedVariance(
@@ -222,7 +222,7 @@ def _update_from_weightvar(self, weightvar):
weightvar.current_variance(out=self._var)
np.sqrt(self._var, out=self._stds)
np.divide(1, self._stds, out=self._inv_stds)
- self._var_theano.set_value(self._var)
+ self._var_aesara.set_value(self._var)
def update(self, sample, grad, tune):
"""Inform the potential about a new sample during tuning."""
@@ -304,7 +304,7 @@ def _update(self, var):
self._var[:] = var
np.sqrt(self._var, out=self._stds)
np.divide(1, self._stds, out=self._inv_stds)
- self._var_theano.set_value(self._var)
+ self._var_aesara.set_value(self._var)
def update(self, sample, grad, tune):
"""Inform the potential about a new sample during tuning."""
@@ -384,7 +384,7 @@ def __init__(self, v, dtype=None):
Diagonal of covariance matrix for the potential vector
"""
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
self.dtype = dtype
v = v.astype(self.dtype)
s = v ** 0.5
@@ -428,7 +428,7 @@ def __init__(self, A, dtype=None):
Inverse of covariance matrix for the potential vector
"""
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
self.dtype = dtype
self.L = floatX(scipy.linalg.cholesky(A, lower=True))
@@ -468,7 +468,7 @@ def __init__(self, cov, dtype=None):
scaling matrix for the potential vector
"""
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
self.dtype = dtype
self._cov = np.array(cov, dtype=self.dtype, copy=True)
self._chol = scipy.linalg.cholesky(self._cov, lower=True)
@@ -525,7 +525,7 @@ def __init__(
)
if dtype is None:
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
if initial_cov is None:
initial_cov = np.eye(n, dtype=dtype)
@@ -658,7 +658,7 @@ def current_mean(self):
if chol_available:
__all__ += ["QuadPotentialSparse"]
- import theano.sparse
+ import aesara.sparse
class QuadPotentialSparse(QuadPotential):
def __init__(self, A):
@@ -676,8 +676,8 @@ def __init__(self, A):
def velocity(self, x):
"""Compute the current velocity at a position in parameter space."""
- A = theano.sparse.as_sparse(self.A)
- return theano.sparse.dot(A, x)
+ A = aesara.sparse.as_sparse(self.A)
+ return aesara.sparse.dot(A, x)
def random(self):
"""Draw random value from QuadPotential."""
diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py
index 76804db2f8..0878b2b772 100644
--- a/pymc3/step_methods/metropolis.py
+++ b/pymc3/step_methods/metropolis.py
@@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
import numpy.random as nr
import scipy.linalg
-import theano
import pymc3 as pm
+from pymc3.aesaraf import floatX
from pymc3.distributions import draw_values
from pymc3.step_methods.arraystep import (
ArrayStep,
@@ -27,7 +28,6 @@
PopulationArrayStepShared,
metrop_select,
)
-from pymc3.theanof import floatX
__all__ = [
"Metropolis",
@@ -142,7 +142,7 @@ def __init__(
model: PyMC Model
Optional model for sampling step. Defaults to None (taken from context).
mode: string or `Mode` instance.
- compilation mode passed to Theano functions
+ compilation mode passed to Aesara functions
"""
model = pm.modelcontext(model)
@@ -571,7 +571,7 @@ class DEMetropolis(PopulationArrayStepShared):
model: PyMC Model
Optional model for sampling step. Defaults to None (taken from context).
mode: string or `Mode` instance.
- compilation mode passed to Theano functions
+ compilation mode passed to Aesara functions
References
----------
@@ -713,7 +713,7 @@ class DEMetropolisZ(ArrayStepShared):
model: PyMC Model
Optional model for sampling step. Defaults to None (taken from context).
mode: string or `Mode` instance.
- compilation mode passed to Theano functions
+ compilation mode passed to Aesara functions
References
----------
@@ -887,6 +887,6 @@ def delta_logp(logp, vars, shared):
logp1 = pm.CallableTensor(logp0)(inarray1)
- f = theano.function([inarray1, inarray0], logp1 - logp0)
+ f = aesara.function([inarray1, inarray0], logp1 - logp0)
f.trust_input = True
return f
diff --git a/pymc3/step_methods/mlda.py b/pymc3/step_methods/mlda.py
index 559f894f30..8edf54209b 100644
--- a/pymc3/step_methods/mlda.py
+++ b/pymc3/step_methods/mlda.py
@@ -17,10 +17,11 @@
from typing import List, Optional, Type, Union
+import aesara
import arviz as az
import numpy as np
-import theano
-import theano.tensor as tt
+
+from aesara.tensor.sharedvar import TensorSharedVariable
import pymc3 as pm
@@ -254,7 +255,7 @@ class MLDA(ArrayStepShared):
(taken from context). This model should be the finest of all
multilevel models.
mode : string or `Mode` instance.
- Compilation mode passed to Theano functions
+ Compilation mode passed to Aesara functions
subsampling_rates : integer or list of integers
One integer for all levels or a list with one number for each level
(excluding the finest level).
@@ -275,7 +276,7 @@ class MLDA(ArrayStepShared):
the PyMC3 model (also demonstrated in the example notebook):
- Include a `pm.Data()` variable with the name `Q` in the
model description of all levels.
- - Use a Theano Op to calculate the forward model (or the
+        - Use an Aesara Op to calculate the forward model (or the
combination of a forward model and a likelihood). This Op
should have a `perform()` method which (in addition to all
the other calculations), calculates the quantity of interest
@@ -300,7 +301,7 @@ class MLDA(ArrayStepShared):
extra variables mu_B and Sigma_B, which will capture
the bias between different levels. All these variables
should be instantiated using the pm.Data method.
- - Use a Theano Op to define the forward model (and
+        - Use an Aesara Op to define the forward model (and
optionally the likelihood) for all levels. The Op needs
to store the result of each forward model calculation
to the variable model_output of the PyMC3 model,
@@ -419,7 +420,7 @@ def __init__(
"for storing the fine Q."
"Use pm.Data() to define it."
)
- if not isinstance(self.model.Q, tt.sharedvar.TensorSharedVariable):
+ if not isinstance(self.model.Q, TensorSharedVariable):
raise TypeError(
"The variable 'Q' in the model definition is not of type "
"'TensorSharedVariable'. Use pm.Data() to define the"
@@ -454,8 +455,8 @@ def __init__(
"Use pm.Data() to define it."
)
if not (
- isinstance(self.model_below.mu_B, tt.sharedvar.TensorSharedVariable)
- and isinstance(self.model_below.Sigma_B, tt.sharedvar.TensorSharedVariable)
+ isinstance(self.model_below.mu_B, TensorSharedVariable)
+ and isinstance(self.model_below.Sigma_B, TensorSharedVariable)
):
raise TypeError(
"At least one of the variables 'mu_B' and 'Sigma_B' "
@@ -549,12 +550,12 @@ def __init__(
self.accepted = 0
- # Construct theano function for current-level model likelihood
+        # Construct an Aesara function for current-level model likelihood
# (for use in acceptance)
shared = pm.make_shared_replacements(vars, model)
self.delta_logp = delta_logp_inverse(model.logpt, vars, shared)
- # Construct theano function for below-level model likelihood
+        # Construct an Aesara function for below-level model likelihood
# (for use in acceptance)
model_below = pm.modelcontext(self.model_below)
vars_below = [var for var in model_below.vars if var.name in self.var_names]
@@ -964,7 +965,7 @@ def delta_logp_inverse(logp, vars, shared):
logp1 = pm.CallableTensor(logp0)(inarray1)
- f = theano.function([inarray1, inarray0], -logp0 + logp1)
+ f = aesara.function([inarray1, inarray0], -logp0 + logp1)
f.trust_input = True
return f
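
The type check for the `Q` container now uses the directly imported `TensorSharedVariable`; a sketch of what MLDA expects from the model definition (the scalar initial value is just an example):

```python
import numpy as np
from aesara.tensor.sharedvar import TensorSharedVariable

import pymc3 as pm

with pm.Model() as model:
    Q = pm.Data("Q", np.float64(0.0))  # quantity-of-interest container

# pm.Data is backed by an Aesara shared variable, so this check passes.
assert isinstance(model.Q, TensorSharedVariable)
```
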
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index c3bac3ade9..9649a9cb8f 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -16,13 +16,13 @@
import numpy as np
-from theano import function as theano_function
+from aesara import function as aesara_function
+from pymc3.aesaraf import inputvars, join_nonshared_inputs, make_shared_replacements
from pymc3.distributions import BART
from pymc3.distributions.tree import Tree
from pymc3.model import modelcontext
from pymc3.step_methods.arraystep import ArrayStepShared, Competence
-from pymc3.theanof import inputvars, join_nonshared_inputs, make_shared_replacements
_log = logging.getLogger("pymc3")
@@ -274,7 +274,7 @@ def set_particle_to_step(self, t):
def logp(out_vars, vars, shared):
- """Compile Theano function of the model and the input and output variables.
+    """Compile an Aesara function of the model and the input and output variables.
Parameters
----------
@@ -283,9 +283,9 @@ def logp(out_vars, vars, shared):
vars: List
containing :class:`pymc3.Distribution` for the input variables
shared: List
- containing :class:`theano.tensor.Tensor` for depended shared data
+        containing :class:`aesara.tensor.Tensor` for dependent shared data
"""
out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared)
- f = theano_function([inarray0], out_list[0])
+ f = aesara_function([inarray0], out_list[0])
f.trust_input = True
return f
diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py
index 1620f21b0e..80246db758 100644
--- a/pymc3/step_methods/sgmcmc.py
+++ b/pymc3/step_methods/sgmcmc.py
@@ -16,12 +16,12 @@
from collections import OrderedDict
-import theano
-import theano.tensor as tt
+import aesara
+import aesara.tensor as aet
+from pymc3.aesaraf import aet_rng, make_shared_replacements
from pymc3.model import inputvars, modelcontext
from pymc3.step_methods.arraystep import ArrayStepShared
-from pymc3.theanof import make_shared_replacements, tt_rng
__all__ = []
@@ -45,8 +45,8 @@ def _check_minibatches(minibatch_tensors, minibatches):
def prior_dlogp(vars, model, flat_view):
"""Returns the gradient of the prior on the parameters as a vector of size D x 1"""
- terms = tt.concatenate([theano.grad(var.logpt, var).flatten() for var in vars], axis=0)
- dlogp = theano.clone(terms, flat_view.replacements, strict=False)
+ terms = aet.concatenate([aesara.grad(var.logpt, var).flatten() for var in vars], axis=0)
+ dlogp = aesara.clone_replace(terms, flat_view.replacements, strict=False)
return dlogp
@@ -63,12 +63,14 @@ def elemwise_dlogL(vars, model, flat_view):
# calculate fisher information
terms = []
for var in vars:
- output, _ = theano.scan(
- lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),
- sequences=[tt.arange(logL.shape[0])],
+ output, _ = aesara.scan(
+ lambda i, logX=logL, v=var: aesara.grad(logX[i], v).flatten(),
+ sequences=[aet.arange(logL.shape[0])],
)
terms.append(output)
- dlogL = theano.clone(tt.concatenate(terms, axis=1), flat_view.replacements, strict=False)
+ dlogL = aesara.clone_replace(
+ aet.concatenate(terms, axis=1), flat_view.replacements, strict=False
+ )
return dlogL
@@ -106,7 +108,7 @@ class BaseStochasticGradient(ArrayStepShared):
Defining a BaseStochasticGradient needs
custom implementation of the following methods:
- :code: `.mk_training_fn()`
- Returns a theano function which is called for each sampling step
+        Returns an Aesara function which is called for each sampling step
- :code: `._initialize_values()`
Returns None it creates class variables which are required for the training fn
"""
@@ -145,9 +147,9 @@ def __init__(
# set random stream
self.random = None
if random_seed is None:
- self.random = tt_rng()
+ self.random = aet_rng()
else:
- self.random = tt_rng(random_seed)
+ self.random = aet_rng(random_seed)
self.step_size = step_size
@@ -169,7 +171,7 @@ def __init__(
# Replace input shared variables with tensors
def is_shared(t):
- return isinstance(t, theano.compile.sharedvalue.SharedVariable)
+ return isinstance(t, aesara.compile.sharedvalue.SharedVariable)
tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
updates = OrderedDict(
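
`theano.clone` is replaced by `aesara.clone_replace` throughout this file; a minimal standalone sketch of the rename (the expression is arbitrary):

```python
import aesara
import aesara.tensor as aet

x = aet.vector("x")
y = aet.vector("y")
expr = (x ** 2).sum()

# Rebuild the expression with x swapped out for y.
expr_on_y = aesara.clone_replace(expr, replace={x: y})
f = aesara.function([y], expr_on_y)
```
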
diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py
index ef68dec993..b0320a9eff 100644
--- a/pymc3/step_methods/slicer.py
+++ b/pymc3/step_methods/slicer.py
@@ -17,9 +17,9 @@
import numpy as np
import numpy.random as nr
+from pymc3.aesaraf import inputvars
from pymc3.model import modelcontext
from pymc3.step_methods.arraystep import ArrayStep, Competence
-from pymc3.theanof import inputvars
from pymc3.vartypes import continuous_types
__all__ = ["Slice"]
diff --git a/pymc3/tests/backend_fixtures.py b/pymc3/tests/backend_fixtures.py
index 6fd0b1318c..9ef8d03a7d 100644
--- a/pymc3/tests/backend_fixtures.py
+++ b/pymc3/tests/backend_fixtures.py
@@ -16,10 +16,10 @@
import os
import shutil
+import aesara
import numpy as np
import numpy.testing as npt
import pytest
-import theano
from pymc3.backends import base
from pymc3.tests import models
@@ -250,7 +250,7 @@ def record_point(self, val):
else:
self.strace.record(point=point)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_standard_close(self):
for idx in range(self.draws):
self.record_point(idx)
@@ -293,14 +293,14 @@ class SelectionTestCase(ModelBackendSampledTestCase):
- shape
"""
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_get_values_default(self):
for varname in self.test_point.keys():
expected = np.concatenate([self.expected[chain][varname] for chain in [0, 1]])
result = self.mtrace.get_values(varname)
npt.assert_equal(result, expected)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_get_values_nocombine_burn_keyword(self):
burn = 2
for varname in self.test_point.keys():
@@ -311,7 +311,7 @@ def test_get_values_nocombine_burn_keyword(self):
def test_len(self):
assert len(self.mtrace) == self.draws
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_dtypes(self):
for varname in self.test_point.keys():
assert (
@@ -515,7 +515,7 @@ def test_chain_length(self):
assert self.mtrace0.nchains == self.mtrace1.nchains
assert len(self.mtrace0) == len(self.mtrace1)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_dtype(self):
for varname in self.test_point.keys():
assert (
diff --git a/pymc3/tests/conftest.py b/pymc3/tests/conftest.py
index e9d38d163f..1be0184c0e 100644
--- a/pymc3/tests/conftest.py
+++ b/pymc3/tests/conftest.py
@@ -12,31 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
import pytest
-import theano
import pymc3 as pm
@pytest.fixture(scope="function", autouse=True)
-def theano_config():
- config = theano.config.change_flags(compute_test_value="raise")
+def aesara_config():
+ config = aesara.config.change_flags(compute_test_value="raise")
with config:
yield
@pytest.fixture(scope="function", autouse=True)
def exception_verbosity():
- config = theano.config.change_flags(exception_verbosity="high")
+ config = aesara.config.change_flags(exception_verbosity="high")
with config:
yield
@pytest.fixture(scope="function", autouse=False)
def strict_float32():
- if theano.config.floatX == "float32":
- config = theano.config.change_flags(warn_float64="raise")
+ if aesara.config.floatX == "float32":
+ config = aesara.config.change_flags(warn_float64="raise")
with config:
yield
else:
@@ -47,4 +47,4 @@ def strict_float32():
def seeded_test():
# TODO: use this instead of SeededTest
np.random.seed(42)
- pm.set_tt_rng(42)
+ pm.set_aet_rng(42)
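
The fixtures above rely on `aesara.config.change_flags` working exactly like the old `theano.config.change_flags`, i.e. as a context manager; a one-line sketch:

```python
import aesara

with aesara.config.change_flags(compute_test_value="raise"):
    pass  # build graphs here with eager test-value computation
```
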
diff --git a/pymc3/tests/helpers.py b/pymc3/tests/helpers.py
index 6e56fad9d0..9806fb0b8e 100644
--- a/pymc3/tests/helpers.py
+++ b/pymc3/tests/helpers.py
@@ -16,13 +16,13 @@
from logging.handlers import BufferingHandler
+import aesara
import numpy.random as nr
-import theano
-from theano.gradient import verify_grad as tt_verify_grad
-from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream
+from aesara.gradient import verify_grad as aet_verify_grad
+from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream
-from pymc3.theanof import set_tt_rng, tt_rng
+from pymc3.aesaraf import aet_rng, set_aet_rng
class SeededTest:
@@ -34,11 +34,11 @@ def setup_class(cls):
def setup_method(self):
nr.seed(self.random_seed)
- self.old_tt_rng = tt_rng()
- set_tt_rng(RandomStream(self.random_seed))
+ self.old_aet_rng = aet_rng()
+ set_aet_rng(RandomStream(self.random_seed))
def teardown_method(self):
- set_tt_rng(self.old_tt_rng)
+ set_aet_rng(self.old_aet_rng)
class LoggingHandler(BufferingHandler):
@@ -104,7 +104,7 @@ def match_value(self, k, dv, v):
def select_by_precision(float64, float32):
"""Helper function to choose reasonable decimal cutoffs for different floatX modes."""
- decimal = float64 if theano.config.floatX == "float64" else float32
+ decimal = float64 if aesara.config.floatX == "float64" else float32
return decimal
@@ -116,4 +116,4 @@ def not_raises():
def verify_grad(op, pt, n_tests=2, rng=None, *args, **kwargs):
if rng is None:
rng = nr.RandomState(411342)
- tt_verify_grad(op, pt, n_tests, rng, *args, **kwargs)
+ aet_verify_grad(op, pt, n_tests, rng, *args, **kwargs)
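
For reference, the SeededTest setup/teardown pair amounts to the save-and-restore pattern below — a sketch only, assuming aet_rng() returns the current global MRG stream just as tt_rng() did (the seed value is arbitrary):

from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream

from pymc3.aesaraf import aet_rng, set_aet_rng

old_rng = aet_rng()                   # remember the current global stream
set_aet_rng(RandomStream(20200904))   # install a seeded stream for the test
try:
    pass  # ... code that draws from the stream would run here ...
finally:
    set_aet_rng(old_rng)              # restore the previous stream
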
diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py
index 5f60734859..49f9cd1e7a 100644
--- a/pymc3/tests/models.py
+++ b/pymc3/tests/models.py
@@ -14,23 +14,23 @@
from itertools import product
+import aesara
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.tensor as tt
-from theano.compile.ops import as_op
+from aesara.compile.ops import as_op
import pymc3 as pm
from pymc3 import Categorical, Metropolis, Model, Normal
-from pymc3.theanof import floatX_array
+from pymc3.aesaraf import floatX_array
def simple_model():
mu = -2.1
tau = 1.3
with Model() as model:
- Normal("x", mu, tau=tau, shape=2, testval=tt.ones(2) * 0.1)
+ Normal("x", mu, tau=tau, shape=2, testval=aet.ones(2) * 0.1)
return model.test_point, model, (mu, tau ** -0.5)
@@ -50,13 +50,13 @@ def multidimensional_model():
mu = -2.1
tau = 1.3
with Model() as model:
- Normal("x", mu, tau=tau, shape=(3, 2), testval=0.1 * tt.ones((3, 2)))
+ Normal("x", mu, tau=tau, shape=(3, 2), testval=0.1 * aet.ones((3, 2)))
return model.test_point, model, (mu, tau ** -0.5)
def simple_arbitrary_det():
- scalar_type = tt.dscalar if theano.config.floatX == "float64" else tt.fscalar
+ scalar_type = aet.dscalar if aesara.config.floatX == "float64" else aet.fscalar
@as_op(itypes=[scalar_type], otypes=[scalar_type])
def arbitrary_det(value):
@@ -82,7 +82,7 @@ def simple_2model():
p = 0.4
with Model() as model:
x = pm.Normal("x", mu, tau=tau, testval=0.1)
- pm.Deterministic("logx", tt.log(x))
+ pm.Deterministic("logx", aet.log(x))
pm.Bernoulli("y", p)
return model.test_point, model
@@ -92,7 +92,7 @@ def simple_2model_continuous():
tau = 1.3
with Model() as model:
x = pm.Normal("x", mu, tau=tau, testval=0.1)
- pm.Deterministic("logx", tt.log(x))
+ pm.Deterministic("logx", aet.log(x))
pm.Beta("y", alpha=1, beta=1, shape=2)
return model.test_point, model
@@ -104,8 +104,8 @@ def mv_simple():
with pm.Model() as model:
pm.MvNormal(
"x",
- tt.constant(mu),
- tau=tt.constant(tau),
+ aet.constant(mu),
+ tau=aet.constant(tau),
shape=3,
testval=floatX_array([0.1, 1.0, 0.8]),
)
@@ -121,8 +121,8 @@ def mv_simple_coarse():
with pm.Model() as model:
pm.MvNormal(
"x",
- tt.constant(mu),
- tau=tt.constant(tau),
+ aet.constant(mu),
+ tau=aet.constant(tau),
shape=3,
testval=floatX_array([0.1, 1.0, 0.8]),
)
@@ -138,8 +138,8 @@ def mv_simple_very_coarse():
with pm.Model() as model:
pm.MvNormal(
"x",
- tt.constant(mu),
- tau=tt.constant(tau),
+ aet.constant(mu),
+ tau=aet.constant(tau),
shape=3,
testval=floatX_array([0.1, 1.0, 0.8]),
)
@@ -153,7 +153,7 @@ def mv_simple_discrete():
n = 5
p = floatX_array([0.15, 0.85])
with pm.Model() as model:
- pm.Multinomial("x", n, tt.constant(p), shape=d, testval=np.array([1, 4]))
+ pm.Multinomial("x", n, aet.constant(p), shape=d, testval=np.array([1, 4]))
mu = n * p
# covariance matrix
C = np.zeros((d, d))
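
For reference, user-facing model code changes only in its import alias; a minimal sketch mirroring simple_model above:

import aesara.tensor as aet

import pymc3 as pm

with pm.Model() as model:
    # testval is now an aesara.tensor expression instead of a theano.tensor one
    pm.Normal("x", mu=-2.1, tau=1.3, shape=2, testval=aet.ones(2) * 0.1)
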
diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py
index fcf66f1556..aacb3fb3ab 100644
--- a/pymc3/tests/sampler_fixtures.py
+++ b/pymc3/tests/sampler_fixtures.py
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import arviz as az
import numpy as np
import numpy.testing as npt
-import theano.tensor as tt
from scipy import stats
@@ -124,9 +124,9 @@ def make_model(cls):
sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, shape=5)
chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist)
chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
- cov = tt.dot(chol, chol.T)
- stds = tt.sqrt(tt.diag(cov))
- pm.Deterministic("log_stds", tt.log(stds))
+ cov = aet.dot(chol, chol.T)
+ stds = aet.sqrt(aet.diag(cov))
+ pm.Deterministic("log_stds", aet.log(stds))
corr = cov / stds[None, :] / stds[:, None]
corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
pm.Deterministic("corr_entries_unit", corr_entries_unit)
diff --git a/pymc3/tests/test_theanof.py b/pymc3/tests/test_aesaraf.py
similarity index 90%
rename from pymc3/tests/test_theanof.py
rename to pymc3/tests/test_aesaraf.py
index d54aed680d..1b591e0a85 100644
--- a/pymc3/tests/test_theanof.py
+++ b/pymc3/tests/test_aesaraf.py
@@ -14,15 +14,17 @@
from itertools import product
+import aesara
+import aesara.tensor as aet
import numpy as np
import pytest
-import theano
-import theano.tensor as tt
-from pymc3.theanof import _conversion_map, take_along_axis
+from aesara.tensor.type import TensorType
+
+from pymc3.aesaraf import _conversion_map, take_along_axis
from pymc3.vartypes import int_types
-FLOATX = str(theano.config.floatX)
+FLOATX = str(aesara.config.floatX)
INTX = str(_conversion_map[FLOATX])
@@ -78,8 +80,8 @@ def setup_class(self):
def _input_tensors(self, shape):
ndim = len(shape)
- arr = tt.TensorType(FLOATX, [False] * ndim)("arr")
- indices = tt.TensorType(INTX, [False] * ndim)("indices")
+ arr = TensorType(FLOATX, [False] * ndim)("arr")
+ indices = TensorType(INTX, [False] * ndim)("indices")
arr.tag.test_value = np.zeros(shape, dtype=FLOATX)
indices.tag.test_value = np.zeros(shape, dtype=INTX)
return arr, indices
@@ -107,7 +109,7 @@ def get_output_tensors(self, shape, axis):
return out
def _function(self, arr, indices, out):
- return theano.function([arr, indices], [out])
+ return aesara.function([arr, indices], [out])
def get_function(self, shape, axis):
ndim = len(shape)
@@ -181,13 +183,13 @@ def test_take_along_axis_grad(self, shape, axis, samples):
_axis = len(shape) + axis
else:
_axis = axis
- # Setup the theano function
+ # Setup the aesara function
t_arr, t_indices = self.get_input_tensors(shape)
- t_out2 = theano.grad(
- tt.sum(self._output_tensor(t_arr ** 2, t_indices, axis)),
+ t_out2 = aesara.grad(
+ aet.sum(self._output_tensor(t_arr ** 2, t_indices, axis)),
t_arr,
)
- func = theano.function([t_arr, t_indices], [t_out2])
+ func = aesara.function([t_arr, t_indices], [t_out2])
# Test that the gradient gives the same output as what is expected
arr, indices = self.get_input_values(shape, axis, samples)
@@ -209,16 +211,16 @@ def test_axis_failure(self, axis):
take_along_axis(arr, indices, axis=axis)
def test_ndim_failure(self):
- arr = tt.TensorType(FLOATX, [False] * 3)("arr")
- indices = tt.TensorType(INTX, [False] * 2)("indices")
+ arr = TensorType(FLOATX, [False] * 3)("arr")
+ indices = TensorType(INTX, [False] * 2)("indices")
arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX)
indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=INTX)
with pytest.raises(ValueError):
take_along_axis(arr, indices)
def test_dtype_failure(self):
- arr = tt.TensorType(FLOATX, [False] * 3)("arr")
- indices = tt.TensorType(FLOATX, [False] * 3)("indices")
+ arr = TensorType(FLOATX, [False] * 3)("arr")
+ indices = TensorType(FLOATX, [False] * 3)("indices")
arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX)
indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=FLOATX)
with pytest.raises(IndexError):
diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py
index 966ce47cd6..fb4a355749 100644
--- a/pymc3/tests/test_data_container.py
+++ b/pymc3/tests/test_data_container.py
@@ -16,12 +16,12 @@
import pandas as pd
import pytest
-from theano import shared
+from aesara import shared
import pymc3 as pm
+from pymc3.aesaraf import floatX
from pymc3.tests.helpers import SeededTest
-from pymc3.theanof import floatX
class TestData(SeededTest):
diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py
index de9bbd5b7e..f3b193b8a1 100644
--- a/pymc3/tests/test_dist_math.py
+++ b/pymc3/tests/test_dist_math.py
@@ -13,16 +13,17 @@
# limitations under the License.
import sys
+import aesara
+import aesara.tensor as aet
import numpy as np
import numpy.testing as npt
import pytest
-import theano
-import theano.tensor as tt
from scipy import interpolate, stats
import pymc3 as pm
+from pymc3.aesaraf import floatX
from pymc3.distributions import Discrete
from pymc3.distributions.dist_math import (
MvNormalLogp,
@@ -34,28 +35,27 @@
i0e,
)
from pymc3.tests.helpers import verify_grad
-from pymc3.theanof import floatX
def test_bound():
- logp = tt.ones((10, 10))
- cond = tt.ones((10, 10))
+ logp = aet.ones((10, 10))
+ cond = aet.ones((10, 10))
assert np.all(bound(logp, cond).eval() == logp.eval())
- logp = tt.ones((10, 10))
- cond = tt.zeros((10, 10))
+ logp = aet.ones((10, 10))
+ cond = aet.zeros((10, 10))
assert np.all(bound(logp, cond).eval() == (-np.inf * logp).eval())
- logp = tt.ones((10, 10))
+ logp = aet.ones((10, 10))
cond = True
assert np.all(bound(logp, cond).eval() == logp.eval())
- logp = tt.ones(3)
+ logp = aet.ones(3)
cond = np.array([1, 0, 1])
assert not np.all(bound(logp, cond).eval() == 1)
assert np.prod(bound(logp, cond).eval()) == -np.inf
- logp = tt.ones((2, 3))
+ logp = aet.ones((2, 3))
cond = np.array([[1, 1, 1], [1, 0, 1]])
assert not np.all(bound(logp, cond).eval() == 1)
assert np.prod(bound(logp, cond).eval()) == -np.inf
@@ -63,7 +63,7 @@ def test_bound():
def test_check_bounds_false():
with pm.Model(check_bounds=False):
- logp = tt.ones(3)
+ logp = aet.ones(3)
cond = np.array([1, 0, 1])
assert np.all(bound(logp, cond).eval() == logp.eval())
@@ -71,21 +71,21 @@ def test_check_bounds_false():
def test_alltrue_scalar():
assert alltrue_scalar([]).eval()
assert alltrue_scalar([True]).eval()
- assert alltrue_scalar([tt.ones(10)]).eval()
- assert alltrue_scalar([tt.ones(10), 5 * tt.ones(101)]).eval()
- assert alltrue_scalar([np.ones(10), 5 * tt.ones(101)]).eval()
- assert alltrue_scalar([np.ones(10), True, 5 * tt.ones(101)]).eval()
- assert alltrue_scalar([np.array([1, 2, 3]), True, 5 * tt.ones(101)]).eval()
+ assert alltrue_scalar([aet.ones(10)]).eval()
+ assert alltrue_scalar([aet.ones(10), 5 * aet.ones(101)]).eval()
+ assert alltrue_scalar([np.ones(10), 5 * aet.ones(101)]).eval()
+ assert alltrue_scalar([np.ones(10), True, 5 * aet.ones(101)]).eval()
+ assert alltrue_scalar([np.array([1, 2, 3]), True, 5 * aet.ones(101)]).eval()
assert not alltrue_scalar([False]).eval()
- assert not alltrue_scalar([tt.zeros(10)]).eval()
+ assert not alltrue_scalar([aet.zeros(10)]).eval()
assert not alltrue_scalar([True, False]).eval()
- assert not alltrue_scalar([np.array([0, -1]), tt.ones(60)]).eval()
- assert not alltrue_scalar([np.ones(10), False, 5 * tt.ones(101)]).eval()
+ assert not alltrue_scalar([np.array([0, -1]), aet.ones(60)]).eval()
+ assert not alltrue_scalar([np.ones(10), False, 5 * aet.ones(101)]).eval()
def test_alltrue_shape():
- vals = [True, tt.ones(10), tt.zeros(5)]
+ vals = [True, aet.ones(10), aet.zeros(5)]
assert alltrue_scalar(vals).eval().shape == ()
@@ -102,11 +102,11 @@ def logp(self, value):
p = self.p
return bound(
- factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
+ factln(n) - factln(value).sum() + (value * aet.log(p)).sum(),
value >= 0,
0 <= p,
p <= 1,
- tt.isclose(p.sum(), 1),
+ aet.isclose(p.sum(), 1),
broadcast_conditions=False,
)
@@ -123,11 +123,11 @@ def logp(self, value):
p = self.p
return bound(
- factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
- tt.all(value >= 0),
- tt.all(0 <= p),
- tt.all(p <= 1),
- tt.isclose(p.sum(), 1),
+ factln(n) - factln(value).sum() + (value * aet.log(p)).sum(),
+ aet.all(value >= 0),
+ aet.all(0 <= p),
+ aet.all(p <= 1),
+ aet.isclose(p.sum(), 1),
broadcast_conditions=False,
)
@@ -156,30 +156,30 @@ def test_logp(self):
chol_val = floatX(np.array([[1, 0.9], [0, 2]]))
cov_val = floatX(np.dot(chol_val, chol_val.T))
- cov = tt.matrix("cov")
+ cov = aet.matrix("cov")
cov.tag.test_value = cov_val
delta_val = floatX(np.random.randn(5, 2))
- delta = tt.matrix("delta")
+ delta = aet.matrix("delta")
delta.tag.test_value = delta_val
expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
expect = expect.logpdf(delta_val).sum()
logp = MvNormalLogp()(cov, delta)
- logp_f = theano.function([cov, delta], logp)
+ logp_f = aesara.function([cov, delta], logp)
logp = logp_f(cov_val, delta_val)
npt.assert_allclose(logp, expect)
- @theano.config.change_flags(compute_test_value="ignore")
+ @aesara.config.change_flags(compute_test_value="ignore")
def test_grad(self):
np.random.seed(42)
def func(chol_vec, delta):
- chol = tt.stack(
+ chol = aet.stack(
[
- tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
- tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
+ aet.stack([aet.exp(0.1 * chol_vec[0]), 0]),
+ aet.stack([chol_vec[1], 2 * aet.exp(chol_vec[2])]),
]
)
- cov = tt.dot(chol, chol.T)
+ cov = aet.dot(chol, chol.T)
return MvNormalLogp()(cov, delta)
chol_vec_val = floatX(np.array([0.5, 1.0, -0.1]))
@@ -190,46 +190,46 @@ def func(chol_vec, delta):
delta_val = floatX(np.random.randn(5, 2))
verify_grad(func, [chol_vec_val, delta_val])
- @pytest.mark.skip(reason="Fix in theano not released yet: Theano#5908")
- @theano.config.change_flags(compute_test_value="ignore")
+ @pytest.mark.skip(reason="Fix in aesara not released yet: Theano#5908")
+ @aesara.config.change_flags(compute_test_value="ignore")
def test_hessian(self):
- chol_vec = tt.vector("chol_vec")
+ chol_vec = aet.vector("chol_vec")
chol_vec.tag.test_value = np.array([0.1, 2, 3])
- chol = tt.stack(
+ chol = aet.stack(
[
- tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
- tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
+ aet.stack([aet.exp(0.1 * chol_vec[0]), 0]),
+ aet.stack([chol_vec[1], 2 * aet.exp(chol_vec[2])]),
]
)
- cov = tt.dot(chol, chol.T)
- delta = tt.matrix("delta")
+ cov = aet.dot(chol, chol.T)
+ delta = aet.matrix("delta")
delta.tag.test_value = np.ones((5, 2))
logp = MvNormalLogp()(cov, delta)
- g_cov, g_delta = tt.grad(logp, [cov, delta])
- tt.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
+ g_cov, g_delta = aet.grad(logp, [cov, delta])
+ aet.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
class TestSplineWrapper:
- @theano.config.change_flags(compute_test_value="ignore")
+ @aesara.config.change_flags(compute_test_value="ignore")
def test_grad(self):
x = np.linspace(0, 1, 100)
y = x * x
spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1))
verify_grad(spline, [0.5])
- @theano.config.change_flags(compute_test_value="ignore")
+ @aesara.config.change_flags(compute_test_value="ignore")
def test_hessian(self):
x = np.linspace(0, 1, 100)
y = x * x
spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1))
- x_var = tt.dscalar("x")
- (g_x,) = tt.grad(spline(x_var), [x_var])
+ x_var = aet.dscalar("x")
+ (g_x,) = aet.grad(spline(x_var), [x_var])
with pytest.raises(NotImplementedError):
- tt.grad(g_x, [x_var])
+ aet.grad(g_x, [x_var])
class TestI0e:
- @theano.config.change_flags(compute_test_value="ignore")
+ @aesara.config.change_flags(compute_test_value="ignore")
def test_grad(self):
verify_grad(i0e, [0.5])
verify_grad(i0e, [-2.0])
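
For reference, toggling test-value computation now goes through aesara.config.change_flags, which still works as a decorator; a sketch of the form used in the tests above:

import aesara
import aesara.tensor as aet

@aesara.config.change_flags(compute_test_value="ignore")
def build_graph_without_test_values():
    # Inside this function Aesara does not require test values on inputs.
    x = aet.vector("x")
    return aet.sum(x ** 2)
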
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 06efc90b8d..f26b6743b8 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -15,14 +15,15 @@
import itertools
import sys
+import aesara
+import aesara.tensor as aet
import numpy as np
import numpy.random as nr
import pytest
import scipy.stats
import scipy.stats.distributions as sp
-import theano
-import theano.tensor as tt
+from aesara.tensor.var import TensorVariable
from numpy import array, exp, inf, log
from numpy.testing import assert_allclose, assert_almost_equal, assert_equal
from packaging.version import parse
@@ -32,6 +33,7 @@
import pymc3 as pm
+from pymc3.aesaraf import floatX
from pymc3.blocking import DictToVarBijection
from pymc3.distributions import (
AR1,
@@ -98,7 +100,6 @@
from pymc3.math import kronecker, logsumexp
from pymc3.model import Deterministic, Model, Point
from pymc3.tests.helpers import select_by_precision
-from pymc3.theanof import floatX
from pymc3.vartypes import continuous_types
SCIPY_VERSION = parse(scipy_version)
@@ -126,7 +127,7 @@ class Domain:
def __init__(self, vals, dtype=None, edges=None, shape=None):
avals = array(vals, dtype=dtype)
if dtype is None and not str(avals.dtype).startswith("int"):
- avals = avals.astype(theano.config.floatX)
+ avals = avals.astype(aesara.config.floatX)
vals = [array(v, dtype=avals.dtype) for v in vals]
if edges is None:
@@ -941,7 +942,7 @@ def test_chi_squared(self):
)
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Poor CDF in SciPy. See scipy/scipy#869 for details.",
)
def test_wald_scipy(self):
@@ -1240,12 +1241,12 @@ def test_fun(value, mu, sigma):
)
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Fails on float32 due to numerical issues",
)
def test_gamma_logcdf(self):
- # pymc-devs/Theano-PyMC#224: skip_paramdomain_outside_edge_test has to be set
- # True to avoid triggering a C-level assertion in the Theano GammaQ function
+ # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set
+ # True to avoid triggering a C-level assertion in the Aesara GammaQ function
# in gamma.c file. Can be set back to False (default) once that issue is solved
self.check_logcdf(
Gamma,
@@ -1256,7 +1257,7 @@ def test_gamma_logcdf(self):
)
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Fails on float32 due to numerical issues",
)
def test_inverse_gamma(self):
@@ -1266,8 +1267,8 @@ def test_inverse_gamma(self):
{"alpha": Rplus, "beta": Rplus},
lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta),
)
- # pymc-devs/Theano-PyMC#224: skip_paramdomain_outside_edge_test has to be set
- # True to avoid triggering a C-level assertion in the Theano GammaQ function
+ # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set
+ # True to avoid triggering a C-level assertion in the Aesara GammaQ function
# in gamma.c file. Can be set back to False (default) once that issue is solved
self.check_logcdf(
InverseGamma,
@@ -1278,7 +1279,7 @@ def test_inverse_gamma(self):
)
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Fails on float32 due to scaling issues",
)
def test_inverse_gamma_alt_params(self):
@@ -1309,7 +1310,7 @@ def test_pareto(self):
)
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Fails on float32 due to inf issues",
)
def test_weibull(self):
@@ -1366,7 +1367,7 @@ def test_binomial(self):
)
# Too lazy to propagate decimal parameter through the whole chain of deps
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
@pytest.mark.xfail(
condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
)
@@ -1474,7 +1475,7 @@ def test_constantdist(self):
self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
# Too lazy to propagate decimal parameter through the whole chain of deps
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_zeroinflatedpoisson(self):
self.checkd(
ZeroInflatedPoisson,
@@ -1488,7 +1489,7 @@ def test_zeroinflatedpoisson(self):
)
# Too lazy to propagate decimal parameter through the whole chain of deps
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_zeroinflatednegativebinomial(self):
self.checkd(
ZeroInflatedNegativeBinomial,
@@ -1503,7 +1504,7 @@ def test_zeroinflatednegativebinomial(self):
)
# Too lazy to propagate decimal parameter through the whole chain of deps
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_zeroinflatedbinomial(self):
self.checkd(
ZeroInflatedBinomial,
@@ -1570,28 +1571,28 @@ def MvNormalUpper(*args, **kwargs):
)
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Fails on float32 due to inf issues",
)
def test_mvnormal_indef(self):
cov_val = np.array([[1, 0.5], [0.5, -2]])
- cov = tt.matrix("cov")
+ cov = aet.matrix("cov")
cov.tag.test_value = np.eye(2)
mu = floatX(np.zeros(2))
- x = tt.vector("x")
+ x = aet.vector("x")
x.tag.test_value = np.zeros(2)
logp = MvNormal.dist(mu=mu, cov=cov).logp(x)
- f_logp = theano.function([cov, x], logp)
+ f_logp = aesara.function([cov, x], logp)
assert f_logp(cov_val, np.ones(2)) == -np.inf
- dlogp = tt.grad(logp, cov)
- f_dlogp = theano.function([cov, x], dlogp)
+ dlogp = aet.grad(logp, cov)
+ f_dlogp = aesara.function([cov, x], dlogp)
assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2))))
logp = MvNormal.dist(mu=mu, tau=cov).logp(x)
- f_logp = theano.function([cov, x], logp)
+ f_logp = aesara.function([cov, x], logp)
assert f_logp(cov_val, np.ones(2)) == -np.inf
- dlogp = tt.grad(logp, cov)
- f_dlogp = theano.function([cov, x], dlogp)
+ dlogp = aet.grad(logp, cov)
+ f_dlogp = aesara.function([cov, x], dlogp)
assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2))))
def test_mvnormal_init_fail(self):
@@ -1778,13 +1779,13 @@ def test_dirichlet_with_batch_shapes(self, dist_shape):
assert_almost_equal(pymc3_res[idx], scipy_res)
def test_dirichlet_shape(self):
- a = tt.as_tensor_variable(np.r_[1, 2])
+ a = aet.as_tensor_variable(np.r_[1, 2])
with pytest.warns(DeprecationWarning):
dir_rv = Dirichlet.dist(a)
assert dir_rv.shape == (2,)
- with pytest.warns(DeprecationWarning), theano.change_flags(compute_test_value="ignore"):
- dir_rv = Dirichlet.dist(tt.vector())
+ with pytest.warns(DeprecationWarning), aesara.change_flags(compute_test_value="ignore"):
+ dir_rv = Dirichlet.dist(aet.vector())
def test_dirichlet_2D(self):
self.check_logp(
@@ -1925,16 +1926,16 @@ def test_multinomial_vec_2d_p(self):
def test_batch_multinomial(self):
n = 10
vals = np.zeros((4, 5, 3), dtype="int32")
- p = np.zeros_like(vals, dtype=theano.config.floatX)
+ p = np.zeros_like(vals, dtype=aesara.config.floatX)
inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
np.put_along_axis(vals, inds, n, axis=-1)
np.put_along_axis(p, inds, 1, axis=-1)
dist = Multinomial.dist(n=n, p=p, shape=vals.shape)
- value = tt.tensor3(dtype="int32")
+ value = aet.tensor3(dtype="int32")
value.tag.test_value = np.zeros_like(vals, dtype="int32")
- logp = tt.exp(dist.logp(value))
- f = theano.function(inputs=[value], outputs=logp)
+ logp = aet.exp(dist.logp(value))
+ f = aesara.function(inputs=[value], outputs=logp)
assert_almost_equal(
f(vals),
np.ones(vals.shape[:-1] + (1,)),
@@ -2063,7 +2064,7 @@ def test_batch_dirichlet_multinomial(self):
# except for one category / dimension which is given the value of 1000
n = 5
vals = np.zeros((4, 5, 3), dtype="int32")
- a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001
+ a = np.zeros_like(vals, dtype=aesara.config.floatX) + 0.001
inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
np.put_along_axis(vals, inds, n, axis=-1)
np.put_along_axis(a, inds, 1000, axis=-1)
@@ -2213,7 +2214,7 @@ def test_ex_gaussian_cdf_outside_edges(self):
skip_paramdomain_inside_edge_test=True, # Valid values are tested above
)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_vonmises(self):
self.check_logp(
VonMises,
@@ -2278,7 +2279,7 @@ def test_rice(self):
lambda value, b, sigma: sp.rice.logpdf(value, b=b, loc=0, scale=sigma),
)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_moyal(self):
self.check_logp(
Moyal,
@@ -2293,7 +2294,7 @@ def test_moyal(self):
lambda value, mu, sigma: floatX(sp.moyal.logcdf(value, mu, sigma)),
)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_interpolated(self):
for mu in R.vals:
for sigma in Rplus.vals:
@@ -2352,8 +2353,8 @@ def test_bound():
a = ArrayNormal("c", shape=2)
assert_equal(a.tag.test_value, np.array([1.5, 2.5]))
- lower = tt.vector("lower")
- lower.tag.test_value = np.array([1, 2]).astype(theano.config.floatX)
+ lower = aet.vector("lower")
+ lower.tag.test_value = np.array([1, 2]).astype(aesara.config.floatX)
upper = 3
ArrayNormal = Bound(Normal, lower=lower, upper=upper)
dist = ArrayNormal.dist(mu=0, sigma=1, shape=2)
@@ -2421,7 +2422,7 @@ def setup_class(self):
nb2 = pm.NegativeBinomial("nb_with_p_n", p=pm.Uniform("nbp"), n=10)
# Expected value of outcome
- mu = Deterministic("mu", floatX(alpha + tt.dot(X, b)))
+ mu = Deterministic("mu", floatX(alpha + aet.dot(X, b)))
# add a bounded variable as well
bound_var = Bound(Normal, lower=1.0)("bound_var", mu=0, sigma=10)
@@ -2582,7 +2583,7 @@ def test_issue_3051(self, dims, dist_cls, kwargs):
X = np.random.normal(size=(20, dims))
actual_t = d.logp(X)
- assert isinstance(actual_t, tt.TensorVariable)
+ assert isinstance(actual_t, TensorVariable)
actual_a = actual_t.eval()
assert isinstance(actual_a, np.ndarray)
assert actual_a.shape == (X.shape[0],)
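
For reference, compiling a log-probability and its gradient keeps the same two-call shape under the new names; a sketch modelled on test_mvnormal_indef above, with floatX casts added as an assumption for float32 safety:

import aesara
import aesara.tensor as aet
import numpy as np

import pymc3 as pm

cov = aet.matrix("cov")
cov.tag.test_value = pm.floatX(np.eye(2))
x = aet.vector("x")
x.tag.test_value = pm.floatX(np.zeros(2))

logp = pm.MvNormal.dist(mu=pm.floatX(np.zeros(2)), cov=cov).logp(x)
f_logp = aesara.function([cov, x], logp)                  # compiled log-density
f_dlogp = aesara.function([cov, x], aet.grad(logp, cov))  # and its gradient w.r.t. cov
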
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index a56f3f3b7b..684f1898ac 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -17,12 +17,12 @@
from contextlib import ExitStack as does_not_raise
+import aesara
import numpy as np
import numpy.random as nr
import numpy.testing as npt
import pytest
import scipy.stats as st
-import theano
from scipy import linalg
from scipy.special import expit
@@ -1127,7 +1127,7 @@ def ref_rand(size, mu, sigma):
pymc3_random(pm.Moyal, {"mu": R, "sigma": Rplus}, ref_rand=ref_rand)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_interpolated(self):
for mu in R.vals:
for sigma in Rplus.vals:
diff --git a/pymc3/tests/test_distributions_timeseries.py b/pymc3/tests/test_distributions_timeseries.py
index b1401bd90e..8319cde654 100644
--- a/pymc3/tests/test_distributions_timeseries.py
+++ b/pymc3/tests/test_distributions_timeseries.py
@@ -15,6 +15,7 @@
import numpy as np
import pytest
+from pymc3.aesaraf import floatX
from pymc3.distributions.continuous import Flat, Normal
from pymc3.distributions.timeseries import AR, AR1, GARCH11, EulerMaruyama
from pymc3.model import Model
@@ -24,7 +25,6 @@
sample_posterior_predictive,
)
from pymc3.tests.helpers import select_by_precision
-from pymc3.theanof import floatX
pytestmark = pytest.mark.usefixtures("seeded_test")
diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
index d79093b392..5cb6c9c8c2 100644
--- a/pymc3/tests/test_examples.py
+++ b/pymc3/tests/test_examples.py
@@ -12,20 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
+import aesara.tensor as aet
import arviz as az
import matplotlib
import numpy as np
import pandas as pd
import pytest
-import theano
-import theano.tensor as tt
from packaging import version
import pymc3 as pm
+from pymc3.aesaraf import floatX
from pymc3.tests.helpers import SeededTest
-from pymc3.theanof import floatX
if version.parse(matplotlib.__version__) < version.parse("3.3"):
matplotlib.use("Agg", warn=False)
@@ -68,7 +68,7 @@ def build_model(self):
with pm.Model() as model:
effects = pm.Normal("effects", mu=0, sigma=100, shape=len(P.columns))
- logit_p = tt.dot(floatX(np.array(P)), effects)
+ logit_p = aet.dot(floatX(np.array(P)), effects)
pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values))
return model
@@ -186,13 +186,13 @@ def build_disaster_model(masked=False):
# Allocate appropriate Poisson rates to years before and after current
# switchpoint location
idx = np.arange(years)
- rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
+ rate = aet.switch(switchpoint >= idx, early_mean, late_mean)
# Data likelihood
pm.Poisson("disasters", rate, observed=disasters_data)
return model
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
class TestDisasterModel(SeededTest):
# Time series of recorded coal mining disasters in the UK from 1851 to 1962
def test_disaster_model(self):
@@ -294,7 +294,7 @@ def test_run(self):
@pytest.mark.xfail(
- condition=(theano.config.floatX == "float32"),
+ condition=(aesara.config.floatX == "float32"),
reason="Fails on float32 due to starting inf at starting logP",
)
class TestRSV(SeededTest):
diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py
index 893aeeaf77..77f4261bc4 100644
--- a/pymc3/tests/test_gp.py
+++ b/pymc3/tests/test_gp.py
@@ -16,11 +16,11 @@
from functools import reduce
from operator import add
+import aesara
+import aesara.tensor as aet
import numpy as np
import numpy.testing as npt
import pytest
-import theano
-import theano.tensor as tt
import pymc3 as pm
@@ -34,7 +34,7 @@ def test_value(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
zero_mean = pm.gp.mean.Zero()
- M = theano.function([], zero_mean(X))()
+ M = aesara.function([], zero_mean(X))()
assert np.all(M == 0)
assert M.shape == (10,)
@@ -44,7 +44,7 @@ def test_value(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
const_mean = pm.gp.mean.Constant(6)
- M = theano.function([], const_mean(X))()
+ M = aesara.function([], const_mean(X))()
assert np.all(M == 6)
assert M.shape == (10,)
@@ -54,7 +54,7 @@ def test_value(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
linear_mean = pm.gp.mean.Linear(2, 0.5)
- M = theano.function([], linear_mean(X))()
+ M = aesara.function([], linear_mean(X))()
npt.assert_allclose(M[1], 0.7222, atol=1e-3)
assert M.shape == (10,)
@@ -66,7 +66,7 @@ def test_add(self):
mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5)
mean2 = pm.gp.mean.Constant(2)
mean = mean1 + mean2 + mean2
- M = theano.function([], mean(X))()
+ M = aesara.function([], mean(X))()
npt.assert_allclose(M[1], 0.7222 + 2 + 2, atol=1e-3)
def test_prod(self):
@@ -75,7 +75,7 @@ def test_prod(self):
mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5)
mean2 = pm.gp.mean.Constant(2)
mean = mean1 * mean2 * mean2
- M = theano.function([], mean(X))()
+ M = aesara.function([], mean(X))()
npt.assert_allclose(M[1], 0.7222 * 2 * 2, atol=1e-3)
def test_add_multid(self):
@@ -86,7 +86,7 @@ def test_add_multid(self):
mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b)
mean2 = pm.gp.mean.Constant(2)
mean = mean1 + mean2 + mean2
- M = theano.function([], mean(X))()
+ M = aesara.function([], mean(X))()
npt.assert_allclose(M[1], 10.8965 + 2 + 2, atol=1e-3)
def test_prod_multid(self):
@@ -97,7 +97,7 @@ def test_prod_multid(self):
mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b)
mean2 = pm.gp.mean.Constant(2)
mean = mean1 * mean2 * mean2
- M = theano.function([], mean(X))()
+ M = aesara.function([], mean(X))()
npt.assert_allclose(M[1], 10.8965 * 2 * 2, atol=1e-3)
@@ -108,10 +108,10 @@ def test_symadd_cov(self):
cov1 = pm.gp.cov.ExpQuad(1, 0.1)
cov2 = pm.gp.cov.ExpQuad(1, 0.1)
cov = cov1 + cov2
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_rightadd_scalar(self):
@@ -119,10 +119,10 @@ def test_rightadd_scalar(self):
with pm.Model() as model:
a = 1
cov = pm.gp.cov.ExpQuad(1, 0.1) + a
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_leftadd_scalar(self):
@@ -130,10 +130,10 @@ def test_leftadd_scalar(self):
with pm.Model() as model:
a = 1
cov = a + pm.gp.cov.ExpQuad(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_rightadd_matrix(self):
@@ -141,21 +141,21 @@ def test_rightadd_matrix(self):
M = 2 * np.ones((10, 10))
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(1, 0.1) + M
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_leftadd_matrixt(self):
X = np.linspace(0, 1, 10)[:, None]
- M = 2 * tt.ones((10, 10))
+ M = 2 * aet.ones((10, 10))
with pm.Model() as model:
cov = M + pm.gp.cov.ExpQuad(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_leftprod_matrix(self):
@@ -164,8 +164,8 @@ def test_leftprod_matrix(self):
with pm.Model() as model:
cov = M + pm.gp.cov.ExpQuad(1, 0.1)
cov_true = pm.gp.cov.ExpQuad(1, 0.1) + M
- K = theano.function([], cov(X))()
- K_true = theano.function([], cov_true(X))()
+ K = aesara.function([], cov(X))()
+ K_true = aesara.function([], cov_true(X))()
assert np.allclose(K, K_true)
def test_inv_rightadd(self):
@@ -181,10 +181,10 @@ def test_symprod_cov(self):
cov1 = pm.gp.cov.ExpQuad(1, 0.1)
cov2 = pm.gp.cov.ExpQuad(1, 0.1)
cov = cov1 * cov2
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940 * 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_rightprod_scalar(self):
@@ -192,10 +192,10 @@ def test_rightprod_scalar(self):
with pm.Model() as model:
a = 2
cov = pm.gp.cov.ExpQuad(1, 0.1) * a
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_leftprod_scalar(self):
@@ -203,10 +203,10 @@ def test_leftprod_scalar(self):
with pm.Model() as model:
a = 2
cov = a * pm.gp.cov.ExpQuad(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_rightprod_matrix(self):
@@ -214,10 +214,10 @@ def test_rightprod_matrix(self):
M = 2 * np.ones((10, 10))
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(1, 0.1) * M
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_leftprod_matrix(self):
@@ -226,8 +226,8 @@ def test_leftprod_matrix(self):
with pm.Model() as model:
cov = M * pm.gp.cov.ExpQuad(1, 0.1)
cov_true = pm.gp.cov.ExpQuad(1, 0.1) * M
- K = theano.function([], cov(X))()
- K_true = theano.function([], cov_true(X))()
+ K = aesara.function([], cov(X))()
+ K_true = aesara.function([], cov_true(X))()
assert np.allclose(K, K_true)
def test_multiops(self):
@@ -244,12 +244,12 @@ def test_multiops(self):
+ pm.gp.cov.ExpQuad(1, 0.1)
+ 3
)
- K1 = theano.function([], cov1(X))()
- K2 = theano.function([], cov2(X))()
+ K1 = aesara.function([], cov1(X))()
+ K2 = aesara.function([], cov2(X))()
assert np.allclose(K1, K2)
# check diagonal
- K1d = theano.function([], cov1(X, diag=True))()
- K2d = theano.function([], cov2(X, diag=True))()
+ K1d = aesara.function([], cov1(X, diag=True))()
+ K2d = aesara.function([], cov2(X, diag=True))()
npt.assert_allclose(np.diag(K1), K2d, atol=1e-5)
npt.assert_allclose(np.diag(K2), K1d, atol=1e-5)
@@ -265,10 +265,10 @@ def test_symexp_cov(self):
with pm.Model() as model:
cov1 = pm.gp.cov.ExpQuad(1, 0.1)
cov = cov1 ** 2
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_covexp_numpy(self):
@@ -276,32 +276,32 @@ def test_covexp_numpy(self):
with pm.Model() as model:
a = np.array([[2]])
cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
- def test_covexp_theano(self):
+ def test_covexp_aesara(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
- a = tt.alloc(2.0, 1, 1)
+ a = aet.alloc(2.0, 1, 1)
cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_covexp_shared(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
- a = theano.shared(2.0)
+ a = aesara.shared(2.0)
cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_invalid_covexp(self):
@@ -321,11 +321,11 @@ def test_symprod_cov(self):
cov1 = pm.gp.cov.ExpQuad(1, 0.1)
cov2 = pm.gp.cov.ExpQuad(1, 0.1)
cov = pm.gp.cov.Kron([cov1, cov2])
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3)
npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_multiops(self):
@@ -342,8 +342,8 @@ def test_multiops(self):
)
cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1)
cov = pm.gp.cov.Kron([cov1, cov2])
- K_true = kronecker(theano.function([], cov1(X1))(), theano.function([], cov2(X2))()).eval()
- K = theano.function([], cov(X))()
+ K_true = kronecker(aesara.function([], cov1(X1))(), aesara.function([], cov2(X2))()).eval()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K_true, K)
@@ -352,30 +352,30 @@ def test_slice1(self):
X = np.linspace(0, 1, 30).reshape(10, 3)
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(3, 0.1, active_dims=[0, 0, 1])
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.20084298, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_slice2(self):
X = np.linspace(0, 1, 30).reshape(10, 3)
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2])
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_slice3(self):
X = np.linspace(0, 1, 30).reshape(10, 3)
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2])
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_diffslice(self):
@@ -384,10 +384,10 @@ def test_diffslice(self):
cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad(
3, ls=[0.1, 0.2, 0.3]
)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_raises(self):
@@ -402,7 +402,7 @@ def test_stable(self):
X = np.random.uniform(low=320.0, high=400.0, size=[2000, 2])
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(2, 0.1)
- dists = theano.function([], cov.square_dist(X, X))()
+ dists = aesara.function([], cov.square_dist(X, X))()
assert not np.any(dists < 0)
@@ -411,44 +411,44 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_2d(self):
X = np.linspace(0, 1, 10).reshape(5, 2)
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(2, 0.5)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.820754, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_2dard(self):
X = np.linspace(0, 1, 10).reshape(5, 2)
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(2, np.array([1, 2]))
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.969607, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_inv_lengthscale(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.ExpQuad(1, ls_inv=10)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -457,14 +457,14 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.WhiteNoise(sigma=0.5)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.0, atol=1e-3)
npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
# check predict
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.0, atol=1e-3)
# white noise predicting should return all zeros
npt.assert_allclose(K[0, 0], 0.0, atol=1e-3)
@@ -475,14 +475,14 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Constant(2.5)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 2.5, atol=1e-3)
npt.assert_allclose(K[0, 0], 2.5, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 2.5, atol=1e-3)
npt.assert_allclose(K[0, 0], 2.5, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -491,12 +491,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.RatQuad(1, ls=0.1, alpha=0.5)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -505,12 +505,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Exponential(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -519,12 +519,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Matern52(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -533,12 +533,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Matern32(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -547,11 +547,11 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Matern12(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3)
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -560,12 +560,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Cosine(1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.766, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.766, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -574,12 +574,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Periodic(1, 0.1, 0.1)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -588,12 +588,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Linear(1, 0.5)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -602,12 +602,12 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
with pm.Model() as model:
cov = pm.gp.cov.Polynomial(1, 0.5, 2, 0)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -616,17 +616,17 @@ def test_1d(self):
X = np.linspace(0, 1, 10)[:, None]
def warp_func(x, a, b, c):
- return x + (a * tt.tanh(b * (x - c)))
+ return x + (a * aet.tanh(b * (x - c)))
with pm.Model() as model:
cov_m52 = pm.gp.cov.Matern52(1, 0.2)
cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_raises(self):
@@ -642,16 +642,16 @@ def test_1d(self):
X = np.linspace(0, 2, 10)[:, None]
def tanh_func(x, x1, x2, w, x0):
- return (x1 + x2) / 2.0 - (x1 - x2) / 2.0 * tt.tanh((x - x0) / w)
+ return (x1 + x2) / 2.0 - (x1 - x2) / 2.0 * aet.tanh((x - x0) / w)
with pm.Model() as model:
cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0))
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_raises(self):
@@ -673,12 +673,12 @@ def scaling_func(x, a, b):
with pm.Model() as model:
cov_m52 = pm.gp.cov.Matern52(1, 0.2)
cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_raises(self):
@@ -1200,12 +1200,12 @@ def test_1d_tau1(self):
etalon = 0.600881
with pm.Model():
cov = pm.gp.cov.Circular(1, 1, tau=5)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], etalon, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], etalon, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def test_1d_tau2(self):
@@ -1213,10 +1213,10 @@ def test_1d_tau2(self):
etalon = 0.691239
with pm.Model():
cov = pm.gp.cov.Circular(1, 1, tau=4)
- K = theano.function([], cov(X))()
+ K = aesara.function([], cov(X))()
npt.assert_allclose(K[0, 1], etalon, atol=1e-3)
- K = theano.function([], cov(X, X))()
+ K = aesara.function([], cov(X, X))()
npt.assert_allclose(K[0, 1], etalon, atol=1e-3)
# check diagonal
- Kd = theano.function([], cov(X, diag=True))()
+ Kd = aesara.function([], cov(X, diag=True))()
npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
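
For reference, covariance matrices in the GP tests are still realized by compiling a zero-argument function; a sketch of the pattern repeated above:

import aesara
import numpy as np

import pymc3 as pm

X = np.linspace(0, 1, 10)[:, None]
with pm.Model():
    cov = pm.gp.cov.ExpQuad(1, 0.1)
    K = aesara.function([], cov(X))()               # full covariance matrix
    Kd = aesara.function([], cov(X, diag=True))()   # its diagonal only
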
diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py
index 057c317075..1a113343cc 100644
--- a/pymc3/tests/test_hmc.py
+++ b/pymc3/tests/test_hmc.py
@@ -19,9 +19,9 @@
import pymc3
+from pymc3.aesaraf import floatX
from pymc3.step_methods.hmc.base_hmc import BaseHMC
from pymc3.tests import models
-from pymc3.theanof import floatX
logger = logging.getLogger("pymc3")
diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py
index b31319021f..b82459602d 100644
--- a/pymc3/tests/test_math.py
+++ b/pymc3/tests/test_math.py
@@ -12,14 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
+import aesara.tensor as aet
import numpy as np
import numpy.testing as npt
import pytest
-import theano
-import theano.tensor as tt
from scipy.special import logsumexp as scipy_logsumexp
+from pymc3.aesaraf import floatX
from pymc3.math import (
LogDet,
cartesian,
@@ -36,7 +37,6 @@
probit,
)
from pymc3.tests.helpers import SeededTest, verify_grad
-from pymc3.theanof import floatX
def test_kronecker():
@@ -45,7 +45,7 @@ def test_kronecker():
[a, b, c] = [np.random.rand(3, 3 + i) for i in range(3)]
custom = kronecker(a, b, c) # Custom version
- nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c))
+ nested = aet.slinalg.kron(a, aet.slinalg.kron(b, c))
np.testing.assert_array_almost_equal(custom.eval(), nested.eval()) # Standard nested version
@@ -83,7 +83,7 @@ def test_kron_dot():
x = np.random.rand(tot_size).reshape((tot_size, 1))
# Construct entire kronecker product then multiply
big = kronecker(*Ks)
- slow_ans = tt.dot(big, x)
+ slow_ans = aet.dot(big, x)
# Use tricks to avoid construction of entire kronecker product
fast_ans = kron_dot(Ks, x)
np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval())
@@ -98,7 +98,7 @@ def test_kron_solve_lower():
x = np.random.rand(tot_size).reshape((tot_size, 1))
# Construct entire kronecker product then solve
big = kronecker(*Ls)
- slow_ans = tt.slinalg.solve_lower_triangular(big, x)
+ slow_ans = aet.slinalg.solve_lower_triangular(big, x)
# Use tricks to avoid construction of entire kronecker product
fast_ans = kron_solve_lower(Ls, x)
np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval())
@@ -170,10 +170,10 @@ def setup_method(self):
self.op_class = LogDet
self.op = logdet
- @theano.config.change_flags(compute_test_value="ignore")
+ @aesara.config.change_flags(compute_test_value="ignore")
def validate(self, input_mat):
- x = theano.tensor.matrix()
- f = theano.function([x], self.op(x))
+ x = aesara.tensor.matrix()
+ f = aesara.function([x], self.op(x))
out = f(input_mat)
svd_diag = np.linalg.svd(input_mat, compute_uv=False)
numpy_out = np.sum(np.log(np.abs(svd_diag)))
@@ -185,24 +185,24 @@ def validate(self, input_mat):
verify_grad(self.op, [input_mat])
@pytest.mark.skipif(
- theano.config.device in ["cuda", "gpu"],
+ aesara.config.device in ["cuda", "gpu"],
reason="No logDet implementation on GPU.",
)
def test_basic(self):
# Calls validate with different params
test_case_1 = np.random.randn(3, 3) / np.sqrt(3)
test_case_2 = np.random.randn(10, 10) / np.sqrt(10)
- self.validate(test_case_1.astype(theano.config.floatX))
- self.validate(test_case_2.astype(theano.config.floatX))
+ self.validate(test_case_1.astype(aesara.config.floatX))
+ self.validate(test_case_2.astype(aesara.config.floatX))
def test_expand_packed_triangular():
with pytest.raises(ValueError):
- x = tt.matrix("x")
- x.tag.test_value = np.array([[1.0]], dtype=theano.config.floatX)
+ x = aet.matrix("x")
+ x.tag.test_value = np.array([[1.0]], dtype=aesara.config.floatX)
expand_packed_triangular(5, x)
N = 5
- packed = tt.vector("packed")
+ packed = aet.vector("packed")
packed.tag.test_value = floatX(np.zeros(N * (N + 1) // 2))
with pytest.raises(TypeError):
expand_packed_triangular(packed.shape[0], packed)
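
The LogDet validation helper above shows the decorator form of the renamed config API: aesara.config.change_flags replaces theano.config.change_flags, and graphs are compiled with aesara.function. A stripped-down sketch of that helper (illustrative only; pymc3.math.logdet is the op it is paired with in the test above):

    import aesara
    import aesara.tensor as aet
    import numpy as np

    @aesara.config.change_flags(compute_test_value="ignore")
    def eval_matrix_op(op, value):
        # Compile `op` applied to a symbolic matrix, then evaluate on `value`.
        x = aet.matrix()
        f = aesara.function([x], op(x))
        return f(value)

    # e.g. eval_matrix_op(pymc3.math.logdet, np.eye(3).astype(aesara.config.floatX))
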
diff --git a/pymc3/tests/test_minibatches.py b/pymc3/tests/test_minibatches.py
index 34dadaa8eb..9629a0765c 100644
--- a/pymc3/tests/test_minibatches.py
+++ b/pymc3/tests/test_minibatches.py
@@ -15,18 +15,18 @@
import itertools
import pickle
+import aesara
import numpy as np
import pytest
-import theano
+from aesara import tensor as aet
from scipy import stats as stats
-from theano import tensor as tt
import pymc3 as pm
-from pymc3 import GeneratorAdapter, Normal, floatX, generator, tt_rng
+from pymc3 import GeneratorAdapter, Normal, aet_rng, floatX, generator
+from pymc3.aesaraf import GeneratorOp
from pymc3.tests.helpers import select_by_precision
-from pymc3.theanof import GeneratorOp
class _DataSampler:
@@ -35,7 +35,7 @@ class _DataSampler:
"""
def __init__(self, data, batchsize=50, random_seed=42, dtype="floatX"):
- self.dtype = theano.config.floatX if dtype == "floatX" else dtype
+ self.dtype = aesara.config.floatX if dtype == "floatX" else dtype
self.rng = np.random.RandomState(random_seed)
self.data = data
self.n = batchsize
@@ -77,7 +77,7 @@ def test_basic(self):
generator = GeneratorAdapter(integers())
gop = GeneratorOp(generator)()
assert gop.tag.test_value == np.float32(0)
- f = theano.function([], gop)
+ f = aesara.function([], gop)
assert f() == np.float32(0)
assert f() == np.float32(1)
for _ in range(2, 100):
@@ -89,7 +89,7 @@ def test_ndim(self):
res = list(itertools.islice(integers_ndim(ndim), 0, 2))
generator = GeneratorAdapter(integers_ndim(ndim))
gop = GeneratorOp(generator)()
- f = theano.function([], gop)
+ f = aesara.function([], gop)
assert ndim == res[0].ndim
np.testing.assert_equal(f(), res[0])
np.testing.assert_equal(f(), res[1])
@@ -97,9 +97,9 @@ def test_ndim(self):
def test_cloning_available(self):
gop = generator(integers())
res = gop ** 2
- shared = theano.shared(floatX(10))
- res1 = theano.clone(res, {gop: shared})
- f = theano.function([], res1)
+ shared = aesara.shared(floatX(10))
+ res1 = aesara.clone_replace(res, {gop: shared})
+ f = aesara.function([], res1)
assert f() == np.float32(100)
def test_default_value(self):
@@ -108,7 +108,7 @@ def gen():
yield floatX(np.ones((10, 10)) * i)
gop = generator(gen(), np.ones((10, 10)) * 10)
- f = theano.function([], gop)
+ f = aesara.function([], gop)
np.testing.assert_equal(np.ones((10, 10)) * 0, f())
np.testing.assert_equal(np.ones((10, 10)) * 1, f())
np.testing.assert_equal(np.ones((10, 10)) * 10, f())
@@ -121,7 +121,7 @@ def gen():
yield floatX(np.ones((10, 10)) * i)
gop = generator(gen())
- f = theano.function([], gop)
+ f = aesara.function([], gop)
np.testing.assert_equal(np.ones((10, 10)) * 0, f())
np.testing.assert_equal(np.ones((10, 10)) * 1, f())
with pytest.raises(StopIteration):
@@ -139,12 +139,12 @@ def test_pickling(self, datagen):
def test_gen_cloning_with_shape_change(self, datagen):
gen = generator(datagen)
- gen_r = tt_rng().normal(size=gen.shape).T
+ gen_r = aet_rng().normal(size=gen.shape).T
X = gen.dot(gen_r)
- res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
+ res, _ = aesara.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
assert res.eval().shape == (50,)
- shared = theano.shared(datagen.data.astype(gen.dtype))
- res2 = theano.clone(res, {gen: shared ** 2})
+ shared = aesara.shared(datagen.data.astype(gen.dtype))
+ res2 = aesara.clone_replace(res, {gen: shared ** 2})
assert res2.eval().shape == (1000,)
@@ -170,11 +170,11 @@ class TestScaling:
def test_density_scaling(self):
with pm.Model() as model1:
Normal("n", observed=[[1]], total_size=1)
- p1 = theano.function([], model1.logpt)
+ p1 = aesara.function([], model1.logpt)
with pm.Model() as model2:
Normal("n", observed=[[1]], total_size=2)
- p2 = theano.function([], model2.logpt)
+ p2 = aesara.function([], model2.logpt)
assert p1() * 2 == p2()
def test_density_scaling_with_genarator(self):
@@ -189,12 +189,12 @@ def true_dens():
# We have same size models
with pm.Model() as model1:
Normal("n", observed=gen1(), total_size=100)
- p1 = theano.function([], model1.logpt)
+ p1 = aesara.function([], model1.logpt)
with pm.Model() as model2:
gen_var = generator(gen2())
Normal("n", observed=gen_var, total_size=100)
- p2 = theano.function([], model2.logpt)
+ p2 = aesara.function([], model2.logpt)
for i in range(10):
_1, _2, _t = p1(), p2(), next(t)
@@ -208,12 +208,12 @@ def test_gradient_with_scaling(self):
genvar = generator(gen1())
m = Normal("m")
Normal("n", observed=genvar, total_size=1000)
- grad1 = theano.function([m], tt.grad(model1.logpt, m))
+ grad1 = aesara.function([m], aet.grad(model1.logpt, m))
with pm.Model() as model2:
m = Normal("m")
- shavar = theano.shared(np.ones((1000, 100)))
+ shavar = aesara.shared(np.ones((1000, 100)))
Normal("n", observed=shavar)
- grad2 = theano.function([m], tt.grad(model2.logpt, m))
+ grad2 = aesara.function([m], aet.grad(model2.logpt, m))
for i in range(10):
shavar.set_value(np.ones((100, 100)) * i)
@@ -224,27 +224,27 @@ def test_gradient_with_scaling(self):
def test_multidim_scaling(self):
with pm.Model() as model0:
Normal("n", observed=[[1, 1], [1, 1]], total_size=[])
- p0 = theano.function([], model0.logpt)
+ p0 = aesara.function([], model0.logpt)
with pm.Model() as model1:
Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
- p1 = theano.function([], model1.logpt)
+ p1 = aesara.function([], model1.logpt)
with pm.Model() as model2:
Normal("n", observed=[[1], [1]], total_size=[2, 2])
- p2 = theano.function([], model2.logpt)
+ p2 = aesara.function([], model2.logpt)
with pm.Model() as model3:
Normal("n", observed=[[1, 1]], total_size=[2, 2])
- p3 = theano.function([], model3.logpt)
+ p3 = aesara.function([], model3.logpt)
with pm.Model() as model4:
Normal("n", observed=[[1]], total_size=[2, 2])
- p4 = theano.function([], model4.logpt)
+ p4 = aesara.function([], model4.logpt)
with pm.Model() as model5:
Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2])
- p5 = theano.function([], model5.logpt)
+ p5 = aesara.function([], model5.logpt)
_p0 = p0()
assert (
np.allclose(_p0, p1())
@@ -287,11 +287,11 @@ def test_mixed2(self):
def test_free_rv(self):
with pm.Model() as model4:
Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
- p4 = theano.function([], model4.logpt)
+ p4 = aesara.function([], model4.logpt)
with pm.Model() as model5:
Normal("n", total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False))
- p5 = theano.function([model5.n], model5.logpt)
+ p5 = aesara.function([model5.n], model5.logpt)
assert p4() == p5(pm.floatX([[1]]))
assert p4() == p5(pm.floatX([[1, 1], [1, 1]]))
@@ -327,15 +327,15 @@ def test_special4(self):
def test_cloning_available(self):
gop = pm.Minibatch(np.arange(100), 1)
res = gop ** 2
- shared = theano.shared(np.array([10]))
- res1 = theano.clone(res, {gop: shared})
- f = theano.function([], res1)
+ shared = aesara.shared(np.array([10]))
+ res1 = aesara.clone_replace(res, {gop: shared})
+ f = aesara.function([], res1)
assert f() == np.array([100])
def test_align(self):
m = pm.Minibatch(np.arange(1000), 1, random_seed=1)
n = pm.Minibatch(np.arange(1000), 1, random_seed=1)
- f = theano.function([], [m, n])
+ f = aesara.function([], [m, n])
n.eval() # not aligned
a, b = zip(*(f() for _ in range(1000)))
assert a != b
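
Two renames dominate this file: theano.clone becomes aesara.clone_replace, and the module-level RNG helpers tt_rng/set_tt_rng become aet_rng/set_aet_rng. A minimal sketch of the clone_replace substitution used in test_cloning_available (not part of the patch; it assumes the aesara-based pymc3 from this branch):

    import aesara
    import numpy as np
    import pymc3 as pm

    minibatch = pm.Minibatch(np.arange(100), 1)  # symbolic minibatch node
    expr = minibatch ** 2

    # Swap the minibatch node for a fixed shared value, then recompile.
    shared = aesara.shared(np.array([10]))
    expr_fixed = aesara.clone_replace(expr, {minibatch: shared})
    f = aesara.function([], expr_fixed)
    assert f() == np.array([100])
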
diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py
index 94b272bf43..a6646b812b 100644
--- a/pymc3/tests/test_mixture.py
+++ b/pymc3/tests/test_mixture.py
@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
import pytest
import scipy.stats as st
-import theano
+from aesara import tensor as aet
from numpy.testing import assert_allclose
from scipy.special import logsumexp
-from theano import tensor as tt
import pymc3 as pm
@@ -37,9 +37,9 @@
Poisson,
sample,
)
+from pymc3.aesaraf import floatX
from pymc3.distributions.shape_utils import to_tuple
from pymc3.tests.helpers import SeededTest
-from pymc3.theanof import floatX
# Generate data
@@ -248,7 +248,7 @@ def test_mixture_of_mvn(self):
st.multivariate_normal.logpdf(obs, mu2, cov2),
)
).T
- complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
+ complogp = y.distribution._comp_logp(aesara.shared(obs)).eval()
assert_allclose(complogp, complogp_st)
# check logp of mixture
@@ -264,7 +264,7 @@ def test_mixture_of_mvn(self):
assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
def test_mixture_of_mixture(self):
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
rtol = 1e-4
else:
rtol = 1e-7
@@ -290,7 +290,7 @@ def test_mixture_of_mixture(self):
test_point = model.test_point
def mixmixlogp(value, point):
- floatX = theano.config.floatX
+ floatX = aesara.config.floatX
priorlogp = (
st.dirichlet.logpdf(
x=point["g_w"],
@@ -392,7 +392,7 @@ def setup_method(self, *args, **kwargs):
super().setup_method(*args, **kwargs)
self.nd = 3
self.npop = 3
- self.mus = tt.as_tensor_variable(
+ self.mus = aet.as_tensor_variable(
np.tile(
np.reshape(
np.arange(self.npop),
@@ -446,7 +446,7 @@ def test_2d_w(self):
shape=nd,
)
z = pm.Categorical("z", p=np.ones(npop) / npop, shape=nd)
- mu = tt.as_tensor_variable([mus[i, z[i]] for i in range(nd)])
+ mu = aet.as_tensor_variable([mus[i, z[i]] for i in range(nd)])
latent_m = pm.Normal("latent_m", mu=mu, sigma=1e-5, shape=nd)
m_val = m.random(size=size)
@@ -470,7 +470,7 @@ def samples_from_same_distribution(self, *args):
assert p_marginal >= 0.05 and p_correlation >= 0.05
def logp_matches(self, mixture, latent_mix, z, npop, model):
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
rtol = 1e-4
else:
rtol = 1e-7
@@ -523,7 +523,7 @@ def test_with_multinomial(self, batch_shape):
assert prior["mixture"].shape == (self.n_samples, *batch_shape, 3)
assert mixture.random(size=self.size).shape == (self.size, *batch_shape, 3)
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
rtol = 1e-4
else:
rtol = 1e-7
@@ -558,7 +558,7 @@ def test_with_mvnormal(self):
assert prior["mixture"].shape == (self.n_samples, 3)
assert mixture.random(size=self.size).shape == (self.size, 3)
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
rtol = 1e-4
else:
rtol = 1e-7
diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py
index 2e5a83c1c3..4d747e203d 100644
--- a/pymc3/tests/test_model.py
+++ b/pymc3/tests/test_model.py
@@ -15,12 +15,12 @@
import pickle
import unittest
+import aesara
+import aesara.tensor as aet
import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
-import theano
-import theano.tensor as tt
import pymc3 as pm
@@ -39,8 +39,8 @@ def __init__(self, name="", model=None):
self.v2 = pm.Normal("v2", mu=0, sigma=1)
# 2) Potentials and Deterministic variables with method too
# be sure that names will not overlap with other same models
- pm.Deterministic("d", tt.constant(1))
- pm.Potential("p", tt.constant(1))
+ pm.Deterministic("d", aet.constant(1))
+ pm.Potential("p", aet.constant(1))
class DocstringModel(pm.Model):
@@ -50,7 +50,7 @@ def __init__(self, mean=0, sigma=1, name="", model=None):
Normal("v2", mu=mean, sigma=sigma)
Normal("v3", mu=mean, sigma=HalfCauchy("sd", beta=10, testval=1.0))
Deterministic("v3_sq", self.v3 ** 2)
- Potential("p1", tt.constant(1))
+ Potential("p1", aet.constant(1))
class TestBaseModel:
@@ -156,7 +156,7 @@ def test_observed_rv_fail(self):
def test_observed_type(self):
X_ = np.random.randn(100, 5)
- X = pm.floatX(theano.shared(X_))
+ X = pm.floatX(aesara.shared(X_))
with pm.Model():
x1 = pm.Normal("x1", observed=X_)
x2 = pm.Normal("x2", observed=X)
@@ -165,21 +165,21 @@ def test_observed_type(self):
assert x2.type == X.type
-class TestTheanoConfig:
+class TestAesaraConfig:
def test_set_testval_raise(self):
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
with pm.Model():
- assert theano.config.compute_test_value == "raise"
- assert theano.config.compute_test_value == "off"
+ assert aesara.config.compute_test_value == "raise"
+ assert aesara.config.compute_test_value == "off"
def test_nested(self):
- with theano.config.change_flags(compute_test_value="off"):
- with pm.Model(theano_config={"compute_test_value": "ignore"}):
- assert theano.config.compute_test_value == "ignore"
- with pm.Model(theano_config={"compute_test_value": "warn"}):
- assert theano.config.compute_test_value == "warn"
- assert theano.config.compute_test_value == "ignore"
- assert theano.config.compute_test_value == "off"
+ with aesara.config.change_flags(compute_test_value="off"):
+ with pm.Model(aesara_config={"compute_test_value": "ignore"}):
+ assert aesara.config.compute_test_value == "ignore"
+ with pm.Model(aesara_config={"compute_test_value": "warn"}):
+ assert aesara.config.compute_test_value == "warn"
+ assert aesara.config.compute_test_value == "ignore"
+ assert aesara.config.compute_test_value == "off"
def test_matrix_multiplication():
@@ -262,7 +262,7 @@ def test_empty_observed():
class TestValueGradFunction(unittest.TestCase):
def test_no_extra(self):
- a = tt.vector("a")
+ a = aet.vector("a")
a.tag.test_value = np.zeros(3, dtype=a.dtype)
a.dshape = (3,)
a.dsize = 3
@@ -270,7 +270,7 @@ def test_no_extra(self):
assert f_grad.size == 3
def test_invalid_type(self):
- a = tt.ivector("a")
+ a = aet.ivector("a")
a.tag.test_value = np.zeros(3, dtype=a.dtype)
a.dshape = (3,)
a.dsize = 3
@@ -279,19 +279,19 @@ def test_invalid_type(self):
err.match("Invalid dtype")
def setUp(self):
- extra1 = tt.iscalar("extra1")
+ extra1 = aet.iscalar("extra1")
extra1_ = np.array(0, dtype=extra1.dtype)
extra1.tag.test_value = extra1_
extra1.dshape = tuple()
extra1.dsize = 1
- val1 = tt.vector("val1")
+ val1 = aet.vector("val1")
val1_ = np.zeros(3, dtype=val1.dtype)
val1.tag.test_value = val1_
val1.dshape = (3,)
val1.dsize = 3
- val2 = tt.matrix("val2")
+ val2 = aet.matrix("val2")
val2_ = np.zeros((2, 3), dtype=val2.dtype)
val2.tag.test_value = val2_
val2.dshape = (2, 3)
@@ -366,8 +366,8 @@ def test_tensor_type_conversion(self):
assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type
- def test_theano_switch_broadcast_edge_cases(self):
- # Tests against two subtle issues related to a previous bug in Theano where tt.switch would not
+ def test_aesara_switch_broadcast_edge_cases(self):
+ # Tests against two subtle issues related to a previous bug in Aesara where aet.switch would not
# always broadcast tensors with single values https://github.com/pymc-devs/aesara/issues/270
# Known issue 1: https://github.com/pymc-devs/pymc3/issues/4389
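
Besides the module renames, pm.Model's keyword changes from theano_config to aesara_config, while the context manager still forces compute_test_value to "raise" by default. A short sketch of the renamed keyword (illustrative, mirroring TestAesaraConfig above):

    import aesara
    import pymc3 as pm

    with aesara.config.change_flags(compute_test_value="off"):
        # Inside a model the flag defaults to "raise" unless overridden here.
        with pm.Model(aesara_config={"compute_test_value": "ignore"}):
            assert aesara.config.compute_test_value == "ignore"
        # Leaving the model context restores the surrounding setting.
        assert aesara.config.compute_test_value == "off"
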
diff --git a/pymc3/tests/test_model_graph.py b/pymc3/tests/test_model_graph.py
index d68abafaab..fe0d10955c 100644
--- a/pymc3/tests/test_model_graph.py
+++ b/pymc3/tests/test_model_graph.py
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara as th
import numpy as np
-import theano as th
import pymc3 as pm
diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py
index 20745febad..72bd1b058a 100644
--- a/pymc3/tests/test_model_helpers.py
+++ b/pymc3/tests/test_model_helpers.py
@@ -12,15 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
+import aesara.sparse as sparse
+import aesara.tensor as aet
import numpy as np
import numpy.ma as ma
import numpy.testing as npt
import pandas as pd
import pytest
import scipy.sparse as sps
-import theano
-import theano.sparse as sparse
-import theano.tensor as tt
+
+from aesara.graph.basic import Variable
+from aesara.tensor.var import TensorConstant, TensorVariable
import pymc3 as pm
@@ -37,7 +40,7 @@ def test_pandas_to_array(self, input_dtype):
dense_input = np.arange(9).reshape((3, 3)).astype(input_dtype)
input_name = "input_variable"
- theano_graph_input = tt.as_tensor(dense_input, name=input_name)
+ aesara_graph_input = aet.as_tensor(dense_input, name=input_name)
pandas_input = pd.DataFrame(dense_input)
# All the even numbers are replaced with NaN
@@ -77,22 +80,22 @@ def test_pandas_to_array(self, input_dtype):
assert func_output.shape == input_value.shape
npt.assert_allclose(func_output, masked_array_input)
- # Check function behavior with Theano graph variable
- theano_output = func(theano_graph_input)
- assert isinstance(theano_output, theano.graph.basic.Variable)
- npt.assert_allclose(theano_output.eval(), theano_graph_input.eval())
- intX = pm.theanof._conversion_map[theano.config.floatX]
- if dense_input.dtype == intX or dense_input.dtype == theano.config.floatX:
- assert theano_output.owner is None # func should not have added new nodes
- assert theano_output.name == input_name
+ # Check function behavior with Aesara graph variable
+ aesara_output = func(aesara_graph_input)
+ assert isinstance(aesara_output, Variable)
+ npt.assert_allclose(aesara_output.eval(), aesara_graph_input.eval())
+ intX = pm.aesaraf._conversion_map[aesara.config.floatX]
+ if dense_input.dtype == intX or dense_input.dtype == aesara.config.floatX:
+ assert aesara_output.owner is None # func should not have added new nodes
+ assert aesara_output.name == input_name
else:
- assert theano_output.owner is not None # func should have casted
- assert theano_output.owner.inputs[0].name == input_name
+ assert aesara_output.owner is not None # func should have casted
+ assert aesara_output.owner.inputs[0].name == input_name
if "float" in input_dtype:
- assert theano_output.dtype == theano.config.floatX
+ assert aesara_output.dtype == aesara.config.floatX
else:
- assert theano_output.dtype == intX
+ assert aesara_output.dtype == intX
# Check function behavior with generator data
generator_output = func(square_generator)
@@ -102,15 +105,15 @@ def test_pandas_to_array(self, input_dtype):
# Make sure the returned object has .set_gen and .set_default methods
assert hasattr(wrapped, "set_gen")
assert hasattr(wrapped, "set_default")
- # Make sure the returned object is a Theano TensorVariable
- assert isinstance(wrapped, tt.TensorVariable)
+ # Make sure the returned object is an Aesara TensorVariable
+ assert isinstance(wrapped, TensorVariable)
def test_as_tensor(self):
"""
Check returned values for `data` given known inputs to `as_tensor()`.
Note that ndarrays should return a TensorConstant and sparse inputs
- should return a Sparse Theano object.
+ should return a Sparse Aesara object.
"""
# Create the various inputs to the function
input_name = "testing_inputs"
@@ -137,18 +140,18 @@ def test_as_tensor(self):
for func_output in [dense_output, sparse_output]:
assert func_output.missing_values is None
- # Ensure that the Theano variable names are correctly set.
+ # Ensure that the Aesara variable names are correctly set.
# Note that the output for masked inputs do not have their names set
# to the passed value.
for func_output in [dense_output, sparse_output]:
assert func_output.name == input_name
# Ensure the that returned functions are all of the correct type
- assert isinstance(dense_output, tt.TensorConstant)
+ assert isinstance(dense_output, TensorConstant)
assert sparse.basic._is_sparse_variable(sparse_output)
# Masked output is something weird. Just ensure it has missing values
- # self.assertIsInstance(masked_output, tt.TensorConstant)
+ # self.assertIsInstance(masked_output, TensorConstant)
assert masked_output.missing_values is not None
return None
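
Type checks that previously went through theano.tensor (tt.TensorVariable, tt.TensorConstant) now import the classes from their new locations in aesara.graph.basic and aesara.tensor.var. A small illustrative sketch of the pattern, assuming the module layout referenced in the imports above:

    import aesara.tensor as aet
    import numpy as np
    from aesara.graph.basic import Variable
    from aesara.tensor.var import TensorConstant, TensorVariable

    x = aet.as_tensor(np.arange(9).reshape(3, 3), name="x")
    assert isinstance(x, Variable)        # any node in an Aesara graph
    assert isinstance(x, TensorConstant)  # ndarray inputs become constants
    assert isinstance(aet.dvector("v"), TensorVariable)
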
diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py
index 84d25b3c2b..c6f55f8b09 100644
--- a/pymc3/tests/test_models_utils.py
+++ b/pymc3/tests/test_models_utils.py
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
import pandas as pd
import pytest
-import theano.tensor as tt
from pymc3.glm import utils
@@ -51,7 +51,7 @@ def test_dict_input(self):
m, l = utils.any_to_tensor_and_labels(self.data.to_dict("list"))
self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l)
- inp = {k: tt.as_tensor_variable(v.values) for k, v in self.data.to_dict("series").items()}
+ inp = {k: aet.as_tensor_variable(v.values) for k, v in self.data.to_dict("series").items()}
m, l = utils.any_to_tensor_and_labels(inp)
self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l)
@@ -63,18 +63,18 @@ def test_list_input(self):
def test_tensor_input(self):
m, l = utils.any_to_tensor_and_labels(
- tt.as_tensor_variable(self.data.values.tolist()), labels=["x0", "x1"]
+ aet.as_tensor_variable(self.data.values.tolist()), labels=["x0", "x1"]
)
self.assertMatrixLabels(m, l, lt=["x0", "x1"])
m, l = utils.any_to_tensor_and_labels(
- tt.as_tensor_variable(self.data.values.tolist()), labels=["x2", "x3"]
+ aet.as_tensor_variable(self.data.values.tolist()), labels=["x2", "x3"]
)
self.assertMatrixLabels(m, l, lt=["x2", "x3"])
def test_user_mistakes(self):
# no labels for tensor variable
with pytest.raises(ValueError):
- utils.any_to_tensor_and_labels(tt.as_tensor_variable(self.data.values.tolist()))
+ utils.any_to_tensor_and_labels(aet.as_tensor_variable(self.data.values.tolist()))
# len of labels is bad
with pytest.raises(ValueError):
utils.any_to_tensor_and_labels(self.data.values.tolist(), labels=["x"])
diff --git a/pymc3/tests/test_ode.py b/pymc3/tests/test_ode.py
index 1d336bfba6..efdaa31812 100644
--- a/pymc3/tests/test_ode.py
+++ b/pymc3/tests/test_ode.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
import pytest
-import theano
from scipy.integrate import odeint
from scipy.stats import norm
@@ -26,13 +26,13 @@
def test_gradients():
- """Tests the computation of the sensitivities from the theano computation graph"""
+ """Tests the computation of the sensitivities from the aesara computation graph"""
# ODE system for which to compute gradients
def ode_func(y, t, p):
return np.exp(-t) - p[0] * y[0]
- # Computation of graidients with Theano
+ # Computation of gradients with Aesara
augmented_ode_func = augment_system(ode_func, 1, 1 + 1)
# This is the new system, ODE + Sensitivities, which will be integrated
@@ -210,22 +210,22 @@ def system(y, t, p):
ode_model = DifferentialEquation(func=system, t0=0, times=times, n_states=1, n_theta=1)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_too_many_params(self):
with pytest.raises(pm.ShapeError):
self.ode_model(theta=[1, 1], y0=[0])
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_too_many_y0(self):
with pytest.raises(pm.ShapeError):
self.ode_model(theta=[1], y0=[0, 0])
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_too_few_params(self):
with pytest.raises(pm.ShapeError):
self.ode_model(theta=[], y0=[1])
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_too_few_y0(self):
with pytest.raises(pm.ShapeError):
self.ode_model(theta=[1], y0=[])
diff --git a/pymc3/tests/test_parallel_sampling.py b/pymc3/tests/test_parallel_sampling.py
index e458c609a8..bd1a37abcf 100644
--- a/pymc3/tests/test_parallel_sampling.py
+++ b/pymc3/tests/test_parallel_sampling.py
@@ -14,12 +14,13 @@
import multiprocessing
import os
+import aesara
+import aesara.tensor as aet
import numpy as np
import pytest
-import theano
-import theano.tensor as tt
-from theano.compile.ops import as_op
+from aesara.compile.ops import as_op
+from aesara.tensor.type import TensorType
import pymc3 as pm
import pymc3.parallel_sampling as ps
@@ -60,10 +61,10 @@ def test_bad_unpickle():
assert "could not be unpickled" in str(exc_info.getrepr(style="short"))
-tt_vector = tt.TensorType(theano.config.floatX, [False])
+aet_vector = TensorType(aesara.config.floatX, [False])
-@as_op([tt_vector, tt.iscalar], [tt_vector])
+@as_op([aet_vector, aet.iscalar], [aet_vector])
def _crash_remote_process(a, master_pid):
if os.getpid() != master_pid:
os.exit(0)
@@ -80,8 +81,8 @@ def test_remote_pipe_closed():
master_pid = os.getpid()
with pm.Model():
x = pm.Normal("x", shape=2, mu=0.1)
- tt_pid = tt.as_tensor_variable(np.array(master_pid, dtype="int32"))
- pm.Normal("y", mu=_crash_remote_process(x, tt_pid), shape=2)
+ aet_pid = aet.as_tensor_variable(np.array(master_pid, dtype="int32"))
+ pm.Normal("y", mu=_crash_remote_process(x, aet_pid), shape=2)
step = pm.Metropolis()
with pytest.raises(RuntimeError, match="Chain [0-9] failed"):
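
TensorType now comes from aesara.tensor.type and as_op from aesara.compile.ops; the crash Op above is rebuilt from those paths. A minimal sketch of defining a custom vector Op the same way (illustrative only; the scale helper below is not part of the patch):

    import aesara
    import aesara.tensor as aet
    import numpy as np
    from aesara.compile.ops import as_op
    from aesara.tensor.type import TensorType

    vector = TensorType(aesara.config.floatX, [False])  # 1-d tensor of any length

    @as_op([vector, aet.iscalar], [vector])
    def scale(x, k):
        # Plain NumPy inside the Op; Aesara only sees the declared types.
        return np.asarray(x * k, dtype=aesara.config.floatX)

    x = aet.vector("x")
    k = aet.iscalar("k")
    f = aesara.function([x, k], scale(x, k))
    f(np.ones(3, dtype=aesara.config.floatX), 4)  # -> array([4., 4., 4.])
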
diff --git a/pymc3/tests/test_posdef_sym.py b/pymc3/tests/test_posdef_sym.py
index a7aa714357..cfb406ca1d 100644
--- a/pymc3/tests/test_posdef_sym.py
+++ b/pymc3/tests/test_posdef_sym.py
@@ -12,19 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
-import theano
from pymc3.distributions import multivariate as mv
def test_posdef_symmetric1():
- data = np.array([[1.0, 0], [0, 1]], dtype=theano.config.floatX)
+ data = np.array([[1.0, 0], [0, 1]], dtype=aesara.config.floatX)
assert mv.posdef(data) == 1
def test_posdef_symmetric2():
- data = np.array([[1.0, 2], [2, 1]], dtype=theano.config.floatX)
+ data = np.array([[1.0, 2], [2, 1]], dtype=aesara.config.floatX)
assert mv.posdef(data) == 0
@@ -33,11 +33,11 @@ def test_posdef_symmetric3():
Is this correct?
"""
- data = np.array([[1.0, 1], [1, 1]], dtype=theano.config.floatX)
+ data = np.array([[1.0, 1], [1, 1]], dtype=aesara.config.floatX)
assert mv.posdef(data) == 0
def test_posdef_symmetric4():
- d = np.array([[1, 0.99, 1], [0.99, 1, 0.999], [1, 0.999, 1]], theano.config.floatX)
+ d = np.array([[1, 0.99, 1], [0.99, 1, 0.999], [1, 0.999, 1]], aesara.config.floatX)
assert mv.posdef(d) == 0
diff --git a/pymc3/tests/test_posteriors.py b/pymc3/tests/test_posteriors.py
index 453ae98efd..8ac068bd75 100644
--- a/pymc3/tests/test_posteriors.py
+++ b/pymc3/tests/test_posteriors.py
@@ -12,13 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import pytest
-import theano
from pymc3.tests import sampler_fixtures as sf
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
class TestNUTSUniform(sf.NutsFixture, sf.UniformFixture):
n_samples = 10000
tune = 1000
diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py
index d91a80b5e9..aa89f37075 100644
--- a/pymc3/tests/test_quadpotential.py
+++ b/pymc3/tests/test_quadpotential.py
@@ -19,8 +19,8 @@
import pymc3
+from pymc3.aesaraf import floatX
from pymc3.step_methods.hmc import quadpotential
-from pymc3.theanof import floatX
def test_elemwise_posdef():
diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py
index 7a4ae42ce2..f88e6f75f9 100644
--- a/pymc3/tests/test_random.py
+++ b/pymc3/tests/test_random.py
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
+import aesara.tensor as aet
import numpy as np
import numpy.testing as npt
import pytest
-import theano
-import theano.tensor as tt
from numpy import random as nr
@@ -30,15 +30,15 @@ def test_draw_value():
npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6])
npt.assert_equal(_draw_value(np.array(5.0)), 5)
- npt.assert_equal(_draw_value(tt.constant([5.0, 6.0])), [5, 6])
- assert _draw_value(tt.constant(5)) == 5
- npt.assert_equal(_draw_value(2 * tt.constant([5.0, 6.0])), [10, 12])
+ npt.assert_equal(_draw_value(aet.constant([5.0, 6.0])), [5, 6])
+ assert _draw_value(aet.constant(5)) == 5
+ npt.assert_equal(_draw_value(2 * aet.constant([5.0, 6.0])), [10, 12])
- val = theano.shared(np.array([5.0, 6.0]))
+ val = aesara.shared(np.array([5.0, 6.0]))
npt.assert_equal(_draw_value(val), [5, 6])
npt.assert_equal(_draw_value(2 * val), [10, 12])
- a = tt.scalar("a")
+ a = aet.scalar("a")
a.tag.test_value = 6
npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2)
@@ -48,7 +48,7 @@ def test_draw_value():
assert isinstance(_draw_value(5), type(5))
with pm.Model():
- mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5))
+ mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5))
a = pm.Normal("a", mu=mu, sigma=5, shape=2)
val1 = _draw_value(a)
@@ -68,17 +68,17 @@ def test_vals(self):
npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6])
npt.assert_equal(draw_values([np.array(5.0)])[0], 5)
- npt.assert_equal(draw_values([tt.constant([5.0, 6.0])])[0], [5, 6])
- assert draw_values([tt.constant(5)])[0] == 5
- npt.assert_equal(draw_values([2 * tt.constant([5.0, 6.0])])[0], [10, 12])
+ npt.assert_equal(draw_values([aet.constant([5.0, 6.0])])[0], [5, 6])
+ assert draw_values([aet.constant(5)])[0] == 5
+ npt.assert_equal(draw_values([2 * aet.constant([5.0, 6.0])])[0], [10, 12])
- val = theano.shared(np.array([5.0, 6.0]))
+ val = aesara.shared(np.array([5.0, 6.0]))
npt.assert_equal(draw_values([val])[0], [5, 6])
npt.assert_equal(draw_values([2 * val])[0], [10, 12])
def test_simple_model(self):
with pm.Model():
- mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5))
+ mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5))
a = pm.Normal("a", mu=mu, sigma=5, shape=2)
val1 = draw_values([a])
@@ -90,7 +90,7 @@ def test_simple_model(self):
def test_dep_vars(self):
with pm.Model():
- mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5))
+ mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5))
sd = pm.HalfNormal("sd", shape=2)
tau = 1 / sd ** 2
a = pm.Normal("a", mu=mu, tau=tau, shape=2)
@@ -116,7 +116,7 @@ def test_dep_vars(self):
def test_graph_constant(self):
# Issue 3595 pointed out that slice(None) can introduce
- # theano.graph.basic.Constant into the compute graph, which wasn't
+ # aesara.graph.basic.Constant into the compute graph, which wasn't
# handled correctly by draw_values
n_d = 500
n_x = 2
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index f3f2872c44..3a31f9627e 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -18,15 +18,15 @@
from itertools import combinations
from typing import Tuple
+import aesara
+import aesara.tensor as aet
import arviz as az
import numpy as np
import numpy.testing as npt
import pytest
-import theano
-import theano.tensor as tt
+from aesara import shared
from scipy import stats
-from theano import shared
import pymc3 as pm
@@ -36,7 +36,7 @@
from pymc3.tests.models import simple_init
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
class TestSample(SeededTest):
def setup_method(self):
super().setup_method()
@@ -348,7 +348,7 @@ def test_choose_chains(n_points, tune, expected_length, expected_n_traces):
assert expected_n_traces == len(traces)
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
class TestNamedSampling(SeededTest):
def test_shared_named(self):
G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False), name="G")
@@ -362,7 +362,7 @@ def test_shared_named(self):
testval=np.atleast_2d(0),
)
theta = pm.Normal(
- "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
+ "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
)
res = theta.random()
assert np.isclose(res, 0.0)
@@ -378,13 +378,13 @@ def test_shared_unnamed(self):
testval=np.atleast_2d(0),
)
theta = pm.Normal(
- "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
+ "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
)
res = theta.random()
assert np.isclose(res, 0.0)
def test_constant_named(self):
- G_var = tt.constant(np.atleast_2d(1.0), name="G")
+ G_var = aet.constant(np.atleast_2d(1.0), name="G")
with pm.Model():
theta0 = pm.Normal(
"theta0",
@@ -394,7 +394,7 @@ def test_constant_named(self):
testval=np.atleast_2d(0),
)
theta = pm.Normal(
- "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
+ "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
)
res = theta.random()
@@ -621,8 +621,8 @@ def test_model_not_drawable_prior(self):
def test_model_shared_variable(self):
x = np.random.randn(100)
y = x > 0
- x_shared = theano.shared(x)
- y_shared = theano.shared(y)
+ x_shared = aesara.shared(x)
+ y_shared = aesara.shared(y)
with pm.Model() as model:
coeff = pm.Normal("x", mu=0, sd=1)
logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))
@@ -655,8 +655,10 @@ def test_model_shared_variable(self):
npt.assert_allclose(post_pred["p"], expected_p)
def test_deterministic_of_observed(self):
- meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(10))
- meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(10))
+ np.random.seed(8442)
+
+ meas_in_1 = pm.aesaraf.floatX(2 + 4 * np.random.randn(10))
+ meas_in_2 = pm.aesaraf.floatX(5 + 4 * np.random.randn(10))
nchains = 2
with pm.Model() as model:
mu_in_1 = pm.Normal("mu_in_1", 0, 1)
@@ -671,7 +673,7 @@ def test_deterministic_of_observed(self):
trace = pm.sample(100, chains=nchains)
np.random.seed(0)
- rtol = 1e-5 if theano.config.floatX == "float64" else 1e-4
+ rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-4
np.random.seed(0)
ppc = pm.sample_posterior_predictive(
@@ -694,8 +696,8 @@ def test_deterministic_of_observed(self):
npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)
def test_deterministic_of_observed_modified_interface(self):
- meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100))
- meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100))
+ meas_in_1 = pm.aesaraf.floatX(2 + 4 * np.random.randn(100))
+ meas_in_2 = pm.aesaraf.floatX(5 + 4 * np.random.randn(100))
with pm.Model() as model:
mu_in_1 = pm.Normal("mu_in_1", 0, 1)
sigma_in_1 = pm.HalfNormal("sd_in_1", 1)
@@ -718,7 +720,7 @@ def test_deterministic_of_observed_modified_interface(self):
var_names=[x.name for x in (model.deterministics + model.basic_RVs)],
)
- rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
+ rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-3
npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)
ppc = pm.fast_sample_posterior_predictive(
@@ -728,7 +730,7 @@ def test_deterministic_of_observed_modified_interface(self):
var_names=[x.name for x in (model.deterministics + model.basic_RVs)],
)
- rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
+ rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-3
npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)
def test_variable_type(self):
@@ -987,7 +989,7 @@ def test_transformed(self):
phi = pm.Beta("phi", alpha=1.0, beta=1.0)
kappa_log = pm.Exponential("logkappa", lam=5.0)
- kappa = pm.Deterministic("kappa", tt.exp(kappa_log))
+ kappa = pm.Deterministic("kappa", aet.exp(kappa_log))
thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=n)
@@ -1053,7 +1055,7 @@ def test_zeroinflatedpoisson(self):
def test_bounded_dist(self):
with pm.Model() as model:
BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
- x = BoundedNormal("x", mu=tt.zeros((3, 1)), sd=1 * tt.ones((3, 1)), shape=(3, 1))
+ x = BoundedNormal("x", mu=aet.zeros((3, 1)), sd=1 * aet.ones((3, 1)), shape=(3, 1))
with model:
prior_trace = pm.sample_prior_predictive(5)
diff --git a/pymc3/tests/test_shape_handling.py b/pymc3/tests/test_shape_handling.py
index 070535969d..39cd181083 100644
--- a/pymc3/tests/test_shape_handling.py
+++ b/pymc3/tests/test_shape_handling.py
@@ -15,7 +15,7 @@
import numpy as np
import pytest
-from theano import tensor as tt
+from aesara import tensor as aet
import pymc3 as pm
@@ -106,7 +106,7 @@ def fixture_model():
cov = pm.InverseGamma("cov", alpha=1, beta=1)
x = pm.Normal("x", mu=np.ones((dim,)), sigma=pm.math.sqrt(cov), shape=(n, dim))
eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim))
- mu = pm.Deterministic("mu", tt.sum(x + eps, axis=-1))
+ mu = pm.Deterministic("mu", aet.sum(x + eps, axis=-1))
y = pm.Normal("y", mu=mu, sigma=1, shape=(n,))
return model, [cov, x, eps, y]
diff --git a/pymc3/tests/test_shared.py b/pymc3/tests/test_shared.py
index 723216362f..247b5ebdb5 100644
--- a/pymc3/tests/test_shared.py
+++ b/pymc3/tests/test_shared.py
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
-import theano
import pymc3 as pm
@@ -24,7 +24,7 @@ class TestShared(SeededTest):
def test_deterministic(self):
with pm.Model() as model:
data_values = np.array([0.5, 0.4, 5, 2])
- X = theano.shared(np.asarray(data_values, dtype=theano.config.floatX), borrow=True)
+ X = aesara.shared(np.asarray(data_values, dtype=aesara.config.floatX), borrow=True)
pm.Normal("y", 0, 1, observed=X)
model.logp(model.test_point)
@@ -34,7 +34,7 @@ def test_sample(self):
x_pred = np.linspace(-3, 3, 200)
- x_shared = theano.shared(x)
+ x_shared = aesara.shared(x)
with pm.Model() as model:
b = pm.Normal("b", 0.0, 10.0)
diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py
index 695ea461f7..b2ebdd65dd 100644
--- a/pymc3/tests/test_smc.py
+++ b/pymc3/tests/test_smc.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
import pytest
-import theano.tensor as tt
import pymc3 as pm
@@ -39,16 +39,16 @@ def setup_class(self):
def two_gaussians(x):
log_like1 = (
- -0.5 * n * tt.log(2 * np.pi)
- - 0.5 * tt.log(dsigma)
+ -0.5 * n * aet.log(2 * np.pi)
+ - 0.5 * aet.log(dsigma)
- 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
)
log_like2 = (
- -0.5 * n * tt.log(2 * np.pi)
- - 0.5 * tt.log(dsigma)
+ -0.5 * n * aet.log(2 * np.pi)
+ - 0.5 * aet.log(dsigma)
- 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
)
- return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))
+ return aet.log(w1 * aet.exp(log_like1) + w2 * aet.exp(log_like2))
with pm.Model() as self.SMC_test:
X = pm.Uniform("X", lower=-2, upper=2.0, shape=n)
diff --git a/pymc3/tests/test_special_functions.py b/pymc3/tests/test_special_functions.py
index e7e2e53cbc..b293163ad6 100644
--- a/pymc3/tests/test_special_functions.py
+++ b/pymc3/tests/test_special_functions.py
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara.tensor as aet
import numpy as np
import scipy.special as ss
-import theano.tensor as tt
-from theano import function
+from aesara import function
import pymc3.distributions.special as ps
@@ -26,10 +26,10 @@
def test_functions():
xvals = list(map(np.atleast_1d, [0.01, 0.1, 2, 100, 10000]))
- x = tt.dvector("x")
+ x = aet.dvector("x")
x.tag.test_value = xvals[0]
- p = tt.iscalar("p")
+ p = aet.iscalar("p")
p.tag.test_value = 1
gammaln = function([x], ps.gammaln(x))
@@ -55,10 +55,10 @@ def test_functions():
def t_multigamma():
xvals = list(map(np.atleast_1d, [0, 0.1, 2, 100]))
- x = tt.dvector("x")
+ x = aet.dvector("x")
x.tag.test_value = xvals[0]
- p = tt.iscalar("p")
+ p = aet.iscalar("p")
p.tag.test_value = 1
multigammaln = function([x, p], ps.multigammaln(x, p))
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index 6da70f2a7a..54b126ba0f 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -18,17 +18,18 @@
from math import isclose
+import aesara
+import aesara.tensor as aet
import arviz as az
import numpy as np
import numpy.testing as npt
import pytest
-import theano
-import theano.tensor as tt
+from aesara.compile.ops import as_op
+from aesara.graph.op import Op
from numpy.testing import assert_array_almost_equal
-from theano.compile.ops import as_op
-from theano.graph.op import Op
+from pymc3.aesaraf import floatX
from pymc3.data import Data
from pymc3.distributions import (
Bernoulli,
@@ -71,7 +72,6 @@
simple_2model_continuous,
simple_categorical,
)
-from pymc3.theanof import floatX
class TestStepMethods: # yield test doesn't work subclassing object
@@ -500,7 +500,7 @@ def setup_class(self):
def teardown_class(self):
shutil.rmtree(self.temp_dir)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_sample_exact(self):
for step_method in self.master_samples:
self.check_trace(step_method)
@@ -591,7 +591,7 @@ def test_step_continuous(self):
self.check_stat(check, trace, step.__class__.__name__)
def test_step_discrete(self):
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
return # Cannot use @skip because it only skips one iteration of the yield
start, model, (mu, C) = mv_simple_discrete()
unc = np.diag(C) ** 0.5
@@ -657,7 +657,7 @@ class TestCompoundStep:
samplers = (Metropolis, Slice, HamiltonianMC, NUTS, DEMetropolis)
@pytest.mark.skipif(
- theano.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues"
+ aesara.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues"
)
def test_non_blocked(self):
"""Test that samplers correctly create non-blocked compound steps."""
@@ -667,7 +667,7 @@ def test_non_blocked(self):
assert isinstance(sampler(blocked=False), CompoundStep)
@pytest.mark.skipif(
- theano.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues"
+ aesara.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues"
)
def test_blocked(self):
_, model = simple_2model_continuous()
@@ -716,17 +716,17 @@ def test_normal_nograd_op(self):
with Model() as model:
x = Normal("x", 0, 1)
- # a custom Theano Op that does not have a grad:
- is_64 = theano.config.floatX == "float64"
- itypes = [tt.dscalar] if is_64 else [tt.fscalar]
- otypes = [tt.dscalar] if is_64 else [tt.fscalar]
+ # a custom Aesara Op that does not have a grad:
+ is_64 = aesara.config.floatX == "float64"
+ itypes = [aet.dscalar] if is_64 else [aet.fscalar]
+ otypes = [aet.dscalar] if is_64 else [aet.fscalar]
@as_op(itypes, otypes)
def kill_grad(x):
return x
data = np.random.normal(size=(100,))
- Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(theano.config.floatX))
+ Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(aesara.config.floatX))
steps = assign_step_methods(model, [])
assert isinstance(steps, Slice)
@@ -957,7 +957,7 @@ def test_custom_proposal_dist(self):
pass
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
class TestNutsCheckTrace:
def test_multiple_samplers(self, caplog):
with Model():
@@ -986,8 +986,8 @@ def test_bad_init_parallel(self):
def test_linalg(self, caplog):
with Model():
a = Normal("a", shape=2)
- a = tt.switch(a > 0, np.inf, a)
- b = tt.slinalg.solve(floatX(np.eye(2)), a)
+ a = aet.switch(a > 0, np.inf, a)
+ b = aet.slinalg.solve(floatX(np.eye(2)), a)
Normal("c", mu=b, shape=2)
caplog.clear()
trace = sample(20, init=None, tune=5, chains=2)
@@ -1440,7 +1440,7 @@ def test_aem_mu_sigma(self):
"""Test that AEM estimates mu_B and Sigma_B in
the coarse models of a 3-level LR example correctly"""
# create data for linear regression
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
p = "float32"
else:
p = "float64"
@@ -1459,12 +1459,12 @@ def test_aem_mu_sigma(self):
# forward model Op - here, just the regression equation
class ForwardModel(Op):
- if theano.config.floatX == "float32":
- itypes = [tt.fvector]
- otypes = [tt.fvector]
+ if aesara.config.floatX == "float32":
+ itypes = [aet.fvector]
+ otypes = [aet.fvector]
else:
- itypes = [tt.dvector]
- otypes = [tt.dvector]
+ itypes = [aet.dvector]
+ otypes = [aet.dvector]
def __init__(self, x, pymc3_model):
self.x = x
@@ -1494,7 +1494,7 @@ def perform(self, node, inputs, outputs):
intercept = Normal("Intercept", 0, sigma=20)
x_coeff = Normal("x", 0, sigma=20)
- theta = tt.as_tensor_variable([intercept, x_coeff])
+ theta = aet.as_tensor_variable([intercept, x_coeff])
mout.append(ForwardModel(x, coarse_model_0))
@@ -1514,7 +1514,7 @@ def perform(self, node, inputs, outputs):
intercept = Normal("Intercept", 0, sigma=20)
x_coeff = Normal("x", 0, sigma=20)
- theta = tt.as_tensor_variable([intercept, x_coeff])
+ theta = aet.as_tensor_variable([intercept, x_coeff])
mout.append(ForwardModel(x, coarse_model_1))
@@ -1533,7 +1533,7 @@ def perform(self, node, inputs, outputs):
intercept = Normal("Intercept", 0, sigma=20)
x_coeff = Normal("x", 0, sigma=20)
- theta = tt.as_tensor_variable([intercept, x_coeff])
+ theta = aet.as_tensor_variable([intercept, x_coeff])
mout.append(ForwardModel(x, model))
@@ -1569,7 +1569,7 @@ def test_variance_reduction(self):
model with multiple levels where approximate levels have fewer data.
"""
# arithmetic precision
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
p = "float32"
else:
p = "float64"
@@ -1601,12 +1601,12 @@ def test_variance_reduction(self):
# define likelihoods with different Q
class Likelihood1(Op):
- if theano.config.floatX == "float32":
- itypes = [tt.fvector]
- otypes = [tt.fscalar]
+ if aesara.config.floatX == "float32":
+ itypes = [aet.fvector]
+ otypes = [aet.fscalar]
else:
- itypes = [tt.dvector]
- otypes = [tt.dscalar]
+ itypes = [aet.dvector]
+ otypes = [aet.dscalar]
def __init__(self, x, y, pymc3_model):
self.x = x
@@ -1624,12 +1624,12 @@ def perform(self, node, inputs, outputs):
)
class Likelihood2(Op):
- if theano.config.floatX == "float32":
- itypes = [tt.fvector]
- otypes = [tt.fscalar]
+ if aesara.config.floatX == "float32":
+ itypes = [aet.fvector]
+ otypes = [aet.fscalar]
else:
- itypes = [tt.dvector]
- otypes = [tt.dscalar]
+ itypes = [aet.dvector]
+ otypes = [aet.dscalar]
def __init__(self, x, y, pymc3_model):
self.x = x
@@ -1654,7 +1654,7 @@ def perform(self, node, inputs, outputs):
coarse_models = []
with Model() as coarse_model_0:
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
Q = Data("Q", np.float32(0.0))
else:
Q = Data("Q", np.float64(0.0))
@@ -1663,7 +1663,7 @@ def perform(self, node, inputs, outputs):
intercept = Normal("Intercept", 0, sigma=20)
x_coeff = Normal("x", 0, sigma=20)
- theta = tt.as_tensor_variable([intercept, x_coeff])
+ theta = aet.as_tensor_variable([intercept, x_coeff])
mout.append(f(x_coarse_0, y_coarse_0, coarse_model_0))
Potential("likelihood", mout[0](theta))
@@ -1671,7 +1671,7 @@ def perform(self, node, inputs, outputs):
coarse_models.append(coarse_model_0)
with Model() as coarse_model_1:
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
Q = Data("Q", np.float32(0.0))
else:
Q = Data("Q", np.float64(0.0))
@@ -1680,7 +1680,7 @@ def perform(self, node, inputs, outputs):
intercept = Normal("Intercept", 0, sigma=20)
x_coeff = Normal("x", 0, sigma=20)
- theta = tt.as_tensor_variable([intercept, x_coeff])
+ theta = aet.as_tensor_variable([intercept, x_coeff])
mout.append(f(x_coarse_1, y_coarse_1, coarse_model_1))
Potential("likelihood", mout[1](theta))
@@ -1688,7 +1688,7 @@ def perform(self, node, inputs, outputs):
coarse_models.append(coarse_model_1)
with Model() as model:
- if theano.config.floatX == "float32":
+ if aesara.config.floatX == "float32":
Q = Data("Q", np.float32(0.0))
else:
Q = Data("Q", np.float64(0.0))
@@ -1697,7 +1697,7 @@ def perform(self, node, inputs, outputs):
intercept = Normal("Intercept", 0, sigma=20)
x_coeff = Normal("x", 0, sigma=20)
- theta = tt.as_tensor_variable([intercept, x_coeff])
+ theta = aet.as_tensor_variable([intercept, x_coeff])
mout.append(f(x, y, model))
Potential("likelihood", mout[-1](theta))
diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py
index e9ab89938b..844a9eb389 100644
--- a/pymc3/tests/test_transforms.py
+++ b/pymc3/tests/test_transforms.py
@@ -12,14 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
+import aesara.tensor as aet
import numpy as np
import pytest
-import theano
-import theano.tensor as tt
+
+from aesara.tensor.var import TensorConstant
import pymc3 as pm
import pymc3.distributions.transforms as tr
+from pymc3.aesaraf import jacobian
from pymc3.tests.checks import close_to, close_to_logical
from pymc3.tests.helpers import SeededTest
from pymc3.tests.test_distributions import (
@@ -34,38 +37,37 @@
UnitSortedVector,
Vector,
)
-from pymc3.theanof import jacobian
# some transforms (stick breaking) require additon of small slack in order to be numerically
# stable. The minimal addable slack for float32 is higher thus we need to be less strict
-tol = 1e-7 if theano.config.floatX == "float64" else 1e-6
+tol = 1e-7 if aesara.config.floatX == "float64" else 1e-6
-def check_transform(transform, domain, constructor=tt.dscalar, test=0):
+def check_transform(transform, domain, constructor=aet.dscalar, test=0):
x = constructor("x")
x.tag.test_value = test
# test forward and forward_val
- forward_f = theano.function([x], transform.forward(x))
+ forward_f = aesara.function([x], transform.forward(x))
# test transform identity
- identity_f = theano.function([x], transform.backward(transform.forward(x)))
+ identity_f = aesara.function([x], transform.backward(transform.forward(x)))
for val in domain.vals:
close_to(val, identity_f(val), tol)
close_to(transform.forward_val(val), forward_f(val), tol)
def check_vector_transform(transform, domain):
- return check_transform(transform, domain, tt.dvector, test=np.array([0, 0]))
+ return check_transform(transform, domain, aet.dvector, test=np.array([0, 0]))
-def get_values(transform, domain=R, constructor=tt.dscalar, test=0):
+def get_values(transform, domain=R, constructor=aet.dscalar, test=0):
x = constructor("x")
x.tag.test_value = test
- f = theano.function([x], transform.backward(x))
+ f = aesara.function([x], transform.backward(x))
return np.array([f(val) for val in domain.vals])
def check_jacobian_det(
- transform, domain, constructor=tt.dscalar, test=0, make_comparable=None, elemwise=False
+ transform, domain, constructor=aet.dscalar, test=0, make_comparable=None, elemwise=False
):
y = constructor("y")
y.tag.test_value = test
@@ -75,15 +77,15 @@ def check_jacobian_det(
x = make_comparable(x)
if not elemwise:
- jac = tt.log(tt.nlinalg.det(jacobian(x, [y])))
+ jac = aet.log(aet.nlinalg.det(jacobian(x, [y])))
else:
- jac = tt.log(tt.abs_(tt.diag(jacobian(x, [y]))))
+ jac = aet.log(aet.abs_(aet.diag(jacobian(x, [y]))))
# ljd = log jacobian det
- actual_ljd = theano.function([y], jac)
+ actual_ljd = aesara.function([y], jac)
- computed_ljd = theano.function(
- [y], tt.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore"
+ computed_ljd = aesara.function(
+ [y], aet.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore"
)
for yval in domain.vals:
@@ -99,27 +101,27 @@ def test_stickbreaking():
check_vector_transform(tr.stick_breaking, Simplex(4))
check_transform(
- tr.stick_breaking, MultiSimplex(3, 2), constructor=tt.dmatrix, test=np.zeros((2, 2))
+ tr.stick_breaking, MultiSimplex(3, 2), constructor=aet.dmatrix, test=np.zeros((2, 2))
)
def test_stickbreaking_bounds():
- vals = get_values(tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0]))
+ vals = get_values(tr.stick_breaking, Vector(R, 2), aet.dvector, np.array([0, 0]))
close_to(vals.sum(axis=1), 1, tol)
close_to_logical(vals > 0, True, tol)
close_to_logical(vals < 1, True, tol)
check_jacobian_det(
- tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1]
+ tr.stick_breaking, Vector(R, 2), aet.dvector, np.array([0, 0]), lambda x: x[:-1]
)
def test_stickbreaking_accuracy():
val = np.array([-30])
- x = tt.dvector("x")
+ x = aet.dvector("x")
x.tag.test_value = val
- identity_f = theano.function([x], tr.stick_breaking.forward(tr.stick_breaking.backward(x)))
+ identity_f = aesara.function([x], tr.stick_breaking.forward(tr.stick_breaking.backward(x)))
close_to(val, identity_f(val), tol)
@@ -127,14 +129,16 @@ def test_sum_to_1():
check_vector_transform(tr.sum_to_1, Simplex(2))
check_vector_transform(tr.sum_to_1, Simplex(4))
- check_jacobian_det(tr.sum_to_1, Vector(Unit, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1])
+ check_jacobian_det(
+ tr.sum_to_1, Vector(Unit, 2), aet.dvector, np.array([0, 0]), lambda x: x[:-1]
+ )
def test_log():
check_transform(tr.log, Rplusbig)
check_jacobian_det(tr.log, Rplusbig, elemwise=True)
- check_jacobian_det(tr.log, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True)
+ check_jacobian_det(tr.log, Vector(Rplusbig, 2), aet.dvector, [0, 0], elemwise=True)
vals = get_values(tr.log)
close_to_logical(vals > 0, True, tol)
@@ -144,7 +148,7 @@ def test_log_exp_m1():
check_transform(tr.log_exp_m1, Rplusbig)
check_jacobian_det(tr.log_exp_m1, Rplusbig, elemwise=True)
- check_jacobian_det(tr.log_exp_m1, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True)
+ check_jacobian_det(tr.log_exp_m1, Vector(Rplusbig, 2), aet.dvector, [0, 0], elemwise=True)
vals = get_values(tr.log_exp_m1)
close_to_logical(vals > 0, True, tol)
@@ -154,7 +158,7 @@ def test_logodds():
check_transform(tr.logodds, Unit)
check_jacobian_det(tr.logodds, Unit, elemwise=True)
- check_jacobian_det(tr.logodds, Vector(Unit, 2), tt.dvector, [0.5, 0.5], elemwise=True)
+ check_jacobian_det(tr.logodds, Vector(Unit, 2), aet.dvector, [0.5, 0.5], elemwise=True)
vals = get_values(tr.logodds)
close_to_logical(vals > 0, True, tol)
@@ -166,7 +170,7 @@ def test_lowerbound():
check_transform(trans, Rplusbig)
check_jacobian_det(trans, Rplusbig, elemwise=True)
- check_jacobian_det(trans, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True)
+ check_jacobian_det(trans, Vector(Rplusbig, 2), aet.dvector, [0, 0], elemwise=True)
vals = get_values(trans)
close_to_logical(vals > 0, True, tol)
@@ -177,7 +181,7 @@ def test_upperbound():
check_transform(trans, Rminusbig)
check_jacobian_det(trans, Rminusbig, elemwise=True)
- check_jacobian_det(trans, Vector(Rminusbig, 2), tt.dvector, [-1, -1], elemwise=True)
+ check_jacobian_det(trans, Vector(Rminusbig, 2), aet.dvector, [-1, -1], elemwise=True)
vals = get_values(trans)
close_to_logical(vals < 0, True, tol)
@@ -196,7 +200,7 @@ def test_interval():
close_to_logical(vals < b, True, tol)
-@pytest.mark.skipif(theano.config.floatX == "float32", reason="Test fails on 32 bit")
+@pytest.mark.skipif(aesara.config.floatX == "float32", reason="Test fails on 32 bit")
def test_interval_near_boundary():
lb = -1.0
ub = 1e-7
@@ -219,26 +223,26 @@ def test_circular():
close_to_logical(vals > -np.pi, True, tol)
close_to_logical(vals < np.pi, True, tol)
- assert isinstance(trans.forward(1), tt.TensorConstant)
+ assert isinstance(trans.forward(1), TensorConstant)
def test_ordered():
check_vector_transform(tr.ordered, SortedVector(6))
- check_jacobian_det(tr.ordered, Vector(R, 2), tt.dvector, np.array([0, 0]), elemwise=False)
+ check_jacobian_det(tr.ordered, Vector(R, 2), aet.dvector, np.array([0, 0]), elemwise=False)
- vals = get_values(tr.ordered, Vector(R, 3), tt.dvector, np.zeros(3))
+ vals = get_values(tr.ordered, Vector(R, 3), aet.dvector, np.zeros(3))
close_to_logical(np.diff(vals) >= 0, True, tol)
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_chain():
chain_tranf = tr.Chain([tr.logodds, tr.ordered])
check_vector_transform(chain_tranf, UnitSortedVector(3))
- check_jacobian_det(chain_tranf, Vector(R, 4), tt.dvector, np.zeros(4), elemwise=False)
+ check_jacobian_det(chain_tranf, Vector(R, 4), aet.dvector, np.zeros(4), elemwise=False)
- vals = get_values(chain_tranf, Vector(R, 5), tt.dvector, np.zeros(5))
+ vals = get_values(chain_tranf, Vector(R, 5), aet.dvector, np.zeros(5))
close_to_logical(np.diff(vals) >= 0, True, tol)
@@ -260,7 +264,7 @@ def check_transform_elementwise_logp(self, model):
pt[x.name] = array
dist = x.distribution
logp_nojac = x0.distribution.logp(dist.transform_used.backward(array))
- jacob_det = dist.transform_used.jacobian_det(theano.shared(array))
+ jacob_det = dist.transform_used.jacobian_det(aesara.shared(array))
assert x.logp_elemwiset.ndim == jacob_det.ndim
elementwiselogp = logp_nojac + jacob_det
@@ -277,7 +281,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0):
pt[x.name] = array
dist = x.distribution
logp_nojac = x0.distribution.logp(dist.transform_used.backward(array))
- jacob_det = dist.transform_used.jacobian_det(theano.shared(array))
+ jacob_det = dist.transform_used.jacobian_det(aesara.shared(array))
assert x.logp_elemwiset.ndim == jacob_det.ndim
if vect_opt == 0:
@@ -369,7 +373,7 @@ def test_normal_ordered(self):
(np.ones(3), (4, 3)),
],
)
- @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+ @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_half_normal_ordered(self, sd, shape):
testval = np.sort(np.abs(np.random.randn(*shape)))
model = self.build_model(
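
The transform tests above now spell symbolic tensors as aet.dvector/aet.dmatrix and compile with aesara.function instead of tt.*/theano.function. A minimal sketch of the renamed API (it assumes only that aesara is installed; the variables are illustrative and not taken from the tests):

    import aesara
    import aesara.tensor as aet
    import numpy as np

    x = aet.dvector("x")                      # formerly tt.dvector
    y = aet.log(1.0 + aet.exp(x))             # a softplus-style graph built from aet ops
    f = aesara.function([x], y)               # formerly theano.function
    print(f(np.array([-30.0, 0.0, 30.0])))
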
diff --git a/pymc3/tests/test_types.py b/pymc3/tests/test_types.py
index bd8eaa42df..4adf8a6218 100644
--- a/pymc3/tests/test_types.py
+++ b/pymc3/tests/test_types.py
@@ -14,8 +14,8 @@
from copy import copy
+import aesara
import numpy as np
-import theano
from pymc3.distributions import Normal
from pymc3.model import Model
@@ -27,14 +27,14 @@ class TestType:
samplers = (Metropolis, Slice, HamiltonianMC, NUTS)
def setup_method(self):
- # save theano config object
- self.theano_config = copy(theano.config)
+ # save aesara config object
+ self.aesara_config = copy(aesara.config)
def teardown_method(self):
- # restore theano config
- theano.config = self.theano_config
+ # restore aesara config
+ aesara.config = self.aesara_config
- @theano.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
+ @aesara.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
def test_float64(self):
with Model() as model:
x = Normal("x", testval=np.array(1.0, dtype="float64"))
@@ -47,7 +47,7 @@ def test_float64(self):
with model:
sample(10, sampler())
- @theano.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
+ @aesara.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
def test_float32(self):
with Model() as model:
x = Normal("x", testval=np.array(1.0, dtype="float32"))
@@ -60,7 +60,7 @@ def test_float32(self):
with model:
sample(10, sampler())
- @theano.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
+ @aesara.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
def test_float64_MLDA(self):
data = np.random.randn(5)
@@ -78,7 +78,7 @@ def test_float64_MLDA(self):
with model:
sample(10, MLDA(coarse_models=[coarse_model]))
- @theano.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
+ @aesara.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
def test_float32_MLDA(self):
data = np.random.randn(5).astype("float32")
diff --git a/pymc3/tests/test_updates.py b/pymc3/tests/test_updates.py
index 9d8f644075..77dff3f17a 100644
--- a/pymc3/tests/test_updates.py
+++ b/pymc3/tests/test_updates.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
import pytest
-import theano
from pymc3.variational.updates import (
adadelta,
@@ -28,12 +28,12 @@
sgd,
)
-_a = theano.shared(1.0)
+_a = aesara.shared(1.0)
_b = _a * 2
-_m = theano.shared(np.empty((10,), theano.config.floatX))
+_m = aesara.shared(np.empty((10,), aesara.config.floatX))
_n = _m.sum()
-_m2 = theano.shared(np.empty((10, 10, 10), theano.config.floatX))
+_m2 = aesara.shared(np.empty((10, 10, 10), aesara.config.floatX))
_n2 = _b + _n + _m2.sum()
@@ -71,7 +71,7 @@
ids=["scalar", "matrix", "mixed"],
)
def test_updates_fast(opt, loss_and_params, kwargs, getter):
- with theano.config.change_flags(compute_test_value="ignore"):
+ with aesara.config.change_flags(compute_test_value="ignore"):
loss, param = getter(loss_and_params)
args = dict()
args.update(**kwargs)
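
test_updates.py above now builds its shared loss terms with aesara.shared and wraps compilation in aesara.config.change_flags. A hedged sketch of that pattern, reusing one of the optimizers imported above (sgd); the toy loss is illustrative only:

    import aesara
    import numpy as np
    from pymc3.variational.updates import sgd

    w = aesara.shared(np.zeros(3, dtype=aesara.config.floatX), name="w")
    loss = ((w - 1.0) ** 2).sum()

    with aesara.config.change_flags(compute_test_value="ignore"):
        updates = sgd(loss, [w], learning_rate=0.1)      # OrderedDict {shared: update expr}
        step = aesara.function([], loss, updates=updates)

    for _ in range(100):
        step()
    print(w.get_value())                                 # approaches [1., 1., 1.]
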
diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py
index 1ef9b61629..8e115350b4 100644
--- a/pymc3/tests/test_variational_inference.py
+++ b/pymc3/tests/test_variational_inference.py
@@ -16,18 +16,18 @@
import io
import operator
+import aesara
+import aesara.tensor as aet
import numpy as np
import pytest
-import theano
-import theano.tensor as tt
import pymc3 as pm
import pymc3.memoize
import pymc3.util
+from pymc3.aesaraf import intX
from pymc3.tests import models
from pymc3.tests.helpers import not_raises
-from pymc3.theanof import intX
from pymc3.variational import flows, opvi
from pymc3.variational.approximations import (
Empirical,
@@ -51,7 +51,7 @@ def test_callbacks_convergence(diff, ord):
cb = pm.variational.callbacks.CheckParametersConvergence(every=1, diff=diff, ord=ord)
class _approx:
- params = (theano.shared(np.asarray([1, 2, 3])),)
+ params = (aesara.shared(np.asarray([1, 2, 3])),)
approx = _approx()
@@ -186,7 +186,7 @@ def test_sample_simple(three_var_approx, request):
@pytest.fixture
def aevb_initial():
- return theano.shared(np.random.rand(3, 7).astype("float32"))
+ return aesara.shared(np.random.rand(3, 7).astype("float32"))
@pytest.fixture(
@@ -251,7 +251,7 @@ def test_sample_aevb(three_var_aevb_approx, aevb_initial):
def test_replacements_in_sample_node_aevb(three_var_aevb_approx, aevb_initial):
- inp = tt.matrix(dtype="float32")
+ inp = aet.matrix(dtype="float32")
three_var_aevb_approx.sample_node(
three_var_aevb_approx.model.one, 2, more_replacements={aevb_initial: inp}
).eval({inp: np.random.rand(7, 7).astype("float32")})
@@ -265,14 +265,14 @@ def test_vae():
minibatch_size = 10
data = pm.floatX(np.random.rand(100))
x_mini = pm.Minibatch(data, minibatch_size)
- x_inp = tt.vector()
+ x_inp = aet.vector()
x_inp.tag.test_value = data[:minibatch_size]
- ae = theano.shared(pm.floatX([0.1, 0.1]))
- be = theano.shared(pm.floatX(1.0))
+ ae = aesara.shared(pm.floatX([0.1, 0.1]))
+ be = aesara.shared(pm.floatX(1.0))
- ad = theano.shared(pm.floatX(1.0))
- bd = theano.shared(pm.floatX(1.0))
+ ad = aesara.shared(pm.floatX(1.0))
+ bd = aesara.shared(pm.floatX(1.0))
enc = x_inp.dimshuffle(0, "x") * ae.dimshuffle("x", 0) + be
mu, rho = enc[:, 0], enc[:, 1]
@@ -496,8 +496,8 @@ def test_elbo():
sigma = 1.0
y_obs = np.array([1.6, 1.4])
- post_mu = np.array([1.88], dtype=theano.config.floatX)
- post_sigma = np.array([1], dtype=theano.config.floatX)
+ post_mu = np.array([1.88], dtype=aesara.config.floatX)
+ post_sigma = np.array([1], dtype=aesara.config.floatX)
# Create a model for test
with pm.Model() as model:
mu = pm.Normal("mu", mu=mu0, sigma=sigma)
@@ -505,13 +505,13 @@ def test_elbo():
# Create variational gradient tensor
mean_field = MeanField(model=model)
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
elbo = -pm.operators.KL(mean_field)()(10000)
mean_field.shared_params["mu"].set_value(post_mu)
mean_field.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))
- f = theano.function([], elbo)
+ f = aesara.function([], elbo)
elbo_mc = f()
# Exact value
@@ -534,17 +534,17 @@ def test_scale_cost_to_minibatch_works(aux_total_size):
y_obs = np.array([1.6, 1.4])
beta = len(y_obs) / float(aux_total_size)
- # TODO: theano_config
- # with pm.Model(theano_config=dict(floatX='float64')):
+ # TODO: aesara_config
+ # with pm.Model(aesara_config=dict(floatX='float64')):
# did not work as expected
# there were some numeric problems, so float64 is forced
- with theano.config.change_flags(floatX="float64", warn_float64="ignore"):
+ with aesara.config.change_flags(floatX="float64", warn_float64="ignore"):
- assert theano.config.floatX == "float64"
- assert theano.config.warn_float64 == "ignore"
+ assert aesara.config.floatX == "float64"
+ assert aesara.config.warn_float64 == "ignore"
- post_mu = np.array([1.88], dtype=theano.config.floatX)
- post_sigma = np.array([1], dtype=theano.config.floatX)
+ post_mu = np.array([1.88], dtype=aesara.config.floatX)
+ post_sigma = np.array([1], dtype=aesara.config.floatX)
with pm.Model():
mu = pm.Normal("mu", mu=mu0, sigma=sigma)
@@ -555,7 +555,7 @@ def test_scale_cost_to_minibatch_works(aux_total_size):
mean_field_1.shared_params["mu"].set_value(post_mu)
mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)
with pm.Model():
@@ -569,7 +569,7 @@ def test_scale_cost_to_minibatch_works(aux_total_size):
mean_field_2.shared_params["mu"].set_value(post_mu)
mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000)
np.testing.assert_allclose(
@@ -587,10 +587,10 @@ def test_elbo_beta_kl(aux_total_size):
y_obs = np.array([1.6, 1.4])
beta = len(y_obs) / float(aux_total_size)
- with theano.config.change_flags(floatX="float64", warn_float64="ignore"):
+ with aesara.config.change_flags(floatX="float64", warn_float64="ignore"):
- post_mu = np.array([1.88], dtype=theano.config.floatX)
- post_sigma = np.array([1], dtype=theano.config.floatX)
+ post_mu = np.array([1.88], dtype=aesara.config.floatX)
+ post_sigma = np.array([1], dtype=aesara.config.floatX)
with pm.Model():
mu = pm.Normal("mu", mu=mu0, sigma=sigma)
@@ -601,7 +601,7 @@ def test_elbo_beta_kl(aux_total_size):
mean_field_1.shared_params["mu"].set_value(post_mu)
mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)
with pm.Model():
@@ -612,7 +612,7 @@ def test_elbo_beta_kl(aux_total_size):
mean_field_3.shared_params["mu"].set_value(post_mu)
mean_field_3.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
elbo_via_beta_kl = -pm.operators.KL(mean_field_3, beta=beta)()(10000)
np.testing.assert_allclose(
@@ -750,7 +750,7 @@ def test_remove_scan_op():
inference = ADVI()
buff = io.StringIO()
inference.run_profiling(n=10).summary(buff)
- assert "theano.scan.op.Scan" not in buff.getvalue()
+ assert "aesara.scan.op.Scan" not in buff.getvalue()
buff.close()
@@ -780,7 +780,7 @@ def test_clear_cache():
def another_simple_model():
_model = models.simple_model()[1]
with _model:
- pm.Potential("pot", tt.ones((10, 10)))
+ pm.Potential("pot", aet.ones((10, 10)))
return _model
@@ -831,8 +831,8 @@ def aevb_model():
pm.Normal("y", shape=(2,))
x = model.x
y = model.y
- mu = theano.shared(x.init_value)
- rho = theano.shared(np.zeros_like(x.init_value))
+ mu = aesara.shared(x.init_value)
+ rho = aesara.shared(np.zeros_like(x.init_value))
return {"model": model, "y": y, "x": x, "replace": dict(mu=mu, rho=rho)}
@@ -911,13 +911,13 @@ def binomial_model_inference(binomial_model, inference_spec):
def test_replacements(binomial_model_inference):
- d = tt.bscalar()
+ d = aet.bscalar()
d.tag.test_value = 1
approx = binomial_model_inference.approx
p = approx.model.p
p_t = p ** 3
p_s = approx.sample_node(p_t)
- if theano.config.compute_test_value != "off":
+ if aesara.config.compute_test_value != "off":
assert p_s.tag.test_value.shape == p_t.tag.test_value.shape
sampled = [p_s.eval() for _ in range(100)]
assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic
@@ -934,13 +934,13 @@ def test_replacements(binomial_model_inference):
def test_sample_replacements(binomial_model_inference):
- i = tt.iscalar()
+ i = aet.iscalar()
i.tag.test_value = 1
approx = binomial_model_inference.approx
p = approx.model.p
p_t = p ** 3
p_s = approx.sample_node(p_t, size=100)
- if theano.config.compute_test_value != "off":
+ if aesara.config.compute_test_value != "off":
assert p_s.tag.test_value.shape == (100,) + p_t.tag.test_value.shape
sampled = p_s.eval()
assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic
@@ -961,7 +961,7 @@ def test_discrete_not_allowed():
with pm.Model():
mu = pm.Normal("mu", mu=0, sigma=10, shape=3)
- z = pm.Categorical("z", p=tt.ones(3) / 3, shape=len(y))
+ z = pm.Categorical("z", p=aet.ones(3) / 3, shape=len(y))
pm.Normal("y_obs", mu=mu[z], sigma=1.0, observed=y)
with pytest.raises(opvi.ParametrizationError):
pm.fit(n=1) # fails
@@ -1016,34 +1016,34 @@ def init_(**kw):
def test_flow_det(flow_spec):
- z0 = tt.arange(0, 20).astype("float32")
+ z0 = aet.arange(0, 20).astype("float32")
flow = flow_spec(dim=20, z0=z0.dimshuffle("x", 0))
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
z1 = flow.forward.flatten()
- J = tt.jacobian(z1, z0)
- logJdet = tt.log(tt.abs_(tt.nlinalg.det(J)))
+ J = aet.jacobian(z1, z0)
+ logJdet = aet.log(aet.abs_(aet.nlinalg.det(J)))
det = flow.logdet[0]
np.testing.assert_allclose(logJdet.eval(), det.eval(), atol=0.0001)
def test_flow_det_local(flow_spec):
- z0 = tt.arange(0, 12).astype("float32")
+ z0 = aet.arange(0, 12).astype("float32")
spec = flow_spec.cls.get_param_spec_for(d=12)
params = dict()
for k, shp in spec.items():
params[k] = np.random.randn(1, *shp).astype("float32")
flow = flow_spec(dim=12, z0=z0.reshape((1, 1, 12)), **params)
assert flow.batched
- with theano.config.change_flags(compute_test_value="off"):
+ with aesara.config.change_flags(compute_test_value="off"):
z1 = flow.forward.flatten()
- J = tt.jacobian(z1, z0)
- logJdet = tt.log(tt.abs_(tt.nlinalg.det(J)))
+ J = aet.jacobian(z1, z0)
+ logJdet = aet.log(aet.abs_(aet.nlinalg.det(J)))
det = flow.logdet[0]
np.testing.assert_allclose(logJdet.eval(), det.eval(), atol=0.0001)
def test_flows_collect_chain():
- initial = tt.ones((3, 2))
+ initial = aet.ones((3, 2))
flow1 = flows.PlanarFlow(dim=2, z0=initial)
flow2 = flows.PlanarFlow(dim=2, z0=flow1)
assert len(flow2.params) == 3
@@ -1067,4 +1067,4 @@ def test_flow_formula(formula, length, order):
assert len(flows_list) == length
if order is not None:
assert flows_list == order
- spec(dim=2, jitter=1)(tt.ones((3, 2))).eval() # should work
+ spec(dim=2, jitter=1)(aet.ones((3, 2))).eval() # should work
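
Most of the edits in test_variational_inference.py swap theano.config.change_flags for aesara.config.change_flags, which works both as a decorator and as a context manager. A small sketch of the two forms (assumes aesara is installed; names are illustrative):

    import aesara
    import aesara.tensor as aet

    x = aet.vector("x")

    @aesara.config.change_flags(compute_test_value="off")
    def compile_sum_of_squares():
        # no test values are required on inputs inside the decorated scope
        return aesara.function([x], (x ** 2).sum())

    f = compile_sum_of_squares()

    with aesara.config.change_flags(compute_test_value="off"):
        # the same flag change, scoped as a context manager
        g = aesara.function([x], aet.exp(x))
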
diff --git a/pymc3/tuning/scaling.py b/pymc3/tuning/scaling.py
index 49a59ff0d7..41d2af2820 100644
--- a/pymc3/tuning/scaling.py
+++ b/pymc3/tuning/scaling.py
@@ -16,9 +16,9 @@
from numpy import exp, log, sqrt
+from pymc3.aesaraf import hessian_diag, inputvars
from pymc3.blocking import ArrayOrdering, DictToArrayBijection
from pymc3.model import Point, modelcontext
-from pymc3.theanof import hessian_diag, inputvars
from pymc3.util import get_var_name
__all__ = ["find_hessian", "trace_cov", "guess_scaling"]
diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py
index 2a800b2b4d..fcdd4fe8c4 100644
--- a/pymc3/tuning/starting.py
+++ b/pymc3/tuning/starting.py
@@ -19,8 +19,8 @@
"""
import copy
+import aesara.gradient as tg
import numpy as np
-import theano.gradient as tg
from fastprogress.fastprogress import ProgressBar, progress_bar
from numpy import isfinite, nan_to_num
@@ -28,9 +28,9 @@
import pymc3 as pm
+from pymc3.aesaraf import inputvars
from pymc3.blocking import ArrayOrdering, DictToArrayBijection
from pymc3.model import Point, modelcontext
-from pymc3.theanof import inputvars
from pymc3.util import (
check_start_vals,
get_default_varnames,
diff --git a/pymc3/util.py b/pymc3/util.py
index 84b4f6c3e5..f0429901f8 100644
--- a/pymc3/util.py
+++ b/pymc3/util.py
@@ -22,7 +22,7 @@
import numpy as np
import xarray
-from theano.tensor import TensorVariable
+from aesara.tensor.var import TensorVariable
from pymc3.exceptions import SamplingError
@@ -169,7 +169,7 @@ def get_repr_for_variable(variable, formatting="plain"):
def get_var_name(var):
"""Get an appropriate, plain variable name for a variable. Necessary
- because we override theano.tensor.TensorVariable.__str__ to give informative
+ because we override aesara.tensor.var.TensorVariable.__str__ to give informative
string representations to our pymc3.PyMC3Variables, yet we want to use the
plain name as e.g. keys in dicts.
"""
diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py
index 896f7422c3..4b6784f2ef 100644
--- a/pymc3/variational/approximations.py
+++ b/pymc3/variational/approximations.py
@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
-import theano
-from theano import tensor as tt
+from aesara import tensor as aet
+from aesara.graph.basic import Variable
+from aesara.tensor.var import TensorVariable
import pymc3 as pm
@@ -53,13 +55,13 @@ def cov(self):
if self.batched:
return batched_diag(var)
else:
- return tt.diag(var)
+ return aet.diag(var)
@node_property
def std(self):
return rho2sigma(self.rho)
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init_group__(self, group):
super().__init_group__(group)
if not self._check_user_params():
@@ -82,8 +84,8 @@ def create_shared_params(self, start=None):
start = np.tile(start, (self.bdim, 1))
rho = np.tile(rho, (self.bdim, 1))
return {
- "mu": theano.shared(pm.floatX(start), "mu"),
- "rho": theano.shared(pm.floatX(rho), "rho"),
+ "mu": aesara.shared(pm.floatX(start), "mu"),
+ "rho": aesara.shared(pm.floatX(rho), "rho"),
}
@node_property
@@ -97,7 +99,7 @@ def symbolic_random(self):
def symbolic_logq_not_scaled(self):
z0 = self.symbolic_initial
std = rho2sigma(self.rho)
- logdet = tt.log(std)
+ logdet = aet.log(std)
logq = pm.Normal.dist().logp(z0) - logdet
return logq.sum(range(1, logq.ndim))
@@ -114,7 +116,7 @@ class FullRankGroup(Group):
short_name = "full_rank"
alias_names = frozenset(["fr"])
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init_group__(self, group):
super().__init_group__(group)
if not self._check_user_params():
@@ -133,21 +135,21 @@ def create_shared_params(self, start=None):
else:
start = self.bij.map(start)
n = self.ddim
- L_tril = np.eye(n)[np.tril_indices(n)].astype(theano.config.floatX)
+ L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX)
if self.batched:
start = np.tile(start, (self.bdim, 1))
L_tril = np.tile(L_tril, (self.bdim, 1))
- return {"mu": theano.shared(start, "mu"), "L_tril": theano.shared(L_tril, "L_tril")}
+ return {"mu": aesara.shared(start, "mu"), "L_tril": aesara.shared(L_tril, "L_tril")}
@node_property
def L(self):
if self.batched:
- L = tt.zeros((self.ddim, self.ddim, self.bdim))
- L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"].T)
+ L = aet.zeros((self.ddim, self.ddim, self.bdim))
+ L = aet.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"].T)
L = L.dimshuffle(2, 0, 1)
else:
- L = tt.zeros((self.ddim, self.ddim))
- L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"])
+ L = aet.zeros((self.ddim, self.ddim))
+ L = aet.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"])
return L
@node_property
@@ -158,16 +160,16 @@ def mean(self):
def cov(self):
L = self.L
if self.batched:
- return tt.batched_dot(L, L.swapaxes(-1, -2))
+ return aet.batched_dot(L, L.swapaxes(-1, -2))
else:
return L.dot(L.T)
@node_property
def std(self):
if self.batched:
- return tt.sqrt(batched_diag(self.cov))
+ return aet.sqrt(batched_diag(self.cov))
else:
- return tt.sqrt(tt.diag(self.cov))
+ return aet.sqrt(aet.diag(self.cov))
@property
def num_tril_entries(self):
@@ -189,7 +191,7 @@ def logq(z_b, mu_b, L_b):
# it's gonna be so slow
# scan is computed over batch and then summed up
# output shape is (batch, samples)
- return theano.scan(logq, [z.swapaxes(0, 1), self.mean, self.L])[0].sum(0)
+ return aesara.scan(logq, [z.swapaxes(0, 1), self.mean, self.L])[0].sum(0)
else:
return pm.MvNormal.dist(mu=self.mean, chol=self.L).logp(z)
@@ -202,7 +204,7 @@ def symbolic_random(self):
# initial: bxsxd
# L: bxdxd
initial = initial.swapaxes(0, 1)
- return tt.batched_dot(initial, L.swapaxes(1, 2)).swapaxes(0, 1) + mu
+ return aet.batched_dot(initial, L.swapaxes(1, 2)).swapaxes(0, 1) + mu
else:
return initial.dot(L.T) + mu
@@ -218,7 +220,7 @@ class EmpiricalGroup(Group):
__param_spec__ = dict(histogram=("s", "d"))
short_name = "empirical"
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init_group__(self, group):
super().__init_group__(group)
self._check_trace()
@@ -254,7 +256,7 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
for j in range(len(trace)):
histogram[i] = self.bij.map(trace.point(j, t))
i += 1
- return dict(histogram=theano.shared(pm.floatX(histogram), "histogram"))
+ return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
def _check_trace(self):
trace = self._kwargs.get("trace", None)
@@ -264,7 +266,7 @@ def _check_trace(self):
def randidx(self, size=None):
if size is None:
size = (1,)
- elif isinstance(size, tt.TensorVariable):
+ elif isinstance(size, TensorVariable):
if size.ndim < 1:
size = size[None]
elif size.ndim > 1:
@@ -278,16 +280,16 @@ def randidx(self, size=None):
).astype("int32")
def _new_initial(self, size, deterministic, more_replacements=None):
- theano_condition_is_here = isinstance(deterministic, tt.Variable)
- if theano_condition_is_here:
- return tt.switch(
+ aesara_condition_is_here = isinstance(deterministic, Variable)
+ if aesara_condition_is_here:
+ return aet.switch(
deterministic,
- tt.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1),
+ aet.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1),
self.histogram[self.randidx(size)],
)
else:
if deterministic:
- return tt.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1)
+ return aet.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1)
else:
return self.histogram[self.randidx(size)]
@@ -310,10 +312,10 @@ def cov(self):
@node_property
def std(self):
- return tt.sqrt(tt.diag(self.cov))
+ return aet.sqrt(aet.diag(self.cov))
def __str__(self):
- if isinstance(self.histogram, theano.compile.SharedVariable):
+ if isinstance(self.histogram, aesara.compile.SharedVariable):
shp = ", ".join(map(str, self.histogram.shape.eval()))
else:
shp = "None, " + str(self.ddim)
@@ -370,7 +372,7 @@ class NormalizingFlowGroup(Group):
"""
default_flow = "scale-loc"
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init_group__(self, group):
super().__init_group__(group)
# objects to be resolved
@@ -584,7 +586,7 @@ def evaluate_over_trace(self, node):
Parameters
----------
- node: Theano Variables (or Theano expressions)
+ node: Aesara Variables (or Aesara expressions)
Returns
-------
@@ -593,9 +595,9 @@ def evaluate_over_trace(self, node):
node = self.to_flat_input(node)
def sample(post):
- return theano.clone(node, {self.input: post})
+ return aesara.clone_replace(node, {self.input: post})
- nodes, _ = theano.scan(sample, self.histogram)
+ nodes, _ = aesara.scan(sample, self.histogram)
return nodes
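
theano.clone becomes aesara.clone_replace throughout approximations.py; both rebuild a graph with some of its variables swapped out, as evaluate_over_trace does above. A minimal sketch (assumes aesara is installed; the variables are illustrative):

    import aesara
    import aesara.tensor as aet
    import numpy as np

    a = aesara.shared(np.array(2.0), name="a")
    x = aet.dscalar("x")
    y = a * x + 1.0

    b = aesara.shared(np.array(10.0), name="b")
    y_swapped = aesara.clone_replace(y, {a: b})   # same graph, with a replaced by b

    f = aesara.function([x], [y, y_swapped])
    print(f(3.0))                                 # [array(7.), array(31.)]
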
diff --git a/pymc3/variational/flows.py b/pymc3/variational/flows.py
index 601c7351fa..f78c32e69b 100644
--- a/pymc3/variational/flows.py
+++ b/pymc3/variational/flows.py
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import aesara
import numpy as np
-import theano
-from theano import tensor as tt
+from aesara import tensor as aet
from pymc3.distributions.dist_math import rho2sigma
from pymc3.memoize import WithMemoization
@@ -161,14 +161,14 @@ def __init__(self, z0=None, dim=None, jitter=0.001, batch_size=None, local=False
"Cannot infer dimension of flow, " "please provide dim or Flow instance as z0"
)
if z0 is None:
- self.z0 = tt.matrix() # type: tt.TensorVariable
+ self.z0 = aet.matrix() # type: TensorVariable
else:
- self.z0 = tt.as_tensor(z0)
+ self.z0 = aet.as_tensor(z0)
self.parent = parent
def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"):
if dtype == "floatX":
- dtype = theano.config.floatX
+ dtype = aesara.config.floatX
spec = self.__param_spec__[name]
shape = tuple(eval(s, {"d": self.dim}) for s in spec)
if user is None:
@@ -178,7 +178,7 @@ def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"):
if self.batch_size is None:
raise opvi.BatchedGroupError("Need batch size to infer parameter shape")
shape = (self.batch_size,) + shape
- return theano.shared(
+ return aesara.shared(
np.asarray(np.random.normal(size=shape) * self.__jitter + ref).astype(dtype),
name=name,
)
@@ -189,7 +189,7 @@ def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"):
shape = (-1,) + shape
else:
shape = (self.batch_size,) + shape
- return tt.as_tensor(user).reshape(shape)
+ return aet.as_tensor(user).reshape(shape)
@property
def params(self):
@@ -205,14 +205,14 @@ def all_params(self):
return params
@property
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def sum_logdets(self):
dets = [self.logdet]
current = self
while not current.isroot:
current = current.parent
dets.append(current.logdet)
- return tt.add(*dets)
+ return aet.add(*dets)
@node_property
def forward(self):
@@ -222,9 +222,9 @@ def forward(self):
def logdet(self):
raise NotImplementedError
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def forward_pass(self, z0):
- ret = theano.clone(self.forward, {self.root.z0: z0})
+ ret = aesara.clone_replace(self.forward, {self.root.z0: z0})
try:
ret.tag.test_value = np.random.normal(size=z0.tag.test_value.shape).astype(
self.z0.dtype
@@ -297,7 +297,7 @@ def __call__(self, *args):
class LinearFlow(AbstractFlow):
__param_spec__ = dict(u=("d",), w=("d",), b=())
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init__(self, h, u=None, w=None, b=None, **kwargs):
self.h = h
super().__init__(**kwargs)
@@ -325,7 +325,7 @@ def forward(self):
if not self.batched:
hwz = h(z.dot(w) + b) # s
# sxd + (s \outer d) = sxd
- z1 = z + tt.outer(hwz, u) # sxd
+ z1 = z + aet.outer(hwz, u) # sxd
return z1
else:
z = z.swapaxes(0, 1)
@@ -334,7 +334,7 @@ def forward(self):
# w bxd
b = b.dimshuffle(0, "x")
# b bx-
- hwz = h(tt.batched_dot(z, w) + b) # bxs
+ hwz = h(aet.batched_dot(z, w) + b) # bxs
# bxsxd + (bxsx- * bx-xd) = bxsxd
hwz = hwz.dimshuffle(0, 1, "x") # bxsx-
u = u.dimshuffle(0, "x", 1) # bx-xd
@@ -352,8 +352,8 @@ def logdet(self):
# f'(sxd \dot d + .) * -xd = sxd
phi = deriv(z.dot(w) + b).dimshuffle(0, "x") * w.dimshuffle("x", 0)
# \abs(. + sxd \dot d) = s
- det = tt.abs_(1.0 + phi.dot(u))
- return tt.log(det)
+ det = aet.abs_(1.0 + phi.dot(u))
+ return aet.log(det)
else:
z = z.swapaxes(0, 1)
b = b.dimshuffle(0, "x")
@@ -362,20 +362,20 @@ def logdet(self):
# w bxd
# b bx-x-
# f'(bxsxd \bdot bxd + bx-x-) * bx-xd = bxsxd
- phi = deriv(tt.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle(0, "x", 1)
+ phi = deriv(aet.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle(0, "x", 1)
# \abs(. + bxsxd \bdot bxd) = bxs
- det = tt.abs_(1.0 + tt.batched_dot(phi, u)) # bxs
- return tt.log(det).sum(0) # s
+ det = aet.abs_(1.0 + aet.batched_dot(phi, u)) # bxs
+ return aet.log(det).sum(0) # s
class Tanh(FlowFn):
- fn = tt.tanh
- inv = tt.arctanh
+ fn = aet.tanh
+ inv = aet.arctanh
@staticmethod
def deriv(*args):
(x,) = args
- return 1.0 - tt.tanh(x) ** 2
+ return 1.0 - aet.tanh(x) ** 2
@AbstractFlow.register
@@ -390,7 +390,7 @@ def make_uw(self, u, w):
# u_: d
# w_: d
wu = u.dot(w) # .
- mwu = -1.0 + tt.nnet.softplus(wu) # .
+ mwu = -1.0 + aet.nnet.softplus(wu) # .
# d + (. - .) * d / .
u_h = u + (mwu - wu) * w / ((w ** 2).sum() + 1e-10)
return u_h, w
@@ -398,7 +398,7 @@ def make_uw(self, u, w):
# u_: bxd
# w_: bxd
wu = (u * w).sum(-1, keepdims=True) # bx-
- mwu = -1.0 + tt.nnet.softplus(wu) # bx-
+ mwu = -1.0 + aet.nnet.softplus(wu) # bx-
# bxd + (bx- - bx-) * bxd / bx- = bxd
u_h = u + (mwu - wu) * w / ((w ** 2).sum(-1, keepdims=True) + 1e-10)
return u_h, w
@@ -407,7 +407,7 @@ def make_uw(self, u, w):
class ReferencePointFlow(AbstractFlow):
__param_spec__ = dict(a=(), b=(), z_ref=("d",))
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init__(self, h, a=None, b=None, z_ref=None, **kwargs):
super().__init__(**kwargs)
a = self.add_param(a, "a")
@@ -474,7 +474,7 @@ def logdet(self):
r = (z - z_ref).norm(2, axis=-1, keepdims=True) # s
har = h(a, r)
dar = deriv(a, r)
- logdet = tt.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r))
+ logdet = aet.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r))
if self.batched:
return logdet.sum([0, -1])
else:
@@ -506,8 +506,8 @@ def __init__(self, **kwargs):
super().__init__(Radial(), **kwargs)
def make_ab(self, a, b):
- a = tt.exp(a)
- b = -a + tt.nnet.softplus(b)
+ a = aet.exp(a)
+ b = -a + aet.nnet.softplus(b)
return a, b
@@ -531,7 +531,7 @@ def forward(self):
@node_property
def logdet(self):
- return tt.zeros((self.z0.shape[0],))
+ return aet.zeros((self.z0.shape[0],))
@AbstractFlow.register
@@ -539,7 +539,7 @@ class ScaleFlow(AbstractFlow):
__param_spec__ = dict(rho=("d",))
short_name = "scale"
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init__(self, rho=None, **kwargs):
super().__init__(**kwargs)
rho = self.add_param(rho, "rho")
@@ -556,7 +556,7 @@ def forward(self):
@node_property
def logdet(self):
- return tt.repeat(tt.sum(tt.log(self.scale)), self.z0.shape[0])
+ return aet.repeat(aet.sum(aet.log(self.scale)), self.z0.shape[0])
@AbstractFlow.register
@@ -564,18 +564,18 @@ class HouseholderFlow(AbstractFlow):
__param_spec__ = dict(v=("d",))
short_name = "hh"
- @theano.config.change_flags(compute_test_value="raise")
+ @aesara.config.change_flags(compute_test_value="raise")
def __init__(self, v=None, **kwargs):
super().__init__(**kwargs)
v = self.add_param(v, "v")
self.shared_params = dict(v=v)
if self.batched:
vv = v.dimshuffle(0, 1, "x") * v.dimshuffle(0, "x", 1)
- I = tt.eye(self.dim).dimshuffle("x", 0, 1)
+ I = aet.eye(self.dim).dimshuffle("x", 0, 1)
vvn = (1e-10 + (v ** 2).sum(-1)).dimshuffle(0, "x", "x")
else:
- vv = tt.outer(v, v)
- I = tt.eye(self.dim)
+ vv = aet.outer(v, v)
+ I = aet.eye(self.dim)
vvn = (v ** 2).sum(-1) + 1e-10
self.H = I - 2.0 * vv / vvn
@@ -584,10 +584,10 @@ def forward(self):
z = self.z0 # sxd
H = self.H # dxd
if self.batched:
- return tt.batched_dot(z.swapaxes(0, 1), H).swapaxes(0, 1)
+ return aet.batched_dot(z.swapaxes(0, 1), H).swapaxes(0, 1)
else:
return z.dot(H)
@node_property
def logdet(self):
- return tt.zeros((self.z0.shape[0],))
+ return aet.zeros((self.z0.shape[0],))
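
HouseholderFlow above builds the reflection H = I - 2 v v^T / (v^T v) and reports a zero log-determinant, since a reflection is volume preserving. A NumPy cross-check of that claim (a sketch, not part of the change set):

    import numpy as np

    v = np.random.randn(4)
    H = np.eye(4) - 2.0 * np.outer(v, v) / (v @ v + 1e-10)
    z = np.random.randn(3, 4)                           # s x d batch, like z0 above
    z1 = z @ H                                          # forward pass
    print(np.isclose(abs(np.linalg.det(H)), 1.0))       # |det H| == 1, so logdet == 0
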
diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py
index 85eb08e65c..1b77104c60 100644
--- a/pymc3/variational/inference.py
+++ b/pymc3/variational/inference.py
@@ -130,7 +130,7 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs):
total_grad_norm_constraint: `float`
Bounds gradient norm, prevents exploding gradient problem
fn_kwargs: `dict`
- Add kwargs to theano.function (e.g. `{'profile': True}`)
+ Add kwargs to aesara.function (e.g. `{'profile': True}`)
more_replacements: `dict`
Apply custom replacements before calculating gradients
@@ -423,7 +423,7 @@ class ADVI(KLqp):
The tensors to which mini-batched samples are supplied are
handled separately by using callbacks in :func:`Inference.fit` method
- that change storage of shared theano variable or by :func:`pymc3.generator`
+ that change storage of shared aesara variable or by :func:`pymc3.generator`
that automatically iterates over minibatches and is defined beforehand.
- (optional) Parameters of deterministic mappings
@@ -794,7 +794,7 @@ def fit(
total_grad_norm_constraint: `float`
Bounds gradient norm, prevents exploding gradient problem
fn_kwargs: `dict`
- Add kwargs to theano.function (e.g. `{'profile': True}`)
+ Add kwargs to aesara.function (e.g. `{'profile': True}`)
more_replacements: `dict`
Apply custom replacements before calculating gradients
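
The fn_kwargs entries in the inference docstrings now point at aesara.function. A hedged usage sketch of passing such kwargs through pm.fit (the model and data here are illustrative only):

    import numpy as np
    import pymc3 as pm

    data = np.random.randn(100)
    with pm.Model():
        mu = pm.Normal("mu", 0.0, 10.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=data)
        # profile the compiled aesara step function, as the docstring example suggests
        approx = pm.fit(n=1000, method="advi", fn_kwargs={"profile": True})
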
diff --git a/pymc3/variational/operators.py b/pymc3/variational/operators.py
index 9a5c2fdc20..e69d9c447e 100644
--- a/pymc3/variational/operators.py
+++ b/pymc3/variational/operators.py
@@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-import theano
+import aesara
-from theano import tensor as tt
+from aesara import tensor as aet
import pymc3 as pm
@@ -75,7 +75,7 @@ def __init__(self, op, tf):
raise opvi.ParametrizationError("Op should be KSD")
ObjectiveFunction.__init__(self, op, tf)
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __call__(self, nmc, **kwargs):
op = self.op # type: KSD
grad = op.apply(self.tf)
@@ -88,7 +88,7 @@ def __call__(self, nmc, **kwargs):
else:
params = self.test_params + kwargs["more_tf_params"]
grad *= pm.floatX(-1)
- grads = tt.grad(None, params, known_grads={z: grad})
+ grads = aet.grad(None, params, known_grads={z: grad})
return self.approx.set_size_and_deterministic(
grads, nmc, 0, kwargs.get("more_replacements")
)
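
The KSD objective above calls aet.grad(None, params, known_grads={z: grad}), i.e. it backpropagates an externally supplied gradient instead of differentiating a scalar cost. A small sketch of that pattern (assumes aesara is installed; the variables are illustrative):

    import aesara
    import aesara.tensor as aet
    import numpy as np

    w = aesara.shared(np.array([1.0, 2.0]), name="w")
    z = w * 3.0                                      # intermediate node
    g_z = aet.as_tensor(np.array([0.5, 0.5]))        # gradient w.r.t. z supplied from outside

    (g_w,) = aet.grad(None, wrt=[w], known_grads={z: g_z})
    print(g_w.eval())                                # chain rule: 3.0 * [0.5, 0.5] = [1.5, 1.5]
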
diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py
index ebf4a9cda8..115c0abcae 100644
--- a/pymc3/variational/opvi.py
+++ b/pymc3/variational/opvi.py
@@ -49,17 +49,19 @@
import itertools
import warnings
+import aesara
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.tensor as tt
+
+from aesara.graph.basic import Variable
import pymc3 as pm
+from pymc3.aesaraf import aet_rng, identity
from pymc3.backends import NDArray
from pymc3.blocking import ArrayOrdering, DictToArrayBijection, VarMap
from pymc3.memoize import WithMemoization, memoize
from pymc3.model import modelcontext
-from pymc3.theanof import identity, tt_rng
from pymc3.util import get_default_varnames, get_transformed
from pymc3.variational.updates import adagrad_window
@@ -116,7 +118,7 @@ def node_property(f):
def wrapper(fn):
return property(
memoize(
- theano.config.change_flags(compute_test_value="off")(append_name(f)(fn)),
+ aesara.config.change_flags(compute_test_value="off")(append_name(f)(fn)),
bound=True,
)
)
@@ -124,16 +126,16 @@ def wrapper(fn):
return wrapper
else:
return property(
- memoize(theano.config.change_flags(compute_test_value="off")(f), bound=True)
+ memoize(aesara.config.change_flags(compute_test_value="off")(f), bound=True)
)
-@theano.config.change_flags(compute_test_value="ignore")
+@aesara.config.change_flags(compute_test_value="ignore")
def try_to_set_test_value(node_in, node_out, s):
_s = s
if s is None:
s = 1
- s = theano.compile.view_op(tt.as_tensor(s))
+ s = aesara.compile.view_op(aet.as_tensor(s))
if not isinstance(node_in, (list, tuple)):
node_in = [node_in]
if not isinstance(node_out, (list, tuple)):
@@ -150,7 +152,7 @@ def try_to_set_test_value(node_in, node_out, s):
o.tag.test_value = tv
-class ObjectiveUpdates(theano.OrderedUpdates):
+class ObjectiveUpdates(aesara.OrderedUpdates):
"""OrderedUpdates extension for storing loss"""
loss = None
@@ -291,7 +293,7 @@ def add_obj_updates(
if self.op.returns_loss:
updates.loss = obj_target
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def step_function(
self,
obj_n_mc=None,
@@ -335,13 +337,13 @@ def step_function(
score: `bool`
calculate loss on each step? Defaults to False for speed
fn_kwargs: `dict`
- Add kwargs to theano.function (e.g. `{'profile': True}`)
+ Add kwargs to aesara.function (e.g. `{'profile': True}`)
more_replacements: `dict`
Apply custom replacements before calculating gradients
Returns
-------
- `theano.function`
+ `aesara.function`
"""
if fn_kwargs is None:
fn_kwargs = {}
@@ -359,12 +361,12 @@ def step_function(
total_grad_norm_constraint=total_grad_norm_constraint,
)
if score:
- step_fn = theano.function([], updates.loss, updates=updates, **fn_kwargs)
+ step_fn = aesara.function([], updates.loss, updates=updates, **fn_kwargs)
else:
- step_fn = theano.function([], None, updates=updates, **fn_kwargs)
+ step_fn = aesara.function([], None, updates=updates, **fn_kwargs)
return step_fn
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def score_function(
self, sc_n_mc=None, more_replacements=None, fn_kwargs=None
): # pragma: no cover
@@ -377,11 +379,11 @@ def score_function(
more_replacements:
Apply custom replacements before compiling a function
fn_kwargs: `dict`
- arbitrary kwargs passed to `theano.function`
+ arbitrary kwargs passed to `aesara.function`
Returns
-------
- theano.function
+ aesara.function
"""
if fn_kwargs is None:
fn_kwargs = {}
@@ -390,9 +392,9 @@ def score_function(
if more_replacements is None:
more_replacements = {}
loss = self(sc_n_mc, more_replacements=more_replacements)
- return theano.function([], loss, **fn_kwargs)
+ return aesara.function([], loss, **fn_kwargs)
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __call__(self, nmc, **kwargs):
if "more_tf_params" in kwargs:
m = -1.0
@@ -504,7 +506,7 @@ def collect_shared_to_list(params):
return list(
t[1]
for t in sorted(params.items(), key=lambda t: t[0])
- if isinstance(t[1], theano.compile.SharedVariable)
+ if isinstance(t[1], aesara.compile.SharedVariable)
)
elif params is None:
return []
@@ -842,7 +844,7 @@ def __init__(
self._vfam = vfam
self._local = local
self._batched = rowwise
- self._rng = tt_rng(random_seed)
+ self._rng = aet_rng(random_seed)
model = modelcontext(model)
self.model = model
self.group = group
@@ -895,7 +897,7 @@ def _check_user_params(self, **kwargs):
shape = (-1,) + shape
elif self.batched:
shape = (self.bdim,) + shape
- self._user_params[name] = tt.as_tensor(param).reshape(shape)
+ self._user_params[name] = aet.as_tensor(param).reshape(shape)
return True
def _initial_type(self, name):
@@ -910,9 +912,9 @@ def _initial_type(self, name):
tensor
"""
if self.batched:
- return tt.tensor3(name)
+ return aet.tensor3(name)
else:
- return tt.matrix(name)
+ return aet.matrix(name)
def _input_type(self, name):
R"""*Dev* - input type with given name. The correct type depends on `self.batched`
@@ -926,11 +928,11 @@ def _input_type(self, name):
tensor
"""
if self.batched:
- return tt.matrix(name)
+ return aet.matrix(name)
else:
- return tt.vector(name)
+ return aet.vector(name)
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def __init_group__(self, group):
if not group:
raise GroupError("Got empty group")
@@ -1020,11 +1022,11 @@ def _new_initial_shape(self, size, dim, more_replacements=None):
shape vector
"""
if self.batched:
- bdim = tt.as_tensor(self.bdim)
- bdim = theano.clone(bdim, more_replacements)
- return tt.stack([size, bdim, dim])
+ bdim = aet.as_tensor(self.bdim)
+ bdim = aesara.clone_replace(bdim, more_replacements)
+ return aet.stack([size, bdim, dim])
else:
- return tt.stack([size, dim])
+ return aet.stack([size, dim])
@node_property
def bdim(self):
@@ -1071,22 +1073,22 @@ def _new_initial(self, size, deterministic, more_replacements=None):
"""
if size is None:
size = 1
- if not isinstance(deterministic, tt.Variable):
+ if not isinstance(deterministic, Variable):
deterministic = np.int8(deterministic)
dim, dist_name, dist_map = (self.ddim, self.initial_dist_name, self.initial_dist_map)
dtype = self.symbolic_initial.dtype
- dim = tt.as_tensor(dim)
- size = tt.as_tensor(size)
+ dim = aet.as_tensor(dim)
+ size = aet.as_tensor(size)
shape = self._new_initial_shape(size, dim, more_replacements)
# apply optimizations if possible
- if not isinstance(deterministic, tt.Variable):
+ if not isinstance(deterministic, Variable):
if deterministic:
- return tt.ones(shape, dtype) * dist_map
+ return aet.ones(shape, dtype) * dist_map
else:
return getattr(self._rng, dist_name)(size=shape)
else:
sample = getattr(self._rng, dist_name)(size=shape)
- initial = tt.switch(deterministic, tt.ones(shape, dtype) * dist_map, sample)
+ initial = aet.switch(deterministic, aet.ones(shape, dtype) * dist_map, sample)
return initial
@node_property
@@ -1111,7 +1113,7 @@ def symbolic_random2d(self):
else:
return self.symbolic_random
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def set_size_and_deterministic(self, node, s, d, more_replacements=None):
"""*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or
:func:`symbolic_single_sample` new random generator can be allocated and applied to node
@@ -1119,7 +1121,7 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None):
Parameters
----------
node: :class:`Variable`
- Theano node with symbolically applied VI replacements
+ Aesara node with symbolically applied VI replacements
s: scalar
desired number of samples
d: bool or int
@@ -1132,13 +1134,13 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None):
:class:`Variable` with applied replacements, ready to use
"""
flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements)
- node_out = theano.clone(node, flat2rand)
+ node_out = aesara.clone_replace(node, flat2rand)
try_to_set_test_value(node, node_out, s)
return node_out
def to_flat_input(self, node):
"""*Dev* - replace vars with flattened view stored in `self.inputs`"""
- return theano.clone(node, self.replacements)
+ return aesara.clone_replace(node, self.replacements)
def symbolic_sample_over_posterior(self, node):
"""*Dev* - performs sampling of node applying independent samples from posterior each time.
@@ -1146,12 +1148,12 @@ def symbolic_sample_over_posterior(self, node):
"""
node = self.to_flat_input(node)
random = self.symbolic_random.astype(self.symbolic_initial.dtype)
- random = tt.patternbroadcast(random, self.symbolic_initial.broadcastable)
+ random = aet.patternbroadcast(random, self.symbolic_initial.broadcastable)
def sample(post):
- return theano.clone(node, {self.input: post})
+ return aesara.clone_replace(node, {self.input: post})
- nodes, _ = theano.scan(sample, random)
+ nodes, _ = aesara.scan(sample, random)
return nodes
def symbolic_single_sample(self, node):
@@ -1161,8 +1163,8 @@ def symbolic_single_sample(self, node):
"""
node = self.to_flat_input(node)
random = self.symbolic_random.astype(self.symbolic_initial.dtype)
- random = tt.patternbroadcast(random, self.symbolic_initial.broadcastable)
- return theano.clone(node, {self.input: random[0]})
+ random = aet.patternbroadcast(random, self.symbolic_initial.broadcastable)
+ return aesara.clone_replace(node, {self.input: random[0]})
def make_size_and_deterministic_replacements(self, s, d, more_replacements=None):
"""*Dev* - creates correct replacements for initial depending on
@@ -1182,15 +1184,15 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None)
dict with replacements for initial
"""
initial = self._new_initial(s, d, more_replacements)
- initial = tt.patternbroadcast(initial, self.symbolic_initial.broadcastable)
+ initial = aet.patternbroadcast(initial, self.symbolic_initial.broadcastable)
if more_replacements:
- initial = theano.clone(initial, more_replacements)
+ initial = aesara.clone_replace(initial, more_replacements)
return {self.symbolic_initial: initial}
@node_property
def symbolic_normalizing_constant(self):
"""*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`"""
- t = self.to_flat_input(tt.max([v.scaling for v in self.group]))
+ t = self.to_flat_input(aet.max([v.scaling for v in self.group]))
t = self.symbolic_single_sample(t)
return pm.floatX(t)
@@ -1282,7 +1284,7 @@ class Approximation(WithMemoization):
"""
def __init__(self, groups, model=None):
- self._scale_cost_to_minibatch = theano.shared(np.int8(1))
+ self._scale_cost_to_minibatch = aesara.shared(np.int8(1))
model = modelcontext(model)
if not model.free_RVs:
raise TypeError("Model does not have FreeRVs")
@@ -1341,22 +1343,22 @@ def symbolic_normalizing_constant(self):
"""*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`.
Here the effect is controlled by `self.scale_cost_to_minibatch`
"""
- t = tt.max(
+ t = aet.max(
self.collect("symbolic_normalizing_constant")
+ [var.scaling for var in self.model.observed_RVs]
)
- t = tt.switch(self._scale_cost_to_minibatch, t, tt.constant(1, dtype=t.dtype))
+ t = aet.switch(self._scale_cost_to_minibatch, t, aet.constant(1, dtype=t.dtype))
return pm.floatX(t)
@node_property
def symbolic_logq(self):
"""*Dev* - collects `symbolic_logq` for all groups"""
- return tt.add(*self.collect("symbolic_logq"))
+ return aet.add(*self.collect("symbolic_logq"))
@node_property
def logq(self):
"""*Dev* - collects `logQ` for all groups"""
- return tt.add(*self.collect("logq"))
+ return aet.add(*self.collect("logq"))
@node_property
def logq_norm(self):
@@ -1365,7 +1367,7 @@ def logq_norm(self):
@node_property
def _sized_symbolic_varlogp_and_datalogp(self):
- """*Dev* - computes sampled prior term from model via `theano.scan`"""
+ """*Dev* - computes sampled prior term from model via `aesara.scan`"""
varlogp_s, datalogp_s = self.symbolic_sample_over_posterior(
[self.model.varlogpt, self.model.datalogpt]
)
@@ -1373,55 +1375,55 @@ def _sized_symbolic_varlogp_and_datalogp(self):
@node_property
def sized_symbolic_varlogp(self):
- """*Dev* - computes sampled prior term from model via `theano.scan`"""
+ """*Dev* - computes sampled prior term from model via `aesara.scan`"""
return self._sized_symbolic_varlogp_and_datalogp[0] # shape (s,)
@node_property
def sized_symbolic_datalogp(self):
- """*Dev* - computes sampled data term from model via `theano.scan`"""
+ """*Dev* - computes sampled data term from model via `aesara.scan`"""
return self._sized_symbolic_varlogp_and_datalogp[1] # shape (s,)
@node_property
def sized_symbolic_logp(self):
- """*Dev* - computes sampled logP from model via `theano.scan`"""
+ """*Dev* - computes sampled logP from model via `aesara.scan`"""
return self.sized_symbolic_varlogp + self.sized_symbolic_datalogp # shape (s,)
@node_property
def logp(self):
- """*Dev* - computes :math:`E_{q}(logP)` from model via `theano.scan` that can be optimized later"""
+ """*Dev* - computes :math:`E_{q}(logP)` from model via `aesara.scan` that can be optimized later"""
return self.varlogp + self.datalogp
@node_property
def varlogp(self):
- """*Dev* - computes :math:`E_{q}(prior term)` from model via `theano.scan` that can be optimized later"""
+ """*Dev* - computes :math:`E_{q}(prior term)` from model via `aesara.scan` that can be optimized later"""
return self.sized_symbolic_varlogp.mean(0)
@node_property
def datalogp(self):
- """*Dev* - computes :math:`E_{q}(data term)` from model via `theano.scan` that can be optimized later"""
+ """*Dev* - computes :math:`E_{q}(data term)` from model via `aesara.scan` that can be optimized later"""
return self.sized_symbolic_datalogp.mean(0)
@node_property
def _single_symbolic_varlogp_and_datalogp(self):
- """*Dev* - computes sampled prior term from model via `theano.scan`"""
+ """*Dev* - computes sampled prior term from model via `aesara.scan`"""
varlogp, datalogp = self.symbolic_single_sample([self.model.varlogpt, self.model.datalogpt])
return varlogp, datalogp
@node_property
def single_symbolic_varlogp(self):
- """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `theano.scan`
+ """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `aesara.scan`
is not needed and code can be optimized"""
return self._single_symbolic_varlogp_and_datalogp[0]
@node_property
def single_symbolic_datalogp(self):
- """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `theano.scan`
+ """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `aesara.scan`
is not needed and code can be optimized"""
return self._single_symbolic_varlogp_and_datalogp[1]
@node_property
def single_symbolic_logp(self):
- """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `theano.scan`
+ """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `aesara.scan`
is not needed and code can be optimized"""
return self.single_symbolic_datalogp + self.single_symbolic_varlogp
@@ -1472,7 +1474,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None)
flat2rand.update(more_replacements)
return flat2rand
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def set_size_and_deterministic(self, node, s, d, more_replacements=None):
"""*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or
:func:`symbolic_single_sample` new random generator can be allocated and applied to node
@@ -1480,7 +1482,7 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None):
Parameters
----------
node: :class:`Variable`
- Theano node with symbolically applied VI replacements
+ Aesara node with symbolically applied VI replacements
s: scalar
desired number of samples
d: bool or int
@@ -1495,14 +1497,14 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None):
_node = node
optimizations = self.get_optimization_replacements(s, d)
flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements)
- node = theano.clone(node, optimizations)
- node = theano.clone(node, flat2rand)
+ node = aesara.clone_replace(node, optimizations)
+ node = aesara.clone_replace(node, flat2rand)
try_to_set_test_value(_node, node, s)
return node
def to_flat_input(self, node):
"""*Dev* - replace vars with flattened view stored in `self.inputs`"""
- return theano.clone(node, self.replacements)
+ return aesara.clone_replace(node, self.replacements)
def symbolic_sample_over_posterior(self, node):
"""*Dev* - performs sampling of node applying independent samples from posterior each time.
@@ -1511,9 +1513,9 @@ def symbolic_sample_over_posterior(self, node):
node = self.to_flat_input(node)
def sample(*post):
- return theano.clone(node, dict(zip(self.inputs, post)))
+ return aesara.clone_replace(node, dict(zip(self.inputs, post)))
- nodes, _ = theano.scan(sample, self.symbolic_randoms)
+ nodes, _ = aesara.scan(sample, self.symbolic_randoms)
return nodes
def symbolic_single_sample(self, node):
@@ -1524,11 +1526,11 @@ def symbolic_single_sample(self, node):
node = self.to_flat_input(node)
post = [v[0] for v in self.symbolic_randoms]
inp = self.inputs
- return theano.clone(node, dict(zip(inp, post)))
+ return aesara.clone_replace(node, dict(zip(inp, post)))
def get_optimization_replacements(self, s, d):
"""*Dev* - optimizations for logP. If sample size is static and equal to 1:
- then `theano.scan` MC estimate is replaced with single sample without call to `theano.scan`.
+ then `aesara.scan` MC estimate is replaced with single sample without call to `aesara.scan`.
"""
repl = collections.OrderedDict()
# avoid scan if size is constant and equal to one
@@ -1537,13 +1539,13 @@ def get_optimization_replacements(self, s, d):
repl[self.datalogp] = self.single_symbolic_datalogp
return repl
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def sample_node(self, node, size=None, deterministic=False, more_replacements=None):
"""Samples given node or nodes over shared posterior
Parameters
----------
- node: Theano Variables (or Theano expressions)
+ node: Aesara Variables (or Aesara expressions)
size: None or scalar
number of samples
more_replacements: `dict`
@@ -1557,7 +1559,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No
sampled node(s) with replacements
"""
node_in = node
- node = theano.clone(node, more_replacements)
+ node = aesara.clone_replace(node, more_replacements)
if size is None:
node_out = self.symbolic_single_sample(node)
else:
@@ -1567,7 +1569,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No
return node_out
def rslice(self, name):
- """*Dev* - vectorized sampling for named random variable without call to `theano.scan`.
+ """*Dev* - vectorized sampling for named random variable without call to `aesara.scan`.
This node still needs :func:`set_size_and_deterministic` to be evaluated
"""
@@ -1588,13 +1590,13 @@ def vars_names(vs):
@property
@memoize(bound=True)
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def sample_dict_fn(self):
- s = tt.iscalar()
+ s = aet.iscalar()
names = [v.name for v in self.model.free_RVs]
sampled = [self.rslice(name) for name in names]
sampled = self.set_size_and_deterministic(sampled, s, 0)
- sample_fn = theano.function([s], sampled)
+ sample_fn = aesara.function([s], sampled)
def inner(draws=100):
_samples = sample_fn(draws)
@@ -1658,7 +1660,7 @@ def has_batched(self):
@node_property
def symbolic_random(self):
- return tt.concatenate(self.collect("symbolic_random2d"), axis=-1)
+ return aet.concatenate(self.collect("symbolic_random2d"), axis=-1)
def __str__(self):
if len(self.groups) < 5:
@@ -1679,7 +1681,7 @@ def any_histograms(self):
def joint_histogram(self):
if not self.all_histograms:
raise VariationalInferenceError("%s does not consist of all Empirical approximations")
- return tt.concatenate(self.collect("histogram"), axis=-1)
+ return aet.concatenate(self.collect("histogram"), axis=-1)
@property
def params(self):
diff --git a/pymc3/variational/stein.py b/pymc3/variational/stein.py
index ca9a924910..79a7d78183 100644
--- a/pymc3/variational/stein.py
+++ b/pymc3/variational/stein.py
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import theano
-import theano.tensor as tt
+import aesara
+import aesara.tensor as aet
+from pymc3.aesaraf import floatX
from pymc3.memoize import WithMemoization, memoize
-from pymc3.theanof import floatX
from pymc3.variational.opvi import node_property
from pymc3.variational.test_functions import rbf
@@ -46,12 +46,12 @@ def approx_symbolic_matrices(self):
@node_property
def dlogp(self):
- grad = tt.grad(self.logp_norm.sum(), self.approx_symbolic_matrices)
+ grad = aet.grad(self.logp_norm.sum(), self.approx_symbolic_matrices)
def flatten2(tensor):
return tensor.flatten(2)
- return tt.concatenate(list(map(flatten2, grad)), -1)
+ return aet.concatenate(list(map(flatten2, grad)), -1)
@node_property
def grad(self):
@@ -64,7 +64,7 @@ def grad(self):
def density_part_grad(self):
Kxy = self.Kxy
dlogpdx = self.dlogp
- return tt.dot(Kxy, dlogpdx)
+ return aet.dot(Kxy, dlogpdx)
@node_property
def repulsive_part_grad(self):
@@ -84,13 +84,13 @@ def dxkxy(self):
def logp_norm(self):
sized_symbolic_logp = self.approx.sized_symbolic_logp
if self.use_histogram:
- sized_symbolic_logp = theano.clone(
+ sized_symbolic_logp = aesara.clone_replace(
sized_symbolic_logp,
dict(zip(self.approx.symbolic_randoms, self.approx.collect("histogram"))),
)
return sized_symbolic_logp / self.approx.symbolic_normalizing_constant
@memoize
- @theano.config.change_flags(compute_test_value="off")
+ @aesara.config.change_flags(compute_test_value="off")
def _kernel(self):
return self._kernel_f(self.input_joint_matrix)
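For reference, `density_part_grad` and `repulsive_part_grad` above are the two halves of the Stein variational gradient. A rough NumPy sketch of how the pieces combine (the helper name `svgd_direction` is hypothetical; the exact scaling lives in the `grad` property, which this hunk does not show):

    import numpy as np

    def svgd_direction(Kxy, dlogp, dxkxy):
        # density part pulls particles toward high log-probability regions,
        # the repulsive part (dxkxy) keeps them spread apart
        n = Kxy.shape[0]
        return (Kxy @ dlogp + dxkxy) / n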
diff --git a/pymc3/variational/test_functions.py b/pymc3/variational/test_functions.py
index 8f95abd4e1..3380ed27b8 100644
--- a/pymc3/variational/test_functions.py
+++ b/pymc3/variational/test_functions.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from theano import tensor as tt
+from aesara import tensor as aet
-from pymc3.theanof import floatX
+from pymc3.aesaraf import floatX
from pymc3.variational.opvi import TestFunction
__all__ = ["rbf"]
@@ -34,30 +34,30 @@ class Kernel(TestFunction):
class RBF(Kernel):
def __call__(self, X):
XY = X.dot(X.T)
- x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, "x")
- X2e = tt.repeat(x2, X.shape[0], axis=1)
+ x2 = aet.sum(X ** 2, axis=1).dimshuffle(0, "x")
+ X2e = aet.repeat(x2, X.shape[0], axis=1)
H = X2e + X2e.T - 2.0 * XY
- V = tt.sort(H.flatten())
+ V = aet.sort(H.flatten())
length = V.shape[0]
# median distance
- m = tt.switch(
- tt.eq((length % 2), 0),
+ m = aet.switch(
+ aet.eq((length % 2), 0),
# if even vector
- tt.mean(V[((length // 2) - 1) : ((length // 2) + 1)]),
+ aet.mean(V[((length // 2) - 1) : ((length // 2) + 1)]),
# if odd vector
V[length // 2],
)
- h = 0.5 * m / tt.log(floatX(H.shape[0]) + floatX(1))
+ h = 0.5 * m / aet.log(floatX(H.shape[0]) + floatX(1))
# RBF
- Kxy = tt.exp(-H / h / 2.0)
+ Kxy = aet.exp(-H / h / 2.0)
# Derivative
- dxkxy = -tt.dot(Kxy, X)
- sumkxy = tt.sum(Kxy, axis=-1, keepdims=True)
- dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h
+ dxkxy = -aet.dot(Kxy, X)
+ sumkxy = aet.sum(Kxy, axis=-1, keepdims=True)
+ dxkxy = aet.add(dxkxy, aet.mul(X, sumkxy)) / h
return Kxy, dxkxy
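The symbolic RBF test function above uses a median-distance bandwidth. A rough NumPy transcription, for intuition only (the helper `rbf_numpy` is hypothetical and not part of this change):

    import numpy as np

    def rbf_numpy(X):
        XY = X @ X.T
        x2 = np.sum(X ** 2, axis=1, keepdims=True)
        H = x2 + x2.T - 2.0 * XY                    # pairwise squared distances
        m = np.median(H)                            # median heuristic
        h = 0.5 * m / np.log(X.shape[0] + 1.0)
        Kxy = np.exp(-H / h / 2.0)
        dxkxy = (-Kxy @ X + X * Kxy.sum(axis=-1, keepdims=True)) / h
        return Kxy, dxkxy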
diff --git a/pymc3/variational/updates.py b/pymc3/variational/updates.py
index a2baa462c5..62776f48ad 100755
--- a/pymc3/variational/updates.py
+++ b/pymc3/variational/updates.py
@@ -44,7 +44,7 @@
# SOFTWARE.
"""
-Functions to generate Theano update dictionaries for training.
+Functions to generate Aesara update dictionaries for training.
The update functions implement different methods to control the learning
rate for use with stochastic gradient descent.
@@ -88,21 +88,21 @@
Examples
--------
>>> import lasagne
->>> import theano.tensor as T
->>> import theano
+>>> import aesara
+>>> import aesara.tensor as aet
>>> from lasagne.nonlinearities import softmax
>>> from lasagne.layers import InputLayer, DenseLayer, get_output
>>> from lasagne.updates import sgd, apply_momentum
>>> l_in = InputLayer((100, 20))
>>> l1 = DenseLayer(l_in, num_units=3, nonlinearity=softmax)
->>> x = tt.matrix('x') # shp: num_batch x num_features
->>> y = tt.ivector('y') # shp: num_batch
+>>> x = aet.matrix('x') # shp: num_batch x num_features
+>>> y = aet.ivector('y') # shp: num_batch
>>> l_out = get_output(l1, x)
>>> params = lasagne.layers.get_all_params(l1)
->>> loss = tt.mean(tt.nnet.categorical_crossentropy(l_out, y))
+>>> loss = aet.mean(aet.nnet.categorical_crossentropy(l_out, y))
>>> updates_sgd = sgd(loss, params, learning_rate=0.0001)
>>> updates = apply_momentum(updates_sgd, params, momentum=0.9)
->>> train_function = theano.function([x, y], updates=updates)
+>>> train_function = aesara.function([x, y], updates=updates)
Notes
-----
@@ -112,9 +111,9 @@
from collections import OrderedDict
from functools import partial
+import aesara
+import aesara.tensor as aet
import numpy as np
-import theano
-import theano.tensor as tt
import pymc3 as pm
@@ -152,7 +151,7 @@ def get_or_compute_grads(loss_or_grads, params):
gradients and returned as is, unless it does not match the length
of `params`, in which case a `ValueError` is raised.
Otherwise, `loss_or_grads` is assumed to be a cost expression and
- the function returns `theano.grad(loss_or_grads, params)`.
+ the function returns `aesara.grad(loss_or_grads, params)`.
Raises
------
@@ -161,7 +160,7 @@ def get_or_compute_grads(loss_or_grads, params):
any element of `params` is not a shared variable (while we could still
compute its gradient, we can never update it and want to fail early).
"""
- if any(not isinstance(p, theano.compile.SharedVariable) for p in params):
+ if any(not isinstance(p, aesara.compile.SharedVariable) for p in params):
raise ValueError(
"params must contain shared variables only. If it "
"contains arbitrary parameter expressions, then "
@@ -174,7 +173,7 @@ def get_or_compute_grads(loss_or_grads, params):
)
return loss_or_grads
else:
- return theano.grad(loss_or_grads, params)
+ return aesara.grad(loss_or_grads, params)
def _get_call_kwargs(_locals_):
@@ -212,7 +211,7 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3):
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = sgd(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -276,7 +275,7 @@ def apply_momentum(updates, params=None, momentum=0.9):
for param in params:
value = param.get_value(borrow=True)
- velocity = theano.shared(
+ velocity = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
x = momentum * velocity + updates[param]
@@ -326,7 +325,7 @@ def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9):
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = momentum(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -391,7 +390,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9):
for param in params:
value = param.get_value(borrow=True)
- velocity = theano.shared(
+ velocity = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
x = momentum * velocity + updates[param] - param
@@ -446,7 +445,7 @@ def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momen
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = nesterov_momentum(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -514,7 +513,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6):
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = adagrad(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -535,12 +534,12 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6):
for param, grad in zip(params, grads):
value = param.get_value(borrow=True)
- accu = theano.shared(
+ accu = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
accu_new = accu + grad ** 2
updates[accu] = accu_new
- updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon))
+ updates[param] = param - (learning_rate * grad / aet.sqrt(accu_new + epsilon))
return updates
@@ -574,19 +573,19 @@ def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon
grads = get_or_compute_grads(loss_or_grads, params)
updates = OrderedDict()
for param, grad in zip(params, grads):
- i = theano.shared(pm.floatX(0))
+ i = aesara.shared(pm.floatX(0))
i_int = i.astype("int32")
value = param.get_value(borrow=True)
- accu = theano.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype))
+ accu = aesara.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype))
# Append squared gradient vector to accu_new
- accu_new = tt.set_subtensor(accu[..., i_int], grad ** 2)
- i_new = tt.switch((i + 1) < n_win, i + 1, 0)
+ accu_new = aet.set_subtensor(accu[..., i_int], grad ** 2)
+ i_new = aet.switch((i + 1) < n_win, i + 1, 0)
updates[accu] = accu_new
updates[i] = i_new
accu_sum = accu_new.sum(axis=-1)
- updates[param] = param - (learning_rate * grad / tt.sqrt(accu_sum + epsilon))
+ updates[param] = param - (learning_rate * grad / aet.sqrt(accu_sum + epsilon))
return updates
@@ -633,13 +632,13 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon
References
----------
- .. [1] Tieleman, tt. and Hinton, G. (2012):
+ .. [1] Tieleman, T. and Hinton, G. (2012):
Neural Networks for Machine Learning, Lecture 6.5 - rmsprop.
Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20)
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = rmsprop(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -658,17 +657,17 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon
grads = get_or_compute_grads(loss_or_grads, params)
updates = OrderedDict()
- # Using theano constant to prevent upcasting of float32
- one = tt.constant(1)
+ # Using aesara constant to prevent upcasting of float32
+ one = aet.constant(1)
for param, grad in zip(params, grads):
value = param.get_value(borrow=True)
- accu = theano.shared(
+ accu = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
accu_new = rho * accu + (one - rho) * grad ** 2
updates[accu] = accu_new
- updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon))
+ updates[param] = param - (learning_rate * grad / aet.sqrt(accu_new + epsilon))
return updates
@@ -731,7 +730,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = adadelta(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -750,17 +749,17 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil
grads = get_or_compute_grads(loss_or_grads, params)
updates = OrderedDict()
- # Using theano constant to prevent upcasting of float32
- one = tt.constant(1)
+ # Using aesara constant to prevent upcasting of float32
+ one = aet.constant(1)
for param, grad in zip(params, grads):
value = param.get_value(borrow=True)
# accu: accumulate gradient magnitudes
- accu = theano.shared(
+ accu = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
# delta_accu: accumulate update magnitudes (recursively!)
- delta_accu = theano.shared(
+ delta_accu = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
@@ -769,7 +768,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil
updates[accu] = accu_new
# compute parameter update, using the 'old' delta_accu
- update = grad * tt.sqrt(delta_accu + epsilon) / tt.sqrt(accu_new + epsilon)
+ update = grad * aet.sqrt(delta_accu + epsilon) / aet.sqrt(accu_new + epsilon)
updates[param] = param - learning_rate * update
# update delta_accu (as accu, but accumulating updates)
@@ -823,7 +822,7 @@ def adam(
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = adam(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -840,27 +839,27 @@ def adam(
elif loss_or_grads is None or params is None:
raise ValueError("Please provide both `loss_or_grads` and `params` to get updates")
all_grads = get_or_compute_grads(loss_or_grads, params)
- t_prev = theano.shared(pm.theanof.floatX(0.0))
+ t_prev = aesara.shared(pm.aesaraf.floatX(0.0))
updates = OrderedDict()
- # Using theano constant to prevent upcasting of float32
- one = tt.constant(1)
+ # Using aesara constant to prevent upcasting of float32
+ one = aet.constant(1)
t = t_prev + 1
- a_t = learning_rate * tt.sqrt(one - beta2 ** t) / (one - beta1 ** t)
+ a_t = learning_rate * aet.sqrt(one - beta2 ** t) / (one - beta1 ** t)
for param, g_t in zip(params, all_grads):
value = param.get_value(borrow=True)
- m_prev = theano.shared(
+ m_prev = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
- v_prev = theano.shared(
+ v_prev = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
m_t = beta1 * m_prev + (one - beta1) * g_t
v_t = beta2 * v_prev + (one - beta2) * g_t ** 2
- step = a_t * m_t / (tt.sqrt(v_t) + epsilon)
+ step = a_t * m_t / (aet.sqrt(v_t) + epsilon)
updates[m_prev] = m_t
updates[v_prev] = v_t
@@ -911,7 +910,7 @@ def adamax(
Examples
--------
- >>> a = theano.shared(1.)
+ >>> a = aesara.shared(1.)
>>> b = a*2
>>> updates = adamax(b, [a], learning_rate=.01)
>>> isinstance(updates, dict)
@@ -928,26 +927,26 @@ def adamax(
elif loss_or_grads is None or params is None:
raise ValueError("Please provide both `loss_or_grads` and `params` to get updates")
all_grads = get_or_compute_grads(loss_or_grads, params)
- t_prev = theano.shared(pm.theanof.floatX(0.0))
+ t_prev = aesara.shared(pm.aesaraf.floatX(0.0))
updates = OrderedDict()
- # Using theano constant to prevent upcasting of float32
- one = tt.constant(1)
+ # Using aesara constant to prevent upcasting of float32
+ one = aet.constant(1)
t = t_prev + 1
a_t = learning_rate / (one - beta1 ** t)
for param, g_t in zip(params, all_grads):
value = param.get_value(borrow=True)
- m_prev = theano.shared(
+ m_prev = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
- u_prev = theano.shared(
+ u_prev = aesara.shared(
np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
)
m_t = beta1 * m_prev + (one - beta1) * g_t
- u_t = tt.maximum(beta2 * u_prev, abs(g_t))
+ u_t = aet.maximum(beta2 * u_prev, abs(g_t))
step = a_t * m_t / (u_t + epsilon)
updates[m_prev] = m_t
@@ -968,7 +967,7 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7):
Parameters
----------
tensor_var: TensorVariable
- Theano expression for update, gradient, or other quantity.
+ Aesara expression for update, gradient, or other quantity.
max_norm: scalar
This value sets the maximum allowed value of any norm in
`tensor_var`.
@@ -993,11 +992,11 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7):
Examples
--------
- >>> param = theano.shared(
- ... np.random.randn(100, 200).astype(theano.config.floatX))
+ >>> param = aesara.shared(
+ ... np.random.randn(100, 200).astype(aesara.config.floatX))
>>> update = param + 100
>>> update = norm_constraint(update, 10)
- >>> func = theano.function([], [], updates=[(param, update)])
+ >>> func = aesara.function([], [], updates=[(param, update)])
>>> # Apply constrained update
>>> _ = func()
>>> from lasagne.utils import compute_norms
@@ -1028,9 +1027,9 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7):
"Unsupported tensor dimensionality {}." "Must specify `norm_axes`".format(ndim)
)
- dtype = np.dtype(theano.config.floatX).type
- norms = tt.sqrt(tt.sum(tt.sqr(tensor_var), axis=sum_over, keepdims=True))
- target_norms = tt.clip(norms, 0, dtype(max_norm))
+ dtype = np.dtype(aesara.config.floatX).type
+ norms = aet.sqrt(aet.sum(aet.sqr(tensor_var), axis=sum_over, keepdims=True))
+ target_norms = aet.clip(norms, 0, dtype(max_norm))
constrained_output = tensor_var * (target_norms / (dtype(epsilon) + norms))
return constrained_output
@@ -1061,7 +1060,7 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False
-------
tensor_vars_scaled: list of TensorVariables
The scaled tensor variables.
- norm: Theano scalar
+ norm: Aesara scalar
The combined norms of the input variables prior to rescaling,
only returned if ``return_norms=True``.
@@ -1070,14 +1069,14 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False
>>> from lasagne.layers import InputLayer, DenseLayer
>>> import lasagne
>>> from lasagne.updates import sgd, total_norm_constraint
- >>> x = tt.matrix()
- >>> y = tt.ivector()
+ >>> x = aet.matrix()
+ >>> y = aet.ivector()
>>> l_in = InputLayer((5, 10))
- >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=tt.nnet.softmax)
+ >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=aet.nnet.softmax)
>>> output = lasagne.layers.get_output(l1, x)
- >>> cost = tt.mean(tt.nnet.categorical_crossentropy(output, y))
+ >>> cost = aet.mean(aet.nnet.categorical_crossentropy(output, y))
>>> all_params = lasagne.layers.get_all_params(l1)
- >>> all_grads = tt.grad(cost, all_params)
+ >>> all_grads = aet.grad(cost, all_params)
>>> scaled_grads = total_norm_constraint(all_grads, 5)
>>> updates = sgd(scaled_grads, all_params, learning_rate=0.1)
@@ -1091,9 +1090,9 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False
learning with neural networks. In Advances in Neural Information
Processing Systems (pp. 3104-3112).
"""
- norm = tt.sqrt(sum(tt.sum(tensor ** 2) for tensor in tensor_vars))
- dtype = np.dtype(theano.config.floatX).type
- target_norm = tt.clip(norm, 0, dtype(max_norm))
+ norm = aet.sqrt(sum(aet.sum(tensor ** 2) for tensor in tensor_vars))
+ dtype = np.dtype(aesara.config.floatX).type
+ target_norm = aet.clip(norm, 0, dtype(max_norm))
multiplier = target_norm / (dtype(epsilon) + norm)
tensor_vars_scaled = [step * multiplier for step in tensor_vars]
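The docstring examples in this module carry over once the imports are switched. A minimal end-to-end sketch with the renamed API (assuming the post-rename layout from this diff is installed; the toy loss is illustrative):

    import aesara
    import aesara.tensor as aet
    import numpy as np
    from pymc3.variational.updates import adam

    w = aesara.shared(np.zeros(3, dtype=aesara.config.floatX), name="w")
    x = aet.vector("x")
    loss = aet.sum((w - x) ** 2)
    updates = adam(loss, [w], learning_rate=0.1)
    train = aesara.function([x], loss, updates=updates)
    target = np.array([1.0, 2.0, 3.0], dtype=aesara.config.floatX)
    for _ in range(100):
        train(target)  # loss shrinks as w moves toward target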
diff --git a/pymc3/vartypes.py b/pymc3/vartypes.py
index 2469036f31..8cb61333ab 100644
--- a/pymc3/vartypes.py
+++ b/pymc3/vartypes.py
@@ -12,9 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from theano.graph.basic import Constant as graph_constant
-from theano.tensor import Constant as tensor_constant
-
__all__ = [
"bool_types",
"int_types",
@@ -24,7 +21,6 @@
"discrete_types",
"typefilter",
"isgenerator",
- "theano_constant",
]
bool_types = {"int8"}
@@ -45,6 +41,3 @@ def typefilter(vars, types):
def isgenerator(obj):
return hasattr(obj, "__next__")
-
-
-theano_constant = (tensor_constant, graph_constant)
diff --git a/requirements.txt b/requirements.txt
index 93cb80ebc1..9ec84e7538 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
-arviz>=0.11.0
+aesara>=2.0.1
+arviz>=0.11.1
dill
fastprogress>=0.2.0
numpy>=1.15.0
pandas>=0.24.0
patsy>=0.5.1
scipy>=1.2.0
-theano-pymc==1.1.2
typing-extensions>=3.7.4
diff --git a/scripts/test.sh b/scripts/test.sh
index f9ae8111f7..9045f8df50 100755
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -3,4 +3,4 @@
set -e
_FLOATX=${FLOATX:=float64}
-THEANO_FLAGS="floatX=${_FLOATX},gcc__cxxflags='-march=core2'" pytest -v --cov=pymc3 --cov-report=xml "$@" --cov-report term
+AESARA_FLAGS="floatX=${_FLOATX},gcc__cxxflags='-march=core2'" pytest -v --cov=pymc3 --cov-report=xml "$@" --cov-report term
diff --git a/setup.py b/setup.py
index 9b8091ba1b..c7ccef4c07 100755
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
from setuptools import find_packages, setup
DISTNAME = "pymc3"
-DESCRIPTION = "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano"
+DESCRIPTION = "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Aesara"
AUTHOR = "PyMC Developers"
AUTHOR_EMAIL = "pymc.devs@gmail.com"
URL = "http://github.com/pymc-devs/pymc3"