diff --git a/pymc3/aesaraf.py b/pymc3/aesaraf.py index 54319005df..c894786fe1 100644 --- a/pymc3/aesaraf.py +++ b/pymc3/aesaraf.py @@ -11,17 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import ( - Callable, - Dict, - Generator, - Iterable, - List, - Optional, - Set, - Tuple, - Union, -) +from typing import Dict, List import aesara import aesara.tensor as at @@ -30,29 +20,16 @@ from aesara import config, scalar from aesara.gradient import grad -from aesara.graph.basic import ( - Apply, - Constant, - Variable, - clone_get_equiv, - graph_inputs, - walk, -) -from aesara.graph.fg import FunctionGraph -from aesara.graph.op import Op, compute_test_value +from aesara.graph.basic import Apply, Constant, graph_inputs +from aesara.graph.op import Op from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream from aesara.tensor.elemwise import Elemwise -from aesara.tensor.random.op import RandomVariable from aesara.tensor.sharedvar import SharedVariable from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.var import TensorVariable -from pymc3.vartypes import continuous_types, int_types, isgenerator, typefilter - -PotentialShapeType = Union[ - int, np.ndarray, Tuple[Union[int, Variable], ...], List[Union[int, Variable]], Variable -] - +from pymc3.data import GeneratorAdapter +from pymc3.vartypes import continuous_types, int_types, typefilter __all__ = [ "gradient", @@ -75,124 +52,6 @@ ] -def pandas_to_array(data): - """Convert a pandas object to a NumPy array. - - XXX: When `data` is a generator, this will return a Aesara tensor! - - """ - if hasattr(data, "to_numpy") and hasattr(data, "isnull"): - # typically, but not limited to pandas objects - vals = data.to_numpy() - mask = data.isnull().to_numpy() - if mask.any(): - # there are missing values - ret = np.ma.MaskedArray(vals, mask) - else: - ret = vals - elif isinstance(data, np.ndarray): - if isinstance(data, np.ma.MaskedArray): - if not data.mask.any(): - # empty mask - ret = data.filled() - else: - # already masked and rightly so - ret = data - else: - # already a ndarray, but not masked - mask = np.isnan(data) - if np.any(mask): - ret = np.ma.MaskedArray(data, mask) - else: - # no masking required - ret = data - elif isinstance(data, Variable): - ret = data - elif sps.issparse(data): - ret = data - elif isgenerator(data): - ret = generator(data) - else: - ret = np.asarray(data) - - # type handling to enable index variables when data is int: - if hasattr(data, "dtype"): - if "int" in str(data.dtype): - return intX(ret) - # otherwise, assume float: - else: - return floatX(ret) - # needed for uses of this function other than with pm.Data: - else: - return floatX(ret) - - -def change_rv_size( - rv_var: TensorVariable, - new_size: PotentialShapeType, - expand: Optional[bool] = False, -) -> TensorVariable: - """Change or expand the size of a `RandomVariable`. - - Parameters - ========== - rv_var - The `RandomVariable` output. - new_size - The new size. - expand: - Whether or not to completely replace the `size` parameter in `rv_var` - with `new_size` or simply prepend it to the existing `size`. 
- - """ - rv_node = rv_var.owner - rng, size, dtype, *dist_params = rv_node.inputs - name = rv_var.name - tag = rv_var.tag - - if expand: - new_size = tuple(np.atleast_1d(new_size)) + tuple(size) - - new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params) - rv_var = new_rv_node.outputs[-1] - rv_var.name = name - for k, v in tag.__dict__.items(): - rv_var.tag.__dict__.setdefault(k, v) - - if config.compute_test_value != "off": - compute_test_value(new_rv_node) - - return rv_var - - -def extract_rv_and_value_vars( - var: TensorVariable, -) -> Tuple[TensorVariable, TensorVariable]: - """Extract a random variable and its corresponding value variable from a generic - `TensorVariable`. - - Parameters - ========== - var - A variable corresponding to a `RandomVariable`. - - Returns - ======= - The first value in the tuple is the `RandomVariable`, and the second is the - measure-space variable that corresponds with the latter (i.e. the "value" - variable). - - """ - if not var.owner: - return None, None - - if isinstance(var.owner.op, RandomVariable): - rv_value = getattr(var.tag, "observations", getattr(var.tag, "value_var", None)) - return var, rv_value - - return None, None - - def extract_obs_data(x: TensorVariable) -> np.ndarray: """Extract data observed symbolic variables. @@ -215,137 +74,6 @@ def extract_obs_data(x: TensorVariable) -> np.ndarray: raise TypeError(f"Data cannot be extracted from {x}") -def walk_model( - graphs: Iterable[TensorVariable], - walk_past_rvs: bool = False, - stop_at_vars: Optional[Set[TensorVariable]] = None, - expand_fn: Callable[[TensorVariable], Iterable[TensorVariable]] = lambda var: [], -) -> Generator[TensorVariable, None, None]: - """Walk model graphs and yield their nodes. - - By default, these walks will not go past ``RandomVariable`` nodes. - - Parameters - ========== - graphs - The graphs to walk. - walk_past_rvs - If ``True``, the walk will not terminate at ``RandomVariable``s. - stop_at_vars - A list of variables at which the walk will terminate. - expand_fn - A function that returns the next variable(s) to be traversed. - """ - if stop_at_vars is None: - stop_at_vars = set() - - def expand(var): - new_vars = expand_fn(var) - - if ( - var.owner - and (walk_past_rvs or not isinstance(var.owner.op, RandomVariable)) - and (var not in stop_at_vars) - ): - new_vars.extend(reversed(var.owner.inputs)) - - return new_vars - - yield from walk(graphs, expand, False) - - -def replace_rvs_in_graphs( - graphs: Iterable[TensorVariable], - replacement_fn: Callable[[TensorVariable], Dict[TensorVariable, TensorVariable]], - initial_replacements: Optional[Dict[TensorVariable, TensorVariable]] = None, - **kwargs, -) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]: - """Replace random variables in graphs - - This will *not* recompute test values. - - Parameters - ========== - graphs - The graphs in which random variables are to be replaced. - - Returns - ======= - Tuple containing the transformed graphs and a ``dict`` of the replacements - that were made. 
- """ - replacements = {} - if initial_replacements: - replacements.update(initial_replacements) - - def expand_replace(var): - new_nodes = [] - if var.owner and isinstance(var.owner.op, RandomVariable): - new_nodes.extend(replacement_fn(var, replacements)) - return new_nodes - - for var in walk_model(graphs, expand_fn=expand_replace, **kwargs): - pass - - if replacements: - inputs = [i for i in graph_inputs(graphs) if not isinstance(i, Constant)] - equiv = {k: k for k in replacements.keys()} - equiv = clone_get_equiv(inputs, graphs, False, False, equiv) - - fg = FunctionGraph( - [equiv[i] for i in inputs], - [equiv[o] for o in graphs], - clone=False, - ) - - fg.replace_all(replacements.items(), import_missing=True) - - graphs = list(fg.outputs) - - return graphs, replacements - - -def rvs_to_value_vars( - graphs: Iterable[TensorVariable], - apply_transforms: bool = False, - initial_replacements: Optional[Dict[TensorVariable, TensorVariable]] = None, - **kwargs, -) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]: - """Replace random variables in graphs with their value variables. - - This will *not* recompute test values in the resulting graphs. - - Parameters - ========== - graphs - The graphs in which to perform the replacements. - apply_transforms - If ``True``, apply each value variable's transform. - initial_replacements - A ``dict`` containing the initial replacements to be made. - - """ - - def transform_replacements(var, replacements): - rv_var, rv_value_var = extract_rv_and_value_vars(var) - - if rv_value_var is None: - return [] - - transform = getattr(rv_value_var.tag, "transform", None) - - if transform is None or not apply_transforms: - replacements[var] = rv_value_var - return [] - - trans_rv_value = transform.backward(rv_var, rv_value_var) - replacements[var] = trans_rv_value - - return [trans_rv_value] - - return replace_rvs_in_graphs(graphs, transform_replacements, initial_replacements, **kwargs) - - def inputvars(a): """ Get the inputs into a aesara variables diff --git a/pymc3/blocking.py b/pymc3/blocking.py index 16bb59ec3e..332edceed8 100644 --- a/pymc3/blocking.py +++ b/pymc3/blocking.py @@ -41,11 +41,7 @@ class DictToArrayBijection: def map(var_dict: Dict[str, np.ndarray]) -> RaveledVars: """Map a dictionary of names and variables to a concatenated 1D array space.""" vars_info = tuple((v, k, v.shape, v.dtype) for k, v in var_dict.items()) - raveled_vars = [v[0].ravel() for v in vars_info] - if raveled_vars: - res = np.concatenate(raveled_vars) - else: - res = np.array([]) + res = np.concatenate([v[0].ravel() for v in vars_info]) return RaveledVars(res, tuple(v[1:] for v in vars_info)) @staticmethod diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index d92dad0cfe..8961badcd0 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -12,14 +12,435 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from pymc3.distributions.logp import ( # isort:skip - _logcdf, - _logp, - logcdf, - logp_transform, - logpt, - logpt_sum, -) +from functools import singledispatch +from typing import Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union + +import aesara.tensor as aet +import numpy as np + +from aesara import config +from aesara.graph.basic import Variable, clone_replace, graph_inputs, io_toposort, walk +from aesara.graph.op import Op, compute_test_value +from aesara.tensor.random.op import RandomVariable +from aesara.tensor.subtensor import AdvancedSubtensor, AdvancedSubtensor1, Subtensor +from aesara.tensor.var import TensorVariable + +from pymc3.aesaraf import floatX + +PotentialShapeType = Union[ + int, np.ndarray, Tuple[Union[int, Variable], ...], List[Union[int, Variable]], Variable +] + + +@singledispatch +def logp_transform(op: Op): + return None + + +def _get_scaling(total_size, shape, ndim): + """ + Gets scaling constant for logp + + Parameters + ---------- + total_size: int or list[int] + shape: shape + shape to scale + ndim: int + ndim hint + + Returns + ------- + scalar + """ + if total_size is None: + coef = floatX(1) + elif isinstance(total_size, int): + if ndim >= 1: + denom = shape[0] + else: + denom = 1 + coef = floatX(total_size) / floatX(denom) + elif isinstance(total_size, (list, tuple)): + if not all(isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)): + raise TypeError( + "Unrecognized `total_size` type, expected " + "int or list of ints, got %r" % total_size + ) + if Ellipsis in total_size: + sep = total_size.index(Ellipsis) + begin = total_size[:sep] + end = total_size[sep + 1 :] + if Ellipsis in end: + raise ValueError( + "Double Ellipsis in `total_size` is restricted, got %r" % total_size + ) + else: + begin = total_size + end = [] + if (len(begin) + len(end)) > ndim: + raise ValueError( + "Length of `total_size` is too big, " + "number of scalings is bigger that ndim, got %r" % total_size + ) + elif (len(begin) + len(end)) == 0: + return floatX(1) + if len(end) > 0: + shp_end = shape[-len(end) :] + else: + shp_end = np.asarray([]) + shp_begin = shape[: len(begin)] + begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] + end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] + coefs = begin_coef + end_coef + coef = aet.prod(coefs) + else: + raise TypeError( + "Unrecognized `total_size` type, expected int or list of ints, got %r" % total_size + ) + return aet.as_tensor(floatX(coef)) + + +def change_rv_size( + rv_var: TensorVariable, + new_size: PotentialShapeType, + expand: Optional[bool] = False, +) -> TensorVariable: + """Change or expand the size of a `RandomVariable`. + + Parameters + ========== + rv_var + The `RandomVariable` output. + new_size + The new size. + expand: + Whether or not to completely replace the `size` parameter in `rv_var` + with `new_size` or simply prepend it to the existing `size`. 
+ + """ + rv_node = rv_var.owner + rng, size, dtype, *dist_params = rv_node.inputs + name = rv_var.name + tag = rv_var.tag + + if expand: + new_size = tuple(np.atleast_1d(new_size)) + tuple(size) + + new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params) + rv_var = new_rv_node.outputs[-1] + rv_var.name = name + for k, v in tag.__dict__.items(): + rv_var.tag.__dict__.setdefault(k, v) + + if config.compute_test_value != "off": + compute_test_value(new_rv_node) + + return rv_var + + +def extract_rv_and_value_vars( + var: TensorVariable, +) -> Tuple[TensorVariable, TensorVariable]: + """Extract a random variable and its corresponding value variable from a generic + `TensorVariable`. + + Parameters + ========== + var + A variable corresponding to a `RandomVariable`. + + Returns + ======= + The first value in the tuple is the `RandomVariable`, and the second is the + measure-space variable that corresponds with the latter (i.e. the "value" + variable). + + """ + if not var.owner: + return None, None + + if isinstance(var.owner.op, RandomVariable): + rv_value = getattr(var.tag, "observations", getattr(var.tag, "value_var", None)) + return var, rv_value + + return None, None + + +def rv_ancestors( + graphs: Iterable[TensorVariable], walk_past_rvs: bool = False +) -> Generator[TensorVariable, None, None]: + """Yield everything except the inputs of ``RandomVariable``s. + + Parameters + ========== + graphs + The graphs to walk. + walk_past_rvs + If ``True``, do descend into ``RandomVariable``s. + """ + + def expand(var): + if var.owner and (walk_past_rvs or not isinstance(var.owner.op, RandomVariable)): + return reversed(var.owner.inputs) + + yield from walk(graphs, expand, False) + + +def replace_rvs_in_graphs( + graphs: Iterable[TensorVariable], + replacement_fn: Callable[[TensorVariable], Dict[TensorVariable, TensorVariable]], + initial_replacements: Optional[Dict[TensorVariable, TensorVariable]] = None, +) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]: + """Replace random variables in graphs + + This will *not* recompute test values. + + Parameters + ========== + graphs + The graphs in which random variables are to be replaced. + + Returns + ======= + Tuple containing the transformed graphs and a ``dict`` of the replacements + that were made. + """ + replacements = {} + if initial_replacements: + replacements.update(initial_replacements) + + for var in rv_ancestors(graphs): + if var.owner and isinstance(var.owner.op, RandomVariable): + replacement_fn(var, replacements) + + if replacements: + graphs = clone_replace(graphs, replacements) + + return graphs, replacements + + +def rvs_to_value_vars( + graphs: Iterable[TensorVariable], initial_replacements: Dict[TensorVariable, TensorVariable] +) -> Tuple[Iterable[TensorVariable], Dict[TensorVariable, TensorVariable]]: + """Replace random variables in graphs with their value variables. + + This will *not* recompute test values. + """ + + def value_var_replacements(var, replacements): + rv_var, rv_value_var = extract_rv_and_value_vars(var) + + if rv_value_var is not None: + replacements[var] = rv_value_var + + return replace_rvs_in_graphs(graphs, value_var_replacements, initial_replacements) + + +def apply_transforms( + graphs: Iterable[TensorVariable], +) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]: + """Apply the transforms associated with each random variable in `graphs`. + + This will *not* recompute test values. 
+ """ + + def transform_replacements(var, replacements): + rv_var, rv_value_var = extract_rv_and_value_vars(var) + + if rv_value_var is None: + return + + transform = getattr(rv_value_var.tag, "transform", None) + + if transform is None: + return + + trans_rv_value = transform.backward(rv_var, rv_value_var) + replacements[var] = trans_rv_value + + return replace_rvs_in_graphs(graphs, transform_replacements) + + +def logpt( + rv_var: TensorVariable, + rv_value: Optional[TensorVariable] = None, + *, + jacobian: bool = True, + scaling: bool = True, + transformed: bool = True, + cdf: bool = False, + sum: bool = False, + **kwargs, +) -> TensorVariable: + """Create a measure-space (i.e. log-likelihood) graph for a random variable at a given point. + + The input `rv_var` determines which log-likelihood graph is used and + `rv_value` is that graph's input parameter. For example, if `rv_var` is + the output of a `NormalRV` `Op`, then the output is + ``normal_log_pdf(rv_value)``. + + Parameters + ========== + rv_var + The `RandomVariable` output that determines the log-likelihood graph. + rv_value + The variable that represents the value of `rv_var` in its + log-likelihood. If no value is provided, `rv_var.tag.value_var` will + be checked and, when available, used. + jacobian + Whether or not to include the Jacobian term. + scaling + A scaling term to apply to the generated log-likelihood graph. + transformed + Apply transforms. + cdf + Return the log cumulative distribution. + sum + Sum the log-likelihood. + + """ + + rv_var, rv_value_var = extract_rv_and_value_vars(rv_var) + + if rv_value is None: + + if rv_value_var is None: + raise ValueError(f"No value variable specified or associated with {rv_var}") + + rv_value = rv_value_var + else: + rv_value = aet.as_tensor(rv_value) + + # Make sure that the value is compatible with the random variable + rv_value = rv_var.type.filter_variable(rv_value.astype(rv_var.dtype)) + + if rv_value_var is None: + rv_value_var = rv_value + + rv_node = rv_var.owner + + if not rv_node: + return aet.zeros_like(rv_var) + + if not isinstance(rv_node.op, RandomVariable): + return _logp(rv_node.op, rv_value, rv_node.inputs) + + rng, size, dtype, *dist_params = rv_node.inputs + + # Here, we plug the actual random variable into the log-likelihood graph, + # because we want a log-likelihood graph that only contains + # random variables. This is important, because a random variable's + # parameters can contain random variables themselves. + # Ultimately, with a graph containing only random variables and + # "deterministics", we can simply replace all the random variables with + # their value variables and be done. 
+ if not cdf: + logp_var = _logp(rv_node.op, rv_var, *dist_params, **kwargs) + else: + logp_var = _logcdf(rv_node.op, rv_var, *dist_params, **kwargs) + + transform = getattr(rv_value_var.tag, "transform", None) if rv_value_var else None + + if transform and transformed and not cdf: + (logp_var,), _ = apply_transforms((logp_var,)) + + if jacobian: + transformed_jacobian = transform.jacobian_det(rv_var, rv_value) + if transformed_jacobian: + if logp_var.ndim > transformed_jacobian.ndim: + logp_var = logp_var.sum(axis=-1) + logp_var += transformed_jacobian + + # Replace random variables with their value variables + (logp_var,), replaced = rvs_to_value_vars((logp_var,), {rv_var: rv_value}) + + if rv_value_var != rv_value: + (logp_var,) = clone_replace((logp_var,), replace={rv_value_var: rv_value}) + + if sum: + logp_var = aet.sum(logp_var) + + if scaling: + logp_var *= _get_scaling( + getattr(rv_var.tag, "total_size", None), rv_value.shape, rv_value.ndim + ) + + # Recompute test values for the changes introduced by the replacements + # above. + if config.compute_test_value != "off": + for node in io_toposort(graph_inputs((logp_var,)), (logp_var,)): + compute_test_value(node) + + if rv_var.name is not None: + logp_var.name = "__logp_%s" % rv_var.name + + return logp_var + + +@singledispatch +def _logp(op: Op, value: TensorVariable, *dist_params, **kwargs): + """Create a log-likelihood graph. + + This function dispatches on the type of `op`, which should be a subclass + of `RandomVariable`. If you want to implement new log-likelihood graphs + for a `RandomVariable`, register a new function on this dispatcher. + + """ + return aet.zeros_like(value) + + +@_logp.register(Subtensor) +@_logp.register(AdvancedSubtensor) +@_logp.register(AdvancedSubtensor1) +def subtensor_logp(op, value, *inputs, **kwargs): + + # TODO: Compute the log-likelihood for a subtensor/index operation. + raise NotImplementedError() + + # "Flatten" and sum an array of indexed RVs' log-likelihoods + # rv_var, missing_values = + # + # missing_values = missing_values.data + # logp_var = aet.sum( + # [ + # logpt( + # rv_var, + # ) + # for idx, missing in zip( + # np.ndindex(missing_values.shape), missing_values.flatten() + # ) + # if missing + # ] + # ) + # return logp_var + + +def logcdf(*args, **kwargs): + """Create a log-CDF graph.""" + return logpt(*args, cdf=True, **kwargs) + + +@singledispatch +def _logcdf(op, value, *args, **kwargs): + """Create a log-CDF graph. + + This function dispatches on the type of `op`, which should be a subclass + of `RandomVariable`. If you want to implement new log-CDF graphs + for a `RandomVariable`, register a new function on this dispatcher. + + """ + raise NotImplementedError() + + +def logpt_sum(*args, **kwargs): + """Return the sum of the logp values for the given observations. + + Subclasses can use this to improve the speed of logp evaluations + if only the sum of the logp values is needed. 
+ """ + return logpt(*args, sum=True, **kwargs) + from pymc3.distributions.bart import BART from pymc3.distributions.bound import Bound diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index f4efa97a07..6083198731 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -118,8 +118,8 @@ def unit_cont_transform(op): def bounded_cont_transform(op): def transform_params(rv_var): _, _, _, lower, upper = rv_var.owner.inputs - lower = at.as_tensor_variable(lower) if lower is not None else None - upper = at.as_tensor_variable(upper) if upper is not None else None + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None return lower, upper return transforms.interval(transform_params) @@ -128,7 +128,7 @@ def transform_params(rv_var): def assert_negative_support(var, label, distname, value=-1e-6): msg = f"The variable specified for {label} has negative support for {distname}, " msg += "likely making it unsuitable for this parameter." - return Assert(msg)(var, at.all(at.ge(var, 0.0))) + return Assert(msg)(var, aet.all(aet.ge(var, 0.0))) def get_tau_sigma(tau=None, sigma=None): @@ -219,8 +219,8 @@ class Uniform(BoundedContinuous): @classmethod def dist(cls, lower=0, upper=1, **kwargs): - lower = at.as_tensor_variable(floatX(lower)) - upper = at.as_tensor_variable(floatX(upper)) + lower = aet.as_tensor_variable(floatX(lower)) + upper = aet.as_tensor_variable(floatX(upper)) # mean = (upper + lower) / 2.0 # median = self.mean return super().dist([lower, upper], **kwargs) @@ -238,7 +238,7 @@ def logp(value, lower, upper): ------- TensorVariable """ - return bound(-at.log(upper - lower), value >= lower, value <= upper) + return bound(-aet.log(upper - lower), value >= lower, value <= upper) def logcdf(value, lower, upper): """ @@ -255,12 +255,12 @@ def logcdf(value, lower, upper): ------- TensorVariable """ - return at.switch( - at.lt(value, lower) | at.lt(upper, lower), + return aet.switch( + aet.lt(value, lower) | aet.lt(upper, lower), -np.inf, - at.switch( - at.lt(value, upper), - at.log(value - lower) - at.log(upper - lower), + aet.switch( + aet.lt(value, upper), + aet.log(value - lower) - aet.log(upper - lower), 0, ), ) @@ -451,11 +451,11 @@ def dist(cls, mu=0, sigma=None, tau=None, sd=None, no_assert=False, **kwargs): if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - sigma = at.as_tensor_variable(sigma) + sigma = aet.as_tensor_variable(sigma) # sd = sigma - # tau = at.as_tensor_variable(tau) - # mean = median = mode = mu = at.as_tensor_variable(floatX(mu)) + # tau = aet.as_tensor_variable(tau) + # mean = median = mode = mu = aet.as_tensor_variable(floatX(mu)) # variance = 1.0 / self.tau if not no_assert: @@ -479,7 +479,7 @@ def logp(value, mu, sigma): """ tau, sigma = get_tau_sigma(tau=None, sigma=sigma) - return bound((-tau * (value - mu) ** 2 + at.log(tau / np.pi / 2.0)) / 2.0, sigma > 0) + return bound((-tau * (value - mu) ** 2 + aet.log(tau / np.pi / 2.0)) / 2.0, sigma > 0) def logcdf(value, mu, sigma): """ @@ -811,7 +811,7 @@ def logp(value, loc, sigma): tau, sigma = get_tau_sigma(tau=None, sigma=sigma) return bound( - -0.5 * tau * (value - loc) ** 2 + 0.5 * at.log(tau * 2.0 / np.pi), + -0.5 * tau * (value - loc) ** 2 + 0.5 * aet.log(tau * 2.0 / np.pi), value >= loc, tau > 0, sigma > 0, @@ -834,7 +834,7 @@ def logcdf(value, loc, sigma): """ z = zvalue(value, mu=loc, sigma=sigma) return bound( - at.log1p(-at.erfc(z / 
at.sqrt(2.0))), + aet.log1p(-aet.erfc(z / aet.sqrt(2.0))), loc <= value, 0 < sigma, ) @@ -842,6 +842,9 @@ def logcdf(value, loc, sigma): def _distr_parameters_for_repr(self): return ["sigma"] + def _distr_parameters_for_repr(self): + return ["sigma"] + class Wald(PositiveContinuous): r""" @@ -1146,8 +1149,8 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwar sigma = sd alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma) - alpha = at.as_tensor_variable(floatX(alpha)) - beta = at.as_tensor_variable(floatX(beta)) + alpha = aet.as_tensor_variable(floatX(alpha)) + beta = aet.as_tensor_variable(floatX(beta)) assert_negative_support(alpha, "alpha", "Beta") assert_negative_support(beta, "beta", "Beta") @@ -1188,11 +1191,11 @@ def logp(value, alpha, beta): TensorVariable """ - logval = at.log(value) - log1pval = at.log1p(-value) + logval = aet.log(value) + log1pval = aet.log1p(-value) logp = ( - at.switch(at.eq(alpha, 1), 0, (alpha - 1) * logval) - + at.switch(at.eq(beta, 1), 0, (beta - 1) * log1pval) + aet.switch(aet.eq(alpha, 1), 0, (alpha - 1) * logval) + + aet.switch(aet.eq(beta, 1), 0, (beta - 1) * log1pval) - betaln(alpha, beta) ) @@ -1219,9 +1222,9 @@ def logcdf(value, alpha, beta): ) return bound( - at.switch( - at.lt(value, 1), - at.log(incomplete_beta(alpha, beta, value)), + aet.switch( + aet.lt(value, 1), + aet.log(incomplete_beta(alpha, beta, value)), 0, ), 0 <= value, @@ -1376,10 +1379,10 @@ class Exponential(PositiveContinuous): @classmethod def dist(cls, lam, *args, **kwargs): - lam = at.as_tensor_variable(floatX(lam)) + lam = aet.as_tensor_variable(floatX(lam)) # mean = 1.0 / lam - # median = mean * at.log(2) - # mode = at.zeros_like(lam) + # median = mean * aet.log(2) + # mode = aet.zeros_like(lam) # variance = lam ** -2 @@ -1400,7 +1403,7 @@ def logp(value, lam): ------- TensorVariable """ - return bound(at.log(lam) - lam * value, value >= 0, lam > 0) + return bound(aet.log(lam) - lam * value, value >= 0, lam > 0) def logcdf(value, lam): r""" @@ -2167,8 +2170,8 @@ class Cauchy(Continuous): @classmethod def dist(cls, alpha, beta, *args, **kwargs): - alpha = at.as_tensor_variable(floatX(alpha)) - beta = at.as_tensor_variable(floatX(beta)) + alpha = aet.as_tensor_variable(floatX(alpha)) + beta = aet.as_tensor_variable(floatX(beta)) # median = alpha # mode = alpha @@ -2191,7 +2194,7 @@ def logp(value, alpha, beta): TensorVariable """ return bound( - -at.log(np.pi) - at.log(beta) - at.log1p(((value - alpha) / beta) ** 2), beta > 0 + -aet.log(np.pi) - aet.log(beta) - aet.log1p(((value - alpha) / beta) ** 2), beta > 0 ) def logcdf(value, alpha, beta): @@ -2210,7 +2213,7 @@ def logcdf(value, alpha, beta): TensorVariable """ return bound( - at.log(0.5 + at.arctan((value - alpha) / beta) / np.pi), + aet.log(0.5 + aet.arctan((value - alpha) / beta) / np.pi), 0 < beta, ) @@ -2257,7 +2260,7 @@ class HalfCauchy(PositiveContinuous): @classmethod def dist(cls, beta, *args, **kwargs): - beta = at.as_tensor_variable(floatX(beta)) + beta = aet.as_tensor_variable(floatX(beta)) assert_negative_support(beta, "beta", "HalfCauchy") return super().dist([0.0, beta], **kwargs) @@ -2276,7 +2279,7 @@ def logp(value, loc, beta): TensorVariable """ return bound( - at.log(2) - at.log(np.pi) - at.log(beta) - at.log1p(((value - loc) / beta) ** 2), + aet.log(2) - aet.log(np.pi) - aet.log(beta) - aet.log1p(((value - loc) / beta) ** 2), value >= loc, beta > 0, ) @@ -2297,7 +2300,7 @@ def logcdf(value, loc, beta): TensorVariable """ return bound( - at.log(2 * at.arctan((value - 
loc) / beta) / np.pi), + aet.log(2 * aet.arctan((value - loc) / beta) / np.pi), loc <= value, 0 < beta, ) @@ -2369,17 +2372,17 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, no_assert=Fal sigma = sd alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma) - alpha = at.as_tensor_variable(floatX(alpha)) - beta = at.as_tensor_variable(floatX(beta)) + alpha = aet.as_tensor_variable(floatX(alpha)) + beta = aet.as_tensor_variable(floatX(beta)) # mean = alpha / beta - # mode = at.maximum((alpha - 1) / beta, 0) + # mode = aet.maximum((alpha - 1) / beta, 0) # variance = alpha / beta ** 2 if not no_assert: assert_negative_support(alpha, "alpha", "Gamma") assert_negative_support(beta, "beta", "Gamma") - return super().dist([alpha, at.inv(beta)], **kwargs) + return super().dist([alpha, aet.inv(beta)], **kwargs) @classmethod def get_alpha_beta(cls, alpha=None, beta=None, mu=None, sigma=None): @@ -2437,12 +2440,12 @@ def logcdf(value, alpha, beta): TensorVariable """ # Avoid C-assertion when the gammainc function is called with invalid values (#4340) - safe_alpha = at.switch(at.lt(alpha, 0), 0, alpha) - safe_beta = at.switch(at.lt(beta, 0), 0, beta) - safe_value = at.switch(at.lt(value, 0), 0, value) + safe_alpha = aet.switch(aet.lt(alpha, 0), 0, alpha) + safe_beta = aet.switch(aet.lt(beta, 0), 0, beta) + safe_value = aet.switch(aet.lt(value, 0), 0, value) return bound( - at.log(at.gammainc(safe_alpha, safe_beta * safe_value)), + aet.log(aet.gammainc(safe_alpha, safe_beta * safe_value)), 0 <= value, 0 < alpha, 0 < beta, @@ -2505,8 +2508,8 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwar sigma = sd alpha, beta = cls._get_alpha_beta(alpha, beta, mu, sigma) - alpha = at.as_tensor_variable(floatX(alpha)) - beta = at.as_tensor_variable(floatX(beta)) + alpha = aet.as_tensor_variable(floatX(alpha)) + beta = aet.as_tensor_variable(floatX(beta)) # m = beta / (alpha - 1.0) # try: @@ -2516,8 +2519,8 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwar # mean = m # mode = beta / (alpha + 1.0) - # variance = at.switch( - # at.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf + # variance = aet.switch( + # aet.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf # ) assert_negative_support(alpha, "alpha", "InverseGamma") @@ -2585,12 +2588,12 @@ def logcdf(value, alpha, beta): TensorVariable """ # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) - safe_alpha = at.switch(at.lt(alpha, 0), 0, alpha) - safe_beta = at.switch(at.lt(beta, 0), 0, beta) - safe_value = at.switch(at.lt(value, 0), 0, value) + safe_alpha = aet.switch(aet.lt(alpha, 0), 0, alpha) + safe_beta = aet.switch(aet.lt(beta, 0), 0, beta) + safe_value = aet.switch(aet.lt(value, 0), 0, value) return bound( - at.log(at.gammaincc(safe_alpha, safe_beta / safe_value)), + aet.log(aet.gammaincc(safe_alpha, safe_beta / safe_value)), 0 <= value, 0 < alpha, 0 < beta, diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 31cf813a5d..4f6b3046a9 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -13,7 +13,7 @@ # limitations under the License. 
import warnings -import aesara.tensor as at +import aesara.tensor as aet import numpy as np from aesara.tensor.random.basic import bernoulli, binomial, categorical, nbinom, poisson @@ -100,9 +100,9 @@ class Binomial(Discrete): @classmethod def dist(cls, n, p, *args, **kwargs): - n = at.as_tensor_variable(intX(n)) - p = at.as_tensor_variable(floatX(p)) - # mode = at.cast(tround(n * p), self.dtype) + n = aet.as_tensor_variable(intX(n)) + p = aet.as_tensor_variable(floatX(p)) + # mode = aet.cast(tround(n * p), self.dtype) return super().dist([n, p], **kwargs) def logp(value, n, p): @@ -147,12 +147,12 @@ def logcdf(value, n, p): f"Binomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - value = at.floor(value) + value = aet.floor(value) return bound( - at.switch( - at.lt(value, n), - at.log(incomplete_beta(n - value, value + 1, 1 - p)), + aet.switch( + aet.lt(value, n), + aet.log(incomplete_beta(n - value, value + 1, 1 - p)), 0, ), 0 <= value, @@ -373,8 +373,8 @@ class Bernoulli(Discrete): @classmethod def dist(cls, p=None, logit_p=None, *args, **kwargs): - p = at.as_tensor_variable(floatX(p)) - # mode = at.cast(tround(p), "int8") + p = aet.as_tensor_variable(floatX(p)) + # mode = aet.cast(tround(p), "int8") return super().dist([p], **kwargs) def logp(value, p): @@ -392,11 +392,11 @@ def logp(value, p): TensorVariable """ # if self._is_logit: - # lp = at.switch(value, self._logit_p, -self._logit_p) + # lp = aet.switch(value, self._logit_p, -self._logit_p) # return -log1pexp(-lp) # else: return bound( - at.switch(value, at.log(p), at.log(1 - p)), + aet.switch(value, aet.log(p), aet.log(1 - p)), value >= 0, value <= 1, p >= 0, @@ -616,8 +616,8 @@ class Poisson(Discrete): @classmethod def dist(cls, mu, *args, **kwargs): - mu = at.as_tensor_variable(floatX(mu)) - # mode = intX(at.floor(mu)) + mu = aet.as_tensor_variable(floatX(mu)) + # mode = intX(aet.floor(mu)) return super().dist([mu], *args, **kwargs) def logp(value, mu): @@ -653,7 +653,7 @@ def logcdf(value, mu): ------- TensorVariable """ - value = at.floor(value) + value = aet.floor(value) # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) safe_mu = at.switch(at.lt(mu, 0), 0, mu) safe_value = at.switch(at.lt(value, 0), 0, value) @@ -731,16 +731,16 @@ def NegBinom(a, m, x): @classmethod def dist(cls, mu=None, alpha=None, p=None, n=None, *args, **kwargs): - n, p = cls.get_mu_alpha(mu, alpha, p, n) - n = at.as_tensor_variable(floatX(n)) - p = at.as_tensor_variable(floatX(p)) + n, p = cls.get_n_p(mu, alpha, p, n) + n = aet.as_tensor_variable(floatX(n)) + p = aet.as_tensor_variable(floatX(p)) return super().dist([n, p], *args, **kwargs) @classmethod - def get_mu_alpha(cls, mu=None, alpha=None, p=None, n=None): + def get_n_p(cls, mu=None, alpha=None, p=None, n=None): if n is None: if alpha is not None: - n = at.as_tensor_variable(floatX(alpha)) + n = alpha else: raise ValueError("Incompatible parametrization. Must specify either alpha or n.") elif alpha is not None: @@ -748,7 +748,7 @@ def get_mu_alpha(cls, mu=None, alpha=None, p=None, n=None): if p is None: if mu is not None: - mu = at.as_tensor_variable(floatX(mu)) + mu = mu p = n / (mu + n) else: raise ValueError("Incompatible parametrization. Must specify either mu or p.") @@ -783,7 +783,7 @@ def logp(value, n, p): ) # Return Poisson when alpha gets very large. 
- return at.switch(at.gt(alpha, 1e10), Poisson.logp(value, mu), negbinom) + return aet.switch(aet.gt(alpha, 1e10), Poisson.logp(value, mu), negbinom) def logcdf(value, n, p): """ @@ -806,7 +806,7 @@ def logcdf(value, n, p): ) return bound( - at.log(incomplete_beta(n, at.floor(value) + 1, p)), + aet.log(incomplete_beta(n, aet.floor(value) + 1, p)), 0 <= value, 0 < n, 0 <= p, @@ -1238,11 +1238,11 @@ class Categorical(Discrete): @classmethod def dist(cls, p, **kwargs): - p = at.as_tensor_variable(floatX(p)) + p = aet.as_tensor_variable(floatX(p)) - # mode = at.argmax(p, axis=-1) + # mode = aet.argmax(p, axis=-1) # if mode.ndim == 1: - # mode = at.squeeze(mode) + # mode = aet.squeeze(mode) return super().dist([p], **kwargs) @@ -1257,28 +1257,28 @@ def logp(value, p): values are desired the values must be provided in a numpy array or `TensorVariable` """ - k = at.shape(p)[-1] + k = aet.shape(p)[-1] p_ = p - p = p_ / at.sum(p_, axis=-1, keepdims=True) - value_clip = at.clip(value, 0, k - 1) + p = p_ / aet.sum(p_, axis=-1, keepdims=True) + value_clip = aet.clip(value, 0, k - 1) if p.ndim > 1: if p.ndim > value_clip.ndim: - value_clip = at.shape_padleft(value_clip, p_.ndim - value_clip.ndim) + value_clip = aet.shape_padleft(value_clip, p_.ndim - value_clip.ndim) elif p.ndim < value_clip.ndim: - p = at.shape_padleft(p, value_clip.ndim - p_.ndim) + p = aet.shape_padleft(p, value_clip.ndim - p_.ndim) pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1)) - a = at.log( + a = aet.log( take_along_axis( p.dimshuffle(pattern), value_clip, ) ) else: - a = at.log(p[value_clip]) + a = aet.log(p[value_clip]) return bound( - a, value >= 0, value <= (k - 1), at.all(p_ >= 0, axis=-1), at.all(p <= 1, axis=-1) + a, value >= 0, value <= (k - 1), aet.all(p_ >= 0, axis=-1), aet.all(p <= 1, axis=-1) ) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 3626e1b80f..6fe00849a5 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -546,8 +546,8 @@ def _step(i, t, s, a, b, value): (t, s), _ = scan( _step, - sequences=[at.arange(2, 302)], - outputs_info=[e for e in at.cast((t, s), "float64")], + sequences=[aet.arange(2, 302)], + outputs_info=[e for e in aet.cast((t, s), "float64")], non_sequences=[a, b, value], ) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index e3f5893718..f1192374ed 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -33,9 +33,14 @@ import aesara import aesara.graph.basic -import aesara.tensor as at +import aesara.tensor as aet +import numpy as np -from pymc3.util import UNSET, get_repr_for_variable +from aesara.compile.sharedvalue import SharedVariable +from aesara.graph.basic import Constant +from aesara.tensor.var import TensorVariable + +from pymc3.util import get_repr_for_variable from pymc3.vartypes import string_types __all__ = [ @@ -60,6 +65,8 @@ class _Unpickling: class DistributionMeta(ABCMeta): def __new__(cls, name, bases, clsdict): + new_cls = super().__new__(cls, name, bases, clsdict) + # Forcefully deprecate old v3 `Distribution`s if "random" in clsdict: @@ -87,8 +94,6 @@ def _random(*args, **kwargs): rv_type = type(rv_op) - new_cls = super().__new__(cls, name, bases, clsdict) - if rv_type is not None: # Create dispatch functions @@ -96,17 +101,15 @@ def _random(*args, **kwargs): if class_logp: @_logp.register(rv_type) - def logp(op, var, rvs_to_values, *dist_params, **kwargs): - value_var = rvs_to_values.get(var, var) - return 
class_logp(value_var, *dist_params, **kwargs) + def logp(op, value, *dist_params, **kwargs): + return class_logp(value, *dist_params, **kwargs) class_logcdf = clsdict.get("logcdf") if class_logcdf: @_logcdf.register(rv_type) - def logcdf(op, var, rvs_to_values, *dist_params, **kwargs): - value_var = rvs_to_values.get(var, var) - return class_logcdf(value_var, *dist_params, **kwargs) + def logcdf(op, value, *dist_params, **kwargs): + return class_logcdf(value, *dist_params, **kwargs) # class_transform = clsdict.get("transform") # if class_transform: @@ -158,7 +161,7 @@ def __new__(cls, name, *args, **kwargs): if "shape" in kwargs: raise DeprecationWarning("The `shape` keyword is deprecated; use `size`.") - transform = kwargs.pop("transform", UNSET) + transform = kwargs.pop("transform", None) rv_out = cls.dist(*args, rng=rng, **kwargs) @@ -176,6 +179,36 @@ def dist(cls, dist_params, **kwargs): return rv_var + def default(self): + return np.asarray(self.get_test_val(self.testval, self.defaults), self.dtype) + + def get_test_val(self, val, defaults): + if val is None: + for v in defaults: + if hasattr(self, v): + attr_val = self.getattr_value(v) + if np.all(np.isfinite(attr_val)): + return attr_val + raise AttributeError( + "%s has no finite default value to use, " + "checked: %s. Pass testval argument or " + "adjust so value is finite." % (self, str(defaults)) + ) + else: + return self.getattr_value(val) + + @classmethod + def dist(cls, dist_params, **kwargs): + + testval = kwargs.pop("testval", None) + + rv_var = cls.rv_op(*dist_params, **kwargs) + + if testval is not None: + rv_var.tag.test_value = testval + + return rv_var + def _distr_parameters_for_repr(self): """Return the names of the parameters for this distribution (e.g. "mu" and "sigma" for Normal). Used in generating string (and LaTeX etc.) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 4eb6b01817..501636e6de 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -77,11 +77,11 @@ def quaddist_matrix(cov=None, chol=None, tau=None, lower=True, *args, **kwargs): raise ValueError("Incompatible parameterization. Specify exactly one of tau, cov, or chol.") if cov is not None: - cov = at.as_tensor_variable(cov) + cov = aet.as_tensor_variable(cov) if cov.ndim != 2: raise ValueError("cov must be two dimensional.") elif tau is not None: - tau = at.as_tensor_variable(tau) + tau = aet.as_tensor_variable(tau) if tau.ndim != 2: raise ValueError("tau must be two dimensional.") # TODO: What's the correct order/approach (in the non-square case)? @@ -89,7 +89,7 @@ def quaddist_matrix(cov=None, chol=None, tau=None, lower=True, *args, **kwargs): cov = matrix_inverse(tau) else: # TODO: What's the correct order/approach (in the non-square case)? - chol = at.as_tensor_variable(chol) + chol = aet.as_tensor_variable(chol) if chol.ndim != 2: raise ValueError("chol must be two dimensional.") cov = chol.dot(chol.T) @@ -126,30 +126,30 @@ def quaddist_parse(value, mu, cov, mat_type="cov"): def quaddist_chol(delta, chol_mat): - diag = at.diag(chol_mat) + diag = aet.diag(chol_mat) # Check if the covariance matrix is positive definite. - ok = at.all(diag > 0) + ok = aet.all(diag > 0) # If not, replace the diagonal. We return -inf later, but # need to prevent solve_lower from throwing an exception. 
- chol_cov = at.switch(ok, chol_mat, 1) + chol_cov = aet.switch(ok, chol_mat, 1) delta_trans = solve_lower(chol_cov, delta.T).T quaddist = (delta_trans ** 2).sum(axis=-1) - logdet = at.sum(at.log(diag)) + logdet = aet.sum(aet.log(diag)) return quaddist, logdet, ok def quaddist_tau(delta, chol_mat): - diag = at.nlinalg.diag(chol_mat) + diag = aet.nlinalg.diag(chol_mat) # Check if the precision matrix is positive definite. - ok = at.all(diag > 0) + ok = aet.all(diag > 0) # If not, replace the diagonal. We return -inf later, but # need to prevent solve_lower from throwing an exception. - chol_tau = at.switch(ok, chol_mat, 1) + chol_tau = aet.switch(ok, chol_mat, 1) - delta_trans = at.dot(delta, chol_tau) + delta_trans = aet.dot(delta, chol_tau) quaddist = (delta_trans ** 2).sum(axis=-1) - logdet = -at.sum(at.log(diag)) + logdet = -aet.sum(aet.log(diag)) return quaddist, logdet, ok @@ -221,7 +221,7 @@ class MvNormal(Continuous): @classmethod def dist(cls, mu, cov=None, tau=None, chol=None, lower=True, **kwargs): - mu = at.as_tensor_variable(mu) + mu = aet.as_tensor_variable(mu) cov = quaddist_matrix(cov, chol, tau, lower) return super().dist([mu, cov], **kwargs) @@ -354,7 +354,7 @@ def logp(value, nu, cov): k = floatX(value.shape[-1]) norm = gammaln((nu + k) / 2.0) - gammaln(nu / 2.0) - 0.5 * k * floatX(np.log(nu * np.pi)) - inner = -(nu + k) / 2.0 * at.log1p(quaddist / nu) + inner = -(nu + k) / 2.0 * aet.log1p(quaddist / nu) return bound(norm + inner - logdet, ok) def _distr_parameters_for_repr(self): @@ -394,9 +394,9 @@ def __new__(cls, name, *args, **kwargs): @classmethod def dist(cls, a, **kwargs): - a = at.as_tensor_variable(a) - # mean = a / at.sum(a) - # mode = at.switch(at.all(a > 1), (a - 1) / at.sum(a - 1), np.nan) + a = aet.as_tensor_variable(a) + # mean = a / aet.sum(a) + # mode = aet.switch(aet.all(a > 1), (a - 1) / aet.sum(a - 1), np.nan) return super().dist([a], **kwargs) @@ -416,10 +416,10 @@ def logp(value, a): """ # only defined for sum(value) == 1 return bound( - at.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(at.sum(a, axis=-1)), - at.all(value >= 0), - at.all(value <= 1), - at.all(a > 0), + aet.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(aet.sum(a, axis=-1)), + aet.all(value >= 0), + aet.all(value <= 1), + aet.all(a > 0), broadcast_conditions=False, ) @@ -451,6 +451,30 @@ def rng_fn(cls, rng, n, p, size): multinomial = MultinomialRV() +class MultinomialRV(MultinomialRV): + """Aesara's `MultinomialRV` doesn't broadcast; this one does.""" + + @classmethod + def rng_fn(cls, rng, n, p, size): + if n.ndim > 0 or p.ndim > 1: + n, p = broadcast_params([n, p], cls.ndims_params) + size = tuple(size or ()) + + if size: + n = np.broadcast_to(n, size + n.shape) + p = np.broadcast_to(p, size + p.shape) + + res = np.empty(p.shape) + for idx in np.ndindex(p.shape[:-1]): + res[idx] = rng.multinomial(n[idx], p[idx]) + return res + else: + return rng.multinomial(n, p, size=size) + + +multinomial = MultinomialRV() + + class Multinomial(Discrete): R""" Multinomial log-likelihood. 
@@ -488,15 +512,15 @@ class Multinomial(Discrete): @classmethod def dist(cls, n, p, *args, **kwargs): - p = p / at.sum(p, axis=-1, keepdims=True) - n = at.as_tensor_variable(n) - p = at.as_tensor_variable(p) + p = p / aet.sum(p, axis=-1, keepdims=True) + n = aet.as_tensor_variable(n) + p = aet.as_tensor_variable(p) # mean = n * p - # mode = at.cast(at.round(mean), "int32") - # diff = n - at.sum(mode, axis=-1, keepdims=True) - # inc_bool_arr = at.abs_(diff) > 0 - # mode = at.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) + # mode = aet.cast(aet.round(mean), "int32") + # diff = n - aet.sum(mode, axis=-1, keepdims=True) + # inc_bool_arr = aet.abs_(diff) > 0 + # mode = aet.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) return super().dist([n, p], *args, **kwargs) def logp(value, n, p): @@ -514,12 +538,12 @@ def logp(value, n, p): TensorVariable """ return bound( - factln(n) + at.sum(-factln(value) + logpow(p, value), axis=-1), - at.all(value >= 0), - at.all(at.eq(at.sum(value, axis=-1), n)), - at.all(p <= 1), - at.all(at.eq(at.sum(p, axis=-1), 1)), - at.all(at.ge(n, 0)), + factln(n) + aet.sum(-factln(value) + logpow(p, value), axis=-1), + aet.all(value >= 0), + aet.all(aet.eq(aet.sum(value, axis=-1), n)), + aet.all(p <= 1), + aet.all(aet.eq(aet.sum(p, axis=-1), 1)), + aet.all(aet.ge(n, 0)), broadcast_conditions=False, ) @@ -917,9 +941,9 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv # L * A * A.T * L.T ~ Wishart(L*L.T, nu) if return_cholesky: - return pm.Deterministic(name, at.dot(L, A)) + return pm.Deterministic(name, aet.dot(L, A)) else: - return pm.Deterministic(name, at.dot(at.dot(at.dot(L, A), A.T), L.T)) + return pm.Deterministic(name, aet.dot(aet.dot(aet.dot(L, A), A.T), L.T)) def _lkj_normalizing_constant(eta, n): diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 0c72550387..3213fbe1fb 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -140,7 +140,7 @@ def __init__( self.p = p self.constant = constant - self.rho = rho = at.as_tensor_variable(rho) + self.rho = rho = aet.as_tensor_variable(rho) self.init = init or Flat.dist() def logp(self, value): @@ -212,9 +212,9 @@ def __init__(self, tau=None, init=None, sigma=None, mu=0.0, sd=None, *args, **kw self.tau = at.as_tensor_variable(tau) sigma = at.as_tensor_variable(sigma) self.sigma = self.sd = sigma - self.mu = at.as_tensor_variable(mu) + self.mu = aet.as_tensor_variable(mu) self.init = init or Flat.dist() - self.mean = at.as_tensor_variable(0.0) + self.mean = aet.as_tensor_variable(0.0) def _mu_and_sigma(self, mu, sigma): """Helper to get mu and sigma if they are high dimensional.""" diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 86dfec050e..2fd152d458 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara.tensor as at +import aesara.tensor as aet from aesara.tensor.subtensor import advanced_set_subtensor1 from aesara.tensor.var import TensorVariable @@ -75,12 +75,9 @@ def forward(self, rv_var: TensorVariable, rv_value: TensorVariable) -> TensorVar rv_value The variable representing a value of `rv_var`. - Returns - -------- - tensor - Transformed tensor. 
- """ - raise NotImplementedError + When a transform is applied to a value of some random variable + `rv_var`, it will transform the random variable `rv_value` after + sampling from `rv_var`. def backward(self, rv_var: TensorVariable, rv_value: TensorVariable) -> TensorVariable: """Applies inverse of transformation. @@ -123,20 +120,20 @@ def __str__(self): class ElemwiseTransform(Transform): def jacobian_det(self, rv_var, rv_value): - grad = at.reshape( - gradient(at.sum(self.backward(rv_var, rv_value)), [rv_value]), rv_value.shape + grad = aet.reshape( + gradient(aet.sum(self.backward(rv_var, rv_value)), [rv_value]), rv_value.shape ) - return at.log(at.abs_(grad)) + return aet.log(aet.abs_(grad)) class Log(ElemwiseTransform): name = "log" def backward(self, rv_var, rv_value): - return at.exp(rv_value) + return aet.exp(rv_value) def forward(self, rv_var, rv_value): - return at.log(rv_value) + return aet.log(rv_value) def jacobian_det(self, rv_var, rv_value): return rv_value @@ -149,7 +146,7 @@ class LogExpM1(ElemwiseTransform): name = "log_exp_m1" def backward(self, rv_var, rv_value): - return at.nnet.softplus(rv_value) + return aet.nnet.softplus(rv_value) def forward(self, rv_var, rv_value): """Inverse operation of softplus. @@ -157,10 +154,10 @@ def forward(self, rv_var, rv_value): y = Log(Exp(x) - 1) = Log(1 - Exp(-x)) + x """ - return at.log(1.0 - at.exp(-rv_value)) + rv_value + return aet.log(1.0 - aet.exp(-rv_value)) + rv_value def jacobian_det(self, rv_var, rv_value): - return -at.nnet.softplus(-rv_value) + return -aet.nnet.softplus(-rv_value) log_exp_m1 = LogExpM1() @@ -191,23 +188,23 @@ def backward(self, rv_var, rv_value): a, b = self.param_extract_fn(rv_var) if a is not None and b is not None: - sigmoid_x = at.nnet.sigmoid(rv_value) + sigmoid_x = aet.nnet.sigmoid(rv_value) return sigmoid_x * b + (1 - sigmoid_x) * a elif a is not None: - return at.exp(rv_value) + a + return aet.exp(rv_value) + a elif b is not None: - return b - at.exp(rv_value) + return b - aet.exp(rv_value) else: return rv_value def forward(self, rv_var, rv_value): a, b = self.param_extract_fn(rv_var) if a is not None and b is not None: - return at.log(rv_value - a) - at.log(b - rv_value) + return aet.log(rv_value - a) - aet.log(b - rv_value) elif a is not None: - return at.log(rv_value - a) + return aet.log(rv_value - a) elif b is not None: - return at.log(b - rv_value) + return aet.log(b - rv_value) else: return rv_value @@ -215,8 +212,8 @@ def jacobian_det(self, rv_var, rv_value): a, b = self.param_extract_fn(rv_var) if a is not None and b is not None: - s = at.nnet.softplus(-rv_value) - return at.log(b - a) - 2 * s - rv_value + s = aet.nnet.softplus(-rv_value) + return aet.log(b - a) - 2 * s - rv_value else: return rv_value @@ -228,19 +225,19 @@ class Ordered(Transform): name = "ordered" def backward(self, rv_var, rv_value): - x = at.zeros(rv_value.shape) - x = at.inc_subtensor(x[..., 0], rv_value[..., 0]) - x = at.inc_subtensor(x[..., 1:], at.exp(rv_value[..., 1:])) - return at.cumsum(x, axis=-1) + x = aet.zeros(rv_value.shape) + x = aet.inc_subtensor(x[..., 0], rv_value[..., 0]) + x = aet.inc_subtensor(x[..., 1:], aet.exp(rv_value[..., 1:])) + return aet.cumsum(x, axis=-1) def forward(self, rv_var, rv_value): - y = at.zeros(rv_value.shape) - y = at.inc_subtensor(y[..., 0], rv_value[..., 0]) - y = at.inc_subtensor(y[..., 1:], at.log(rv_value[..., 1:] - rv_value[..., :-1])) + y = aet.zeros(rv_value.shape) + y = aet.inc_subtensor(y[..., 0], rv_value[..., 0]) + y = aet.inc_subtensor(y[..., 1:], 
aet.log(rv_value[..., 1:] - rv_value[..., :-1])) return y def jacobian_det(self, rv_var, rv_value): - return at.sum(rv_value[..., 1:], axis=-1) + return aet.sum(rv_value[..., 1:], axis=-1) ordered = Ordered() @@ -259,15 +256,15 @@ class SumTo1(Transform): name = "sumto1" def backward(self, rv_var, rv_value): - remaining = 1 - at.sum(rv_value[..., :], axis=-1, keepdims=True) - return at.concatenate([rv_value[..., :], remaining], axis=-1) + remaining = 1 - aet.sum(rv_value[..., :], axis=-1, keepdims=True) + return aet.concatenate([rv_value[..., :], remaining], axis=-1) def forward(self, rv_var, rv_value): return rv_value[..., :-1] def jacobian_det(self, rv_var, rv_value): - y = at.zeros(rv_value.shape) - return at.sum(y, axis=-1) + y = aet.zeros(rv_value.shape) + return aet.sum(y, axis=-1) sum_to_1 = SumTo1() @@ -305,7 +302,7 @@ def backward(self, rv_var, rv_value): return rv_value y = rv_value.T - y = at.concatenate([y, -at.sum(y, 0, keepdims=True)]) + y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)]) # "softmax" with vector support and no deprication warning: e_y = at.exp(y - at.max(y, 0, keepdims=True)) x = e_y / at.sum(e_y, 0, keepdims=True) @@ -315,7 +312,7 @@ def jacobian_det(self, rv_var, rv_value): if rv_var.broadcastable[-1]: # If this variable is just a bunch of scalars/degenerate # Dirichlets, we can't transform it - return at.ones_like(rv_value) + return aet.ones_like(rv_value) y = rv_value.T Km1 = y.shape[0] + 1 @@ -335,13 +332,13 @@ class Circular(ElemwiseTransform): name = "circular" def backward(self, rv_var, rv_value): - return at.arctan2(at.sin(rv_value), at.cos(rv_value)) + return aet.arctan2(aet.sin(rv_value), aet.cos(rv_value)) def forward(self, rv_var, rv_value): - return at.as_tensor_variable(rv_value) + return aet.as_tensor_variable(rv_value) def jacobian_det(self, rv_var, rv_value): - return at.zeros(rv_value.shape) + return aet.zeros(rv_value.shape) circular = Circular() @@ -355,15 +352,15 @@ def __init__(self, param_extract_fn): def backward(self, rv_var, rv_value): diag_idxs = self.param_extract_fn(rv_var) - return advanced_set_subtensor1(rv_value, at.exp(rv_value[diag_idxs]), diag_idxs) + return advanced_set_subtensor1(rv_value, aet.exp(rv_value[diag_idxs]), diag_idxs) def forward(self, rv_var, rv_value): diag_idxs = self.param_extract_fn(rv_var) - return advanced_set_subtensor1(rv_value, at.log(rv_value[diag_idxs]), diag_idxs) + return advanced_set_subtensor1(rv_value, aet.log(rv_value[diag_idxs]), diag_idxs) def jacobian_det(self, rv_var, rv_value): diag_idxs = self.param_extract_fn(rv_var) - return at.sum(rv_value[diag_idxs]) + return aet.sum(rv_value[diag_idxs]) class Chain(Transform): @@ -387,7 +384,7 @@ def backward(self, rv_var, rv_value): return x def jacobian_det(self, rv_var, rv_value): - y = at.as_tensor_variable(rv_value) + y = aet.as_tensor_variable(rv_value) det_list = [] ndim0 = y.ndim for transf in reversed(self.transform_list): diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 17e232f0c2..fa0507309c 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -280,7 +280,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): if reparameterize: chi2 = pm.ChiSquared(name + "_chi2_", self.nu) v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) - f = pm.Deterministic(name, (at.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) + f = pm.Deterministic(name, (aet.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) else: f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, size=shape, **kwargs) return f @@ -891,7 
+891,7 @@ def _build_prior(self, name, Xs, **kwargs): chols = [cholesky(stabilize(cov(X))) for cov, X in zip(self.cov_funcs, Xs)] # remove reparameterization option v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=self.N, **kwargs) - f = pm.Deterministic(name, mu + at.flatten(kron_dot(chols, v))) + f = pm.Deterministic(name, mu + aet.flatten(kron_dot(chols, v))) return f def prior(self, name, Xs, **kwargs): diff --git a/pymc3/model.py b/pymc3/model.py index 9cccde8b30..bcd4f8dea8 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -33,21 +33,16 @@ from aesara.tensor.var import TensorVariable from pandas import Series -from pymc3.aesaraf import ( - change_rv_size, - gradient, - hessian, - inputvars, - pandas_to_array, - rvs_to_value_vars, -) +import pymc3 as pm + +from pymc3.aesaraf import generator, gradient, hessian, inputvars from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.data import GenTensorVariable, Minibatch -from pymc3.distributions import logp_transform, logpt, logpt_sum -from pymc3.exceptions import ImputationWarning, SamplingError +from pymc3.distributions import change_rv_size, logp_transform, logpt, logpt_sum +from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list -from pymc3.util import UNSET, WithMemoization, get_var_name, treedict, treelist -from pymc3.vartypes import continuous_types, discrete_types, typefilter +from pymc3.util import WithMemoization, get_var_name +from pymc3.vartypes import continuous_types, discrete_types, isgenerator, typefilter __all__ = [ "Model", @@ -117,6 +112,8 @@ def incorporate_methods(source, destination, methods, wrapper=None, override=Fal T = TypeVar("T", bound="ContextMeta") +no_transform_object = object() + class ContextMeta(type): """Functionality for objects that put themselves in a context using @@ -430,7 +427,7 @@ def __init__( givens.append((var, shared)) if compute_grads: - grads = grad(cost, grad_vars, disconnected_inputs="ignore") + grads = grad(cost, grad_vars) for grad_wrt, var in zip(grads, grad_vars): grad_wrt.name = f"{var.name}_grad" outputs = [cost] + grads @@ -597,7 +594,10 @@ def __new__(cls, *args, **kwargs): instance._parent = kwargs.get("model") else: instance._parent = cls.get_context(error_if_none=False) - instance._aesara_config = kwargs.get("aesara_config", {}) + aesara_config = kwargs.get("aesara_config", None) + if aesara_config is None or "compute_test_value" not in aesara_config: + aesara_config = {"compute_test_value": "ignore"} + instance._aesara_config = aesara_config return instance def __init__(self, name="", model=None, aesara_config=None, coords=None, check_bounds=True): @@ -647,9 +647,13 @@ def root(self): def isroot(self): return self.parent is None + @property + def size(self): + return sum(self.test_point[n.name].size for n in self.free_RVs) + @property def ndim(self): - return sum(var.ndim for var in self.value_vars) + return sum(var.ndim for var in self.free_RVs) def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): """Compile a aesara function that computes logp and gradient. 
@@ -675,19 +679,14 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): if tempered: with self: - # Convert random variables into their log-likelihood inputs and - # apply their transforms, if any - potentials, _ = rvs_to_value_vars(self.potentials, apply_transforms=True) - - free_RVs_logp = at.sum( + free_RVs_logp = aet.sum( [ - at.sum(logpt(var, getattr(var.tag, "value_var", None))) - for var in self.free_RVs + aet.sum(logpt(var, getattr(var.tag, "value_var", None))) + for var in self.free_RVs + self.potentials ] - + list(potentials) ) - observed_RVs_logp = at.sum( - [at.sum(logpt(obs, obs.tag.observations)) for obs in self.observed_RVs] + observed_RVs_logp = aet.sum( + [aet.sum(logpt(obs, obs.tag.observations)) for obs in self.observed_RVs] ) costs = [free_RVs_logp, observed_RVs_logp] @@ -697,7 +696,7 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): input_vars = {i for i in graph_inputs(costs) if not isinstance(i, Constant)} extra_vars = [getattr(var.tag, "value_var", var) for var in self.free_RVs] extra_vars_and_values = { - var: self.initial_point[var.name] + var: self.test_point[var.name] for var in extra_vars if var in input_vars and var not in grad_vars } @@ -709,14 +708,8 @@ def logpt(self): with self: factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] factors += [logpt_sum(obs, obs.tag.observations) for obs in self.observed_RVs] - - # Convert random variables into their log-likelihood inputs and - # apply their transforms, if any - potentials, _ = rvs_to_value_vars(self.potentials, apply_transforms=True) - - factors += potentials - - logp_var = at.sum([at.sum(factor) for factor in factors]) + factors += self.potentials + logp_var = aet.sum([aet.sum(factor) for factor in factors]) if self.name: logp_var.name = "__logp_%s" % self.name else: @@ -739,14 +732,8 @@ def logp_nojact(self): factors += [ logpt_sum(obs, obs.tag.observations, jacobian=False) for obs in self.observed_RVs ] - - # Convert random variables into their log-likelihood inputs and - # apply their transforms, if any - potentials, _ = rvs_to_value_vars(self.potentials, apply_transforms=True) - factors += potentials - - logp_var = at.sum([at.sum(factor) for factor in factors]) - + factors += self.potentials + logp_var = aet.sum([aet.sum(factor) for factor in factors]) if self.name: logp_var.name = "__logp_nojac_%s" % self.name else: @@ -759,30 +746,17 @@ def varlogpt(self): (excluding deterministic).""" with self: factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] - return at.sum(factors) + return aet.sum(factors) @property def datalogpt(self): with self: factors = [logpt(obs, obs.tag.observations) for obs in self.observed_RVs] - - # Convert random variables into their log-likelihood inputs and - # apply their transforms, if any - potentials, _ = rvs_to_value_vars(self.potentials, apply_transforms=True) - - factors += [at.sum(factor) for factor in potentials] - return at.sum(factors) + factors += [aet.sum(factor) for factor in self.potentials] + return aet.sum(factors) @property def vars(self): - warnings.warn( - "Model.vars has been deprecated. Use Model.value_vars instead.", - DeprecationWarning, - ) - return self.value_vars - - @property - def value_vars(self): """List of unobserved random variables used as inputs to the model's log-likelihood (which excludes deterministics). 
""" @@ -824,43 +798,8 @@ def independent_vars(self): @property def test_point(self): - warnings.warn( - "`Model.test_point` has been deprecated. Use `Model.initial_point` instead.", - DeprecationWarning, - ) - return self.initial_point - - @property - def initial_point(self): - points = [] - for rv_var in self.free_RVs: - value_var = rv_var.tag.value_var - var_value = getattr(value_var.tag, "test_value", None) - - if var_value is None: - - rv_var_value = getattr(rv_var.tag, "test_value", None) - - if rv_var_value is None: - try: - rv_var_value = rv_var.eval() - except Exception: - raise Exception(f"Couldn't generate an initial value for {rv_var}") - - transform = getattr(value_var.tag, "transform", None) - - if transform: - try: - rv_var_value = transform.forward(rv_var, rv_var_value).eval() - except Exception: - raise Exception(f"Couldn't generate an initial value for {rv_var}") - - var_value = rv_var_value - value_var.tag.test_value = var_value - - points.append((value_var, var_value)) - - return Point(points, model=self) + """Test point used to check that the model doesn't generate errors""" + return Point(((var, var.tag.test_value) for var in self.vars), model=self) @property def disc_vars(self): @@ -902,7 +841,9 @@ def add_coords(self, coords): else: self.coords[name] = coords[name] - def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, transform=UNSET): + def register_rv( + self, rv_var, name, data=None, total_size=None, dims=None, transform=no_transform_object + ): """Register an (un)observed random variable with the model. Parameters @@ -927,7 +868,6 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans if data is None: self.free_RVs.append(rv_var) - self.create_value_var(rv_var, transform) else: if ( isinstance(data, Variable) @@ -940,26 +880,21 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans rv_var = make_obs_var(rv_var, data) - self.create_value_var(rv_var, transform) - - if hasattr(rv_var.tag, "observations"): - self.observed_RVs.append(rv_var) - - self.add_random_variable(rv_var, dims) - - return rv_var - - def create_value_var(self, rv_var: TensorVariable, transform: Any) -> TensorVariable: - """Create a ``TensorVariable`` that will be used as the random - variable's "value" in log-likelihood graphs. - - In general, we'll call this type of variable the "value" variable. - - In all other cases, the role of the value variable is taken by - observed data. That's why value variables are only referenced in - this branch of the conditional. - - """ + self.observed_RVs.append(rv_var) + + if rv_var.tag.missing_values: + self.free_RVs.append(rv_var.tag.missing_values) + self.missing_values.append(rv_var.tag.missing_values) + self.named_vars[rv_var.tag.missing_values.name] = rv_var.tag.missing_values + + # Create a `TensorVariable` that will be used as the random + # variable's "value" in log-likelihood graphs. + # + # In general, we'll call this type of variable the "value" variable. + # + # In all other cases, the role of the value variable is taken by + # observed data. That's why value variables are only referenced in + # this branch of the conditional. 
value_var = rv_var.type() if aesara.config.compute_test_value != "off": @@ -971,7 +906,7 @@ def create_value_var(self, rv_var: TensorVariable, transform: Any) -> TensorVari # Make the value variable a transformed value variable, # if there's an applicable transform - if transform is UNSET: + if transform is no_transform_object: transform = logp_transform(rv_var.owner.op) if transform is not None: @@ -979,9 +914,10 @@ def create_value_var(self, rv_var: TensorVariable, transform: Any) -> TensorVari value_var.name = f"{value_var.name}_{transform.name}__" if aesara.config.compute_test_value != "off": value_var.tag.test_value = transform.forward(rv_var, value_var).tag.test_value - self.named_vars[value_var.name] = value_var - return value_var + self.add_random_variable(rv_var, dims) + + return rv_var def add_random_variable(self, var, dims=None): """Add a random variable to the named variables of the model.""" @@ -1050,7 +986,7 @@ def makefn(self, outs, mode=None, *args, **kwargs): """ with self: return aesara.function( - self.value_vars, + self.vars, outs, allow_input_downcast=True, on_unused_input="ignore", @@ -1137,7 +1073,7 @@ def flatten(self, vars=None, order=None, inputvar=None): flat_view """ if vars is None: - vars = self.value_vars + vars = self.vars if order is not None: var_map = {v.name: v for v in vars} vars = [var_map[n] for n in order] @@ -1153,7 +1089,7 @@ def flatten(self, vars=None, order=None, inputvar=None): replacements = {} last_idx = 0 for var in vars: - arr_len = at.prod(var.shape, dtype="int64") + arr_len = aet.prod(var.shape, dtype="int64") replacements[self.named_vars[var.name]] = ( inputvar[last_idx : (last_idx + arr_len)].reshape(var.shape).astype(var.dtype) ) @@ -1265,7 +1201,7 @@ def point_logps(self, point=None, round_vals=2): return Series( { rv.name: np.round( - self.fn(logpt_sum(rv, getattr(rv.tag, "observations", None)))(point), + self.fn(logpt_sum(rv, getattr(rv.tag, "observations", None)))(test_point), round_vals, ) for rv in self.basic_RVs @@ -1422,7 +1358,7 @@ def Point(*args, filter_model_vars=False, **kwargs): return { get_var_name(k): np.array(v) for k, v in d.items() - if not filter_model_vars or (get_var_name(k) in map(get_var_name, model.value_vars)) + if not filter_model_vars or (get_var_name(k) in map(get_var_name, model.vars)) } @@ -1452,6 +1388,55 @@ def __call__(self, *args, **kwargs): compilef = fastfn +def pandas_to_array(data): + """Convert a pandas object to a NumPy array. + + XXX: When `data` is a generator, this will return a Aesara tensor! 
+ + """ + if hasattr(data, "to_numpy") and hasattr(data, "isnull"): + # typically, but not limited to pandas objects + vals = data.to_numpy() + mask = data.isnull().to_numpy() + if mask.any(): + # there are missing values + ret = np.ma.MaskedArray(vals, mask) + else: + ret = vals + elif isinstance(data, np.ndarray): + if isinstance(data, np.ma.MaskedArray): + if not data.mask.any(): + # empty mask + ret = data.filled() + else: + # already masked and rightly so + ret = data + else: + # already a ndarray, but not masked + mask = np.isnan(data) + if np.any(mask): + ret = np.ma.MaskedArray(data, mask) + else: + # no masking required + ret = data + elif isinstance(data, Variable): + ret = data + elif sps.issparse(data): + ret = data + elif isgenerator(data): + ret = generator(data) + else: + ret = np.asarray(data) + + if test_value is not None: + # We try to reuse the old test value + rv_var.tag.test_value = np.broadcast_to(test_value, rv_var.tag.test_value.shape) + else: + rv_var.tag.test_value = data + + mask = getattr(data, "mask", None) + if mask is not None: + def make_obs_var(rv_var: TensorVariable, data: Union[np.ndarray]) -> TensorVariable: """Create a `TensorVariable` for an observed random variable. @@ -1484,25 +1469,20 @@ def make_obs_var(rv_var: TensorVariable, data: Union[np.ndarray]) -> TensorVaria else: new_size = data.shape + test_value = getattr(rv_var.tag, "test_value", None) + rv_var = change_rv_size(rv_var, new_size) if aesara.config.compute_test_value != "off": - test_value = getattr(rv_var.tag, "test_value", None) - if test_value is not None: # We try to reuse the old test value rv_var.tag.test_value = np.broadcast_to(test_value, rv_var.tag.test_value.shape) else: rv_var.tag.test_value = data + missing_values = None mask = getattr(data, "mask", None) if mask is not None: - - if mask.all(): - # If there are no observed values, this variable isn't really - # observed. 
- return rv_var - impute_message = ( f"Data in {rv_var} contains missing values and" " will be automatically imputed from the" @@ -1510,21 +1490,57 @@ def make_obs_var(rv_var: TensorVariable, data: Union[np.ndarray]) -> TensorVaria ) warnings.warn(impute_message, ImputationWarning) - comp_data = at.as_tensor_variable(data.compressed()) - data = at.as_tensor_variable(data) - data.tag.mask = mask - - rv_var = at.set_subtensor(rv_var[~mask], comp_data) - rv_var.name = name + missing_values = rv_var[mask] + constant = aet.as_tensor_variable(data.filled()) + data = aet.set_subtensor(constant[mask.nonzero()], missing_values) elif sps.issparse(data): data = sparse.basic.as_sparse(data, name=name) else: - data = at.as_tensor_variable(data, name=name) + data = aet.as_tensor_variable(data, name=name) + rv_var.tag.missing_values = missing_values rv_var.tag.observations = data return rv_var + rv_var.tag.observations = data + +def _walk_up_rv(rv, formatting="plain"): + """Walk up aesara graph to get inputs for deterministic RV.""" + all_rvs = [] + parents = list(itertools.chain(*[j.inputs for j in rv.get_parents()])) + if parents: + for parent in parents: + all_rvs.extend(_walk_up_rv(parent, formatting=formatting)) + else: + name = rv.name if rv.name else "Constant" + fmt = r"\text{{{name}}}" if "latex" in formatting else "{name}" + all_rvs.append(fmt.format(name=name)) + return all_rvs + + +class DeterministicWrapper(TensorVariable): + def _str_repr(self, formatting="plain"): + if "latex" in formatting: + if formatting == "latex_with_params": + return r"$\text{{{name}}} \sim \text{{Deterministic}}({args})$".format( + name=self.name, args=r",~".join(_walk_up_rv(self, formatting=formatting)) + ) + return fr"$\text{{{self.name}}} \sim \text{{Deterministic}}$" + else: + if formatting == "plain_with_params": + args = ", ".join(_walk_up_rv(self, formatting=formatting)) + return f"{self.name} ~ Deterministic({args})" + return f"{self.name} ~ Deterministic" + + def _repr_latex_(self, *, formatting="latex_with_params", **kwargs): + return self._str_repr(formatting=formatting) + + __latex__ = _repr_latex_ + + def __str__(self): + return self._str_repr(formatting="plain") + def Deterministic(name, var, model=None, dims=None): """Create a named deterministic variable @@ -1564,3 +1580,25 @@ def Potential(name, var, model=None): model.potentials.append(var) model.add_random_variable(var) return var + + +def as_iterargs(data): + if isinstance(data, tuple): + return data + else: + return [data] + + +def all_continuous(vars): + """Check that vars not include discrete variables or BART variables, excepting observed RVs.""" + + vars_ = [var for var in vars if not (var.owner and hasattr(var.tag, "observations"))] + if any( + [ + (var.dtype in pm.discrete_types or (var.owner and isinstance(var.owner.op, pm.BART))) + for var in vars_ + ] + ): + return False + else: + return True diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 4f9948f746..d7bb2fbca5 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -37,11 +37,11 @@ import pymc3 as pm -from pymc3.aesaraf import change_rv_size, inputvars, walk_model -from pymc3.backends.arviz import _DefaultTrace +from pymc3.aesaraf import inputvars from pymc3.backends.base import BaseTrace, MultiTrace from pymc3.backends.ndarray import NDArray from pymc3.blocking import DictToArrayBijection +from pymc3.distributions import change_rv_size, rv_ancestors from pymc3.exceptions import IncorrectArgumentsError, SamplingError from pymc3.model import Model, Point, modelcontext from 
pymc3.parallel_sampling import Draw, _cpu_count @@ -201,8 +201,8 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, step_kwargs=None has_gradient = var.dtype not in discrete_types if has_gradient: try: - tg.grad(model.logpt, var) - except (NotImplementedError, tg.NullTypeGradError): + tg.grad(model.logpt, var.tag.value_var) + except (AttributeError, NotImplementedError, tg.NullTypeGradError): has_gradient = False # select the best method rv_var = model.values_to_rvs[var] @@ -659,7 +659,9 @@ def sample( idata = None if compute_convergence_checks or return_inferencedata: - ikwargs = dict(model=model, save_warmup=not discard_tuned_samples) + # XXX: Arviz `log_likelihood` calculations need to be disabled until + # it's updated to work with v4. + ikwargs = dict(model=model, save_warmup=not discard_tuned_samples, log_likelihood=False) if idata_kwargs: ikwargs.update(idata_kwargs) idata = pm.to_inference_data(trace, **ikwargs) @@ -1695,16 +1697,9 @@ def sample_posterior_predictive( if not hasattr(_trace, "varnames"): inputs_and_names = [ - (rv, rv.name) - for rv in walk_model(vars_to_sample, walk_past_rvs=True) - if rv not in vars_to_sample - and rv in model.named_vars.values() - and not isinstance(rv, SharedVariable) + (rv, rv.name) for rv in rv_ancestors(vars_to_sample, walk_past_rvs=True) ] - if inputs_and_names: - inputs, input_names = zip(*inputs_and_names) - else: - inputs, input_names = [], [] + inputs, input_names = zip(*inputs_and_names) else: output_names = [v.name for v in vars_to_sample if v.name is not None] input_names = [ @@ -1962,13 +1957,12 @@ def sample_prior_predictive( vars_ = set(var_names) if random_seed is not None: - # np.random.seed(random_seed) - model.default_rng.get_value(borrow=True).seed(random_seed) + np.random.seed(random_seed) names = get_default_varnames(vars_, include_transformed=False) vars_to_sample = [model[name] for name in names] - inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)] + inputs = [i for i in inputvars(vars_to_sample)] sampler_fn = aesara.function( inputs, vars_to_sample, @@ -2120,16 +2114,16 @@ def init_nuts( pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"), ] - apoint = DictToArrayBijection.map(model.initial_point) + apoint = DictToArrayBijection.map(model.test_point) if init == "adapt_diag": - start = [model.initial_point] * chains + start = [model.test_point] * chains mean = np.mean([apoint.data] * chains, axis=0) var = np.ones_like(mean) n = len(var) potential = quadpotential.QuadPotentialDiagAdapt(n, mean, var, 10) elif init == "jitter+adapt_diag": - start = _init_jitter(model, model.initial_point, chains, jitter_max_retries) + start = _init_jitter(model, chains, jitter_max_retries) mean = np.mean([DictToArrayBijection.map(vals).data for vals in start], axis=0) var = np.ones_like(mean) n = len(var) @@ -2205,19 +2199,15 @@ def init_nuts( start = [start] * chains potential = quadpotential.QuadPotentialFull(cov) elif init == "adapt_full": - initial_point = model.initial_point - start = [initial_point] * chains + start = [model.test_point] * chains mean = np.mean([apoint.data] * chains, axis=0) - initial_point_model_size = sum(initial_point[n.name].size for n in model.value_vars) - cov = np.eye(initial_point_model_size) - potential = quadpotential.QuadPotentialFullAdapt(initial_point_model_size, mean, cov, 10) + cov = np.eye(model.size) + potential = quadpotential.QuadPotentialFullAdapt(model.size, mean, cov, 10) elif init == "jitter+adapt_full": - initial_point = 
model.initial_point - start = _init_jitter(model, initial_point, chains, jitter_max_retries) + start = _init_jitter(model, chains, jitter_max_retries) mean = np.mean([DictToArrayBijection.map(vals).data for vals in start], axis=0) - initial_point_model_size = sum(initial_point[n.name].size for n in model.value_vars) - cov = np.eye(initial_point_model_size) - potential = quadpotential.QuadPotentialFullAdapt(initial_point_model_size, mean, cov, 10) + cov = np.eye(model.size) + potential = quadpotential.QuadPotentialFullAdapt(model.size, mean, cov, 10) else: raise ValueError(f"Unknown initializer: {init}.") diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 07470dadf8..fe5a30179e 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -108,14 +108,14 @@ def initialize_population(self): def setup_kernel(self): """Set up the likelihood logp function based on the chosen kernel.""" - initial_values = self.model.initial_point + initial_values = self.model.test_point shared = make_shared_replacements(initial_values, self.variables, self.model) if self.kernel == "abc": factors = [var.logpt for var in self.model.free_RVs] - factors += [at.sum(factor) for factor in self.model.potentials] + factors += [aet.sum(factor) for factor in self.model.potentials] self.prior_logp_func = logp_forw( - initial_values, [at.sum(factors)], self.variables, shared + initial_values, [aet.sum(factors)], self.variables, shared ) simulator = self.model.observed_RVs[0] distance = simulator.distribution.distance diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index bd02887cd8..1def8c677c 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -69,8 +69,8 @@ def __new__(cls, *args, **kwargs): else: # Assume all model variables vars = model.value_vars - if not isinstance(vars, (tuple, list)): - vars = [vars] + # get the actual inputs from the vars + # vars = inputvars(vars) if len(vars) == 0: raise ValueError("No free random variables to sample.") @@ -164,7 +164,7 @@ def step(self, point: Dict[str, np.ndarray]): return point_new - def astep(self, apoint: RaveledVars, point: Dict[str, np.ndarray]): + def astep(self, apoint, point): raise NotImplementedError() @@ -225,7 +225,7 @@ def step(self, point): return new_point - def astep(self, apoint: RaveledVars): + def astep(self, apoint): raise NotImplementedError() @@ -287,8 +287,25 @@ def __init__( super().__init__(vars, func._extra_vars_shared, blocked) def step(self, point): - self._logp_dlogp_func._extra_are_set = True - return super().step(point) + self._logp_dlogp_func.set_extra_values(point) + + array = DictToArrayBijection.map(point) + + stats = None + if self.generates_stats: + apoint, stats = self.astep(array) + else: + apoint = self.astep(array) + + if not isinstance(apoint, RaveledVars): + # We assume that the mapping has stayed the same + apoint = RaveledVars(apoint, array.point_map_info) + + point = DictToArrayBijection.rmap(apoint) + + if stats is not None: + return point, stats + return point def astep(self, apoint): raise NotImplementedError() diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py index 14fb6eaa18..49737676cb 100644 --- a/pymc3/step_methods/gibbs.py +++ b/pymc3/step_methods/gibbs.py @@ -19,9 +19,6 @@ """ from warnings import warn -import aesara.tensor as at - -from aesara.graph.basic import graph_inputs from numpy import arange, array, cumsum, empty, exp, max, nested_iters, searchsorted from numpy.random import uniform @@ -81,7 +78,7 @@ def elemwise_logp(model, var): 
v_logp = logpt(v) if var in graph_inputs([v_logp]): terms.append(v_logp) - return model.fn(at.add(*terms)) + return model.fn(add(*terms)) def categorical(prob, shape): diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 89f74ad07e..2de1b2bd1f 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -97,7 +97,7 @@ def __init__( # size. # XXX: If the dimensions of these terms change, the step size # dimension-scaling should change as well, no? - test_point = self._model.initial_point + test_point = self._model.test_point continuous_vars = [test_point[v.name] for v in self._model.cont_vars] size = sum(v.size for v in continuous_vars) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 94f2e345dc..422f4fa9be 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -23,7 +23,7 @@ import pymc3 as pm -from pymc3.aesaraf import floatX, rvs_to_value_vars +from pymc3.aesaraf import floatX from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.step_methods.arraystep import ( ArrayStep, @@ -150,7 +150,7 @@ def __init__( """ model = pm.modelcontext(model) - initial_values = model.initial_point + initial_values = model.test_point if vars is None: vars = model.value_vars @@ -408,8 +408,8 @@ def __init__(self, vars, order="random", transit_p=0.8, model=None): # transition probabilities self.transit_p = transit_p - initial_point = model.initial_point - self.dim = sum(initial_point[v.name].size for v in vars) + # XXX: This needs to be refactored + self.dim = None # sum(v.dsize for v in vars) if order == "random": self.shuffle_dims = True @@ -509,17 +509,17 @@ def __init__(self, vars, proposal="uniform", order="random", model=None): distr = getattr(rv_var.owner, "op", None) if isinstance(distr, CategoricalRV): - k_graph = rv_var.owner.inputs[3].shape[-1] - (k_graph,), _ = rvs_to_value_vars((k_graph,), apply_transforms=True) - k = model.fn(k_graph)(initial_point) - elif isinstance(distr, BernoulliRV): + # XXX: This needs to be refactored + k = None # draw_values([distr.k])[0] + elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types): k = 2 else: raise ValueError( "All variables must be categorical or binary" + "for CategoricalGibbsMetropolis" ) start = len(dimcats) - dimcats += [(dim, k) for dim in range(start, start + v_init_val.size)] + # XXX: This needs to be refactored + dimcats += None # [(dim, k) for dim in range(start, start + v.dsize)] if order == "random": self.shuffle_dims = True @@ -559,6 +559,8 @@ def astep_unif(self, q0: RaveledVars, logp) -> RaveledVars: if accepted: logp_curr = logp_prop + q = RaveledVars(q, point_map_info) + return q def astep_prop(self, q0: RaveledVars, logp) -> RaveledVars: @@ -576,6 +578,8 @@ def astep_prop(self, q0: RaveledVars, logp) -> RaveledVars: for dim, k in dimcats: logp_curr = self.metropolis_proportional(q, logp, logp_curr, dim, k) + q = RaveledVars(q, point_map_info) + return q def metropolis_proportional(self, q, logp, logp_curr, dim, k): @@ -691,8 +695,7 @@ def __init__( ): model = pm.modelcontext(model) - initial_values = model.initial_point - initial_values_size = sum(initial_values[n.name].size for n in model.value_vars) + initial_values = model.test_point if vars is None: vars = model.cont_vars @@ -840,8 +843,7 @@ def __init__( **kwargs ): model = pm.modelcontext(model) - initial_values = model.initial_point - initial_values_size = sum(initial_values[n.name].size for n in model.value_vars) + 
initial_values = model.test_point if vars is None: vars = model.cont_vars diff --git a/pymc3/step_methods/mlda.py b/pymc3/step_methods/mlda.py index a155993fef..443ad7e7c2 100644 --- a/pymc3/step_methods/mlda.py +++ b/pymc3/step_methods/mlda.py @@ -26,7 +26,7 @@ import pymc3 as pm from pymc3.blocking import DictToArrayBijection -from pymc3.model import Model, Point +from pymc3.model import Model from pymc3.step_methods.arraystep import ArrayStepShared, Competence, metrop_select from pymc3.step_methods.compound import CompoundStep from pymc3.step_methods.metropolis import ( @@ -58,7 +58,7 @@ def __init__(self, *args, **kwargs): and some extra code specific for MLDA. """ model = pm.modelcontext(kwargs.get("model", None)) - initial_values = model.initial_point + initial_values = model.test_point # flag to that variance reduction is activated - forces MetropolisMLDA # to store quantities of interest in a register if True @@ -71,18 +71,18 @@ def __init__(self, *args, **kwargs): self.Q_reg = [np.nan] * self.mlda_subsampling_rate_above # extract some necessary variables - value_vars = kwargs.get("vars", None) - if value_vars is None: - value_vars = model.value_vars - value_vars = pm.inputvars(value_vars) - shared = pm.make_shared_replacements(initial_values, value_vars, model) + vars = kwargs.get("vars", None) + if vars is None: + vars = model.vars + vars = pm.inputvars(vars) + shared = pm.make_shared_replacements(initial_values, vars, model) # call parent class __init__ super().__init__(*args, **kwargs) # modify the delta function and point to model if VR is used if self.mlda_variance_reduction: - self.delta_logp = delta_logp_inverse(initial_values, model.logpt, value_vars, shared) + self.delta_logp = delta_logp_inverse(initial_values, model.logpt, vars, shared) self.model = model def reset_tuning(self): @@ -126,7 +126,7 @@ def __init__(self, *args, **kwargs): self.tuning_end_trigger = False model = pm.modelcontext(kwargs.get("model", None)) - initial_values = model.initial_point + initial_values = model.test_point # flag to that variance reduction is activated - forces DEMetropolisZMLDA # to store quantities of interest in a register if True @@ -139,18 +139,18 @@ def __init__(self, *args, **kwargs): self.Q_reg = [np.nan] * self.mlda_subsampling_rate_above # extract some necessary variables - value_vars = kwargs.get("vars", None) - if value_vars is None: - value_vars = model.value_vars - value_vars = pm.inputvars(value_vars) - shared = pm.make_shared_replacements(initial_values, value_vars, model) + vars = kwargs.get("vars", None) + if vars is None: + vars = model.vars + vars = pm.inputvars(vars) + shared = pm.make_shared_replacements(initial_values, vars, model) # call parent class __init__ super().__init__(*args, **kwargs) # modify the delta function and point to model if VR is used if self.mlda_variance_reduction: - self.delta_logp = delta_logp_inverse(initial_values, model.logpt, value_vars, shared) + self.delta_logp = delta_logp_inverse(initial_values, model.logpt, vars, shared) self.model = model def reset_tuning(self): @@ -403,7 +403,7 @@ def __init__( # assign internal state model = pm.modelcontext(model) - initial_values = model.initial_point + initial_values = model.test_point self.model = model self.coarse_models = coarse_models self.model_below = self.coarse_models[-1] @@ -557,8 +557,8 @@ def __init__( # Construct aesara function for current-level model likelihood # (for use in acceptance) - shared = pm.make_shared_replacements(initial_values, value_vars, model) - self.delta_logp = 
delta_logp_inverse(initial_values, model.logpt, value_vars, shared) + shared = pm.make_shared_replacements(initial_values, vars, model) + self.delta_logp = delta_logp_inverse(initial_values, model.logpt, vars, shared) # Construct aesara function for below-level model likelihood # (for use in acceptance) @@ -746,8 +746,7 @@ def astep(self, q0): # Call the recursive DA proposal to get proposed sample # and convert dict -> numpy array - pre_q = self.proposal_dist(q0_dict) - q = DictToArrayBijection.map(pre_q) + q = DictToArrayBijection.map(self.proposal_dist(q0_dict)) # Evaluate MLDA acceptance log-ratio # If proposed sample from lower levels is the same as current one, diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py index b3b00bfa52..043f511c72 100644 --- a/pymc3/step_methods/pgbart.py +++ b/pymc3/step_methods/pgbart.py @@ -59,7 +59,7 @@ class PGBART(ArrayStepShared): def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", model=None): _log.warning("The BART model is experimental. Use with caution.") model = modelcontext(model) - initial_values = model.initial_point + initial_values = model.test_point vars = inputvars(vars) self.bart = vars[0].distribution diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py index 9fabf9cf62..9b3f299b32 100644 --- a/pymc3/step_methods/sgmcmc.py +++ b/pymc3/step_methods/sgmcmc.py @@ -65,7 +65,7 @@ def elemwise_dlogL(vars, model, flat_view): for var in vars: output, _ = aesara.scan( lambda i, logX, v: aesara.grad(logX[i], v).flatten(), - sequences=[at.arange(logL.shape[0])], + sequences=[aet.arange(logL.shape[0])], non_sequences=[logL, var], ) terms.append(output) @@ -162,8 +162,8 @@ def __init__( # This seems to be the only place that `Model.flatten` is used. # TODO: Why not _actually_ flatten the variables? - # E.g. `flat_vars = at.concatenate([var.ravel() for var in vars])` - # or `set_subtensor` the `vars` into a `at.vector`? + # E.g. `flat_vars = aet.concatenate([var.ravel() for var in vars])` + # or `set_subtensor` the `vars` into a `aet.vector`? 
flat_view = model.flatten(vars) self.inarray = [flat_view.input] diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py index 0289386e54..3cd66e5098 100644 --- a/pymc3/tests/models.py +++ b/pymc3/tests/models.py @@ -92,9 +92,9 @@ def simple_2model_continuous(): tau = 1.3 with Model() as model: x = pm.Normal("x", mu, tau=tau, testval=0.1) - pm.Deterministic("logx", at.log(x)) + pm.Deterministic("logx", aet.log(x)) pm.Beta("y", alpha=1, beta=1, size=2) - return model.initial_point, model + return model.test_point, model def mv_simple(): @@ -104,8 +104,8 @@ def mv_simple(): with pm.Model() as model: pm.MvNormal( "x", - at.constant(mu), - tau=at.constant(tau), + aet.constant(mu), + tau=aet.constant(tau), testval=floatX_array([0.1, 1.0, 0.8]), ) H = tau @@ -120,8 +120,8 @@ def mv_simple_coarse(): with pm.Model() as model: pm.MvNormal( "x", - at.constant(mu), - tau=at.constant(tau), + aet.constant(mu), + tau=aet.constant(tau), testval=floatX_array([0.1, 1.0, 0.8]), ) H = tau @@ -136,8 +136,8 @@ def mv_simple_very_coarse(): with pm.Model() as model: pm.MvNormal( "x", - at.constant(mu), - tau=at.constant(tau), + aet.constant(mu), + tau=aet.constant(tau), testval=floatX_array([0.1, 1.0, 0.8]), ) H = tau @@ -150,7 +150,7 @@ def mv_simple_discrete(): n = 5 p = floatX_array([0.15, 0.85]) with pm.Model() as model: - pm.Multinomial("x", n, at.constant(p), testval=np.array([1, 4])) + pm.Multinomial("x", n, aet.constant(p), testval=np.array([1, 4])) mu = n * p # covariance matrix C = np.zeros((d, d)) @@ -192,14 +192,14 @@ def mv_prior_simple(): def non_normal(n=2): with pm.Model() as model: pm.Beta("x", 3, 3, size=n, transform=None) - return model.initial_point, model, (np.tile([0.5], n), None) + return model.test_point, model, (np.tile([0.5], n), None) def exponential_beta(n=2): with pm.Model() as model: pm.Beta("x", 3, 1, size=n, transform=None) pm.Exponential("y", 1, size=n, transform=None) - return model.initial_point, model, None + return model.test_point, model, None def beta_bernoulli(n=2): diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py index 814ed616b7..6fb831d159 100644 --- a/pymc3/tests/sampler_fixtures.py +++ b/pymc3/tests/sampler_fixtures.py @@ -151,7 +151,7 @@ def setup_class(cls): ) cls.samples = {} for var in cls.model.unobserved_RVs: - cls.samples[get_var_name(var)] = cls.trace.get_values(var, burn=cls.burn) + cls.samples[get_var_name(var)] = cls.trace.get_values(var.tag.value_var, burn=cls.burn) def test_neff(self): if hasattr(self, "min_n_eff"): diff --git a/pymc3/tests/test_aesaraf.py b/pymc3/tests/test_aesaraf.py index f13c5d6500..535f3cefaa 100644 --- a/pymc3/tests/test_aesaraf.py +++ b/pymc3/tests/test_aesaraf.py @@ -23,23 +23,13 @@ import pytest import scipy.sparse as sps -from aesara.graph.basic import Variable -from aesara.tensor.random.basic import normal, uniform -from aesara.tensor.random.op import RandomVariable from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.type import TensorType from aesara.tensor.var import TensorVariable import pymc3 as pm -from pymc3.aesaraf import ( - _conversion_map, - extract_obs_data, - pandas_to_array, - rvs_to_value_vars, - take_along_axis, - walk_model, -) +from pymc3.aesaraf import _conversion_map, extract_obs_data, take_along_axis from pymc3.vartypes import int_types FLOATX = str(aesara.config.floatX) @@ -276,10 +266,10 @@ def test_dtype_failure(self): def test_extract_obs_data(): with pytest.raises(TypeError): - extract_obs_data(at.matrix()) + 
extract_obs_data(aet.matrix()) data = np.random.normal(size=(2, 3)) - data_at = at.as_tensor(data) + data_at = aet.as_tensor(data) mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool) for val_at in (data_at, aesara.shared(data)): @@ -291,8 +281,8 @@ def test_extract_obs_data(): # AdvancedIncSubtensor check data_m = np.ma.MaskedArray(data, mask) missing_values = data_at.type()[mask] - constant = at.as_tensor(data_m.filled()) - z_at = at.set_subtensor(constant[mask.nonzero()], missing_values) + constant = aet.as_tensor(data_m.filled()) + z_at = aet.set_subtensor(constant[mask.nonzero()], missing_values) assert isinstance(z_at.owner.op, AdvancedIncSubtensor) @@ -303,13 +293,13 @@ def test_extract_obs_data(): # AdvancedIncSubtensor1 check data = np.random.normal(size=(3,)) - data_at = at.as_tensor(data) + data_at = aet.as_tensor(data) mask = np.random.binomial(1, 0.5, size=(3,)).astype(bool) data_m = np.ma.MaskedArray(data, mask) missing_values = data_at.type()[mask] - constant = at.as_tensor(data_m.filled()) - z_at = at.set_subtensor(constant[mask.nonzero()], missing_values) + constant = aet.as_tensor(data_m.filled()) + z_at = aet.set_subtensor(constant[mask.nonzero()], missing_values) assert isinstance(z_at.owner.op, AdvancedIncSubtensor1) @@ -317,159 +307,3 @@ def test_extract_obs_data(): assert isinstance(res, np.ndarray) assert np.ma.allequal(res, data_m) - - -@pytest.mark.parametrize("input_dtype", ["int32", "int64", "float32", "float64"]) -def test_pandas_to_array(input_dtype): - """ - Ensure that pandas_to_array returns the dense array, masked array, - graph variable, TensorVariable, or sparse matrix as appropriate. - """ - # Create the various inputs to the function - sparse_input = sps.csr_matrix(np.eye(3)).astype(input_dtype) - dense_input = np.arange(9).reshape((3, 3)).astype(input_dtype) - - input_name = "input_variable" - aesara_graph_input = at.as_tensor(dense_input, name=input_name) - pandas_input = pd.DataFrame(dense_input) - - # All the even numbers are replaced with NaN - missing_numpy_input = np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]]) - missing_pandas_input = pd.DataFrame(missing_numpy_input) - masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) - - # Create a generator object. Apparently the generator object needs to - # yield numpy arrays. 
- square_generator = (np.array([i ** 2], dtype=int) for i in range(100)) - - # Alias the function to be tested - func = pandas_to_array - - ##### - # Perform the various tests - ##### - # Check function behavior with dense arrays and pandas dataframes - # without missing values - for input_value in [dense_input, pandas_input]: - func_output = func(input_value) - assert isinstance(func_output, np.ndarray) - assert func_output.shape == input_value.shape - npt.assert_allclose(func_output, dense_input) - - # Check function behavior with sparse matrix inputs - sparse_output = func(sparse_input) - assert sps.issparse(sparse_output) - assert sparse_output.shape == sparse_input.shape - npt.assert_allclose(sparse_output.toarray(), sparse_input.toarray()) - - # Check function behavior when using masked array inputs and pandas - # objects with missing data - for input_value in [missing_numpy_input, masked_array_input, missing_pandas_input]: - func_output = func(input_value) - assert isinstance(func_output, ma.core.MaskedArray) - assert func_output.shape == input_value.shape - npt.assert_allclose(func_output, masked_array_input) - - # Check function behavior with Aesara graph variable - aesara_output = func(aesara_graph_input) - assert isinstance(aesara_output, Variable) - npt.assert_allclose(aesara_output.eval(), aesara_graph_input.eval()) - intX = pm.aesaraf._conversion_map[aesara.config.floatX] - if dense_input.dtype == intX or dense_input.dtype == aesara.config.floatX: - assert aesara_output.owner is None # func should not have added new nodes - assert aesara_output.name == input_name - else: - assert aesara_output.owner is not None # func should have casted - assert aesara_output.owner.inputs[0].name == input_name - - if "float" in input_dtype: - assert aesara_output.dtype == aesara.config.floatX - else: - assert aesara_output.dtype == intX - - # Check function behavior with generator data - generator_output = func(square_generator) - - # Output is wrapped with `pm.floatX`, and this unwraps - wrapped = generator_output.owner.inputs[0] - # Make sure the returned object has .set_gen and .set_default methods - assert hasattr(wrapped, "set_gen") - assert hasattr(wrapped, "set_default") - # Make sure the returned object is a Aesara TensorVariable - assert isinstance(wrapped, TensorVariable) - - -def test_walk_model(): - d = at.vector("d") - b = at.vector("b") - c = uniform(0.0, d) - c.name = "c" - e = at.log(c) - a = normal(e, b) - a.name = "a" - - test_graph = at.exp(a + 1) - res = list(walk_model((test_graph,))) - assert a in res - assert c not in res - - res = list(walk_model((test_graph,), walk_past_rvs=True)) - assert a in res - assert c in res - - res = list(walk_model((test_graph,), walk_past_rvs=True, stop_at_vars={e})) - assert a in res - assert c not in res - - -def test_rvs_to_value_vars(): - - with pm.Model() as m: - a = pm.Uniform("a", 0.0, 1.0) - b = pm.Uniform("b", 0, a + 1.0) - c = pm.Normal("c") - d = at.log(c + b) + 2.0 - - a_value_var = m.rvs_to_values[a] - assert a_value_var.tag.transform - - b_value_var = m.rvs_to_values[b] - c_value_var = m.rvs_to_values[c] - - (res,), replaced = rvs_to_value_vars((d,)) - - assert res.owner.op == at.add - log_output = res.owner.inputs[0] - assert log_output.owner.op == at.log - log_add_output = res.owner.inputs[0].owner.inputs[0] - assert log_add_output.owner.op == at.add - c_output = log_add_output.owner.inputs[0] - - # We make sure that the random variables were replaced - # with their value variables - assert c_output == c_value_var - b_output 
= log_add_output.owner.inputs[1] - assert b_output == b_value_var - - res_ancestors = list(walk_model((res,), walk_past_rvs=True)) - res_rv_ancestors = [ - v for v in res_ancestors if v.owner and isinstance(v.owner.op, RandomVariable) - ] - - # There shouldn't be any `RandomVariable`s in the resulting graph - assert len(res_rv_ancestors) == 0 - assert b_value_var in res_ancestors - assert c_value_var in res_ancestors - assert a_value_var not in res_ancestors - - (res,), replaced = rvs_to_value_vars((d,), apply_transforms=True) - - res_ancestors = list(walk_model((res,), walk_past_rvs=True)) - res_rv_ancestors = [ - v for v in res_ancestors if v.owner and isinstance(v.owner.op, RandomVariable) - ] - - assert len(res_rv_ancestors) == 0 - assert a_value_var in res_ancestors - assert b_value_var in res_ancestors - assert c_value_var in res_ancestors diff --git a/pymc3/tests/test_coords.py b/pymc3/tests/test_coords.py new file mode 100644 index 0000000000..c668b1e147 --- /dev/null +++ b/pymc3/tests/test_coords.py @@ -0,0 +1,21 @@ +import numpy as np +import pytest + +import pymc3 as pm + + +@pytest.mark.xfail(reason="Arviz incompatibilities") +def test_coords(): + chains = 2 + n_features = 3 + n_samples = 10 + + coords = {"features": np.arange(n_features)} + + with pm.Model(coords=coords): + a = pm.Uniform("a", -100, 100, dims="features") + b = pm.Uniform("b", -100, 100, dims="features") + tr = pm.sample(n_samples, chains=chains, return_inferencedata=True) + + assert "features" in tr.posterior.a.coords.dims + assert "features" in tr.posterior.b.coords.dims diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index 88a1432d48..1dde0e7640 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -159,9 +159,7 @@ def test_shared_data_as_rv_input(self): with pm.Model() as m: x = pm.Data("x", [1.0, 2.0, 3.0]) _ = pm.Normal("y", mu=x, size=3) - trace = pm.sample( - chains=1, return_inferencedata=False, compute_convergence_checks=False - ) + trace = pm.sample(chains=1) np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), x.get_value(), atol=1e-1) np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), trace["y"].mean(0), atol=1e-1) diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py index fc1e531a00..7cbcafcdfd 100644 --- a/pymc3/tests/test_dist_math.py +++ b/pymc3/tests/test_dist_math.py @@ -185,21 +185,21 @@ def func(chol_vec, delta): @aesara.config.change_flags(compute_test_value="ignore") def test_hessian(self): - chol_vec = at.vector("chol_vec") + chol_vec = aet.vector("chol_vec") chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3])) - chol = at.stack( + chol = aet.stack( [ at.stack([at.exp(0.1 * chol_vec[0]), 0]), at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]), ] ) - cov = at.dot(chol, chol.T) - delta = at.matrix("delta") + cov = aet.dot(chol, chol.T) + delta = aet.matrix("delta") delta.tag.test_value = floatX(np.ones((5, 2))) logp = MvNormalLogp()(cov, delta) - g_cov, g_delta = at.grad(logp, [cov, delta]) + g_cov, g_delta = aet.grad(logp, [cov, delta]) # TODO: What's the test? Something needs to be asserted. 
- at.grad(g_delta.sum() + g_cov.sum(), [delta, cov]) + aet.grad(g_delta.sum() + g_cov.sum(), [delta, cov]) class TestSplineWrapper: diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py new file mode 100644 index 0000000000..4d0ecfe8b2 --- /dev/null +++ b/pymc3/tests/test_distribution_defaults.py @@ -0,0 +1,92 @@ +# Copyright 2020 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest + +from pymc3.distributions import Categorical, Continuous, DiscreteUniform +from pymc3.model import Model + +pytestmark = pytest.mark.xfail(reason="This test relies on the deprecated Distribution interface") + + +class DistTest(Continuous): + def __init__(self, a, b, *args, **kwargs): + super().__init__(*args, **kwargs) + self.a = a + self.b = b + + def logp(self, v): + return 0 + + +def test_default_nan_fail(): + with Model(), pytest.raises(AttributeError): + DistTest("x", np.nan, 2, defaults=["a"]) + + +def test_default_empty_fail(): + with Model(), pytest.raises(AttributeError): + DistTest("x", 1, 2, defaults=[]) + + +def test_default_testval(): + with Model(): + x = DistTest("x", 1, 2, testval=5, defaults=[]) + assert x.tag.test_value == 5 + + +def test_default_testval_nan(): + with Model(): + x = DistTest("x", 1, 2, testval=np.nan, defaults=["a"]) + np.testing.assert_almost_equal(x.tag.test_value, np.nan) + + +def test_default_a(): + with Model(): + x = DistTest("x", 1, 2, defaults=["a"]) + assert x.tag.test_value == 1 + + +def test_default_b(): + with Model(): + x = DistTest("x", np.nan, 2, defaults=["a", "b"]) + assert x.tag.test_value == 2 + + +def test_default_c(): + with Model(): + y = DistTest("y", 7, 8, testval=94) + x = DistTest("x", y, 2, defaults=["a", "b"]) + assert x.tag.test_value == 94 + + +def test_default_discrete_uniform(): + with Model(): + x = DiscreteUniform("x", lower=1, upper=2) + assert x.init_value == 1 + + +def test_discrete_uniform_negative(): + model = Model() + with model: + x = DiscreteUniform("x", lower=-10, upper=0) + assert model.test_point["x"] == -5 + + +def test_categorical_mode(): + model = Model() + with model: + x = Categorical("x", p=np.eye(4), shape=4) + assert np.allclose(model.test_point["x"], np.arange(4)) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index dfb215aa53..798796d53a 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -36,7 +36,7 @@ import pymc3 as pm -from pymc3.aesaraf import change_rv_size, floatX +from pymc3.aesaraf import floatX from pymc3.distributions import ( AR1, CAR, @@ -98,6 +98,7 @@ ZeroInflatedBinomial, ZeroInflatedNegativeBinomial, ZeroInflatedPoisson, + change_rv_size, continuous, logcdf, logpt, @@ -648,7 +649,26 @@ def logp_reference(args): domains["value"] = domain for pt in product(domains, n_samples=n_samples): pt = dict(pt) - pt_d = self._model_input_dict(model, param_vars, pt) + pt_d = {} + for k, v in pt.items(): + rv_var = model.named_vars.get(k) + nv = 
param_vars.get(k, rv_var) + nv = getattr(nv.tag, "value_var", nv) + + transform = getattr(nv.tag, "transform", None) + if transform: + # TODO: The compiled graph behind this should be cached and + # reused (if it isn't already). + v = transform.forward(rv_var, v).eval() + + if nv.name in param_vars: + # Update the shared parameter variables in `param_vars` + param_vars[nv.name].set_value(v) + else: + # Create an argument entry for the (potentially + # transformed) "value" variable + pt_d[nv.name] = v + pt_logp = Point(pt_d, model=model) pt_ref = Point(pt, filter_model_vars=False, model=model) assert_almost_equal( @@ -1083,6 +1103,29 @@ def test_wald_logp_custom_points(self, value, mu, lam, phi, alpha, logp): decimals = select_by_precision(float64=6, float32=1) assert_almost_equal(model.fastlogp(pt), logp, decimal=decimals, err_msg=str(pt)) + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_wald_logp(self): + self.check_logp( + Wald, + Rplus, + {"mu": Rplus, "alpha": Rplus}, + lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha), + decimal=select_by_precision(float64=6, float32=1), + ) + + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Poor CDF in SciPy. See scipy/scipy#869 for details.", + ) + def test_wald_logcdf(self): + self.check_logcdf( + Wald, + Rplus, + {"mu": Rplus, "alpha": Rplus}, + lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha), + ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_beta(self): self.check_logp( Beta, @@ -1237,6 +1280,7 @@ def scipy_mu_alpha_logcdf(value, mu, alpha): (5, 0.5, None, 2, "Can't specify both mu and p."), ], ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_negative_binomial_init_fail(self, mu, p, alpha, n, expected): with Model(): with pytest.raises(ValueError, match=f"Incompatible parametrization. 
{expected}"): @@ -1417,6 +1461,10 @@ def test_pareto(self): lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m), ) + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", + ) @pytest.mark.xfail(reason="Distribution not refactored yet") def test_weibull_logp(self): self.check_logp( @@ -1483,6 +1531,10 @@ def test_binomial(self): # Too lazy to propagate decimal parameter through the whole chain of deps @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" + ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_beta_binomial_distribution(self): self.checkd( BetaBinomial, @@ -1609,11 +1661,8 @@ def test_constantdist(self): self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value)) # Too lazy to propagate decimal parameter through the whole chain of deps + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") - @pytest.mark.xfail( - condition=(aesara.config.floatX == "float32"), - reason="Fails on float32 due to inf issues", - ) def test_zeroinflatedpoisson_distribution(self): self.checkd( ZeroInflatedPoisson, @@ -1630,11 +1679,8 @@ def test_zeroinflatedpoisson_logcdf(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") - @pytest.mark.xfail( - condition=(aesara.config.floatX == "float32"), - reason="Fails on float32 due to inf issues", - ) def test_zeroinflatednegativebinomial_distribution(self): self.checkd( ZeroInflatedNegativeBinomial, @@ -1652,6 +1698,7 @@ def test_zeroinflatednegativebinomial_logcdf(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") def test_zeroinflatedbinomial_distribution(self): self.checkd( @@ -1936,7 +1983,7 @@ def test_dirichlet_with_batch_shapes(self, dist_shape): d_point /= d_point.sum(axis=-1)[..., None] if hasattr(d_value.tag, "transform"): - d_point_trans = d_value.tag.transform.forward(d, at.as_tensor(d_point)).eval() + d_point_trans = d_value.tag.transform.forward(d, d_point).eval() else: d_point_trans = d_point @@ -1948,7 +1995,7 @@ def test_dirichlet_with_batch_shapes(self, dist_shape): assert_almost_equal(pymc3_res, scipy_res) def test_dirichlet_shape(self): - a = at.as_tensor_variable(np.r_[1, 2]) + a = aet.as_tensor_variable(np.r_[1, 2]) dir_rv = Dirichlet.dist(a) assert dir_rv.shape.eval() == (2,) @@ -2104,9 +2151,9 @@ def test_batch_multinomial(self): dist = Multinomial.dist(n=n, p=p) - value = at.tensor3(dtype="int32") + value = aet.tensor3(dtype="int32") value.tag.test_value = np.zeros_like(vals, dtype="int32") - logp = at.exp(logpt(dist, value)) + logp = aet.exp(logpt(dist, value)) f = aesara.function(inputs=[value], outputs=logp) assert_almost_equal( f(vals), @@ -2144,6 +2191,24 @@ def test_dirichlet_multinomial_matches_beta_binomial(self): decimal=select_by_precision(float64=6, float32=3), ) + @pytest.mark.parametrize( + "a, n, shape", + [ + [[0.25, 0.25, 0.25, 0.25], 
1, (1, 4)], + [[0.3, 0.6, 0.05, 0.05], 2, (1, 4)], + [[0.3, 0.6, 0.05, 0.05], 10, (1, 4)], + [[0.25, 0.25, 0.25, 0.25], 1, (2, 4)], + [[0.3, 0.6, 0.05, 0.05], 2, (3, 4)], + [[[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]], [1, 10], (2, 4)], + ], + ) + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_dirichlet_multinomial_defaultval(self, a, n, shape): + a = np.asarray(a) + with Model() as model: + m = DirichletMultinomial("m", n=n, a=a, size=shape) + assert_allclose(m.distribution._defaultval.eval().sum(axis=-1), n) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_dirichlet_multinomial_vec(self): vals = np.array([[2, 4, 4], [3, 3, 4]]) @@ -2456,6 +2521,7 @@ def test_rice(self): lambda value, b, sigma: sp.rice.logpdf(value, b=b, loc=0, scale=sigma), ) + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") def test_moyal_logp(self): # Using a custom domain, because the standard `R` domain undeflows with scipy in float64 @@ -2516,19 +2582,22 @@ def test_bound(): LowerNormal = Bound(Normal, lower=1) dist = LowerNormal.dist(mu=0, sigma=1) assert logpt(dist, 0).eval() == -np.inf - # assert dist.transform is not None + assert dist.default() > 1 + assert dist.transform is not None assert np.all(dist.random() > 1) UpperNormal = Bound(Normal, upper=-1) dist = UpperNormal.dist(mu=0, sigma=1) assert logpt(dist, -0.5).eval() == -np.inf - # assert dist.transform is not None + assert dist.default() < -1 + assert dist.transform is not None assert np.all(dist.random() < -1) ArrayNormal = Bound(Normal, lower=[1, 2], upper=[2, 3]) dist = ArrayNormal.dist(mu=0, sigma=1, size=2) assert_equal(logpt(dist, [0.5, 3.5]).eval(), -np.array([np.inf, np.inf])) - # assert dist.transform is not None + assert_equal(dist.default(), np.array([1.5, 2.5])) + assert dist.transform is not None with pytest.raises(ValueError) as err: dist.random() err.match("Drawing samples from distributions with array-valued") @@ -2834,3 +2903,16 @@ def func(x): import pickle pickle.loads(pickle.dumps(y)) + + +def test_hierarchical_logpt(): + with pm.Model() as m: + x = pm.Uniform("x", lower=0, upper=1) + y = pm.Uniform("y", lower=0, upper=x) + + # Make sure that hierarchical random variables are replaced with their + # log-likelihood space variables in the log-likelhood + logpt_ancestors = list(ancestors([m.logpt])) + assert not any(isinstance(v.owner.op, RandomVariable) for v in logpt_ancestors if v.owner) + assert x.tag.value_var in logpt_ancestors + assert y.tag.value_var in logpt_ancestors diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 16b960f1a2..6ca9be2933 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -23,13 +23,14 @@ import pytest import scipy.stats as st -from scipy import linalg +from numpy.testing import assert_almost_equal from scipy.special import expit import pymc3 as pm -from pymc3.aesaraf import change_rv_size, floatX, intX -from pymc3.distributions.dist_math import clipped_beta_rvs +from pymc3.aesaraf import floatX, intX +from pymc3.distributions import change_rv_size +from pymc3.distributions.multivariate import quaddist_matrix from pymc3.distributions.shape_utils import to_tuple from pymc3.exceptions import ShapeError from pymc3.tests.helpers import SeededTest @@ -40,7 +41,6 @@ NatSmall, PdMatrix, PdMatrixChol, - PdMatrixCholUpper, R, RandomPdMatrix, RealMatrix, @@ 
-55,6 +55,10 @@ product, ) +# XXX: This test module will need to be repurposed as tests for new +# `RandomVariable`s and their `RandomVariable.perform` methods. +pytestmark = pytest.mark.xfail(reason="This test relies on the deprecated Distribution interface") + def pymc3_random( dist, @@ -246,12 +250,6 @@ class TestGaussianRandomWalk(BaseTestCases.BaseTestCase): default_shape = (1,) -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestNormal(BaseTestCases.BaseTestCase): - distribution = pm.Normal - params = {"mu": 0.0, "tau": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestTruncatedNormal(BaseTestCases.BaseTestCase): distribution = pm.TruncatedNormal @@ -276,18 +274,6 @@ class TestSkewNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0, "alpha": 5.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestHalfNormal(BaseTestCases.BaseTestCase): - distribution = pm.HalfNormal - params = {"tau": 1.0} - - -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestUniform(BaseTestCases.BaseTestCase): - distribution = pm.Uniform - params = {"lower": 0.0, "upper": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestTriangular(BaseTestCases.BaseTestCase): distribution = pm.Triangular @@ -311,12 +297,6 @@ class TestKumaraswamy(BaseTestCases.BaseTestCase): params = {"a": 1.0, "b": 1.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestExponential(BaseTestCases.BaseTestCase): - distribution = pm.Exponential - params = {"lam": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestLaplace(BaseTestCases.BaseTestCase): distribution = pm.Laplace @@ -347,30 +327,6 @@ class TestPareto(BaseTestCases.BaseTestCase): params = {"alpha": 0.5, "m": 1.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestCauchy(BaseTestCases.BaseTestCase): - distribution = pm.Cauchy - params = {"alpha": 1.0, "beta": 1.0} - - -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestHalfCauchy(BaseTestCases.BaseTestCase): - distribution = pm.HalfCauchy - params = {"beta": 1.0} - - -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestGamma(BaseTestCases.BaseTestCase): - distribution = pm.Gamma - params = {"alpha": 1.0, "beta": 1.0} - - -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestInverseGamma(BaseTestCases.BaseTestCase): - distribution = pm.InverseGamma - params = {"alpha": 0.5, "beta": 0.5} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestChiSquared(BaseTestCases.BaseTestCase): distribution = pm.ChiSquared @@ -413,42 +369,18 @@ class TestLogitNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestBinomial(BaseTestCases.BaseTestCase): - distribution = pm.Binomial - params = {"n": 5, "p": 0.5} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestBetaBinomial(BaseTestCases.BaseTestCase): distribution = pm.BetaBinomial params = {"n": 5, "alpha": 1.0, "beta": 1.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestBernoulli(BaseTestCases.BaseTestCase): - distribution = pm.Bernoulli - params = {"p": 0.5} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestDiscreteWeibull(BaseTestCases.BaseTestCase): distribution = 
pm.DiscreteWeibull params = {"q": 0.25, "beta": 2.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestPoisson(BaseTestCases.BaseTestCase): - distribution = pm.Poisson - params = {"mu": 1.0} - - -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestNegativeBinomial(BaseTestCases.BaseTestCase): - distribution = pm.NegativeBinomial - params = {"mu": 1.0, "alpha": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestConstant(BaseTestCases.BaseTestCase): distribution = pm.Constant @@ -497,43 +429,136 @@ class TestMoyal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestCategorical(BaseTestCases.BaseTestCase): - distribution = pm.Categorical - params = {"p": np.ones(BaseTestCases.BaseTestCase.shape)} +class TestCorrectParametrizationMappingPymcToScipy(SeededTest): + @staticmethod + def get_inputs_from_apply_node_outputs(outputs): + parents = outputs.get_parents() + if not parents: + raise Exception("Parent Apply node missing for output") + # I am assuming there will always only be 1 Apply parent node in this context + return parents[0].inputs + + def _pymc_params_match_rv_ones(self, pymc_params, expected_aesara_params, pymc_dist, decimal=6): + pymc_dist_output = pymc_dist.dist(**dict(pymc_params)) + aesera_dist_inputs = self.get_inputs_from_apply_node_outputs(pymc_dist_output)[3:] + assert len(expected_aesara_params) == len(aesera_dist_inputs) + for (expected_name, expected_value), actual_variable in zip( + expected_aesara_params, aesera_dist_inputs + ): + assert_almost_equal(expected_value, actual_variable.eval(), decimal=decimal) - def get_random_variable( - self, shape, with_vector_params=False, **kwargs - ): # don't transform categories - return super().get_random_variable(shape, with_vector_params=False, **kwargs) + def test_normal(self): + params = [("mu", 5.0), ("sigma", 10.0)] + self._pymc_params_match_rv_ones(params, params, pm.Normal) - def test_probability_vector_shape(self): - """Check that if a 2d array of probabilities are passed to categorical correct shape is returned""" - p = np.ones((10, 5)) - assert pm.Categorical.dist(p=p).random().shape == (10,) - assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 10) - p = np.ones((3, 7, 5)) - assert pm.Categorical.dist(p=p).random().shape == (3, 7) - assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 3, 7) + def test_uniform(self): + params = [("lower", 0.5), ("upper", 1.5)] + self._pymc_params_match_rv_ones(params, params, pm.Uniform) + def test_half_normal(self): + params, expected_aesara_params = [("sigma", 10.0)], [("mean", 0), ("sigma", 10.0)] + self._pymc_params_match_rv_ones(params, expected_aesara_params, pm.HalfNormal) -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestDirichlet(SeededTest): - @pytest.mark.parametrize( - "shape, size", - [ - ((2), (1)), - ((2), (2)), - ((2, 2), (2, 100)), - ((3, 4), (3, 4)), - ((3, 4), (3, 4, 100)), - ((3, 4), (100)), - ((3, 4), (1)), - ], - ) - def test_dirichlet_random_shape(self, shape, size): - out_shape = to_tuple(size) + to_tuple(shape) - assert pm.Dirichlet.dist(a=np.ones(shape)).random(size=size).shape == out_shape + def test_beta_alpha_beta(self): + params = [("alpha", 2.0), ("beta", 5.0)] + self._pymc_params_match_rv_ones(params, params, pm.Beta) + + def test_beta_mu_sigma(self): + params = [("mu", 2.0), ("sigma", 5.0)] + expected_alpha, expected_beta = 
pm.Beta.get_alpha_beta(mu=params[0][1], sigma=params[1][1]) + expected_params = [("alpha", expected_alpha), ("beta", expected_beta)] + self._pymc_params_match_rv_ones(params, expected_params, pm.Beta) + + @pytest.mark.skip(reason="Expected to fail due to bug") + def test_exponential(self): + params = [("lam", 10.0)] + expected_params = [("lam", 1 / params[0][1])] + self._pymc_params_match_rv_ones(params, expected_params, pm.Exponential) + + def test_cauchy(self): + params = [("alpha", 2.0), ("beta", 5.0)] + self._pymc_params_match_rv_ones(params, params, pm.Cauchy) + + def test_half_cauchy(self): + params = [("alpha", 2.0), ("beta", 5.0)] + self._pymc_params_match_rv_ones(params, params, pm.HalfCauchy) + + @pytest.mark.skip(reason="Expected to fail due to bug") + def test_gamma_alpha_beta(self): + params = [("alpha", 2.0), ("beta", 5.0)] + expected_params = [("alpha", params[0][1]), ("beta", 1 / params[1][1])] + self._pymc_params_match_rv_ones(params, expected_params, pm.Gamma) + + @pytest.mark.skip(reason="Expected to fail due to bug") + def test_gamma_mu_sigma(self): + params = [("mu", 2.0), ("sigma", 5.0)] + expected_alpha, expected_beta = pm.Gamma.get_alpha_beta(mu=params[0][1], sigma=params[1][1]) + expected_params = [("alpha", expected_alpha), ("beta", 1 / expected_beta)] + self._pymc_params_match_rv_ones(params, expected_params, pm.Gamma) + + def test_inverse_gamma_alpha_beta(self): + params = [("alpha", 2.0), ("beta", 5.0)] + self._pymc_params_match_rv_ones(params, params, pm.InverseGamma) + + def test_inverse_gamma_mu_sigma(self): + params = [("mu", 2.0), ("sigma", 5.0)] + expected_alpha, expected_beta = pm.InverseGamma._get_alpha_beta( + mu=params[0][1], sigma=params[1][1], alpha=None, beta=None + ) + expected_params = [("alpha", expected_alpha), ("beta", expected_beta)] + self._pymc_params_match_rv_ones(params, expected_params, pm.InverseGamma) + + def test_binomial(self): + params = [("n", 100), ("p", 0.33)] + self._pymc_params_match_rv_ones(params, params, pm.Binomial) + + def test_negative_binomial(self): + params = [("n", 100), ("p", 0.33)] + self._pymc_params_match_rv_ones(params, params, pm.NegativeBinomial) + + def test_negative_binomial_mu_sigma(self): + params = [("mu", 5.0), ("alpha", 8.0)] + expected_n, expected_p = pm.NegativeBinomial.get_n_p( + mu=params[0][1], alpha=params[1][1], n=None, p=None + ) + expected_params = [("n", expected_n), ("p", expected_p)] + self._pymc_params_match_rv_ones(params, expected_params, pm.NegativeBinomial) + + def test_bernoulli(self): + params = [("p", 0.33)] + self._pymc_params_match_rv_ones(params, params, pm.Bernoulli) + + def test_poisson(self): + params = [("mu", 4)] + self._pymc_params_match_rv_ones(params, params, pm.Poisson) + + def test_mv_distribution(self): + params = [("mu", np.array([1.0, 2.0])), ("cov", np.array([[2.0, 0.0], [0.0, 3.5]]))] + self._pymc_params_match_rv_ones(params, params, pm.MvNormal) + + def test_mv_distribution_chol(self): + params = [("mu", np.array([1.0, 2.0])), ("chol", np.array([[2.0, 0.0], [0.0, 3.5]]))] + expected_cov = quaddist_matrix(chol=params[1][1]) + expected_params = [("mu", np.array([1.0, 2.0])), ("cov", expected_cov.eval())] + self._pymc_params_match_rv_ones(params, expected_params, pm.MvNormal) + + def test_mv_distribution_tau(self): + params = [("mu", np.array([1.0, 2.0])), ("tau", np.array([[2.0, 0.0], [0.0, 3.5]]))] + expected_cov = quaddist_matrix(tau=params[1][1]) + expected_params = [("mu", np.array([1.0, 2.0])), ("cov", expected_cov.eval())] + 
self._pymc_params_match_rv_ones(params, expected_params, pm.MvNormal) + + def test_dirichlet(self): + params = [("a", np.array([1.0, 2.0]))] + self._pymc_params_match_rv_ones(params, params, pm.Dirichlet) + + def test_multinomial(self): + params = [("n", 85), ("p", np.array([0.28, 0.62, 0.10]))] + self._pymc_params_match_rv_ones(params, params, pm.Multinomial) + + def test_categorical(self): + params = [("p", np.array([0.28, 0.62, 0.10]))] + self._pymc_params_match_rv_ones(params, params, pm.Categorical) class TestScalarParameterSamples(SeededTest): @@ -547,20 +572,6 @@ def ref_rand(size, tau): pymc3_random(BoundedNormal, {"tau": Rplus}, ref_rand=ref_rand) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_uniform(self): - def ref_rand(size, lower, upper): - return st.uniform.rvs(size=size, loc=lower, scale=upper - lower) - - pymc3_random(pm.Uniform, {"lower": -Rplus, "upper": Rplus}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_normal(self): - def ref_rand(size, mu, sigma): - return st.norm.rvs(size=size, loc=mu, scale=sigma) - - pymc3_random(pm.Normal, {"mu": R, "sigma": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_truncated_normal(self): def ref_rand(size, mu, sigma, lower, upper): @@ -599,13 +610,6 @@ def ref_rand(size, alpha, mu, sigma): pymc3_random(pm.SkewNormal, {"mu": R, "sigma": Rplus, "alpha": R}, ref_rand=ref_rand) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_half_normal(self): - def ref_rand(size, tau): - return st.halfnorm.rvs(size=size, loc=0, scale=tau ** -0.5) - - pymc3_random(pm.HalfNormal, {"tau": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_wald(self): # Cannot do anything too exciting as scipy wald is a @@ -619,20 +623,6 @@ def ref_rand(size, mu, lam, alpha): ref_rand=ref_rand, ) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_beta(self): - def ref_rand(size, alpha, beta): - return clipped_beta_rvs(a=alpha, b=beta, size=size) - - pymc3_random(pm.Beta, {"alpha": Rplus, "beta": Rplus}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_exponential(self): - def ref_rand(size, lam): - return nr.exponential(scale=1.0 / lam, size=size) - - pymc3_random(pm.Exponential, {"lam": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_laplace(self): def ref_rand(size, mu, b): @@ -666,41 +656,6 @@ def ref_rand(size, nu, mu, lam): pymc3_random(pm.StudentT, {"nu": Rplus, "mu": R, "lam": Rplus}, ref_rand=ref_rand) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_cauchy(self): - def ref_rand(size, alpha, beta): - return st.cauchy.rvs(alpha, beta, size=size) - - pymc3_random(pm.Cauchy, {"alpha": R, "beta": Rplusbig}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_half_cauchy(self): - def ref_rand(size, beta): - return st.halfcauchy.rvs(scale=beta, size=size) - - pymc3_random(pm.HalfCauchy, {"beta": Rplusbig}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_gamma_alpha_beta(self): - def ref_rand(size, alpha, beta): - return st.gamma.rvs(alpha, scale=1.0 / beta, size=size) - - pymc3_random(pm.Gamma, {"alpha": Rplusbig, "beta": Rplusbig}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") 
- def test_gamma_mu_sigma(self): - def ref_rand(size, mu, sigma): - return st.gamma.rvs(mu ** 2 / sigma ** 2, scale=sigma ** 2 / mu, size=size) - - pymc3_random(pm.Gamma, {"mu": Rplusbig, "sigma": Rplusbig}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_inverse_gamma(self): - def ref_rand(size, alpha, beta): - return st.invgamma.rvs(a=alpha, scale=beta, size=size) - - pymc3_random(pm.InverseGamma, {"alpha": Rplus, "beta": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_pareto(self): def ref_rand(size, alpha, m): @@ -747,10 +702,6 @@ def test_half_flat(self): with pytest.raises(ValueError): f.random(1) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_binomial(self): - pymc3_random_discrete(pm.Binomial, {"n": Nat, "p": Unit}, ref_rand=st.binom.rvs) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") @pytest.mark.xfail( sys.platform.startswith("win"), @@ -764,29 +715,6 @@ def test_beta_binomial(self): def _beta_bin(self, n, alpha, beta, size=None): return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size)) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_bernoulli(self): - pymc3_random_discrete( - pm.Bernoulli, {"p": Unit}, ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size) - ) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_poisson(self): - pymc3_random_discrete(pm.Poisson, {"mu": Rplusbig}, size=500, ref_rand=st.poisson.rvs) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_negative_binomial(self): - def ref_rand(size, alpha, mu): - return st.nbinom.rvs(alpha, alpha / (mu + alpha), size=size) - - pymc3_random_discrete( - pm.NegativeBinomial, - {"mu": Rplusbig, "alpha": Rplusbig}, - size=100, - fails=50, - ref_rand=ref_rand, - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_geometric(self): pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric) @@ -828,14 +756,6 @@ def ref_rand(size, q, beta): pm.DiscreteWeibull, {"q": Unit, "beta": Rplusdunif}, ref_rand=ref_rand ) - @pytest.mark.skip(reason="This test is covered by Aesara") - @pytest.mark.parametrize("s", [2, 3, 4]) - def test_categorical_random(self, s): - def ref_rand(size, p): - return nr.choice(np.arange(p.shape[0]), p=p, size=size) - - pymc3_random_discrete(pm.Categorical, {"p": Simplex(s)}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_constant_dist(self): def ref_rand(size, c): @@ -843,51 +763,6 @@ def ref_rand(size, c): pymc3_random_discrete(pm.Constant, {"c": I}, ref_rand=ref_rand) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_mv_normal(self): - def ref_rand(size, mu, cov): - return st.multivariate_normal.rvs(mean=mu, cov=cov, size=size) - - def ref_rand_tau(size, mu, tau): - return ref_rand(size, mu, linalg.inv(tau)) - - def ref_rand_chol(size, mu, chol): - return ref_rand(size, mu, np.dot(chol, chol.T)) - - def ref_rand_uchol(size, mu, chol): - return ref_rand(size, mu, np.dot(chol.T, chol)) - - for n in [2, 3]: - pymc3_random( - pm.MvNormal, - {"mu": Vector(R, n), "cov": PdMatrix(n)}, - size=100, - valuedomain=Vector(R, n), - ref_rand=ref_rand, - ) - pymc3_random( - pm.MvNormal, - {"mu": Vector(R, n), "tau": PdMatrix(n)}, - size=100, - valuedomain=Vector(R, n), - ref_rand=ref_rand_tau, - ) - pymc3_random( - 
pm.MvNormal, - {"mu": Vector(R, n), "chol": PdMatrixChol(n)}, - size=100, - valuedomain=Vector(R, n), - ref_rand=ref_rand_chol, - ) - pymc3_random( - pm.MvNormal, - {"mu": Vector(R, n), "chol": PdMatrixCholUpper(n)}, - size=100, - valuedomain=Vector(R, n), - ref_rand=ref_rand_uchol, - extra_args={"lower": False}, - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_matrix_normal(self): def ref_rand(size, mu, rowcov, colcov): @@ -1030,20 +905,6 @@ def ref_rand(size, nu, Sigma, mu): ref_rand=ref_rand, ) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_dirichlet(self): - def ref_rand(size, a): - return st.dirichlet.rvs(a, size=size) - - for n in [2, 3]: - pymc3_random( - pm.Dirichlet, - {"a": Vector(Rplus, n)}, - valuedomain=Simplex(n), - size=100, - ref_rand=ref_rand, - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_dirichlet_multinomial(self): def ref_rand(size, a, n): @@ -1111,20 +972,6 @@ def test_dirichlet_multinomial_dist_ShapeError(self, n, a, shape, expectation): with expectation: m.random() - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_multinomial(self): - def ref_rand(size, p, n): - return nr.multinomial(pvals=p, n=n, size=size) - - for n in [2, 3]: - pymc3_random_discrete( - pm.Multinomial, - {"p": Simplex(n), "n": Nat}, - valuedomain=Vector(Nat, n), - size=100, - ref_rand=ref_rand, - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_gumbel(self): def ref_rand(size, mu, beta): @@ -1261,7 +1108,7 @@ def test_mixture_random_shape(): # XXX: This needs to be refactored rand0, rand1, rand2, rand3 = [None] * 4 # draw_values( - # [like0, like1, like2, like3], point=m.initial_point, size=100 + # [like0, like1, like2, like3], point=m.test_point, size=100 # ) assert rand0.shape == (100, 20) assert rand1.shape == (100, 20) @@ -1299,7 +1146,7 @@ def test_mixture_random_shape_fast(): # XXX: This needs to be refactored rand0, rand1, rand2, rand3 = [None] * 4 # draw_values( - # [like0, like1, like2, like3], point=m.initial_point, size=100 + # [like0, like1, like2, like3], point=m.test_point, size=100 # ) assert rand0.shape == (100, 20) assert rand1.shape == (100, 20) diff --git a/pymc3/tests/test_distributions_timeseries.py b/pymc3/tests/test_distributions_timeseries.py index 5f9ec3485d..fd3274e2e5 100644 --- a/pymc3/tests/test_distributions_timeseries.py +++ b/pymc3/tests/test_distributions_timeseries.py @@ -22,9 +22,10 @@ from pymc3.sampling import sample, sample_posterior_predictive from pymc3.tests.helpers import select_by_precision -# pytestmark = pytest.mark.usefixtures("seeded_test") pytestmark = pytest.mark.xfail(reason="This test relies on the deprecated Distribution interface") +pytestmark = pytest.mark.usefixtures("seeded_test") + def test_AR(): # AR1 diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index b79f9eaacb..8ef45019d5 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -241,7 +241,7 @@ def test_run(self): pm.sample(50, pm.Slice(), start=start) -@pytest.mark.xfail(reason="ZeroInflatedPoisson hasn't been refactored for v4") +@pytest.mark.xfail(reason="Metropolis samplers haven't been refactored") class TestLatentOccupancy(SeededTest): """ From the PyMC example list diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py index 68585a178a..f25081a8c7 100644 --- a/pymc3/tests/test_hmc.py +++ b/pymc3/tests/test_hmc.py @@ -15,6 +15,7 @@ import numpy as 
np import numpy.testing as npt +import pytest import pymc3 @@ -26,6 +27,7 @@ logger = logging.getLogger("pymc3") +@pytest.mark.xfail(reason="Beta not refactored") def test_leapfrog_reversible(): n = 3 np.random.seed(42) diff --git a/pymc3/tests/test_minibatches.py b/pymc3/tests/test_minibatches.py index 64a8cbc42d..762447c421 100644 --- a/pymc3/tests/test_minibatches.py +++ b/pymc3/tests/test_minibatches.py @@ -208,12 +208,12 @@ def test_gradient_with_scaling(self): genvar = generator(gen1()) m = Normal("m") Normal("n", observed=genvar, total_size=1000) - grad1 = aesara.function([m.tag.value_var], at.grad(model1.logpt, m.tag.value_var)) + grad1 = aesara.function([m.tag.value_var], aet.grad(model1.logpt, m.tag.value_var)) with pm.Model() as model2: m = Normal("m") shavar = aesara.shared(np.ones((1000, 100))) Normal("n", observed=shavar) - grad2 = aesara.function([m.tag.value_var], at.grad(model2.logpt, m.tag.value_var)) + grad2 = aesara.function([m.tag.value_var], aet.grad(model2.logpt, m.tag.value_var)) for i in range(10): shavar.set_value(np.ones((100, 100)) * i) diff --git a/pymc3/tests/test_missing.py b/pymc3/tests/test_missing.py index 67f6635695..5ba71651b6 100644 --- a/pymc3/tests/test_missing.py +++ b/pymc3/tests/test_missing.py @@ -22,12 +22,27 @@ from pymc3 import ImputationWarning, Model, Normal, sample, sample_prior_predictive -@pytest.mark.parametrize( - "data", - [ma.masked_values([1, 2, -1, 4, -1], value=-1), pd.DataFrame([1, 2, numpy.nan, 4, numpy.nan])], -) -def test_missing(data): +@pytest.mark.xfail(reason="Missing values not fully refactored") +def test_missing(): + data = ma.masked_values([1, 2, -1, 4, -1], value=-1) + with Model() as model: + x = Normal("x", 1, 1) + with pytest.warns(ImputationWarning): + Normal("y", x, 1, observed=data) + + (y_missing,) = model.missing_values + assert y_missing.tag.test_value.shape == (2,) + + model.logp(model.test_point) + + with model: + prior_trace = sample_prior_predictive() + assert {"x", "y"} <= set(prior_trace.keys()) + +@pytest.mark.xfail(reason="Missing values not fully refactored") +def test_missing_pandas(): + data = pd.DataFrame([1, 2, numpy.nan, 4, numpy.nan]) with Model() as model: x = Normal("x", 1, 1) with pytest.warns(ImputationWarning): @@ -43,6 +58,7 @@ def test_missing(data): assert {"x", "y"} <= set(prior_trace.keys()) +@pytest.mark.xfail(reason="Missing values not fully refactored") def test_missing_with_predictors(): predictors = array([0.5, 1, 0.5, 2, 0.3]) data = ma.masked_values([1, 2, -1, 4, -1], value=-1) diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index a38c827b76..de001d2010 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -29,13 +29,14 @@ from aesara.tensor.var import TensorConstant from numpy.testing import assert_almost_equal +from aesara.tensor.subtensor import AdvancedIncSubtensor + import pymc3 as pm from pymc3 import Deterministic, Potential -from pymc3.blocking import DictToArrayBijection, RaveledVars -from pymc3.distributions import Normal, logpt_sum, transforms -from pymc3.model import Point, ValueGradFunction -from pymc3.tests.helpers import SeededTest +from pymc3.blocking import RaveledVars +from pymc3.distributions import Normal, transforms +from pymc3.model import ValueGradFunction class NewModel(pm.Model): @@ -163,7 +164,7 @@ def test_observed_rv_fail(self): Normal("n", observed=x) def test_observed_type(self): - X_ = pm.floatX(np.random.randn(100, 5)) + X_ = np.random.randn(100, 5).astype(aesara.config.floatX) X = pm.floatX(aesara.shared(X_)) 
with pm.Model(): x1 = pm.Normal("x1", observed=X_) @@ -202,9 +203,20 @@ def test_duplicate_vars(): def test_empty_observed(): data = pd.DataFrame(np.ones((2, 3)) / 3) data.values[:] = np.nan - with pm.Model(): + with pm.Model(aesara_config={"compute_test_value": "raise"}): a = pm.Normal("a", observed=data) - assert not hasattr(a.tag, "observations") + + assert isinstance(a.tag.observations.owner.op, AdvancedIncSubtensor) + # The masked observations are replaced by elements of the RV `a`, + # which means that they should all have the same sample test values + a_data = a.tag.observations.owner.inputs[1] + npt.assert_allclose(a.tag.test_value.flatten(), a_data.tag.test_value) + + # Let's try this again with another distribution + b = pm.Gamma("b", alpha=1, beta=1, observed=data) + assert isinstance(b.tag.observations.owner.op, AdvancedIncSubtensor) + b_data = b.tag.observations.owner.inputs[1] + npt.assert_allclose(b.tag.test_value.flatten(), b_data.tag.test_value) class TestValueGradFunction(unittest.TestCase): @@ -273,7 +285,7 @@ def test_grad(self): assert val == 21 npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1]) - @pytest.mark.xfail(reason="Lognormal not refactored for v4") + @pytest.mark.xfail(reason="Missing distributions") def test_edge_case(self): # Edge case discovered in #2948 ndim = 3 @@ -292,8 +304,9 @@ def test_edge_case(self): assert dlogp.size == 4 npt.assert_allclose(dlogp, 0.0, atol=1e-5) - def test_missing_data(self): - # Originally from a case described in #3122 + @pytest.mark.xfail(reason="Missing distributions") + def test_tensor_type_conversion(self): + # case described in #3122 X = np.random.binomial(1, 0.5, 10) X[0] = -1 # masked a single value X = np.ma.masked_values(X, value=-1) @@ -306,14 +319,8 @@ def test_missing_data(self): m.default_rng.get_value(borrow=True).seed(102) - # The gradient should have random values as inputs, so its value should - # change every time we evaluate it at the same point - # - # TODO: We could probably use a better test than this. - res = [gf(DictToArrayBijection.map(Point(m.test_point, model=m))) for i in range(20)] - assert np.var(res) > 0.0 - - def test_aesara_switch_broadcast_edge_cases_1(self): + @pytest.mark.xfail(reason="Missing distributions") + def test_aesara_switch_broadcast_edge_cases(self): # Tests against two subtle issues related to a previous bug in Theano # where `tt.switch` would not always broadcast tensors with single # values https://github.com/pymc-devs/aesara/issues/270 @@ -345,7 +352,7 @@ def test_aesara_switch_broadcast_edge_cases_2(self): npt.assert_allclose(m.dlogp([mu])({"mu": 0}), 2.499424682024436, rtol=1e-5) -@pytest.mark.xfail(reason="DensityDist not refactored for v4") +@pytest.mark.xfail(reason="DensityDist not supported") def test_multiple_observed_rv(): "Test previously buggy multi-observed RV comparison code." y1_data = np.random.randn(10) diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py new file mode 100644 index 0000000000..4acaee7dd3 --- /dev/null +++ b/pymc3/tests/test_model_helpers.py @@ -0,0 +1,154 @@ +# Copyright 2020 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import aesara +import aesara.sparse as sparse +import aesara.tensor as aet +import numpy as np +import numpy.ma as ma +import numpy.testing as npt +import pandas as pd +import pytest +import scipy.sparse as sps + +from aesara.graph.basic import Variable +from aesara.tensor.var import TensorConstant, TensorVariable + +import pymc3 as pm + + +class TestHelperFunc: + @pytest.mark.parametrize("input_dtype", ["int32", "int64", "float32", "float64"]) + def test_pandas_to_array(self, input_dtype): + """ + Ensure that pandas_to_array returns the dense array, masked array, + graph variable, TensorVariable, or sparse matrix as appropriate. + """ + # Create the various inputs to the function + sparse_input = sps.csr_matrix(np.eye(3)).astype(input_dtype) + dense_input = np.arange(9).reshape((3, 3)).astype(input_dtype) + + input_name = "input_variable" + aesara_graph_input = aet.as_tensor(dense_input, name=input_name) + pandas_input = pd.DataFrame(dense_input) + + # All the even numbers are replaced with NaN + missing_numpy_input = np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]]) + missing_pandas_input = pd.DataFrame(missing_numpy_input) + masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) + + # Create a generator object. Apparently the generator object needs to + # yield numpy arrays. + square_generator = (np.array([i ** 2], dtype=int) for i in range(100)) + + # Alias the function to be tested + func = pm.model.pandas_to_array + + ##### + # Perform the various tests + ##### + # Check function behavior with dense arrays and pandas dataframes + # without missing values + for input_value in [dense_input, pandas_input]: + func_output = func(input_value) + assert isinstance(func_output, np.ndarray) + assert func_output.shape == input_value.shape + npt.assert_allclose(func_output, dense_input) + + # Check function behavior with sparse matrix inputs + sparse_output = func(sparse_input) + assert sps.issparse(sparse_output) + assert sparse_output.shape == sparse_input.shape + npt.assert_allclose(sparse_output.toarray(), sparse_input.toarray()) + + # Check function behavior when using masked array inputs and pandas + # objects with missing data + for input_value in [missing_numpy_input, masked_array_input, missing_pandas_input]: + func_output = func(input_value) + assert isinstance(func_output, ma.core.MaskedArray) + assert func_output.shape == input_value.shape + npt.assert_allclose(func_output, masked_array_input) + + # Check function behavior with Aesara graph variable + aesara_output = func(aesara_graph_input) + assert isinstance(aesara_output, Variable) + npt.assert_allclose(aesara_output.eval(), aesara_graph_input.eval()) + intX = pm.aesaraf._conversion_map[aesara.config.floatX] + if dense_input.dtype == intX or dense_input.dtype == aesara.config.floatX: + assert aesara_output.owner is None # func should not have added new nodes + assert aesara_output.name == input_name + else: + assert aesara_output.owner is not None # func should have casted + assert aesara_output.owner.inputs[0].name == input_name + + if "float" in input_dtype: + assert 
aesara_output.dtype == aesara.config.floatX + else: + assert aesara_output.dtype == intX + + # Check function behavior with generator data + generator_output = func(square_generator) + + # Output is wrapped with `pm.floatX`, and this unwraps + wrapped = generator_output.owner.inputs[0] + # Make sure the returned object has .set_gen and .set_default methods + assert hasattr(wrapped, "set_gen") + assert hasattr(wrapped, "set_default") + # Make sure the returned object is an Aesara TensorVariable + assert isinstance(wrapped, TensorVariable) + + def test_make_obs_var(self): + """ + Check returned values for `data` given known inputs to `as_tensor()`. + + Note that ndarrays should return a TensorConstant and sparse inputs + should return a Sparse Aesara object. + """ + # Create the various inputs to the function + input_name = "testing_inputs" + sparse_input = sps.csr_matrix(np.eye(3)) + dense_input = np.arange(9).reshape((3, 3)) + masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) + + # Create a fake model and fake distribution to be used for the test + fake_model = pm.Model() + with fake_model: + fake_distribution = pm.Normal.dist(mu=0, sigma=1) + # Create the testval attribute simply for the sake of model testing + fake_distribution.name = input_name + + # Check function behavior using the various inputs + dense_output = pm.model.make_obs_var(fake_distribution, dense_input) + sparse_output = pm.model.make_obs_var(fake_distribution, sparse_input) + masked_output = pm.model.make_obs_var(fake_distribution, masked_array_input) + + # Ensure that the missing values are appropriately set to None + for func_output in [dense_output, sparse_output]: + assert func_output.tag.missing_values is None + + # Ensure that the Aesara variable names are correctly set. + # Note that the outputs for masked inputs do not have their names set + # to the passed value. + for func_output in [dense_output, sparse_output]: + assert func_output.name == input_name + + # Ensure that the returned variables are all of the correct type + assert isinstance(dense_output.tag.observations, TensorConstant) + assert sparse.basic._is_sparse_variable(sparse_output.tag.observations) + + # Masked output is something weird.
Just ensure it has missing values + # self.assertIsInstance(masked_output, TensorConstant) + assert masked_output.tag.missing_values is not None + + return None diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py index 2b96b2149e..e92b42fb40 100644 --- a/pymc3/tests/test_quadpotential.py +++ b/pymc3/tests/test_quadpotential.py @@ -263,6 +263,7 @@ def test_full_adapt_warn(): quadpotential.QuadPotentialFullAdapt(2, np.zeros(2), np.eye(2), 0) +@pytest.mark.xfail(reason="MvNormal was not yet refactored") def test_full_adapt_sampling(seed=289586): np.random.seed(seed) diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 86d89424f4..4127b52136 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -19,7 +19,7 @@ from typing import Tuple import aesara -import aesara.tensor as at +import aesara.tensor as aet import numpy as np import numpy.testing as npt import pytest @@ -384,7 +384,7 @@ def test_shared_named(self): testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=at.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) ) res = theta.eval() assert np.isclose(res, 0.0) @@ -400,7 +400,7 @@ def test_shared_unnamed(self): testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=at.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) ) res = theta.eval() assert np.isclose(res, 0.0) @@ -416,7 +416,7 @@ def test_constant_named(self): testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=at.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) ) res = theta.eval() @@ -450,7 +450,7 @@ def test_normal_scalar(self): with model: # test list input - ppc0 = pm.sample_posterior_predictive([model.initial_point], samples=10) + ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10) # # deprecated argument is not introduced to fast version [2019/08/20:rpg] ppc = pm.sample_posterior_predictive(trace, var_names=["a"]) # test empty ppc @@ -675,7 +675,7 @@ def test_deterministic_of_observed(self): var_names=[var.name for var in (model.deterministics + model.basic_RVs)], ) - npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) + rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-4 def test_deterministic_of_observed_modified_interface(self): np.random.seed(4982) @@ -747,14 +747,14 @@ def test_sample_posterior_predictive_w(self): y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_0 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_0 = pm.to_inference_data(trace_0, log_likelihood=False) + idata_0 = az.from_pymc3(trace_0, log_likelihood=False) with pm.Model() as model_1: mu = pm.Normal("mu", mu=0, sigma=1, size=len(data0)) y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_1 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_1 = pm.to_inference_data(trace_1, log_likelihood=False) + idata_1 = az.from_pymc3(trace_1, log_likelihood=False) with pm.Model() as model_2: # Model with no observed RVs. 
@@ -827,15 +827,6 @@ def check_exec_nuts_init(method): "ADVI+adapt_diag", "advi+adapt_diag_grad", "advi_map", - ], -) -def test_exec_nuts_advi_init(method): - check_exec_nuts_init(method) - - -@pytest.mark.parametrize( - "method", - [ "jitter+adapt_diag", "adapt_diag", "map", @@ -843,8 +834,22 @@ def test_exec_nuts_advi_init(method): "jitter+adapt_full", ], ) +@pytest.mark.xfail(reason="ADVI not refactored for v4", exception=NotImplementedError) def test_exec_nuts_init(method): - check_exec_nuts_init(method) + with pm.Model() as model: + pm.Normal("a", mu=0, sigma=1, size=2) + pm.HalfNormal("b", sigma=1) + with model: + start, _ = pm.init_nuts(init=method, n_init=10) + assert isinstance(start, list) + assert len(start) == 1 + assert isinstance(start[0], dict) + assert "a" in start[0] and "b" in start[0] + start, _ = pm.init_nuts(init=method, n_init=10, chains=2) + assert isinstance(start, list) + assert len(start) == 2 + assert isinstance(start[0], dict) + assert "a" in start[0] and "b" in start[0] @pytest.mark.parametrize( @@ -969,8 +974,6 @@ def test_layers(self): a = pm.Uniform("a", lower=0, upper=1, size=10) b = pm.Binomial("b", n=1, p=a, size=10) - model.default_rng.get_value(borrow=True).seed(232093) - b_sampler = aesara.function([], b) avg = np.stack([b_sampler() for i in range(10000)]).mean(0) npt.assert_array_almost_equal(avg, 0.5 * np.ones((10,)), decimal=2) @@ -1054,7 +1057,7 @@ def test_zeroinflatedpoisson(self): def test_bounded_dist(self): with pm.Model() as model: BoundedNormal = pm.Bound(pm.Normal, lower=0.0) - x = BoundedNormal("x", mu=at.zeros((3, 1)), sd=1 * at.ones((3, 1)), size=(3, 1)) + x = BoundedNormal("x", mu=aet.zeros((3, 1)), sd=1 * aet.ones((3, 1)), size=(3, 1)) with model: prior_trace = pm.sample_prior_predictive(5) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index fd02139879..350af8473e 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -979,9 +979,9 @@ def test_bad_init_parallel(self): def test_linalg(self, caplog): with Model(): - a = Normal("a", size=2, testval=floatX(np.zeros(2))) - a = at.switch(a > 0, np.inf, a) - b = at.slinalg.solve(floatX(np.eye(2)), a) + a = Normal("a", size=2) + a = aet.switch(a > 0, np.inf, a) + b = aet.slinalg.solve(floatX(np.eye(2)), a) Normal("c", mu=b, size=2, testval=floatX(np.r_[0.0, 0.0])) caplog.clear() trace = sample(20, init=None, tune=5, chains=2) diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index fd32d8b9b6..404dcfac92 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -44,7 +44,7 @@ tol = 1e-7 if aesara.config.floatX == "float64" else 1e-6 -def check_transform(transform, domain, constructor=at.dscalar, test=0, rv_var=None): +def check_transform(transform, domain, constructor=aet.dscalar, test=0, rv_var=None): x = constructor("x") x.tag.test_value = test if rv_var is None: @@ -59,10 +59,10 @@ def check_transform(transform, domain, constructor=at.dscalar, test=0, rv_var=No def check_vector_transform(transform, domain, rv_var=None): - return check_transform(transform, domain, at.dvector, test=np.array([0, 0]), rv_var=rv_var) + return check_transform(transform, domain, aet.dvector, test=np.array([0, 0]), rv_var=rv_var) -def get_values(transform, domain=R, constructor=at.dscalar, test=0, rv_var=None): +def get_values(transform, domain=R, constructor=aet.dscalar, test=0, rv_var=None): x = constructor("x") x.tag.test_value = test if rv_var is None: @@ -74,7 +74,7 @@ def get_values(transform, domain=R, 
constructor=at.dscalar, test=0, rv_var=None) def check_jacobian_det( transform, domain, - constructor=at.dscalar, + constructor=aet.dscalar, test=0, make_comparable=None, elemwise=False, @@ -99,7 +99,7 @@ def check_jacobian_det( actual_ljd = aesara.function([y], jac) computed_ljd = aesara.function( - [y], at.as_tensor_variable(transform.jacobian_det(rv_var, y)), on_unused_input="ignore" + [y], aet.as_tensor_variable(transform.jacobian_det(rv_var, y)), on_unused_input="ignore" ) for yval in domain.vals: @@ -229,7 +229,7 @@ def test_interval_near_boundary(): with pm.Model() as model: pm.Uniform("x", testval=x0, lower=lb, upper=ub) - log_prob = model.point_logps() + log_prob = model.check_test_point() np.testing.assert_allclose(log_prob, np.array([-52.68])) @@ -286,7 +286,7 @@ def check_transform_elementwise_logp(self, model): x0 = x.tag.value_var assert x.ndim == logpt(x).ndim - pt = model.initial_point + pt = model.test_point array = np.random.randn(*pt[x0.name].shape) transform = x0.tag.transform logp_notrans = logpt(x, transform.backward(x, array), transformed=False) @@ -303,7 +303,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): x0 = x.tag.value_var assert (x.ndim - 1) == logpt(x).ndim - pt = model.initial_point + pt = model.test_point array = np.random.randn(*pt[x0.name].shape) transform = x0.tag.transform logp_nojac = logpt(x, transform.backward(x, array), transformed=False) @@ -312,7 +312,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): assert logpt(x).ndim == jacob_det.ndim # Hack to get relative tolerance - a = logpt(x, array.astype(aesara.config.floatX), jacobian=False).eval() + a = logpt(x, array, jacobian=False).eval() b = logp_nojac.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) @@ -356,8 +356,8 @@ def test_beta(self, a, b, size): def test_uniform(self, lower, upper, size): def transform_params(rv_var): _, _, _, lower, upper = rv_var.owner.inputs - lower = at.as_tensor_variable(lower) if lower is not None else None - upper = at.as_tensor_variable(upper) if upper is not None else None + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None return lower, upper interval = tr.Interval(transform_params) @@ -449,8 +449,8 @@ def test_beta_ordered(self, a, b, size): def test_uniform_ordered(self, lower, upper, size): def transform_params(rv_var): _, _, _, lower, upper = rv_var.owner.inputs - lower = at.as_tensor_variable(lower) if lower is not None else None - upper = at.as_tensor_variable(upper) if upper is not None else None + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None return lower, upper interval = tr.Interval(transform_params) diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index b083e57870..3a5644a7bf 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -209,8 +209,8 @@ def parametric_grouped_approxes(request): @pytest.fixture def three_var_aevb_groups(parametric_grouped_approxes, three_var_model, aevb_initial): - one_initial_value = three_var_model.initial_point[three_var_model.one.tag.value_var.name] - dsize = np.prod(one_initial_value.shape[1:]) + # XXX: This needs to be refactored + dsize = None # np.prod(pymc3.util.get_transformed(three_var_model.one).dshape[1:]) cls, kw = parametric_grouped_approxes spec = cls.get_param_spec_for(d=dsize, 
**kw) params = dict() diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index be1da625a2..893f11b6b4 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -29,7 +29,7 @@ import pymc3 as pm from pymc3.aesaraf import inputvars -from pymc3.blocking import DictToArrayBijection, RaveledVars +from pymc3.blocking import DictToArrayBijection from pymc3.model import Point, modelcontext from pymc3.util import get_default_varnames, get_var_name from pymc3.vartypes import discrete_types, typefilter @@ -102,22 +102,14 @@ def find_MAP( start = Point(start, model=model) + logp_func = DictToArrayBijection.mapf(model.fastlogp_nojac) x0 = DictToArrayBijection.map(start) - # TODO: If the mapping is fixed, we can simply create graphs for the - # mapping and avoid all this bijection overhead - def logp_func(x): - return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(x, x0.point_map_info)) - try: # This might be needed for calls to `dlogp_func` # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars) - def dlogp_func(x): - return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))( - RaveledVars(x, x0.point_map_info) - ) - + dlogp_func = DictToArrayBijection.mapf(model.fastdlogp_nojac(vars)) compute_gradient = True except (AttributeError, NotImplementedError, tg.NullTypeGradError): compute_gradient = False @@ -154,8 +146,6 @@ def dlogp_func(x): cost_func.progress.update(last_v) print() - mx0 = RaveledVars(mx0, x0.point_map_info) - vars = get_default_varnames( [v.tag.value_var for v in model.unobserved_RVs], include_transformed ) diff --git a/pymc3/util.py b/pymc3/util.py index d60f83caff..4bf929e36d 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -258,7 +258,57 @@ def get_repr_for_variable(variable, formatting="plain"): def get_var_name(var): """Get an appropriate, plain variable name for a variable.""" - return getattr(var, "name", str(var)) + if isinstance(var, TensorVariable): + return super(TensorVariable, var).__str__() + else: + return str(var) + + +def update_start_vals(a, b, model): + r"""Update a with b, without overwriting existing keys.""" + a.update({k: v for k, v in b.items() if k not in a}) + + +def check_start_vals(start, model): + r"""Check that the starting values for MCMC do not cause the relevant log probability + to evaluate to something invalid (e.g. Inf or NaN) + + Parameters + ---------- + start : dict, or array of dict + Starting point in parameter space (or partial point) + Defaults to ``trace.point(-1)`` if there is a trace provided and model.test_point if not + (defaults to empty dict). Initialization methods for NUTS (see ``init`` keyword) can + overwrite the default. + model : Model object + Raises + ------ + KeyError if the parameters provided by `start` do not agree with the parameters contained + within `model` + pymc3.exceptions.SamplingError if the evaluation of the parameters in `start` leads to an + invalid (i.e.
non-finite) state + Returns + ------- + None + """ + start_points = [start] if isinstance(start, dict) else start + for elem in start_points: + if not set(elem.keys()).issubset(model.named_vars.keys()): + extra_keys = ", ".join(set(elem.keys()) - set(model.named_vars.keys())) + valid_keys = ", ".join(model.named_vars.keys()) + raise KeyError( + "Some start parameters do not appear in the model!\n" + "Valid keys are: {}, but {} was supplied".format(valid_keys, extra_keys) + ) + + initial_eval = model.check_test_point(test_point=elem) + + if not np.all(np.isfinite(initial_eval)): + raise SamplingError( + "Initial evaluation of model at starting point failed!\n" + "Starting values:\n{}\n\n" + "Initial evaluation results:\n{}".format(elem, str(initial_eval)) + ) def get_transformed(z):