diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index acd64a7a3e..1c18049d28 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -27,7 +27,6 @@ jobs: # 6th block: These have some XFAILs - | --ignore=pymc3/tests/test_distribution_defaults.py - --ignore=pymc3/tests/test_distributions.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_missing.py @@ -35,33 +34,25 @@ jobs: --ignore=pymc3/tests/test_model_graph.py --ignore=pymc3/tests/test_modelcontext.py --ignore=pymc3/tests/test_models_linear.py - --ignore=pymc3/tests/test_ndarray_backend.py --ignore=pymc3/tests/test_parallel_sampling.py - --ignore=pymc3/tests/test_posterior_predictive.py - --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_profile.py --ignore=pymc3/tests/test_random.py - --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_starting.py --ignore=pymc3/tests/test_step.py --ignore=pymc3/tests/test_tracetab.py - --ignore=pymc3/tests/test_transforms.py --ignore=pymc3/tests/test_tuning.py --ignore=pymc3/tests/test_types.py --ignore=pymc3/tests/test_util.py --ignore=pymc3/tests/test_variational_inference.py - --ignore=pymc3/tests/test_sampling_jax.py - --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_minibatches.py --ignore=pymc3/tests/test_pickling.py --ignore=pymc3/tests/test_plots.py --ignore=pymc3/tests/test_special_functions.py --ignore=pymc3/tests/test_updates.py - --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_glm.py diff --git a/pymc3/aesaraf.py b/pymc3/aesaraf.py index 0c425a90a3..61ec4a9da6 100644 --- a/pymc3/aesaraf.py +++ b/pymc3/aesaraf.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, List import aesara import numpy as np @@ -222,7 +223,7 @@ def __hash__(self): return hash(type(self)) -def make_shared_replacements(vars, model): +def make_shared_replacements(point, vars, model): """ Makes shared replacements for all *other* variables than the ones passed. @@ -231,6 +232,7 @@ def make_shared_replacements(vars, model): Parameters ---------- + point: dictionary mapping variable names to sample values vars: list of variables not to make shared model: model @@ -239,15 +241,22 @@ def make_shared_replacements(vars, model): Dict of variable -> new shared variable """ othervars = set(model.vars) - set(vars) - return {var: aesara.shared(var.tag.test_value, var.name + "_shared") for var in othervars} + return {var: aesara.shared(point[var.name], var.name + "_shared") for var in othervars} -def join_nonshared_inputs(xs, vars, shared, make_shared=False): +def join_nonshared_inputs( + point: Dict[str, np.ndarray], + xs: List[TensorVariable], + vars: List[TensorVariable], + shared, + make_shared: bool = False, +): """ Takes a list of aesara Variables and joins their non shared inputs into a single input. 
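For orientation, a minimal sketch of the point-based API that this aesaraf.py hunk introduces: both helpers now read initial values from an explicit dict instead of from test values. The model and variable names below are illustrative only, and `m.test_point` is assumed to supply the point (per the reworked property later in this patch):

import pymc3 as pm

with pm.Model() as m:
    a = pm.Normal("a", 0.0, 1.0)
    b = pm.Normal("b", a, 1.0)

point = m.test_point                           # initial values keyed by value-variable name
free = [v for v in m.vars if v.name == "b"]    # value variables to keep as explicit inputs
shared = pm.make_shared_replacements(point, free, m)   # other vars become shareds seeded from `point`
(logp,), inarray = pm.join_nonshared_inputs(point, [m.logpt], free, shared)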
Parameters ---------- + point: a sample point xs: list of aesara tensors vars: list of variables to join @@ -266,17 +275,20 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False): tensor_type = joined.type inarray = tensor_type("inarray") else: - inarray = aesara.shared(joined.tag.test_value, "inarray") + if point is None: + raise ValueError("A point is required when `make_shared` is True") + joined_values = np.concatenate([point[var.name].ravel() for var in vars]) + inarray = aesara.shared(joined_values, "inarray") - inarray.tag.test_value = joined.tag.test_value + if aesara.config.compute_test_value != "off": + inarray.tag.test_value = joined.tag.test_value replace = {} last_idx = 0 for var in vars: - arr_len = aet.prod(var.shape) - replace[var] = reshape_t(inarray[last_idx : last_idx + arr_len], var.shape).astype( - var.dtype - ) + shape = point[var.name].shape + arr_len = np.prod(shape, dtype=int) + replace[var] = reshape_t(inarray[last_idx : last_idx + arr_len], shape).astype(var.dtype) last_idx += arr_len replace.update(shared) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index dfc96175ac..aa58ea82b5 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -31,6 +31,8 @@ int, np.ndarray, Tuple[Union[int, Variable], ...], List[Union[int, Variable]], Variable ] +no_transform_object = object() + @singledispatch def logp_transform(op: Op): @@ -340,17 +342,17 @@ def logpt( else: logp_var = _logcdf(rv_node.op, rv_var, *dist_params, **kwargs) - transform = getattr(rv_value_var.tag, "transform", None) if rv_value_var else None - - if transform and transformed and not cdf: + if transformed and not cdf: (logp_var,), _ = apply_transforms((logp_var,)) - if jacobian: - transformed_jacobian = transform.jacobian_det(rv_var, rv_value) - if transformed_jacobian: - if logp_var.ndim > transformed_jacobian.ndim: - logp_var = logp_var.sum(axis=-1) - logp_var += transformed_jacobian + transform = getattr(rv_value_var.tag, "transform", None) if rv_value_var else None + + if transform and transformed and not cdf and jacobian: + transformed_jacobian = transform.jacobian_det(rv_var, rv_value) + if transformed_jacobian: + if logp_var.ndim > transformed_jacobian.ndim: + logp_var = logp_var.sum(axis=-1) + logp_var += transformed_jacobian # Replace random variables with their value variables (logp_var,), replaced = rvs_to_value_vars((logp_var,), {rv_var: rv_value}) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 5864eac3ab..43f0446525 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -727,35 +727,33 @@ def NegBinom(a, m, x): @classmethod def dist(cls, mu=None, alpha=None, p=None, n=None, *args, **kwargs): - mu, alpha = cls.get_mu_alpha(mu, alpha, p, n) - mu = aet.as_tensor_variable(floatX(mu)) - alpha = aet.as_tensor_variable(floatX(alpha)) - # mode = intX(aet.floor(mu)) - return super().dist([mu, alpha], *args, **kwargs) + n, p = cls.get_mu_alpha(mu, alpha, p, n) + n = aet.as_tensor_variable(floatX(n)) + p = aet.as_tensor_variable(floatX(p)) + return super().dist([n, p], *args, **kwargs) @classmethod def get_mu_alpha(cls, mu=None, alpha=None, p=None, n=None): - if alpha is None: - if n is not None: - n = aet.as_tensor_variable(intX(n)) - alpha = n + if n is None: + if alpha is not None: + n = aet.as_tensor_variable(floatX(alpha)) else: raise ValueError("Incompatible parametrization. 
Must specify either alpha or n.") - elif n is not None: + elif alpha is not None: raise ValueError("Incompatible parametrization. Can't specify both alpha and n.") - if mu is None: - if p is not None: - p = aet.as_tensor_variable(floatX(p)) - mu = alpha * (1 - p) / p + if p is None: + if mu is not None: + mu = aet.as_tensor_variable(floatX(mu)) + p = n / (mu + n) else: raise ValueError("Incompatible parametrization. Must specify either mu or p.") - elif p is not None: + elif mu is not None: raise ValueError("Incompatible parametrization. Can't specify both mu and p.") - return mu, alpha + return n, p - def logp(value, mu, alpha): + def logp(value, n, p): r""" Calculate log-probability of NegativeBinomial distribution at specified value. @@ -769,6 +767,8 @@ def logp(value, mu, alpha): ------- TensorVariable """ + alpha = n + mu = alpha * (1 - p) / p negbinom = bound( binomln(value + alpha - 1, value) + logpow(mu / (mu + alpha), value) @@ -779,9 +779,9 @@ def logp(value, mu, alpha): ) # Return Poisson when alpha gets very large. - return aet.switch(aet.gt(alpha, 1e10), Poisson.dist(mu).logp(value), negbinom) + return aet.switch(aet.gt(alpha, 1e10), Poisson.logp(value, mu), negbinom) - def logcdf(value, mu, alpha): + def logcdf(value, n, p): """ Compute the log of the cumulative distribution function for NegativeBinomial distribution at the specified value. @@ -801,20 +801,14 @@ def logcdf(value, mu, alpha): f"NegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - # TODO: avoid `p` recomputation if distribution was defined in terms of `p` - p = alpha / (mu + alpha) - return bound( - aet.log(incomplete_beta(alpha, aet.floor(value) + 1, p)), + aet.log(incomplete_beta(n, aet.floor(value) + 1, p)), 0 <= value, - 0 < alpha, + 0 < n, 0 <= p, p <= 1, ) - def _distr_parameters_for_repr(self): - return self._param_type - class Geometric(Discrete): R""" diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 2b7d48a6c1..099f6f3cbc 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -26,7 +26,7 @@ from aesara.tensor.random.op import RandomVariable -from pymc3.distributions import _logcdf, _logp +from pymc3.distributions import _logcdf, _logp, no_transform_object if TYPE_CHECKING: from typing import Optional, Callable @@ -161,7 +161,7 @@ def __new__(cls, name, *args, **kwargs): if "shape" in kwargs: raise DeprecationWarning("The `shape` keyword is deprecated; use `size`.") - transform = kwargs.pop("transform", None) + transform = kwargs.pop("transform", no_transform_object) rv_out = cls.dist(*args, rng=rng, **kwargs) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index fc2833f9fd..f44d65b7a0 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -221,7 +221,7 @@ class MvNormal(Continuous): @classmethod def dist(cls, mu, cov=None, tau=None, chol=None, lower=True, **kwargs): mu = aet.as_tensor_variable(mu) - cov = quaddist_matrix(cov, tau, chol, lower) + cov = quaddist_matrix(cov, chol, tau, lower) return super().dist([mu, cov], **kwargs) def logp(value, mu, cov): @@ -386,8 +386,12 @@ class Dirichlet(Continuous): rv_op = dirichlet + def __new__(cls, name, *args, **kwargs): + kwargs.setdefault("transform", transforms.stick_breaking) + return super().__new__(cls, name, *args, **kwargs) + @classmethod - def dist(cls, a, transform=transforms.stick_breaking, **kwargs): + def dist(cls, a, **kwargs): a = 
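The NegativeBinomial hunk above rewires `get_mu_alpha` so that a (mu, alpha) parametrization is converted to the (n, p) form expected by the random op, and `logp` converts back. A small numeric sketch of the two directions (plain NumPy, values illustrative):

import numpy as np

mu, alpha = 4.0, 2.0
n = alpha                   # the dispersion plays the role of the number of successes
p = n / (mu + n)            # 2 / 6 = 1/3

# inverse mapping, as used inside logp():
mu_back = n * (1 - p) / p   # 2 * (2/3) / (1/3) = 4
assert np.isclose(mu_back, mu)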
aet.as_tensor_variable(a) # mean = a / aet.sum(a) @@ -483,7 +487,7 @@ class Multinomial(Discrete): @classmethod def dist(cls, n, p, *args, **kwargs): - # p = p / aet.sum(p, axis=-1, keepdims=True) + p = p / aet.sum(p, axis=-1, keepdims=True) n = aet.as_tensor_variable(n) p = aet.as_tensor_variable(p) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 7dbbc7afd0..b5ab265cb4 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -218,7 +218,7 @@ def jacobian_det(self, rv_var, rv_value): s = aet.nnet.softplus(-rv_value) return aet.log(b - a) - 2 * s - rv_value else: - return aet.ones_like(rv_value) + return rv_value interval = Interval @@ -286,7 +286,7 @@ class StickBreaking(Transform): name = "stickbreaking" def forward(self, rv_var, rv_value): - if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + if rv_var.broadcastable[-1]: # If this variable is just a bunch of scalars/degenerate # Dirichlets, we can't transform it return rv_value @@ -299,7 +299,7 @@ def forward(self, rv_var, rv_value): return floatX(y.T) def backward(self, rv_var, rv_value): - if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + if rv_var.broadcastable[-1]: # If this variable is just a bunch of scalars/degenerate # Dirichlets, we can't transform it return rv_value @@ -312,7 +312,7 @@ def backward(self, rv_var, rv_value): return floatX(x.T) def jacobian_det(self, rv_var, rv_value): - if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + if rv_var.broadcastable[-1]: # If this variable is just a bunch of scalars/degenerate # Dirichlets, we can't transform it return aet.ones_like(rv_value) diff --git a/pymc3/model.py b/pymc3/model.py index d6f3e3f82b..d4367e827c 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -38,7 +38,13 @@ from pymc3.aesaraf import generator, gradient, hessian, inputvars from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.data import GenTensorVariable, Minibatch -from pymc3.distributions import change_rv_size, logp_transform, logpt, logpt_sum +from pymc3.distributions import ( + change_rv_size, + logp_transform, + logpt, + logpt_sum, + no_transform_object, +) from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list from pymc3.memoize import WithMemoization @@ -240,8 +246,6 @@ def build_named_node_tree(graphs): T = TypeVar("T", bound="ContextMeta") -no_transform_object = object() - class ContextMeta(type): """Functionality for objects that put themselves in a context using @@ -1003,19 +1007,37 @@ def independent_vars(self): @property def test_point(self): - """Test point used to check that the model doesn't generate errors""" + """Test point used to check that the model doesn't generate errors + + TODO: This should be replaced with proper initial value support. 
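Taken together, the `no_transform_object` sentinel and the new `Dirichlet.__new__` mean the stick-breaking transform is applied unless the user explicitly opts out. A hypothetical usage sketch of the behaviour these hunks imply:

import numpy as np
import pymc3 as pm

with pm.Model():
    d1 = pm.Dirichlet("d1", a=np.ones(3))                   # picks up stick_breaking by default
    d2 = pm.Dirichlet("d2", a=np.ones(3), transform=None)   # an explicit None still disables it

d1.tag.value_var.tag.transform                      # a StickBreaking instance
getattr(d2.tag.value_var.tag, "transform", None)    # None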
+ """ points = [] - for var in self.free_RVs: - var_value = getattr(var.tag, "test_value", None) + for rv_var in self.free_RVs: + value_var = rv_var.tag.value_var + var_value = getattr(value_var.tag, "test_value", None) if var_value is None: - try: - var_value = var.eval() - var.tag.test_value = var_value - except Exception: - raise Exception(f"Couldn't generate an initial value for {var}") - points.append((getattr(var.tag, "value_var", var), var_value)) + rv_var_value = getattr(rv_var.tag, "test_value", None) + + if rv_var_value is None: + try: + rv_var_value = rv_var.eval() + except Exception: + raise Exception(f"Couldn't generate an initial value for {rv_var}") + + transform = getattr(value_var.tag, "transform", None) + + if transform: + try: + rv_var_value = transform.forward(rv_var, rv_var_value).eval() + except Exception: + raise Exception(f"Couldn't generate an initial value for {rv_var}") + + var_value = rv_var_value + value_var.tag.test_value = var_value + + points.append((value_var, var_value)) return Point(points, model=self) @@ -1132,6 +1154,7 @@ def register_rv( value_var.name = f"{value_var.name}_{transform.name}__" if aesara.config.compute_test_value != "off": value_var.tag.test_value = transform.forward(rv_var, value_var).tag.test_value + self.named_vars[value_var.name] = value_var self.add_random_variable(rv_var, dims) @@ -1653,29 +1676,6 @@ def _walk_up_rv(rv, formatting="plain"): return all_rvs -class DeterministicWrapper(TensorVariable): - def _str_repr(self, formatting="plain"): - if "latex" in formatting: - if formatting == "latex_with_params": - return r"$\text{{{name}}} \sim \text{{Deterministic}}({args})$".format( - name=self.name, args=r",~".join(_walk_up_rv(self, formatting=formatting)) - ) - return fr"$\text{{{self.name}}} \sim \text{{Deterministic}}$" - else: - if formatting == "plain_with_params": - args = ", ".join(_walk_up_rv(self, formatting=formatting)) - return f"{self.name} ~ Deterministic({args})" - return f"{self.name} ~ Deterministic" - - def _repr_latex_(self, *, formatting="latex_with_params", **kwargs): - return self._str_repr(formatting=formatting) - - __latex__ = _repr_latex_ - - def __str__(self): - return self._str_repr(formatting="plain") - - def Deterministic(name, var, model=None, dims=None): """Create a named deterministic variable @@ -1692,7 +1692,6 @@ def Deterministic(name, var, model=None, dims=None): var = var.copy(model.name_for(name)) model.deterministics.append(var) model.add_random_variable(var, dims) - var.__class__ = DeterministicWrapper # adds str and latex functionality return var diff --git a/pymc3/sampling.py b/pymc3/sampling.py index d35d3d248d..b6cb03bd44 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -197,7 +197,7 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, step_kwargs=None # Use competence classmethods to select step methods for remaining # variables selected_steps = defaultdict(list) - for var in model.free_RVs: + for var in model.vars: if var not in assigned_vars: # determine if a gradient can be computed has_gradient = var.dtype not in discrete_types @@ -448,7 +448,8 @@ def sample( random_seed = [random_seed] if random_seed is None or isinstance(random_seed, int): if random_seed is not None: - np.random.seed(random_seed) + # np.random.seed(random_seed) + model.default_rng.get_value(borrow=True).seed(random_seed) random_seed = [np.random.randint(2 ** 30) for _ in range(chains)] if not isinstance(random_seed, abc.Iterable): raise TypeError("Invalid value for `random_seed`. 
Must be tuple, list or int") @@ -658,9 +659,9 @@ def _check_start_shape(model, start): if not isinstance(start, dict): raise TypeError("start argument must be a dict or an array-like of dicts") e = "" - for var in model.vars: + for var in model.basic_RVs: + var_shape = model.fastfn(var.shape)(start) if var.name in start.keys(): - var_shape = var.shape.tag.test_value start_var_shape = np.shape(start[var.name]) if start_var_shape: if not np.array_equal(var_shape, start_var_shape): @@ -971,7 +972,8 @@ def _iter_sample( model = modelcontext(model) draws = int(draws) if random_seed is not None: - np.random.seed(random_seed) + # np.random.seed(random_seed) + model.default_rng.get_value(borrow=True).seed(random_seed) if draws < 1: raise ValueError("Argument `draws` must be greater than 0.") @@ -1239,7 +1241,8 @@ def _prepare_iter_population( model = modelcontext(model) draws = int(draws) if random_seed is not None: - np.random.seed(random_seed) + # np.random.seed(random_seed) + model.default_rng.get_value(borrow=True).seed(random_seed) if draws < 1: raise ValueError("Argument `draws` should be above 0.") @@ -1710,7 +1713,8 @@ def sample_posterior_predictive( vars_ = model.observed_RVs if random_seed is not None: - np.random.seed(random_seed) + # np.random.seed(random_seed) + model.default_rng.get_value(borrow=True).seed(random_seed) indices = np.arange(samples) @@ -1724,12 +1728,18 @@ def sample_posterior_predictive( if not hasattr(_trace, "varnames"): inputs_and_names = [ - (rv, rv.name) for rv in rv_ancestors(vars_to_sample, walk_past_rvs=True) + (rv, rv.name) + for rv in rv_ancestors(vars_to_sample, walk_past_rvs=True) + if rv not in vars_to_sample and rv in model.named_vars.values() ] - inputs, input_names = zip(*inputs_and_names) + if inputs_and_names: + inputs, input_names = zip(*inputs_and_names) + else: + inputs, input_names = [], [] else: - input_names = _trace.varnames - inputs = [model[n] for n in _trace.varnames] + output_names = [v.name for v in vars_to_sample if v.name is not None] + input_names = [n for n in _trace.varnames if n not in output_names] + inputs = [model[n] for n in input_names] if size is not None: vars_to_sample = [change_rv_size(v, size, expand=True) for v in vars_to_sample] @@ -1813,7 +1823,7 @@ def sample_posterior_predictive_w( Dictionary with the variables as keys. The values corresponding to the posterior predictive samples from the weighted models. """ - np.random.seed(random_seed) + # np.random.seed(random_seed) if isinstance(traces[0], InferenceData): n_samples = [ @@ -1830,6 +1840,8 @@ def sample_posterior_predictive_w( models = [modelcontext(models)] * len(traces) for model in models: + if random_seed: + model.default_rng.get_value(borrow=True).seed(random_seed) if model.potentials: warnings.warn( "The effect of Potentials on other parameters is ignored during posterior predictive sampling. 
" @@ -1969,7 +1981,8 @@ def sample_prior_predictive( vars_ = set(var_names) if random_seed is not None: - np.random.seed(random_seed) + # np.random.seed(random_seed) + model.default_rng.get_value(borrow=True).seed(random_seed) names = get_default_varnames(vars_, include_transformed=False) @@ -2116,7 +2129,8 @@ def init_nuts( if random_seed is not None: random_seed = int(np.atleast_1d(random_seed)[0]) - np.random.seed(random_seed) + # np.random.seed(random_seed) + model.default_rng.get_value(borrow=True).seed(random_seed) cb = [ pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"), diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 04d8c017c8..80321d31f6 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -108,12 +108,15 @@ def initialize_population(self): def setup_kernel(self): """Set up the likelihood logp function based on the chosen kernel.""" - shared = make_shared_replacements(self.variables, self.model) + initial_values = self.model.test_point + shared = make_shared_replacements(initial_values, self.variables, self.model) if self.kernel == "abc": factors = [var.logpt for var in self.model.free_RVs] factors += [aet.sum(factor) for factor in self.model.potentials] - self.prior_logp_func = logp_forw([aet.sum(factors)], self.variables, shared) + self.prior_logp_func = logp_forw( + initial_values, [aet.sum(factors)], self.variables, shared + ) simulator = self.model.observed_RVs[0] distance = simulator.distribution.distance sum_stat = simulator.distribution.sum_stat @@ -132,8 +135,12 @@ def setup_kernel(self): self.save_log_pseudolikelihood, ) elif self.kernel == "metropolis": - self.prior_logp_func = logp_forw([self.model.varlogpt], self.variables, shared) - self.likelihood_logp_func = logp_forw([self.model.datalogpt], self.variables, shared) + self.prior_logp_func = logp_forw( + initial_values, [self.model.varlogpt], self.variables, shared + ) + self.likelihood_logp_func = logp_forw( + initial_values, [self.model.datalogpt], self.variables, shared + ) def initialize_logp(self): """Initialize the prior and likelihood log probabilities.""" @@ -271,7 +278,7 @@ def posterior_to_trace(self): return strace -def logp_forw(out_vars, vars, shared): +def logp_forw(point, out_vars, vars, shared): """Compile Aesara function of the model and the input and output variables. 
Parameters @@ -283,7 +290,7 @@ def logp_forw(out_vars, vars, shared): shared: List containing :class:`aesara.tensor.Tensor` for depended shared data """ - out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) + out_list, inarray0 = join_nonshared_inputs(point, out_vars, vars, shared) f = aesara_function([inarray0], out_list[0]) f.trust_input = True return f diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index b0b30d0262..e454d64bc6 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -145,14 +145,26 @@ def step(self, point: Dict[str, np.ndarray]): if self.allvars: inputs.append(point) + apoint = DictToArrayBijection.map(point) + step_res = self.astep(apoint, *inputs) + if self.generates_stats: - apoint, stats = self.astep(DictToArrayBijection.map(point), *inputs) - return DictToArrayBijection.rmap(apoint), stats + apoint_new, stats = step_res else: - apoint = self.astep(DictToArrayBijection.map(point), *inputs) - return DictToArrayBijection.rmap(apoint) + apoint_new = step_res + + if not isinstance(apoint_new, RaveledVars): + # We assume that the mapping has stayed the same + apoint_new = RaveledVars(apoint_new, apoint.point_map_info) - def astep(self, apoint, point): + point_new = DictToArrayBijection.rmap(apoint_new) + + if self.generates_stats: + return point_new, stats + + return point_new + + def astep(self, apoint: RaveledVars, point: Dict[str, np.ndarray]): raise NotImplementedError() @@ -177,21 +189,43 @@ def __init__(self, vars, shared, blocked=True): self.blocked = blocked def step(self, point): - for var, share in self.shared.items(): - share.set_value(point[var]) + + # Remove shared variables from the sample point + point_no_shared = point.copy() + for name, shared_var in self.shared.items(): + shared_var.set_value(point[name]) + if name in point_no_shared: + del point_no_shared[name] + + q = DictToArrayBijection.map(point_no_shared) + + step_res = self.astep(q) if self.generates_stats: - apoint, stats = self.astep(DictToArrayBijection.map(point)) - return DictToArrayBijection.rmap(apoint), stats + apoint, stats = step_res else: - array = DictToArrayBijection.map(point) - apoint = self.astep(array) - if not isinstance(apoint, RaveledVars): - # We assume that the mapping has stayed the same - apoint = RaveledVars(apoint, array.point_map_info) - return DictToArrayBijection.rmap(apoint) + apoint = step_res - def astep(self, apoint): + if not isinstance(apoint, RaveledVars): + # We assume that the mapping has stayed the same + apoint = RaveledVars(apoint, q.point_map_info) + + # We need to re-add the shared variables to the new sample point + a_point = DictToArrayBijection.rmap(apoint) + new_point = {} + for name in point.keys(): + shared_value = self.shared.get(name, None) + if shared_value is not None: + new_point[name] = shared_value.get_value() + else: + new_point[name] = a_point[name] + + if self.generates_stats: + return new_point, stats + + return new_point + + def astep(self, apoint: RaveledVars): raise NotImplementedError() diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index ac74d89e65..8c523a31e3 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -23,7 +23,7 @@ import pymc3 as pm from pymc3.aesaraf import floatX -from pymc3.blocking import DictToArrayBijection +from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.step_methods.arraystep import ( ArrayStep, ArrayStepShared, @@ -149,14 +149,14 @@ def 
__init__( """ model = pm.modelcontext(model) + initial_values = model.test_point if vars is None: vars = model.vars vars = pm.inputvars(vars) if S is None: - # XXX: This needs to be refactored - S = None # np.ones(sum(v.dsize for v in vars)) + S = np.ones(sum(initial_values[v.name].size for v in vars)) if proposal_dist is not None: self.proposal_dist = proposal_dist(S) @@ -175,8 +175,7 @@ def __init__( # Determine type of variables self.discrete = np.concatenate( - # XXX: This needs to be refactored - None # [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars] + [[v.dtype in pm.discrete_types] * (initial_values[v.name].size or 1) for v in vars] ) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() @@ -188,8 +187,8 @@ def __init__( self.mode = mode - shared = pm.make_shared_replacements(vars, model) - self.delta_logp = delta_logp(model.logpt, vars, shared) + shared = pm.make_shared_replacements(initial_values, vars, model) + self.delta_logp = delta_logp(initial_values, model.logpt, vars, shared) super().__init__(vars, shared) def reset_tuning(self): @@ -198,7 +197,7 @@ def reset_tuning(self): setattr(self, attr, initial_value) return - def astep(self, q0): + def astep(self, q0: RaveledVars): if not self.steps_until_tune and self.tune: # Tune scaling parameter self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) @@ -212,15 +211,18 @@ def astep(self, q0): if self.all_discrete: delta = np.round(delta, 0).astype("int64") q0 = q0.astype("int64") - q = (q0 + delta).astype("int64") + q = (q0.data + delta).astype("int64") else: delta[self.discrete] = np.round(delta[self.discrete], 0) - q = q0 + delta + q = q0.data + delta else: - q = floatX(q0 + delta) + q = floatX(q0.data + delta) + + accept = self.delta_logp(q, q0.data) + q_new, accepted = metrop_select(accept, q, q0.data) + + q_new = RaveledVars(q_new, q0.point_map_info) - accept = self.delta_logp(q, q0) - q_new, accepted = metrop_select(accept, q, q0) self.accepted += accepted self.steps_until_tune -= 1 @@ -630,6 +632,7 @@ def __init__( ): model = pm.modelcontext(model) + initial_values = model.test_value if vars is None: vars = model.cont_vars @@ -657,8 +660,8 @@ def __init__( self.mode = mode - shared = pm.make_shared_replacements(vars, model) - self.delta_logp = delta_logp(model.logpt, vars, shared) + shared = pm.make_shared_replacements(initial_values, vars, model) + self.delta_logp = delta_logp(initial_values, model.logpt, vars, shared) super().__init__(vars, shared) def astep(self, q0): @@ -771,6 +774,7 @@ def __init__( **kwargs ): model = pm.modelcontext(model) + initial_values = model.test_value if vars is None: vars = model.cont_vars @@ -810,8 +814,8 @@ def __init__( self.mode = mode - shared = pm.make_shared_replacements(vars, model) - self.delta_logp = delta_logp(model.logpt, vars, shared) + shared = pm.make_shared_replacements(initial_values, vars, model) + self.delta_logp = delta_logp(initial_values, model.logpt, vars, shared) super().__init__(vars, shared) def reset_tuning(self): @@ -898,8 +902,8 @@ def softmax(x): return e_x / np.sum(e_x, axis=0) -def delta_logp(logp, vars, shared): - [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) +def delta_logp(point, logp, vars, shared): + [logp0], inarray0 = pm.join_nonshared_inputs(point, [logp], vars, shared) tensor_type = inarray0.type inarray1 = tensor_type("inarray1") diff --git a/pymc3/step_methods/mlda.py b/pymc3/step_methods/mlda.py index 926fb16314..23c4c94a8c 100644 --- a/pymc3/step_methods/mlda.py 
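On the step-method side the same pattern applies: `delta_logp` is compiled from the initial point, and `astep` now works on `RaveledVars`, whose `.data` attribute holds the flat array. A condensed, illustrative sketch of the Metropolis proposal/accept step (`proposal` and `scaling` are placeholders, not names from the patch):

shared = pm.make_shared_replacements(initial_values, vars, model)
delta_logp_fn = delta_logp(initial_values, model.logpt, vars, shared)

def astep(q0):                        # q0: RaveledVars
    q = floatX(q0.data + proposal() * scaling)
    accept = delta_logp_fn(q, q0.data)
    q_new, accepted = metrop_select(accept, q, q0.data)
    return RaveledVars(q_new, q0.point_map_info)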
+++ b/pymc3/step_methods/mlda.py @@ -57,6 +57,8 @@ def __init__(self, *args, **kwargs): Initialise MetropolisMLDA. This is a mix of the parent's class' initialisation and some extra code specific for MLDA. """ + model = pm.modelcontext(kwargs.get("model", None)) + initial_values = model.test_value # flag to that variance reduction is activated - forces MetropolisMLDA # to store quantities of interest in a register if True @@ -69,19 +71,18 @@ def __init__(self, *args, **kwargs): self.Q_reg = [np.nan] * self.mlda_subsampling_rate_above # extract some necessary variables - model = pm.modelcontext(kwargs.get("model", None)) vars = kwargs.get("vars", None) if vars is None: vars = model.vars vars = pm.inputvars(vars) - shared = pm.make_shared_replacements(vars, model) + shared = pm.make_shared_replacements(initial_values, vars, model) # call parent class __init__ super().__init__(*args, **kwargs) # modify the delta function and point to model if VR is used if self.mlda_variance_reduction: - self.delta_logp = delta_logp_inverse(model.logpt, vars, shared) + self.delta_logp = delta_logp_inverse(initial_values, model.logpt, vars, shared) self.model = model def reset_tuning(self): @@ -124,6 +125,9 @@ def __init__(self, *args, **kwargs): # flag used for signaling the end of tuning self.tuning_end_trigger = False + model = pm.modelcontext(kwargs.get("model", None)) + initial_values = model.test_value + # flag to that variance reduction is activated - forces DEMetropolisZMLDA # to store quantities of interest in a register if True self.mlda_variance_reduction = kwargs.pop("mlda_variance_reduction", False) @@ -135,19 +139,18 @@ def __init__(self, *args, **kwargs): self.Q_reg = [np.nan] * self.mlda_subsampling_rate_above # extract some necessary variables - model = pm.modelcontext(kwargs.get("model", None)) vars = kwargs.get("vars", None) if vars is None: vars = model.vars vars = pm.inputvars(vars) - shared = pm.make_shared_replacements(vars, model) + shared = pm.make_shared_replacements(initial_values, vars, model) # call parent class __init__ super().__init__(*args, **kwargs) # modify the delta function and point to model if VR is used if self.mlda_variance_reduction: - self.delta_logp = delta_logp_inverse(model.logpt, vars, shared) + self.delta_logp = delta_logp_inverse(initial_values, model.logpt, vars, shared) self.model = model def reset_tuning(self): @@ -400,6 +403,7 @@ def __init__( # assign internal state model = pm.modelcontext(model) + initial_values = model.test_value self.model = model self.coarse_models = coarse_models self.model_below = self.coarse_models[-1] @@ -553,16 +557,18 @@ def __init__( # Construct aesara function for current-level model likelihood # (for use in acceptance) - shared = pm.make_shared_replacements(vars, model) - self.delta_logp = delta_logp_inverse(model.logpt, vars, shared) + shared = pm.make_shared_replacements(initial_values, vars, model) + self.delta_logp = delta_logp_inverse(initial_values, model.logpt, vars, shared) # Construct aesara function for below-level model likelihood # (for use in acceptance) model_below = pm.modelcontext(self.model_below) vars_below = [var for var in model_below.vars if var.name in self.var_names] vars_below = pm.inputvars(vars_below) - shared_below = pm.make_shared_replacements(vars_below, model_below) - self.delta_logp_below = delta_logp(model_below.logpt, vars_below, shared_below) + shared_below = pm.make_shared_replacements(initial_values, vars_below, model_below) + self.delta_logp_below = delta_logp( + initial_values, 
model_below.logpt, vars_below, shared_below + ) super().__init__(vars, shared) @@ -958,8 +964,8 @@ def update(self, x): self.t += 1 -def delta_logp_inverse(logp, vars, shared): - [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) +def delta_logp_inverse(point, logp, vars, shared): + [logp0], inarray0 = pm.join_nonshared_inputs(point, [logp], vars, shared) tensor_type = inarray0.type inarray1 = tensor_type("inarray1") diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py index cb73f67902..0aa008389d 100644 --- a/pymc3/step_methods/pgbart.py +++ b/pymc3/step_methods/pgbart.py @@ -59,6 +59,7 @@ class PGBART(ArrayStepShared): def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", model=None): _log.warning("The BART model is experimental. Use with caution.") model = modelcontext(model) + initial_values = model.test_value vars = inputvars(vars) self.bart = vars[0].distribution @@ -80,8 +81,8 @@ def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", m p = ParticleTree(self.bart.trees[i], self.bart.prior_prob_leaf_node) self.old_trees_particles_list.append(p) - shared = make_shared_replacements(vars, model) - self.likelihood_logp = logp([model.datalogpt], vars, shared) + shared = make_shared_replacements(initial_values, vars, model) + self.likelihood_logp = logp(initial_values, [model.datalogpt], vars, shared) super().__init__(vars, shared) def astep(self, _): @@ -274,7 +275,7 @@ def set_particle_to_step(self, t): self.expansion_nodes = self.expansion_nodes_history[t] -def logp(out_vars, vars, shared): +def logp(point, out_vars, vars, shared): """Compile Aesara function of the model and the input and output variables. Parameters @@ -286,7 +287,7 @@ def logp(out_vars, vars, shared): shared: List containing :class:`aesara.tensor.Tensor` for depended shared data """ - out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) + out_list, inarray0 = join_nonshared_inputs(point, out_vars, vars, shared) f = aesara_function([inarray0], out_list[0]) f.trust_input = True return f diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py index b0320a9eff..2eb401858e 100644 --- a/pymc3/step_methods/slicer.py +++ b/pymc3/step_methods/slicer.py @@ -18,6 +18,7 @@ import numpy.random as nr from pymc3.aesaraf import inputvars +from pymc3.blocking import RaveledVars from pymc3.model import modelcontext from pymc3.step_methods.arraystep import ArrayStep, Competence from pymc3.vartypes import continuous_types @@ -61,24 +62,28 @@ def __init__(self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, * super().__init__(vars, [self.model.fastlogp], **kwargs) def astep(self, q0, logp): - self.w = np.resize(self.w, len(q0)) # this is a repmat - q = np.copy(q0) # TODO: find out if we need this - ql = np.copy(q0) # l for left boundary - qr = np.copy(q0) # r for right boudary - for i in range(len(q0)): + q0_val = q0.data + self.w = np.resize(self.w, len(q0_val)) # this is a repmat + q = np.copy(q0_val) # TODO: find out if we need this + ql = np.copy(q0_val) # l for left boundary + qr = np.copy(q0_val) # r for right boudary + for i in range(len(q0_val)): # uniformly sample from 0 to p(q), but in log space - y = logp(q) - nr.standard_exponential() + q_ra = RaveledVars(q, q0.point_map_info) + y = logp(q_ra) - nr.standard_exponential() ql[i] = q[i] - nr.uniform(0, self.w[i]) qr[i] = q[i] + self.w[i] # Stepping out procedure cnt = 0 - while y <= logp(ql): # changed lt to leq for locally uniform posteriors + while y 
<= logp( + RaveledVars(ql, q0.point_map_info) + ): # changed lt to leq for locally uniform posteriors ql[i] -= self.w[i] cnt += 1 if cnt > self.iter_limit: raise RuntimeError(LOOP_ERR_MSG % self.iter_limit) cnt = 0 - while y <= logp(qr): + while y <= logp(RaveledVars(qr, q0.point_map_info)): qr[i] += self.w[i] cnt += 1 if cnt > self.iter_limit: @@ -86,11 +91,11 @@ def astep(self, q0, logp): cnt = 0 q[i] = nr.uniform(ql[i], qr[i]) - while logp(q) < y: # Changed leq to lt, to accomodate for locally flat posteriors + while logp(q_ra) < y: # Changed leq to lt, to accomodate for locally flat posteriors # Sample uniformly from slice - if q[i] > q0[i]: + if q[i] > q0_val[i]: qr[i] = q[i] - elif q[i] < q0[i]: + elif q[i] < q0_val[i]: ql[i] = q[i] q[i] = nr.uniform(ql[i], qr[i]) cnt += 1 diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py index d0a22745b3..03e79a35f8 100644 --- a/pymc3/tests/sampler_fixtures.py +++ b/pymc3/tests/sampler_fixtures.py @@ -16,6 +16,7 @@ import arviz as az import numpy as np import numpy.testing as npt +import pytest from scipy import stats @@ -81,7 +82,7 @@ class NormalFixture(KnownMean, KnownVariance, KnownCDF): @classmethod def make_model(cls): with pm.Model() as model: - a = pm.Normal("a", mu=2, sigma=np.sqrt(3), shape=10) + a = pm.Normal("a", mu=2, sigma=np.sqrt(3), size=10) return model @@ -91,7 +92,7 @@ class BetaBinomialFixture(KnownCDF): @classmethod def make_model(cls): with pm.Model() as model: - p = pm.Beta("p", [0.5, 0.5, 1.0], [0.5, 0.5, 1.0], shape=3) + p = pm.Beta("p", [0.5, 0.5, 1.0], [0.5, 0.5, 1.0], size=3) pm.Binomial("y", p=p, n=[4, 12, 9], observed=[1, 2, 9]) return model @@ -121,7 +122,7 @@ class LKJCholeskyCovFixture(KnownCDF): def make_model(cls): with pm.Model() as model: sd_mu = np.array([1, 2, 3, 4, 5]) - sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, shape=5) + sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, size=5) chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist) chol = pm.expand_packed_triangular(5, chol_packed, lower=True) cov = aet.dot(chol, chol.T) @@ -140,17 +141,26 @@ def setup_class(cls): cls.model = cls.make_model() with cls.model: cls.step = cls.make_step() - cls.trace = pm.sample(cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains) + cls.trace = pm.sample( + cls.n_samples, + tune=cls.tune, + step=cls.step, + cores=cls.chains, + return_inferencedata=False, + compute_convergence_checks=False, + ) cls.samples = {} for var in cls.model.unobserved_RVs: - cls.samples[get_var_name(var)] = cls.trace.get_values(var.tag.value_var, burn=cls.burn) + cls.samples[get_var_name(var)] = cls.trace.get_values(var, burn=cls.burn) + @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_neff(self): if hasattr(self, "min_n_eff"): n_eff = az.ess(self.trace[self.burn :]) for var in n_eff: npt.assert_array_less(self.min_n_eff, n_eff[var]) + @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_Rhat(self): rhat = az.rhat(self.trace[self.burn :]) for var in rhat: diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 15fd860981..27f5a0b827 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -230,7 +230,11 @@ def build_model(distfam, valuedomain, vardomains, extra_args=None): v_at.name = v param_vars[v] = v_at param_vars.update(extra_args) - distfam("value", **param_vars, transform=None) + distfam( + "value", + **param_vars, + transform=None, + ) return m, param_vars @@ -643,28 
+647,8 @@ def logp_reference(args): domains = paramdomains.copy() domains["value"] = domain for pt in product(domains, n_samples=n_samples): - pt = dict(pt) - pt_d = {} - for k, v in pt.items(): - rv_var = model.named_vars.get(k) - nv = param_vars.get(k, rv_var) - nv = getattr(nv.tag, "value_var", nv) - - transform = getattr(nv.tag, "transform", None) - if transform: - # TODO: The compiled graph behind this should be cached and - # reused (if it isn't already). - v = transform.forward(rv_var, v).eval() - - if nv.name in param_vars: - # Update the shared parameter variables in `param_vars` - param_vars[nv.name].set_value(v) - else: - # Create an argument entry for the (potentially - # transformed) "value" variable - pt_d[nv.name] = v - + pt_d = self._model_input_dict(model, param_vars, pt) pt_logp = Point(pt_d, model=model) pt_ref = Point(pt, filter_model_vars=False, model=model) assert_almost_equal( @@ -674,6 +658,30 @@ def logp_reference(args): err_msg=str(pt), ) + def _model_input_dict(self, model, param_vars, pt): + """Create a dict with only the necessary, transformed logp inputs.""" + pt_d = {} + for k, v in pt.items(): + rv_var = model.named_vars.get(k) + nv = param_vars.get(k, rv_var) + nv = getattr(nv.tag, "value_var", nv) + + transform = getattr(nv.tag, "transform", None) + if transform: + # todo: the compiled graph behind this should be cached and + # reused (if it isn't already). + v = transform.forward(rv_var, v).eval() + + if nv.name in param_vars: + # update the shared parameter variables in `param_vars` + param_vars[nv.name].set_value(v) + else: + # create an argument entry for the (potentially + # transformed) "value" variable + pt_d[nv.name] = v + + return pt_d + def check_logcdf( self, pymc3_dist, @@ -970,6 +978,7 @@ def test_normal(self): R, {"mu": R, "sigma": Rplus}, lambda value, mu, sigma: sp.norm.logcdf(value, mu, sigma), + decimal=select_by_precision(float64=6, float32=1), ) @pytest.mark.xfail(reason="Distribution not refactored yet") @@ -1012,11 +1021,7 @@ def test_chi_squared(self): ) @pytest.mark.xfail(reason="Distribution not refactored yet") - @pytest.mark.xfail( - condition=(aesara.config.floatX == "float32"), - reason="Poor CDF in SciPy. See scipy/scipy#869 for details.", - ) - def test_wald_scipy(self): + def test_wald_logp(self): self.check_logp( Wald, Rplus, @@ -1024,6 +1029,13 @@ def test_wald_scipy(self): lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha), decimal=select_by_precision(float64=6, float32=1), ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Poor CDF in SciPy. 
See scipy/scipy#869 for details.", + ) + def test_wald_logcdf(self): self.check_logcdf( Wald, Rplus, @@ -1075,6 +1087,7 @@ def test_beta(self): {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta), n_samples=10, + decimal=select_by_precision(float64=5, float32=3), ) @pytest.mark.xfail(reason="Distribution not refactored yet") @@ -1158,7 +1171,6 @@ def modified_scipy_hypergeom_logcdf(value, N, k, n): {"N": NatSmall, "k": NatSmall, "n": NatSmall}, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_negative_binomial(self): def scipy_mu_alpha_logpmf(value, mu, alpha): return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha)) @@ -1337,7 +1349,7 @@ def test_gamma_logcdf(self): skip_paramdomain_outside_edge_test=True, ) - def test_inverse_gamma(self): + def test_inverse_gamma_logp(self): self.check_logp( InverseGamma, Rplus, @@ -1346,6 +1358,14 @@ def test_inverse_gamma(self): ) # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set # True to avoid triggering a C-level assertion in the Aesara GammaQ function + + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to numerical issues", + ) + def test_inverse_gamma_logcdf(self): + # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set + # True to avoid triggering a C-level assertion in the Aesara GammaQ function # in gamma.c file. Can be set back to False (default) once that issue is solved self.check_logcdf( InverseGamma, @@ -1387,18 +1407,21 @@ def test_pareto(self): lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m), ) - @pytest.mark.xfail( - condition=(aesara.config.floatX == "float32"), - reason="Fails on float32 due to inf issues", - ) @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_weibull(self): + def test_weibull_logp(self): self.check_logp( Weibull, Rplus, {"alpha": Rplusbig, "beta": Rplusbig}, lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta), ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", + ) + def test_weibull_logcdf(self): self.check_logcdf( Weibull, Rplus, @@ -1448,29 +1471,42 @@ def test_binomial(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") - @pytest.mark.xfail( - condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" - ) @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_beta_binomial(self): + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") + def test_beta_binomial_distribution(self): self.checkd( BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.skipif( + condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" + ) + def test_beta_binomial_logp(self): self.check_logp( BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n), ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.skipif( + condition=(SCIPY_VERSION < 
parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" + ) + def test_beta_binomial_logcdf(self): self.check_logcdf( BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n), ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_beta_binomial_selfconsistency(self): self.check_selfconsistency_discrete_logcdf( BetaBinomial, Nat, @@ -1563,14 +1599,20 @@ def test_constantdist(self): self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value)) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_zeroinflatedpoisson(self): + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", + ) + def test_zeroinflatedpoisson_distribution(self): self.checkd( ZeroInflatedPoisson, Nat, {"theta": Rplus, "psi": Unit}, ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_zeroinflatedpoisson_logcdf(self): self.check_selfconsistency_discrete_logcdf( ZeroInflatedPoisson, Nat, @@ -1578,14 +1620,20 @@ def test_zeroinflatedpoisson(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_zeroinflatednegativebinomial(self): + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", + ) + def test_zeroinflatednegativebinomial_distribution(self): self.checkd( ZeroInflatedNegativeBinomial, Nat, {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit}, ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_zeroinflatednegativebinomial_logcdf(self): self.check_selfconsistency_discrete_logcdf( ZeroInflatedNegativeBinomial, Nat, @@ -1594,7 +1642,6 @@ def test_zeroinflatednegativebinomial(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") def test_zeroinflatedbinomial(self): self.checkd( @@ -1602,6 +1649,9 @@ def test_zeroinflatedbinomial(self): Nat, {"n": NatSmall, "p": Unit, "psi": Unit}, ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_zeroinflatedbinomial_logcdf(self): self.check_selfconsistency_discrete_logcdf( ZeroInflatedBinomial, Nat, @@ -1609,7 +1659,6 @@ def test_zeroinflatedbinomial(self): n_samples=10, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize("n", [1, 2, 3]) def test_mvnormal(self, n): self.check_logp( @@ -1617,6 +1666,7 @@ def test_mvnormal(self, n): RealMatrix(5, n), {"mu": Vector(R, n), "tau": PdMatrix(n)}, normal_logpdf_tau, + extra_args={"size": 5}, ) self.check_logp( MvNormal, @@ -1629,6 +1679,7 @@ def test_mvnormal(self, n): RealMatrix(5, n), {"mu": Vector(R, n), "cov": PdMatrix(n)}, normal_logpdf_cov, + extra_args={"size": 5}, ) self.check_logp( MvNormal, @@ -1642,6 +1693,7 @@ def test_mvnormal(self, n): {"mu": Vector(R, n), "chol": PdMatrixChol(n)}, normal_logpdf_chol, decimal=select_by_precision(float64=6, float32=-1), + extra_args={"size": 5}, ) self.check_logp( MvNormal, @@ -1650,23 
+1702,19 @@ def test_mvnormal(self, n): normal_logpdf_chol, decimal=select_by_precision(float64=6, float32=0), ) - - def MvNormalUpper(*args, **kwargs): - return MvNormal(lower=False, *args, **kwargs) - self.check_logp( - MvNormalUpper, + MvNormal, Vector(R, n), {"mu": Vector(R, n), "chol": PdMatrixCholUpper(n)}, normal_logpdf_chol_upper, decimal=select_by_precision(float64=6, float32=0), + extra_args={"lower": False}, ) @pytest.mark.xfail( condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to inf issues", ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_mvnormal_indef(self): cov_val = np.array([[1, 0.5], [0.5, -2]]) cov = aet.matrix("cov") @@ -1681,14 +1729,13 @@ def test_mvnormal_indef(self): f_dlogp = aesara.function([cov, x], dlogp) assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2)))) - logp = logp(MvNormal.dist(mu=mu, tau=cov), x) + logp = logpt(MvNormal.dist(mu=mu, tau=cov), x) f_logp = aesara.function([cov, x], logp) assert f_logp(cov_val, np.ones(2)) == -np.inf dlogp = aet.grad(logp, cov) f_dlogp = aesara.function([cov, x], dlogp) assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2)))) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_mvnormal_init_fail(self): with Model(): with pytest.raises(ValueError): @@ -1873,10 +1920,13 @@ def test_dirichlet_with_batch_shapes(self, dist_shape): with pm.Model() as model: d = pm.Dirichlet("d", a=a) + # Generate sample points to test d_value = d.tag.value_var - d_point = d.eval() + d_point = d.eval().astype("float64") + d_point /= d_point.sum(axis=-1)[..., None] + if hasattr(d_value.tag, "transform"): - d_point_trans = d_value.tag.transform.forward(d, d_point).eval() + d_point_trans = d_value.tag.transform.forward(d, aet.as_tensor(d_point)).eval() else: d_point_trans = d_point @@ -2414,15 +2464,23 @@ def test_rice(self): lambda value, b, sigma: sp.rice.logpdf(value, b=b, loc=0, scale=sigma), ) - @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_moyal(self): + def test_moyal_logp(self): + # Using a custom domain, because the standard `R` domain undeflows with scipy in float64 + value_domain = Domain([-inf, -1.5, -1, -0.01, 0.0, 0.01, 1, 1.5, inf]) self.check_logp( Moyal, - R, + value_domain, {"mu": R, "sigma": Rplusbig}, lambda value, mu, sigma: floatX(sp.moyal.logpdf(value, mu, sigma)), ) + + @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Pymc3 underflows earlier than scipy on float32", + ) + def test_moyal_logcdf(self): self.check_logcdf( Moyal, R, @@ -2726,6 +2784,18 @@ def test_issue_3051(self, dims, dist_cls, kwargs): assert isinstance(actual_a, np.ndarray) assert actual_a.shape == (X.shape[0],) + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_issue_4499(self): + # Test for bug in Uniform and DiscreteUniform logp when setting check_bounds = False + # https://github.com/pymc-devs/pymc3/issues/4499 + with pm.Model(check_bounds=False) as m: + x = pm.Uniform("x", 0, 2, shape=10, transform=None) + assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + + with pm.Model(check_bounds=False) as m: + x = pm.DiscreteUniform("x", 0, 1, shape=10) + assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + @pytest.mark.xfail(reason="DensityDist no longer supported") def test_serialize_density_dist(): diff 
--git a/pymc3/tests/test_ndarray_backend.py b/pymc3/tests/test_ndarray_backend.py index b4744373ed..c46ed73a2e 100644 --- a/pymc3/tests/test_ndarray_backend.py +++ b/pymc3/tests/test_ndarray_backend.py @@ -272,8 +272,8 @@ def test_sample_posterior_predictive(self, tmpdir_factory): ppc = pm.sample_posterior_predictive(self.trace) with TestSaveLoad.model() as model: - model.default_rng.get_value(borrow=True).seed(10) trace2 = pm.load_trace(directory) + model.default_rng.get_value(borrow=True).seed(10) ppc2 = pm.sample_posterior_predictive(trace2) ppc2f = pm.sample_posterior_predictive(trace2) diff --git a/pymc3/tests/test_posteriors.py b/pymc3/tests/test_posteriors.py index 8ac068bd75..91ecd05f1f 100644 --- a/pymc3/tests/test_posteriors.py +++ b/pymc3/tests/test_posteriors.py @@ -76,6 +76,7 @@ class TestNUTSBetaBinomial(sf.NutsFixture, sf.BetaBinomialFixture): min_n_eff = 400 +@pytest.mark.xfail(reason="StudentT not refactored for v4") class TestNUTSStudentT(sf.NutsFixture, sf.StudentTFixture): n_samples = 10000 tune = 1000 @@ -97,6 +98,7 @@ class TestNUTSNormalLong(sf.NutsFixture, sf.NormalFixture): atol = 0.001 +@pytest.mark.xfail(reason="StudentT not refactored for v4") class TestNUTSLKJCholeskyCov(sf.NutsFixture, sf.LKJCholeskyCovFixture): n_samples = 2000 tune = 1000 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index ff1d1473f0..150675b43c 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -159,13 +159,19 @@ def test_trace_report(self, step_cls, discard): # add more variables, because stats are 2D with CompoundStep! pm.Uniform("uni") trace = pm.sample( - draws=100, tune=50, cores=1, discard_tuned_samples=discard, step=step_cls() + draws=100, + tune=50, + cores=1, + discard_tuned_samples=discard, + step=step_cls(), + compute_convergence_checks=False, + return_inferencedata=False, ) assert trace.report.n_tune == 50 assert trace.report.n_draws == 100 assert isinstance(trace.report.t_sampling, float) - pass + @pytest.mark.xfail(reason="BART not refactored for v4") def test_trace_report_bart(self): X = np.random.normal(0, 1, size=(3, 250)).T Y = np.random.normal(0, 1, size=250) @@ -218,7 +224,6 @@ def test_return_inferencedata(self, monkeypatch): monkeypatch.setattr("pymc3.__version__", "3.10") with pytest.warns(FutureWarning, match="pass return_inferencedata"): result = pm.sample(**kwargs) - pass @pytest.mark.parametrize("cores", [1, 2]) def test_sampler_stat_tune(self, cores): @@ -228,15 +233,14 @@ def test_sampler_stat_tune(self, cores): ).get_sampler_stats("tune", chains=1) assert list(tune_stat).count(True) == 5 assert list(tune_stat).count(False) == 7 - pass @pytest.mark.parametrize( "start, error", [ ([1, 2], TypeError), - ({"x": 1}, ValueError), + ({"x": 1}, TypeError), ({"x": [1, 2, 3]}, ValueError), - ({"x": np.array([[1, 1], [1, 1]])}, ValueError), + ({"x": np.array([[1, 1], [1, 1]])}, TypeError), ], ) def test_sample_start_bad_shape(self, start, error): @@ -285,6 +289,7 @@ def callback(trace, draw): assert len(trace) == trace_cancel_length +@pytest.mark.xfail(reason="Lognormal not refactored for v4") def test_sample_find_MAP_does_not_modify_start(): # see https://github.com/pymc-devs/pymc3/pull/4458 with pm.Model(): @@ -319,6 +324,7 @@ def test_partial_trace_sample(): a = pm.Normal("a", mu=0, sigma=1) b = pm.Normal("b", mu=0, sigma=1) trace = pm.sample(trace=[a]) + # TODO: Assert something to make this a real test @pytest.mark.parametrize( @@ -358,13 +364,13 @@ def test_shared_named(self): "theta0", 
mu=np.atleast_2d(0), tau=np.atleast_2d(1e20), - shape=(1, 1), + size=(1, 1), testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) ) - res = theta.random() + res = theta.eval() assert np.isclose(res, 0.0) def test_shared_unnamed(self): @@ -374,13 +380,13 @@ def test_shared_unnamed(self): "theta0", mu=np.atleast_2d(0), tau=np.atleast_2d(1e20), - shape=(1, 1), + size=(1, 1), testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) ) - res = theta.random() + res = theta.eval() assert np.isclose(res, 0.0) def test_constant_named(self): @@ -390,14 +396,14 @@ def test_constant_named(self): "theta0", mu=np.atleast_2d(0), tau=np.atleast_2d(1e20), - shape=(1, 1), + size=(1, 1), testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), size=(1, 1) ) - res = theta.random() + res = theta.eval() assert np.isclose(res, 0.0) @@ -420,12 +426,16 @@ def test_normal_scalar(self): with pm.Model() as model: mu = pm.Normal("mu", 0.0, 1.0) a = pm.Normal("a", mu=mu, sigma=1, observed=0.0) - trace = pm.sample(draws=ndraws, chains=nchains) + trace = pm.sample( + draws=ndraws, + chains=nchains, + return_inferencedata=False, + ) with model: # test list input ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10) - # deprecated argument is not introduced to fast version [2019/08/20:rpg] + # # deprecated argument is not introduced to fast version [2019/08/20:rpg] ppc = pm.sample_posterior_predictive(trace, var_names=["a"]) # test empty ppc ppc = pm.sample_posterior_predictive(trace, var_names=[]) @@ -435,11 +445,6 @@ def test_normal_scalar(self): ppc = pm.sample_posterior_predictive(trace, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) - # test keep_size parameter and idata input - idata = az.from_pymc3(trace) - ppc = pm.sample_posterior_predictive(idata, keep_size=True) - assert ppc["a"].shape == (nchains, ndraws) - # test default case ppc = pm.sample_posterior_predictive(trace, var_names=["a"]) assert "a" in ppc @@ -453,11 +458,28 @@ def test_normal_scalar(self): ppc = pm.sample_posterior_predictive(trace, size=5, var_names=["a"]) assert ppc["a"].shape == (nchains * ndraws, 5) + @pytest.mark.xfail(reason="Arviz not refactored for v4") + def test_normal_scalar_idata(self): + nchains = 2 + ndraws = 500 + with pm.Model() as model: + mu = pm.Normal("mu", 0.0, 1.0) + a = pm.Normal("a", mu=mu, sigma=1, observed=0.0) + trace = pm.sample( + draws=ndraws, chains=nchains, return_inferencedata=True, discard_tuned_samples=False + ) + + with model: + # test keep_size parameter and idata input + idata = az.from_pymc3(trace) + ppc = pm.sample_posterior_predictive(idata, keep_size=True) + assert ppc["a"].shape == (nchains, ndraws) + def test_normal_vector(self, caplog): with pm.Model() as model: mu = pm.Normal("mu", 0.0, 1.0) a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) - trace = pm.sample() + trace = pm.sample(return_inferencedata=False) with model: # test list input @@ -473,10 +495,6 @@ def test_normal_vector(self, caplog): assert "a" in ppc assert ppc["a"].shape == (12, 2) - # test keep_size parameter with inference data as input... 
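These test updates showcase two idioms used throughout the patch: distributions take `size` rather than `shape`, and a draw is obtained by evaluating the variable instead of calling `.random()`. Minimal sketch:

with pm.Model():
    theta = pm.Normal("theta", mu=0.0, tau=1e20, size=(1, 1))
    draw = theta.eval()   # replaces the old theta.random()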
- idata = az.from_pymc3(trace) - ppc = pm.sample_posterior_predictive(idata, keep_size=True) - assert ppc["a"].shape == (trace.nchains, len(trace), 2) with pytest.warns(UserWarning): ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=["a"]) assert "a" in ppc @@ -487,6 +505,19 @@ def test_normal_vector(self, caplog): assert "a" in ppc assert ppc["a"].shape == (10, 4, 2) + @pytest.mark.xfail(reason="Arviz not refactored for v4") + def test_normal_vector_idata(self, caplog): + with pm.Model() as model: + mu = pm.Normal("mu", 0.0, 1.0) + a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) + trace = pm.sample(return_inferencedata=False) + + with model: + # test keep_size parameter with inference data as input... + idata = az.from_pymc3(trace) + ppc = pm.sample_posterior_predictive(idata, keep_size=True) + assert ppc["a"].shape == (trace.nchains, len(trace), 2) + def test_exceptions(self, caplog): with pm.Model() as model: mu = pm.Normal("mu", 0.0, 1.0) @@ -542,6 +573,7 @@ def test_sum_normal(self): _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf) assert pval > 0.001 + @pytest.mark.xfail(reason="HalfFlat not refactored for v4") def test_model_not_drawable_prior(self): data = np.random.poisson(lam=10, size=200) model = pm.Model() @@ -567,7 +599,7 @@ def test_model_shared_variable(self): logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared)) obs = pm.Bernoulli("obs", p=logistic, observed=y_shared) - trace = pm.sample(100) + trace = pm.sample(100, return_inferencedata=False, compute_convergence_checks=False) x_shared.set_value([-1, 0, 1.0]) y_shared.set_value([0, 0, 0]) @@ -599,15 +631,22 @@ def test_deterministic_of_observed(self): out_diff = in_1 + in_2 pm.Deterministic("out", out_diff) - trace = pm.sample(100, chains=nchains) - np.random.seed(0) + model.default_rng.get_value(borrow=True).seed(0) + trace = pm.sample( + 100, + chains=nchains, + return_inferencedata=False, + compute_convergence_checks=False, + ) + rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-4 - np.random.seed(0) + model.default_rng.get_value(borrow=True).seed(0) ppc = pm.sample_posterior_predictive( model=model, trace=trace, samples=len(trace) * nchains, + random_seed=0, var_names=[var.name for var in (model.deterministics + model.basic_RVs)], ) @@ -627,7 +666,11 @@ def test_deterministic_of_observed_modified_interface(self): out_diff = in_1 + in_2 pm.Deterministic("out", out_diff) - trace = pm.sample(100) + trace = pm.sample( + 100, + return_inferencedata=False, + compute_convergence_checks=False, + ) ppc_trace = pm.trace_to_dataframe( trace, varnames=[n for n in trace.varnames if n != "out"] ).to_dict("records") @@ -646,7 +689,7 @@ def test_variable_type(self): mu = pm.HalfNormal("mu", 1) a = pm.Normal("a", mu=mu, sigma=2, observed=np.array([1, 2])) b = pm.Poisson("b", mu, observed=np.array([1, 2])) - trace = pm.sample() + trace = pm.sample(compute_convergence_checks=False, return_inferencedata=False) with model: ppc = pm.sample_posterior_predictive(trace, samples=1) @@ -667,6 +710,7 @@ def test_potentials_warning(self): class TestSamplePPCW(SeededTest): + @pytest.mark.xfail(reason="sample_posterior_predictive_w not refactored for v4") def test_sample_posterior_predictive_w(self): data0 = np.random.normal(0, 1, size=50) warning_msg = "The number of samples is too small to check convergence reliably" @@ -676,14 +720,14 @@ def test_sample_posterior_predictive_w(self): y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, 
match=warning_msg): trace_0 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_0 = az.from_pymc3(trace_0) + idata_0 = az.from_pymc3(trace_0, log_likelihood=False) with pm.Model() as model_1: - mu = pm.Normal("mu", mu=0, sigma=1, shape=len(data0)) + mu = pm.Normal("mu", mu=0, sigma=1, size=len(data0)) y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_1 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_1 = az.from_pymc3(trace_1) + idata_1 = az.from_pymc3(trace_1, log_likelihood=False) with pm.Model() as model_2: # Model with no observed RVs. @@ -716,6 +760,7 @@ def test_sample_posterior_predictive_w(self): ): pm.sample_posterior_predictive_w([trace_0, trace_2], 100, [model_0, model_2]) + @pytest.mark.xfail(reason="sample_posterior_predictive_w not refactored for v4") def test_potentials_warning(self): warning_msg = "The effect of Potentials on other parameters is ignored during" with pm.Model() as m: @@ -731,32 +776,33 @@ def test_potentials_warning(self): @pytest.mark.parametrize( "method", [ - "jitter+adapt_diag", - "adapt_diag", "advi", "ADVI+adapt_diag", "advi+adapt_diag_grad", - "map", "advi_map", + "jitter+adapt_diag", + "adapt_diag", + "map", "adapt_full", "jitter+adapt_full", ], ) +@pytest.mark.xfail(reason="ADVI not refactored for v4", exception=NotImplementedError) def test_exec_nuts_init(method): with pm.Model() as model: - pm.Normal("a", mu=0, sigma=1, shape=2) + pm.Normal("a", mu=0, sigma=1, size=2) pm.HalfNormal("b", sigma=1) with model: start, _ = pm.init_nuts(init=method, n_init=10) assert isinstance(start, list) assert len(start) == 1 assert isinstance(start[0], dict) - assert "a" in start[0] and "b_log__" in start[0] + assert "a" in start[0] and "b" in start[0] start, _ = pm.init_nuts(init=method, n_init=10, chains=2) assert isinstance(start, list) assert len(start) == 2 assert isinstance(start[0], dict) - assert "a" in start[0] and "b_log__" in start[0] + assert "a" in start[0] and "b" in start[0] @pytest.mark.parametrize( @@ -843,8 +889,8 @@ def test_ignores_observed(self): def test_respects_shape(self): for shape in (2, (2,), (10, 2), (10, 10)): with pm.Model(): - mu = pm.Gamma("mu", 3, 1, shape=1) - goals = pm.Poisson("goals", mu, shape=shape) + mu = pm.Gamma("mu", 3, 1, size=1) + goals = pm.Poisson("goals", mu, size=shape) trace1 = pm.sample_prior_predictive(10, var_names=["mu", "mu", "goals"]) trace2 = pm.sample_prior_predictive(10, var_names=["mu", "goals"]) if shape == 2: # want to test shape as an int @@ -854,32 +900,34 @@ def test_respects_shape(self): def test_multivariate(self): with pm.Model(): - m = pm.Multinomial("m", n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4) + m = pm.Multinomial("m", n=5, p=np.array([0.25, 0.25, 0.25, 0.25])) trace = pm.sample_prior_predictive(10) - assert m.random(size=10).shape == (10, 4) assert trace["m"].shape == (10, 4) def test_multivariate2(self): # Added test for issue #3271 mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10) with pm.Model() as dm_model: - probs = pm.Dirichlet("probs", a=np.ones(6), shape=6) + probs = pm.Dirichlet("probs", a=np.ones(6)) obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data) - burned_trace = pm.sample(20, tune=10, cores=1) + burned_trace = pm.sample( + 20, tune=10, cores=1, return_inferencedata=False, compute_convergence_checks=False + ) sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model) sim_ppc = pm.sample_posterior_predictive(burned_trace, samples=20, 
model=dm_model) assert sim_priors["probs"].shape == (20, 6) - assert sim_priors["obs"].shape == (20,) + obs.distribution.shape - assert sim_ppc["obs"].shape == (20,) + obs.distribution.shape + assert sim_priors["obs"].shape == (20,) + mn_data.shape + assert sim_ppc["obs"].shape == (20,) + mn_data.shape def test_layers(self): with pm.Model() as model: - a = pm.Uniform("a", lower=0, upper=1, shape=10) - b = pm.Binomial("b", n=1, p=a, shape=10) + a = pm.Uniform("a", lower=0, upper=1, size=10) + b = pm.Binomial("b", n=1, p=a, size=10) - avg = b.random(size=10000).mean(axis=0) - npt.assert_array_almost_equal(avg, 0.5 * np.ones_like(b), decimal=2) + b_sampler = aesara.function([], b) + avg = np.stack([b_sampler() for i in range(10000)]).mean(0) + npt.assert_array_almost_equal(avg, 0.5 * np.ones((10,)), decimal=2) def test_transformed(self): n = 18 @@ -893,14 +941,14 @@ def test_transformed(self): kappa_log = pm.Exponential("logkappa", lam=5.0) kappa = pm.Deterministic("kappa", aet.exp(kappa_log)) - thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=n) + thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, size=n) y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits) gen = pm.sample_prior_predictive(draws) assert gen["phi"].shape == (draws,) assert gen["y"].shape == (draws, n) - assert "thetas_logodds__" in gen + assert "thetas" in gen def test_shared(self): n1 = 10 @@ -921,6 +969,7 @@ def test_shared(self): assert gen2["y"].shape == (draws, n2) + @pytest.mark.xfail(reason="DensityDist not refactored for v4") def test_density_dist(self): obs = np.random.normal(-1, 0.1, size=10) with pm.Model(): @@ -938,26 +987,28 @@ def test_density_dist(self): def test_shape_edgecase(self): with pm.Model(): - mu = pm.Normal("mu", shape=5) + mu = pm.Normal("mu", size=5) sd = pm.Uniform("sd", lower=2, upper=3) - x = pm.Normal("x", mu=mu, sigma=sd, shape=5) + x = pm.Normal("x", mu=mu, sigma=sd, size=5) prior = pm.sample_prior_predictive(10) assert prior["mu"].shape == (10, 5) + @pytest.mark.xfail(reason="ZeroInflatedPoisson not refactored for v4") def test_zeroinflatedpoisson(self): with pm.Model(): theta = pm.Beta("theta", alpha=1, beta=1) psi = pm.HalfNormal("psi", sd=1) - pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, shape=20) + pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, size=20) gen_data = pm.sample_prior_predictive(samples=5000) assert gen_data["theta"].shape == (5000,) assert gen_data["psi"].shape == (5000,) assert gen_data["suppliers"].shape == (5000, 20) + @pytest.mark.xfail(reason="Bound not refactored for v4") def test_bounded_dist(self): with pm.Model() as model: BoundedNormal = pm.Bound(pm.Normal, lower=0.0) - x = BoundedNormal("x", mu=aet.zeros((3, 1)), sd=1 * aet.ones((3, 1)), shape=(3, 1)) + x = BoundedNormal("x", mu=aet.zeros((3, 1)), sd=1 * aet.ones((3, 1)), size=(3, 1)) with model: prior_trace = pm.sample_prior_predictive(5) @@ -980,6 +1031,7 @@ def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): with pmodel: pp = pm.sample_posterior_predictive([trace[15]], var_names=["d"]) + @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture @@ -989,6 +1041,7 @@ def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture): with pmodel: pp = pm.sample_posterior_predictive(idat.prior, var_names=["d"]) + @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_posterior(self, 
point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture idat = az.from_pymc3(trace) diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index cedf33c37f..ad4407b8d9 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -47,6 +47,8 @@ def check_transform(transform, domain, constructor=aet.dscalar, test=0, rv_var=None): x = constructor("x") x.tag.test_value = test + if rv_var is None: + rv_var = x # test forward and forward_val # FIXME: What's being tested here? That the transformed graph can compile? forward_f = aesara.function([x], transform.forward(rv_var, x)) @@ -63,6 +65,8 @@ def check_vector_transform(transform, domain, rv_var=None): def get_values(transform, domain=R, constructor=aet.dscalar, test=0, rv_var=None): x = constructor("x") x.tag.test_value = test + if rv_var is None: + rv_var = x f = aesara.function([x], transform.backward(rv_var, x)) return np.array([f(val) for val in domain.vals]) @@ -79,6 +83,9 @@ def check_jacobian_det( y = constructor("y") y.tag.test_value = test + if rv_var is None: + rv_var = y + x = transform.backward(rv_var, y) if make_comparable: x = make_comparable(x) @@ -125,7 +132,7 @@ def test_stickbreaking_accuracy(): x = aet.dvector("x") x.tag.test_value = val identity_f = aesara.function( - [x], tr.stick_breaking.forward(None, tr.stick_breaking.backward(None, x)) + [x], tr.stick_breaking.forward(x, tr.stick_breaking.backward(x, x)) ) close_to(val, identity_f(val), tol) @@ -300,7 +307,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): assert logpt(x).ndim == jacob_det.ndim # Hack to get relative tolerance - a = logpt(x, array, jacobian=False).eval() + a = logpt(x, array.astype(aesara.config.floatX), jacobian=False).eval() b = logp_nojac.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index ad2ae9ed3d..3475f0cbac 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -29,7 +29,7 @@ import pymc3 as pm from pymc3.aesaraf import inputvars -from pymc3.blocking import DictToArrayBijection +from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.model import Point, modelcontext from pymc3.util import ( check_start_vals, @@ -107,14 +107,22 @@ def find_MAP( start = Point(start, model=model) - logp_func = DictToArrayBijection.mapf(model.fastlogp_nojac) x0 = DictToArrayBijection.map(start) + # TODO: If the mapping is fixed, we can simply create graphs for the + # mapping and avoid all this bijection overhead + def logp_func(x): + return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(x, x0.point_map_info)) + try: # This might be needed for calls to `dlogp_func` # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars) - dlogp_func = DictToArrayBijection.mapf(model.fastdlogp_nojac(vars)) + def dlogp_func(x): + return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))( + RaveledVars(x, x0.point_map_info) + ) + compute_gradient = True except (AttributeError, NotImplementedError, tg.NullTypeGradError): compute_gradient = False @@ -135,7 +143,9 @@ def find_MAP( cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) try: - opt_result = minimize(cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs) + opt_result = minimize( + cost_func, x0.data, method=method, jac=compute_gradient, *args, **kwargs + ) mx0 = opt_result["x"] # r -> opt_result except (KeyboardInterrupt, StopIteration) as e: mx0, opt_result = cost_func.previous_x, None @@ 
-149,6 +159,8 @@ def find_MAP( cost_func.progress.update(last_v) print() + mx0 = RaveledVars(mx0, x0.point_map_info) + vars = get_default_varnames( [v.tag.value_var for v in model.unobserved_RVs], include_transformed ) diff --git a/pymc3/util.py b/pymc3/util.py index 459968aa28..a4deff209a 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -199,6 +199,10 @@ def check_start_vals(start, model): """ start_points = [start] if isinstance(start, dict) else start for elem in start_points: + + for k, v in elem.items(): + elem[k] = np.asarray(v, dtype=model[k].dtype) + if not set(elem.keys()).issubset(model.named_vars.keys()): extra_keys = ", ".join(set(elem.keys()) - set(model.named_vars.keys())) valid_keys = ", ".join(model.named_vars.keys()) diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index d7f62167e2..bf75a63719 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -63,6 +63,7 @@ from pymc3.model import modelcontext from pymc3.util import get_default_varnames, get_transformed from pymc3.variational.updates import adagrad_window +from pymc3.vartypes import discrete_types __all__ = ["ObjectiveFunction", "Operator", "TestFunction", "Group", "Approximation"] @@ -831,6 +832,9 @@ def __init__( options=None, **kwargs, ): + # XXX: Needs to be refactored for v4 + raise NotImplementedError("This class needs to be refactored for v4") + if local and not self.supports_batched: raise LocalGroupError("%s does not support local groups" % self.__class__) if local and rowwise: @@ -957,7 +961,7 @@ def __init_group__(self, group): # self.ordering = ArrayOrdering([]) self.replacements = dict() for var in self.group: - if isinstance(var.distribution, pm.Discrete): + if var.type.numpy_dtype.name in discrete_types: raise ParametrizationError(f"Discrete variables are not supported by VI: {var}") begin = self.ddim if self.batched:
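
Note: as a quick illustration of the start-value coercion added to check_start_vals in pymc3/util.py above (each user-supplied start value is cast to the dtype of the corresponding model variable before validation), here is a minimal, self-contained sketch. It uses plain dicts and NumPy only; the helper name coerce_start_point and the dtypes mapping are invented for this example and are not part of the PyMC3 API.

import numpy as np

def coerce_start_point(start, dtypes):
    """Cast each start value to its variable's dtype, mirroring the
    loop added to pymc3.util.check_start_vals in this diff."""
    coerced = {}
    for name, value in start.items():
        # Python scalars and lists become ndarrays with the model dtype
        coerced[name] = np.asarray(value, dtype=dtypes[name])
    return coerced

# e.g. a float64 variable "x" and an int64 variable "n":
point = coerce_start_point({"x": 1, "n": [2, 3]}, {"x": "float64", "n": "int64"})
assert point["x"].dtype == np.float64
assert point["n"].dtype == np.int64

The point of the coercion is that downstream code (e.g. the shared-variable replacements and RaveledVars handling introduced elsewhere in this changeset) can then rely on start points already matching the variables' dtypes and shapes, rather than normalizing them at every call site.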