diff --git a/docs/source/api/distributions/discrete.rst b/docs/source/api/distributions/discrete.rst index ee20b28abc..88d81682db 100644 --- a/docs/source/api/distributions/discrete.rst +++ b/docs/source/api/distributions/discrete.rst @@ -15,10 +15,12 @@ Discrete ZeroInflatedNegativeBinomial DiscreteUniform Geometric + HyperGeometric Categorical DiscreteWeibull Constant OrderedLogistic + OrderedProbit .. automodule:: pymc3.distributions.discrete :members: diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index d92dad0cfe..807b483712 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -63,7 +63,6 @@ Binomial, Categorical, Constant, - ConstantDist, DiscreteUniform, DiscreteWeibull, Geometric, @@ -138,7 +137,6 @@ "Bernoulli", "Poisson", "NegativeBinomial", - "ConstantDist", "Constant", "ZeroInflatedPoisson", "ZeroInflatedNegativeBinomial", diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 9e03eb6d61..a2b9bd4f47 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -253,8 +253,6 @@ class Uniform(BoundedContinuous): def dist(cls, lower=0, upper=1, **kwargs): lower = at.as_tensor_variable(floatX(lower)) upper = at.as_tensor_variable(floatX(upper)) - # mean = (upper + lower) / 2.0 - # median = self.mean return super().dist([lower, upper], **kwargs) def logp(value, lower, upper): @@ -270,7 +268,11 @@ def logp(value, lower, upper): ------- TensorVariable """ - return bound(-at.log(upper - lower), value >= lower, value <= upper) + return bound( + at.fill(value, -at.log(upper - lower)), + value >= lower, + value <= upper, + ) def logcdf(value, lower, upper): """ diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 03672ac41e..ae3fba4639 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -11,8 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import warnings - import aesara.tensor as at import numpy as np @@ -42,6 +40,7 @@ normal_lcdf, ) from pymc3.distributions.distribution import Discrete +from pymc3.distributions.logp import _logcdf, _logp from pymc3.math import log1mexp, logaddexp, logsumexp, sigmoid __all__ = [ @@ -51,7 +50,6 @@ "DiscreteWeibull", "Poisson", "NegativeBinomial", - "ConstantDist", "Constant", "ZeroInflatedPoisson", "ZeroInflatedBinomial", @@ -671,7 +669,7 @@ def NegBinom(a, m, x): @classmethod def dist(cls, mu=None, alpha=None, p=None, n=None, *args, **kwargs): - n, p = cls.get_n_p(mu, alpha, p, n) + n, p = cls.get_n_p(mu=mu, alpha=alpha, p=p, n=n) n = at.as_tensor_variable(floatX(n)) p = at.as_tensor_variable(floatX(p)) return super().dist([n, p], *args, **kwargs) @@ -970,6 +968,21 @@ def logcdf(value, good, bad, n): ) +class DiscreteUniformRV(RandomVariable): + name = "discrete_uniform" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "int64" + _print_name = ("DiscreteUniform", "\\operatorname{DiscreteUniform}") + + @classmethod + def rng_fn(cls, rng, lower, upper, size=None): + return stats.randint.rvs(lower, upper + 1, size=size, random_state=rng) + + +discrete_uniform = DiscreteUniformRV() + + class DiscreteUniform(Discrete): R""" Discrete uniform distribution. @@ -1010,39 +1023,15 @@ class DiscreteUniform(Discrete): Upper limit (upper > lower). 
""" - def __init__(self, lower, upper, *args, **kwargs): - super().__init__(*args, **kwargs) - self.lower = intX(at.floor(lower)) - self.upper = intX(at.floor(upper)) - self.mode = at.maximum(intX(at.floor((upper + lower) / 2.0)), self.lower) + rv_op = discrete_uniform - def _random(self, lower, upper, size=None): - # This way seems to be the only to deal with lower and upper - # as array-like. - samples = stats.randint.rvs(lower, upper + 1, size=size) - return samples - - def random(self, point=None, size=None): - r""" - Draw random values from DiscreteUniform distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - # return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size) + @classmethod + def dist(cls, lower, upper, *args, **kwargs): + lower = intX(at.floor(lower)) + upper = intX(at.floor(upper)) + return super().dist([lower, upper], **kwargs) - def logp(self, value): + def logp(value, lower, upper): r""" Calculate log-probability of DiscreteUniform distribution at specified value. @@ -1056,15 +1045,13 @@ def logp(self, value): ------- TensorVariable """ - upper = self.upper - lower = self.lower return bound( at.fill(value, -at.log(upper - lower + 1)), lower <= value, value <= upper, ) - def logcdf(self, value): + def logcdf(value, lower, upper): """ Compute the log of the cumulative distribution function for Discrete uniform distribution at the specified value. @@ -1079,8 +1066,6 @@ def logcdf(self, value): ------- TensorVariable """ - upper = self.upper - lower = self.lower return bound( at.switch( @@ -1177,6 +1162,23 @@ def logp(value, p): ) +class ConstantRV(RandomVariable): + name = "constant" + ndim_supp = 0 + ndims_params = [0] + dtype = "floatX" # Should be treated as a discrete variable! + _print_name = ("Constant", "\\operatorname{Constant}") + + @classmethod + def rng_fn(cls, rng, c, size=None): + if size is None: + return c.copy() + return np.full(size, c) + + +constant = ConstantRV() + + class Constant(Discrete): r""" Constant log-likelihood. @@ -1187,40 +1189,14 @@ class Constant(Discrete): Constant parameter. """ - def __init__(self, c, *args, **kwargs): - warnings.warn( - "Constant has been deprecated. We recommend using a Deterministic object instead.", - DeprecationWarning, - ) - super().__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.c = c = at.as_tensor_variable(c) + rv_op = constant - def random(self, point=None, size=None): - r""" - Draw random values from Constant distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). 
+ @classmethod + def dist(cls, c, *args, **kwargs): + c = at.as_tensor_variable(floatX(c)) + return super().dist([c], **kwargs) - Returns - ------- - array - """ - # c = draw_values([self.c], point=point, size=size)[0] - # dtype = np.array(c).dtype - # - # def _random(c, dtype=dtype, size=None): - # return np.full(size, fill_value=c, dtype=dtype) - # - # return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype) - - def logp(self, value): + def logp(value, c): r""" Calculate log-probability of Constant distribution at specified value. @@ -1234,11 +1210,25 @@ def logp(self, value): ------- TensorVariable """ - c = self.c - return bound(0, at.eq(value, c)) + return bound( + at.zeros_like(value), + at.eq(value, c), + ) -ConstantDist = Constant +class ZeroInflatedPoissonRV(RandomVariable): + name = "zero_inflated_poisson" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "int64" + _print_name = ("ZeroInflatedPois", "\\operatorname{ZeroInflatedPois}") + + @classmethod + def rng_fn(cls, rng, psi, lam, size): + return rng.poisson(lam, size=size) * (rng.random(size=size) < psi) + + +zero_inflated_poisson = ZeroInflatedPoissonRV() class ZeroInflatedPoisson(Discrete): @@ -1292,36 +1282,15 @@ class ZeroInflatedPoisson(Discrete): (theta >= 0). """ - def __init__(self, psi, theta, *args, **kwargs): - super().__init__(*args, **kwargs) - self.theta = theta = at.as_tensor_variable(floatX(theta)) - self.psi = at.as_tensor_variable(floatX(psi)) - self.pois = Poisson.dist(theta) - self.mode = self.pois.mode + rv_op = zero_inflated_poisson - def random(self, point=None, size=None): - r""" - Draw random values from ZeroInflatedPoisson distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # theta, psi = draw_values([self.theta, self.psi], point=point, size=size) - # g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) - # g, psi = broadcast_distribution_samples([g, psi], size=size) - # return g * (np.random.random(g.shape) < psi) + @classmethod + def dist(cls, psi, theta, *args, **kwargs): + psi = at.as_tensor_variable(floatX(psi)) + theta = at.as_tensor_variable(floatX(theta)) + return super().dist([psi, theta], *args, **kwargs) - def logp(self, value): + def logp(value, psi, theta): r""" Calculate log-probability of ZeroInflatedPoisson distribution at specified value. @@ -1335,18 +1304,22 @@ def logp(self, value): ------- TensorVariable """ - psi = self.psi - theta = self.theta logp_val = at.switch( at.gt(value, 0), - at.log(psi) + self.pois.logp(value), + at.log(psi) + _logp(poisson, value, {}, theta), logaddexp(at.log1p(-psi), at.log(psi) - theta), ) - return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta) + return bound( + logp_val, + 0 <= value, + 0 <= psi, + psi <= 1, + 0 <= theta, + ) - def logcdf(self, value): + def logcdf(value, psi, theta): """ Compute the log of the cumulative distribution function for ZeroInflatedPoisson distribution at the specified value. 
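# Illustration (not part of the patch): the logp branch above encodes the
# zero-inflated mixture P(X = x) = (1 - psi) * [x == 0] + psi * Poisson(theta).pmf(x),
# whose CDF is (1 - psi) + psi * Poisson(theta).cdf(x) for x >= 0 -- the form
# the logcdf below relies on. A SciPy sanity check against draws generated the
# same way as ZeroInflatedPoissonRV.rng_fn:
import numpy as np
from scipy import stats

psi, theta = 0.7, 3.0
rng = np.random.default_rng(0)
draws = rng.poisson(theta, size=200_000) * (rng.random(size=200_000) < psi)
for v in (0, 2, 5):
    mixture_cdf = (1 - psi) + psi * stats.poisson.cdf(v, theta)
    assert abs(mixture_cdf - (draws <= v).mean()) < 1e-2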
@@ -1361,16 +1334,31 @@ def logcdf(self, value): ------- TensorVariable """ - psi = self.psi return bound( - logaddexp(at.log1p(-psi), at.log(psi) + self.pois.logcdf(value)), + logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(poisson, value, {}, theta)), 0 <= value, 0 <= psi, psi <= 1, + 0 <= theta, ) +class ZeroInflatedBinomialRV(RandomVariable): + name = "zero_inflated_binomial" + ndim_supp = 0 + ndims_params = [0, 0, 0] + dtype = "int64" + _print_name = ("ZeroInflatedBinom", "\\operatorname{ZeroInflatedBinom}") + + @classmethod + def rng_fn(cls, rng, psi, n, p, size): + return rng.binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi) + + +zero_inflated_binomial = ZeroInflatedBinomialRV() + + class ZeroInflatedBinomial(Discrete): R""" Zero-inflated Binomial log-likelihood. @@ -1423,37 +1411,16 @@ class ZeroInflatedBinomial(Discrete): """ - def __init__(self, psi, n, p, *args, **kwargs): - super().__init__(*args, **kwargs) - self.n = n = at.as_tensor_variable(intX(n)) - self.p = p = at.as_tensor_variable(floatX(p)) - self.psi = psi = at.as_tensor_variable(floatX(psi)) - self.bin = Binomial.dist(n, p) - self.mode = self.bin.mode - - def random(self, point=None, size=None): - r""" - Draw random values from ZeroInflatedBinomial distribution. + rv_op = zero_inflated_binomial - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) - # g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) - # g, psi = broadcast_distribution_samples([g, psi], size=size) - # return g * (np.random.random(g.shape) < psi) + @classmethod + def dist(cls, psi, n, p, *args, **kwargs): + psi = at.as_tensor_variable(floatX(psi)) + n = at.as_tensor_variable(intX(n)) + p = at.as_tensor_variable(floatX(p)) + return super().dist([psi, n, p], *args, **kwargs) - def logp(self, value): + def logp(value, psi, n, p): r""" Calculate log-probability of ZeroInflatedBinomial distribution at specified value. @@ -1467,19 +1434,24 @@ def logp(self, value): ------- TensorVariable """ - psi = self.psi - p = self.p - n = self.n logp_val = at.switch( at.gt(value, 0), - at.log(psi) + self.bin.logp(value), + at.log(psi) + _logp(binomial, value, {}, n, p), logaddexp(at.log1p(-psi), at.log(psi) + n * at.log1p(-p)), ) - return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1) + return bound( + logp_val, + 0 <= value, + value <= n, + 0 <= psi, + psi <= 1, + 0 <= p, + p <= 1, + ) - def logcdf(self, value): + def logcdf(value, psi, n, p): """ Compute the log of the cumulative distribution function for ZeroInflatedBinomial distribution at the specified value. @@ -1493,22 +1465,39 @@ def logcdf(self, value): ------- TensorVariable """ + # logcdf can only handle scalar values due to limitation in Binomial.logcdf if np.ndim(value): raise TypeError( f"ZeroInflatedBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." 
) - psi = self.psi - return bound( - logaddexp(at.log1p(-psi), at.log(psi) + self.bin.logcdf(value)), + logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(binomial, value, {}, n, p)), 0 <= value, + value <= n, 0 <= psi, psi <= 1, + 0 <= p, + p <= 1, ) +class ZeroInflatedNegBinomialRV(RandomVariable): + name = "zero_inflated_neg_binomial" + ndim_supp = 0 + ndims_params = [0, 0, 0] + dtype = "int64" + _print_name = ("ZeroInflatedNegBinom", "\\operatorname{ZeroInflatedNegBinom}") + + @classmethod + def rng_fn(cls, rng, psi, n, p, size): + return rng.negative_binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi) + + +zero_inflated_neg_binomial = ZeroInflatedNegBinomialRV() + + class ZeroInflatedNegativeBinomial(Discrete): R""" Zero-Inflated Negative binomial log-likelihood. @@ -1578,50 +1567,17 @@ def ZeroInfNegBinom(a, m, psi, x): """ - def __init__(self, psi, mu, alpha, *args, **kwargs): - super().__init__(*args, **kwargs) - self.mu = mu = at.as_tensor_variable(floatX(mu)) - self.alpha = alpha = at.as_tensor_variable(floatX(alpha)) - self.psi = psi = at.as_tensor_variable(floatX(psi)) - self.nb = NegativeBinomial.dist(mu, alpha) - self.mode = self.nb.mode - - def random(self, point=None, size=None): - r""" - Draw random values from ZeroInflatedNegativeBinomial distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). + rv_op = zero_inflated_neg_binomial - Returns - ------- - array - """ - # mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size) - # g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) - # g[g == 0] = np.finfo(float).eps # Just in case - # g, psi = broadcast_distribution_samples([g, psi], size=size) - # return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi) - - def _random(self, mu, alpha, size): - r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's - parametrization to scipy.gamma. All parameter arrays should have - been broadcasted properly by generate_samples at this point and size is - the scipy.rvs representation. - """ - return stats.gamma.rvs( - a=alpha, - scale=mu / alpha, - size=size, - ) + @classmethod + def dist(cls, psi, mu, alpha, *args, **kwargs): + psi = at.as_tensor_variable(floatX(psi)) + n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha) + n = at.as_tensor_variable(floatX(n)) + p = at.as_tensor_variable(floatX(p)) + return super().dist([psi, n, p], *args, **kwargs) - def logp(self, value): + def logp(value, psi, n, p): r""" Calculate log-probability of ZeroInflatedNegativeBinomial distribution at specified value. 
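# Illustration (not part of the patch): dist() above converts (mu, alpha) to a
# scipy-style (n, p) via NegativeBinomial.get_n_p. Assuming the usual NB2
# convention, that is n = alpha and p = alpha / (mu + alpha), which makes the
# zero-inflated mass at zero (1 - psi) + psi * p**n -- exactly the term
# logaddexp(log1p(-psi), log(psi) + n * log(p)) computes in logp below:
import numpy as np
from scipy import stats

mu, alpha, psi = 3.0, 5.0, 0.9
n, p = alpha, alpha / (mu + alpha)  # assumed get_n_p conversion
assert np.isclose(stats.nbinom.pmf(0, n, p), p**n)
print((1 - psi) + psi * p**n)  # mixture probability of observing a zero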
@@ -1635,20 +1591,22 @@ def logp(self, value): ------- TensorVariable """ - alpha = self.alpha - mu = self.mu - psi = self.psi - logp_other = at.log(psi) + self.nb.logp(value) - logp_0 = logaddexp( - at.log1p(-psi), at.log(psi) + alpha * (at.log(alpha) - at.log(alpha + mu)) + return bound( + at.switch( + at.gt(value, 0), + at.log(psi) + _logp(nbinom, value, {}, n, p), + logaddexp(at.log1p(-psi), at.log(psi) + n * at.log(p)), + ), + 0 <= value, + 0 <= psi, + psi <= 1, + 0 < n, + 0 <= p, + p <= 1, ) - logp_val = at.switch(at.gt(value, 0), logp_other, logp_0) - - return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0) - - def logcdf(self, value): + def logcdf(value, psi, n, p): """ Compute the log of the cumulative distribution function for ZeroInflatedNegativeBinomial distribution at the specified value. @@ -1667,13 +1625,14 @@ def logcdf(self, value): raise TypeError( f"ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - psi = self.psi return bound( - logaddexp(at.log1p(-psi), at.log(psi) + self.nb.logcdf(value)), + logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(nbinom, value, {}, n, p)), 0 <= value, 0 <= psi, psi <= 1, + 0 < p, + p <= 1, ) @@ -1743,11 +1702,14 @@ class OrderedLogistic(Categorical): """ - def __init__(self, eta, cutpoints, *args, **kwargs): - self.eta = at.as_tensor_variable(floatX(eta)) - self.cutpoints = at.as_tensor_variable(cutpoints) + rv_op = categorical + + @classmethod + def dist(cls, eta, cutpoints, *args, **kwargs): + eta = at.as_tensor_variable(floatX(eta)) + cutpoints = at.as_tensor_variable(cutpoints) - pa = sigmoid(self.cutpoints - at.shape_padright(self.eta)) + pa = sigmoid(cutpoints - at.shape_padright(eta)) p_cum = at.concatenate( [ at.zeros_like(at.shape_padright(pa[..., 0])), @@ -1758,7 +1720,7 @@ def __init__(self, eta, cutpoints, *args, **kwargs): ) p = p_cum[..., 1:] - p_cum[..., :-1] - super().__init__(p=p, *args, **kwargs) + return super().dist(p, **kwargs) class OrderedProbit(Categorical): @@ -1831,12 +1793,14 @@ class OrderedProbit(Categorical): """ - def __init__(self, eta, cutpoints, *args, **kwargs): + rv_op = categorical - self.eta = at.as_tensor_variable(floatX(eta)) - self.cutpoints = at.as_tensor_variable(cutpoints) + @classmethod + def dist(cls, eta, cutpoints, *args, **kwargs): + eta = at.as_tensor_variable(floatX(eta)) + cutpoints = at.as_tensor_variable(cutpoints) - probits = at.shape_padright(self.eta) - self.cutpoints + probits = at.shape_padright(eta) - cutpoints _log_p = at.concatenate( [ at.shape_padright(normal_lccdf(0, 1, probits[..., 0])), @@ -1846,44 +1810,6 @@ def __init__(self, eta, cutpoints, *args, **kwargs): axis=-1, ) _log_p = at.as_tensor_variable(floatX(_log_p)) - - self._log_p = _log_p - self.mode = at.argmax(_log_p, axis=-1) p = at.exp(_log_p) - super().__init__(p=p, *args, **kwargs) - - def logp(self, value): - r""" - Calculate log-probability of Ordered Probit distribution at specified value. - - Parameters - ---------- - value: numeric - Value(s) for which log-probability is calculated. 
If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor
-
-        Returns
-        -------
-        TensorVariable
-        """
-        logp = self._log_p
-        k = self.k
-
-        # Clip values before using them for indexing
-        value_clip = at.clip(value, 0, k - 1)
-
-        if logp.ndim > 1:
-            if logp.ndim > value_clip.ndim:
-                value_clip = at.shape_padleft(value_clip, logp.ndim - value_clip.ndim)
-            elif logp.ndim < value_clip.ndim:
-                logp = at.shape_padleft(logp, value_clip.ndim - logp.ndim)
-            pattern = (logp.ndim - 1,) + tuple(range(logp.ndim - 1))
-            a = take_along_axis(
-                logp.dimshuffle(pattern),
-                value_clip,
-            )
-        else:
-            a = logp[value_clip]
-
-        return bound(a, value >= 0, value <= (k - 1))
+        return super().dist(p, **kwargs)
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 397d2ba6af..63466b7902 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -101,6 +101,7 @@
     continuous,
     logcdf,
     logpt,
+    logpt_sum,
 )
 from pymc3.math import kronecker, logsumexp
 from pymc3.model import Deterministic, Model, Point
@@ -814,8 +815,14 @@ def check_logcdf(
         )
 
         # Test that method works with multiple values or raises informative TypeError
-        with pytest.raises(TypeError), aesara.config.change_flags(mode=Mode("py")):
-            logcdf(valid_dist, np.array([valid_value, valid_value])).eval()
+        valid_dist = change_rv_size(valid_dist, 2)
+        with aesara.config.change_flags(mode=Mode("py")):
+            try:
+                logcdf(valid_dist, np.array([valid_value, valid_value])).eval()
+            except TypeError as err:
+                assert str(err).endswith(
+                    "logcdf expects a scalar value but received a 1-dimensional object."
+                )
 
     def check_selfconsistency_discrete_logcdf(
         self, distribution, domain, paramdomains, decimal=None, n_samples=100
@@ -929,7 +936,6 @@ def test_bound_normal(self):
             x = PositiveNormal("x", mu=0, sigma=1, transform=None)
             assert np.isinf(logpt(x, -1).eval())
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_discrete_unif(self):
         self.check_logp(
             DiscreteUniform,
@@ -1615,12 +1621,10 @@ def test_bound_poisson(self):
             x = NonZeroPoisson("x", mu=4)
             assert np.isinf(logpt(x, 0).eval())
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_constantdist(self):
         self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
 
-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="Test has not been refactored")
     @pytest.mark.xfail(
         condition=(aesara.config.floatX == "float32"),
         reason="Fails on float32 due to inf issues",
     )
     def test_zeroinflatedpoisson_distribution(self):
         self.checkd(
             ZeroInflatedPoisson,
             Nat,
             {"theta": Rplus, "psi": Unit},
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
-    def test_zeroinflatedpoisson_logcdf(self):
+    def test_zeroinflatedpoisson(self):
+        def logp_fn(value, psi, theta):
+            if value == 0:
+                return np.log((1 - psi) + psi * sp.poisson.pmf(0, theta))
+            else:
+                return np.log(psi * sp.poisson.pmf(value, theta))
+
+        def logcdf_fn(value, psi, theta):
+            return np.log((1 - psi) + psi * sp.poisson.cdf(value, theta))
+
+        self.check_logp(
+            ZeroInflatedPoisson,
+            Nat,
+            {"psi": Unit, "theta": Rplus},
+            logp_fn,
+        )
+
+        self.check_logcdf(
+            ZeroInflatedPoisson,
+            Nat,
+            {"psi": Unit, "theta": Rplus},
+            logcdf_fn,
+        )
+
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedPoisson,
             Nat,
             {"theta": Rplus, "psi": Unit},
         )
 
-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="Test not refactored yet")
     @pytest.mark.xfail(
         condition=(aesara.config.floatX == "float32"),
         reason="Fails on float32 due to inf issues",
@@ -1653,17 +1678,41 @@ def test_zeroinflatednegativebinomial_distribution(self):
         self.checkd(
             ZeroInflatedNegativeBinomial,
             Nat,
             {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit},
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
-    def test_zeroinflatednegativebinomial_logcdf(self):
+    def test_zeroinflatednegativebinomial(self):
+        def logp_fn(value, psi, mu, alpha):
+            n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha)
+            if value == 0:
+                return np.log((1 - psi) + psi * sp.nbinom.pmf(0, n, p))
+            else:
+                return np.log(psi * sp.nbinom.pmf(value, n, p))
+
+        def logcdf_fn(value, psi, mu, alpha):
+            n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha)
+            return np.log((1 - psi) + psi * sp.nbinom.cdf(value, n, p))
+
+        self.check_logp(
+            ZeroInflatedNegativeBinomial,
+            Nat,
+            {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig},
+            logp_fn,
+        )
+
+        self.check_logcdf(
+            ZeroInflatedNegativeBinomial,
+            Nat,
+            {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig},
+            logcdf_fn,
+            n_samples=10,
+        )
+
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedNegativeBinomial,
             Nat,
-            {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit},
+            {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig},
             n_samples=10,
         )
 
-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="Test not refactored yet")
     def test_zeroinflatedbinomial_distribution(self):
         self.checkd(
             ZeroInflatedBinomial,
             Nat,
             {"n": NatSmall, "p": Unit, "psi": Unit},
         )
 
@@ -1671,8 +1720,31 @@ def test_zeroinflatedbinomial_distribution(self):
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
-    def test_zeroinflatedbinomial_logcdf(self):
+    def test_zeroinflatedbinomial(self):
+        def logp_fn(value, psi, n, p):
+            if value == 0:
+                return np.log((1 - psi) + psi * sp.binom.pmf(0, n, p))
+            else:
+                return np.log(psi * sp.binom.pmf(value, n, p))
+
+        def logcdf_fn(value, psi, n, p):
+            return np.log((1 - psi) + psi * sp.binom.cdf(value, n, p))
+
+        self.check_logp(
+            ZeroInflatedBinomial,
+            Nat,
+            {"psi": Unit, "n": NatSmall, "p": Unit},
+            logp_fn,
+        )
+
+        self.check_logcdf(
+            ZeroInflatedBinomial,
+            Nat,
+            {"psi": Unit, "n": NatSmall, "p": Unit},
+            logcdf_fn,
+            n_samples=10,
+        )
+
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedBinomial,
             Nat,
@@ -2299,7 +2371,6 @@ def test_categorical(self, n):
         )
 
     @pytest.mark.parametrize("n", [2, 3, 4])
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
    def test_orderedlogistic(self, n):
        self.check_logp(
            OrderedLogistic,
@@ -2309,7 +2380,6 @@
         )
 
     @pytest.mark.parametrize("n", [2, 3, 4])
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
    def test_orderedprobit(self, n):
        self.check_logp(
            OrderedProbit,
@@ -2740,27 +2810,34 @@ def test_discrete_trafo():
        err.match("Transformations for discrete distributions")
 
 
+# TODO: Is this test working as expected / still relevant?
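# Illustration (not part of the patch): the refactored OrderedLogistic.dist
# builds its Categorical probabilities as adjacent differences of
# sigmoid(cutpoints - eta), padded with 0 and 1. A NumPy re-derivation that
# reproduces the expected_rv_op_params used by TestOrderedLogistic further
# down in this diff:
import numpy as np

def ordered_logistic_p(eta, cutpoints):
    pa = 1.0 / (1.0 + np.exp(-(np.asarray(cutpoints, dtype=float) - eta)))
    p_cum = np.concatenate([[0.0], pa, [1.0]])
    return np.diff(p_cum)  # one probability per ordinal category

print(ordered_logistic_p(0.0, [-2.0, 0.0, 2.0]))
# [0.11920292 0.38079708 0.38079708 0.11920292]; the entries always sum to 1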
@pytest.mark.parametrize("shape", [tuple(), (1,), (3, 1), (3, 2)], ids=str) -@pytest.mark.xfail(reason="Distribution not refactored yet") def test_orderedlogistic_dimensions(shape): # Test for issue #3535 loge = np.log10(np.exp(1)) size = 7 p = np.ones(shape + (10,)) / 10 cutpoints = np.tile(logit(np.linspace(0, 1, 11)[1:-1]), shape + (1,)) - obs = np.random.randint(0, 1, size=(size,) + shape) + obs = np.random.randint(0, 2, size=(size,) + shape) with Model(): ol = OrderedLogistic( - "ol", eta=np.zeros(shape), cutpoints=cutpoints, size=shape, observed=obs - ) - c = Categorical("c", p=p, size=shape, observed=obs) - ologp = logpt(ol, 1).eval() * loge - clogp = logpt(c, 1) * loge + "ol", + eta=np.zeros(shape), + cutpoints=cutpoints, + observed=obs, + ) + c = Categorical( + "c", + p=p, + observed=obs, + ) + ologp = logpt_sum(ol, np.ones_like(obs)).eval() * loge + clogp = logpt_sum(c, np.ones_like(obs)).eval() * loge expected = -np.prod((size,) + shape) - assert c.distribution.p.ndim == (len(shape) + 1) + assert c.owner.inputs[3].ndim == (len(shape) + 1) assert np.allclose(clogp, expected) - assert ol.distribution.p.ndim == (len(shape) + 1) + assert ol.owner.inputs[3].ndim == (len(shape) + 1) assert np.allclose(ologp, expected) @@ -2817,17 +2894,20 @@ def test_issue_3051(self, dims, dist_cls, kwargs): assert isinstance(actual_a, np.ndarray) assert actual_a.shape == (X.shape[0],) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_issue_4499(self): # Test for bug in Uniform and DiscreteUniform logp when setting check_bounds = False # https://github.com/pymc-devs/pymc3/issues/4499 with pm.Model(check_bounds=False) as m: - x = pm.Uniform("x", 0, 2, shape=10, transform=None) - assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + x = pm.Uniform("x", 0, 2, size=10, transform=None) + assert_almost_equal(m.logp({"x": np.ones(10)}), -np.log(2) * 10) + + with pm.Model(check_bounds=False) as m: + x = pm.DiscreteUniform("x", 0, 1, size=10) + assert_almost_equal(m.logp({"x": np.ones(10)}), -np.log(2) * 10) with pm.Model(check_bounds=False) as m: - x = pm.DiscreteUniform("x", 0, 1, shape=10) - assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + x = pm.Constant("x", 1, size=10) + assert_almost_equal(m.logp({"x": np.ones(10)}), 0 * 10) @pytest.mark.xfail(reason="DensityDist no longer supported") diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 1a7aa185a4..82d4f4e091 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -40,9 +40,7 @@ from pymc3.tests.helpers import SeededTest, select_by_precision from pymc3.tests.test_distributions import ( Domain, - I, Nat, - NatSmall, PdMatrix, PdMatrixChol, R, @@ -315,18 +313,6 @@ class TestLogitNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestConstant(BaseTestCases.BaseTestCase): - distribution = pm.Constant - params = {"c": 3} - - -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase): - distribution = pm.ZeroInflatedPoisson - params = {"theta": 1.0, "psi": 0.3} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedNegativeBinomial @@ -339,12 +325,6 @@ class 
TestZeroInflatedBinomial(BaseTestCases.BaseTestCase): params = {"n": 10, "p": 0.6, "psi": 0.3} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestDiscreteUniform(BaseTestCases.BaseTestCase): - distribution = pm.DiscreteUniform - params = {"lower": 0.0, "upper": 10.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestMoyal(BaseTestCases.BaseTestCase): distribution = pm.Moyal @@ -907,6 +887,159 @@ class TestBetaBinomial(BaseTestDistribution): ] +class TestDiscreteUniform(BaseTestDistribution): + def discrete_uniform_rng_fn(self, size, lower, upper, rng): + return st.randint.rvs(lower, upper + 1, size=size, random_state=rng) + + pymc_dist = pm.DiscreteUniform + pymc_dist_params = {"lower": -1, "upper": 9} + expected_rv_op_params = {"lower": -1, "upper": 9} + reference_dist_params = {"lower": -1, "upper": 9} + reference_dist = lambda self: functools.partial( + self.discrete_uniform_rng_fn, rng=self.get_random_state() + ) + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + +class TestConstant(BaseTestDistribution): + def constant_rng_fn(self, size, c): + if size is None: + return c + return np.full(size, c) + + pymc_dist = pm.Constant + pymc_dist_params = {"c": 3} + expected_rv_op_params = {"c": 3} + reference_dist_params = {"c": 3} + reference_dist = lambda self: self.constant_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + +class TestZeroInflatedPoisson(BaseTestDistribution): + def zero_inflated_poisson_rng_fn(self, size, psi, theta, poisson_rng_fct, random_rng_fct): + return poisson_rng_fct(theta, size=size) * (random_rng_fct(size=size) < psi) + + def seeded_zero_inflated_poisson_rng_fn(self): + poisson_rng_fct = functools.partial( + getattr(np.random.RandomState, "poisson"), self.get_random_state() + ) + + random_rng_fct = functools.partial( + getattr(np.random.RandomState, "random"), self.get_random_state() + ) + + return functools.partial( + self.zero_inflated_poisson_rng_fn, + poisson_rng_fct=poisson_rng_fct, + random_rng_fct=random_rng_fct, + ) + + pymc_dist = pm.ZeroInflatedPoisson + pymc_dist_params = {"psi": 0.9, "theta": 4.0} + expected_rv_op_params = {"psi": 0.9, "theta": 4.0} + reference_dist_params = {"psi": 0.9, "theta": 4.0} + reference_dist = seeded_zero_inflated_poisson_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + +class TestZeroInflatedBinomial(BaseTestDistribution): + def zero_inflated_binomial_rng_fn(self, size, psi, n, p, binomial_rng_fct, random_rng_fct): + return binomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi) + + def seeded_zero_inflated_binomial_rng_fn(self): + binomial_rng_fct = functools.partial( + getattr(np.random.RandomState, "binomial"), self.get_random_state() + ) + + random_rng_fct = functools.partial( + getattr(np.random.RandomState, "random"), self.get_random_state() + ) + + return functools.partial( + self.zero_inflated_binomial_rng_fn, + binomial_rng_fct=binomial_rng_fct, + random_rng_fct=random_rng_fct, + ) + + pymc_dist = pm.ZeroInflatedBinomial + pymc_dist_params = {"psi": 0.9, "n": 12, "p": 0.7} + expected_rv_op_params = {"psi": 0.9, "n": 12, "p": 0.7} + reference_dist_params = {"psi": 0.9, "n": 12, "p": 0.7} + reference_dist = seeded_zero_inflated_binomial_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + 
"check_pymc_draws_match_reference", + "check_rv_size", + ] + + +class TestZeroInflatedNegativeBinomial(BaseTestDistribution): + def zero_inflated_negbinomial_rng_fn( + self, size, psi, n, p, negbinomial_rng_fct, random_rng_fct + ): + return negbinomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi) + + def seeded_zero_inflated_negbinomial_rng_fn(self): + negbinomial_rng_fct = functools.partial( + getattr(np.random.RandomState, "negative_binomial"), self.get_random_state() + ) + + random_rng_fct = functools.partial( + getattr(np.random.RandomState, "random"), self.get_random_state() + ) + + return functools.partial( + self.zero_inflated_negbinomial_rng_fn, + negbinomial_rng_fct=negbinomial_rng_fct, + random_rng_fct=random_rng_fct, + ) + + n, p = pm.NegativeBinomial.get_n_p(mu=3, alpha=5) + + pymc_dist = pm.ZeroInflatedNegativeBinomial + pymc_dist_params = {"psi": 0.9, "mu": 3, "alpha": 5} + expected_rv_op_params = {"psi": 0.9, "n": n, "p": p} + reference_dist_params = {"psi": 0.9, "n": n, "p": p} + reference_dist = seeded_zero_inflated_negbinomial_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + +class TestOrderedLogistic(BaseTestDistribution): + pymc_dist = pm.OrderedLogistic + pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} + expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + +class TestOrderedProbit(BaseTestDistribution): + pymc_dist = pm.OrderedProbit + pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} + expected_rv_op_params = {"p": np.array([0.02275013, 0.47724987, 0.47724987, 0.02275013])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): @@ -1016,22 +1149,6 @@ def test_half_flat(self): with pytest.raises(ValueError): f.random(1) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") - def test_discrete_uniform(self): - def ref_rand(size, lower, upper): - return st.randint.rvs(lower, upper + 1, size=size) - - pymc3_random_discrete( - pm.DiscreteUniform, {"lower": -NatSmall, "upper": NatSmall}, ref_rand=ref_rand - ) - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") - def test_constant_dist(self): - def ref_rand(size, c): - return c * np.ones(size, dtype=int) - - pymc3_random_discrete(pm.Constant, {"c": I}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_matrix_normal(self): def ref_rand(size, mu, rowcov, colcov): diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index fa2e74f6f1..ad54236543 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -51,7 +51,7 @@ def get_city_data(): return data.merge(unique, "inner", on="fips") -@pytest.mark.xfail(reason="Bernoulli distribution not refactored") +@pytest.mark.xfail(reason="Bernoulli logitp distribution not refactored") class TestARM5_4(SeededTest): def build_model(self): data = pd.read_csv( @@ -194,7 +194,7 @@ def build_disaster_model(masked=False): @pytest.mark.xfail( - reason="DiscreteUniform hasn't been refactored" + reason="_check_start_shape fails with start dictionary" # condition=(aesara.config.floatX == "float32"), reason="Fails on float32" ) class 
TestDisasterModel(SeededTest): @@ -204,9 +204,9 @@ def test_disaster_model(self): model = build_disaster_model(masked=False) with model: # Initial values for stochastic nodes - start = {"early_mean": 2.0, "late_mean": 3.0} + start = {"early_mean": 2, "late_mean": 3.0} # Use slice sampler for means (other variables auto-selected) - step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) + step = pm.Slice([model["early_mean_log__"], model["late_mean_log__"]]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) az.summary(tr) @@ -217,12 +217,12 @@ def test_disaster_model_missing(self): # Initial values for stochastic nodes start = {"early_mean": 2.0, "late_mean": 3.0} # Use slice sampler for means (other variables auto-selected) - step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) + step = pm.Slice([model["early_mean_log__"], model["late_mean_log__"]]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) az.summary(tr) -@pytest.mark.xfail(reason="ZeroInflatedPoisson hasn't been refactored for v4") +@pytest.mark.xfail(reason="_check_start_shape fails with start dictionary") class TestLatentOccupancy(SeededTest): """ From the PyMC example list @@ -277,14 +277,14 @@ def test_run(self): "z": (self.y > 0).astype("int16"), "theta": np.array(5, dtype="f"), } - step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__]) + step_one = pm.Metropolis([model["theta_interval__"], model["psi_logodds__"]]) step_two = pm.BinaryMetropolis([model.z]) pm.sample(50, step=[step_one, step_two], start=start, chains=1) @pytest.mark.xfail( - # condition=(aesara.config.floatX == "float32"), - # reason="Fails on float32 due to starting inf at starting logP", + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to starting inf at starting logP", ) class TestRSV(SeededTest): """ @@ -314,7 +314,7 @@ def build_model(self): # Prior probability prev_rsv = pm.Beta("prev_rsv", 1, 5, shape=3) # RSV in Amman - y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3, testval=100) + y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3) # Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1) pm.Binomial("y_hosp", y_amman, market_share, observed=rsv_cases) return model diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index a155702ba1..f307ad93c5 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -274,7 +274,7 @@ def test_grad(self): assert val == 21 npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1]) - @pytest.mark.xfail(reason="Lognormal not refactored for v4") + @pytest.mark.xfail(reason="Test not refactored for v4") def test_edge_case(self): # Edge case discovered in #2948 ndim = 3 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 217cf96010..2581c0ae47 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -1040,7 +1040,6 @@ def test_shape_edgecase(self): prior = pm.sample_prior_predictive(10) assert prior["mu"].shape == (10, 5) - @pytest.mark.xfail(reason="ZeroInflatedPoisson not refactored for v4") def test_zeroinflatedpoisson(self): with pm.Model(): theta = pm.Beta("theta", alpha=1, beta=1)
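# Closing illustration (not part of the patch): every ZeroInflated*RV.rng_fn
# added above follows the same sampling recipe -- draw from the base
# distribution, then keep each draw with probability psi and zero it otherwise:
import numpy as np

rng = np.random.default_rng(42)
psi, theta, size = 0.3, 4.0, 100_000
draws = rng.poisson(theta, size=size) * (rng.random(size=size) < psi)
# Zero mass should approach (1 - psi) + psi * exp(-theta) ~= 0.7055:
print((draws == 0).mean())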