diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index c348da57b8..d7bb4b728a 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -35,8 +35,6 @@ from .discrete import NegativeBinomial from .discrete import ConstantDist from .discrete import Constant -from .discrete import ZeroInflatedPoisson -from .discrete import ZeroInflatedNegativeBinomial from .discrete import DiscreteUniform from .discrete import Geometric from .discrete import Categorical @@ -53,6 +51,9 @@ from .mixture import Mixture from .mixture import NormalMixture +from .mixture import ZeroInflatedPoisson +from .mixture import ZeroInflatedNegativeBinomial +from .mixture import ZeroInflatedBinomial from .multivariate import MvNormal from .multivariate import MvStudentT @@ -106,6 +107,7 @@ 'Constant', 'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial', + 'ZeroInflatedBinomial', 'DiscreteUniform', 'Geometric', 'Categorical', diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 11dbbd251c..695790cc69 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -11,7 +11,6 @@ __all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull', 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', - 'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial', 'DiscreteUniform', 'Geometric', 'Categorical'] @@ -582,129 +581,3 @@ def ConstantDist(*args, **kwargs): warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.", DeprecationWarning) return Constant(*args, **kwargs) - - -class ZeroInflatedPoisson(Discrete): - R""" - Zero-inflated Poisson log-likelihood. - - Often used to model the number of events occurring in a fixed period - of time when the times at which events occur are independent. - - .. math:: - - f(x \mid \theta, \psi) = \left\{ \begin{array}{l} - (1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\ - \psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots - \end{array} \right. - - ======== ========================== - Support :math:`x \in \mathbb{N}_0` - Mean :math:`\psi\theta` - Variance :math:`\theta + \frac{1-\psi}{\psi}\theta^2` - ======== ========================== - - Parameters - ---------- - theta : float - Expected number of occurrences during the given interval - (theta >= 0). - psi : float - Expected proportion of Poisson variates (0 < psi < 1) - - """ - - def __init__(self, theta, psi, *args, **kwargs): - super(ZeroInflatedPoisson, self).__init__(*args, **kwargs) - self.theta = theta = tt.as_tensor_variable(theta) - self.psi = psi = tt.as_tensor_variable(psi) - self.pois = Poisson.dist(theta) - self.mode = self.pois.mode - - def random(self, point=None, size=None, repeat=None): - theta, psi = draw_values([self.theta, self.psi], point=point) - g = generate_samples(stats.poisson.rvs, theta, - dist_shape=self.shape, - size=size) - sampled = g * (np.random.random(np.squeeze(g.shape)) < psi) - return reshape_sampled(sampled, size, self.shape) - - def logp(self, value): - return tt.switch(value > 0, - tt.log(self.psi) + self.pois.logp(value), - tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta))) - - def _repr_latex_(self, name=None, dist=None): - if dist is None: - dist = self - theta = dist.theta - psi = dist.psi - return r'${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{theta}}={}, \mathit{{psi}}={})$'.format(name, - get_variable_name(theta), - get_variable_name(psi)) - - -class ZeroInflatedNegativeBinomial(Discrete): - R""" - Zero-Inflated Negative binomial log-likelihood. - - The Zero-inflated version of the Negative Binomial (NB). - The NB distribution describes a Poisson random variable - whose rate parameter is gamma distributed. - - .. math:: - - f(x \mid \mu, \alpha, \psi) = \left\{ \begin{array}{l} - (1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\ - \psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots - \end{array} \right. - - ======== ========================== - Support :math:`x \in \mathbb{N}_0` - Mean :math:`\psi\mu` - Var :math:`\psi\mu + \left (1 + \frac{\mu}{\alpha} + \frac{1-\psi}{\mu} \right)` - ======== ========================== - - Parameters - ---------- - mu : float - Poission distribution parameter (mu > 0). - alpha : float - Gamma distribution parameter (alpha > 0). - psi : float - Expected proportion of NegativeBinomial variates (0 < psi < 1) - """ - - def __init__(self, mu, alpha, psi, *args, **kwargs): - super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs) - self.mu = mu = tt.as_tensor_variable(mu) - self.alpha = alpha = tt.as_tensor_variable(alpha) - self.psi = psi = tt.as_tensor_variable(psi) - self.nb = NegativeBinomial.dist(mu, alpha) - self.mode = self.nb.mode - - def random(self, point=None, size=None, repeat=None): - mu, alpha, psi = draw_values( - [self.mu, self.alpha, self.psi], point=point) - g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha, - dist_shape=self.shape, - size=size) - g[g == 0] = np.finfo(float).eps # Just in case - sampled = stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi) - return reshape_sampled(sampled, size, self.shape) - - def logp(self, value): - return tt.switch(value > 0, - tt.log(self.psi) + self.nb.logp(value), - tt.log((1. - self.psi) + self.psi * (self.alpha / (self.alpha + self.mu))**self.alpha)) - - def _repr_latex_(self, name=None, dist=None): - if dist is None: - dist = self - mu = dist.mu - alpha = dist.alpha - psi = dist.psi - return r'${} \sim \text{{ZeroInflatedNegativeBinomial}}(\mathit{{mu}}={}, \mathit{{alpha}}={}, \mathit{{psi}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(alpha), - get_variable_name(psi)) diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 19953bca54..215667ec1b 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -5,8 +5,11 @@ from ..math import logsumexp from .dist_math import bound from .distribution import Discrete, Distribution, draw_values, generate_samples +from .discrete import Constant, Poisson, Binomial, NegativeBinomial from .continuous import get_tau_sd, Normal +__all__ = ['ZeroInflatedPoisson', 'ZeroInflatedBinomial', + 'ZeroInflatedNegativeBinomial', 'NormalMixture'] def all_discrete(comp_dists): """ @@ -140,6 +143,156 @@ def random_choice(*args, **kwargs): return np.squeeze(comp_samples[w_samples]) +class ZeroInflatedPoisson(Mixture): + R""" + Zero-inflated Poisson log-likelihood. + + Two-component mixture of zeros and Poisson-distributed data. + + Often used to model the number of events occurring in a fixed period + of time when the times at which events occur are independent. + + .. math:: + + f(x \mid \psi, \theta) = \left\{ \begin{array}{l} + (1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\ + \psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots + \end{array} \right. + + ======== ========================== + Support :math:`x \in \mathbb{N}_0` + Mean :math:`\psi\theta` + Variance :math:`\theta + \frac{1-\psi}{\psi}\theta^2` + ======== ========================== + + Parameters + ---------- + psi : float + Expected proportion of Poisson variates (0 < psi < 1) + theta : float + Expected number of occurrences during the given interval + (theta >= 0). + + + """ + + def __init__(self, psi, theta, *args, **kwargs): + self.theta = theta = tt.as_tensor_variable(theta) + super(ZeroInflatedPoisson, self).__init__([1-psi, psi], + (Constant.dist(0), Poisson.dist(theta)), + *args, **kwargs) + + def _repr_latex_(self, name=None, dist=None): + if dist is None: + dist = self + theta = dist.theta + psi = dist.w + return r'${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{psi}}={}, \mathit{{theta}}={})$'.format(name, + get_variable_name(psi), + get_variable_name(theta)) + + +class ZeroInflatedBinomial(Mixture): + R""" + Zero-inflated Binomial log-likelihood. + + Two-component mixture of zeros and binomial-distributed data. + + .. math:: + + f(x \mid \psi, n, p) = \left\{ \begin{array}{l} + (1-\psi) + \psi (1-p)^{n}, \text{if } x = 0 \\ + \psi {n \choose x} p^x (1-p)^{n-x}, \text{if } x=1,2,3,\ldots,n + \end{array} \right. + + ======== ========================== + Support :math:`x \in \mathbb{N}_0` + Mean :math:`(1 - \psi) n p` + Variance :math:`(1-\psi) n p [1 - p(1 - \psi n)].` + ======== ========================== + + Parameters + ---------- + psi : float + Expected proportion of Binomial variates (0 < psi < 1) + n : int + Number of Bernoulli trials (n >= 0). + p : float + Probability of success in each trial (0 < p < 1). + + """ + + def __init__(self, psi, n, p, *args, **kwargs): + self.n = n = tt.as_tensor_variable(n) + self.p = p = tt.as_tensor_variable(p) + super(ZeroInflatedBinomial, self).__init__([1-psi, psi], + (Constant.dist(0), Binomial.dist(n, p)), + *args, **kwargs) + + def _repr_latex_(self, name=None, dist=None): + if dist is None: + dist = self + n = dist.n + p = dist.p + psi = dist.w + return r'${} \sim \text{{ZeroInflatedBinomial}}(\mathit{{psi}}={}, \mathit{{n}}={}, \mathit{{p}}={})$'.format(name, + get_variable_name(psi), + get_variable_name(n), + get_variable_name(p)) + + +class ZeroInflatedNegativeBinomial(Mixture): + R""" + Zero-Inflated Negative binomial log-likelihood. + + Two-component mixture of zeros and negative binomial-distributed data. + + The Zero-inflated version of the Negative Binomial (NB). + The NB distribution describes a Poisson random variable + whose rate parameter is gamma distributed. + + .. math:: + + f(x \mid \psi, \mu, \alpha) = \left\{ \begin{array}{l} + (1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\ + \psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots + \end{array} \right. + + ======== ========================== + Support :math:`x \in \mathbb{N}_0` + Mean :math:`\psi\mu` + Var :math:`\psi\mu + \left (1 + \frac{\mu}{\alpha} + \frac{1-\psi}{\mu} \right)` + ======== ========================== + + Parameters + ---------- + psi : float + Expected proportion of NegativeBinomial variates (0 < psi < 1) + mu : float + Poission distribution parameter (mu > 0). + alpha : float + Gamma distribution parameter (alpha > 0). + """ + + def __init__(self, psi, mu, alpha, *args, **kwargs): + self.mu = mu = tt.as_tensor_variable(mu) + self.alpha = alpha = tt.as_tensor_variable(alpha) + super(ZeroInflatedNegativeBinomial, self).__init__([1-psi, psi], + (Constant.dist(0), NegativeBinomial.dist(mu, alpha)), + *args, **kwargs) + + def _repr_latex_(self, name=None, dist=None): + if dist is None: + dist = self + mu = dist.mu + alpha = dist.alpha + psi = dist.w + return r'${} \sim \text{{ZeroInflatedNegativeBinomial}}(\mathit{{psi}}={}, \mathit{{mu}}={}, \mathit{{alpha}}={})$'.format(name, + get_variable_name(psi), + get_variable_name(mu), + get_variable_name(alpha)) + + class NormalMixture(Mixture): R""" Normal mixture log-likelihood diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 68559918e4..bd25f6fdde 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -14,7 +14,7 @@ NegativeBinomial, Geometric, Exponential, ExGaussian, Normal, Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform, Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, Gumbel, - Interpolated) + Interpolated, ZeroInflatedBinomial) from ..distributions import continuous from pymc3.theanof import floatX from numpy import array, inf, log, exp @@ -585,11 +585,15 @@ def test_constantdist(self): lambda value, c: np.log(c == value)) def test_zeroinflatedpoisson(self): - self.checkd(ZeroInflatedPoisson, Nat, {'theta': Rplus, 'psi': Unit}) + self.checkd(ZeroInflatedPoisson, Nat, {'psi': Unit, 'theta': Rplus}) def test_zeroinflatednegativebinomial(self): self.checkd(ZeroInflatedNegativeBinomial, Nat, - {'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit}) + {'psi': Unit, 'mu': Rplusbig, 'alpha': Rplusbig}) + + def test_zeroinflatedbinomial(self): + self.checkd(ZeroInflatedBinomial, Nat, + {'psi': Unit, 'n': NatSmall, 'p': Unit}) @pytest.mark.parametrize('n', [1, 2, 3]) def test_mvnormal(self, n): diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 7a7ee41128..2a68d4d9ee 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -331,13 +331,16 @@ class TestConstant(BaseTestCases.BaseTestCase): class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedPoisson - params = {'theta': 1., 'psi': 0.3} + params = {'psi': 0.3, 'theta': 1.} class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedNegativeBinomial - params = {'mu': 1., 'alpha': 1., 'psi': 0.3} + params = {'psi': 0.3, 'mu': 1., 'alpha': 1.} +class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase): + distribution = pm.ZeroInflatedBinomial + params = {'psi': 0.3, 'n': 10, 'p': 0.6} class TestDiscreteUniform(BaseTestCases.BaseTestCase): distribution = pm.DiscreteUniform diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index 1ec316ec14..a8627fdb81 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -253,7 +253,7 @@ def build_model(self): # Estimated mean count theta = pm.Uniform('theta', 0, 100) # Poisson likelihood - pm.ZeroInflatedPoisson('y', theta, psi, observed=self.y) + pm.ZeroInflatedPoisson('y', psi, theta, observed=self.y) return model def test_run(self):