Skip to content

WIP: Convert zero-inflated distributions to Mixture subclasses #2246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
from .discrete import NegativeBinomial
from .discrete import ConstantDist
from .discrete import Constant
from .discrete import ZeroInflatedPoisson
from .discrete import ZeroInflatedNegativeBinomial
from .discrete import DiscreteUniform
from .discrete import Geometric
from .discrete import Categorical
Expand All @@ -53,6 +51,9 @@

from .mixture import Mixture
from .mixture import NormalMixture
from .mixture import ZeroInflatedPoisson
from .mixture import ZeroInflatedNegativeBinomial
from .mixture import ZeroInflatedBinomial

from .multivariate import MvNormal
from .multivariate import MvStudentT
Expand Down Expand Up @@ -106,6 +107,7 @@
'Constant',
'ZeroInflatedPoisson',
'ZeroInflatedNegativeBinomial',
'ZeroInflatedBinomial',
'DiscreteUniform',
'Geometric',
'Categorical',
Expand Down
127 changes: 0 additions & 127 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull',
'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial',
'DiscreteUniform', 'Geometric', 'Categorical']


Expand Down Expand Up @@ -582,129 +581,3 @@ def ConstantDist(*args, **kwargs):
warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.",
DeprecationWarning)
return Constant(*args, **kwargs)


class ZeroInflatedPoisson(Discrete):
R"""
Zero-inflated Poisson log-likelihood.

Often used to model the number of events occurring in a fixed period
of time when the times at which events occur are independent.

.. math::

f(x \mid \theta, \psi) = \left\{ \begin{array}{l}
(1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\
\psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots
\end{array} \right.

======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\psi\theta`
Variance :math:`\theta + \frac{1-\psi}{\psi}\theta^2`
======== ==========================

Parameters
----------
theta : float
Expected number of occurrences during the given interval
(theta >= 0).
psi : float
Expected proportion of Poisson variates (0 < psi < 1)

"""

def __init__(self, theta, psi, *args, **kwargs):
super(ZeroInflatedPoisson, self).__init__(*args, **kwargs)
self.theta = theta = tt.as_tensor_variable(theta)
self.psi = psi = tt.as_tensor_variable(psi)
self.pois = Poisson.dist(theta)
self.mode = self.pois.mode

def random(self, point=None, size=None, repeat=None):
theta, psi = draw_values([self.theta, self.psi], point=point)
g = generate_samples(stats.poisson.rvs, theta,
dist_shape=self.shape,
size=size)
sampled = g * (np.random.random(np.squeeze(g.shape)) < psi)
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
return tt.switch(value > 0,
tt.log(self.psi) + self.pois.logp(value),
tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta)))

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
theta = dist.theta
psi = dist.psi
return r'${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{theta}}={}, \mathit{{psi}}={})$'.format(name,
get_variable_name(theta),
get_variable_name(psi))


class ZeroInflatedNegativeBinomial(Discrete):
R"""
Zero-Inflated Negative binomial log-likelihood.

The Zero-inflated version of the Negative Binomial (NB).
The NB distribution describes a Poisson random variable
whose rate parameter is gamma distributed.

.. math::

f(x \mid \mu, \alpha, \psi) = \left\{ \begin{array}{l}
(1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\
\psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots
\end{array} \right.

======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\psi\mu`
Var :math:`\psi\mu + \left (1 + \frac{\mu}{\alpha} + \frac{1-\psi}{\mu} \right)`
======== ==========================

Parameters
----------
mu : float
Poission distribution parameter (mu > 0).
alpha : float
Gamma distribution parameter (alpha > 0).
psi : float
Expected proportion of NegativeBinomial variates (0 < psi < 1)
"""

def __init__(self, mu, alpha, psi, *args, **kwargs):
super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs)
self.mu = mu = tt.as_tensor_variable(mu)
self.alpha = alpha = tt.as_tensor_variable(alpha)
self.psi = psi = tt.as_tensor_variable(psi)
self.nb = NegativeBinomial.dist(mu, alpha)
self.mode = self.nb.mode

def random(self, point=None, size=None, repeat=None):
mu, alpha, psi = draw_values(
[self.mu, self.alpha, self.psi], point=point)
g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha,
dist_shape=self.shape,
size=size)
g[g == 0] = np.finfo(float).eps # Just in case
sampled = stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi)
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
return tt.switch(value > 0,
tt.log(self.psi) + self.nb.logp(value),
tt.log((1. - self.psi) + self.psi * (self.alpha / (self.alpha + self.mu))**self.alpha))

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
mu = dist.mu
alpha = dist.alpha
psi = dist.psi
return r'${} \sim \text{{ZeroInflatedNegativeBinomial}}(\mathit{{mu}}={}, \mathit{{alpha}}={}, \mathit{{psi}}={})$'.format(name,
get_variable_name(mu),
get_variable_name(alpha),
get_variable_name(psi))
153 changes: 153 additions & 0 deletions pymc3/distributions/mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from ..math import logsumexp
from .dist_math import bound
from .distribution import Discrete, Distribution, draw_values, generate_samples
from .discrete import Constant, Poisson, Binomial, NegativeBinomial
from .continuous import get_tau_sd, Normal

__all__ = ['ZeroInflatedPoisson', 'ZeroInflatedBinomial',
'ZeroInflatedNegativeBinomial', 'NormalMixture']

def all_discrete(comp_dists):
"""
Expand Down Expand Up @@ -140,6 +143,156 @@ def random_choice(*args, **kwargs):
return np.squeeze(comp_samples[w_samples])


class ZeroInflatedPoisson(Mixture):
R"""
Zero-inflated Poisson log-likelihood.

Two-component mixture of zeros and Poisson-distributed data.

Often used to model the number of events occurring in a fixed period
of time when the times at which events occur are independent.

.. math::

f(x \mid \psi, \theta) = \left\{ \begin{array}{l}
(1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\
\psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots
\end{array} \right.

======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\psi\theta`
Variance :math:`\theta + \frac{1-\psi}{\psi}\theta^2`
======== ==========================

Parameters
----------
psi : float
Expected proportion of Poisson variates (0 < psi < 1)
theta : float
Expected number of occurrences during the given interval
(theta >= 0).


"""

def __init__(self, psi, theta, *args, **kwargs):
self.theta = theta = tt.as_tensor_variable(theta)
super(ZeroInflatedPoisson, self).__init__([1-psi, psi],
(Constant.dist(0), Poisson.dist(theta)),
*args, **kwargs)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
theta = dist.theta
psi = dist.w
return r'${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{psi}}={}, \mathit{{theta}}={})$'.format(name,
get_variable_name(psi),
get_variable_name(theta))


class ZeroInflatedBinomial(Mixture):
R"""
Zero-inflated Binomial log-likelihood.

Two-component mixture of zeros and binomial-distributed data.

.. math::

f(x \mid \psi, n, p) = \left\{ \begin{array}{l}
(1-\psi) + \psi (1-p)^{n}, \text{if } x = 0 \\
\psi {n \choose x} p^x (1-p)^{n-x}, \text{if } x=1,2,3,\ldots,n
\end{array} \right.

======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`(1 - \psi) n p`
Variance :math:`(1-\psi) n p [1 - p(1 - \psi n)].`
======== ==========================

Parameters
----------
psi : float
Expected proportion of Binomial variates (0 < psi < 1)
n : int
Number of Bernoulli trials (n >= 0).
p : float
Probability of success in each trial (0 < p < 1).

"""

def __init__(self, psi, n, p, *args, **kwargs):
self.n = n = tt.as_tensor_variable(n)
self.p = p = tt.as_tensor_variable(p)
super(ZeroInflatedBinomial, self).__init__([1-psi, psi],
(Constant.dist(0), Binomial.dist(n, p)),
*args, **kwargs)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
n = dist.n
p = dist.p
psi = dist.w
return r'${} \sim \text{{ZeroInflatedBinomial}}(\mathit{{psi}}={}, \mathit{{n}}={}, \mathit{{p}}={})$'.format(name,
get_variable_name(psi),
get_variable_name(n),
get_variable_name(p))


class ZeroInflatedNegativeBinomial(Mixture):
R"""
Zero-Inflated Negative binomial log-likelihood.

Two-component mixture of zeros and negative binomial-distributed data.

The Zero-inflated version of the Negative Binomial (NB).
The NB distribution describes a Poisson random variable
whose rate parameter is gamma distributed.

.. math::

f(x \mid \psi, \mu, \alpha) = \left\{ \begin{array}{l}
(1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\
\psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots
\end{array} \right.

======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\psi\mu`
Var :math:`\psi\mu + \left (1 + \frac{\mu}{\alpha} + \frac{1-\psi}{\mu} \right)`
======== ==========================

Parameters
----------
psi : float
Expected proportion of NegativeBinomial variates (0 < psi < 1)
mu : float
Poission distribution parameter (mu > 0).
alpha : float
Gamma distribution parameter (alpha > 0).
"""

def __init__(self, psi, mu, alpha, *args, **kwargs):
self.mu = mu = tt.as_tensor_variable(mu)
self.alpha = alpha = tt.as_tensor_variable(alpha)
super(ZeroInflatedNegativeBinomial, self).__init__([1-psi, psi],
(Constant.dist(0), NegativeBinomial.dist(mu, alpha)),
*args, **kwargs)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
mu = dist.mu
alpha = dist.alpha
psi = dist.w
return r'${} \sim \text{{ZeroInflatedNegativeBinomial}}(\mathit{{psi}}={}, \mathit{{mu}}={}, \mathit{{alpha}}={})$'.format(name,
get_variable_name(psi),
get_variable_name(mu),
get_variable_name(alpha))


class NormalMixture(Mixture):
R"""
Normal mixture log-likelihood
Expand Down
10 changes: 7 additions & 3 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
NegativeBinomial, Geometric, Exponential, ExGaussian, Normal,
Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, Gumbel,
Interpolated)
Interpolated, ZeroInflatedBinomial)
from ..distributions import continuous
from pymc3.theanof import floatX
from numpy import array, inf, log, exp
Expand Down Expand Up @@ -585,11 +585,15 @@ def test_constantdist(self):
lambda value, c: np.log(c == value))

def test_zeroinflatedpoisson(self):
self.checkd(ZeroInflatedPoisson, Nat, {'theta': Rplus, 'psi': Unit})
self.checkd(ZeroInflatedPoisson, Nat, {'psi': Unit, 'theta': Rplus})

def test_zeroinflatednegativebinomial(self):
self.checkd(ZeroInflatedNegativeBinomial, Nat,
{'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit})
{'psi': Unit, 'mu': Rplusbig, 'alpha': Rplusbig})

def test_zeroinflatedbinomial(self):
self.checkd(ZeroInflatedBinomial, Nat,
{'psi': Unit, 'n': NatSmall, 'p': Unit})

@pytest.mark.parametrize('n', [1, 2, 3])
def test_mvnormal(self, n):
Expand Down
7 changes: 5 additions & 2 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,13 +331,16 @@ class TestConstant(BaseTestCases.BaseTestCase):

class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase):
distribution = pm.ZeroInflatedPoisson
params = {'theta': 1., 'psi': 0.3}
params = {'psi': 0.3, 'theta': 1.}


class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase):
distribution = pm.ZeroInflatedNegativeBinomial
params = {'mu': 1., 'alpha': 1., 'psi': 0.3}
params = {'psi': 0.3, 'mu': 1., 'alpha': 1.}

class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase):
distribution = pm.ZeroInflatedBinomial
params = {'psi': 0.3, 'n': 10, 'p': 0.6}

class TestDiscreteUniform(BaseTestCases.BaseTestCase):
distribution = pm.DiscreteUniform
Expand Down
2 changes: 1 addition & 1 deletion pymc3/tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def build_model(self):
# Estimated mean count
theta = pm.Uniform('theta', 0, 100)
# Poisson likelihood
pm.ZeroInflatedPoisson('y', theta, psi, observed=self.y)
pm.ZeroInflatedPoisson('y', psi, theta, observed=self.y)
return model

def test_run(self):
Expand Down