Skip to content

BUG: automatic imputation with pm.observe #7430

Closed as not planned
Closed as not planned
@williambdean

Description

@williambdean

Describe the issue:

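I would expect a model that receives data containing NaN values through `pm.observe` to behave like a model that is given the same data directly via `observed=` (without `pm.observe`). However, the `pm.observe` version raises a `SamplingError` when the model is evaluated at the initial point.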

Reproducible code example:

import pymc as pm
import numpy as np

import matplotlib.pyplot as plt

import arviz as az


def normal_declaration(data):
    """Build a Normal model with ``data`` passed directly via ``observed=``.

    The model has one coordinate, ``"idx"``, spanning ``range(len(data))``.
    The likelihood ``"obs"`` is a Normal over the ``"idx"`` dimension with
    a ``Normal`` prior named ``"mu"`` for its mean and a ``HalfNormal``
    prior named ``"sigma"`` for its scale.

    Parameters
    ----------
    data
        Sized sequence of observations handed to ``observed=``.

    Returns
    -------
    The constructed ``pm.Model``.
    """
    n_obs = len(data)
    with pm.Model(coords={"idx": range(n_obs)}) as direct_model:
        # Priors are created in the same order as the inline form: mu, then sigma.
        mu_prior = pm.Normal("mu")
        sigma_prior = pm.HalfNormal("sigma")
        pm.Normal(
            "obs",
            mu=mu_prior,
            sigma=sigma_prior,
            observed=data,
            dims="idx",
        )

    return direct_model


def work_around(data):
    """Build the same Normal model without data, then attach it with ``pm.observe``.

    The generative model has one coordinate, ``"idx"``, spanning
    ``range(len(data))``, and an unobserved Normal variable ``"obs"`` over
    that dimension with priors ``"mu"`` (Normal) and ``"sigma"`` (HalfNormal).
    ``data`` is attached afterwards by ``pm.observe(model, {"obs": data})``.

    Parameters
    ----------
    data
        Sized sequence of observations to attach to ``"obs"``.

    Returns
    -------
    The result of ``pm.observe`` applied to the generative model.
    """
    index_coords = {"idx": range(len(data))}
    with pm.Model(coords=index_coords) as prior_only_model:
        # Priors are created in the same order as the inline form: mu, then sigma.
        mu_prior = pm.Normal("mu")
        sigma_prior = pm.HalfNormal("sigma")
        pm.Normal("obs", mu=mu_prior, sigma=sigma_prior, dims="idx")

    observed_model = pm.observe(prior_only_model, {"obs": data})
    return observed_model

# Derive a fixed integer seed from a fixed string so the simulated data
# is the same on every run.
seed = sum(map(ord, "impute observe bug"))
rng = np.random.default_rng(seed)

# Mean and standard deviation of the simulated Normal data.
mu = 5
sigma = 0.25

data = rng.normal(mu, sigma, size=250)

# Draw a boolean mask with the same shape as `data` (one True among four
# equally weighted choices) and overwrite the selected entries with NaN.
missing_idx = rng.choice([True, False, False, False], size=data.shape)
data[missing_idx] = np.nan

# Case 1: data (including NaNs) passed directly via `observed=`.
with normal_declaration(data):
    idata = pm.sample()

# Case 2: the same data attached through `pm.observe`; this is the call
# reported to fail.
with work_around(data):
    # SamplingError: Initial evaluation of model at starting point failed!
    idata_workaround = pm.sample()

Error message:

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
------------------------------------------------------------------------
SamplingError                          Traceback (most recent call last)
Cell In[27], line 2
      1 with work_around(data):
----> 2     idata_workaround = pm.sample()

File ~/.../python3.10/site-packages/pymc/sampling/mcmc.py:740, in sample(draws, tune, chains, cores, random_seed, progressbar, progressbar_theme, step, var_names, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    738 ip: dict[str, np.ndarray]
    739 for ip in initial_points:
--> 740     model.check_start_vals(ip)
    741     _check_start_shape(model, ip)
    743 if var_names is not None:

File ~/../python3.10/site-packages/pymc/model/core.py:1765, in Model.check_start_vals(self, start)
   1762 initial_eval = self.point_logps(point=elem)
   1764 if not all(np.isfinite(v) for v in initial_eval.values()):
-> 1765     raise SamplingError(
   1766         "Initial evaluation of model at starting point failed!\n"
   1767         f"Starting values:\n{elem}\n\n"
   1768         f"Logp initial evaluation results:\n{initial_eval}\n"
   1769         "You can call `model.debug()` for more details."
   1770     )

SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'mu': array(-0.77608933), 'sigma_log__': array(-0.20096084)}

Logp initial evaluation results:
{'mu': -1.22, 'sigma': -0.76, 'obs': nan}
You can call `model.debug()` for more details.

PyMC version information:

5.16.2

Context for the issue:

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions