-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Closed
Labels
Description
I encountered two problems when using pm.Data()/pm.set_data(). (I am not sure whether they are actually related or whether they warrant two separate issues.)
-
When trying to predict a single new observation, I instead get the predictions for the training data.
-
After I set new data, the trace seems to have changed its shape, so plotting with ArviZ breaks. I would expect the original trace to remain unchanged when predicting.
Please provide a minimal, self-contained, and reproducible example.
import numpy as np
import pymc3 as pm
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt
size = 200
true_intercept = 1
true_slope = 2
x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add noise
y = true_regression_line + np.random.normal(scale=.5, size=size)
data = pd.DataFrame(data={"x":x, "y":y})
del x
del y
with pm.Model() as model:
x = pm.Data("x", data["x"])
# Define priors
sigma = pm.HalfCauchy('sigma', beta=10, testval=1.)
intercept = pm.Normal('Intercept', 0, sigma=20)
x_coeff = pm.Normal('x_coeff', 0, sigma=20)
# Define likelihood
likelihood = pm.Normal('y', mu=intercept + x_coeff * x,
sigma=sigma, observed=data["y"])
# Inference!
trace = pm.sample(1000, cores=2) # draw 1000 posterior samples per chain using NUTS sampling
# this works fine
az.plot_trace(trace)
plt.show()
more_data = np.linspace(1, 1.2, num=3)
with model:
pm.set_data({"x": more_data})
post_pred = pm.sample_posterior_predictive(trace, samples=1000)
post_pred["y"].shape
# (1000, 3) # shape is as expected
# now it breaks
az.plot_trace(trace)
plt.show()
Please provide the full traceback.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
902 outputs =\
--> 903 self.fn() if output_subset is None else\
904 self.fn(output_subset=output_subset)
ValueError: Input dimension mis-match. (input[3].shape[0] = 200, input[6].shape[0] = 3)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-20-168e5bf23973> in <module>
----> 1 az.plot_trace(trace)
2 plt.show()
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/plots/traceplot.py in plot_trace(data, var_names, coords, divergences, figsize, textsize, lines, compact, combined, legend, plot_kwargs, fill_kwargs, rug_kwargs, hist_kwargs, trace_kwargs)
121 coords = {}
122
--> 123 data = get_coords(convert_to_dataset(data, group="posterior"), coords)
124 var_names = _var_names(var_names, data)
125
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/converters.py in convert_to_dataset(obj, group, coords, dims)
160 xarray.Dataset
161 """
--> 162 inference_data = convert_to_inference_data(obj, group=group, coords=coords, dims=dims)
163 dataset = getattr(inference_data, group, None)
164 if dataset is None:
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/converters.py in convert_to_inference_data(obj, group, coords, dims, **kwargs)
81 return from_pystan(**kwargs)
82 elif obj.__class__.__name__ == "MultiTrace": # ugly, but doesn't make PyMC3 a requirement
---> 83 return from_pymc3(trace=kwargs.pop(group), **kwargs)
84 elif obj.__class__.__name__ == "EnsembleSampler": # ugly, but doesn't make emcee a requirement
85 return from_emcee(sampler=kwargs.pop(group), **kwargs)
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/io_pymc3.py in from_pymc3(trace, prior, posterior_predictive, coords, dims)
224 posterior_predictive=posterior_predictive,
225 coords=coords,
--> 226 dims=dims,
227 ).to_inference_data()
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/io_pymc3.py in to_inference_data(self)
208 **{
209 "posterior": self.posterior_to_xarray(),
--> 210 "sample_stats": self.sample_stats_to_xarray(),
211 "posterior_predictive": self.posterior_predictive_to_xarray(),
212 "prior": self.prior_to_xarray(),
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/base.py in wrapped(cls, *args, **kwargs)
30 if all([getattr(cls, prop_i) is None for prop_i in prop]):
31 return None
---> 32 return func(cls, *args, **kwargs)
33
34 return wrapped
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/io_pymc3.py in sample_stats_to_xarray(self)
104 name = rename_key.get(stat, stat)
105 data[name] = np.array(self.trace.get_sampler_stats(stat, combine=False))
--> 106 log_likelihood, dims = self._extract_log_likelihood()
107 if log_likelihood is not None:
108 data["log_likelihood"] = log_likelihood
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/base.py in wrapped(cls, *args, **kwargs)
30 if all([getattr(cls, prop_i) is None for prop_i in prop]):
31 return None
---> 32 return func(cls, *args, **kwargs)
33
34 return wrapped
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/base.py in wrapped(cls, *args, **kwargs)
30 if all([getattr(cls, prop_i) is None for prop_i in prop]):
31 return None
---> 32 return func(cls, *args, **kwargs)
33
34 return wrapped
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/io_pymc3.py in _extract_log_likelihood(self)
81 chain_likelihoods = []
82 for chain in self.trace.chains:
---> 83 log_like = [log_likelihood_vals_point(point) for point in self.trace.points([chain])]
84 chain_likelihoods.append(np.stack(log_like))
85 return np.stack(chain_likelihoods), coord_name
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/io_pymc3.py in <listcomp>(.0)
81 chain_likelihoods = []
82 for chain in self.trace.chains:
---> 83 log_like = [log_likelihood_vals_point(point) for point in self.trace.points([chain])]
84 chain_likelihoods.append(np.stack(log_like))
85 return np.stack(chain_likelihoods), coord_name
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/arviz/data/io_pymc3.py in log_likelihood_vals_point(point)
73 log_like_vals = []
74 for var, log_like in cached:
---> 75 log_like_val = utils.one_de(log_like(point))
76 if var.missing_values:
77 log_like_val = log_like_val[~var.observations.mask]
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/pymc3/model.py in __call__(self, *args, **kwargs)
1184 def __call__(self, *args, **kwargs):
1185 point = Point(model=self.model, *args, **kwargs)
-> 1186 return self.f(**point)
1187
1188 compilef = fastfn
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
915 node=self.fn.nodes[self.fn.position_of_error],
916 thunk=thunk,
--> 917 storage_map=getattr(self.fn, 'storage_map', None))
918 else:
919 # old-style linkers raise their own exceptions
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
323 # extra long error message in that case.
324 pass
--> 325 reraise(exc_type, exc_value, exc_trace)
326
327
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
~/.pyenv/versions/anaconda3-2019.03/lib/python3.7/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
901 try:
902 outputs =\
--> 903 self.fn() if output_subset is None else\
904 self.fn(output_subset=output_subset)
905 except Exception:
ValueError: Input dimension mis-match. (input[3].shape[0] = 200, input[6].shape[0] = 3)
Apply node that caused the error: Elemwise{Composite{Switch(i0, (i1 * ((-(i2 * sqr((i3 - (i4 + (i5 * i6)))))) + i7)), i8)}}(Elemwise{Composite{Cast{int8}(GT(i0, i1))}}.0, TensorConstant{(1,) of 0.5}, Elemwise{Composite{inv(sqr(i0))}}[(0, 0)].0, TensorConstant{[ 0.803409...29820321]}, InplaceDimShuffle{x}.0, InplaceDimShuffle{x}.0, x, Elemwise{Composite{log((i0 * i1))}}.0, TensorConstant{(1,) of -inf})
Toposort index: 7
Inputs types: [TensorType(int8, (True,)), TensorType(float64, (True,)), TensorType(float64, (True,)), TensorType(float64, vector), TensorType(float64, (True,)), TensorType(float64, (True,)), TensorType(float64, vector), TensorType(float64, (True,)), TensorType(float32, (True,))]
Inputs shapes: [(1,), (1,), (1,), (200,), (1,), (1,), (3,), (1,), (1,)]
Inputs strides: [(1,), (8,), (8,), (8,), (8,), (8,), (8,), (8,), (4,)]
Inputs values: [array([1], dtype=int8), array([0.5]), array([4.09916064]), 'not shown', array([0.98964574]), array([2.02274036]), array([1. , 1.1, 1.2]), array([-0.42709483]), array([-inf], dtype=float32)]
Outputs clients: [['output']]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
And for the prediction with one observation:
one_data_pt = np.array([1.1])
with model:
pm.set_data({"x": one_data_pt})
post_pred = pm.sample_posterior_predictive(trace, samples=1000)
post_pred["y"].shape
# (1000, 200)
Expected output: (1000, 1). Inspecting the returned samples also confirms that they are the predictions for the original training data.
Versions and main components
- PyMC3 Version: 3.7
- Theano Version: Theano==1.0.4
- Python Version: 3.7
- Operating system: Ubuntu 19.04
- How did you install PyMC3: conda
fdetsch and PaoloRanzi81