Skip to content

Set data when building Linearmodel #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 13, 2023
67 changes: 26 additions & 41 deletions pymc_experimental/tests/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import hashlib
import json
import sys
Expand Down Expand Up @@ -42,10 +43,13 @@ def toy_y(toy_X):


@pytest.fixture(scope="module")
def fitted_model_instance(toy_X, toy_y):
def fitted_model_instance_base(toy_X, toy_y):
"""Because fitting takes a relatively long time, this is intended to
be used only once and then have copies returned to tests that use a fitted
model instance. Tests should use `fitted_model_instance` instead of this."""
sampler_config = {
"draws": 100,
"tune": 100,
"draws": 20,
"tune": 10,
"chains": 2,
"target_accept": 0.95,
}
Expand All @@ -61,6 +65,14 @@ def fitted_model_instance(toy_X, toy_y):
return model


@pytest.fixture
def fitted_model_instance(fitted_model_instance_base):
    """Return an independent deep copy of the module-scoped fitted model.

    Fitting is slow, so the base instance is built only once per module;
    each test receives its own copy and may freely mutate the model object
    without leaking state into other tests.
    """
    model_copy = copy.deepcopy(fitted_model_instance_base)
    return model_copy
Copy link
Member

@ricardoV94 ricardoV94 Nov 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy doesn't really work for objects that have PyMC models: see pymc-devs/pymc#6985

The approach is not too bad though. What I suggest is to create the idata once and then in this fixture recreate the model and glue-in a copy of the idata. I did something like that with a helper method in this PR: pymc-labs/pymc-marketing@44985a8

Check the _build_with_idata method and how that's used by thin_fit_result. Something similar could be used for a ModelBuilder.copy(), but for now you can just reimplement the logic in this fixture if you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I came up with a workaround for copying the model without using copy.deepcopy.

I also noticed that there's a test marked for skipping on win32 due to lack of permissions for temp files, but the marked test doesn't use a temp file. There is a different test that does use a temp file. I thought maybe the annotation got onto the wrong test, so I made a commit to fix that possible issue. If that's wrong or you want to handle it as its own issue, no problem, I'll take that commit back out.



class test_ModelBuilder(ModelBuilder):
def __init__(self, model_config=None, sampler_config=None, test_parameter=None):
self.test_parameter = test_parameter
Expand Down Expand Up @@ -131,8 +143,8 @@ def _generate_and_preprocess_model_data(
@staticmethod
def get_default_sampler_config() -> Dict:
return {
"draws": 1_000,
"tune": 1_000,
"draws": 10,
"tune": 10,
"chains": 3,
"target_accept": 0.95,
}
Expand Down Expand Up @@ -220,53 +232,26 @@ def test_sample_posterior_predictive(fitted_model_instance, combined):
assert np.issubdtype(pred[fitted_model_instance.output_var].dtype, np.floating)


@pytest.mark.parametrize("group", ["prior_predictive", "posterior_predictive"])
@pytest.mark.parametrize("extend_idata", [True, False])
def test_sample_prior_extend_idata_param(fitted_model_instance, extend_idata):
def test_sample_xxx_extend_idata_param(fitted_model_instance, group, extend_idata):
output_var = fitted_model_instance.output_var
idata_prev = fitted_model_instance.idata.prior_predictive[output_var]
idata_prev = fitted_model_instance.idata[group][output_var]

# Since coordinates are provided, the dimension must match
n_pred = 100 # Must match toy_x
x_pred = np.random.uniform(0, 1, n_pred)

prediction_data = pd.DataFrame({"input": x_pred})
pred = fitted_model_instance.sample_prior_predictive(
prediction_data["input"], combined=False, extend_idata=extend_idata
)

pred_unstacked = pred[output_var].values
idata_now = fitted_model_instance.idata.prior_predictive[output_var].values
if group == "prior_predictive":
prediction_method = fitted_model_instance.sample_prior_predictive
else: # group == "posterior_predictive":
prediction_method = fitted_model_instance.sample_posterior_predictive

if extend_idata:
# After sampling, data in the model should be the same as the predictions
np.testing.assert_array_equal(idata_now, pred_unstacked)
# Data in the model should NOT be the same as before
if idata_now.shape == idata_prev.values.shape:
assert np.sum(np.abs(idata_now - idata_prev.values) < 1e-5) <= 2
else:
# After sampling, data in the model should be the same as it was before
np.testing.assert_array_equal(idata_now, idata_prev.values)
# Data in the model should NOT be the same as the predictions
if idata_now.shape == pred_unstacked.shape:
assert np.sum(np.abs(idata_now - pred_unstacked) < 1e-5) <= 2


@pytest.mark.parametrize("extend_idata", [True, False])
def test_sample_posterior_extend_idata_param(fitted_model_instance, extend_idata):
output_var = fitted_model_instance.output_var
idata_prev = fitted_model_instance.idata.posterior_predictive[output_var]

# Since coordinates are provided, the dimension must match
n_pred = 100 # Must match toy_x
x_pred = np.random.uniform(0, 1, n_pred)

prediction_data = pd.DataFrame({"input": x_pred})
pred = fitted_model_instance.sample_posterior_predictive(
prediction_data["input"], combined=False, extend_idata=extend_idata
)
pred = prediction_method(prediction_data["input"], combined=False, extend_idata=extend_idata)

pred_unstacked = pred[output_var].values
idata_now = fitted_model_instance.idata.posterior_predictive[output_var].values
idata_now = fitted_model_instance.idata[group][output_var].values

if extend_idata:
# After sampling, data in the model should be the same as the predictions
Expand Down