
check for original coords #5761

Merged
67 changes: 45 additions & 22 deletions pymc/model.py
@@ -46,7 +46,7 @@
 from aesara.tensor.random.opt import local_subtensor_rv_lift
 from aesara.tensor.random.var import RandomStateSharedVariable
 from aesara.tensor.sharedvar import ScalarSharedVariable
-from aesara.tensor.var import TensorVariable
+from aesara.tensor.var import TensorConstant, TensorVariable

 from pymc.aesaraf import (
     compile_pymc,
@@ -61,7 +61,7 @@
 from pymc.distributions import joint_logpt
 from pymc.distributions.logprob import _get_scaling
 from pymc.distributions.transforms import _default_transform
-from pymc.exceptions import ImputationWarning, SamplingError, ShapeError
+from pymc.exceptions import ImputationWarning, SamplingError, ShapeError, ShapeWarning
 from pymc.initial_point import make_initial_point_fn
 from pymc.math import flatten_list
 from pymc.util import (
@@ -1179,23 +1179,48 @@ def set_data(
         # Reject resizing if we already know that it would create shape problems.
         # NOTE: If there are multiple pm.MutableData containers sharing this dim, but the user only
         #       changes the values for one of them, they will run into shape problems nonetheless.
-        length_belongs_to = length_tensor.owner.inputs[0].owner.inputs[0]
-        if not isinstance(length_belongs_to, SharedVariable) and length_changed:
-            raise ShapeError(
-                f"Resizing dimension '{dname}' with values of length {new_length} would lead to incompatibilities, "
-                f"because the dimension was initialized from '{length_belongs_to}' which is not a shared variable. "
-                f"Check if the dimension was defined implicitly before the shared variable '{name}' was created, "
-                f"for example by a model variable.",
-                actual=new_length,
-                expected=old_length,
-            )
-        if original_coords is not None and length_changed:
-            if length_changed and new_coords is None:
-                raise ValueError(
-                    f"The '{name}' variable already had {len(original_coords)} coord values defined for"
-                    f"its {dname} dimension. With the new values this dimension changes to length "
-                    f"{new_length}, so new coord values for the {dname} dimension are required."
-                )
+        if length_changed:
+            if isinstance(length_tensor, TensorConstant):
+                raise ShapeError(
+                    f"Resizing dimension '{dname}' is impossible, because "
+                    f"a 'TensorConstant' stores its length. To be able "
+                    f"to change the dimension length, 'fixed' in "
+                    f"'model.add_coord' must be set to `False`."
+                )
+            if length_tensor.owner is None:
+                # This is the case if the dimension was initialized
+                # from custom coords, but dimension length was not
+                # stored in TensorConstant e.g by 'fixed' set to False
+
+                warnings.warn(
+                    f"You're changing the shape of a variable "
+                    f"in the '{dname}' dimension which was initialized "
+                    f"from coords. Make sure to update the corresponding "
+                    f"coords, otherwise you'll get shape issues.",
+                    ShapeWarning,
+                )
+            else:
+                length_belongs_to = length_tensor.owner.inputs[0].owner.inputs[0]
+                if not isinstance(length_belongs_to, SharedVariable):
+                    raise ShapeError(
+                        f"Resizing dimension '{dname}' with values of length {new_length} would lead to incompatibilities, "
+                        f"because the dimension was initialized from '{length_belongs_to}' which is not a shared variable. "
+                        f"Check if the dimension was defined implicitly before the shared variable '{name}' was created, "
+                        f"for example by another model variable.",
+                        actual=new_length,
+                        expected=old_length,
+                    )
+            if original_coords is not None:
+                if new_coords is None:
+                    raise ValueError(
+                        f"The '{name}' variable already had {len(original_coords)} coord values defined for "
+                        f"its {dname} dimension. With the new values this dimension changes to length "
+                        f"{new_length}, so new coord values for the {dname} dimension are required."
+                    )
+            if isinstance(length_tensor, ScalarSharedVariable):
+                # Updating the shared variable resizes dependent nodes that use this dimension for their `size`.
+                length_tensor.set_value(new_length)
 
         if new_coords is not None:
             # Update the registered coord values (also if they were None)
             if len(new_coords) != new_length:
@@ -1204,10 +1229,8 @@
                     actual=len(new_coords),
                     expected=new_length,
                 )
-            self._coords[dname] = new_coords
-        if isinstance(length_tensor, ScalarSharedVariable) and new_length != old_length:
-            # Updating the shared variable resizes dependent nodes that use this dimension for their `size`.
-            length_tensor.set_value(new_length)
+            # store it as tuple for immutability as in add_coord
+            self._coords[dname] = tuple(new_coords)
 
         shared_object.set_value(values)
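Taken together, the new `length_changed` branch distinguishes three situations: a dimension whose length is stored in a `TensorConstant` (resizing is impossible), a dimension initialized from coords with no owner on its length tensor (resizing is allowed, but the registered coords must be updated, hence the `ShapeWarning`), and a dimension backed by a shared variable (resizing works, with replacement coord values required whenever coords were registered). The following self-contained sketch distills that control flow; the `check_resize` helper, its boolean flags, and the exception stand-ins are hypothetical illustrations, not part of the PyMC API.

import warnings


class ShapeError(ValueError):
    """Illustrative stand-in for pymc.exceptions.ShapeError."""


class ShapeWarning(UserWarning):
    """Illustrative stand-in for pymc.exceptions.ShapeWarning."""


def check_resize(dname, is_constant, has_owner, original_coords, new_coords):
    # Mirrors the order of the checks added to Model.set_data above.
    if is_constant:
        # Length is baked into a TensorConstant: the dim can never be resized.
        raise ShapeError(f"Resizing dimension '{dname}' is impossible.")
    if not has_owner:
        # Dim was initialized from coords without a stored constant length:
        # resizing is allowed, but the coords must be kept in sync.
        warnings.warn(f"Update the coords of '{dname}' as well.", ShapeWarning)
    if original_coords is not None and new_coords is None:
        # Coord values are already registered: resizing needs replacements.
        raise ValueError(f"New coord values for '{dname}' are required.")


# Fixed dimension: always an error.
try:
    check_resize("city", is_constant=True, has_owner=False,
                 original_coords=("Bonn", "Berlin"), new_coords=None)
except ShapeError as exc:
    print(exc)

# Coord-initialized dimension: warns, but passes because new coords are given.
check_resize("city", is_constant=False, has_owner=False,
             original_coords=("Bonn", "Berlin"),
             new_coords=("Bonn", "Berlin", "Paris"))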
5 changes: 4 additions & 1 deletion pymc/tests/test_data_container.py
@@ -316,7 +316,10 @@ def test_explicit_coords(self):
         # pass coordinates explicitly, use numpy array in Data container
         with pm.Model(coords=coords) as pmodel:
             pm.MutableData("observations", data, dims=("rows", "columns"))
-
+            # new data with same shape
+            pm.set_data({"observations": data + 1})
+            # new data with same shape and coords
+            pm.set_data({"observations": data}, coords=coords)
         assert "rows" in pmodel.coords
         assert pmodel.coords["rows"] == ("R1", "R2", "R3", "R4", "R5")
         assert "rows" in pmodel.dim_lengths
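For orientation, here is a minimal usage sketch of the pattern the updated test exercises. The "rows" labels come from the test's assertion; the column labels and the zero-valued data array are assumptions made for illustration.

import numpy as np
import pymc as pm

coords = {
    "rows": ["R1", "R2", "R3", "R4", "R5"],  # labels taken from the test's assertion
    "columns": ["C1", "C2", "C3"],           # assumed labels for illustration
}
data = np.zeros((5, 3))  # assumed data matching the coord lengths

with pm.Model(coords=coords) as pmodel:
    pm.MutableData("observations", data, dims=("rows", "columns"))
    # New values with the same shape: no coords argument needed.
    pm.set_data({"observations": data + 1})
    # Passing coords explicitly also works for same-shape updates.
    pm.set_data({"observations": data}, coords=coords)

# Coord values are re-registered as tuples (see the model.py change above).
assert pmodel.coords["rows"] == ("R1", "R2", "R3", "R4", "R5")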