
WIP: V4 update test framework for distributions random method #4580


Closed
Changes from all commits (44 commits)
b283649
Initial refactoring for RandomVariable use in Model, step methods, an…
brandonwillard Jan 25, 2021
02e25aa
Remove shape dependencies from DictToArrayBijection
brandonwillard Jan 29, 2021
d160316
Remove newly deprecated classes and functions
brandonwillard Feb 4, 2021
b5065dc
Refactor tests for compatibility with logp dispatch and RandomVariables
brandonwillard Feb 13, 2021
2596a75
Apply easy fixes to get tests to pass or xfail
michaelosthege Mar 8, 2021
8bd4733
Reinstate log-likelihood transforms
brandonwillard Mar 9, 2021
b7b2963
Remove remaining v3 sampling code
brandonwillard Mar 14, 2021
f1b4da9
Change logp_transform argument to the entire random variable
brandonwillard Mar 15, 2021
207fc06
Remove logpt transformed option
brandonwillard Mar 15, 2021
08da3cc
Implement transform for Dirichlet
brandonwillard Mar 15, 2021
af6ffac
Always use the value var to initially build the log-likelihood
brandonwillard Mar 15, 2021
121c517
Add an option for negative support assertions in Normal and Gamma cla…
brandonwillard Mar 15, 2021
a5b1690
Fix Categorical logp implementation
brandonwillard Mar 15, 2021
3d4a8b4
Refactored distributions in pymc.distributions.continuous
kc611 Mar 15, 2021
63c2688
Simplify the new Distribution interface and convert a few more
brandonwillard Mar 16, 2021
5bb71d9
Make transform objects stateless
brandonwillard Mar 17, 2021
319dc2e
Add non_sequences to uses of Scan Op
brandonwillard Mar 20, 2021
1fde7e3
Replace Observed Op with tag.observations
brandonwillard Mar 20, 2021
1588645
Comment out unused moments
brandonwillard Mar 20, 2021
801c61a
Make logpt work correctly for nested models and transforms
brandonwillard Mar 21, 2021
8bcbe59
Make Metropolis, Slice, PGBART, MetropolisMLDA use point values
brandonwillard Mar 23, 2021
886e3f8
Set default transform for Dirichlet
brandonwillard Mar 24, 2021
ea324a3
Normalize Multinomial argument
brandonwillard Mar 24, 2021
9ace182
Fix Interval.jacobian_det
brandonwillard Mar 24, 2021
134c90e
Fix Stickbreaking scalar condition
brandonwillard Mar 24, 2021
fcd277c
Make v4 compatibility changes to pymc3.tests.test_sampling
brandonwillard Mar 24, 2021
e829c19
Make pymc3.tests.test_transforms work with None RV variables
brandonwillard Mar 24, 2021
c937600
Fix MvNormal quaddist_matrix parameter order
brandonwillard Mar 25, 2021
60a90be
Enable MvNormal tests in test_distributions
brandonwillard Mar 25, 2021
931c494
Fix NegativeBinomial parameterization and enable its tests
brandonwillard Mar 25, 2021
7bfcebf
Prevent SciPy error by using float64 point in test_dirichlet_with_bat…
brandonwillard Mar 25, 2021
90ed8e1
Create extract_obs_data function
brandonwillard Mar 26, 2021
ab31fc8
Re-enable Arviz tests in pymc3.tests.test_sampling
brandonwillard Mar 26, 2021
a3ad9d1
Fix HalfCauchy/HalfCauchyRV parameterization
ricardoV94 Mar 16, 2021
15a29b2
Fix HalfNormal/HalfNormalRV parameterization
ricardoV94 Mar 17, 2021
a5d8609
Refactor Beta to use custom rng_fn clipped_beta_rv
ricardoV94 Mar 17, 2021
3885d65
Re-enable v4 xfails in pymc3.distributions.dist_math
brandonwillard Mar 27, 2021
2d16aa5
Change shape to size in pymc3.tests.test_step
brandonwillard Mar 27, 2021
4231ee2
Remove incorrect size for multivariate distributions in pymc3.tests.m…
brandonwillard Mar 27, 2021
ab41e0d
Fix extra_vars in call to ValueGradFunction from Model
brandonwillard Mar 27, 2021
c6f2f31
Update tests following distributions refactoring
matteo-pallini Mar 28, 2021
199451d
Change tests for more refactored distributions.
matteo-pallini Apr 1, 2021
a312231
Change tests for refactored distributions
matteo-pallini Apr 1, 2021
5008194
Remove tests for random variable samples shape and size
matteo-pallini Apr 1, 2021
282 changes: 5 additions & 277 deletions pymc3/aesaraf.py
@@ -11,17 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import (
Callable,
Dict,
Generator,
Iterable,
List,
Optional,
Set,
Tuple,
Union,
)
from typing import Dict, List

import aesara
import aesara.tensor as at
@@ -30,29 +20,16 @@

from aesara import config, scalar
from aesara.gradient import grad
from aesara.graph.basic import (
Apply,
Constant,
Variable,
clone_get_equiv,
graph_inputs,
walk,
)
from aesara.graph.fg import FunctionGraph
from aesara.graph.op import Op, compute_test_value
from aesara.graph.basic import Apply, Constant, graph_inputs
from aesara.graph.op import Op
from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream
from aesara.tensor.elemwise import Elemwise
from aesara.tensor.random.op import RandomVariable
from aesara.tensor.sharedvar import SharedVariable
from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1
from aesara.tensor.var import TensorVariable

from pymc3.vartypes import continuous_types, int_types, isgenerator, typefilter

PotentialShapeType = Union[
int, np.ndarray, Tuple[Union[int, Variable], ...], List[Union[int, Variable]], Variable
]

from pymc3.data import GeneratorAdapter
from pymc3.vartypes import continuous_types, int_types, typefilter

__all__ = [
"gradient",
@@ -75,124 +52,6 @@
]


def pandas_to_array(data):
"""Convert a pandas object to a NumPy array.

XXX: When `data` is a generator, this will return an Aesara tensor!

"""
if hasattr(data, "to_numpy") and hasattr(data, "isnull"):
# typically, but not limited to pandas objects
vals = data.to_numpy()
mask = data.isnull().to_numpy()
if mask.any():
# there are missing values
ret = np.ma.MaskedArray(vals, mask)
else:
ret = vals
elif isinstance(data, np.ndarray):
if isinstance(data, np.ma.MaskedArray):
if not data.mask.any():
# empty mask
ret = data.filled()
else:
# already masked and rightly so
ret = data
else:
# already a ndarray, but not masked
mask = np.isnan(data)
if np.any(mask):
ret = np.ma.MaskedArray(data, mask)
else:
# no masking required
ret = data
elif isinstance(data, Variable):
ret = data
elif sps.issparse(data):
ret = data
elif isgenerator(data):
ret = generator(data)
else:
ret = np.asarray(data)

# type handling to enable index variables when data is int:
if hasattr(data, "dtype"):
if "int" in str(data.dtype):
return intX(ret)
# otherwise, assume float:
else:
return floatX(ret)
# needed for uses of this function other than with pm.Data:
else:
return floatX(ret)
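
# Illustrative sketch: pandas_to_array turns pandas missing values into a NumPy
# MaskedArray. Assumes a checkout on which pymc3.aesaraf still provides
# pandas_to_array; the variable names below are made up.
import numpy as np
import pandas as pd

from pymc3.aesaraf import pandas_to_array

s = pd.Series([1.0, np.nan, 3.0])
arr = pandas_to_array(s)
assert isinstance(arr, np.ma.MaskedArray)  # the NaN entry is masked, not dropped
assert bool(arr.mask[1])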


def change_rv_size(
rv_var: TensorVariable,
new_size: PotentialShapeType,
expand: Optional[bool] = False,
) -> TensorVariable:
"""Change or expand the size of a `RandomVariable`.

Parameters
==========
rv_var
The `RandomVariable` output.
new_size
The new size.
expand:
Whether or not to completely replace the `size` parameter in `rv_var`
with `new_size` or simply prepend it to the existing `size`.

"""
rv_node = rv_var.owner
rng, size, dtype, *dist_params = rv_node.inputs
name = rv_var.name
tag = rv_var.tag

if expand:
new_size = tuple(np.atleast_1d(new_size)) + tuple(size)

new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params)
rv_var = new_rv_node.outputs[-1]
rv_var.name = name
for k, v in tag.__dict__.items():
rv_var.tag.__dict__.setdefault(k, v)

if config.compute_test_value != "off":
compute_test_value(new_rv_node)

return rv_var
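
# Illustrative sketch: resizing an Aesara RandomVariable with change_rv_size.
# Assumes a checkout on which pymc3.aesaraf still provides change_rv_size and that
# Aesara's at.random.normal is available; the names below are made up.
import aesara.tensor as at

from pymc3.aesaraf import change_rv_size

rv = at.random.normal(0, 1, size=(3,))                     # draws of shape (3,)
resized = change_rv_size(rv, new_size=(10,))               # size replaced -> (10,)
expanded = change_rv_size(rv, new_size=(5,), expand=True)  # size prepended -> (5, 3)

assert resized.eval().shape == (10,)
assert expanded.eval().shape == (5, 3)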


def extract_rv_and_value_vars(
var: TensorVariable,
) -> Tuple[TensorVariable, TensorVariable]:
"""Extract a random variable and its corresponding value variable from a generic
`TensorVariable`.

Parameters
==========
var
A variable corresponding to a `RandomVariable`.

Returns
=======
The first value in the tuple is the `RandomVariable`, and the second is the
measure-space variable that corresponds with the latter (i.e. the "value"
variable).

"""
if not var.owner:
return None, None

if isinstance(var.owner.op, RandomVariable):
rv_value = getattr(var.tag, "observations", getattr(var.tag, "value_var", None))
return var, rv_value

return None, None
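
# Illustrative sketch: extract_rv_and_value_vars pairs a RandomVariable with the
# value variable stored in its tag. pymc3.Model normally sets tag.value_var during
# registration; here it is wired up by hand, and all names are made up.
import aesara.tensor as at

from pymc3.aesaraf import extract_rv_and_value_vars

x_rv = at.random.normal(0, 1, name="x")
x_value = x_rv.type(name="x_value")  # a value variable of the same type
x_rv.tag.value_var = x_value

rv, value = extract_rv_and_value_vars(x_rv)
assert rv is x_rv and value is x_value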


def extract_obs_data(x: TensorVariable) -> np.ndarray:
"""Extract data observed symbolic variables.

@@ -215,137 +74,6 @@ def extract_obs_data(x: TensorVariable) -> np.ndarray:
raise TypeError(f"Data cannot be extracted from {x}")


def walk_model(
graphs: Iterable[TensorVariable],
walk_past_rvs: bool = False,
stop_at_vars: Optional[Set[TensorVariable]] = None,
expand_fn: Callable[[TensorVariable], Iterable[TensorVariable]] = lambda var: [],
) -> Generator[TensorVariable, None, None]:
"""Walk model graphs and yield their nodes.

By default, these walks will not go past ``RandomVariable`` nodes.

Parameters
==========
graphs
The graphs to walk.
walk_past_rvs
If ``True``, the walk will not terminate at ``RandomVariable``s.
stop_at_vars
A list of variables at which the walk will terminate.
expand_fn
A function that returns the next variable(s) to be traversed.
"""
if stop_at_vars is None:
stop_at_vars = set()

def expand(var):
new_vars = expand_fn(var)

if (
var.owner
and (walk_past_rvs or not isinstance(var.owner.op, RandomVariable))
and (var not in stop_at_vars)
):
new_vars.extend(reversed(var.owner.inputs))

return new_vars

yield from walk(graphs, expand, False)
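
# Illustrative sketch: by default walk_model yields RandomVariable outputs but does
# not descend into their inputs (rng, size, dtype, parameters). Assumes a checkout
# on which pymc3.aesaraf still provides walk_model; the names below are made up.
import aesara.tensor as at

from pymc3.aesaraf import walk_model

x_rv = at.random.normal(0, 1, name="x")
y = x_rv + 2.0

visited = list(walk_model([y]))
assert x_rv in visited                      # the RV itself is reached
assert x_rv.owner.inputs[0] not in visited  # ...but its rng input is not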


def replace_rvs_in_graphs(
graphs: Iterable[TensorVariable],
replacement_fn: Callable[[TensorVariable], Dict[TensorVariable, TensorVariable]],
initial_replacements: Optional[Dict[TensorVariable, TensorVariable]] = None,
**kwargs,
) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]:
"""Replace random variables in graphs

This will *not* recompute test values.

Parameters
==========
graphs
The graphs in which random variables are to be replaced.

Returns
=======
Tuple containing the transformed graphs and a ``dict`` of the replacements
that were made.
"""
replacements = {}
if initial_replacements:
replacements.update(initial_replacements)

def expand_replace(var):
new_nodes = []
if var.owner and isinstance(var.owner.op, RandomVariable):
new_nodes.extend(replacement_fn(var, replacements))
return new_nodes

for var in walk_model(graphs, expand_fn=expand_replace, **kwargs):
pass

if replacements:
inputs = [i for i in graph_inputs(graphs) if not isinstance(i, Constant)]
equiv = {k: k for k in replacements.keys()}
equiv = clone_get_equiv(inputs, graphs, False, False, equiv)

fg = FunctionGraph(
[equiv[i] for i in inputs],
[equiv[o] for o in graphs],
clone=False,
)

fg.replace_all(replacements.items(), import_missing=True)

graphs = list(fg.outputs)

return graphs, replacements


def rvs_to_value_vars(
graphs: Iterable[TensorVariable],
apply_transforms: bool = False,
initial_replacements: Optional[Dict[TensorVariable, TensorVariable]] = None,
**kwargs,
) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]:
"""Replace random variables in graphs with their value variables.

This will *not* recompute test values in the resulting graphs.

Parameters
==========
graphs
The graphs in which to perform the replacements.
apply_transforms
If ``True``, apply each value variable's transform.
initial_replacements
A ``dict`` containing the initial replacements to be made.

"""

def transform_replacements(var, replacements):
rv_var, rv_value_var = extract_rv_and_value_vars(var)

if rv_value_var is None:
return []

transform = getattr(rv_value_var.tag, "transform", None)

if transform is None or not apply_transforms:
replacements[var] = rv_value_var
return []

trans_rv_value = transform.backward(rv_var, rv_value_var)
replacements[var] = trans_rv_value

return [trans_rv_value]

return replace_rvs_in_graphs(graphs, transform_replacements, initial_replacements, **kwargs)
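
# Illustrative sketch: rvs_to_value_vars rewrites a graph in terms of value
# variables. pymc3.Model normally attaches tag.value_var itself; here it is done by
# hand. Assumes a checkout on which pymc3.aesaraf still provides rvs_to_value_vars.
import aesara.tensor as at

from aesara.graph.basic import graph_inputs
from pymc3.aesaraf import rvs_to_value_vars

x_rv = at.random.normal(0, 1, name="x")
x_value = x_rv.type(name="x_value")
x_rv.tag.value_var = x_value

(expr,), replacements = rvs_to_value_vars([x_rv ** 2])
assert x_value in list(graph_inputs([expr]))  # the result depends on the value variable
assert replacements[x_rv] is x_value          # ...and the swap is reported back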


def inputvars(a):
"""
Get the inputs into Aesara variables
6 changes: 1 addition & 5 deletions pymc3/blocking.py
@@ -41,11 +41,7 @@ class DictToArrayBijection:
def map(var_dict: Dict[str, np.ndarray]) -> RaveledVars:
"""Map a dictionary of names and variables to a concatenated 1D array space."""
vars_info = tuple((v, k, v.shape, v.dtype) for k, v in var_dict.items())
raveled_vars = [v[0].ravel() for v in vars_info]
if raveled_vars:
res = np.concatenate(raveled_vars)
else:
res = np.array([])
res = np.concatenate([v[0].ravel() for v in vars_info])
return RaveledVars(res, tuple(v[1:] for v in vars_info))
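
# Illustrative sketch of what map computes, in plain NumPy: ravel each value and
# concatenate, keeping (name, shape, dtype) so the flat vector can be unmapped
# later. The dictionary below is made up. Note that np.concatenate raises
# ValueError on an empty sequence, which the removed empty-list guard avoided.
import numpy as np

point = {"mu": np.zeros((2, 3)), "sigma": np.ones(4)}
flat = np.concatenate([v.ravel() for v in point.values()])
info = tuple((name, v.shape, v.dtype) for name, v in point.items())

assert flat.shape == (10,)
assert info[0] == ("mu", (2, 3), np.dtype("float64"))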

@staticmethod