Evaluate initial values lazily

michaelosthege · michaelosthege · commit 3aa5c541f0aa · 2021-09-21T13:25:40.000+02:00
Related to pymc-devs#4924
diff --git a/pymc3/model.py b/pymc3/model.py
@@ -945,32 +945,43 @@ def recompute_initial_point(self) -> Dict[str, np.ndarray]:
         Returns
         -------
         initial_point : dict
-            Maps free variable names to transformed, numeric initial values.
+            Maps transformed free variable names to transformed, numeric initial values.
         """
-        self._initial_point_cache = Point(list(self.initial_values.items()), model=self)
+        numeric_initvals = {}
+        # The entries in `initial_values` are already in topological order and can be evaluated one by one.
+        for rv_value, initval in self.initial_values.items():
+            rv_var = self.values_to_rvs[rv_value]
+            transform = getattr(rv_value.tag, "transform", None)
+            if isinstance(initval, np.ndarray) and transform is None:
+                # Only untransformed, numeric initvals can be taken as they are.
+                numeric_initvals[rv_value] = initval
+            else:
+                # Evaluate initvals that are None, symbolic or need to be transformed.
+                # They can depend on other initvals from higher up in the graph,
+                # which are therefore fed to the evaluation as "givens".
+                test_value = getattr(rv_var.tag, "test_value", None)
+                numeric_initvals[rv_value] = self._eval_initval(
+                    rv_var, initval, test_value, transform, given=numeric_initvals
+                )
+
+        # Cache the evaluation results for next time.
+        self._initial_point_cache = Point(list(numeric_initvals.items()), model=self)
         return self._initial_point_cache
 
     @property
-    def initial_values(self) -> Dict[TensorVariable, np.ndarray]:
-        """Maps transformed variables to initial values.
+    def initial_values(self) -> Dict[TensorVariable, Optional[Union[np.ndarray, Variable]]]:
+        """Maps transformed variables to initial value placeholders.
 
         ⚠ The keys are NOT the objects returned by, `pm.Normal(...)`.
-        For a name-based dictionary use the `initial_point` property.
+        For a name-based dictionary use the `get_initial_point()` method.
         """
         return self._initial_values
 
     def set_initval(self, rv_var, initval):
         if initval is not None:
             initval = rv_var.type.filter(initval)
 
-        test_value = getattr(rv_var.tag, "test_value", None)
-
         rv_value_var = self.rvs_to_values[rv_var]
-        transform = getattr(rv_value_var.tag, "transform", None)
-
-        if initval is None or transform:
-            initval = self._eval_initval(rv_var, initval, test_value, transform)
-
         self.initial_values[rv_value_var] = initval
 
     def _eval_initval(
@@ -979,6 +990,7 @@ def _eval_initval(
         initval: Optional[Variable],
         test_value: Optional[np.ndarray],
         transform: Optional[Transform],
+        given: Optional[Dict[TensorVariable, np.ndarray]] = None,
     ) -> np.ndarray:
         """Sample/evaluate an initial value using the existing initial values,
         and with the least effect on the RNGs involved (i.e. no in-placing).
@@ -997,6 +1009,8 @@ def _eval_initval(
         transform : optional, Transform
             A transformation associated with the random variable.
             Transformations are automatically applied to initial values.
+        given : optional, dict
+            Numeric initial values to use as givens; when None, `self.initial_values` is used.
 
         Returns
         -------
@@ -1007,6 +1021,9 @@ def _eval_initval(
         opt_qry = mode.provided_optimizer.excluding("random_make_inplace")
         mode = Mode(linker=mode.linker, optimizer=opt_qry)
 
+        if given is None:
+            given = self.initial_values
+
         if transform:
             if initval is not None:
                 value = initval
@@ -1023,9 +1040,7 @@ def initval_to_rvval(value_var, value):
             else:
                 return initval
 
-        givens = {
-            self.values_to_rvs[k]: initval_to_rvval(k, v) for k, v in self.initial_values.items()
-        }
+        givens = {self.values_to_rvs[k]: initval_to_rvval(k, v) for k, v in given.items()}
         initval_fn = aesara.function([], rv_var, mode=mode, givens=givens, on_unused_input="ignore")
         try:
             initval = initval_fn()
diff --git a/pymc3/tests/test_initvals.py b/pymc3/tests/test_initvals.py
@@ -11,6 +11,7 @@
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
+import aesara
 import numpy as np
 import pytest
 
@@ -37,7 +38,8 @@ def test_new_warnings(self):
         with pm.Model() as pmodel:
             with pytest.warns(DeprecationWarning, match="`testval` argument is deprecated"):
                 rv = pm.Uniform("u", 0, 1, testval=0.75)
-                assert pmodel.initial_values[rv.tag.value_var] == transform_fwd(rv, 0.75)
+                initial_point = pmodel.recompute_initial_point()
+                assert initial_point["u_interval__"] == transform_fwd(rv, 0.75)
                 assert not hasattr(rv.tag, "test_value")
         pass
 
@@ -82,6 +84,33 @@ def test_falls_back_to_test_value(self):
         assert iv == 0.6
         pass
 
+    def test_dependent_initvals(self):
+        with pm.Model() as pmodel:
+            L = pm.Uniform("L", 0, 1, initval=0.5)
+            B = pm.Uniform("B", lower=L, upper=2, initval=1.25)
+            ip = pmodel.recompute_initial_point()
+            assert ip["L_interval__"] == 0
+            assert ip["B_interval__"] == 0
+
+            # Modify initval of L and re-evaluate
+            pmodel.initial_values[pmodel.rvs_to_values[L]] = 0.9
+            ip = pmodel.recompute_initial_point()
+            assert ip["B_interval__"] < 0
+        pass
+
+    def test_initval_resizing(self):
+        with pm.Model() as pmodel:
+            data = aesara.shared(np.arange(4))
+            rv = pm.Uniform("u", lower=data, upper=10)
+
+            ip = pmodel.recompute_initial_point()
+            assert np.shape(ip["u_interval__"]) == (4,)
+
+            data.set_value(np.arange(5))
+            ip = pmodel.recompute_initial_point()
+            assert np.shape(ip["u_interval__"]) == (5,)
+        pass
+
 
 class TestSpecialDistributions:
     def test_automatically_assigned_test_values(self):
diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py
@@ -516,7 +516,8 @@ def test_initial_point():
 
     assert model.rvs_to_values[a] in model.initial_values
     assert model.rvs_to_values[x] in model.initial_values
-    assert model.initial_values[b_value_var] == b_initval_trans
+    assert model.initial_values[b_value_var] == b_initval
+    assert model.recompute_initial_point()["b_interval__"] == b_initval_trans
     assert model.initial_values[model.rvs_to_values[y]] == y_initval
 
 
@@ -641,8 +642,8 @@ def test_set_initval():
         value = pm.NegativeBinomial("value", mu=mu, alpha=alpha)
 
     assert np.array_equal(model.initial_values[model.rvs_to_values[mu]], np.array([[100.0]]))
-    np.testing.assert_almost_equal(model.initial_values[model.rvs_to_values[alpha]], np.log(100))
-    assert 50 < model.initial_values[model.rvs_to_values[value]] < 150
+    np.testing.assert_array_equal(model.initial_values[model.rvs_to_values[alpha]], np.array(100))
+    assert model.initial_values[model.rvs_to_values[value]] is None
 
     # `Flat` cannot be sampled, so let's make sure that doesn't break initial
     # value computations