Skip to content

Commit 620214f

Browse files
[RLlib] Fix _test_dependency_torch (#60742) (#60888)
## Description: Cherry-pick of "Fix _test_dependency_torch" (#60742) into releases/2.54.0. Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com> Signed-off-by: Kamil Kaczmarek <kamil@anyscale.com> Co-authored-by: Artur Niederfahrenhorst <artur@anyscale.com>
1 parent 5d2115c commit 620214f

File tree

3 files changed

+25
-7
lines changed

3 files changed

+25
-7
lines changed

rllib/evaluation/postprocessing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Dict, Optional
22

33
import numpy as np
4-
import scipy.signal
54

65
from ray.rllib.policy.policy import Policy
76
from ray.rllib.policy.sample_batch import SampleBatch
@@ -325,4 +324,7 @@ def discount_cumsum(x: np.ndarray, gamma: float) -> np.ndarray:
325324
2.0 + 0.9*3.0,
326325
3.0])
327326
"""
327+
# Import scipy here to avoid import error when framework is tensorflow.
328+
import scipy
329+
328330
return scipy.signal.lfilter([1], [1, float(-gamma)], x[::-1], axis=0)[::-1]

rllib/offline/offline_policy_evaluation_runner.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515

1616
import ray
1717
from ray.data.iterator import DataIterator
18-
from ray.data.util.torch_utils import (
19-
convert_ndarray_batch_to_torch_tensor_batch,
20-
)
2118
from ray.rllib.connectors.env_to_module import EnvToModulePipeline
2219
from ray.rllib.core import (
2320
ALL_MODULES,
@@ -116,6 +113,12 @@ def _collate_fn(
116113
_batch: Dict[EpisodeID, Dict[str, numpy.ndarray]],
117114
) -> Dict[EpisodeID, Dict[str, TensorType]]:
118115
"""Converts a batch of episodes to torch tensors."""
116+
# Avoid torch import error when framework is tensorflow.
117+
# Note (artur): This can be removed when we remove tf support.
118+
from ray.data.util.torch_utils import (
119+
convert_ndarray_batch_to_torch_tensor_batch,
120+
)
121+
119122
return [
120123
convert_ndarray_batch_to_torch_tensor_batch(
121124
episode, device=self._device, dtypes=torch.float32

rllib/utils/tf_utils.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -892,18 +892,31 @@ def __init__(self, output, sess=None, input_variables=None):
892892
if input_variables is not None:
893893
variable_list += input_variables
894894

895+
def _get_var_name(v):
896+
"""Get variable name, supporting both TF1 ResourceVariable and
897+
Keras 3 Variable objects."""
898+
if hasattr(v, "op"):
899+
return v.op.node_def.name
900+
return v.name
901+
895902
if not tf1.executing_eagerly():
896903
for v in variable_list:
897-
self.variables[v.op.node_def.name] = v
904+
self.variables[_get_var_name(v)] = v
898905

899906
self.placeholders = {}
900907
self.assignment_nodes = {}
901908

902909
# Create new placeholders to put in custom weights.
903910
for k, var in self.variables.items():
911+
dtype = var.value().dtype if hasattr(var, "op") else var.dtype
912+
shape = (
913+
var.get_shape().as_list()
914+
if hasattr(var, "get_shape")
915+
else list(var.shape)
916+
)
904917
self.placeholders[k] = tf1.placeholder(
905-
var.value().dtype,
906-
var.get_shape().as_list(),
918+
dtype,
919+
shape,
907920
name="Placeholder_" + k,
908921
)
909922
self.assignment_nodes[k] = var.assign(self.placeholders[k])

0 commit comments

Comments (0)