Patch AOTAutogradCache._get_shape_env

jamesjwu · jamesjwu · commit 497e9af05214 · 2025-04-24T19:19:56.000-07:00
Signed-off-by: James Wu <jjwu@meta.com>
diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py
@@ -195,7 +195,6 @@ def compile(
         hash_str, file_path = None, None
         from torch._inductor.codecache import (FxGraphCache,
                                                compiled_fx_graph_hash)
-
         if torch.__version__.startswith("2.5"):
             original_load = FxGraphCache.load
             original_load_name = "torch._inductor.codecache.FxGraphCache.load"
@@ -280,6 +279,14 @@ def _get_shape_env() -> AlwaysHitShapeEnv:
                 patch("torch._inductor.codecache.FxGraphCache._get_shape_env",
                       _get_shape_env))
 
+            from torch._functorch._aot_autograd.autograd_cache import AOTAutogradCache
+            # torch 2.8+ on main uses _get_shape_env in AOTAutogradCache
+            if hasattr(AOTAutogradCache, "_get_shape_env"):
+                stack.enter_context(
+                    patch(
+                        "torch._functorch._aot_autograd.autograd_cache.AOTAutogradCache._get_shape_env",
+                        _get_shape_env))
+
             # for forcing the graph to be cached
             stack.enter_context(
                 patch(
@@ -326,10 +333,17 @@ def load(self,
         hash_str = handle[0]
 
         from torch._inductor.codecache import FxGraphCache
+        from torch._functorch._aot_autograd.autograd_cache import AOTAutogradCache
         with ExitStack() as exit_stack:
             exit_stack.enter_context(
                 patch("torch._inductor.codecache.FxGraphCache._get_shape_env",
                       lambda *args, **kwargs: AlwaysHitShapeEnv()))
+            # torch 2.8+ on main uses _get_shape_env in AOTAutogradCache
+            if hasattr(AOTAutogradCache, "_get_shape_env"):
+                exit_stack.enter_context(
+                    patch(
+                        "torch._functorch._aot_autograd.autograd_cache.AOTAutogradCache._get_shape_env",
+                        lambda *args, **kwargs: AlwaysHitShapeEnv()))
 
             # Dynamo metrics context, see method for more details.
             exit_stack.enter_context(self.metrics_context())