
Commit d2a5153

larryliu0820 authored and facebook-github-bot committed
See what happens if we export with max_seq_len (#11611)
Summary: From testing, it appears we cannot set the token dimension's dynamic-shape max to `max_seq_len` if we export with only `tokens`. However, if we export with both `tokens` and `input_pos`, the token dimension's max can be set to `max_seq_len`. This diff fixes two things:

* Choose the dynamic shape based on which inputs are exported.
* Update the .pte metadata `get_max_seq_len` and `get_max_context_len` to match the token dimension's max value in the dynamic shape.

Differential Revision: D76530379

Pulled By: larryliu0820
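For illustration, here is a minimal sketch (not code from this diff) of the two dynamic-shape layouts the summary describes; the `max_seq_len` value is an assumed example:

import torch

max_seq_len = 128  # assumed example value

# tokens only: the token dimension's max must stay below max_seq_len
tokens_only_shapes = (
    {1: torch.export.Dim("token_dim", max=max_seq_len - 1)},
)

# tokens + input_pos: the token dimension's max can be max_seq_len itself,
# while input_pos remains a static, length-1 input
tokens_and_input_pos_shapes = (
    {1: torch.export.Dim("token_dim", max=max_seq_len)},
    {"input_pos": {0: 1}},
)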
1 parent 045b3a5 commit d2a5153

File tree

extension/llm/export/builder.py
extension/llm/export/test/test_builder.py

2 files changed: +24 −5 lines changed

extension/llm/export/builder.py

Lines changed: 22 additions & 3 deletions
@@ -133,6 +133,20 @@ def __init__(
         self.output_dir = "."
         self._saved_pte_filename = None
 
+    def __post_init__(self):
+        """
+        Post init function to update metadata based on dynamic shape
+        """
+        dynamic_shape = self._get_dynamic_shape()
+        if dynamic_shape is not None:
+            token_dim = dynamic_shape[0][1]
+            if self.verbose:
+                logging.info(
+                    f"Metadata 'get_max_seq_len' is being updated to match torch.export's dynamic shape max: {token_dim.max}"
+                )
+            self.metadata["get_max_seq_len"] = token_dim.max
+            self.metadata["get_max_context_len"] = token_dim.max
+
     def set_output_dir(self, output_dir: str) -> "LLMEdgeManager":
         """
         Set the directory where the .pte file will be saved.
@@ -180,14 +194,19 @@ def _get_dynamic_shape(self) -> Any:
         if self.dynamic_shapes:
             return self.dynamic_shapes
 
-        dim = torch.export.Dim("token_dim", max=self.max_seq_len - 1)
         if self.enable_dynamic_shape:
             if not self.use_kv_cache:
                 # Only one input argument: tokens
-                self.dynamic_shapes = ({1: dim},)
+                # For some reason if with tokens, we can't go all the way to max_seq_len, otherwise export will fail.
+                self.dynamic_shapes = (
+                    {1: torch.export.Dim("token_dim", max=self.max_seq_len - 1)},
+                )
             else:
                 # Two input arguments: tokens and input_pos but input_pos is static shape
-                self.dynamic_shapes = ({1: dim}, {"input_pos": {0: 1}})
+                self.dynamic_shapes = (
+                    {1: torch.export.Dim("token_dim", max=self.max_seq_len)},
+                    {"input_pos": {0: 1}},
+                )
         else:
             # Two input arguments: tokens and input_pos but both are of static shape
             self.dynamic_shapes = None
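A minimal sketch of what the new __post_init__ hook derives, assuming the dynamic-shape layout returned by _get_dynamic_shape; the standalone `metadata` dict below is hypothetical and only mirrors the two keys the method writes:

import torch

# The first entry's key 1 is the dynamic token dimension.
dynamic_shape = ({1: torch.export.Dim("token_dim", max=128)},)
token_dim = dynamic_shape[0][1]

metadata = {}
# Both metadata entries take the token dimension's max from the dynamic shape.
metadata["get_max_seq_len"] = token_dim.max
metadata["get_max_context_len"] = token_dim.max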

extension/llm/export/test/test_builder.py

Lines changed: 2 additions & 2 deletions
@@ -63,7 +63,7 @@ def test_get_dynamic_shape_with_dynamic_shape_enabled_no_kv_cache(self) -> None:
         self.assertIsInstance(result[0], dict)
         self.assertIn(1, result[0])
         # Check that the value at key 1 is a torch.export.Dim with the correct max value
-        self.assertEqual(result[0][1].max, self.max_seq_len - 1)
+        self.assertEqual(result[0][1].max, self.max_seq_len)
 
     def test_get_dynamic_shape_with_dynamic_shape_enabled_with_kv_cache(self) -> None:
         """Test _get_dynamic_shape when enable_dynamic_shape=True and use_kv_cache=True."""
@@ -88,7 +88,7 @@ def test_get_dynamic_shape_with_dynamic_shape_enabled_with_kv_cache(self) -> None:
         # Check first element (tokens dimension)
         self.assertIsInstance(result[0], dict)
         self.assertIn(1, result[0])
-        self.assertEqual(result[0][1].max, self.max_seq_len - 1)
+        self.assertEqual(result[0][1].max, self.max_seq_len)
 
         # Check second element (input_pos dimension)
         self.assertIsInstance(result[1], dict)
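For context, a hypothetical end-to-end usage of the tokens-only dynamic shape with torch.export.export; the Toy module and example shapes below are illustrative assumptions, not code from this repo:

import torch

class Toy(torch.nn.Module):
    def forward(self, tokens):
        # Placeholder computation standing in for a real LLM forward pass.
        return tokens * 2

max_seq_len = 128  # assumed example value
example_tokens = torch.zeros(1, 8, dtype=torch.long)  # batch x seq

# Token dimension capped below max_seq_len, matching the tokens-only branch above.
dynamic_shapes = ({1: torch.export.Dim("token_dim", max=max_seq_len - 1)},)

exported = torch.export.export(Toy(), (example_tokens,), dynamic_shapes=dynamic_shapes)
print(exported)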
