
Commit d75d46b

Ivan Kobzarev authored and facebook-github-bot committed
Setting to use torch dynamo compiling path in eager (#2045)
Summary:
1/ Refactor to import is_torchdynamo_compiling() from torchrec.pt2.checks instead of duplicating the try/except fallback in each module.
2/ The is_torchdynamo_compiling() branches form an alternative logic path, and our tests only exercise it under compilation, so bugs on that path (e.g. a shape mismatch) are easy to miss in eager tests. We need a way to cover it with eager tests, without compilation. This diff introduces a global setting that forces the is_torchdynamo_compiling() path in eager mode, for test coverage and debugging. The setting is enabled in test_pt2_multiprocess so that the first eager iteration runs on the is_torchdynamo_compiling() path.

Differential Revision: D57860075
1 parent 8c7fa2f commit d75d46b
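
The mechanism in one self-contained sketch (assumes a torchrec build that includes this commit; `ToyModule` is a hypothetical stand-in for the real sharded model used in the tests):

```python
import torch
import torchrec.pt2.checks
from torchrec.pt2.checks import is_torchdynamo_compiling


class ToyModule(torch.nn.Module):
    # Hypothetical module with a branch that is normally reachable
    # only while torch.compile / dynamo is tracing.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if is_torchdynamo_compiling():
            return x * 2.0  # compile-path logic
        return x + x  # eager-path logic


m = ToyModule()
# Force the is_torchdynamo_compiling() path even though we run in eager:
torchrec.pt2.checks.set_use_torchdynamo_compiling_path(True)
out = m(torch.ones(4))  # eager call now exercises the compile-path branch
torchrec.pt2.checks.set_use_torchdynamo_compiling_path(False)  # restore default
```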

File tree: 6 files changed (+46, −53 lines)


torchrec/distributed/dist_data.py

Lines changed: 6 additions & 9 deletions
```diff
@@ -27,6 +27,12 @@
 from torchrec.distributed.embedding_types import KJTList
 from torchrec.distributed.types import Awaitable, QuantizedCommCodecs
 from torchrec.fx.utils import fx_marker
+from torchrec.pt2.checks import (
+    is_non_strict_exporting,
+    is_torchdynamo_compiling,
+    pt2_checks_all_is_size,
+    pt2_checks_tensor_slice,
+)
 from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
 
 try:
@@ -46,15 +52,6 @@
     pass
 
 
-try:
-    from torch.compiler import is_dynamo_compiling as is_torchdynamo_compiling
-
-except Exception:
-
-    def is_torchdynamo_compiling() -> bool:  # type: ignore[misc]
-        return False
-
-
 logger: logging.Logger = logging.getLogger()
```

torchrec/distributed/quant_embeddingbag.py

Lines changed: 1 addition & 7 deletions
```diff
@@ -58,20 +58,14 @@
 )
 from torchrec.modules.embedding_modules import EmbeddingBagCollectionInterface
 from torchrec.modules.feature_processor_ import FeatureProcessorsCollection
+from torchrec.pt2.checks import is_torchdynamo_compiling
 from torchrec.quant.embedding_modules import (
     EmbeddingBagCollection as QuantEmbeddingBagCollection,
     FeatureProcessedEmbeddingBagCollection as QuantFeatureProcessedEmbeddingBagCollection,
     MODULE_ATTR_QUANT_STATE_DICT_SPLIT_SCALE_BIAS,
 )
 from torchrec.sparse.jagged_tensor import KeyedJaggedTensor, KeyedTensor
 
-try:
-    from torch._dynamo import is_compiling as is_torchdynamo_compiling
-except Exception:
-
-    def is_torchdynamo_compiling() -> bool:  # type: ignore[misc]
-        return False
-
 
 def get_device_from_parameter_sharding(ps: ParameterSharding) -> str:
     # pyre-ignore
```

torchrec/distributed/tests/test_pt2_multiprocess.py

Lines changed: 12 additions & 4 deletions
```diff
@@ -17,6 +17,7 @@
 
 import torch
 import torchrec
+import torchrec.pt2.checks
 from hypothesis import given, settings, strategies as st, Verbosity
 from torchrec.distributed.embedding import EmbeddingCollectionSharder
 from torchrec.distributed.embedding_types import EmbeddingComputeKernel
@@ -94,6 +95,11 @@ class _InputType(Enum):
     VARIABLE_BATCH = 2
 
 
+class _ConvertToVariableBatch(Enum):
+    FALSE = 0
+    TRUE = 1
+
+
 class EBCSharderFixedShardingType(EmbeddingBagCollectionSharder):
     def __init__(
         self,
@@ -333,6 +339,8 @@ def _test_compile_rank_fn(
         kjt_ft = kjt_for_pt2_tracing(kjt, convert_to_vb=convert_to_vb)
 
         torchrec.distributed.comm_ops.set_use_sync_collectives(True)
+        torchrec.pt2.checks.set_use_torchdynamo_compiling_path(True)
+
         dmp.train(True)
 
         eager_out = dmp(kjt_ft)
@@ -385,14 +393,14 @@ def disable_cuda_tf32(self) -> bool:
                 _ModelType.EBC,
                 ShardingType.TABLE_WISE.value,
                 _InputType.SINGLE_BATCH,
-                True,
+                _ConvertToVariableBatch.TRUE,
                 "eager",
             ),
             (
                 _ModelType.EBC,
                 ShardingType.COLUMN_WISE.value,
                 _InputType.SINGLE_BATCH,
-                True,
+                _ConvertToVariableBatch.TRUE,
                 "eager",
             ),
         ]
@@ -406,7 +414,7 @@ def test_compile_multiprocess(
             _ModelType,
             str,
             _InputType,
-            bool,
+            _ConvertToVariableBatch,
             str,
         ],
     ) -> None:
@@ -421,6 +429,6 @@ def test_compile_multiprocess(
             sharding_type=sharding_type,
             kernel_type=kernel_type,
             input_type=input_type,
-            convert_to_vb=tovb,
+            convert_to_vb=tovb == _ConvertToVariableBatch.TRUE,
             torch_compile_backend=compile_backend,
         )
```
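
The bool → `_ConvertToVariableBatch` switch makes the parametrized cases self-describing while the underlying test function still takes a `bool`; the enum is collapsed back to a `bool` only at the call boundary. A minimal sketch of that pattern (enum names from the diff; `run_test` is a hypothetical stand-in for `_test_compile_rank_fn`, and the test harness is elided):

```python
from enum import Enum


class _ConvertToVariableBatch(Enum):
    FALSE = 0
    TRUE = 1


def run_test(convert_to_vb: bool) -> None:
    # Hypothetical stand-in for the real test body, which takes a bool.
    print(f"convert_to_vb={convert_to_vb}")


for tovb in (_ConvertToVariableBatch.FALSE, _ConvertToVariableBatch.TRUE):
    # Collapse the enum to a bool exactly once, at the call boundary:
    run_test(convert_to_vb=(tovb == _ConvertToVariableBatch.TRUE))
```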

torchrec/distributed/train_pipeline/train_pipelines.py

Lines changed: 1 addition & 8 deletions
```diff
@@ -49,17 +49,10 @@
     TrainPipelineContext,
 )
 from torchrec.distributed.types import Awaitable
+from torchrec.pt2.checks import is_torchdynamo_compiling
 from torchrec.streamable import Multistreamable
 
 
-try:
-    from torch._dynamo import is_compiling as is_torchdynamo_compiling
-except Exception:
-
-    def is_torchdynamo_compiling() -> bool:  # type: ignore[misc]
-        return False
-
-
 logger: logging.Logger = logging.getLogger(__name__)
```

torchrec/pt2/checks.py

Lines changed: 20 additions & 4 deletions
```diff
@@ -8,26 +8,42 @@
 # pyre-strict
 
 from typing import List
-
 import torch
 
+USE_TORCHDYNAMO_COMPILING_PATH: bool = False
+
+
+def set_use_torchdynamo_compiling_path(val: bool) -> None:
+    global USE_TORCHDYNAMO_COMPILING_PATH
+    USE_TORCHDYNAMO_COMPILING_PATH = val
+
+
+def get_use_torchdynamo_compiling_path() -> bool:
+    global USE_TORCHDYNAMO_COMPILING_PATH
+    return USE_TORCHDYNAMO_COMPILING_PATH
+
 
 try:
     if torch.jit.is_scripting():
         raise Exception()
 
     from torch.compiler import (
         is_compiling as is_compiler_compiling,
-        is_dynamo_compiling as is_torchdynamo_compiling,
+        is_dynamo_compiling as _is_torchdynamo_compiling,
     )
 
+    def is_torchdynamo_compiling() -> bool:
+        global USE_TORCHDYNAMO_COMPILING_PATH
+        return USE_TORCHDYNAMO_COMPILING_PATH or _is_torchdynamo_compiling()
+
     def is_non_strict_exporting() -> bool:
         return not is_torchdynamo_compiling() and is_compiler_compiling()
 
 except Exception:
     # BC for torch versions without compiler and torch deploy path
-    def is_torchdynamo_compiling() -> bool:  # type: ignore[misc]
-        return False
+    def is_torchdynamo_compiling() -> bool:
+        global USE_TORCHDYNAMO_COMPILING_PATH
+        return USE_TORCHDYNAMO_COMPILING_PATH
 
     def is_non_strict_exporting() -> bool:
         return False
```
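
After this change, both branches of the try/except resolve `is_torchdynamo_compiling()` through the global flag, so the override works whether or not `torch.compiler` is available. A small round-trip check (assumes a torchrec build that includes this commit):

```python
from torchrec.pt2.checks import (
    get_use_torchdynamo_compiling_path,
    is_torchdynamo_compiling,
    set_use_torchdynamo_compiling_path,
)

assert not get_use_torchdynamo_compiling_path()  # default: off
assert not is_torchdynamo_compiling()  # plain eager, no dynamo tracing

set_use_torchdynamo_compiling_path(True)
assert is_torchdynamo_compiling()  # forced on, still running in eager

set_use_torchdynamo_compiling_path(False)  # restore the default
```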

torchrec/sparse/jagged_tensor.py

Lines changed: 6 additions & 21 deletions
```diff
@@ -17,7 +17,12 @@
 from torch.autograd.profiler import record_function
 from torch.fx._pytree import register_pytree_flatten_spec, TreeSpec
 from torch.utils._pytree import GetAttrKey, KeyEntry, register_pytree_node
-from torchrec.pt2.checks import pt2_checks_all_is_size, pt2_checks_tensor_slice
+from torchrec.pt2.checks import (
+    is_non_strict_exporting,
+    is_torchdynamo_compiling,
+    pt2_checks_all_is_size,
+    pt2_checks_tensor_slice,
+)
 from torchrec.streamable import Pipelineable
 
 try:
@@ -38,26 +43,6 @@
 except ImportError:
     pass
 
-try:
-    if torch.jit.is_scripting():
-        raise Exception()
-
-    from torch.compiler import (
-        is_compiling as is_compiler_compiling,
-        is_dynamo_compiling as is_torchdynamo_compiling,
-    )
-
-    def is_non_strict_exporting() -> bool:
-        return not is_torchdynamo_compiling() and is_compiler_compiling()
-
-except Exception:
-    # BC for torch versions without compiler and torch deploy path
-    def is_torchdynamo_compiling() -> bool:  # type: ignore[misc]
-        return False
-
-    def is_non_strict_exporting() -> bool:
-        return False
-
 
 def _pin_and_move(tensor: torch.Tensor, device: torch.device) -> torch.Tensor:
     if is_torchdynamo_compiling():
```
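
The trailing context line shows why call sites need this guard: host-side memory pinning is an eager-only optimization that dynamo tracing should skip. A sketch of what such a guard typically looks like (the branch bodies here are illustrative, not necessarily this file's exact code):

```python
import torch
from torchrec.pt2.checks import is_torchdynamo_compiling


def _pin_and_move(tensor: torch.Tensor, device: torch.device) -> torch.Tensor:
    if is_torchdynamo_compiling():
        # Under dynamo, take the simple traceable path.
        return tensor.to(device=device)
    # In eager, pin host memory and issue an async host-to-device copy.
    return (
        tensor
        if device.type == "cpu"
        else tensor.pin_memory().to(device=device, non_blocking=True)
    )
```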
