
Commit b48a098

Update on "Define the EXPERIMENTAL_PREFER_TRACING flag and the traceable option | feat(torchlib)"
As part of the effort described in #1095, this PR:

- adds an experimental `TORCHLIB_EXPERIMENTAL_PREFER_TRACING` flag that allows the tracer to trace a function when possible.
- defines the `traceable` option in the `torch_op` decorator to mark a function as traceable.

[ghstack-poisoned]
2 parents 688f677 + eeb1ff7 commit b48a098
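For context, a minimal sketch of how the new flag would be switched on. Only the variable name comes from the commit message; where torchlib reads it, and which values count as enabled, is not shown in this diff:

```python
import os

# Opt in to the experimental tracing preference before torchlib builds its
# functions. Treating "1" as "enabled" is an assumption for illustration;
# only the variable name TORCHLIB_EXPERIMENTAL_PREFER_TRACING is given by
# this commit.
os.environ["TORCHLIB_EXPERIMENTAL_PREFER_TRACING"] = "1"
```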

File tree: 5 files changed, +60 −15 lines

onnxscript/function_libs/torch_lib/graph_building.py

Lines changed: 1 addition & 0 deletions

@@ -816,6 +816,7 @@ def generate_function_value_info_proto(
                 continue
             if prefix:
                 name = f"{prefix}/{name}"
+            value_info.name = name
             named_value_info[name] = value_info
         for name, sub_graph in self._sub_torch_script_graphs.items():
             named_value_info.update(
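The added line keeps the `ValueInfoProto`'s own `name` field in sync with the prefixed dictionary key; without it, a consumer reading `value_info.name` would still see the unprefixed name. A minimal standalone illustration (the prefix value here is made up):

```python
import onnx

# Build a ValueInfoProto named "x", then prefix it the way
# generate_function_value_info_proto does for subgraphs.
value_info = onnx.helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [1])
prefix = "submodule"  # hypothetical subgraph prefix
name = f"{prefix}/{value_info.name}"
value_info.name = name  # the fix: rename the proto itself, not just the dict key
named_value_info = {name: value_info}
print(named_value_info["submodule/x"].name)  # -> "submodule/x"
```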

onnxscript/function_libs/torch_lib/ops/core.py

Lines changed: 7 additions & 4 deletions

@@ -4576,9 +4576,10 @@ def aten_logaddexp(self: TFloatOrBFloat16, other: TFloatOrBFloat16) -> TFloatOrB
 @torch_op("aten::logaddexp2")
 def aten_logaddexp2(self: TFloatOrBFloat16, other: TFloatOrBFloat16) -> TFloatOrBFloat16:
     """logaddexp2(Tensor self, Tensor other) -> Tensor"""
-    summation = op.Add(op.Pow(2.0, self), op.Pow(2.0, other))
+    two = op.CastLike(2.0, self)
+    summation = op.Add(op.Pow(two, self), op.Pow(two, other))

-    return op.Div(op.Log(summation), op.CastLike(op.Log(2.0), self))
+    return op.Div(op.Log(summation), op.Log(two))


 @torch_op("aten::logcumsumexp")
@@ -4673,10 +4674,12 @@ def _aten_logit_onnx(self: TFloatOrBFloat16) -> TFloatOrBFloat16:

 @torch_op("aten::logit", private=True)
 def _aten_logit_clamp_onnx(self: TFloatOrBFloat16, eps: float) -> TFloatOrBFloat16:
-    temporary_self = op.Where(self <= 1.0 - eps, self, 1.0 - eps)
+    eps = op.CastLike(eps, self)
+    one = op.CastLike(1.0, self)
+    temporary_self = op.Where(self <= one - eps, self, one - eps)
     z = op.Where(temporary_self < eps, eps, temporary_self)

-    return op.Log(op.Div(z, op.Sub(1.0, z)))
+    return op.Log(op.Div(z, op.Sub(one, z)))


 @torch_op("aten::logit", trace_only=True)

onnxscript/function_libs/torch_lib/registration.py

Lines changed: 12 additions & 1 deletion

@@ -113,7 +113,18 @@ def torch_op(
         private: Whether the function is private (not directly exposed). It should
             be true for all functions with names starting with "_".
         complex: Whether the function expects complex-valued inputs.
-        traceable: Whether the function can be traced.
+        traceable: Whether the function can also be traced. This is an **experimental** flag.
+            A function is traceable if it can both be scripted and traced to produce
+            the same result for a given input. Specifically:
+
+            - A function _can_ be tagged with traceable if its if branches (if any)
+              can be statically evaluated.
+            - A function _should_ be tagged with traceable if it contains if branches
+              and/or CastLike nodes, so that they can be evaluated away with
+              EXPERIMENTAL_PREFER_TRACING on.
+            - A function without if branches or CastLike nodes _should not_ be tagged
+              with traceable because inlining will do the same thing.
+            - A function with `@graph` defined for a `Scan` op is not traceable yet.
     """
     if registry is None:
         registry = default_registry
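A sketch of how the option is applied, following the rules in the docstring. The import paths mirror what torch_lib uses (an assumption on our part), and tagging this particular function is illustrative, not part of this commit:

```python
# Import paths as used in torch_lib (assumption; adjust to your checkout).
from onnxscript.function_libs.torch_lib.registration import torch_op
from onnxscript.function_libs.torch_lib.tensor_typing import TFloatOrBFloat16
from onnxscript.onnx_opset import opset18 as op


# CastLike nodes make this the kind of candidate that SHOULD be tagged, so
# the tracer can evaluate them away when EXPERIMENTAL_PREFER_TRACING is on.
# A function with no if branches or CastLike should NOT be tagged, since
# inlining already flattens it.
@torch_op("aten::logaddexp2", traceable=True)  # hypothetical tagging
def aten_logaddexp2(self: TFloatOrBFloat16, other: TFloatOrBFloat16) -> TFloatOrBFloat16:
    """logaddexp2(Tensor self, Tensor other) -> Tensor"""
    two = op.CastLike(2.0, self)
    summation = op.Add(op.Pow(two, self), op.Pow(two, other))
    return op.Div(op.Log(summation), op.Log(two))
```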

onnxscript/tests/function_libs/torch_lib/extra_opinfo.py

Lines changed: 2 additions & 2 deletions

@@ -69,7 +69,7 @@ def sample_inputs_conv3d(op_info, device, dtype, requires_grad, **kwargs):
             (32,),
             {
                 "stride": (3, 3, 3),
-                "padding": 2,
+                "padding": (2, 2, 2),
                 "dilation": (1, 1, 1),
                 "groups": 1,
             },
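In PyTorch itself the int shorthand and the per-dimension tuple produce identical results for conv3d; the opinfo now spells out the tuple form, presumably to suit the renamed `ops.aten.conv3d` entry (second hunk below) and its explicit per-dimension parameters. A quick equivalence check:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8, 8)
w = torch.randn(2, 3, 3, 3, 3)

# padding=2 is shorthand for padding=(2, 2, 2) on the three spatial dims.
a = F.conv3d(x, w, stride=(3, 3, 3), padding=2)
b = F.conv3d(x, w, stride=(3, 3, 3), padding=(2, 2, 2))
print(torch.equal(a, b))  # True
```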
@@ -1394,7 +1394,7 @@ def sample_inputs__native_batch_norm_legit_no_stats(
         supports_out=False,
     ),
     opinfo_core.OpInfo(
-        "nn.functional.conv3d",
+        "ops.aten.conv3d",
         aten_name="conv3d",
         dtypes=common_dtype.floating_and_complex_types_and(torch.int64, torch.bfloat16),
         sample_inputs_func=sample_inputs_conv3d,

onnxscript/tests/function_libs/torch_lib/ops_test_data.py

Lines changed: 38 additions & 8 deletions

@@ -526,7 +526,7 @@ def _where_input_wrangler(
         core_ops.aten_addbmm,
         tolerance={torch.float32: (2e-5, 2e-5), torch.float16: (2e-1, 2e-2)},
     ),
-    TorchLibOpInfo("addcdiv", core_ops.aten_addcdiv),
+    TorchLibOpInfo("addcdiv", core_ops.aten_addcdiv, tolerance={torch.float16: (3e-2, 1e-3)}),
     TorchLibOpInfo("addcmul", core_ops.aten_addcmul, tolerance={torch.float16: (4e-3, 3e-3)}),
     TorchLibOpInfo("addmm", core_ops.aten_addmm)
     .xfail(
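The `tolerance` entries added throughout this file read as per-dtype `(rtol, atol)` pairs, consistent with the existing float32/float16 entries above. Roughly what such an entry amounts to when outputs are compared; the harness's plumbing is not shown in this diff, so the pair order is an assumption:

```python
import torch

# tolerance={torch.float16: (3e-2, 1e-3)} ~ rtol=3e-2, atol=1e-3 for fp16 runs
# (assumed order; the harness internals are outside this diff).
rtol, atol = 3e-2, 1e-3
actual = torch.tensor([1.02], dtype=torch.float16)
expected = torch.tensor([1.00], dtype=torch.float16)
torch.testing.assert_close(actual, expected, rtol=rtol, atol=atol)  # passes
print("within tolerance")
```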
@@ -592,7 +592,7 @@ def _where_input_wrangler(
     TorchLibOpInfo("asin", core_ops.aten_asin),
     TorchLibOpInfo("asinh", core_ops.aten_asinh),
     TorchLibOpInfo("atan", core_ops.aten_atan),
-    TorchLibOpInfo("atan2", core_ops.aten_atan2),
+    TorchLibOpInfo("atan2", core_ops.aten_atan2, tolerance={torch.float16: (1e-3, 1e-3)}),
     TorchLibOpInfo("atanh", core_ops.aten_atanh),
     TorchLibOpInfo("atleast_1d", core_ops.aten_atleast_1d).skip(
         matcher=lambda sample: isinstance(sample.input, (list, tuple)),
@@ -737,7 +737,7 @@ def _where_input_wrangler(
     # TorchLibOpInfo("copy", core_ops.aten_copy),  # copy is not in OPS_DB
     TorchLibOpInfo("cos", core_ops.aten_cos),
     TorchLibOpInfo("cosh", core_ops.aten_cosh),
-    TorchLibOpInfo("cross", core_ops.aten_cross),
+    TorchLibOpInfo("cross", core_ops.aten_cross, tolerance={torch.float16: (6e-3, 3e-3)}),
     # TorchLibOpInfo("detach", core_ops.aten_detach),  # detach is not in OP-TEST-DB
     TorchLibOpInfo("diagonal", core_ops.aten_diagonal, trace_only=True),
     TorchLibOpInfo("diagonal_bool", core_ops.aten_diagonal_bool, trace_only=True),
@@ -920,8 +920,10 @@ def _where_input_wrangler(
         reason="fixme: LogSoftMax does not support empty tensor as input",
     ),
     TorchLibOpInfo("log2", core_ops.aten_log2),
-    TorchLibOpInfo("logaddexp", core_ops.aten_logaddexp),
-    TorchLibOpInfo("logaddexp2", core_ops.aten_logaddexp2),
+    TorchLibOpInfo("logaddexp", core_ops.aten_logaddexp, tolerance={torch.float16: (1, 1e-4)}),
+    TorchLibOpInfo(
+        "logaddexp2", core_ops.aten_logaddexp2, tolerance={torch.float16: (2e-2, 6e-4)}
+    ),
     TorchLibOpInfo(
         "logcumsumexp", core_ops.aten_logcumsumexp, tolerance={torch.float16: (1e-2, 1e-1)}
     ),
@@ -1087,10 +1089,16 @@ def _where_input_wrangler(
     TorchLibOpInfo(
         "nn.functional.adaptive_avg_pool1d",
         nn_ops.aten_adaptive_avg_pool1d,
-    ).xfail(
+    )
+    .xfail(
         # Shape should be [N, C, D1]
         matcher=lambda sample: sample.args[0] not in {1, (1,)},
         reason="only global pooling is supported; only batched inputs are supported",
+    )
+    .xfail(
+        reason="ORT fails on a cast node it inserts for float16. https://github.com/microsoft/onnxruntime/issues/16449",
+        dtypes=(torch.float16,),
+        test_class_name="TestOutputConsistencyEager",
     ),
     TorchLibOpInfo(
         "nn.functional.adaptive_avg_pool2d",
@@ -1718,7 +1726,9 @@ def _where_input_wrangler(
         dtypes=(torch.int64,),
         reason="fixme: ORT `LayerNormKernelImpl` not implemented for int64",
     ),
-    TorchLibOpInfo("logit", core_ops.aten_logit, trace_only=True),
+    TorchLibOpInfo(
+        "logit", core_ops.aten_logit, trace_only=True, tolerance={torch.float16: (1e-1, 7e-4)}
+    ),
     TorchLibOpInfo("max_dim", core_ops.aten_max_dim)
     .skip(
         variant_name="reduction_with_dim",
@@ -1869,7 +1879,7 @@ def _where_input_wrangler(
         reason="String padding is not accepted by aten::conv2d",
     ),
     TorchLibOpInfo(
-        "nn.functional.conv3d",
+        "ops.aten.conv3d",
         core_ops.aten_conv3d,
         trace_only=True,
         tolerance={torch.float32: (3.7e-5, 1.8e-4)},
@@ -1974,6 +1984,16 @@ def _where_input_wrangler(
     .skip(
         matcher=lambda sample: sample.kwargs.get("dropout_p") != 0.0,
         reason="dropout is random so the results do not match",
+    )
+    .xfail(
+        dtypes=(torch.float16,),
+        reason="fixme: ORT failed. https://github.com/microsoft/onnxruntime/issues/16438",
+        test_class_name="TestOutputConsistencyFullGraph",
+    )
+    .xfail(
+        reason="fixme: ORT fails on type mismatch in Add",
+        dtypes=(torch.float16,),
+        test_class_name="TestOutputConsistencyEager",
     ),
     TorchLibOpInfo(
         "ops.aten._scaled_dot_product_flash_attention",
@@ -2000,6 +2020,16 @@ def _where_input_wrangler(
     .skip(
         matcher=lambda sample: sample.kwargs.get("dropout_p") != 0.0,
         reason="dropout is random so the results do not match",
+    )
+    .xfail(
+        dtypes=(torch.float16,),
+        reason="fixme: ORT failed. https://github.com/microsoft/onnxruntime/issues/16438",
+        test_class_name="TestOutputConsistencyFullGraph",
+    )
+    .xfail(
+        reason="fixme: ORT fails on type mismatch in Add",
+        dtypes=(torch.float16,),
+        test_class_name="TestOutputConsistencyEager",
     ),
     TorchLibOpInfo(
         "nn.functional.upsample_bilinear2d",
