Update base for Update on "[Experimental] Convert CastLike to Cast when dtype is available | feat(torchlib)"

justinchuby · justinchuby · commit ccfe90de7cf2 · 2023-11-22T20:14:25.000Z
[ghstack-poisoned]
diff --git a/onnxscript/function_libs/torch_lib/graph_building.py b/onnxscript/function_libs/torch_lib/graph_building.py
@@ -90,11 +90,14 @@ def _rename_intermediate_value(name: str) -> str:
 class TorchScriptTensor(onnxscript_tensor.Tensor):
     """A onnxscript tensor that wraps a torchscript Value."""
 
-    def __init__(self, value: torch.Value):
+    def __init__(
+        self,
+        value: torch.Value,
+    ):
         super().__init__(None)
         self._torch_value: torch.Value = value
         self._concrete_value: Optional[np.ndarray] = None
-        self._shape: Optional[Tuple[int | None, ...]] = None
+        self._shape: Optional[Tuple[int | str | None, ...]] = None
         self._torch_dtype: Optional[torch.dtype] = None
         self._name: Optional[str] = None
         self._is_complex: bool = False
@@ -125,14 +128,17 @@ def name(self, name: str):
 
     @property  # type: ignore[override]
     def rank(self) -> int | None:
+        if self._shape is not None:
+            return len(self._shape)
+
         value_type = self._torch_value.type()
         if value_type is None:
             return None
         value_type = typing.cast(torch.TensorType, value_type)
         return value_type.dim()
 
     @property  # type: ignore[override]
-    def shape(self) -> Tuple[int | None, ...] | None:
+    def shape(self) -> Tuple[int | str | None, ...] | None:
         if self._shape is not None:
             return self._shape
 
@@ -149,9 +155,17 @@ def shape(self) -> Tuple[int | None, ...] | None:
         return tuple(shape)
 
     @shape.setter
-    def shape(self, shape: Tuple[int | None, ...]):
-        self._shape = shape
-        self._torch_value.setType(self._torch_value.type().with_sizes(list(shape)))
+    def shape(self, shape: Union[torch.Size, Tuple[int | str | None, ...]]):
+        # Normalize torch symbolic dimension size to str.
+        torch_sym_types = (torch.SymInt, torch.SymFloat, torch.SymBool)
+        self._shape = tuple(
+            str(dim.node) if isinstance(dim, torch_sym_types) else dim  # type: ignore[union-attr]
+            for dim in shape
+        )
+        # The JIT API does not support assigning symbolic shapes,
+        # hence symbolic dims are replaced with None.
+        jit_shape = tuple(dim if isinstance(dim, int) else None for dim in shape)
+        self._torch_value.setType(self._torch_value.type().with_sizes(list(jit_shape)))
 
     @property  # type: ignore[override]
     def dtype(self) -> torch.dtype | None:
@@ -195,6 +209,15 @@ def symbolic_value(self) -> torch.Value:
         """The symbolic Value in torch.Graph."""
         return self._torch_value
 
+    def value_info(self) -> Optional[onnx.ValueInfoProto]:
+        try:
+            dtype = self.onnx_dtype.value
+        except torch.onnx.errors.OnnxExporterError:
+            return None
+        if dtype == onnx.TensorProto.UNDEFINED:
+            return None
+        return onnx.helper.make_tensor_value_info(self.name, dtype, self.shape)
+
 
 @runtime_typing.checked
 def _unwrap_tensor_to_torch_value(
@@ -223,7 +246,12 @@ def _unwrap_tensor_to_torch_value(
 
 @runtime_typing.checked
 def _wrap_torch_value_to_tensor(
-    value: Union[torch.Value, Mapping[str, ValidTorchValueType], Sequence[ValidTorchValueType]]
+    value: Union[
+        torch.Value, Mapping[str, ValidTorchValueType], Sequence[ValidTorchValueType]
+    ],
+    *,
+    shape: Optional[Union[torch.Size, Tuple[Union[int, str, None], ...]]] = None,
+    dtype: Optional[torch.dtype] = None,
 ) -> Union[
     ValidArgumentType,
     Dict[str, ValidArgumentType],
@@ -232,7 +260,12 @@ def _wrap_torch_value_to_tensor(
 ]:
     """Wrap torch.Value to TorchScriptTensor."""
     if isinstance(value, torch.Value):
-        return TorchScriptTensor(value)
+        tensor = TorchScriptTensor(value)
+        if shape is not None:
+            tensor.shape = shape
+        if dtype is not None:
+            tensor.dtype = dtype
+        return tensor
     if isinstance(value, dict):
         return {k: _wrap_torch_value_to_tensor(v) for k, v in value.items()}  # type: ignore[misc,return-value]
     if isinstance(value, list):
@@ -444,6 +477,16 @@ def __init__(
         self._parent_torch_script_graph = parent_torch_script_graph
         # Domain name of the graph. None if this is the top level graph.
         self._domain_name: Optional[str] = domain_name
+        # Mapping from `torch.Value` to `TorchScriptTensor`.
+        # Because `torch.Value` does not provide an API to set and retrieve symbolic
+        # shapes, and because `TorchScriptTensor` is not accessible through the
+        # `torch.Graph`, this mapping is used to keep track of the `TorchScriptTensor`
+        # associated with each `torch.Value`.
+        # `TorchScriptTensor` records dtype and symbolic shapes.
+        # This info is later serialized as `ValueInfoProto` inside ONNX, to
+        # provide shape and dtype information for nodes within nested function calls.
+        # https://github.com/onnx/onnx/issues/5487
+        self._value_to_tensor: Dict[torch.Value, TorchScriptTensor] = {}
 
         if self._domain_name is None and self._parent_torch_script_graph is not None:
             raise RuntimeError(
@@ -486,7 +529,7 @@ def domain_name(self) -> Optional[str]:
     def add_input(
         self,
         input_name: Optional[str],
-        shape: Optional[Union[torch.Size, Sequence[Union[int, str, None]]]] = None,
+        shape: Optional[Union[torch.Size, Tuple[Union[int, str, None], ...]]] = None,
         dtype: Optional[torch.dtype] = None,
     ) -> TorchScriptTensor:
         if input_name is None:
@@ -507,7 +550,11 @@ def add_input(
                     [dim if isinstance(dim, int) else None for dim in shape]  # type: ignore[union-attr]
                 )
             )
-        tensor_value = _wrap_torch_value_to_tensor(torch_value)
+        tensor_value = _wrap_torch_value_to_tensor(torch_value, shape=shape, dtype=dtype)
+        if isinstance(tensor_value, TorchScriptTensor):
+            # NOTE: Only track values that map to a single tensor.
+            # Values that map to a Sequence/Dict of tensors are not tracked.
+            self._value_to_tensor[torch_value] = tensor_value
         return tensor_value  # type: ignore[return-value]
 
     @runtime_typing.checked
@@ -531,16 +578,16 @@ def add_initializer(self, name: str, value: torch.Tensor) -> TorchScriptTensor:
             self._initializers_inputs_from_parent[
                 name
             ] = self._parent_torch_script_graph.add_initializer(name, value)
-            torch_value = self._torch_graph.addInput(name)
-            torch_value.setType(torch.TensorType.create_from_tensor(value))
-            tensor_value = _wrap_torch_value_to_tensor(torch_value)
-            self._initializers_inputs[name] = tensor_value  # type: ignore[assignment]
-            return tensor_value  # type: ignore[return-value]
+        else:
+            self._initializers[name] = value
 
-        self._initializers[name] = value
         torch_value = self._torch_graph.addInput(name)
         torch_value.setType(torch.TensorType.create_from_tensor(value))
-        tensor_value = _wrap_torch_value_to_tensor(torch_value)
+        tensor_value = _wrap_torch_value_to_tensor(
+            torch_value, shape=value.shape, dtype=value.dtype
+        )
+        if isinstance(tensor_value, TorchScriptTensor):
+            self._value_to_tensor[torch_value] = tensor_value
         self._initializers_inputs[name] = tensor_value  # type: ignore[assignment]
         return tensor_value  # type: ignore[return-value]
 
@@ -640,11 +687,16 @@ def _add_torchscript_op_call(
             n_outputs=n_outputs,
         )
         assert result, "Expected at least one output from ONNX op call."
+        # NOTE: TorchScriptTensor is created here, but neither dtype nor shape is
+        # set. The exporter is expected to modify the returned tensor and set
+        # this info.
         if len(result) == 1:
             tensor = TorchScriptTensor(result[0])
             tensor.name = _rename_intermediate_value(tensor.name)
+            self._value_to_tensor[result[0]] = tensor
             return tensor
         tensors = tuple(TorchScriptTensor(v) for v in result)
+        self._value_to_tensor.update(dict(zip(result, tensors)))
         for tensor in tensors:
             tensor.name = _rename_intermediate_value(tensor.name)
         return tensors
@@ -679,6 +731,54 @@ def fetch_function_proto_dict(
             function_proto_dict[name_domain] = function.to_function_proto()
         return function_proto_dict
 
+    @runtime_typing.checked
+    def _override_with_symbolic_value_info_proto(self, onnx_model: onnx.ModelProto):
+        existing_value_info = {info.name: info for info in onnx_model.graph.value_info}
+
+        # Override value_info for top level graph inputs.
+        for input in self.torch_graph.inputs():
+            if input not in self._value_to_tensor:
+                raise RuntimeError(f"Input '{input.debugName()}' has no type.")
+            tensor = self._value_to_tensor[input]
+            if (value_info := tensor.value_info()) is None:
+                continue
+            for i, input_info in enumerate(onnx_model.graph.input):
+                if input_info.name == input.debugName():
+                    onnx_model.graph.input.insert(i, value_info)
+                    onnx_model.graph.input.remove(input_info)
+                    break
+
+        # Override value_info for top level graph outputs.
+        for output in self.torch_graph.outputs():
+            if output not in self._value_to_tensor:
+                raise RuntimeError(f"Output '{output.debugName()}' has no type.")
+            tensor = self._value_to_tensor[output]
+            if (value_info := tensor.value_info()) is None:
+                continue
+            for i, output_info in enumerate(onnx_model.graph.output):
+                if output_info.name == output.debugName():
+                    onnx_model.graph.output.insert(i, value_info)
+                    onnx_model.graph.output.remove(output_info)
+                    break
+
+        # Remove existing static/incomplete value info.
+        del onnx_model.graph.value_info[:]
+
+        # Insert value info for nodes within nested function calls.
+        # NOTE: This is experimental: in the official ONNX spec, nodes within FunctionProto
+        # are not expected to have value info. https://github.com/onnx/onnx/issues/5487
+        # The names for value info are generated uniquely to be retrievable based on
+        # the call site and call stack.
+        # The naming strategy is subject to change. Since all local functions representing
+        # nn.Modules exported by dynamo exporter have unique call sites, their function
+        # op_type name can serve to form the unique identifier for value info.
+        function_value_infos = self.generate_function_value_info_proto()
+        # Override existing value info for nodes in top level graph.
+        existing_value_info.update(function_value_infos)
+        onnx_model.graph.value_info.extend(existing_value_info.values())
+
+        return onnx_model
+
     @runtime_typing.checked
     def add_op_call(
         self,
@@ -737,6 +837,39 @@ def add_module_call(
             n_outputs=sub_torch_script_graph.num_outputs,
         )
 
+    @runtime_typing.checked
+    def generate_function_value_info_proto(
+        self, prefix: str = ""
+    ) -> Mapping[str, onnx.ValueInfoProto]:
+        """Unique naming strategies
+
+            {function1_op_type}/{function2_op_type}/.../{value_name}
+
+        As long as each function op_type has a unique call site, this is safe.
+
+        The following would be preferable:
+
+            {node1_name}/{node2_name}/.../{value_name}
+
+        However, the node name is an optional field generated on the fly during torchscript
+        graph serialization to onnx model proto. Such info is not retrievable at this point.
+        """
+        named_value_info = {}
+        for torch_value, tensor in self._value_to_tensor.items():
+            name = torch_value.debugName()
+            if (value_info := tensor.value_info()) is None:
+                continue
+            if prefix:
+                name = f"{prefix}/{name}"
+            named_value_info[name] = value_info
+        for name, sub_graph in self._sub_torch_script_graphs.items():
+            named_value_info.update(
+                sub_graph.generate_function_value_info_proto(
+                    f"{prefix}/{name}" if prefix else name
+                )
+            )
+        return named_value_info
+
     @runtime_typing.checked
     def to_function_proto(self, opset_version: int, function_name: str) -> onnx.FunctionProto:
         assert len(self.initializers) == 0, "Model local functions cannot have initializers."
@@ -846,6 +979,9 @@ def to_model_proto(
         onnx_model.functions.extend(function_proto_dict.values())
         onnx_model.functions.extend(_shared_functions())
 
+        # Override value_infos with symbolic shapes.
+        onnx_model = self._override_with_symbolic_value_info_proto(onnx_model)
+
         # `_export_onnx` only exports opset_imports that is visible to it. It does not
         # export opset_imports for nested functions, since it does not have access to
         # them. We manually add them back and merge with existing opset_imports in the
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,6 @@ name = "onnxscript"
 dynamic = ["version"]
 description = "Naturally author ONNX functions and models using a subset of Python"
 authors = [{ name = "Microsoft Corporation", email = "onnx@microsoft.com" }]
-urls = { "Repository" = "https://github.com/microsoft/onnxscript" }
 readme = "README.md"
 requires-python = ">=3.8"
 license = { file = "LICENSE" }
diff --git a/setup.py b/setup.py
@@ -7,6 +7,7 @@
 import datetime
 import os
 import pathlib
+import subprocess
 
 import setuptools
 
@@ -15,9 +16,20 @@
 VERSION_FILE = ROOT_DIR / "VERSION"
 version = VERSION_FILE.read_text().strip()
 
+project_urls = {
+    "Repository": "https://github.com/microsoft/onnxscript",
+}
 if os.environ.get("ONNX_SCRIPT_RELEASE") != "1":
     date = datetime.date.today().strftime("%Y%m%d")
     version = f"{version}.dev{date}"
 
+    commit_hash_cmd = subprocess.run(
+        ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, check=False
+    )
+    if commit_hash_cmd.returncode == 0:
+        project_urls[
+            "Commit"
+        ] = f"https://github.com/microsoft/onnxscript/tree/{commit_hash_cmd.stdout.decode('utf-8').strip()}"
+
 # NOTE: Do not include other metadata in setup.py. Put it in pyproject.toml.
-setuptools.setup(version=version)
+setuptools.setup(version=version, project_urls=project_urls, url="https://onnxscript.ai/")