Skip to content

[IR] Handle ONNX custom types in DataType.from_numpy #2131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions onnxscript/ir/_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,32 @@
Raises:
TypeError: If the data type is not supported by ONNX.
"""
if dtype not in _NP_TYPE_TO_DATA_TYPE:
raise TypeError(f"Unsupported numpy data type: {dtype}")
return cls(_NP_TYPE_TO_DATA_TYPE[dtype])
if dtype in _NP_TYPE_TO_DATA_TYPE:
return cls(_NP_TYPE_TO_DATA_TYPE[dtype])

if np.issubdtype(dtype, np.str_):
return DataType.STRING

# Special cases for handling custom dtypes defined in ONNX (as of onnx 1.18)
# Ref: https://github.com/onnx/onnx/blob/2d42b6a60a52e925e57c422593e88cc51890f58a/onnx/_custom_element_types.py
if hasattr(dtype, "names"):
if dtype.names == ("bfloat16",):
return DataType.BFLOAT16
if dtype.names == ("e4m3fn",):
return DataType.FLOAT8E4M3FN
if dtype.names == ("e4m3fnuz",):
return DataType.FLOAT8E4M3FNUZ
if dtype.names == ("e5m2",):
return DataType.FLOAT8E5M2
if dtype.names == ("e5m2fnuz",):
return DataType.FLOAT8E5M2FNUZ
if dtype.names == ("uint4",):
return DataType.UINT4
if dtype.names == ("int4",):
return DataType.INT4
if dtype.names == ("float4e2m1",):
return DataType.FLOAT4E2M1
raise TypeError(f"Unsupported numpy data type: {dtype}")

Check warning on line 101 in onnxscript/ir/_enums.py

View check run for this annotation

Codecov / codecov/patch

onnxscript/ir/_enums.py#L100-L101

Added lines #L100 - L101 were not covered by tests

@property
def itemsize(self) -> float:
Expand Down
77 changes: 74 additions & 3 deletions onnxscript/ir/_enums_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# pylint: disable=protected-access
import unittest

import ml_dtypes
import numpy as np
import onnx
import onnx._custom_element_types
import parameterized

from onnxscript.ir import _enums

Expand Down Expand Up @@ -36,9 +40,76 @@ def test_enums_are_the_same_as_spec(self):
self.assertEqual(_enums.DataType.FLOAT4E2M1, onnx.TensorProto.FLOAT4E2M1)
self.assertEqual(_enums.DataType.UNDEFINED, onnx.TensorProto.UNDEFINED)

def test_from_numpy_takes_np_dtype_and_returns_data_type(self):
array = np.array([], dtype=np.float64)
self.assertEqual(_enums.DataType.from_numpy(array.dtype), _enums.DataType.DOUBLE)
@parameterized.parameterized.expand(
    [
        # Native numpy dtypes.
        ("string", np.array("some_string").dtype, _enums.DataType.STRING),
        ("float64", np.dtype(np.float64), _enums.DataType.DOUBLE),
        ("float32", np.dtype(np.float32), _enums.DataType.FLOAT),
        ("float16", np.dtype(np.float16), _enums.DataType.FLOAT16),
        ("int32", np.dtype(np.int32), _enums.DataType.INT32),
        ("int16", np.dtype(np.int16), _enums.DataType.INT16),
        ("int8", np.dtype(np.int8), _enums.DataType.INT8),
        ("int64", np.dtype(np.int64), _enums.DataType.INT64),
        ("uint8", np.dtype(np.uint8), _enums.DataType.UINT8),
        ("uint16", np.dtype(np.uint16), _enums.DataType.UINT16),
        ("uint32", np.dtype(np.uint32), _enums.DataType.UINT32),
        ("uint64", np.dtype(np.uint64), _enums.DataType.UINT64),
        ("bool", np.dtype(np.bool_), _enums.DataType.BOOL),
        ("complex64", np.dtype(np.complex64), _enums.DataType.COMPLEX64),
        ("complex128", np.dtype(np.complex128), _enums.DataType.COMPLEX128),
        # dtypes provided by ml_dtypes.
        ("bfloat16", np.dtype(ml_dtypes.bfloat16), _enums.DataType.BFLOAT16),
        ("float8e4m3fn", np.dtype(ml_dtypes.float8_e4m3fn), _enums.DataType.FLOAT8E4M3FN),
        (
            "float8e4m3fnuz",
            np.dtype(ml_dtypes.float8_e4m3fnuz),
            _enums.DataType.FLOAT8E4M3FNUZ,
        ),
        ("float8e5m2", np.dtype(ml_dtypes.float8_e5m2), _enums.DataType.FLOAT8E5M2),
        (
            "float8e5m2fnuz",
            np.dtype(ml_dtypes.float8_e5m2fnuz),
            _enums.DataType.FLOAT8E5M2FNUZ,
        ),
        ("uint4", np.dtype(ml_dtypes.uint4), _enums.DataType.UINT4),
        ("int4", np.dtype(ml_dtypes.int4), _enums.DataType.INT4),
        ("float4e2m1", np.dtype(ml_dtypes.float4_e2m1fn), _enums.DataType.FLOAT4E2M1),
        # Custom element types defined by the ONNX reference implementation.
        (
            "onnx_ref_bfloat16",
            onnx._custom_element_types.bfloat16,
            _enums.DataType.BFLOAT16,
        ),
        (
            "onnx_ref_float8e4m3fn",
            onnx._custom_element_types.float8e4m3fn,
            _enums.DataType.FLOAT8E4M3FN,
        ),
        (
            "onnx_ref_float8e4m3fnuz",
            onnx._custom_element_types.float8e4m3fnuz,
            _enums.DataType.FLOAT8E4M3FNUZ,
        ),
        (
            "onnx_ref_float8e5m2",
            onnx._custom_element_types.float8e5m2,
            _enums.DataType.FLOAT8E5M2,
        ),
        (
            "onnx_ref_float8e5m2fnuz",
            onnx._custom_element_types.float8e5m2fnuz,
            _enums.DataType.FLOAT8E5M2FNUZ,
        ),
        (
            "onnx_ref_uint4",
            onnx._custom_element_types.uint4,
            _enums.DataType.UINT4,
        ),
        ("onnx_ref_int4", onnx._custom_element_types.int4, _enums.DataType.INT4),
        # from_numpy identifies custom types by their single field name, so a
        # plain structured dtype named "float4e2m1" exercises that branch
        # without relying on the installed onnx exposing such a type.
        (
            "named_field_float4e2m1",
            np.dtype([("float4e2m1", np.uint8)]),
            _enums.DataType.FLOAT4E2M1,
        ),
    ]
)
def test_from_numpy_takes_np_dtype_and_returns_data_type(
    self, _: str, np_dtype: np.dtype, onnx_type: _enums.DataType
):
    """Check that DataType.from_numpy maps each numpy dtype to the expected DataType.

    Args:
        _: Case name consumed by ``parameterized`` to label the generated test.
        np_dtype: The numpy dtype passed to ``DataType.from_numpy``.
        onnx_type: The ``DataType`` member the conversion is expected to return.
    """
    self.assertEqual(_enums.DataType.from_numpy(np_dtype), onnx_type)

def test_numpy_returns_np_dtype(self):
self.assertEqual(_enums.DataType.DOUBLE.numpy(), np.dtype(np.float64))
Expand Down
Loading