Commit d5aed60

Move more utils to TorchAOBaseTensor (pytorch#784)

Summary: Moves _implements, _dispatch__torch_dispatch__, _dispatch__torch_function__, _register_layout_cls, and _get_layout_tensor_constructor over to `TorchAOBaseTensor`, so that tensor subclasses which inherit from it get these utilities directly.

Test Plan:
python test/quantization/test_quant_api.py
python test/integration/test_integration.py
Rely on CI for the other tests.

1 parent 4d88ec3 commit d5aed60
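
To make the change concrete, here is a minimal sketch (my illustration, not code from this commit) of the pattern it enables: a tensor subclass inherits from TorchAOBaseTensor and picks up `implements`, `__torch_function__`, and `__torch_dispatch__` directly, instead of wiring them up with `classmethod(...)` as the files below used to. `MyTensor`, its `inner` attribute, and the registered handler are hypothetical.

import torch
from torch.utils._python_dispatch import return_and_correct_aliasing
from torchao.utils import TorchAOBaseTensor

aten = torch.ops.aten

class MyTensor(TorchAOBaseTensor):
    # Dispatch plumbing (implements / __torch_function__ / __torch_dispatch__)
    # is inherited from TorchAOBaseTensor; no per-class classmethod boilerplate.
    @staticmethod
    def __new__(cls, data: torch.Tensor):
        return torch.Tensor._make_wrapper_subclass(cls, data.shape, dtype=data.dtype, device=data.device)

    def __init__(self, data: torch.Tensor):
        self.inner = data

@MyTensor.implements(aten.detach.default)
def _(func, types, args, kwargs):
    # Hypothetical handler: detach the inner tensor and rewrap,
    # preserving aliasing metadata for the dispatcher.
    return return_and_correct_aliasing(func, args, kwargs, MyTensor(args[0].inner.detach()))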

14 files changed: +205 additions, −233 deletions

test/prototype/test_low_bit_optim.py

Lines changed: 1 addition & 1 deletion

@@ -75,7 +75,7 @@ def test_quantize_4bit_with_qmap_compile(self, device):
 
 
 class TestOptim(TestCase):
-    @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_3, reason="requires PyTorch >= 2.3")
+    @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_4, reason="requires PyTorch >= 2.3")
     @parametrize("optim_name", ["Adam8bit", "AdamW8bit", "Adam4bit", "AdamW4bit", "AdamFp8", "AdamWFp8"])
     @parametrize("dtype", [torch.float32, torch.bfloat16])
     @parametrize("device", _DEVICES)

torchao/dtypes/affine_quantized_tensor.py

Lines changed: 4 additions & 14 deletions

@@ -21,11 +21,6 @@
 )
 from torch.utils._python_dispatch import return_and_correct_aliasing
 from torchao.dtypes.utils import (
-    _implements,
-    _dispatch__torch_function__,
-    _dispatch__torch_dispatch__,
-    _register_layout_cls,
-    _get_layout_tensor_constructor,
     LayoutType,
     PlainLayoutType,
     is_device,
@@ -405,7 +400,8 @@ def _apply_fn_to_data(self, fn):
            strides=self.stride(),
        )
 
-    implements = classmethod(_implements)
+    # following are the comments for __torch_function__/__torch_dispatch__, we can clean this up
+    # a bit later
    # Note: we only added cpu path here for 8da4w, this is for executorch, in the future
    # 1. we'll add cpu/cuda version (int4mm etc.)
    # 2. we'll need to hide the 8da4w executorch version under things like layouts (we also have multiple impl for cpu kernel as Michael mentioned), so it will be something like
@@ -417,19 +413,13 @@ def _apply_fn_to_data(self, fn):
    # 1 - when tensor is on CUDA: we'll add this later, we'll also enable dispatching to optimized
    # kernels in CPU as well, see the note above
    # 2 - we're given non-floats - quantizing long to int8 is crazy
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-    __torch_function__ = classmethod(_dispatch__torch_function__)
 
 
 ######################################################
 # LayoutType and Layout Tensor Subclass Registration #
 ######################################################
-
-def register_layout_cls(layout_type_class: type(LayoutType)):
-    return _register_layout_cls(AffineQuantizedTensor, layout_type_class)
-
-def get_layout_tensor_constructor(layout_type_class: type(LayoutType)):
-    return _get_layout_tensor_constructor(AffineQuantizedTensor, layout_type_class)
+register_layout_cls = AffineQuantizedTensor.register_layout_cls
+get_layout_tensor_constructor = AffineQuantizedTensor.get_layout_tensor_constructor
 
 @dataclass(frozen=True)
 class SemiSparseLayoutType(LayoutType):

torchao/dtypes/fpx/fpx.py

Lines changed: 0 additions & 4 deletions

@@ -8,14 +8,10 @@
 from torchao.ops import quant_llm_linear
 from torchao.dtypes.utils import (
     LayoutType,
-    _implements,
-    _dispatch__torch_function__,
-    _dispatch__torch_dispatch__,
 )
 from torchao.quantization.quant_api import _get_linear_subclass_inserter
 from dataclasses import dataclass
 from torchao.dtypes.affine_quantized_tensor import AQTLayout, register_layout_cls
-from torchao.utils import TorchAOBaseTensor
 
 
 aten = torch.ops.aten

torchao/dtypes/uintx/Uintx.py

Lines changed: 2 additions & 8 deletions

@@ -6,10 +6,8 @@
 from .bitpacking import pack, unpack
 from torchao.dtypes.utils import (
     LayoutType,
-    _implements,
-    _dispatch__torch_function__,
-    _dispatch__torch_dispatch__,
 )
+from torchao.utils import TorchAOBaseTensor
 from torchao.dtypes.affine_quantized_tensor import PlainAQTLayout, register_layout_cls
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_3
 
@@ -35,7 +33,7 @@
     print("uintx feature need torch 2.3+, please upgrade pytorch")
 
 
-class UintxTensor(torch.Tensor):
+class UintxTensor(TorchAOBaseTensor):
     """
     Splits int data into packed shards based on bit size
     fields:
@@ -99,10 +97,6 @@ def __tensor_unflatten__(
         packed_shape, bit_width, pack_dim = tensor_attributes
         return cls(shards, packed_shape, bit_width, pack_dim)
 
-    implements = classmethod(_implements)
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-    __torch_function__ = classmethod(_dispatch__torch_function__)
-
     def get_plain(self):
         return unpack(self.get_shards(), self.bit_width, dim = self.pack_dim)
torchao/dtypes/utils.py

Lines changed: 1 addition & 116 deletions

@@ -1,75 +1,6 @@
 import torch
-from typing import Dict, Callable, Union, Tuple, Optional
-from collections import defaultdict
-import functools
+from typing import Union, Tuple
 from dataclasses import dataclass
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5
-
-"""
-Helper function for implementing aten op or torch function dispatch
-and dispatching to these implementations.
-"""
-def _implements(cls, aten_ops_or_torch_fns):
-    """Use this decorator to implement a function for an aten ops in __torch_dispatch__
-    (if user passed in a list of ops)
-    or torch function in __torch_function__ (if user passed in a single object)
-
-    class MyTensor(torch.Tensor):
-        ...
-        implements = classmethod(_implements)
-
-    implements = MyTensor.implements
-
-    @implements(torch.nn.functional.linear):
-    def _(func, types, args, kwargs):
-        ...
-
-    """
-    if not hasattr(cls, "_ATEN_OP_OR_TORCH_FN_TABLE"):
-        cls._ATEN_OP_OR_TORCH_FN_TABLE = {}
-
-    if not isinstance(aten_ops_or_torch_fns, (list, tuple)):
-        aten_ops_or_torch_fns = [aten_ops_or_torch_fns]
-    def decorator(func):
-        for op in aten_ops_or_torch_fns:
-            @functools.wraps(op)
-            def wrapper(f, types, args, kwargs):
-                return func(f, types, args, kwargs)
-
-            cls._ATEN_OP_OR_TORCH_FN_TABLE[op] = wrapper
-        return func
-    return decorator
-
-def _dispatch__torch_function__(cls, func, types, args=(), kwargs=None):
-    """Use this util function for a common `__torch_function__` implementation
-    that dispatches to ops/functions registered with `_implements`
-
-    class MyTensor(torch.Tensor):
-        ...
-        __torch_function__ = classmethod(_dispatch__torch_function__)
-    """
-    kwargs = {} if kwargs is None else kwargs
-    if hasattr(cls, "_ATEN_OP_OR_TORCH_FN_TABLE") and \
-        func in cls._ATEN_OP_OR_TORCH_FN_TABLE:
-        return cls._ATEN_OP_OR_TORCH_FN_TABLE[func](func, types, args, kwargs)
-
-    with torch._C.DisableTorchFunctionSubclass():
-        return func(*args, **kwargs)
-
-def _dispatch__torch_dispatch__(cls, func, types, args, kwargs):
-    """Use this util function for a common `__torch_dispatch__` implementation
-    that dispatches to ops/functions registered with `_implements`
-
-    class MyTensor(torch.Tensor):
-        ...
-        __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-    """
-    if hasattr(cls, "_ATEN_OP_OR_TORCH_FN_TABLE") and \
-        func in cls._ATEN_OP_OR_TORCH_FN_TABLE:
-        return cls._ATEN_OP_OR_TORCH_FN_TABLE[func](func, types, args, kwargs)
-
-    raise NotImplementedError(f"{cls.__name__} dispatch: attempting to run unimplemented operator/function: {func}")
-
 
 """
 Base class for different LayoutType, should not be instantiated directly
@@ -101,52 +32,6 @@ def extra_repr(self) -> str:
 class PlainLayoutType(LayoutType):
     pass
 
-"""
-layout tensor constructor registration for different tensor subclassesa
-
-first key is a tensor subclass type like AffineQuantizedTensor
-second key is an extended layout string, like tensor_core_tiled
-value is a constructor for the LayoutTensor class, e.g. TensorCoreTiledAQTLayout.from_plain
-"""
-_LAYOUT_CONSTRUCTOR_TABLE: Dict[Callable, Dict[type(LayoutType), Callable]] = defaultdict(dict)
-
-def _register_layout_cls(cls: Callable, layout_type_class: type(LayoutType)):
-    """Helper function for layout registrations, this is used to implement
-    register_layout_cls decorator for each tensor subclass, see aqt.py for example usage
-
-    Args:
-        cls: Tensor subclass type
-        layout_type_class: the class type of subclass of `LayoutType`, e.g. `PlainLayoutType`
-
-    Returns:
-        a decorator that registers the layout tensor constructor in the table
-    """
-    def decorator(layout_cls):
-        _LAYOUT_CONSTRUCTOR_TABLE[cls][layout_type_class] = layout_cls.from_plain
-        if TORCH_VERSION_AT_LEAST_2_5:
-            # Allow serialization to work for models uses this layout tensor subclass
-            torch.serialization.add_safe_globals([layout_type_class, layout_cls])
-        return layout_cls
-    return decorator
-
-def _get_layout_tensor_constructor(cls: Callable, layout_type_class: type(LayoutType)) -> Callable:
-    """Get Layout class constructor (LayoutClass.from_plain) for `cls` based on `layout_type_class`
-    `layout_type_class` means the class type of subclass of `LayoutType`, e.g. `PlainLayoutType`
-
-    Args:
-        cls: Tensor subclass type
-        layout_type_class: the class type of subclass of `LayoutType`, e.g. `PlainLayoutType`
-
-    Returns:
-        layout tensor subclass constructor for the layout_type_class
-    """
-    if cls not in _LAYOUT_CONSTRUCTOR_TABLE:
-        raise ValueError(f"no registered layout class constructor for: {cls}")
-    if layout_type_class not in _LAYOUT_CONSTRUCTOR_TABLE[cls]:
-        raise ValueError(f"layout_name: {layout_type_class} is not supported yet for {cls}")
-
-    return _LAYOUT_CONSTRUCTOR_TABLE[cls][layout_type_class]
-
 def is_device(target_device_str: str, device: Union[str, torch.device]):
     return torch.device(device).type == target_device_str
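
For context on how the relocated layout registration is meant to be used after this change, here is a small sketch (my own, not part of the diff), assuming `register_layout_cls` and `get_layout_tensor_constructor` behave like the removed helpers above, i.e. the decorator records the decorated layout tensor class's `from_plain` for a given `LayoutType` subclass. `MyLayoutType`, `MyAQTLayout`, and the exact `from_plain` signature are hypothetical.

from dataclasses import dataclass

from torchao.dtypes.utils import LayoutType
from torchao.dtypes.affine_quantized_tensor import (
    AQTLayout,
    register_layout_cls,             # alias for AffineQuantizedTensor.register_layout_cls
    get_layout_tensor_constructor,   # alias for AffineQuantizedTensor.get_layout_tensor_constructor
)

@dataclass(frozen=True)
class MyLayoutType(LayoutType):
    pass

@register_layout_cls(MyLayoutType)
class MyAQTLayout(AQTLayout):
    @classmethod
    def from_plain(cls, int_data, scale, zero_point, layout_type):
        # Hypothetical: pack the plain quantized data into this layout here.
        ...

# The registered constructor can then be looked up by layout type;
# this should resolve to MyAQTLayout.from_plain.
ctor = get_layout_tensor_constructor(MyLayoutType)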

torchao/prototype/low_bit_optim/subclass_4bit.py

Lines changed: 2 additions & 5 deletions

@@ -2,7 +2,7 @@
 
 import torch
 from torch import Tensor
-from torchao.dtypes.utils import _implements, _dispatch__torch_dispatch__
+from torchao.utils import TorchAOBaseTensor
 
 from .quant_utils import create_dynamic_map, scale_tensor, quantize_4bit_with_qmap, dequant_with_qmap
 
@@ -18,8 +18,7 @@
 QMAP_UNSIGNED = torch.linspace(0, 1, 17)[1:].tolist() # no zero
 
 
-class OptimState4bit(Tensor):
-    implements = classmethod(_implements)
+class OptimState4bit(TorchAOBaseTensor):
     tensor_attrs = ["codes", "scale", "qmap"]
 
     @staticmethod
@@ -80,8 +79,6 @@ def __repr__(self):
            f"shape={tuple(self.shape)}, device={self.device}, requires_grad={self.requires_grad})"
        )
 
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-
 
 @OptimState4bit.implements(aten.copy_.default)
 def _(func, types, args, kwargs):

torchao/prototype/low_bit_optim/subclass_8bit.py

Lines changed: 2 additions & 5 deletions

@@ -1,6 +1,6 @@
 import torch
 from torch import Tensor
-from torchao.dtypes.utils import _implements, _dispatch__torch_dispatch__
+from torchao.utils import TorchAOBaseTensor
 
 from .quant_utils import create_dynamic_map, scale_tensor, quantize_8bit_with_qmap, dequant_with_qmap
 
@@ -13,8 +13,7 @@
 QMAP_UNSIGNED = create_dynamic_map(signed=False)
 
 
-class OptimState8bit(Tensor):
-    implements = classmethod(_implements)
+class OptimState8bit(TorchAOBaseTensor):
     tensor_attrs = ["codes", "scale", "qmap"]
 
     @staticmethod
@@ -66,8 +65,6 @@ def __repr__(self):
            f"shape={tuple(self.shape)}, device={self.device}, requires_grad={self.requires_grad})"
        )
 
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-
 
 @OptimState8bit.implements(aten.copy_.default)
 def _(func, types, args, kwargs):

torchao/prototype/low_bit_optim/subclass_fp8.py

Lines changed: 2 additions & 5 deletions

@@ -1,6 +1,6 @@
 import torch
 from torch import Tensor
-from torchao.dtypes.utils import _implements, _dispatch__torch_dispatch__
+from torchao.utils import TorchAOBaseTensor
 
 
 aten = torch.ops.aten
@@ -21,8 +21,7 @@ def quantize_fp8(input: Tensor, block_size: int):
 
 # NOTE: FP8 sign bit is redundant for unsigned optim state.
 # we may investigate how to use it to increase range/precision for unsigned optim state.
-class OptimStateFp8(Tensor):
-    implements = classmethod(_implements)
+class OptimStateFp8(TorchAOBaseTensor):
     tensor_attrs = ["codes", "scale"]
 
     @staticmethod
@@ -72,8 +71,6 @@ def __repr__(self):
            f"shape={tuple(self.shape)}, device={self.device}, requires_grad={self.requires_grad})"
        )
 
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-
 
 @OptimStateFp8.implements(aten.copy_.default)
 def _(func, types, args, kwargs):

torchao/prototype/quantized_training/int8.py

Lines changed: 2 additions & 6 deletions

@@ -4,15 +4,15 @@
 from torch import Tensor, nn
 from torch.utils._python_dispatch import return_and_correct_aliasing
 
-from torchao.dtypes.utils import _dispatch__torch_dispatch__, _dispatch__torch_function__, _implements
+from torchao.utils import TorchAOBaseTensor
 
 
 aten = torch.ops.aten
 c10d_functional = torch.ops.c10d_functional
 _c10d_functional = torch.ops._c10d_functional
 
 
-class Int8QTLinearWeight(Tensor):
+class Int8QTLinearWeight(TorchAOBaseTensor):
     """INT8 symmetric quantization weight, with absmax scaling [-127, 127]. The main difference
     of this tensor subclass from AffineQuantizedTensor:
     1. `F.linear` is differentiable i.e. backward is defined.
@@ -22,10 +22,6 @@ class Int8QTLinearWeight(Tensor):
     for more details.
     """
 
-    implements = classmethod(_implements)
-    __torch_function__ = classmethod(_dispatch__torch_function__)
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-
     @staticmethod
     @torch._dynamo.disable
     def __new__(cls, int_data: Tensor, scale: Tensor):

torchao/quantization/linear_activation_quantized_tensor.py

Lines changed: 0 additions & 9 deletions

@@ -1,9 +1,4 @@
 import torch
-from torchao.dtypes.utils import (
-    _implements,
-    _dispatch__torch_function__,
-    _dispatch__torch_dispatch__,
-)
 from typing import Callable
 from torch.utils._python_dispatch import return_and_correct_aliasing
 from torchao.utils import (
@@ -94,10 +89,6 @@ def to(self, *args, **kwargs):
            self.input_quant_func,
        )
 
-    implements = classmethod(_implements)
-    __torch_function__ = classmethod(_dispatch__torch_function__)
-    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
-
 implements = LinearActivationQuantizedTensor.implements
 
 @implements(torch.nn.functional.linear)
