Update on "Autoquant"

HDCharles · HDCharles · commit 4aae2a36502a · 2024-03-19T16:41:23.000-07:00
Summary: Adds autoquantization functionality. Using the do_quant API, we can test kernel speeds and pick the best quantization type (or no quantization) for each layer. Test Plan: python test/test.py -k "autoquant"; also tested on SAM and SDXL (pytorch-labs/segment-anything-fast#114, HDCharles/sdxl-fast@8d9942a). Reviewers: Subscribers: Tasks: Tags: Differential Revision: [D55103983](https://our.internmc.facebook.com/intern/diff/D55103983) [ghstack-poisoned]
diff --git a/test/test.py b/test/test.py
@@ -894,12 +894,6 @@ def test_aq_int8_dynamic_quant_subclass(self):
                 AQInt8DynamicallyQuantizedLinearWeight.from_float, 35, test_dtype
             )
 
-    def test_aq_int8_weight_only_quant_subclass(self):
-        for test_dtype in [torch.float32, torch.float16, torch.bfloat16]:
-            self._test_lin_weight_subclass_impl(
-                AQInt8DynamicallyQuantizedLinearWeight.from_float, 35, test_dtype
-            )
-
     def test_aq_int8_weight_only_quant_subclass(self):
         for test_dtype in [torch.float32, torch.float16, torch.bfloat16]:
             self._test_lin_weight_subclass_impl(