Skip to content

Commit b54f235

Browse files
committed
Update on "Move QAT out of prototype"
**Summary:** Move QAT out of prototype so we can provide stronger BC guarantees moving forward. **BC-breaking notes** Before: ``` from torchao.quantization.prototype.qat import ( disable_4w_fake_quant, disable_8da4w_fake_quant, enable_4w_fake_quant, enable_8da4w_fake_quant, ComposableQATQuantizer, Int4WeightOnlyQATQuantizer, Int4WeightOnlyEmbeddingQATQuantizer, Int8DynActInt4WeightQATQuantizer, Int8DynActInt4WeightQATLinear, ) from torchao.quantization.prototype.qat.api import ( FakeQuantizeConfig, ) from torchao.quantization.prototype.qat.fake_quantizer import ( FakeQuantizer, ) ``` After: ``` from torchao.quantization.qat import ( ComposableQATQuantizer, Int4WeightOnlyQATQuantizer, Int4WeightOnlyEmbeddingQATQuantizer, Int8DynActInt4WeightQATQuantizer, ) from torchao.quantization.qat.linear import ( disable_4w_fake_quant, disable_8da4w_fake_quant, enable_4w_fake_quant, enable_8da4w_fake_quant, Int8DynActInt4WeightQATLinear, ) from torchao.quantization.qat.api import ( FakeQuantizeConfig, ) from torchao.quantization.qat.fake_quantizer import ( FakeQuantizer, ) ``` **Test Plan:** python test/quantization/test_qat.py Differential Revision: [D64555609](https://our.internmc.facebook.com/intern/diff/D64555609) [ghstack-poisoned]
2 parents 58f402d + add3c16 commit b54f235

File tree

10 files changed

+121
-1
lines changed

10 files changed

+121
-1
lines changed

test/quantization/test_qat.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,59 @@ def embedding_forward_4w(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
937937
baseline_out = embedding_forward_4w(x2, fq_embedding.weight)
938938
torch.testing.assert_close(baseline_out, fq_out, atol=0, rtol=0)
939939

940+
@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is below 2.4")
941+
def test_qat_prototype_bc(self):
942+
"""
943+
Just to make sure we can import all the old prototype paths.
944+
We will remove this test in the near future when we actually break BC.
945+
"""
946+
from torchao.quantization.prototype.qat import (
947+
disable_4w_fake_quant,
948+
disable_8da4w_fake_quant,
949+
enable_4w_fake_quant,
950+
enable_8da4w_fake_quant,
951+
ComposableQATQuantizer,
952+
Int8DynActInt4WeightQATLinear,
953+
Int4WeightOnlyEmbeddingQATQuantizer,
954+
Int4WeightOnlyQATQuantizer,
955+
Int8DynActInt4WeightQATQuantizer,
956+
)
957+
from torchao.quantization.prototype.qat._module_swap_api import (
958+
disable_4w_fake_quant_module_swap,
959+
enable_4w_fake_quant_module_swap,
960+
disable_8da4w_fake_quant_module_swap,
961+
enable_8da4w_fake_quant_module_swap,
962+
Int4WeightOnlyQATQuantizerModuleSwap,
963+
Int8DynActInt4WeightQATQuantizerModuleSwap,
964+
)
965+
from torchao.quantization.prototype.qat.affine_fake_quantized_tensor import (
966+
AffineFakeQuantizedTensor,
967+
to_affine_fake_quantized,
968+
)
969+
from torchao.quantization.prototype.qat.api import (
970+
ComposableQATQuantizer,
971+
FakeQuantizeConfig,
972+
)
973+
from torchao.quantization.prototype.qat.embedding import (
974+
FakeQuantizedEmbedding,
975+
Int4WeightOnlyEmbeddingQATQuantizer,
976+
Int4WeightOnlyEmbedding,
977+
Int4WeightOnlyQATEmbedding,
978+
)
979+
from torchao.quantization.prototype.qat.fake_quantizer import (
980+
FakeQuantizer,
981+
)
982+
from torchao.quantization.prototype.qat.linear import (
983+
disable_4w_fake_quant,
984+
disable_8da4w_fake_quant,
985+
enable_4w_fake_quant,
986+
enable_8da4w_fake_quant,
987+
FakeQuantizedLinear,
988+
Int4WeightOnlyQATLinear,
989+
Int4WeightOnlyQATQuantizer,
990+
Int8DynActInt4WeightQATLinear,
991+
Int8DynActInt4WeightQATQuantizer,
992+
)
940993

941994
if __name__ == "__main__":
942-
unittest.main()
995+
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Note: QAT has been moved to torchao/quantization/qat.
2+
This is a legacy folder only for backward compatibility
3+
and will be removed in the near future.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from torchao.quantization.qat import (
2+
ComposableQATQuantizer,
3+
Int4WeightOnlyEmbeddingQATQuantizer,
4+
Int4WeightOnlyQATQuantizer,
5+
Int8DynActInt4WeightQATQuantizer,
6+
)
7+
from torchao.quantization.qat.linear import (
8+
disable_4w_fake_quant,
9+
disable_8da4w_fake_quant,
10+
enable_4w_fake_quant,
11+
enable_8da4w_fake_quant,
12+
Int8DynActInt4WeightQATLinear,
13+
)
14+
15+
__all__ = [
16+
"disable_4w_fake_quant",
17+
"disable_8da4w_fake_quant",
18+
"enable_4w_fake_quant",
19+
"enable_8da4w_fake_quant",
20+
"ComposableQATQuantizer",
21+
"Int4WeightOnlyQATQuantizer",
22+
"Int4WeightOnlyEmbeddingQATQuantizer",
23+
"Int8DynActInt4WeightQATQuantizer",
24+
"Int8DynActInt4WeightQATLinear",
25+
]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# For backward compatibility only
2+
# These will be removed in the future
3+
4+
from torchao.quantization.qat.linear import (
5+
Int8DynActInt4WeightQATQuantizer as Int8DynActInt4WeightQATQuantizerModuleSwap,
6+
Int4WeightOnlyQATQuantizer as Int4WeightOnlyQATQuantizerModuleSwap,
7+
enable_8da4w_fake_quant as enable_8da4w_fake_quant_module_swap,
8+
disable_8da4w_fake_quant as disable_8da4w_fake_quant_module_swap,
9+
enable_4w_fake_quant as enable_4w_fake_quant_module_swap,
10+
disable_4w_fake_quant as disable_4w_fake_quant_module_swap,
11+
)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from torchao.quantization.qat.affine_fake_quantized_tensor import (
2+
AffineFakeQuantizedTensor,
3+
to_affine_fake_quantized,
4+
)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from torchao.quantization.qat.api import (
2+
ComposableQATQuantizer,
3+
FakeQuantizeConfig,
4+
)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from torchao.quantization.qat.embedding import (
2+
FakeQuantizedEmbedding,
3+
Int4WeightOnlyEmbeddingQATQuantizer,
4+
Int4WeightOnlyEmbedding,
5+
Int4WeightOnlyQATEmbedding,
6+
)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from torchao.quantization.qat.fake_quantizer import (
2+
FakeQuantizer,
3+
)
Loading
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from torchao.quantization.qat.linear import (
2+
disable_4w_fake_quant,
3+
disable_8da4w_fake_quant,
4+
enable_4w_fake_quant,
5+
enable_8da4w_fake_quant,
6+
FakeQuantizedLinear,
7+
Int4WeightOnlyQATLinear,
8+
Int4WeightOnlyQATQuantizer,
9+
Int8DynActInt4WeightQATLinear,
10+
Int8DynActInt4WeightQATQuantizer,
11+
)

0 commit comments

Comments
 (0)