Commit c34fe82

Remove support for quant_llm_linear
Summary:
- Deleted fp6_linear.cu and the rest of the fp6_llm folder
- Modified torchao/ops.py and test/test_ops.py to remove quant_llm_linear calls
- Removed all tests/references to floatx_tensor_core_layout and FloatXTensorCoreLayout
- Removed all tests/references to FPXWeightOnlyConfig

Tasks: Related to issue #3516 (github.com//issues/3516)

ghstack-source-id: fe8afeb
Pull-Request: #3520
1 parent a8fa9e5 commit c34fe82

34 files changed (+3, -2664 lines)
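
For orientation, a minimal sketch of the user-facing surface this commit removes, pieced together from the deleted tests and configs below. The quantize_ entry point and the Linear model setup are assumptions for illustration, not taken from this diff; none of this code works once the commit lands:

    import torch
    import torchao
    from torchao.quantization import quantize_, FPXWeightOnlyConfig  # FPXWeightOnlyConfig is removed by this commit

    # Config-level API (removed): weight-only fp6 quantization, ebits=3, mbits=2.
    model = torch.nn.Linear(256, 256).half().cuda()
    quantize_(model, FPXWeightOnlyConfig(ebits=3, mbits=2))

    # Kernel-level custom op (removed; it was backed by the deleted fp6_linear.cu):
    # out = torchao.ops.quant_llm_linear(ebits, mbits, fp16_act, floatx_weight, scale, splitK)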

benchmarks/benchmark_fp6.py

Lines changed: 0 additions & 85 deletions
This file was deleted.

benchmarks/microbenchmarks/utils.py

Lines changed: 1 addition & 4 deletions
@@ -17,7 +17,6 @@
     Float8DynamicActivationFloat8SemiSparseWeightConfig,
     Float8DynamicActivationFloat8WeightConfig,
     Float8WeightOnlyConfig,
-    FPXWeightOnlyConfig,
     GemliteUIntXWeightOnlyConfig,
     Int4WeightOnlyConfig,
     Int8DynamicActivationInt4WeightConfig,
@@ -230,9 +229,7 @@ def string_to_config(
         from torchao.dtypes import MarlinSparseLayout
 
         return Int4WeightOnlyConfig(layout=MarlinSparseLayout(), version=1)
-    if "fp6" in quantization:
-        return FPXWeightOnlyConfig(3, 2)
-    elif "uintx" in quantization:
+    if "uintx" in quantization:
         # uintx-nbits-group_size, e.g. "uintx-2-64"
         if "hqq" in quantization:
             # uintx-nbits-group_size-hqq
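
Context for the removed "fp6" shortcut above: FPXWeightOnlyConfig(3, 2) means ebits=3 and mbits=2, i.e. a 6-bit float with 1 sign, 3 exponent, and 2 mantissa bits, consistent with the nbits = 1 + ebits + mbits relation used by the test helper deleted further down. A trivial illustrative check:

    ebits, mbits = 3, 2        # arguments of the removed FPXWeightOnlyConfig(3, 2)
    nbits = 1 + ebits + mbits  # sign + exponent + mantissa
    assert nbits == 6          # hence the "fp6" benchmark string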

docs/source/api_ref_dtypes.rst

Lines changed: 0 additions & 2 deletions
@@ -32,7 +32,6 @@ Quantization techniques
 
     to_affine_quantized_intx
     to_affine_quantized_intx_static
-    to_affine_quantized_fpx
     to_affine_quantized_floatx
     to_affine_quantized_floatx_static
     to_marlinqqq_quantized_intx
@@ -51,7 +50,6 @@ Prototype
     Int8DynamicActInt4WeightCPULayout
     MarlinQQQTensor
     MarlinQQQLayout
-    FloatxTensorCoreLayout
     UintxLayout
 
 ..

test/core/test_config.py

Lines changed: 0 additions & 2 deletions
@@ -33,7 +33,6 @@
     Float8DynamicActivationFloat8WeightConfig,
     Float8DynamicActivationInt4WeightConfig,
     Float8WeightOnlyConfig,
-    FPXWeightOnlyConfig,
     GemliteUIntXWeightOnlyConfig,
     Int4DynamicActivationInt4WeightConfig,
     Int4WeightOnlyConfig,
@@ -87,7 +86,6 @@
         group_size=128,  # Optional, has default of 64
         bit_width=8,  # Optional, has default of 4
     ),
-    FPXWeightOnlyConfig(ebits=4, mbits=8),
     # Sparsity configs
     SemiSparseWeightConfig(),
     BlockSparseWeightConfig(blocksize=128),

test/dtypes/test_floatx.py

Lines changed: 0 additions & 135 deletions
This file was deleted.

test/quantization/test_quant_api.py

Lines changed: 0 additions & 4 deletions
@@ -48,7 +48,6 @@
     Float8DynamicActivationFloat8WeightConfig,
     Float8StaticActivationFloat8WeightConfig,
     Float8WeightOnlyConfig,
-    FPXWeightOnlyConfig,
     FqnToConfig,
     GemliteUIntXWeightOnlyConfig,
     Int4DynamicActivationInt4WeightConfig,
@@ -562,7 +561,6 @@ def test_int4wo_cpu(self, dtype, x_dim, use_hqq):
         Int8DynamicActivationInt8WeightConfig(),
         Int8DynamicActivationInt4WeightConfig(),
         Int8WeightOnlyConfig(),
-        FPXWeightOnlyConfig(ebits=4, mbits=3),
         GemliteUIntXWeightOnlyConfig(),
         UIntXWeightOnlyConfig(dtype=torch.uint4),
     ],
@@ -809,7 +807,6 @@ def test_config_deprecation(self):
         """
         from torchao.quantization import (
             Float8StaticActivationFloat8WeightConfig,
-            FPXWeightOnlyConfig,
             GemliteUIntXWeightOnlyConfig,
             Int4DynamicActivationInt4WeightConfig,
             Int8DynamicActivationInt4WeightConfig,
@@ -822,7 +819,6 @@ def test_config_deprecation(self):
         # Map from deprecated API to the args needed to instantiate it
         deprecated_apis_to_args = {
             Float8StaticActivationFloat8WeightConfig: (torch.randn(3),),
-            FPXWeightOnlyConfig: (3, 2),
             GemliteUIntXWeightOnlyConfig: (),
             Int4DynamicActivationInt4WeightConfig: (),
             Int8DynamicActivationInt4WeightConfig: (),

test/test_ops.py

Lines changed: 0 additions & 67 deletions
@@ -18,7 +18,6 @@
 from torch.testing._internal.optests import opcheck
 
 import torchao
-from torchao.dtypes.floatx import from_scaled_tc_floatx
 from torchao.quantization.marlin_qqq import (
     marlin_qqq_workspace,
     pack_to_marlin_qqq,
@@ -56,72 +55,6 @@
 
 
 class TestOps(TestCase):
-    def _create_floatx_inputs(
-        self, ebits: int, mbits: int, BS: int, OC: int, IC: int, device, dtype
-    ):
-        # Randomly initialize each byte
-        nbits = 1 + ebits + mbits
-        floatx_weight = torch.randint(256, (OC, IC // 8 * nbits), dtype=torch.uint8)
-        scale = torch.rand(OC).to(dtype) + 0.5
-        fp16_act = torch.rand(BS, IC).to(dtype) + 0.5
-        return floatx_weight.to(device), scale.to(device), fp16_act.to(device)
-
-    @pytest.mark.skipif(not IS_CUDA, reason="CUDA not available")
-    @parametrize("ebits,mbits", [(3, 2), (2, 2)])
-    @parametrize("dtype", [torch.half, torch.bfloat16])
-    def test_quant_llm_linear(self, ebits, mbits, dtype):
-        BS = 2
-        OC = 256
-        IC = 256
-        splitK = 1
-        floatx_weight, scale, fp16_act = self._create_floatx_inputs(
-            ebits, mbits, BS, OC, IC, "cuda", dtype
-        )
-
-        # smoke test
-        torchao.ops.quant_llm_linear(
-            ebits, mbits, fp16_act, floatx_weight, scale, splitK
-        )
-
-        # comprehensive testing
-        test_utils = [
-            "test_schema",
-            "test_autograd_registration",
-            "test_faketensor",
-            "test_aot_dispatch_dynamic",
-        ]
-        opcheck(
-            torch.ops.torchao.quant_llm_linear,
-            (ebits, mbits, fp16_act, floatx_weight, scale, splitK),
-            test_utils=test_utils,
-        )
-
-    @pytest.mark.skipif(not IS_CUDA, reason="CUDA not available")
-    @parametrize("BS,OC,IC,splitK", [(1, 2048, 4096, 5), (2, 8192, 8192, 6)])
-    @parametrize("ebits,mbits", [(3, 2), (2, 2)])
-    @parametrize("dtype", [torch.half, torch.bfloat16])
-    def test_quant_llm_linear_correctness(
-        self, ebits, mbits, BS, OC, IC, splitK, dtype
-    ):
-        # adapted from https://github.com/usyd-fsalab/fp6_llm/blob/5df6737cca32f604e957e3f63f03ccc2e4d1df0d/tests/python/kernel_test_fpx.py
-        floatx_weight, scale, fp16_act = self._create_floatx_inputs(
-            ebits, mbits, BS, OC, IC, "cuda", dtype
-        )
-
-        results_floatx = torchao.ops.quant_llm_linear(
-            ebits, mbits, fp16_act, floatx_weight, scale, splitK
-        )
-
-        fp16_weight = from_scaled_tc_floatx(floatx_weight, ebits, mbits, scale).to(
-            dtype
-        )
-        results_fp16 = fp16_act @ fp16_weight.T
-
-        error = (results_floatx - results_fp16).abs().mean()
-        gt = results_fp16.abs().mean()
-        relative_error = error / gt
-        rtol = 1e-2 if dtype == torch.bfloat16 else 1e-3
-        assert relative_error < rtol
 
     def _scaled_dot_product_int8_op_ref(
         self,
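
A side note on the packed-weight shape used by the deleted helper: each output channel stores IC values at nbits bits apiece, i.e. IC * nbits / 8 bytes, which is the (OC, IC // 8 * nbits) uint8 tensor created above. A quick sanity check with the smoke-test shapes (illustrative only):

    OC, IC, ebits, mbits = 256, 256, 3, 2
    nbits = 1 + ebits + mbits               # 6 bits per packed fp6 value
    packed_bytes_per_row = IC // 8 * nbits  # packed bytes per output channel, as in _create_floatx_inputs
    assert packed_bytes_per_row == IC * nbits // 8 == 192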
