Skip to content

Commit 9e13a36

Browse files
ai-edge-bot authored and copybara-github committed
Add max_hadamard_size parameter for Hadamard rotations.
PiperOrigin-RevId: 868388507
1 parent ee1ef41 commit 9e13a36

15 files changed

+137
-31
lines changed

ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,14 @@ def _make_hadamard_matrix(size: int) -> np.ndarray:
5454
def _rotate_with_diagonal_hadamard(
5555
tensor_content: np.ndarray,
5656
axis: int,
57+
max_size: int = 0,
5758
):
5859
"""Quantizes the given float array using the diagonal Hadamard algorithm.
5960
6061
Args:
6162
tensor_content: The float array to quantize.
6263
axis: The axis of the tensor to rotate.
64+
max_size: The maximum size of the Hadamard matrix.
6365
6466
Returns:
6567
A tuple containing the quantized array and the recovered array.
@@ -77,7 +79,9 @@ def _rotate_with_diagonal_hadamard(
7779
# Use the largest power of 2 that is a factor of the dimension and then
7880
# tile this Hadamard matrix along the diagonal. 2**30 is just a large power
7981
# of 2 to calculate this factor.
80-
hadamard_size = np.gcd(tensor_content.shape[axis], 2 ** 30)
82+
hadamard_size = np.gcd(tensor_content.shape[axis], 2**30)
83+
if max_size > 0:
84+
hadamard_size = min(hadamard_size, max_size)
8185
diagonal_size = tensor_content.shape[axis] // hadamard_size
8286
# Output size is the product of all dimensions except the one being rotated.
8387
output_size = np.prod(np.delete(tensor_content.shape, axis))
@@ -135,7 +139,9 @@ def get_tensor_quant_params(
135139

136140
# Rotate the tensor with a Hadamard matrix.
137141
w_rotated, hadamard_size, random_vector = _rotate_with_diagonal_hadamard(
138-
tensor_content, axis=reduce_axis
142+
tensor_content,
143+
axis=reduce_axis,
144+
max_size=tensor_quant_config.max_hadamard_size,
139145
)
140146

141147
# Get the quantized values of the rotated tensor.

ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
"""Test Hadamard rotation materialization."""
1717

18+
import dataclasses
1819
import os
1920

2021
from absl.testing import parameterized
@@ -227,6 +228,36 @@ def test_get_tensor_quant_params_basic(self):
227228
if qparams.hadamard is not None:
228229
self.assertEqual(qparams.hadamard.hadamard_size, 32)
229230

231+
def test_get_tensor_quant_params_max_size(self):
232+
input_tensor = self._subgraph.tensors[self._fc_op.inputs[1]]
233+
buffer = self._graph_info.buffers[self._fc_buffer_id]
234+
np_buffer = np.frombuffer(buffer.data, dtype=np.float32).reshape(
235+
input_tensor.shape
236+
)
237+
# The original dimension is 32. The largest power of 2 factor is 32.
238+
# If we set max_hadamard_size to 16, then it should be 16.
239+
new_op_quant_config = dataclasses.replace(
240+
self._op_info.op_quant_config,
241+
weight_tensor_config=qtyping.TensorQuantizationConfig(
242+
num_bits=8,
243+
symmetric=True,
244+
granularity=qtyping.QuantGranularity.CHANNELWISE,
245+
max_hadamard_size=16,
246+
),
247+
)
248+
self._op_info = dataclasses.replace(
249+
self._op_info, op_quant_config=new_op_quant_config
250+
)
251+
qparams = hadamard_rotation.get_tensor_quant_params(
252+
self._op_info,
253+
self._op_info.op_quant_config.weight_tensor_config,
254+
np_buffer,
255+
self._tensor_name_to_qsv,
256+
)
257+
self.assertIsNotNone(qparams.hadamard)
258+
if qparams.hadamard is not None:
259+
self.assertEqual(qparams.hadamard.hadamard_size, 16)
260+
230261
def test_get_tensor_quant_params_golden_1(self):
231262
test_data = np.ones((6, 6))
232263
# expected:

ai_edge_quantizer/algorithms/utils/common_utils.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -94,21 +94,36 @@ def check_if_valid_op_config(
9494
f"No policy was specified for op: {op_name} with config:"
9595
f" {op_quant_config}."
9696
)
97-
# The config_check_policy contains all possible valid configs, except for
98-
# variations in the min_weight_elements field (it's set to 0 for all of them).
99-
# min_weight_elements has to be ignored during policy check here because it
100-
# can be any non-negative integer, which means we can't list all possible
101-
# values in the policy.
102-
elif (
103-
dataclasses.replace(op_quant_config, min_weight_elements=0)
104-
not in config_check_policy[op_name]
105-
):
106-
error_msg = (
107-
f"Quantization config for op: {op_name} with config:"
108-
f" {op_quant_config} was not found in the policy."
109-
)
11097
else:
111-
check_passed = True
98+
# min_weight_elements and max_hadamard_size have to be ignored during
99+
# policy check here because they can be any non-negative integer, which
100+
# means we can't list all possible values in the policy.
101+
op_quant_config_to_check = dataclasses.replace(
102+
op_quant_config, min_weight_elements=0
103+
)
104+
if op_quant_config_to_check.weight_tensor_config is not None:
105+
op_quant_config_to_check = dataclasses.replace(
106+
op_quant_config_to_check,
107+
weight_tensor_config=dataclasses.replace(
108+
op_quant_config_to_check.weight_tensor_config, max_hadamard_size=0
109+
),
110+
)
111+
if op_quant_config_to_check.activation_tensor_config is not None:
112+
op_quant_config_to_check = dataclasses.replace(
113+
op_quant_config_to_check,
114+
activation_tensor_config=dataclasses.replace(
115+
op_quant_config_to_check.activation_tensor_config,
116+
max_hadamard_size=0,
117+
),
118+
)
119+
120+
if op_quant_config_to_check not in config_check_policy[op_name]:
121+
error_msg = (
122+
f"Quantization config for op: {op_name} with config:"
123+
f" {op_quant_config!r} was not found in the policy."
124+
)
125+
else:
126+
check_passed = True
112127

113128
if not check_passed:
114129
raise ValueError(

ai_edge_quantizer/algorithms/utils/common_utils_test.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,19 @@ def test_check_drq_config_with_non_default_min_weight_elements_succeeds(self):
224224
_TFLOpName.CONV_2D, op_quant_config, _DEFAULT_CONFIG_CHECK_POLICY
225225
)
226226

227+
def test_check_config_with_non_default_max_hadamard_size_succeeds(self):
228+
op_quant_config = _OpQuantConfig(
229+
weight_tensor_config=_TensorQuantConfig(
230+
num_bits=8,
231+
granularity=qtyping.QuantGranularity.CHANNELWISE,
232+
max_hadamard_size=1024,
233+
),
234+
compute_precision=_ComputePrecision.INTEGER, # DRQ.
235+
)
236+
common_utils.check_if_valid_op_config(
237+
_TFLOpName.FULLY_CONNECTED, op_quant_config, _DEFAULT_CONFIG_CHECK_POLICY
238+
)
239+
227240
@parameterized.product(
228241
op_name=(_TFLOpName.FULLY_CONNECTED, _TFLOpName.CONV_2D),
229242
act_num_bits=(8, 16),

ai_edge_quantizer/qtyping.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,12 +317,15 @@ class TensorQuantizationConfig:
317317
quantization.
318318
dtype: The data type of the tensor.
319319
algorithm_key: The algorithm key to use for quantization.
320+
max_hadamard_size: The maximum size of the Hadamard matrix to use for
321+
Hadamard rotation.
320322
"""
321323

322324
num_bits: int
323325
symmetric: bool = True
324326
granularity: QuantGranularity = QuantGranularity.TENSORWISE
325327
dtype: TensorDataType = TensorDataType.INT
328+
max_hadamard_size: int = 0
326329

327330
def to_dict(self) -> dict[str, Any]:
328331
"""Converts ActivationQuantizationConfig to dict."""

ai_edge_quantizer/recipe_manager_test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,12 +581,14 @@ def test_get_full_quantization_config(self):
581581
'symmetric': False,
582582
'granularity': _QuantGranularity.TENSORWISE,
583583
'dtype': 'INT',
584+
'max_hadamard_size': 0,
584585
},
585586
'weight_tensor_config': {
586587
'num_bits': 8,
587588
'symmetric': True,
588589
'granularity': _QuantGranularity.TENSORWISE,
589590
'dtype': 'INT',
591+
'max_hadamard_size': 0,
590592
},
591593
# WEIGHT_ONLY.
592594
'compute_precision': _ComputePrecision.INTEGER,
@@ -605,6 +607,7 @@ def test_get_full_quantization_config(self):
605607
'num_bits': 8,
606608
'symmetric': True,
607609
'granularity': _QuantGranularity.TENSORWISE,
610+
'max_hadamard_size': 0,
608611
},
609612
# WEIGHT_ONLY.
610613
'compute_precision': _ComputePrecision.FLOAT,
@@ -623,6 +626,7 @@ def test_get_full_quantization_config(self):
623626
'num_bits': 4,
624627
'symmetric': True,
625628
'granularity': _QuantGranularity.TENSORWISE,
629+
'max_hadamard_size': 0,
626630
},
627631
# WEIGHT_ONLY.
628632
'compute_precision': _ComputePrecision.FLOAT,
@@ -641,6 +645,7 @@ def test_get_full_quantization_config(self):
641645
'num_bits': 6,
642646
'symmetric': True,
643647
'granularity': _QuantGranularity.TENSORWISE,
648+
'max_hadamard_size': 0,
644649
},
645650
# WEIGHT_ONLY.
646651
'compute_precision': _ComputePrecision.FLOAT,
@@ -659,6 +664,7 @@ def test_get_full_quantization_config(self):
659664
'num_bits': 3,
660665
'symmetric': True,
661666
'granularity': _QuantGranularity.TENSORWISE,
667+
'max_hadamard_size': 0,
662668
},
663669
# WEIGHT_ONLY.
664670
'compute_precision': _ComputePrecision.FLOAT,
@@ -924,6 +930,26 @@ def test_need_calibration_true(self):
924930
)
925931
self.assertTrue(self._recipe_manager.need_calibration())
926932

933+
def test_get_hadamard_with_max_size(self):
934+
self._recipe_manager.add_quantization_config(
935+
regex='.*/Dense/.*',
936+
operation_name=_TFLOpName.FULLY_CONNECTED,
937+
algorithm_key=_AlgorithmName.HADAMARD_ROTATION,
938+
op_config=qtyping.OpQuantizationConfig(
939+
weight_tensor_config=_TensorQuantConfig(
940+
num_bits=8, max_hadamard_size=1024
941+
),
942+
compute_precision=_ComputePrecision.INTEGER,
943+
),
944+
)
945+
alg_key, op_config = self._recipe_manager.get_quantization_configs(
946+
_TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
947+
)
948+
self.assertEqual(alg_key, _AlgorithmName.HADAMARD_ROTATION)
949+
weight_tensor_config = op_config.weight_tensor_config
950+
assert weight_tensor_config is not None
951+
self.assertEqual(weight_tensor_config.max_hadamard_size, 1024)
952+
927953

928954
if __name__ == '__main__':
929955
googletest.main()

ai_edge_quantizer/recipes/default_a16w8_recipe.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
"num_bits": 16,
99
"symmetric": true,
1010
"granularity": "TENSORWISE",
11-
"dtype": "INT"
11+
"dtype": "INT",
12+
"max_hadamard_size": 0
1213
},
1314
"weight_tensor_config": {
1415
"num_bits": 8,
1516
"symmetric": true,
1617
"granularity": "CHANNELWISE",
17-
"dtype": "INT"
18+
"dtype": "INT",
19+
"max_hadamard_size": 0
1820
},
1921
"compute_precision": "INTEGER",
2022
"explicit_dequantize": false,

ai_edge_quantizer/recipes/default_a8w8_recipe.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
"num_bits": 8,
99
"symmetric": false,
1010
"granularity": "TENSORWISE",
11-
"dtype": "INT"
11+
"dtype": "INT",
12+
"max_hadamard_size": 0
1213
},
1314
"weight_tensor_config": {
1415
"num_bits": 8,
1516
"symmetric": true,
1617
"granularity": "CHANNELWISE",
17-
"dtype": "INT"
18+
"dtype": "INT",
19+
"max_hadamard_size": 0
1820
},
1921
"compute_precision": "INTEGER",
2022
"explicit_dequantize": false,

ai_edge_quantizer/recipes/default_af32w4float_recipe.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
"num_bits": 4,
99
"symmetric": false,
1010
"granularity": "CHANNELWISE",
11-
"dtype": "INT"
11+
"dtype": "INT",
12+
"max_hadamard_size": 0
1213
},
1314
"compute_precision": "FLOAT",
1415
"explicit_dequantize": true,

ai_edge_quantizer/recipes/default_af32w8float_recipe.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
"num_bits": 8,
99
"symmetric": false,
1010
"granularity": "CHANNELWISE",
11-
"dtype": "INT"
11+
"dtype": "INT",
12+
"max_hadamard_size": 0
1213
},
1314
"compute_precision": "FLOAT",
1415
"explicit_dequantize": true,

0 commit comments

Comments (0)