Skip to content

Commit 3c9d855

Browse files
ai-edge-bot authored and copybara-github committed
Add max_hadamard_size parameter for Hadamard rotations.
PiperOrigin-RevId: 868388507
1 parent 0dec1ad commit 3c9d855

16 files changed

+212
-59
lines changed

ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,14 @@ def _make_hadamard_matrix(size: int) -> np.ndarray:
5454
def _rotate_with_diagonal_hadamard(
5555
tensor_content: np.ndarray,
5656
axis: int,
57+
max_size: int = 0,
5758
):
5859
"""Quantizes the given float array using the diagonal Hadamard algorithm.
5960
6061
Args:
6162
tensor_content: The float array to quantize.
6263
axis: The axis of the tensor to rotate.
64+
max_size: The maximum size of the Hadamard matrix.
6365
6466
Returns:
6567
A tuple containing the quantized array and the recovered array.
@@ -77,7 +79,9 @@ def _rotate_with_diagonal_hadamard(
7779
# Use the largest power of 2 that is a factor of the dimension and then
7880
# tile this Hadamard matrix along the diagonal. 2**30 is just a large power
7981
# of 2 to calculate this factor.
80-
hadamard_size = np.gcd(tensor_content.shape[axis], 2 ** 30)
82+
hadamard_size = np.gcd(tensor_content.shape[axis], 2**30)
83+
if max_size > 0:
84+
hadamard_size = min(hadamard_size, max_size)
8185
diagonal_size = tensor_content.shape[axis] // hadamard_size
8286
# Output size is the product of all dimensions except the one being rotated.
8387
output_size = np.prod(np.delete(tensor_content.shape, axis))
@@ -135,7 +139,9 @@ def get_tensor_quant_params(
135139

136140
# Rotate the tensor with a Hadamard matrix.
137141
w_rotated, hadamard_size, random_vector = _rotate_with_diagonal_hadamard(
138-
tensor_content, axis=reduce_axis
142+
tensor_content,
143+
axis=reduce_axis,
144+
max_size=tensor_quant_config.algorithm_params.get("max_hadamard_size", 0),
139145
)
140146

141147
# Get the quantized values of the rotated tensor.

ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
"""Test Hadamard rotation materialization."""
1717

18+
import dataclasses
1819
import pathlib
1920

2021
from absl.testing import absltest
@@ -227,6 +228,36 @@ def test_get_tensor_quant_params_basic(self):
227228
if qparams.hadamard is not None:
228229
self.assertEqual(qparams.hadamard.hadamard_size, 32)
229230

231+
def test_get_tensor_quant_params_max_size(self):
232+
input_tensor = self._subgraph.tensors[self._fc_op.inputs[1]]
233+
buffer = self._graph_info.buffers[self._fc_buffer_id]
234+
np_buffer = np.frombuffer(buffer.data, dtype=np.float32).reshape(
235+
input_tensor.shape
236+
)
237+
# The original dimension is 32. The largest power of 2 factor is 32.
238+
# If we set algorithm_params to {'max_hadamard_size': 16}, then it should be 16.
239+
new_op_quant_config = dataclasses.replace(
240+
self._op_info.op_quant_config,
241+
weight_tensor_config=qtyping.TensorQuantizationConfig(
242+
num_bits=8,
243+
symmetric=True,
244+
granularity=qtyping.QuantGranularity.CHANNELWISE,
245+
algorithm_params={"max_hadamard_size": 16},
246+
),
247+
)
248+
self._op_info = dataclasses.replace(
249+
self._op_info, op_quant_config=new_op_quant_config
250+
)
251+
qparams = hadamard_rotation.get_tensor_quant_params(
252+
self._op_info,
253+
self._op_info.op_quant_config.weight_tensor_config,
254+
np_buffer,
255+
self._tensor_name_to_qsv,
256+
)
257+
self.assertIsNotNone(qparams.hadamard)
258+
if qparams.hadamard is not None:
259+
self.assertEqual(qparams.hadamard.hadamard_size, 16)
260+
230261
def test_get_tensor_quant_params_golden_1(self):
231262
test_data = np.ones((6, 6))
232263
# expected:

ai_edge_quantizer/algorithms/utils/common_utils.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -94,21 +94,36 @@ def check_if_valid_op_config(
9494
f"No policy was specified for op: {op_name} with config:"
9595
f" {op_quant_config}."
9696
)
97-
# The config_check_policy contains all possible valid configs, except for
98-
# variations in the min_weight_elements field (it's set to 0 for all of them).
99-
# min_weight_elements has to be ignored during policy check here because it
100-
# can be any non-negative integer, which means we can't list all possible
101-
# values in the policy.
102-
elif (
103-
dataclasses.replace(op_quant_config, min_weight_elements=0)
104-
not in config_check_policy[op_name]
105-
):
106-
error_msg = (
107-
f"Quantization config for op: {op_name} with config:"
108-
f" {op_quant_config} was not found in the policy."
109-
)
11097
else:
111-
check_passed = True
98+
# min_weight_elements and algorithm_params have to be ignored during
99+
# policy check here because they can be any non-negative integer or dict,
100+
# which means we can't list all possible values in the policy.
101+
op_quant_config_to_check = dataclasses.replace(
102+
op_quant_config, min_weight_elements=0
103+
)
104+
if op_quant_config_to_check.weight_tensor_config is not None:
105+
op_quant_config_to_check = dataclasses.replace(
106+
op_quant_config_to_check,
107+
weight_tensor_config=dataclasses.replace(
108+
op_quant_config_to_check.weight_tensor_config, algorithm_params={}
109+
),
110+
)
111+
if op_quant_config_to_check.activation_tensor_config is not None:
112+
op_quant_config_to_check = dataclasses.replace(
113+
op_quant_config_to_check,
114+
activation_tensor_config=dataclasses.replace(
115+
op_quant_config_to_check.activation_tensor_config,
116+
algorithm_params={},
117+
),
118+
)
119+
120+
if op_quant_config_to_check not in config_check_policy[op_name]:
121+
error_msg = (
122+
f"Quantization config for op: {op_name} with config:"
123+
f" {op_quant_config!r} was not found in the policy."
124+
)
125+
else:
126+
check_passed = True
112127

113128
if not check_passed:
114129
raise ValueError(

ai_edge_quantizer/algorithms/utils/common_utils_test.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,21 @@ def test_check_drq_config_with_non_default_min_weight_elements_succeeds(self):
224224
_TFLOpName.CONV_2D, op_quant_config, _DEFAULT_CONFIG_CHECK_POLICY
225225
)
226226

227+
def test_check_config_with_non_default_algorithm_params_succeeds(self):
228+
op_quant_config = _OpQuantConfig(
229+
weight_tensor_config=_TensorQuantConfig(
230+
num_bits=8,
231+
granularity=qtyping.QuantGranularity.CHANNELWISE,
232+
algorithm_params={"max_hadamard_size": 1024},
233+
),
234+
compute_precision=_ComputePrecision.INTEGER, # DRQ.
235+
)
236+
common_utils.check_if_valid_op_config(
237+
_TFLOpName.FULLY_CONNECTED,
238+
op_quant_config,
239+
_DEFAULT_CONFIG_CHECK_POLICY,
240+
)
241+
227242
@parameterized.product(
228243
op_name=(_TFLOpName.FULLY_CONNECTED, _TFLOpName.CONV_2D),
229244
act_num_bits=(8, 16),

ai_edge_quantizer/qtyping.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,9 @@
2020
import copy
2121
import dataclasses
2222
import enum
23-
from typing import Any, Callable, Optional, Union
24-
23+
from typing import Any, Callable, Mapping, Optional, Union, TypeAlias
24+
from immutabledict import immutabledict
2525
import numpy as np
26-
from typing_extensions import TypeAlias
2726

2827

2928
QSV: TypeAlias = MutableMapping[str, Any]
@@ -317,22 +316,32 @@ class TensorQuantizationConfig:
317316
quantization.
318317
dtype: The data type of the tensor.
319318
algorithm_key: The algorithm key to use for quantization.
319+
algorithm_params: Additional parameters for the quantization algorithm.
320320
"""
321321

322322
num_bits: int
323323
symmetric: bool = True
324324
granularity: QuantGranularity = QuantGranularity.TENSORWISE
325325
dtype: TensorDataType = TensorDataType.INT
326+
algorithm_params: Mapping[str, Any] = dataclasses.field(
327+
default_factory=immutabledict
328+
)
329+
330+
def __post_init__(self):
331+
if not isinstance(self.algorithm_params, immutabledict):
332+
object.__setattr__(
333+
self, 'algorithm_params', immutabledict(self.algorithm_params)
334+
)
326335

327336
def to_dict(self) -> dict[str, Any]:
328337
"""Converts ActivationQuantizationConfig to dict."""
329338
return dataclasses.asdict(
330339
self,
331340
dict_factory=lambda x: { # pylint: disable=g-long-lambda
332-
k: v
341+
k: (dict(v) if isinstance(v, Mapping) and not isinstance(v, dict) else v)
333342
for (k, v) in x
334343
# Skip None and empty dict values.
335-
if v is not None and not (isinstance(v, dict) and not v)
344+
if v is not None and not (isinstance(v, (dict, Mapping)) and not v)
336345
},
337346
)
338347

@@ -342,6 +351,15 @@ def from_dict(cls, params: dict[str, Any]) -> 'TensorQuantizationConfig':
342351
params_copy = copy.deepcopy(params)
343352
# Process block_size config from legacy recipe.
344353
params_copy = _process_block_size(params_copy)
354+
355+
# Move any unknown fields to algorithm_params for backward compatibility.
356+
known_fields = {f.name for f in dataclasses.fields(cls)}
357+
algorithm_params = params_copy.pop('algorithm_params', {})
358+
for key in list(params_copy.keys()):
359+
if key not in known_fields:
360+
algorithm_params[key] = params_copy.pop(key)
361+
params_copy['algorithm_params'] = algorithm_params
362+
345363
return cls(**params_copy)
346364

347365

@@ -424,10 +442,10 @@ def to_dict(self) -> dict[str, Any]:
424442
return dataclasses.asdict(
425443
self,
426444
dict_factory=lambda x: { # pylint: disable=g-long-lambda
427-
k: v
445+
k: (dict(v) if isinstance(v, Mapping) and not isinstance(v, dict) else v)
428446
for (k, v) in x
429447
# Skip None and empty dict values.
430-
if v is not None and not (isinstance(v, dict) and not v)
448+
if v is not None and not (isinstance(v, (dict, Mapping)) and not v)
431449
},
432450
)
433451

ai_edge_quantizer/recipe_manager.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -205,16 +205,26 @@ def get_quantization_recipe(self) -> ModelQuantizationRecipe:
205205
Returns:
206206
A list of quantization configs in the recipe.
207207
"""
208-
ret = []
209-
for _, scope_config in self._scope_configs.items():
210-
for quant_config in scope_config:
211-
config = dict()
212-
config['regex'] = quant_config.regex
213-
config['operation'] = quant_config.operation
214-
config['algorithm_key'] = quant_config.algorithm_key
215-
config['op_config'] = quant_config.op_config.to_dict()
216-
ret.append(config)
217-
return ret
208+
recipe = []
209+
for scope, op_recipes in self._scope_configs.items():
210+
for op_recipe in op_recipes:
211+
recipe_dict = dataclasses.asdict(
212+
op_recipe,
213+
dict_factory=lambda x: { # pylint: disable=g-long-lambda
214+
k: (
215+
dict(v)
216+
if isinstance(v, collections.abc.Mapping)
217+
and not isinstance(v, dict)
218+
else v
219+
)
220+
for (k, v) in x
221+
# Skip None and empty dict values.
222+
if v is not None
223+
and not (isinstance(v, (dict, collections.abc.Mapping)) and not v)
224+
},
225+
)
226+
recipe.append(recipe_dict)
227+
return recipe
218228

219229
def load_quantization_recipe(
220230
self, quantization_recipe: ModelQuantizationRecipe

ai_edge_quantizer/recipe_manager_test.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -636,20 +636,20 @@ def test_get_full_quantization_config(self):
636636
expected_full_quantization_config = [
637637
{
638638
'regex': '.*',
639-
'operation': '*',
639+
'operation': _TFLOpName.ALL_SUPPORTED,
640640
'algorithm_key': _AlgorithmName.MIN_MAX_UNIFORM_QUANT,
641641
'op_config': {
642642
'activation_tensor_config': {
643643
'num_bits': 8,
644644
'symmetric': False,
645645
'granularity': _QuantGranularity.TENSORWISE,
646-
'dtype': 'INT',
646+
'dtype': _TensorDataType.INT,
647647
},
648648
'weight_tensor_config': {
649649
'num_bits': 8,
650650
'symmetric': True,
651651
'granularity': _QuantGranularity.TENSORWISE,
652-
'dtype': 'INT',
652+
'dtype': _TensorDataType.INT,
653653
},
654654
# WEIGHT_ONLY.
655655
'compute_precision': _ComputePrecision.INTEGER,
@@ -660,11 +660,11 @@ def test_get_full_quantization_config(self):
660660
},
661661
{
662662
'regex': '.*',
663-
'operation': 'BATCH_MATMUL',
663+
'operation': _TFLOpName.BATCH_MATMUL,
664664
'algorithm_key': _AlgorithmName.MIN_MAX_UNIFORM_QUANT,
665665
'op_config': {
666666
'weight_tensor_config': {
667-
'dtype': 'INT',
667+
'dtype': _TensorDataType.INT,
668668
'num_bits': 8,
669669
'symmetric': True,
670670
'granularity': _QuantGranularity.TENSORWISE,
@@ -678,11 +678,11 @@ def test_get_full_quantization_config(self):
678678
},
679679
{
680680
'regex': '.*/Dense/.*',
681-
'operation': '*',
681+
'operation': _TFLOpName.ALL_SUPPORTED,
682682
'algorithm_key': _AlgorithmName.MIN_MAX_UNIFORM_QUANT,
683683
'op_config': {
684684
'weight_tensor_config': {
685-
'dtype': 'INT',
685+
'dtype': _TensorDataType.INT,
686686
'num_bits': 4,
687687
'symmetric': True,
688688
'granularity': _QuantGranularity.TENSORWISE,
@@ -696,11 +696,11 @@ def test_get_full_quantization_config(self):
696696
},
697697
{
698698
'regex': '.*/Dense_1/.*',
699-
'operation': 'FULLY_CONNECTED',
699+
'operation': _TFLOpName.FULLY_CONNECTED,
700700
'algorithm_key': _AlgorithmName.MIN_MAX_UNIFORM_QUANT,
701701
'op_config': {
702702
'weight_tensor_config': {
703-
'dtype': 'INT',
703+
'dtype': _TensorDataType.INT,
704704
'num_bits': 6,
705705
'symmetric': True,
706706
'granularity': _QuantGranularity.TENSORWISE,
@@ -714,11 +714,11 @@ def test_get_full_quantization_config(self):
714714
},
715715
{
716716
'regex': '.*/Dense_1/.*',
717-
'operation': 'BATCH_MATMUL',
717+
'operation': _TFLOpName.BATCH_MATMUL,
718718
'algorithm_key': _AlgorithmName.MIN_MAX_UNIFORM_QUANT,
719719
'op_config': {
720720
'weight_tensor_config': {
721-
'dtype': 'INT',
721+
'dtype': _TensorDataType.INT,
722722
'num_bits': 3,
723723
'symmetric': True,
724724
'granularity': _QuantGranularity.TENSORWISE,
@@ -987,6 +987,28 @@ def test_need_calibration_true(self):
987987
)
988988
self.assertTrue(self._recipe_manager.need_calibration())
989989

990+
def test_get_hadamard_with_max_size(self):
991+
self._recipe_manager.add_quantization_config(
992+
regex='.*/Dense/.*',
993+
operation_name=_TFLOpName.FULLY_CONNECTED,
994+
algorithm_key=_AlgorithmName.HADAMARD_ROTATION,
995+
op_config=qtyping.OpQuantizationConfig(
996+
weight_tensor_config=_TensorQuantConfig(
997+
num_bits=8, algorithm_params={'max_hadamard_size': 1024}
998+
),
999+
compute_precision=_ComputePrecision.INTEGER,
1000+
),
1001+
)
1002+
alg_key, op_config = self._recipe_manager.get_quantization_configs(
1003+
_TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
1004+
)
1005+
self.assertEqual(alg_key, _AlgorithmName.HADAMARD_ROTATION)
1006+
weight_tensor_config = op_config.weight_tensor_config
1007+
assert weight_tensor_config is not None
1008+
self.assertEqual(
1009+
weight_tensor_config.algorithm_params['max_hadamard_size'], 1024
1010+
)
1011+
9901012

9911013
if __name__ == '__main__':
9921014
absltest.main()

ai_edge_quantizer/recipes/default_a16w8_recipe.json

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,19 @@
88
"num_bits": 16,
99
"symmetric": true,
1010
"granularity": "TENSORWISE",
11-
"dtype": "INT"
11+
"dtype": "INT",
12+
"algorithm_params": {
13+
"max_hadamard_size": 0
14+
}
1215
},
1316
"weight_tensor_config": {
1417
"num_bits": 8,
1518
"symmetric": true,
1619
"granularity": "CHANNELWISE",
17-
"dtype": "INT"
20+
"dtype": "INT",
21+
"algorithm_params": {
22+
"max_hadamard_size": 0
23+
}
1824
},
1925
"compute_precision": "INTEGER",
2026
"explicit_dequantize": false,

0 commit comments

Comments (0)