Skip to content

Commit ee1ef41

Browse files
gonnet authored and copybara-github committed
Compute the Hadamard transform via an int4 matrix multiplication.
PiperOrigin-RevId: 868190167
1 parent 96dcc54 commit ee1ef41

File tree

2 files changed

+28
-12
lines changed

2 files changed

+28
-12
lines changed

ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def _update_fully_connected_consumers(
8888

8989

9090
def _make_hadamard_matrix(size: int):
91-
"""Generates a Hadamard matrix of the given size.
91+
"""Generates an unnormalized integer Hadamard matrix of the given size.
9292
9393
Args:
9494
size: The size of the Hadamard matrix. Must be a power of 2. This represents
@@ -103,12 +103,12 @@ def _make_hadamard_matrix(size: int):
103103
"""
104104
if size <= 0 or (size & (size - 1)) != 0:
105105
raise ValueError('Hadamard matrix size must be a power of 2. ')
106-
h = h2 = np.array([[1, 1], [1, -1]])
106+
h = h2 = np.array([[1, 1], [1, -1]], dtype=np.int8)
107107
current_size = 2
108108
while current_size < size:
109109
h = np.kron(h, h2)
110110
current_size *= 2
111-
return h / np.sqrt(size)
111+
return h
112112

113113

114114
def insert_decomposed_hadamard_rotation(
@@ -191,16 +191,23 @@ def insert_decomposed_hadamard_rotation(
191191
prerorate_reshape_op.outputs = [prerotate_reshape_output_tensor_id]
192192

193193
# Generate hadamard_matrix(hadamard_size).
194-
# We could quantize this to INT4 for better memory efficiency, but for large
195-
# models the memory overhead is not significant, and floating point
194+
# We quantize the Hadamard matrix to INT4 for better memory efficiency; the
195+
# 1/sqrt(size) normalization is applied through the tensor's quantization
196+
# scale rather than baked into a float32 matrix.
197197
hadamard_matrix = _make_hadamard_matrix(hadamard_size)
198198
hadamard_matrix_tensor_id = transformation_utils.add_new_constant_tensor(
199-
tensor.name + b'_hadamard_matrix',
200-
hadamard_matrix.astype(np.float32),
201-
schema_py_generated.TensorType.FLOAT32,
202-
transformation_input.subgraph,
203-
transformation_input.buffers,
199+
tensor_name=tensor.name + b'_hadamard_matrix',
200+
data=transformation_utils.pack_data(
201+
bitwidth=4, flattened_data=hadamard_matrix.flatten()
202+
),
203+
tensor_type=schema_py_generated.TensorType.INT4,
204+
subgraph=transformation_input.subgraph,
205+
buffers=transformation_input.buffers,
206+
tensor_shape=hadamard_matrix.shape,
207+
quantization=schema_py_generated.QuantizationParametersT(
208+
scale=np.array([1.0 / np.sqrt(hadamard_size)], dtype=np.float32),
209+
zeroPoint=[0],
210+
),
204211
)
205212

206213
# Insert x' = tfl.fully_connected(x', hadamard_matrix)

ai_edge_quantizer/transformations/transformation_utils.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ def add_new_constant_tensor(
140140
buffers: list[schema_py_generated.BufferT],
141141
tensor_shape: Optional[list[int]] = None,
142142
force_duplicate_buffer: bool = False,
143+
quantization: schema_py_generated.QuantizationParametersT | None = None,
143144
) -> int:
144145
"""Add a new constant tensor to the model.
145146
@@ -153,6 +154,8 @@ def add_new_constant_tensor(
153154
data will be used.
154155
force_duplicate_buffer: Whether to add a new buffer even if the same buffer
155156
already exists.
157+
quantization: Optional `QuantizationParametersT` describing the quantization
158+
of this tensor.
156159
157160
Returns:
158161
The index of the new tensor in the subgraph.
@@ -166,6 +169,7 @@ def add_new_constant_tensor(
166169
new_tensor.buffer = new_buffer_id
167170
new_tensor.type = tensor_type
168171
new_tensor.name = tensor_name
172+
new_tensor.quantization = quantization
169173
new_tensor_id = len(subgraph.tensors)
170174
subgraph.tensors.append(new_tensor)
171175
return new_tensor_id
@@ -176,6 +180,7 @@ def add_new_activation_tensor(
176180
shape: list[int],
177181
tensor_type: schema_py_generated.TensorType,
178182
subgraph: schema_py_generated.SubGraphT,
183+
quantization: schema_py_generated.QuantizationParametersT | None = None,
179184
) -> int:
180185
"""Add a new activation tensor to the model.
181186
@@ -184,6 +189,8 @@ def add_new_activation_tensor(
184189
shape: The shape of the new tensor.
185190
tensor_type: The type of the new tensor.
186191
subgraph: The subgraph where the new tensor is added.
192+
quantization: Optional `QuantizationParametersT` describing the quantization
193+
of this tensor.
187194
188195
Returns:
189196
The index of the new tensor in the subgraph.
@@ -199,6 +206,7 @@ def add_new_activation_tensor(
199206
new_tensor.shape = shape
200207
new_tensor.type = tensor_type
201208
new_tensor.name = tensor_name
209+
new_tensor.quantization = quantization
202210
new_tensor.buffer = 0
203211
new_tensor_id = len(subgraph.tensors)
204212
subgraph.tensors.append(new_tensor)
@@ -226,8 +234,9 @@ def pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
226234
Packed data.
227235
"""
228236
if bitwidth == 4:
229-
even_data = flattened_data[::2] & 0x0F
230-
odd_data = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
237+
flattened_data = np.bitwise_and(flattened_data.astype(np.uint8), 0x0F)
238+
even_data = flattened_data[::2]
239+
odd_data = np.left_shift(flattened_data[1::2], 4)
231240
if odd_data.shape[0] == even_data.shape[0] - 1:
232241
odd_data = np.pad(odd_data, (0, 1), constant_values=0)
233242
return np.bitwise_or(even_data, odd_data)

0 commit comments

Comments (0)