We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ff712f6, commit 0fa8884 (Copy full SHA for 0fa8884)
1 file changed
vllm/model_executor/kernels/linear/scaled_mm/xpu.py
@@ -59,6 +59,9 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
59
replace_parameter(layer, "weight", weight.data.t())
60
# else: already in [in, out] layout — no-op
61
62
+ weight_scale = layer.weight_scale.t().contiguous()
63
+ replace_parameter(layer, "weight_scale", weight_scale.data)
64
+
65
def apply_weights(
66
self,
67
layer: torch.nn.Module,
0 commit comments