Skip to content

Commit dab2d7a

Browse files
authored
Fix sharding config for quant (#73)
fix sharding for quant
1 parent 57eb0e1 commit dab2d7a

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

benchmarks/run_offline.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@ def run_prefill_time(engine, params, decode_state, seqlen):
136136
}
137137

138138

139-
def main():
139+
def main(argv):
140140
"""Main function to run engine offline."""
141141
engine = create_engine()
142142

default_shardings/llama-2.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
freqs_cis : -1 # torch.complex64 (2048, 64)
88
tok_embeddings.weight : 1 # torch.float32 (32000, 4096)
9+
tok_embeddings.weight_scaler : 0 # torch.bfloat16 (4096,)
910
layers.*.attention.wo.weight : 1 # torch.int8 (4096, 4096)
1011
layers.*.attention.wo.weight_scaler : 0 # torch.bfloat16 (4096,)
1112
layers.*.attention.wq.weight : 0 # torch.int8 (4096, 4096)
@@ -15,9 +16,13 @@ layers.*.attention.wk.weight_scaler : 0 # torch.bfloat16 (4096,)
1516
layers.*.attention.wv.weight : 0 # torch.int8 (4096, 4096)
1617
layers.*.attention.wv.weight_scaler : 0 # torch.bfloat16 (4096,)
1718
layers.*.feed_forward.w1.weight : 0 # torch.float32 (11008, 4096)
19+
layers.*.feed_forward.w1.weight_scaler : 0 # torch.bfloat16 (4096,)
1820
layers.*.feed_forward.w2.weight : 1 # torch.float32 (4096, 11008)
21+
layers.*.feed_forward.w2.weight_scaler : 0 # torch.bfloat16 (11008,)
1922
layers.*.feed_forward.w3.weight : 0 # torch.float32 (11008, 4096)
23+
layers.*.feed_forward.w3.weight_scaler : 0 # torch.bfloat16 (4096,)
2024
layers.*.attention_norm.weight : -1 # torch.float32 (4096,)
2125
layers.*.ffn_norm.weight : -1 # torch.float32 (4096,)
2226
norm.weight : -1 # torch.float32 (4096,)
2327
output.weight : 0 # torch.float32 (32000, 4096)
28+
output.weight_scaler : 0 # torch.float32 (4096,)

0 commit comments

Comments (0)