Skip to content

Commit dab2d7a

Browse files
authored
Fix sharding config for quant (#73)
fix sharding for quant
1 parent 57eb0e1 commit dab2d7a

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

benchmarks/run_offline.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@ def run_prefill_time(engine, params, decode_state, seqlen):
136136
}
137137

138138

139-
def main():
139+
def main(argv):
140140
"""Main function to run engine offline."""
141141
engine = create_engine()
142142

default_shardings/llama-2.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
freqs_cis : -1 # torch.complex64 (2048, 64)
88
tok_embeddings.weight : 1 # torch.float32 (32000, 4096)
9+
tok_embeddings.weight_scaler : 0 # torch.bfloat16 (4096,)
910
layers.*.attention.wo.weight : 1 # torch.int8 (4096, 4096)
1011
layers.*.attention.wo.weight_scaler : 0 # torch.bfloat16 (4096,)
1112
layers.*.attention.wq.weight : 0 # torch.int8 (4096, 4096)
@@ -15,9 +16,13 @@ layers.*.attention.wk.weight_scaler : 0 # torch.bfloat16 (4096,)
1516
layers.*.attention.wv.weight : 0 # torch.int8 (4096, 4096)
1617
layers.*.attention.wv.weight_scaler : 0 # torch.bfloat16 (4096,)
1718
layers.*.feed_forward.w1.weight : 0 # torch.float32 (11008, 4096)
19+
layers.*.feed_forward.w1.weight_scaler : 0 # torch.bfloat16 (4096,)
1820
layers.*.feed_forward.w2.weight : 1 # torch.float32 (4096, 11008)
21+
layers.*.feed_forward.w2.weight_scaler : 0 # torch.bfloat16 (11008,)
1922
layers.*.feed_forward.w3.weight : 0 # torch.float32 (11008, 4096)
23+
layers.*.feed_forward.w3.weight_scaler : 0 # torch.bfloat16 (4096,)
2024
layers.*.attention_norm.weight : -1 # torch.float32 (4096,)
2125
layers.*.ffn_norm.weight : -1 # torch.float32 (4096,)
2226
norm.weight : -1 # torch.float32 (4096,)
2327
output.weight : 0 # torch.float32 (32000, 4096)
28+
output.weight_scaler : 0 # torch.float32 (4096,)

0 commit comments

Comments (0)