You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# NOTE(HandH1998): To ensure proper alignment of the block-wise quantization scales, the output_size of the weights for both the gate and up layers must be divisible by block_n.
787
-
# Required by column parallel or enabling merged weights
788
-
if intermediate_size_per_partition % block_n != 0:
789
-
raise ValueError(
790
-
f"The output_size of gate's and up's weight = "
791
-
f"{intermediate_size_per_partition} is not divisible by "
# NOTE(HandH1998): To ensure proper alignment of the block-wise quantization scales, the output_size of the weights for both the gate and up layers must be divisible by block_n.
803
+
# Required by column parallel or enabling merged weights
804
+
if intermediate_size_per_partition % block_n != 0:
797
805
raise ValueError(
798
-
f"The input_size of down's weight = "
806
+
f"The output_size of gate's and up's weight = "
799
807
f"{intermediate_size_per_partition} is not divisible by "
800
-
f"weight quantization block_k = {block_k}."
808
+
f"weight quantization block_n = {block_n}."
801
809
)
810
+
if tp_size > 1:
811
+
# Required by row parallel
812
+
if intermediate_size_per_partition % block_k != 0:
813
+
raise ValueError(
814
+
f"The input_size of down's weight = "
815
+
f"{intermediate_size_per_partition} is not divisible by "
f"moe_intermediate_size {moe_intermediate_size} must be divisible by moe_tp_size ({moe_tp_size}) which is tp_size ({self.tp_size}) divided by moe_ep_size ({self.moe_ep_size})."
800
802
)
801
803
802
-
if (moe_intermediate_size // moe_tp_size) % weight_block_size_n != 0:
0 commit comments