@@ -85,10 +85,16 @@ def __init__(
8585
8686 self .mutex = threading .Lock ()
8787 self .req_output = dict ()
88-
88+ self . master_node_ip = self . llm_engine . config . pod_ips [ 0 ]
8989 self ._receive_output_thread = threading .Thread (
9090 target = self ._receive_output , daemon = True )
9191 self ._receive_output_thread .start ()
92+
93+ def _check_master (self ):
94+ """
95+ Check if the current node is the master node.
96+ """
97+ return self .llm_engine .config ._check_master ()
9298
9399 def _receive_output (self ):
94100 """
@@ -130,6 +136,10 @@ def generate(
130136 Union[str, list[str]]: The generated response.
131137 """
132138
139+ if not self ._check_master ():
140+ err_msg = f"Only master node can accept completion request, please send request to master node: { self .master_node_ip } "
141+ raise ValueError (err_msg )
142+
133143 if sampling_params is None :
134144 sampling_params = self .default_sampling_params
135145
@@ -182,6 +192,11 @@ def chat(
182192 Returns:
183193 Union[str, list[str]]: The generated response.
184194 """
195+
196+ if not self ._check_master ():
197+ err_msg = f"Only master node can accept completion request, please send request to master node: { self .master_node_ip } "
198+ raise ValueError (err_msg )
199+
185200 if sampling_params is None :
186201 sampling_params = self .default_sampling_params
187202
0 commit comments