Skip to content

Commit 07f6949

Browse files
committed
[CI] Optimize port cleanup logic
1 parent 08ca0f6 commit 07f6949

File tree

12 files changed

+354
-248
lines changed

12 files changed

+354
-248
lines changed

.github/workflows/_pre_ce_test.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ jobs:
9292
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
9393
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
9494
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
95+
echo "FD_ZMQ_RECV_REQUEST_SERVER_PORT=${FD_ZMQ_RECV_REQUEST_SERVER_PORT}"
96+
echo "FD_ZMQ_SEND_RESPONSE_SERVER_PORT=${FD_ZMQ_SEND_RESPONSE_SERVER_PORT}"
97+
echo "FD_ZMQ_CONTROL_CMD_SERVER_PORTS=${FD_ZMQ_CONTROL_CMD_SERVER_PORTS}"
9598
echo "DEVICES=${DEVICES}"
9699
echo "========================================================="
97100
@@ -141,6 +144,9 @@ jobs:
141144
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
142145
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
143146
-e "FLASK_PORT=${FLASK_PORT}" \
147+
-e "FD_ZMQ_RECV_REQUEST_SERVER_PORT=${FD_ZMQ_RECV_REQUEST_SERVER_PORT}" \
148+
-e "FD_ZMQ_SEND_RESPONSE_SERVER_PORT=${FD_ZMQ_SEND_RESPONSE_SERVER_PORT}" \
149+
-e "FD_ZMQ_CONTROL_CMD_SERVER_PORTS=${FD_ZMQ_CONTROL_CMD_SERVER_PORTS}" \
144150
-e "fd_wheel_url=${fd_wheel_url}" \
145151
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
146152
git config --global --add safe.directory /workspace/FastDeploy

.github/workflows/_unit_test_coverage.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,17 @@ jobs:
103103
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
104104
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
105105
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
106+
FD_ROUTER_PORT=$((42048 + DEVICE_PORT * 100))
107+
FD_CONNECTOR_PORT=$((42038 + DEVICE_PORT * 100))
106108
echo "Test ENV Parameter:"
107109
echo "========================================================="
108110
echo "FLASK_PORT=${FLASK_PORT}"
109111
echo "FD_API_PORT=${FD_API_PORT}"
110112
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
111113
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
112114
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
115+
echo "FD_ROUTER_PORT=${FD_ROUTER_PORT}"
116+
echo "FD_CONNECTOR_PORT=${FD_CONNECTOR_PORT}"
113117
echo "DEVICES=${DEVICES}"
114118
echo "========================================================="
115119
@@ -159,6 +163,8 @@ jobs:
159163
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
160164
-e "FLASK_PORT=${FLASK_PORT}" \
161165
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
166+
-e "FD_ROUTER_PORT=${FD_ROUTER_PORT}" \
167+
-e "FD_CONNECTOR_PORT=${FD_CONNECTOR_PORT}" \
162168
-e TZ="Asia/Shanghai" \
163169
-e "fd_wheel_url=${fd_wheel_url}" \
164170
-e "BASE_REF=${BASE_REF}" \

tests/ci_use/EB_Lite/test_EB_Lite_serving.py

Lines changed: 11 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import re
1818
import shutil
1919
import signal
20-
import socket
2120
import subprocess
2221
import sys
2322
import time
@@ -26,49 +25,17 @@
2625
import pytest
2726
import requests
2827

29-
# Read ports from environment variables; use default values if not set
30-
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
31-
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
32-
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
33-
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))
34-
35-
# List of ports to clean before and after tests
36-
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
37-
38-
39-
def is_port_open(host: str, port: int, timeout=1.0):
40-
"""
41-
Check if a TCP port is open on the given host.
42-
Returns True if connection succeeds, False otherwise.
43-
"""
44-
try:
45-
with socket.create_connection((host, port), timeout):
46-
return True
47-
except Exception:
48-
return False
49-
50-
51-
def kill_process_on_port(port: int):
52-
"""
53-
Kill processes that are listening on the given port.
54-
Uses `lsof` to find process ids and sends SIGKILL.
55-
"""
56-
try:
57-
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
58-
for pid in output.splitlines():
59-
os.kill(int(pid), signal.SIGKILL)
60-
print(f"Killed process on port {port}, pid={pid}")
61-
except subprocess.CalledProcessError:
62-
pass
63-
64-
65-
def clean_ports():
66-
"""
67-
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
68-
"""
69-
for port in PORTS_TO_CLEAN:
70-
kill_process_on_port(port)
71-
time.sleep(2)
28+
tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
29+
sys.path.insert(0, tests_dir)
30+
31+
from e2e.utils.serving_utils import (
32+
FD_API_PORT,
33+
FD_CACHE_QUEUE_PORT,
34+
FD_ENGINE_QUEUE_PORT,
35+
FD_METRICS_PORT,
36+
clean_ports,
37+
is_port_open,
38+
)
7239

7340

7441
@pytest.fixture(scope="session", autouse=True)

tests/ci_use/EB_Lite_with_adapter/test_eblite_serving.py

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -90,38 +90,71 @@ def is_port_open(host: str, port: int, timeout=1.0):
9090
def kill_process_on_port(port: int):
9191
"""
9292
Kill processes that are listening on the given port.
93-
Uses `lsof` to find process ids and sends SIGKILL.
93+
Uses multiple methods to ensure thorough cleanup.
9494
"""
95+
current_pid = os.getpid()
96+
parent_pid = os.getppid()
97+
98+
# Method 1: Use lsof to find processes
9599
try:
96100
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
97101
for pid in output.splitlines():
98-
os.kill(int(pid), signal.SIGKILL)
99-
print(f"Killed process on port {port}, pid={pid}")
102+
pid = int(pid)
103+
if pid in (current_pid, parent_pid):
104+
print(f"Skip killing current process (pid={pid}) on port {port}")
105+
continue
106+
try:
107+
# First try SIGTERM for graceful shutdown
108+
os.kill(pid, signal.SIGTERM)
109+
time.sleep(1)
110+
# Then SIGKILL if still running
111+
os.kill(pid, signal.SIGKILL)
112+
print(f"Killed process on port {port}, pid={pid}")
113+
except ProcessLookupError:
114+
pass # Process already terminated
100115
except subprocess.CalledProcessError:
101116
pass
102117

118+
# Method 2: Use netstat as backup
103119
try:
104-
result = subprocess.run(
105-
f"ps -ef -ww| grep {FD_CACHE_QUEUE_PORT} | grep -v grep", shell=True, capture_output=True, text=True
106-
)
107-
for line in result.stdout.strip().split("\n"):
108-
if not line:
109-
continue
110-
parts = line.split()
111-
pid = int(parts[1]) # ps -ef 的第二列是 PID
112-
print(f"Killing PID: {pid}")
113-
os.kill(pid, signal.SIGKILL)
114-
except Exception as e:
115-
print(f"Failed to kill cache manager process: {e}")
120+
# Find processes using netstat and awk
121+
cmd = f"netstat -tulpn 2>/dev/null | grep :{port} | awk '{{print $7}}' | cut -d'/' -f1"
122+
output = subprocess.check_output(cmd, shell=True).decode().strip()
123+
for pid in output.splitlines():
124+
if pid and pid.isdigit():
125+
pid = int(pid)
126+
if pid in (current_pid, parent_pid):
127+
continue
128+
try:
129+
os.kill(pid, signal.SIGKILL)
130+
print(f"Killed process (netstat) on port {port}, pid={pid}")
131+
except ProcessLookupError:
132+
pass
133+
except (subprocess.CalledProcessError, FileNotFoundError):
134+
pass
135+
136+
# Method 3: Use fuser if available
137+
try:
138+
subprocess.run(f"fuser -k {port}/tcp", shell=True, timeout=5)
139+
except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError):
140+
pass
116141

117142

118143
def clean_ports():
119144
"""
120145
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
121146
"""
147+
print(f"Cleaning ports: {PORTS_TO_CLEAN}")
122148
for port in PORTS_TO_CLEAN:
123149
kill_process_on_port(port)
150+
151+
# Double check and retry if ports are still in use
124152
time.sleep(2)
153+
for port in PORTS_TO_CLEAN:
154+
if is_port_open("127.0.0.1", port, timeout=0.1):
155+
print(f"Port {port} still in use, retrying cleanup...")
156+
kill_process_on_port(port)
157+
time.sleep(1)
125158

126159

127160
@pytest.fixture(scope="session", autouse=True)

tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import os
1717
import re
1818
import signal
19-
import socket
2019
import subprocess
2120
import sys
2221
import time
@@ -25,53 +24,21 @@
2524
import pytest
2625
import requests
2726

28-
# Read ports from environment variables; use default values if not set
29-
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
30-
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
31-
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
32-
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))
27+
tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
28+
sys.path.insert(0, tests_dir)
3329

34-
# List of ports to clean before and after tests
35-
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
30+
from e2e.utils.serving_utils import (
31+
FD_API_PORT,
32+
FD_CACHE_QUEUE_PORT,
33+
FD_ENGINE_QUEUE_PORT,
34+
FD_METRICS_PORT,
35+
clean_ports,
36+
is_port_open,
37+
)
3638

3739
os.environ["FD_USE_MACHETE"] = "0"
3840

3941

40-
def is_port_open(host: str, port: int, timeout=1.0):
41-
"""
42-
Check if a TCP port is open on the given host.
43-
Returns True if connection succeeds, False otherwise.
44-
"""
45-
try:
46-
with socket.create_connection((host, port), timeout):
47-
return True
48-
except Exception:
49-
return False
50-
51-
52-
def kill_process_on_port(port: int):
53-
"""
54-
Kill processes that are listening on the given port.
55-
Uses `lsof` to find process ids and sends SIGKILL.
56-
"""
57-
try:
58-
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
59-
for pid in output.splitlines():
60-
os.kill(int(pid), signal.SIGKILL)
61-
print(f"Killed process on port {port}, pid={pid}")
62-
except subprocess.CalledProcessError:
63-
pass
64-
65-
66-
def clean_ports():
67-
"""
68-
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
69-
"""
70-
for port in PORTS_TO_CLEAN:
71-
kill_process_on_port(port)
72-
time.sleep(2)
73-
74-
7542
@pytest.fixture(scope="session", autouse=True)
7643
def setup_and_run_server():
7744
"""

tests/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -27,49 +27,17 @@
2727
import requests
2828
from jsonschema import validate
2929

30-
# Read ports from environment variables; use default values if not set
31-
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
32-
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
33-
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
34-
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
35-
36-
# List of ports to clean before and after tests
37-
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
38-
39-
40-
def is_port_open(host: str, port: int, timeout=1.0):
41-
"""
42-
Check if a TCP port is open on the given host.
43-
Returns True if connection succeeds, False otherwise.
44-
"""
45-
try:
46-
with socket.create_connection((host, port), timeout):
47-
return True
48-
except Exception:
49-
return False
50-
51-
52-
def kill_process_on_port(port: int):
53-
"""
54-
Kill processes that are listening on the given port.
55-
Uses `lsof` to find process ids and sends SIGKILL.
56-
"""
57-
try:
58-
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
59-
for pid in output.splitlines():
60-
os.kill(int(pid), signal.SIGKILL)
61-
print(f"Killed process on port {port}, pid={pid}")
62-
except subprocess.CalledProcessError:
63-
pass
64-
65-
66-
def clean_ports():
67-
"""
68-
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
69-
"""
70-
for port in PORTS_TO_CLEAN:
71-
kill_process_on_port(port)
72-
time.sleep(2)
30+
tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
31+
sys.path.insert(0, tests_dir)
32+
33+
from e2e.utils.serving_utils import (
34+
FD_API_PORT,
35+
FD_CACHE_QUEUE_PORT,
36+
FD_ENGINE_QUEUE_PORT,
37+
FD_METRICS_PORT,
38+
clean_ports,
39+
is_port_open,
40+
)
7341

7442

7543
@pytest.fixture(scope="session", autouse=True)

tests/ci_use/Qwen2_5_VL/test_Qwen2_5_VL_serving.py

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -25,47 +25,17 @@
2525
import pytest
2626
import requests
2727

28-
# Read ports from environment variables; use default values if not set
29-
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
30-
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
31-
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
32-
33-
# List of ports to clean before and after tests
34-
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
35-
36-
37-
def is_port_open(host: str, port: int, timeout=1.0):
38-
"""
39-
Check if a TCP port is open on the given host.
40-
Returns True if connection succeeds, False otherwise.
41-
"""
42-
try:
43-
with socket.create_connection((host, port), timeout):
44-
return True
45-
except Exception:
46-
return False
47-
48-
49-
def kill_process_on_port(port: int):
50-
"""
51-
Kill processes that are listening on the given port.
52-
Uses `lsof` to find process ids and sends SIGKILL.
53-
"""
54-
try:
55-
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
56-
for pid in output.splitlines():
57-
os.kill(int(pid), signal.SIGKILL)
58-
print(f"Killed process on port {port}, pid={pid}")
59-
except subprocess.CalledProcessError:
60-
pass
61-
62-
63-
def clean_ports():
64-
"""
65-
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
66-
"""
67-
for port in PORTS_TO_CLEAN:
68-
kill_process_on_port(port)
28+
tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
29+
sys.path.insert(0, tests_dir)
30+
31+
from e2e.utils.serving_utils import (
32+
FD_API_PORT,
33+
FD_CACHE_QUEUE_PORT,
34+
FD_ENGINE_QUEUE_PORT,
35+
FD_METRICS_PORT,
36+
clean_ports,
37+
is_port_open,
38+
)
6939

7040

7141
@pytest.fixture(scope="session", autouse=True)

0 commit comments

Comments
 (0)