perf: Optimize task queue communication from engine to worker (PaddlePaddle#4531)

sunlei1024 · Jiang-Jia-Jun · kevincheng2 · commit 87774497096d · 2025-10-27T11:15:50.000+08:00
* perf: Optimize task queue communication from engine to worker

* perf: get_tasks to numpy

* perf: get_tasks remove to_numpy

* fix: request & replace ENV

* remove test_e2w_perf.py

* fix code style

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py
@@ -24,6 +24,7 @@
 import numpy as np
 from typing_extensions import TypeVar
 
+from fastdeploy import envs
 from fastdeploy.engine.pooling_params import PoolingParams
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.openai.protocol import ToolCall
@@ -291,11 +292,20 @@ def set(self, key, value):
             setattr(self, key, value)
 
     def __repr__(self) -> str:
-        non_none_fields = []
-        for attr, value in vars(self).items():
-            if value is not None and not attr.startswith("_"):
-                non_none_fields.append(f"{attr}={value!r}")
-        return f"Request({', '.join(non_none_fields)})"
+        """Safe string representation that ignores private and None fields."""
+        try:
+            if not envs.FD_DEBUG:
+                return f"Request(request_id={self.request_id})"
+            else:
+                attrs_snapshot = dict(vars(self))
+                non_none_fields = [
+                    f"{attr}={value!r}"
+                    for attr, value in attrs_snapshot.items()
+                    if value is not None and not attr.startswith("_")
+                ]
+                return f"Request({', '.join(non_none_fields)})"
+        except Exception as e:
+            return f"<{self.__class__.__name__} repr failed: {e}>"
 
 
 @dataclass(slots=True)
diff --git a/fastdeploy/inter_communicator/engine_worker_queue.py b/fastdeploy/inter_communicator/engine_worker_queue.py
@@ -27,7 +27,9 @@
 from typing import Any, List, Tuple
 
 import numpy as np
+import paddle
 
+from fastdeploy import envs
 from fastdeploy.utils import llm_logger
 
 
@@ -294,6 +296,49 @@ def _connect_with_retry(self, max_retries: int = 5, interval: int = 3) -> None:
                 time.sleep(interval)
         raise ConnectionError(f"TaskQueue cannot connect {self.address}")
 
+    @staticmethod
+    def to_tensor(tasks):
+        """
+        Convert NumPy arrays in multimodal inputs to PaddlePaddle tensors.
+
+        Args:
+            tasks: List of tasks containing multimodal inputs.
+        """
+        try:
+            if envs.FD_ENABLE_MAX_PREFILL:
+                llm_logger.debug(f"Convert image to tensor, type: {type(tasks)}")
+                batch_tasks, _ = tasks
+                for task in batch_tasks:
+                    if not hasattr(task, "multimodal_inputs"):
+                        continue
+                    images = task.multimodal_inputs["images"]
+                    if isinstance(images, np.ndarray):
+                        llm_logger.debug(f"Convert image to tensor, shape: {images.shape}")
+                        task.multimodal_inputs["images"] = paddle.to_tensor(images)
+        except Exception as e:
+            llm_logger.warning(f"Failed to convert to tensor: {e}")
+
+    @staticmethod
+    def to_numpy(tasks):
+        """
+        Convert PaddlePaddle tensors in multimodal inputs to NumPy arrays.
+
+        Args:
+            tasks: List of tasks containing multimodal inputs.
+        """
+        try:
+            if envs.FD_ENABLE_MAX_PREFILL:
+                for batch_tasks, _ in tasks:
+                    for task in batch_tasks:
+                        if not hasattr(task, "multimodal_inputs"):
+                            continue
+                        images = task.multimodal_inputs.get("images", None)
+                        if isinstance(images, paddle.Tensor):
+                            llm_logger.debug(f"Convert image to numpy, shape: {images.shape}")
+                            task.multimodal_inputs["images"] = images.numpy()
+        except Exception as e:
+            llm_logger.warning(f"Failed to convert to numpy: {e}")
+
     def put_tasks(self, tasks: List[Any]) -> None:
         """
         Add tasks to the shared queue in a thread-safe manner.
@@ -308,6 +353,9 @@ def put_tasks(self, tasks: List[Any]) -> None:
             time.sleep(0.001)
             self.lock.acquire()
 
+        # 多模态输入转换为张量
+        EngineWorkerQueue.to_tensor(tasks)
+
         self.tasks[:] = list()
         self.client_read_flag[:] = [0] * self.num_client
         self.tasks.append(tasks)
@@ -322,7 +370,11 @@ def get_tasks(self) -> Tuple[List[Any], bool]:
         """
         tasks: List[Any] = list()
         self.lock.acquire()
+
         tasks.extend(self.tasks)
+        # 多模态输入转换为numpy
+        # EngineWorkerQueue.to_numpy(tasks)
+
         self.client_read_flag[self.client_id] = 1
         all_client_read: bool = np.sum(self.client_read_flag) == self.num_client
         if all_client_read:
diff --git a/tests/inter_communicator/test_e2w_queue.py b/tests/inter_communicator/test_e2w_queue.py
@@ -0,0 +1,147 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+import paddle
+
+from fastdeploy import envs
+from fastdeploy.inter_communicator.engine_worker_queue import EngineWorkerQueue
+
+
+class DummyTask:
+    def __init__(self, images):
+        self.multimodal_inputs = {"images": images}
+
+
+class TestEngineWorkerQueue(unittest.TestCase):
+    def test_to_tensor_success(self):
+        envs.FD_ENABLE_MAX_PREFILL = 1
+        # 模拟 numpy 数组输入（使用 paddle 转 numpy）
+        np_images = paddle.randn([2, 3, 224, 224]).numpy()
+        task = DummyTask(np_images)
+        tasks = ([task], 1)
+
+        EngineWorkerQueue.to_tensor(tasks)
+
+        # 验证已转换为tensor
+        self.assertIsInstance(task.multimodal_inputs["images"], paddle.Tensor)
+
+    def test_to_tensor_disabled(self):
+        envs.FD_ENABLE_MAX_PREFILL = 0
+        # 模拟 numpy 数组输入（使用 paddle 转 numpy）
+        np_images = paddle.randn([2, 3, 224, 224]).numpy()
+        task = DummyTask(np_images)
+        tasks = ([task], 1)
+
+        EngineWorkerQueue.to_tensor(tasks)
+
+        # Conversion is disabled, so the images must remain a NumPy array
+        self.assertIsInstance(task.multimodal_inputs["images"], np.ndarray)
+
+    def test_to_tensor_no_multimodal_inputs(self):
+        class NoMMTask:
+            pass
+
+        task = NoMMTask()
+        tasks = ([task], 1)
+
+        # 不应抛异常
+        try:
+            EngineWorkerQueue.to_tensor(tasks)
+        except Exception as e:
+            self.fail(f"Unexpected exception raised: {e}")
+
+    def test_to_tensor_exception_handling(self):
+        bad_task = DummyTask(images="not an array")
+        bad_tasks = ([bad_task], 1)
+
+        try:
+            EngineWorkerQueue.to_tensor(bad_tasks)
+        except Exception as e:
+            self.fail(f"Exception should be handled internally, but got: {e}")
+
+    def test_to_numpy_success(self):
+        envs.FD_ENABLE_MAX_PREFILL = 1
+        # 构造 paddle.Tensor 输入
+        tensor_images = paddle.randn([2, 3, 224, 224])
+        task = DummyTask(tensor_images)
+        tasks = [([task], 1)]
+
+        EngineWorkerQueue.to_numpy(tasks)
+
+        # 验证转换为 numpy.ndarray
+        self.assertIsInstance(task.multimodal_inputs["images"], np.ndarray)
+
+    def test_to_numpy_disabled(self):
+        # 禁用张量转换开关
+        envs.FD_ENABLE_MAX_PREFILL = 0
+        # 创建随机张量作为测试输入
+        tensor_images = paddle.randn([2, 3, 224, 224])
+        # 创建模拟任务
+        task = DummyTask(tensor_images)
+        tasks = [([task], 1)]
+
+        # 调用转换方法(预期不会转换)
+        EngineWorkerQueue.to_numpy(tasks)
+
+        # 因为开关关闭，应仍为 Tensor
+        self.assertIsInstance(task.multimodal_inputs["images"], paddle.Tensor)
+
+    def test_to_numpy_no_multimodal_inputs(self):
+        class NoMMTask:
+            pass
+
+        task = NoMMTask()
+        tasks = [([task], 1)]
+
+        # 不应抛异常
+        try:
+            EngineWorkerQueue.to_numpy(tasks)
+        except Exception as e:
+            self.fail(f"Unexpected exception raised: {e}")
+
+    def test_to_numpy_non_tensor_input(self):
+        envs.FD_ENABLE_MAX_PREFILL = 1
+        np_images = np.random.randn(2, 3, 224, 224)
+        task = DummyTask(np_images)
+        tasks = [([task], 1)]
+
+        EngineWorkerQueue.to_numpy(tasks)
+
+        # 非 Tensor 输入应保持为 numpy 数组
+        self.assertIsInstance(task.multimodal_inputs["images"], np.ndarray)
+
+    def test_to_numpy_exception_handling(self):
+        envs.FD_ENABLE_MAX_PREFILL = 1
+
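+        # Bad input whose .numpy() raises. NOTE(review): BadTensor is not a paddle.Tensor, so to_numpy's isinstance check skips it and .numpy() is never called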
+        class BadTensor:
+            def numpy(self):
+                raise RuntimeError("mock error")
+
+        bad_task = DummyTask(images=BadTensor())
+        bad_tasks = [([bad_task], 1)]
+
+        try:
+            EngineWorkerQueue.to_numpy(bad_tasks)
+        except Exception as e:
+            self.fail(f"Exception should be handled internally, but got: {e}")
+
+
+if __name__ == "__main__":
+    unittest.main()