2 changes: 1 addition & 1 deletion benchmarks/benchmark_mtp.py
@@ -98,7 +98,7 @@ def main(args):
raise ValueError("--max_concurrency should be same length as --s_itl_base_model")

for max_concurrency, s_itl in zip(args.max_concurrency, args.s_itl_base_model):
-# Wramup
+# Warmup
print("Starting warmup...")
with open(os.devnull, "w") as f:
with contextlib.redirect_stdout(f):
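
The hunk above touches the warmup loop, which silences benchmark output by redirecting stdout to the null device. As context only, here is a minimal sketch of that suppression pattern; the `run_benchmark` call in the usage comment is a hypothetical stand-in, not part of this diff.

```python
import contextlib
import os

def run_quietly(fn, *args, **kwargs):
    # Warmup runs are noisy; route their stdout to the null device.
    with open(os.devnull, "w") as f:
        with contextlib.redirect_stdout(f):
            return fn(*args, **kwargs)

# Hypothetical usage: warm the server up before measuring ITL.
# run_quietly(run_benchmark, max_concurrency=8)
```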
2 changes: 1 addition & 1 deletion custom_ops/gpu_ops/custom_all_reduce/all_reduce.cuh
@@ -303,7 +303,7 @@ class CustomAllreduce {
bool full_nvlink_;

RankSignals sg_;
-// Stores an map from a pointer to its peer pointters from all ranks.
+// Stores a map from a pointer to its peer pointers from all ranks.
std::unordered_map<void*, RankData*> buffers_;
Signal* self_sg_;

2 changes: 1 addition & 1 deletion docs/get_started/installation/nvidia_gpu.md
@@ -10,7 +10,7 @@ The following installation methods are available when your environment meets the

## 1. Pre-built Docker Installation (Recommended)

-**Notice**: The pre-built image only supports SM80/90 GPU(e.g. H800/A800),if you are deploying on SM86/89GPU(L40/4090/L20), please reinstall ```fastdpeloy-gpu``` after you create the container.
+**Notice**: The pre-built image only supports SM80/90 GPUs (e.g. H800/A800). If you are deploying on SM86/89 GPUs (L40/4090/L20), please reinstall ```fastdeploy-gpu``` after you create the container.

```shell
docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-cuda-12.6:2.2.0
```
2 changes: 1 addition & 1 deletion docs/usage/code_overview.md
@@ -20,6 +20,6 @@ Below is an overview of the FastDeploy code structure and functionality organize
- ```platforms```: Platform-specific modules for underlying hardware support.
- ```scheduler```: Request scheduling module for large models.
- ```metrics```: Core component for collecting, managing, and exporting Prometheus metrics, tracking key runtime performance data (e.g., request latency, resource utilization, successful request counts).
-- ```splitwise```: Modules related to PD disaggragation deployment.
+- ```splitwise```: Modules related to PD disaggregation deployment.
- ```scripts```/```tools```: Utility scripts for FastDeploy operations (e.g., compilation, unit testing, code style fixes).
- ```test```: Code for unit testing and validation.
2 changes: 1 addition & 1 deletion docs/usage/log.md
@@ -30,7 +30,7 @@ By default, logs are stored in the `log` directory under the execution path. To
* `cache_transfer_manager.log` : Logs startup parameters and received request information.
* `launch_cache_manager.log` : Records cache transfer startup parameters and error messages.

-## PD Disaggragation Logs
+## PD Disaggregation Logs
* `cache_messager.log` : Logs transmission protocols and messages used by the P instance.
* `splitwise_connector.log` : Records data received from P/D instances and connection establishment details.

8 changes: 4 additions & 4 deletions mkdocs.yml
@@ -1,4 +1,4 @@
-site_name: 'FastDeploy : Large Language Model Deployement'
+site_name: 'FastDeploy : Large Language Model Deployment'
repo_url: https://github.com/PaddlePaddle/FastDeploy
repo_name: FastDeploy

@@ -36,7 +36,7 @@ plugins:
- locale: en
default: true
name: English
-site_name: 'FastDeploy: Large Language Model Deployement'
+site_name: 'FastDeploy: Large Language Model Deployment'
build: true
link: /FastDeploy/
- locale: zh
@@ -59,7 +59,7 @@ plugins:
ERNIE-4.5-VL-424B-A47B: ERNIE-4.5-VL-424B-A47B快速部署
Quick Deployment For QWEN: Qwen3-0.6b快速部署
Online Serving: 在线服务
-OpenAI-Compitable API Server: 兼容 OpenAI 协议的服务化部署
+OpenAI-Compatible API Server: 兼容 OpenAI 协议的服务化部署
Monitor Metrics: 监控Metrics
Scheduler: 调度器
Graceful Shutdown: 服务优雅关闭
@@ -114,7 +114,7 @@ nav:
- ERNIE-4.5-VL-424B-A47B: get_started/ernie-4.5-vl.md
- Quick Deployment For QWEN: get_started/quick_start_qwen.md
- Online Serving:
-- OpenAI-Compitable API Server: online_serving/README.md
+- OpenAI-Compatible API Server: online_serving/README.md
- Monitor Metrics: online_serving/metrics.md
- Scheduler: online_serving/scheduler.md
- Graceful Shutdown: online_serving/graceful_shutdown_service.md
50 changes: 17 additions & 33 deletions tests/ce/server/test_return_token_ids.py
@@ -9,12 +9,7 @@

import json

-from core import (
-    TEMPLATE,
-    URL,
-    build_request_payload,
-    send_request,
-)
+from core import TEMPLATE, URL, build_request_payload, send_request

COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")

@@ -29,17 +24,17 @@ def test_completion_stream_text_after_process_raw_prediction():
"stream": True,
"stream_options": {"include_usage": True, "continuous_usage_stats": True},
"max_tokens": 50,
-"return_token_ids": True
+"return_token_ids": True,
}

payload = build_request_payload(TEMPLATE, data)
resp = send_request(COMPLETIONS_URL, payload, stream=True)
for line in resp.iter_lines(decode_unicode=True):
if line.strip() == "data: [DONE]":
break
if line.strip() == "" or not line.startswith("data: "):
continue
-line = line[len("data: "):]
+line = line[len("data: ") :]
response_data = json.loads(line)

choice = response_data["choices"][0]
@@ -51,21 +46,16 @@ def test_completion_stream_text_after_process_raw_prediction():
reasoning_content = choice["reasoning_content"]
text = choice["text"]
assert reasoning_content or text in raw_prediction, "raw_prediction value is incorrect"
-if "finish_reason" in line.strip() :
+if "finish_reason" in line.strip():
break

-def test_completion_text_after_process_raw_predictio_return_tokrn_ids():
+
+def test_completion_text_after_process_raw_predictio_return_token_ids():
"""
/v1/completions endpoint, non-streaming.
Returns the "text_after_process" and "reasoning_content" attributes.
"""
-data = {
-    "stream": False,
-    "prompt": "你是谁",
-    "max_tokens": 50,
-    "return_token_ids": True
-}
+data = {"stream": False, "prompt": "你是谁", "max_tokens": 50, "return_token_ids": True}
payload = build_request_payload(TEMPLATE, data)
resp = send_request(COMPLETIONS_URL, payload).json()

@@ -80,14 +70,10 @@ def test_completion_text_after_process_raw_predictio_return_tokrn_ids():

def test_completion_text_after_process_raw_prediction():
"""
-/v1/completions endpoint, without the return_tokrn_ids parameter
+/v1/completions endpoint, without the return_token_ids parameter
In the non-streaming endpoint, without return token ids the "text_after_process" and "reasoning_content" attributes are null.
"""
-data = {
-    "stream": False,
-    "prompt": "你是谁",
-    "max_tokens": 50
-}
+data = {"stream": False, "prompt": "你是谁", "max_tokens": 50}
payload = build_request_payload(TEMPLATE, data)
resp = send_request(COMPLETIONS_URL, payload).json()

@@ -108,17 +94,17 @@ def test_stream_text_after_process_raw_prediction():
"stream": True,
"stream_options": {"include_usage": True, "continuous_usage_stats": True},
"max_tokens": 50,
-"return_token_ids": True
+"return_token_ids": True,
}

payload = build_request_payload(TEMPLATE, data)
resp = send_request(URL, payload, stream=True)
for line in resp.iter_lines(decode_unicode=True):
-if line.strip() == "data: [DONE]" :
+if line.strip() == "data: [DONE]":
break
if line.strip() == "" or not line.startswith("data: "):
continue
-line = line[len("data: "):]
+line = line[len("data: ") :]
response_data = json.loads(line)

choice = response_data["choices"][0]
@@ -130,11 +116,11 @@ def test_stream_text_after_process_raw_prediction():
reasoning_content = choice["delta"]["reasoning_content"]
content = choice["delta"]["content"]
assert reasoning_content or content in raw_prediction, "raw_prediction value is incorrect"
-if "finish_reason" in line.strip() :
+if "finish_reason" in line.strip():
break

-def test_text_after_process_raw_prediction_return_tokrn_ids():
+
+def test_text_after_process_raw_prediction_return_token_ids():
"""
/v1/chat/completions endpoint, non-streaming.
Returns the "text_after_process" and "reasoning_content" attributes.
@@ -161,7 +147,7 @@ def test_text_after_process_raw_prediction_return_tokrn_ids():

def test_text_after_process_raw_prediction():
"""
-/v1/chat/completions endpoint, without the return_tokrn_ids parameter
+/v1/chat/completions endpoint, without the return_token_ids parameter
Without return token ids, the "text_after_process" and "reasoning_content" attributes are null.
"""
data = {
@@ -179,5 +165,3 @@ def test_text_after_process_raw_prediction():

raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
assert raw_prediction is None, "raw_prediction value is incorrect"
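
All of the streaming tests in this file share one parsing pattern: read server-sent-event lines, strip the `data: ` prefix, and stop at the `[DONE]` sentinel. A condensed sketch of that loop for reference, assuming `resp` is a `requests` streaming response as in the tests:

```python
import json

def iter_sse_chunks(resp):
    # Yield decoded JSON chunks from an OpenAI-style SSE stream.
    for line in resp.iter_lines(decode_unicode=True):
        line = line.strip()
        if not line.startswith("data: "):
            continue  # skip blank keep-alive lines
        payload = line[len("data: ") :]
        if payload == "[DONE]":  # server's end-of-stream sentinel
            break
        yield json.loads(payload)
```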


4 changes: 2 additions & 2 deletions tests/ci_use/EB_Lite_with_adapter/zmq_client.py
@@ -50,7 +50,7 @@ def consume_results(self, result_queue):
if self.need_exit:
break
except Exception as e:
-print(f"zmq client occured error {e} type: {type(e)} frames: {frames}")
+print(f"zmq client occurred error {e} type: {type(e)} frames: {frames}")

def start(self, result_queue):
threading.Thread(target=self.consume_results, args=(result_queue,), daemon=True).start()
@@ -118,4 +118,4 @@ def recv_results(self):
self.result[task_id] = result["result"]
self.task_event[task_id].set()
except Exception as e:
-print(f"zmq client occured error {e} type: {type(e)} frames: {frames}")
+print(f"zmq client occurred error {e} type: {type(e)} frames: {frames}")
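
Both corrected lines sit in the client's result-consumption path: a daemon thread drains a queue and wakes the waiter registered for each task id. A minimal sketch of that pattern, with illustrative names that are assumptions rather than this file's actual API:

```python
import threading
from queue import Queue

class ResultWaiter:
    """Sketch: a daemon thread drains results and wakes per-task waiters."""

    def __init__(self):
        self.result = {}
        self.task_event = {}

    def start(self, result_queue: Queue) -> None:
        threading.Thread(target=self._consume, args=(result_queue,), daemon=True).start()

    def _consume(self, result_queue: Queue) -> None:
        while True:
            task_id, payload = result_queue.get()
            self.result[task_id] = payload
            # Wake whoever is blocked in wait_for() on this task.
            self.task_event.setdefault(task_id, threading.Event()).set()

    def wait_for(self, task_id: str, timeout: float = 30.0):
        event = self.task_event.setdefault(task_id, threading.Event())
        if event.wait(timeout):
            return self.result.pop(task_id)
        raise TimeoutError(f"no result for task {task_id}")
```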
10 changes: 5 additions & 5 deletions tests/distributed/custom_all_reduce.py
@@ -54,14 +54,14 @@ def test_case(self):
fa = CustomAllreduce(model_parallel_group)

for m, n in mns:
-data_cusom_ar = paddle.rand([m, n], dtype="bfloat16")
-data_paddle = data_cusom_ar.clone()
-if fa.should_custom_ar(data_cusom_ar):
-    fa.custom_all_reduce(data_cusom_ar)
+data_custom_ar = paddle.rand([m, n], dtype="bfloat16")
+data_paddle = data_custom_ar.clone()
+if fa.should_custom_ar(data_custom_ar):
+    fa.custom_all_reduce(data_custom_ar)
dist.all_reduce(data_paddle)
if dist.get_rank() == 0:
np.testing.assert_allclose(
-data_cusom_ar.numpy(),
+data_custom_ar.numpy(),
data_paddle.numpy(),
rtol=1e-04,
atol=1e-04,
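
The renamed tensor feeds a correctness check: the test runs the custom all-reduce and Paddle's built-in `dist.all_reduce` on cloned inputs, then compares the results on rank 0. Reduced to plain NumPy (float32 standing in for bfloat16, tolerances copied from the hunk), the check amounts to this sketch:

```python
import numpy as np

# Simulate an all-reduce (element-wise sum) across 4 ranks two ways and compare,
# mirroring the custom-vs-paddle assertion in the test above.
rank_tensors = [np.random.rand(128, 1024).astype(np.float32) for _ in range(4)]

custom_result = rank_tensors[0].copy()
for t in rank_tensors[1:]:
    custom_result += t                      # stand-in for fa.custom_all_reduce

reference = np.sum(rank_tensors, axis=0)    # stand-in for dist.all_reduce

np.testing.assert_allclose(custom_result, reference, rtol=1e-04, atol=1e-04)
```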
2 changes: 1 addition & 1 deletion tools/deep_gemm_pre-compile/pre_compile.py
@@ -158,7 +158,7 @@ def pre_compile_from_config(config_file: str, num_threads: int, expert_parallel:

pbar.close()

-logger.info(f"Total compliation time: {time() - start_time:.2f} seconds")
+logger.info(f"Total compilation time: {time() - start_time:.2f} seconds")


def main(args):