Skip to content

Commit 5959335

Browse files
authored
Merge pull request #254 from peterCheng123321/fix/172-mineru-subprocess-timeout
fix: add timeout parameter to MinerU subprocess to prevent indefinite hang
2 parents c56a3cc + b67c764 commit 5959335

1 file changed

Lines changed: 19 additions & 2 deletions

File tree

raganything/parser.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
import subprocess
3333
import tempfile
3434
import logging
35+
import time
3536
import urllib.parse
3637
import urllib.request
3738
import shutil
@@ -723,6 +724,7 @@ def _run_mineru_command(
723724
device: Optional[str] = None,
724725
source: Optional[str] = None,
725726
vlm_url: Optional[str] = None,
727+
timeout: Optional[int] = None,
726728
**kwargs,
727729
) -> None:
728730
"""
@@ -741,6 +743,8 @@ def _run_mineru_command(
741743
device: Inference device
742744
source: Model source
743745
vlm_url: When the backend is `vlm-http-client`, you need to specify the server_url
746+
timeout: Maximum seconds to wait for MinerU to complete. None means no limit.
747+
Raises TimeoutError if the process does not finish within this duration.
744748
**kwargs: Additional parameters for subprocess (e.g., env)
745749
"""
746750
cmd = [
@@ -854,6 +858,8 @@ def enqueue_output(pipe, queue, prefix):
854858
stderr_thread.start()
855859

856860
# Process output in real time
861+
start_time = time.monotonic()
862+
857863
while process.poll() is None:
858864
# Check stdout queue
859865
try:
@@ -881,9 +887,20 @@ def enqueue_output(pipe, queue, prefix):
881887
except Empty:
882888
pass
883889

884-
# Small delay to prevent busy waiting
885-
import time
890+
# Enforce timeout — kill the process and raise if exceeded
891+
if timeout is not None and (time.monotonic() - start_time) > timeout:
892+
process.kill()
893+
process.wait()
894+
# Give reader threads a moment to drain before raising
895+
stdout_thread.join(timeout=1)
896+
stderr_thread.join(timeout=1)
897+
raise TimeoutError(
898+
f"MinerU did not finish within {timeout}s. "
899+
"This often means a model download is stuck due to network issues. "
900+
"Check your internet connection or pre-download the required models."
901+
)
886902

903+
# Small delay to prevent busy waiting
887904
time.sleep(0.1)
888905

889906
# Process any remaining output after process completion

0 commit comments

Comments
 (0)