3232import subprocess
3333import tempfile
3434import logging
35+ import time
3536import urllib .parse
3637import urllib .request
3738import shutil
@@ -723,6 +724,7 @@ def _run_mineru_command(
723724 device : Optional [str ] = None ,
724725 source : Optional [str ] = None ,
725726 vlm_url : Optional [str ] = None ,
727+ timeout : Optional [int ] = None ,
726728 ** kwargs ,
727729 ) -> None :
728730 """
@@ -741,6 +743,8 @@ def _run_mineru_command(
741743 device: Inference device
742744 source: Model source
743745 vlm_url: When the backend is `vlm-http-client`, you need to specify the server_url
746+ timeout: Maximum seconds to wait for MinerU to complete. None means no limit.
747+ Raises TimeoutError if the process does not finish within this duration.
744748 **kwargs: Additional parameters for subprocess (e.g., env)
745749 """
746750 cmd = [
@@ -854,6 +858,8 @@ def enqueue_output(pipe, queue, prefix):
854858 stderr_thread .start ()
855859
856860 # Process output in real time
861+ start_time = time .monotonic ()
862+
857863 while process .poll () is None :
858864 # Check stdout queue
859865 try :
@@ -881,9 +887,20 @@ def enqueue_output(pipe, queue, prefix):
881887 except Empty :
882888 pass
883889
884- # Small delay to prevent busy waiting
885- import time
890+ # Enforce timeout — kill the process and raise if exceeded
891+ if timeout is not None and (time .monotonic () - start_time ) > timeout :
892+ process .kill ()
893+ process .wait ()
894+ # Give reader threads a moment to drain before raising
895+ stdout_thread .join (timeout = 1 )
896+ stderr_thread .join (timeout = 1 )
897+ raise TimeoutError (
898+ f"MinerU did not finish within { timeout } s. "
899+ "This often means a model download is stuck due to network issues. "
900+ "Check your internet connection or pre-download the required models."
901+ )
886902
903+ # Small delay to prevent busy waiting
887904 time .sleep (0.1 )
888905
889906 # Process any remaining output after process completion
0 commit comments