@@ -117,15 +117,19 @@ def pull_image_with_progress(image: str) -> None:
117117
118118
119119class EnvConf (ExtendedBaseSettings ):
120- # TODO: add prefix ....
121120 default_entry : str
122121 extra_volumes : dict = {}
123- running_timeout_period : int = 3600 # 10 minutes
122+ running_timeout_period : int | None = 3600 # 1 hour
124123 # helper settings to support transparent;
125124 enable_cache : bool = True
126125 retry_count : int = 5 # retry count for the docker run
127126 retry_wait_seconds : int = 10 # retry wait seconds for the docker run
128127
128+ model_config = SettingsConfigDict (
129+ # TODO: add prefix ....
130+ env_parse_none_str = "None" , # this is the key to accept `RUNNING_TIMEOUT_PERIOD=None`
131+ )
132+
129133
130134ASpecificEnvConf = TypeVar ("ASpecificEnvConf" , bound = EnvConf )
131135
@@ -225,7 +229,7 @@ def __run_with_retry(
225229 )
226230 end = time .time ()
227231 logger .info (f"Running time: { end - start } seconds" )
228- if end - start + 1 >= self .conf .running_timeout_period :
232+ if self . conf . running_timeout_period is not None and end - start + 1 >= self .conf .running_timeout_period :
229233 logger .warning (
230234 f"The running time exceeds { self .conf .running_timeout_period } seconds, so the process is killed."
231235 )
@@ -299,9 +303,13 @@ def _get_path_stem(path: str) -> str | None:
299303 chmod_cmd += ")"
300304 return chmod_cmd
301305
306+ if self .conf .running_timeout_period is None :
307+ timeout_cmd = entry
308+ else :
309+ timeout_cmd = f"timeout --kill-after=10 { self .conf .running_timeout_period } { entry } "
302310 entry_add_timeout = (
303- f"/bin/sh -c 'timeout --kill-after=10 { self . conf . running_timeout_period } { entry } ; "
304- + " entry_exit_code=$?; "
311+ f"/bin/sh -c '" # start of the sh command
312+ + f" { timeout_cmd } ; entry_exit_code=$?; "
305313 + (
306314 f"{ _get_chmod_cmd (self .conf .mount_path )} ; "
307315 # We don't have to change the permission of the cache and input folder to remove it
@@ -310,7 +318,8 @@ def _get_path_stem(path: str) -> str | None:
310318 if isinstance (self .conf , DockerConf )
311319 else ""
312320 )
313- + "exit $entry_exit_code'"
321+ + "exit $entry_exit_code"
322+ + "'" # end of the sh command
314323 )
315324
316325 if self .conf .enable_cache :
@@ -635,7 +644,7 @@ class DockerConf(EnvConf):
635644 mem_limit : str | None = "48g" # Add memory limit attribute
636645 cpu_count : int | None = None # Add CPU limit attribute
637646
638- running_timeout_period : int = 3600 # 1 hour
647+ running_timeout_period : int | None = 3600 # 1 hour
639648
640649 enable_cache : bool = True # enable the cache mechanism
641650
@@ -678,7 +687,10 @@ def prepare(self) -> None:
678687
679688
680689class QlibDockerConf (DockerConf ):
681- model_config = SettingsConfigDict (env_prefix = "QLIB_DOCKER_" )
690+ model_config = SettingsConfigDict (
691+ env_prefix = "QLIB_DOCKER_" ,
692+ env_parse_none_str = "None" , # this is the key to accept `RUNNING_TIMEOUT_PERIOD=None`
693+ )
682694
683695 build_from_dockerfile : bool = True
684696 dockerfile_folder_path : Path = Path (__file__ ).parent .parent / "scenarios" / "qlib" / "docker"
@@ -707,7 +719,7 @@ class KGDockerConf(DockerConf):
707719 # Path("git_ignore_folder/data").resolve(): "/root/.data/"
708720 # }
709721
710- running_timeout_period : int = 600
722+ running_timeout_period : int | None = 600
711723 mem_limit : str | None = (
712724 "48g" # Add memory limit attribute # new-york-city-taxi-fare-prediction may need more memory
713725 )
@@ -722,7 +734,7 @@ class DSDockerConf(DockerConf):
722734 mount_path : str = "/kaggle/workspace"
723735 default_entry : str = "python main.py"
724736
725- running_timeout_period : int = 600
737+ running_timeout_period : int | None = 600
726738 mem_limit : str | None = (
727739 "48g" # Add memory limit attribute # new-york-city-taxi-fare-prediction may need more memory
728740 )
0 commit comments