11import json
22import logging
3+ import subprocess
34from abc import abstractmethod , ABC
45from datetime import timedelta
5- from subprocess import Popen , PIPE
6+ from subprocess import Popen
67from typing import Optional
78
8- import luigi
99from luigi .contrib .external_program import ExternalProgramRunError , ExternalProgramRunContext
1010
1111from .config import bioluigi
1616
1717_schedulers = {}
1818
19- class ScheduledTask (luigi . Task , ABC ):
19+ class ScheduledTask (ABC ):
2020 """Interface for Luigi tasks that can be scheduled."""
2121 walltime : timedelta
2222
@@ -28,6 +28,8 @@ class ScheduledTask(luigi.Task, ABC):
2828
2929 capture_output : bool
3030
31+ working_directory : Optional [str ]
32+
3133 @abstractmethod
3234 def program_environment (self ) -> dict [str , str ]:
3335 pass
@@ -40,7 +42,10 @@ def get_available_schedulers():
4042 return _schedulers .keys ()
4143
4244def get_scheduler (blurb ):
43- return _schedulers [blurb ]
45+ try :
46+ return _schedulers [blurb ]
47+ except KeyError :
48+ raise ValueError ('Unsupported scheduler {}' .format (self .scheduler ))
4449
4550def register_scheduler (blurb ):
4651 """
@@ -56,9 +61,34 @@ def wrapper(cls):
5661
5762 return wrapper
5863
64+ def _run_command (args , env , cwd , capture_output ):
65+ kwargs = {'env' : env , 'universal_newlines' : True }
66+ if capture_output :
67+ kwargs ['stdout' ] = subprocess .PIPE
68+ kwargs ['stderr' ] = subprocess .PIPE
69+ if cwd :
70+ kwargs ['cwd' ] = cwd
71+ logger .info ('Running command %s%s' , ' ' .join (args ), 'in ' + cwd if cwd else '' )
72+ proc = Popen (args , ** kwargs )
73+ with ExternalProgramRunContext (proc ):
74+ if capture_output :
75+ stderr , stdout = proc .communicate ()
76+ if stdout :
77+ logger .info ('Program stdout:\n {}' .format (stdout ))
78+ if stderr :
79+ logger .info ('Program stderr:\n {}' .format (stderr ))
80+ else :
81+ proc .wait ()
82+ if proc .returncode != 0 :
83+ raise ExternalProgramRunError ('Program exited with non-zero return code.' , tuple (args ), env , stdout , stderr )
84+
5985class Scheduler (ABC ):
6086 @abstractmethod
61- def run_task (self , task ):
87+ def get_resources_for_task (self , task : ScheduledTask ) -> dict [str , int ]:
88+ pass
89+
90+ @abstractmethod
91+ def run_task (self , task : ScheduledTask ):
6292 pass
6393
6494@register_scheduler ('slurm' )
@@ -67,10 +97,13 @@ class SlurmScheduler(Scheduler):
6797 Scheduler based on Slurm https://slurm.schedmd.com/
6898 """
6999
100+ def get_resources_for_task (self , task : ScheduledTask ):
101+ return {'slurm_jobs' : 1 , 'slurm_cpus' : task .cpus }
102+
70103 def run_task (self , task : ScheduledTask ):
71104 secs = int (task .walltime .total_seconds ())
72- srun_args = [cfg .slurm_srun_bin ]
73- srun_args .extend ([
105+ args = [cfg .slurm_srun_bin ]
106+ args .extend ([
74107 '--verbose' ,
75108 '--job-name' , task .get_task_family (),
76109 '--comment' , json .dumps ({'task_id' : task .task_id , 'priority' : task .priority }),
@@ -79,19 +112,47 @@ def run_task(self, task: ScheduledTask):
79112 '--mem' , '{}G' .format (int (task .memory )),
80113 '--cpus-per-task' , str (task .cpus )])
81114 if task .scheduler_partition :
82- srun_args .extend (['--partition' , task .scheduler_partition ])
115+ args .extend (['--partition' , task .scheduler_partition ])
116+ if task .working_directory :
117+ args .extend (['--chdir' , task .working_directory ])
83118 # FIXME: task.priority is not reliable and does not reflect what the
84119 # scheduler
85120 # TODO: srun_args.extend([--priority', str(max(0, cfg.scheduler_priority))])
86- srun_args .extend (map (str , task .scheduler_extra_args ))
87- args = list (map (str , task .program_args ()))
88- env = task .program_environment ()
89- logger .info ('Running Slurm command {}' .format (' ' .join (srun_args + args )))
90- proc = Popen (srun_args + args , env = env , stdout = PIPE , stderr = PIPE , universal_newlines = True )
91- with ExternalProgramRunContext (proc ):
92- stdout , stderr = proc .communicate ()
93- if proc .returncode != 0 :
94- raise ExternalProgramRunError ('Program exited with non-zero return code.' , tuple (args ), env , stdout , stderr )
95- if task .capture_output :
96- logger .info ('Program stdout:\n {}' .format (stdout ))
97- logger .info ('Program stderr:\n {}' .format (stderr ))
121+ args .extend (map (str , task .scheduler_extra_args ))
122+ args .extend (map (str , task .program_args ()))
123+ _run_command (args , env = (task .program_environment ()), cwd = task .working_directory ,
124+ capture_output = task .capture_output )
125+
126+ @register_scheduler ('local' )
127+ class LocalScheduler (Scheduler ):
128+ """A scheduler that uses a local subprocess"""
129+
130+ def get_resources_for_task (self , task : ScheduledTask ):
131+ return {'cpus' : task .cpus , 'memory' : task .memory }
132+
133+ def run_task (self , task : ScheduledTask ):
134+ _run_command (list (map (str , task .program_args ())), env = task .program_environment (), cwd = task .working_directory ,
135+ capture_output = task .capture_output )
136+
137+ @register_scheduler ('ssh' )
138+ class SshScheduler (Scheduler ):
139+ """A scheduler that uses SSH to run a task remotely"""
140+
141+ def get_resources_for_task (self , task : ScheduledTask ):
142+ return {'ssh_cpus' : task .cpus , 'ssh_memory' : task .memory }
143+
144+ def run_task (self , task : ScheduledTask ):
145+ if not cfg .ssh_remote :
146+ raise ValueError ('No SSH remote is configured.' )
147+ args = [cfg .ssh_bin ]
148+ if cfg .ssh_port :
149+ args .extend (['-p' , str (cfg .ssh_port )])
150+ if cfg .ssh_user :
151+ args .extend (['-u' , cfg .ssh_user ])
152+ if cfg .ssh_identity_file :
153+ args .extend (['-i' , cfg .ssh_identity_file ])
154+ args .extend (task .scheduler_extra_args )
155+ args .append (cfg .ssh_remote )
156+ args .extend (map (str , task .program_args ()))
157+ _run_command (args , env = task .program_environment (), cwd = task .working_directory ,
158+ capture_output = task .capture_output )
0 commit comments