Skip to content

Commit 7e7a311

Browse files
add priorities and schedulingSpec to SDK
1 parent ac02248 commit 7e7a311

File tree

6 files changed

+53
-4
lines changed

6 files changed

+53
-4
lines changed

src/codeflare_sdk/cluster/cluster.py

+2
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ def create_app_wrapper(self):
8484
instascale = self.config.instascale
8585
instance_types = self.config.machine_types
8686
env = self.config.envs
87+
priority = self.config.priority
8788
return generate_appwrapper(
8889
name=name,
8990
namespace=namespace,
@@ -98,6 +99,7 @@ def create_app_wrapper(self):
9899
instascale=instascale,
99100
instance_types=instance_types,
100101
env=env,
102+
priority=priority,
101103
)
102104

103105
# creates a new cluster with the provided or default spec

src/codeflare_sdk/cluster/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,4 @@ class ClusterConfiguration:
4848
instascale: bool = False
4949
envs: dict = field(default_factory=dict)
5050
image: str = "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103"
51+
priority: str = "low"

src/codeflare_sdk/utils/generate_yaml.py

+37
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,27 @@ def update_labels(yaml, instascale, instance_types):
7676
metadata.pop("labels")
7777

7878

79+
def update_priority(yaml, item, priority):
    """Stamp the requested priority onto the AppWrapper and its pod templates.

    Args:
        yaml: Parsed AppWrapper document (a dict). ``yaml["spec"]["priority"]``
            is set to the numeric level for ``priority``. The parameter name
            is kept for callers; note that inside this function it hides any
            module that is also called ``yaml``.
        item: One generic item of the AppWrapper. When it carries a
            ``generictemplate``, the head group and every worker group in that
            template get the matching ``priorityClassName``.
        priority: One of ``"low"``, ``"default"`` or ``"high"``.

    Raises:
        SystemExit: If ``priority`` is not one of the supported names.
    """
    # Single source of truth: name -> (numeric priority, priority class name).
    priority_levels = {
        "low": (1, "low-priority"),
        "default": (5, "default-priority"),
        "high": (10, "high-priority"),
    }

    # The isinstance guard keeps unhashable input on the same exit path
    # instead of failing with a TypeError on the dict lookup.
    if not isinstance(priority, str) or priority not in priority_levels:
        sys.exit("Priority must be 'low', 'default', or 'high'")

    level, class_name = priority_levels[priority]
    yaml.get("spec")["priority"] = level

    # Items without a generictemplate have no pod templates to update.
    if "generictemplate" not in item:
        return

    cluster_spec = item.get("generictemplate").get("spec")
    head = cluster_spec.get("headGroupSpec")
    head["template"]["spec"]["priorityClassName"] = class_name

    # Apply the class to every worker group so no pod of the cluster is left
    # at a different priority than the rest (previously only the first group
    # was updated, and an empty list raised IndexError).
    for worker in cluster_spec.get("workerGroupSpecs") or []:
        worker["template"]["spec"]["priorityClassName"] = class_name
98+
99+
79100
def update_custompodresources(
80101
item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
81102
):
@@ -155,6 +176,11 @@ def update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu):
155176
limits["nvidia.com/gpu"] = gpu
156177

157178

179+
def update_scheduling_spec(yaml, workers):
    """Set ``spec.schedulingSpec.minAvailable`` on the AppWrapper document.

    Args:
        yaml: Parsed AppWrapper document (a dict) with a ``spec`` mapping.
        workers: Number of worker replicas requested for the cluster.

    ``minAvailable`` is written as ``workers + 1``; the extra one presumably
    accounts for the head pod -- confirm against the scheduler's contract.
    """
    spec = yaml.get("spec")

    # Templates that omit schedulingSpec (or leave the key empty) would
    # otherwise fail here with KeyError/TypeError; create the mapping on demand.
    scheduling_spec = spec.get("schedulingSpec")
    if scheduling_spec is None:
        scheduling_spec = {}
        spec["schedulingSpec"] = scheduling_spec

    scheduling_spec["minAvailable"] = workers + 1
182+
183+
158184
def update_nodes(
159185
item,
160186
appwrapper_name,
@@ -211,6 +237,7 @@ def generate_appwrapper(
211237
instascale: bool,
212238
instance_types: list,
213239
env,
240+
priority: str,
214241
):
215242
user_yaml = read_template(template)
216243
appwrapper_name, cluster_name = gen_names(name)
@@ -219,6 +246,8 @@ def generate_appwrapper(
219246
route_item = resources["resources"].get("GenericItems")[1]
220247
update_names(user_yaml, item, appwrapper_name, cluster_name, namespace)
221248
update_labels(user_yaml, instascale, instance_types)
249+
update_priority(user_yaml, item, priority)
250+
update_scheduling_spec(user_yaml, workers)
222251
update_custompodresources(
223252
item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
224253
)
@@ -315,6 +344,12 @@ def main(): # pragma: no cover
315344
default="default",
316345
help="Set the kubernetes namespace you want to deploy your cluster to. Default. If left blank, uses the 'default' namespace",
317346
)
347+
parser.add_argument(
348+
"--priority",
349+
required=False,
350+
default="low",
351+
help="Set the priority of the cluster. Default is 'low'. Options are 'low', 'default', 'high'",
352+
)
318353

319354
args = parser.parse_args()
320355
name = args.name
@@ -329,6 +364,7 @@ def main(): # pragma: no cover
329364
instascale = args.instascale
330365
instance_types = args.instance_types
331366
namespace = args.namespace
367+
priority = args.priority
332368
env = {}
333369

334370
outfile = generate_appwrapper(
@@ -345,6 +381,7 @@ def main(): # pragma: no cover
345381
instascale,
346382
instance_types,
347383
env,
384+
priority,
348385
)
349386
return outfile
350387

tests/test-case-cmd.yaml

+5-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ metadata:
44
name: unit-cmd-cluster
55
namespace: default
66
spec:
7-
priority: 9
7+
priority: 1
88
resources:
99
GenericItems:
1010
- custompodresources:
@@ -136,6 +136,7 @@ spec:
136136
do echo waiting for myservice; sleep 2; done
137137
image: busybox:1.28
138138
name: init-myservice
139+
priorityClassName: low-priority
139140
replicas: 1
140141
- generictemplate:
141142
apiVersion: route.openshift.io/v1
@@ -151,5 +152,7 @@ spec:
151152
to:
152153
kind: Service
153154
name: unit-cmd-cluster-head-svc
154-
replica: 1
155+
replicas: 1
155156
Items: []
157+
schedulingSpec:
158+
minAvailable: 3

tests/test-case.yaml

+5-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
name: unit-test-cluster
77
namespace: ns
88
spec:
9-
priority: 9
9+
priority: 1
1010
resources:
1111
GenericItems:
1212
- custompodresources:
@@ -156,6 +156,7 @@ spec:
156156
do echo waiting for myservice; sleep 2; done
157157
image: busybox:1.28
158158
name: init-myservice
159+
priorityClassName: low-priority
159160
replicas: 1
160161
- generictemplate:
161162
apiVersion: route.openshift.io/v1
@@ -171,5 +172,7 @@ spec:
171172
to:
172173
kind: Service
173174
name: unit-test-cluster-head-svc
174-
replica: 1
175+
replicas: 1
175176
Items: []
177+
schedulingSpec:
178+
minAvailable: 3

tests/unit_test.py

+3
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ def test_config_creation():
213213
gpu=7,
214214
instascale=True,
215215
machine_types=["cpu.small", "gpu.large"],
216+
priority="low",
216217
)
217218

218219
assert config.name == "unit-test-cluster" and config.namespace == "ns"
@@ -227,11 +228,13 @@ def test_config_creation():
227228
assert config.template == f"{parent}/src/codeflare_sdk/templates/base-template.yaml"
228229
assert config.instascale
229230
assert config.machine_types == ["cpu.small", "gpu.large"]
231+
assert config.priority == "low"
230232
return config
231233

232234

233235
def test_cluster_creation():
234236
cluster = Cluster(test_config_creation())
237+
print(cluster.app_wrapper_yaml)
235238
assert cluster.app_wrapper_yaml == "unit-test-cluster.yaml"
236239
assert cluster.app_wrapper_name == "unit-test-cluster"
237240
assert filecmp.cmp(

0 commit comments

Comments
 (0)