@@ -301,6 +301,49 @@ def torchx_config(
301
301
to_return ["requirements" ] = requirements
302
302
return to_return
303
303
304
def from_k8_cluster_object(rc):
    """
    Build a Cluster from a RayCluster custom-resource dictionary.

    Only the first entry of ``spec.workerGroupSpecs`` and the first container
    of that worker group's pod template are inspected.

    Args:
        rc: Dictionary form of a RayCluster object. It must provide
            ``metadata.name``, ``metadata.namespace`` and, on the first
            worker container, ``image`` plus ``resources.requests`` /
            ``resources.limits`` with ``cpu`` and ``memory`` entries and a
            ``nvidia.com/gpu`` limit.

    Returns:
        A ``Cluster`` wrapping a ``ClusterConfiguration`` populated from
        ``rc``.

    Raises:
        KeyError, IndexError: If one of the required fields is absent.
        ValueError: If a memory quantity is not an integer optionally
            followed by unit letters.
    """
    import string

    def _memory_amount(quantity):
        # Strip every trailing unit letter instead of blindly dropping one
        # character: "8G" and "8Gi" both yield 8, and a suffix-less "1024"
        # keeps its last digit. The unit itself is discarded, as before.
        return int(str(quantity).rstrip(string.ascii_letters))

    metadata = rc["metadata"]
    # An object without labels simply has no "orderedinstance" label.
    labels = metadata.get("labels") or {}
    if "orderedinstance" in labels:
        machine_types = labels["orderedinstance"].split("_")
    else:
        machine_types = []

    worker_group = rc["spec"]["workerGroupSpecs"][0]
    container = worker_group["template"]["spec"]["containers"][0]
    requests = container["resources"]["requests"]
    limits = container["resources"]["limits"]

    cluster_config = ClusterConfiguration(
        name=metadata["name"],
        namespace=metadata["namespace"],
        machine_types=machine_types,
        min_worker=worker_group["minReplicas"],
        max_worker=worker_group["maxReplicas"],
        min_cpus=requests["cpu"],
        max_cpus=limits["cpu"],
        min_memory=_memory_amount(requests["memory"]),
        max_memory=_memory_amount(limits["memory"]),
        # NOTE(review): still raises KeyError when no GPU limit is set --
        # confirm every cluster handled here declares one.
        gpu=limits["nvidia.com/gpu"],
        # Instascale is considered enabled whenever machine types are listed.
        instascale=bool(machine_types),
        image=container["image"],
        # The presence of volume mounts on the worker container is used as
        # the marker for a local-interactive cluster.
        local_interactive="volumeMounts" in container,
    )
    return Cluster(cluster_config)
346
+
304
347
305
348
def list_all_clusters (namespace : str , print_to_console : bool = True ):
306
349
"""
@@ -337,6 +380,27 @@ def get_current_namespace(): # pragma: no cover
337
380
return "default"
338
381
339
382
383
def get_cluster(cluster_name: str, namespace: str = "default"):
    """
    Look up a RayCluster by name and return it as a Cluster object.

    All ``rayclusters`` custom objects (group ``ray.io``, version
    ``v1alpha1``) in ``namespace`` are listed, and the first one whose
    ``metadata.name`` equals ``cluster_name`` is converted with
    ``Cluster.from_k8_cluster_object``.

    Args:
        cluster_name: Name of the cluster to find.
        namespace: Namespace to search; defaults to ``"default"``.

    Returns:
        The matching ``Cluster``. If loading the kube config or listing the
        custom objects raises, the result of ``_kube_api_error_handling`` is
        returned instead.

    Raises:
        FileNotFoundError: If no cluster with that name exists in the
            namespace.
    """
    try:
        config.load_kube_config()
        custom_objects_api = client.CustomObjectsApi()
        ray_clusters = custom_objects_api.list_namespaced_custom_object(
            group="ray.io",
            version="v1alpha1",
            namespace=namespace,
            plural="rayclusters",
        )
    except Exception as err:
        return _kube_api_error_handling(err)

    # First item with the requested name, or None when nothing matches.
    found = next(
        (
            item
            for item in ray_clusters["items"]
            if item["metadata"]["name"] == cluster_name
        ),
        None,
    )
    if found is None:
        raise FileNotFoundError(
            f"Cluster {cluster_name} is not found in {namespace} namespace"
        )
    return Cluster.from_k8_cluster_object(found)
402
+
403
+
340
404
# private methods
341
405
342
406
0 commit comments