Skip to content

Commit 4123677

Browse files
committed
Add RayCluster OAuth Authentication test
1 parent a36f34a commit 4123677

File tree

2 files changed

+521
-0
lines changed

2 files changed

+521
-0
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import sys
2+
import os
3+
import requests
4+
5+
from time import sleep
6+
7+
from torchx.specs.api import AppState, is_terminal
8+
9+
from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration
10+
from codeflare_sdk.job.jobs import DDPJobDefinition
11+
from codeflare_sdk.cluster.auth import TokenAuthentication
12+
13+
14+
def submit_ddp_job(cluster, cluster_token, server_url, timeout=900, poll_interval=5):
    """Log in with a token, run the MNIST DDP job on ``cluster`` and exit.

    The job is polled until torchx reports a terminal state. The cluster is
    always torn down before this function leaves, including when login,
    submission or polling raises, so a failed run does not leak the cluster.

    Args:
        cluster: Cluster the job is submitted to; ``cluster.down()`` is called
            on it before returning control.
        cluster_token: Token passed to ``TokenAuthentication``.
        server_url: API server URL passed to ``TokenAuthentication``.
        timeout: Maximum number of seconds to wait for a terminal state.
            A falsy value (``None`` or ``0``) disables the limit.
        poll_interval: Seconds to sleep between two status checks.

    Raises:
        TimeoutError: If the job is still running after ``timeout`` seconds.
        SystemExit: Always, on normal completion: code 0 when the final state
            is ``AppState.SUCCEEDED``, code 1 otherwise.
    """
    try:
        # TLS verification is skipped for the login (skip_tls=True).
        auth = TokenAuthentication(
            token=cluster_token, server=server_url, skip_tls=True
        )
        auth.login()

        jobdef = DDPJobDefinition(
            name="mnist",
            script="mnist.py",
            scheduler_args={"requirements": "requirements.txt"},
        )
        job = jobdef.submit(cluster)

        waited = 0
        while True:
            status = job.status()
            if is_terminal(status.state):
                break
            print(status)
            if timeout and waited >= timeout:
                raise TimeoutError(f"job has timed out after waiting {timeout}s")
            sleep(poll_interval)
            waited += poll_interval

        print(f"Job has completed: {status.state}")
        print(job.logs())
    finally:
        # Tear the cluster down on every path, not only on success.
        cluster.down()

    # sys.exit instead of the site-provided exit(), which is meant for the
    # interactive shell and is absent when the interpreter runs with -S.
    sys.exit(0 if status.state == AppState.SUCCEEDED else 1)
47+
48+
49+
def submit_job_post_request(cluster, dashboard_url, request_timeout=60):
    """POST a job to the dashboard without credentials and expect a 403.

    The request carries no authentication headers. A ``403`` response is the
    passing outcome (presumably because the dashboard sits behind OAuth --
    confirm against the cluster setup); any other outcome fails the test.
    The cluster is torn down whether or not the request itself succeeds.

    Args:
        cluster: Cluster to tear down once the request has been attempted.
        dashboard_url: Base URL of the dashboard; ``/api/jobs/`` is appended.
        request_timeout: Seconds to wait for the HTTP response before giving
            up, so an unresponsive dashboard cannot hang the test.

    Raises:
        SystemExit: Always: code 0 when the response status is 403, code 1
            for any other status or when an error occurs.
    """
    jobdata = {
        "entrypoint": "python mnist.py",
        "runtime_env": {"working_dir": "workdir", "pip": "requirements.txt"},
    }
    try:
        try:
            # NOTE(review): verify=False disables TLS certificate checks;
            # acceptable only if the test cluster uses self-signed certs.
            response = requests.post(
                dashboard_url + "/api/jobs/",
                verify=False,
                json=jobdata,
                timeout=request_timeout,
            )
        finally:
            # Tear down even when the POST raised, so the cluster is not leaked.
            cluster.down()

        if response.status_code == 403:
            sys.exit(0)

        # Error statuses other than 403 surface as an exception handled below.
        response.raise_for_status()
        print(f"Expected status 403 but received {response.status_code}")
        sys.exit(1)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the exits above pass through.
        print(f"An unexpected error occurred: {e}")
        sys.exit(1)
68+
69+
70+
# Script entry: create an OAuth-enabled cluster in the namespace given on the
# command line, wait until it is ready, then run one of the two checks.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError.
    sys.exit(f"usage: {sys.argv[0]} <namespace>")

namespace = sys.argv[1]
ray_image = os.getenv("RAY_IMAGE")
isAuthLoginEnable = os.getenv("OAUTH_LOGIN")
cluster_token = os.getenv("CLUSTER_TOKEN")
server_url = os.getenv("SERVER_URL")

cluster = Cluster(
    ClusterConfiguration(
        name="mnist",
        namespace=namespace,
        num_workers=1,
        head_cpus="500m",
        head_memory=2,
        min_cpus="500m",
        max_cpus=1,
        min_memory=1,
        max_memory=2,
        num_gpus=0,
        instascale=False,
        image=ray_image,
        openshift_oauth=True,
    )
)

cluster.up()

try:
    cluster.status()

    cluster.wait_ready()

    cluster.status()

    cluster.details()
except Exception:
    # The cluster was already requested; do not leave it running when the
    # readiness checks fail before either test function takes over teardown.
    cluster.down()
    raise

# OAUTH_LOGIN must be exactly "true" to select the authenticated job run;
# any other value (or unset) selects the unauthenticated POST check.
if isAuthLoginEnable == "true":
    submit_ddp_job(cluster, cluster_token, server_url)
else:
    dashboard_url = cluster.cluster_dashboard_uri()
    submit_job_post_request(cluster, dashboard_url)

0 commit comments

Comments
 (0)