Skip to content
This repository was archived by the owner on Aug 16, 2023. It is now read-only.

Commit 5b60ad7

Browse files
authored
Merge pull request #870 from kpouget/codeflare
testing: codeflare: sdk_user/sample/sample: retry 'cluster.wait_ready' multiple times
2 parents ed7e356 + 6abcef0 commit 5b60ad7

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# not used by the buildconfig; should not be prefixed with the image registry location
2-
FROM ci-artifacts:main
2+
FROM ci-artifacts:main
33

4-
RUN pip install --quiet --no-cache-dir \
4+
RUN pip install --no-cache-dir \
55
codeflare_sdk

testing/codeflare/sdk_user/sample/sample.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,16 @@ def main():
3535

3636
# Bring up the cluster
3737
cluster.up()
38-
cluster.wait_ready()
38+
while True:
39+
try:
40+
cluster.wait_ready()
41+
break
42+
except TypeError as e:
43+
# there's a timeout in the calling script
44+
logging.warning(f"cluster.wait_ready() failed with {e}")
45+
import time
46+
time.sleep(5)
47+
3948
cluster.status()
4049
cluster.details()
4150

@@ -56,7 +65,9 @@ def main():
5665
finished = (str(status.state) == "SUCCEEDED")
5766
failed = (str(status.state) == "FAILED")
5867

59-
print(job.logs())
68+
with open(f"{name}-{job_name}.log", "w") as f:
69+
print(job.logs(), file=f)
70+
6071
print(status)
6172

6273
cluster.down()

0 commit comments

Comments
 (0)