# test: byoidc compatibility #1287
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Guided notebooks tests
#
# Runs three guided demo notebooks (0_basic_ray, 4_rayjob_existing_cluster,
# 5_submit_rayjob_cr) with papermill against a KinD cluster that has Kueue and
# KubeRay installed. Each notebook gets its own job; the jobs are independent
# and share the same setup / log-collection steps.
#
# The jobs only run when the pull request carries the `test-guided-notebooks`
# label (see the job-level `if:` conditions).
#
# NOTE(review): TEMP_DIR is used by the log steps but is not defined in this
# file; presumably it is exported by ./common/github-actions/kind - confirm.
name: Guided notebooks tests

on:
  pull_request:
    branches: [main]
    types: [labeled, synchronize]

# One active run per head branch and workflow; a newer run cancels the older.
concurrency:
  group: ${{ github.head_ref }}-${{ github.workflow }}
  cancel-in-progress: true

# Operator versions installed by every job's "Deploy Kueue and KubeRay" step.
env:
  KUEUE_VERSION: v0.13.4
  KUBERAY_VERSION: v1.4.2

jobs:
  verify-0_basic_ray:
    if: ${{ contains(github.event.pull_request.labels.*.name, 'test-guided-notebooks') }}
    runs-on: ubuntu-latest-4core
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Checked out into ./common so its local actions can be referenced below.
      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: 'project-codeflare/codeflare-common'
          ref: 'main'
          path: 'common'

      - name: Set up specific Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'  # caching pip dependencies

      - name: Setup and start KinD cluster
        uses: ./common/github-actions/kind

      # The `deploy` id is referenced by the log steps' `if:` conditions.
      - name: Deploy Kueue and KubeRay
        id: deploy
        run: |
          # Install Kueue
          echo "Installing Kueue ${KUEUE_VERSION}..."
          kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n kueue-system kueue-controller-manager
          # Install KubeRay from opendatahub-io fork (has RHOAI features)
          echo "Installing KubeRay ${KUBERAY_VERSION} from opendatahub-io..."
          kubectl create -k "github.com/opendatahub-io/kuberay/ray-operator/config/default?ref=${KUBERAY_VERSION}"
          kubectl wait --timeout=120s --for=condition=Available=true deployment kuberay-operator
          # Create default Kueue resources for the tests
          echo "Creating Kueue resources..."
          kubectl apply -f - <<EOF
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: ResourceFlavor
          metadata:
            name: default-flavor
          ---
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: ClusterQueue
          metadata:
            name: cluster-queue
          spec:
            namespaceSelector: {}
            resourceGroups:
            - coveredResources: ["cpu", "memory", "nvidia.com/gpu"]
              flavors:
              - name: default-flavor
                resources:
                - name: cpu
                  nominalQuota: 100
                - name: memory
                  nominalQuota: 100Gi
                - name: nvidia.com/gpu
                  nominalQuota: 10
          ---
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: LocalQueue
          metadata:
            name: local-queue
            namespace: default
            annotations:
              kueue.x-k8s.io/default-queue: "true"
          spec:
            clusterQueue: cluster-queue
          EOF

      - name: Setup Guided notebooks execution
        run: |
          echo "Installing papermill and dependencies..."
          pip install poetry papermill ipython ipykernel
          # Disable virtualenv due to problems using packaged in virtualenv in papermill
          poetry config virtualenvs.create false
          echo "Installing SDK..."
          poetry install --with test,docs

      # The notebook is patched in place (jq / sed) before papermill runs it.
      - name: Run 0_basic_ray.ipynb
        run: |
          set -euo pipefail
          # Remove login/logout cells, as KinD doesn't support authentication using token
          jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
          jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
          # Set explicit namespace as SDK need it (currently) to resolve local queues
          sed -i "s/head_memory_limits=8,/head_memory_limits=8, namespace='default',/" 0_basic_ray.ipynb
          # Disable dashboard check as KinD doesn't have HTTPRoute/Route configured
          sed -i "s/cluster.wait_ready()/cluster.wait_ready(dashboard_check=False)/" 0_basic_ray.ipynb
          # Run notebook
          poetry run papermill 0_basic_ray.ipynb 0_basic_ray_out.ipynb --log-output --execution-timeout 600
        working-directory: demo-notebooks/guided-demos

      # Log steps: run even if the notebook step failed, but only when the
      # operators were deployed successfully.
      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 "${KUEUE_CONTROLLER_POD}" | tee "${TEMP_DIR}/kueue.log"

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

      # NOTE(review): `with:` values are not shell-expanded by the runner, so
      # the literal text ${TEMP_DIR} is passed; this relies on the action
      # expanding it itself - confirm against kind-export-logs.
      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.deploy.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.deploy.outcome == 'success'
        with:
          name: logs-0_basic_ray
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log

  verify-4_rayjob_existing_cluster:
    if: ${{ contains(github.event.pull_request.labels.*.name, 'test-guided-notebooks') }}
    runs-on: ubuntu-latest-4core
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Checked out into ./common so its local actions can be referenced below.
      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: 'project-codeflare/codeflare-common'
          ref: 'main'
          path: 'common'

      - name: Set up specific Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'  # caching pip dependencies

      - name: Setup and start KinD cluster
        uses: ./common/github-actions/kind

      # The `deploy` id is referenced by the log steps' `if:` conditions.
      - name: Deploy Kueue and KubeRay
        id: deploy
        run: |
          # Install Kueue
          echo "Installing Kueue ${KUEUE_VERSION}..."
          kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n kueue-system kueue-controller-manager
          # Install KubeRay from opendatahub-io fork (has RHOAI features)
          echo "Installing KubeRay ${KUBERAY_VERSION} from opendatahub-io..."
          kubectl create -k "github.com/opendatahub-io/kuberay/ray-operator/config/default?ref=${KUBERAY_VERSION}"
          kubectl wait --timeout=120s --for=condition=Available=true deployment kuberay-operator
          # Create default Kueue resources for the tests
          echo "Creating Kueue resources..."
          kubectl apply -f - <<EOF
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: ResourceFlavor
          metadata:
            name: default-flavor
          ---
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: ClusterQueue
          metadata:
            name: cluster-queue
          spec:
            namespaceSelector: {}
            resourceGroups:
            - coveredResources: ["cpu", "memory", "nvidia.com/gpu"]
              flavors:
              - name: default-flavor
                resources:
                - name: cpu
                  nominalQuota: 100
                - name: memory
                  nominalQuota: 100Gi
                - name: nvidia.com/gpu
                  nominalQuota: 10
          ---
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: LocalQueue
          metadata:
            name: local-queue
            namespace: default
            annotations:
              kueue.x-k8s.io/default-queue: "true"
          spec:
            clusterQueue: cluster-queue
          EOF

      - name: Setup Guided notebooks execution
        run: |
          echo "Installing papermill and dependencies..."
          pip install poetry papermill ipython ipykernel
          # Disable virtualenv due to problems using packaged in virtualenv in papermill
          poetry config virtualenvs.create false
          echo "Installing SDK..."
          poetry install --with test,docs

      # The notebook is patched in place (jq / sed) before papermill runs it.
      - name: Run 4_rayjob_existing_cluster.ipynb
        run: |
          set -euo pipefail
          # Remove oc login cell, as KinD doesn't support oc login
          jq -r 'del(.cells[] | select(.source[] | contains("oc login")))' 4_rayjob_existing_cluster.ipynb > 4_rayjob_existing_cluster.ipynb.tmp && mv 4_rayjob_existing_cluster.ipynb.tmp 4_rayjob_existing_cluster.ipynb
          # Remove GPU requests (KinD doesn't have GPUs)
          sed -i "s/head_extended_resource_requests={'nvidia.com\/gpu':1},/head_extended_resource_requests={'nvidia.com\/gpu':0},/" 4_rayjob_existing_cluster.ipynb
          sed -i "s/worker_extended_resource_requests={'nvidia.com\/gpu':1},/worker_extended_resource_requests={'nvidia.com\/gpu':0},/" 4_rayjob_existing_cluster.ipynb
          # Set explicit namespace for RayJob (notebook stores JSON with escaped quotes)
          sed -i 's/namespace=\\"your-namespace\\"/namespace=\\"default\\"/' 4_rayjob_existing_cluster.ipynb
          # Add namespace to ClusterConfiguration
          sed -i "s/head_memory_limits=8,/head_memory_limits=8, namespace='default',/" 4_rayjob_existing_cluster.ipynb
          # Run notebook
          poetry run papermill 4_rayjob_existing_cluster.ipynb 4_rayjob_existing_cluster_out.ipynb --log-output --execution-timeout 600
        working-directory: demo-notebooks/guided-demos

      # Log steps: run even if the notebook step failed, but only when the
      # operators were deployed successfully.
      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 "${KUEUE_CONTROLLER_POD}" | tee "${TEMP_DIR}/kueue.log"

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

      # NOTE(review): `with:` values are not shell-expanded by the runner, so
      # the literal text ${TEMP_DIR} is passed; this relies on the action
      # expanding it itself - confirm against kind-export-logs.
      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.deploy.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.deploy.outcome == 'success'
        with:
          name: logs-4_rayjob_existing_cluster
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log

  verify-5_submit_rayjob_cr:
    if: ${{ contains(github.event.pull_request.labels.*.name, 'test-guided-notebooks') }}
    runs-on: ubuntu-latest-4core
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Checked out into ./common so its local actions can be referenced below.
      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: 'project-codeflare/codeflare-common'
          ref: 'main'
          path: 'common'

      - name: Set up specific Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'  # caching pip dependencies

      - name: Setup and start KinD cluster
        uses: ./common/github-actions/kind

      # The `deploy` id is referenced by the log steps' `if:` conditions.
      - name: Deploy Kueue and KubeRay
        id: deploy
        run: |
          # Install Kueue
          echo "Installing Kueue ${KUEUE_VERSION}..."
          kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n kueue-system kueue-controller-manager
          # Install KubeRay from opendatahub-io fork (has RHOAI features)
          echo "Installing KubeRay ${KUBERAY_VERSION} from opendatahub-io..."
          kubectl create -k "github.com/opendatahub-io/kuberay/ray-operator/config/default?ref=${KUBERAY_VERSION}"
          kubectl wait --timeout=120s --for=condition=Available=true deployment kuberay-operator
          # Create default Kueue resources for the tests
          echo "Creating Kueue resources..."
          kubectl apply -f - <<EOF
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: ResourceFlavor
          metadata:
            name: default-flavor
          ---
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: ClusterQueue
          metadata:
            name: cluster-queue
          spec:
            namespaceSelector: {}
            resourceGroups:
            - coveredResources: ["cpu", "memory", "nvidia.com/gpu"]
              flavors:
              - name: default-flavor
                resources:
                - name: cpu
                  nominalQuota: 100
                - name: memory
                  nominalQuota: 100Gi
                - name: nvidia.com/gpu
                  nominalQuota: 10
          ---
          apiVersion: kueue.x-k8s.io/v1beta1
          kind: LocalQueue
          metadata:
            name: local-queue
            namespace: default
            annotations:
              kueue.x-k8s.io/default-queue: "true"
          spec:
            clusterQueue: cluster-queue
          EOF

      - name: Setup Guided notebooks execution
        run: |
          echo "Installing papermill and dependencies..."
          pip install poetry papermill ipython ipykernel
          # Disable virtualenv due to problems using packaged in virtualenv in papermill
          poetry config virtualenvs.create false
          echo "Installing SDK..."
          poetry install --with test,docs

      # The notebook is patched in place (jq / sed) before papermill runs it.
      - name: Run 5_submit_rayjob_cr.ipynb
        run: |
          set -euo pipefail
          # Remove oc login cell, as KinD doesn't support oc login
          jq -r 'del(.cells[] | select(.source[] | contains("oc login")))' 5_submit_rayjob_cr.ipynb > 5_submit_rayjob_cr.ipynb.tmp && mv 5_submit_rayjob_cr.ipynb.tmp 5_submit_rayjob_cr.ipynb
          # Set explicit namespace (notebook stores JSON with escaped quotes)
          sed -i 's/namespace=\\"your-namespace\\"/namespace=\\"default\\"/' 5_submit_rayjob_cr.ipynb
          # Run notebook
          poetry run papermill 5_submit_rayjob_cr.ipynb 5_submit_rayjob_cr_out.ipynb --log-output --execution-timeout 600
        working-directory: demo-notebooks/guided-demos

      # Log steps: run even if the notebook step failed, but only when the
      # operators were deployed successfully.
      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 "${KUEUE_CONTROLLER_POD}" | tee "${TEMP_DIR}/kueue.log"

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

      # NOTE(review): `with:` values are not shell-expanded by the runner, so
      # the literal text ${TEMP_DIR} is passed; this relies on the action
      # expanding it itself - confirm against kind-export-logs.
      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.deploy.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.deploy.outcome == 'success'
        with:
          name: logs-5_submit_rayjob_cr
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log