Skip to content

Update demos for v0.6.1 #296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,13 @@
"metadata": {},
"outputs": [],
"source": [
"# Create authentication object for oc user permissions and login\n",
"# Create authentication object for user permissions\n",
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
"auth = TokenAuthentication(\n",
" token = \"XXXX\",\n",
" server = \"XXXX\",\n",
" skip_tls = True\n",
" skip_tls = False\n",
")\n",
"auth.login()"
]
Expand Down Expand Up @@ -88,13 +90,12 @@
"# Create our cluster and submit appwrapper\n",
"cluster = Cluster(ClusterConfiguration(name='hfgputest', \n",
" namespace=\"default\",\n",
" min_worker=1, \n",
" max_worker=1, \n",
" num_workers=1,\n",
" min_cpus=8, \n",
" max_cpus=8, \n",
" min_memory=16, \n",
" max_memory=16, \n",
" gpu=4,\n",
" num_gpus=4,\n",
" image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\",\n",
" instascale=True, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))"
]
Expand Down Expand Up @@ -319,7 +320,9 @@
"#install additional libraries that will be required for this training\n",
"runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n",
"\n",
"ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n",
"# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n",
"# To see how to connect from your laptop, go to demo-notebooks/additional-demos/local_interactive.ipynb\n",
"ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n",
"\n",
"print(\"Ray cluster is up and running: \", ray.is_initialized())"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"cluster_name = \"hfgputest-1\"\n",
"local_interactive = True\n",
"\n",
"cluster = Cluster(ClusterConfiguration(local_interactive=local_interactive, namespace=namespace, name=cluster_name, min_worker=1, max_worker=1, min_cpus=1, max_cpus=1, min_memory=4, max_memory=4, gpu=0, instascale=False, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))"
"cluster = Cluster(ClusterConfiguration(local_interactive=local_interactive, namespace=namespace, name=cluster_name, num_workers=1, min_cpus=1, max_cpus=1, min_memory=4, max_memory=4, num_gpus=0, instascale=False, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))"
]
},
{
Expand Down Expand Up @@ -319,7 +319,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.8.17"
},
"vscode": {
"interpreter": {
Expand Down
3,377 changes: 0 additions & 3,377 deletions demo-notebooks/batch-job/batch_mnist_mcad.ipynb

This file was deleted.

5,250 changes: 0 additions & 5,250 deletions demo-notebooks/batch-job/batch_mnist_ray.ipynb

This file was deleted.

160 changes: 0 additions & 160 deletions demo-notebooks/batch-job/mnist.py

This file was deleted.

4 changes: 0 additions & 4 deletions demo-notebooks/batch-job/requirements.txt

This file was deleted.

11 changes: 6 additions & 5 deletions demo-notebooks/guided-demos/0_basic_ray.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Create authentication object for oc user permissions\n",
"# Create authentication object for user permissions\n",
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
"auth = TokenAuthentication(\n",
" token = \"XXXXX\",\n",
" server = \"XXXXX\",\n",
Expand Down Expand Up @@ -58,14 +60,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='raytest',\n",
" namespace='default',\n",
" min_worker=2,\n",
" max_worker=2,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
" image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\",\n",
" gpu=0,\n",
" num_gpus=0,\n",
" image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\", #current default\n",
" instascale=False\n",
"))"
]
Expand Down
10 changes: 5 additions & 5 deletions demo-notebooks/guided-demos/1_basic_instascale.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Create authentication object for oc user permissions\n",
"# Create authentication object for user permissions\n",
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
"auth = TokenAuthentication(\n",
" token = \"XXXXX\",\n",
" server = \"XXXXX\",\n",
Expand Down Expand Up @@ -55,14 +57,12 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='instascaletest',\n",
" namespace='default',\n",
" min_worker=2,\n",
" max_worker=2,\n",
" num_workers=2,\n",
" min_cpus=2,\n",
" max_cpus=2,\n",
" min_memory=8,\n",
" max_memory=8,\n",
" gpu=1,\n",
" image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\",\n",
" num_gpus=1,\n",
" instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n",
" machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n",
"))"
Expand Down
30 changes: 24 additions & 6 deletions demo-notebooks/guided-demos/2_basic_jobs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Create authentication object for oc user permissions\n",
"# Create authentication object for user permissions\n",
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
"auth = TokenAuthentication(\n",
" token = \"XXXXX\",\n",
" server = \"XXXXX\",\n",
Expand Down Expand Up @@ -55,14 +57,12 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='jobtest',\n",
" namespace='default',\n",
" min_worker=2,\n",
" max_worker=2,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
" gpu=0,\n",
" image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\",\n",
" num_gpus=0,\n",
" instascale=False\n",
"))"
]
Expand Down Expand Up @@ -135,7 +135,7 @@
"id": "5b9ae53a",
"metadata": {},
"source": [
"Now we can take a look at the status of our submitted job, as well as the logs:"
"Now we can take a look at the status of our submitted job, as well as retrieve the full logs:"
]
},
{
Expand All @@ -158,6 +158,24 @@
"job.logs()"
]
},
{
"cell_type": "markdown",
"id": "c8267fb2",
"metadata": {},
"source": [
"You can also view organized logs, status, and other information directly through the Ray cluster's dashboard:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f3861d0",
"metadata": {},
"outputs": [],
"source": [
"cluster.cluster_dashboard_uri()"
]
},
{
"cell_type": "markdown",
"id": "5af8cd32",
Expand Down
16 changes: 9 additions & 7 deletions demo-notebooks/guided-demos/3_basic_interactive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Create authentication object for oc user permissions\n",
"# Create authentication object for user permissions\n",
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
"auth = TokenAuthentication(\n",
" token = \"XXXXX\",\n",
" server = \"XXXXX\",\n",
Expand Down Expand Up @@ -55,15 +57,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='interactivetest',\n",
" namespace='default',\n",
" min_worker=2,\n",
" max_worker=2,\n",
" num_workers=2,\n",
" min_cpus=2,\n",
" max_cpus=2,\n",
" min_memory=8,\n",
" max_memory=8,\n",
" gpu=1,\n",
" image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\",\n",
" instascale=True,\n",
" num_gpus=1,\n",
" instascale=True, #<---instascale enabled\n",
" machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n",
" \n",
"))"
Expand Down Expand Up @@ -142,7 +142,9 @@
"#install additional libraries that will be required for model training\n",
"runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n",
"\n",
"ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n",
"# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n",
"# To see how to connect from your laptop, go to demo-notebooks/additional-demos/local_interactive.ipynb\n",
"ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n",
"\n",
"print(\"Ray cluster is up and running: \", ray.is_initialized())"
]
Expand Down
Loading