From f768a11f2da0f10332b268ce3f8853bcafa4254d Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Mon, 22 Jan 2024 14:22:19 +0000 Subject: [PATCH 1/3] fix: make sure gpus are found in local_gpu run --- src/sagemaker/local/image.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 7893ee9260..aa3bfb8269 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -860,7 +860,13 @@ def _create_docker_host( # to setting --runtime=nvidia in the docker commandline. if self.instance_type == "local_gpu": host_config["deploy"] = { - "resources": {"reservations": {"devices": [{"capabilities": ["gpu"]}]}} + "resources": { + "reservations": { + "devices": [ + {"count": "all", "capabilities": ["gpu"]} + ] + } + } } if not self.is_studio and command == "serve": From 0f6b2aac874b32df36749145c52ef448a2c58bed Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Tue, 13 Feb 2024 19:42:58 +0000 Subject: [PATCH 2/3] fix: black formatting --- src/sagemaker/local/image.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index aa3bfb8269..39c879ef6d 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -861,11 +861,7 @@ def _create_docker_host( if self.instance_type == "local_gpu": host_config["deploy"] = { "resources": { - "reservations": { - "devices": [ - {"count": "all", "capabilities": ["gpu"]} - ] - } + "reservations": {"devices": [{"count": "all", "capabilities": ["gpu"]}]} } } From 1cefc73ea92b1a4814e1ad522354e6645e178ba3 Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Wed, 14 Feb 2024 21:39:27 +0000 Subject: [PATCH 3/3] fix: adjust unit test --- tests/unit/sagemaker/local/test_local_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/sagemaker/local/test_local_image.py b/tests/unit/sagemaker/local/test_local_image.py index ebca91a9f9..08c55fa0b4 100644 --- a/tests/unit/sagemaker/local/test_local_image.py +++ b/tests/unit/sagemaker/local/test_local_image.py @@ -871,7 +871,7 @@ def test_container_has_gpu_support(tmpdir, sagemaker_session): docker_host = sagemaker_container._create_docker_host("host-1", {}, set(), "train", []) assert "deploy" in docker_host assert docker_host["deploy"] == { - "resources": {"reservations": {"devices": [{"capabilities": ["gpu"]}]}} + "resources": {"reservations": {"devices": [{"count": "all", "capabilities": ["gpu"]}]}} }