diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 2addb0a044..f939bc303b 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -15,7 +15,7 @@ from __future__ import absolute_import from functools import lru_cache -from typing import Dict, List, Optional, Union, Any +from typing import Dict, List, Optional, Any, Union import pandas as pd from botocore.exceptions import ClientError @@ -441,6 +441,15 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: model_id=self.model_id, model_version=self.model_version, config_name=config_name ) + @property + def deployment_config(self) -> Optional[Dict[str, Any]]: + """The deployment config that will be applied to the model. + + Returns: + Optional[Dict[str, Any]]: Deployment config that will be applied to the model. + """ + return self._retrieve_selected_deployment_config(self.config_name) + @property def benchmark_metrics(self) -> pd.DataFrame: """Benchmark Metrics for deployment configs @@ -448,7 +457,7 @@ def benchmark_metrics(self) -> pd.DataFrame: Returns: Metrics: Pandas DataFrame object. """ - return pd.DataFrame(self._get_benchmark_data(self.config_name)) + return pd.DataFrame(self._get_benchmarks_data(self.config_name)) def display_benchmark_metrics(self) -> None: """Display Benchmark Metrics for deployment configs.""" @@ -851,8 +860,8 @@ def register_deploy_wrapper(*args, **kwargs): return model_package @lru_cache - def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]: - """Constructs deployment configs benchmark data. + def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: + """Deployment configs benchmark metrics. Args: config_name (str): The name of the selected deployment config. @@ -864,6 +873,23 @@ def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]: config_name, ) + @lru_cache + def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dict[str, Any]]: + """Retrieve the deployment config to apply to the model. + + Args: + config_name (str): The name of the deployment config to retrieve. + Returns: + Optional[Dict[str, Any]]: The retrieved deployment config. 
+ """ + if config_name is None: + return None + + for deployment_config in self._deployment_configs: + if deployment_config.get("DeploymentConfigName") == config_name: + return deployment_config + return None + def _convert_to_deployment_config_metadata( self, config_name: str, metadata_config: JumpStartMetadataConfig ) -> Dict[str, Any]: diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index bf0a84319b..65b6b32739 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2251,17 +2251,17 @@ def to_json(self) -> Dict[str, Any]: return json_obj -class DeploymentConfig(BaseDeploymentConfigDataHolder): +class DeploymentArgs(BaseDeploymentConfigDataHolder): """Dataclass representing a Deployment Config.""" __slots__ = [ - "model_data_download_timeout", - "container_startup_health_check_timeout", "image_uri", "model_data", - "instance_type", "environment", + "instance_type", "compute_resource_requirements", + "model_data_download_timeout", + "container_startup_health_check_timeout", ] def __init__( @@ -2288,9 +2288,10 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder): """Dataclass representing a Deployment Config Metadata""" __slots__ = [ - "config_name", + "deployment_config_name", + "deployment_args", + "acceleration_configs", "benchmark_metrics", - "deployment_config", ] def __init__( @@ -2301,6 +2302,7 @@ def __init__( deploy_kwargs: JumpStartModelDeployKwargs, ): """Instantiates DeploymentConfigMetadata object.""" - self.config_name = config_name + self.deployment_config_name = config_name + self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs) + self.acceleration_configs = None self.benchmark_metrics = benchmark_metrics - self.deployment_config = DeploymentConfig(init_kwargs, deploy_kwargs) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 59bf11b415..3fce6dd105 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1040,24 +1040,40 @@ def extract_metrics_from_deployment_configs( config_name (str): The name of the deployment config use by the model. 
""" - data = {"Config Name": [], "Instance Type": [], "Selected": []} + data = {"Config Name": [], "Instance Type": [], "Selected": [], "Accelerated": []} for index, deployment_config in enumerate(deployment_configs): - if deployment_config.get("DeploymentConfig") is None: + if deployment_config.get("DeploymentArgs") is None: continue benchmark_metrics = deployment_config.get("BenchmarkMetrics") if benchmark_metrics is not None: - data["Config Name"].append(deployment_config.get("ConfigName")) + data["Config Name"].append(deployment_config.get("DeploymentConfigName")) data["Instance Type"].append( - deployment_config.get("DeploymentConfig").get("InstanceType") + deployment_config.get("DeploymentArgs").get("InstanceType") ) data["Selected"].append( "Yes" - if (config_name is not None and config_name == deployment_config.get("ConfigName")) + if ( + config_name is not None + and config_name == deployment_config.get("DeploymentConfigName") + ) else "No" ) + accelerated_configs = deployment_config.get("AccelerationConfigs") + if accelerated_configs is None: + data["Accelerated"].append("No") + else: + data["Accelerated"].append( + "Yes" + if ( + len(accelerated_configs) > 0 + and accelerated_configs[0].get("Enabled", False) + ) + else "No" + ) + if index == 0: for benchmark_metric in benchmark_metrics: column_name = f"{benchmark_metric.get('name')} ({benchmark_metric.get('unit')})" @@ -1068,4 +1084,6 @@ def extract_metrics_from_deployment_configs( if column_name in data.keys(): data[column_name].append(benchmark_metric.get("value")) + if "Yes" not in data["Accelerated"]: + del data["Accelerated"] return data diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index c1760311e7..d3c2581885 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -16,7 +16,7 @@ import copy from abc import ABC, abstractmethod from datetime import datetime, timedelta -from typing import Type, Any, List, Dict +from typing import Type, Any, List, Dict, Optional import logging from sagemaker.model import Model @@ -431,8 +431,35 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800): sharded_supported=sharded_supported, max_tuning_duration=max_tuning_duration ) + def set_deployment_config(self, config_name: Optional[str]) -> None: + """Sets the deployment config to apply to the model. + + Args: + config_name (Optional[str]): + The name of the deployment config. Set to None to unset + any existing config that is applied to the model. + """ + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + raise Exception("Cannot set deployment config to an uninitialized model.") + + self.pysdk_model.set_deployment_config(config_name) + + def get_deployment_config(self) -> Optional[Dict[str, Any]]: + """Gets the deployment config to apply to the model. + + Returns: + Optional[Dict[str, Any]]: Deployment config to apply to this model. 
+ """ + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + + return self.pysdk_model.deployment_config + def display_benchmark_metrics(self): """Display Markdown Benchmark Metrics for deployment configs.""" + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + self.pysdk_model.display_benchmark_metrics() def list_deployment_configs(self) -> List[Dict[str, Any]]: @@ -441,6 +468,9 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + return self.pysdk_model.list_deployment_configs() def _build_for_jumpstart(self): @@ -449,32 +479,29 @@ def _build_for_jumpstart(self): self.secret_key = None self.jumpstart = True - pysdk_model = self._create_pre_trained_js_model() - - image_uri = pysdk_model.image_uri + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() - logger.info("JumpStart ID %s is packaged with Image URI: %s", self.model, image_uri) + logger.info( + "JumpStart ID %s is packaged with Image URI: %s", self.model, self.pysdk_model.image_uri + ) - if self._is_gated_model(pysdk_model) and self.mode != Mode.SAGEMAKER_ENDPOINT: + if self._is_gated_model() and self.mode != Mode.SAGEMAKER_ENDPOINT: raise ValueError( "JumpStart Gated Models are only supported in SAGEMAKER_ENDPOINT mode." ) - if "djl-inference" in image_uri: + if "djl-inference" in self.pysdk_model.image_uri: logger.info("Building for DJL JumpStart Model ID...") self.model_server = ModelServer.DJL_SERVING - - self.pysdk_model = pysdk_model self.image_uri = self.pysdk_model.image_uri self._build_for_djl_jumpstart() self.pysdk_model.tune = self.tune_for_djl_jumpstart - elif "tgi-inference" in image_uri: + elif "tgi-inference" in self.pysdk_model.image_uri: logger.info("Building for TGI JumpStart Model ID...") self.model_server = ModelServer.TGI - - self.pysdk_model = pysdk_model self.image_uri = self.pysdk_model.image_uri self._build_for_tgi_jumpstart() @@ -487,15 +514,13 @@ def _build_for_jumpstart(self): return self.pysdk_model - def _is_gated_model(self, model) -> bool: + def _is_gated_model(self) -> bool: """Determine if ``this`` Model is Gated - Args: - model (Model): Jumpstart Model Returns: bool: ``True`` if ``this`` Model is Gated """ - s3_uri = model.model_data + s3_uri = self.pysdk_model.model_data if isinstance(s3_uri, dict): s3_uri = s3_uri.get("S3DataSource").get("S3Uri") diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py index b83f85ffde..90f037daea 100644 --- a/tests/unit/sagemaker/jumpstart/constants.py +++ b/tests/unit/sagemaker/jumpstart/constants.py @@ -7911,11 +7911,8 @@ DEPLOYMENT_CONFIGS = [ { - "ConfigName": "neuron-inference", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "DeploymentConfigName": "neuron-inference", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -7926,7 +7923,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { 
"SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -7938,15 +7934,17 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "ConfigName": "neuron-inference-budget", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "DeploymentConfigName": "neuron-inference-budget", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -7957,7 +7955,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -7969,15 +7966,17 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "ConfigName": "gpu-inference-budget", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "DeploymentConfigName": "gpu-inference-budget", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -7988,7 +7987,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -8000,15 +7998,17 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "ConfigName": "gpu-inference", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "DeploymentConfigName": "gpu-inference", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -8019,7 +8019,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -8031,8 +8030,13 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": 
[{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, ] diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 476002457b..5d8d048501 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1768,6 +1768,54 @@ def test_model_list_deployment_configs_empty( self.assertTrue(len(configs) == 0) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") + @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") + @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") + @mock.patch("sagemaker.jumpstart.factory.model.Session") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") + @mock.patch("sagemaker.jumpstart.model.Model.deploy") + @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) + def test_model_retrieve_deployment_config( + self, + mock_model_deploy: mock.Mock, + mock_get_model_specs: mock.Mock, + mock_session: mock.Mock, + mock_get_manifest: mock.Mock, + mock_get_instance_rate_per_hour: mock.Mock, + mock_verify_model_region_and_return_specs: mock.Mock, + mock_get_init_kwargs: mock.Mock, + ): + model_id, _ = "pytorch-eqa-bert-base-cased", "*" + + mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) + mock_verify_model_region_and_return_specs.side_effect = ( + lambda *args, **kwargs: get_base_spec_with_prototype_configs() + ) + mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { + "name": "Instance Rate", + "unit": "USD/Hrs", + "value": "0.0083000000", + } + mock_get_model_specs.side_effect = get_prototype_spec_with_configs + mock_get_manifest.side_effect = ( + lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) + ) + mock_model_deploy.return_value = default_predictor + + mock_session.return_value = sagemaker_session + + model = JumpStartModel(model_id=model_id) + + expected = get_base_deployment_configs()[0] + model.set_deployment_config(expected.get("DeploymentConfigName")) + + self.assertEqual(model.deployment_config, expected) + + # Unset + model.set_deployment_config(None) + self.assertIsNone(model.deployment_config) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index 83724e5e8a..210bd8e074 100644 --- a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -50,6 +50,7 @@ get_special_model_spec, get_prototype_manifest, get_base_deployment_configs, + get_base_deployment_configs_with_acceleration_configs, ) from mock import MagicMock @@ -1763,10 +1764,11 @@ def test_get_jumpstart_benchmark_stats_training( @pytest.mark.parametrize( - "config_name, expected", + "config_name, configs, expected", [ ( None, + get_base_deployment_configs(), { "Config Name": [ "neuron-inference", @@ -1786,6 +1788,7 @@ def test_get_jumpstart_benchmark_stats_training( ), ( "neuron-inference", + get_base_deployment_configs_with_acceleration_configs(), { "Config Name": [ "neuron-inference", @@ -1795,6 +1798,7 @@ def test_get_jumpstart_benchmark_stats_training( ], "Instance Type": 
["ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge"], "Selected": ["Yes", "No", "No", "No"], + "Accelerated": ["Yes", "No", "No", "No"], "Instance Rate (USD/Hrs)": [ "0.0083000000", "0.0083000000", @@ -1805,7 +1809,7 @@ def test_get_jumpstart_benchmark_stats_training( ), ], ) -def test_extract_metrics_from_deployment_configs(config_name, expected): - data = utils.extract_metrics_from_deployment_configs(get_base_deployment_configs(), config_name) +def test_extract_metrics_from_deployment_configs(config_name, configs, expected): + data = utils.extract_metrics_from_deployment_configs(configs, config_name) assert data == expected diff --git a/tests/unit/sagemaker/jumpstart/utils.py b/tests/unit/sagemaker/jumpstart/utils.py index e0d6f645a8..96662837b4 100644 --- a/tests/unit/sagemaker/jumpstart/utils.py +++ b/tests/unit/sagemaker/jumpstart/utils.py @@ -307,6 +307,14 @@ def get_base_deployment_configs() -> List[Dict[str, Any]]: return DEPLOYMENT_CONFIGS +def get_base_deployment_configs_with_acceleration_configs() -> List[Dict[str, Any]]: + configs = copy.deepcopy(DEPLOYMENT_CONFIGS) + configs[0]["AccelerationConfigs"] = [ + {"Type": "Speculative-Decoding", "Enabled": True, "Spec": {"Version": "0.1"}} + ] + return configs + + def get_mock_init_kwargs(model_id) -> JumpStartModelInitKwargs: return JumpStartModelInitKwargs( model_id=model_id, diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index 3d5148772e..b83b113209 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -676,13 +676,122 @@ def test_list_deployment_configs( lambda: DEPLOYMENT_CONFIGS ) - model = builder.build() - builder.serve_settings.telemetry_opt_out = True - - configs = model.list_deployment_configs() + configs = builder.list_deployment_configs() self.assertEqual(configs, DEPLOYMENT_CONFIGS) + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_get_deployment_config( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + expected = DEPLOYMENT_CONFIGS[0] + mock_pre_trained_model.return_value.deployment_config = expected + + self.assertEqual(builder.get_deployment_config(), expected) + + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + 
"sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_set_deployment_config( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + builder.build() + builder.set_deployment_config("config-1") + + mock_pre_trained_model.return_value.set_deployment_config.assert_called_with("config-1") + + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_set_deployment_config_ex( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + self.assertRaisesRegex( + Exception, + "Cannot set deployment config to an uninitialized model.", + lambda: ModelBuilder( + model="facebook/galactica-mock-model-id", schema_builder=mock_schema_builder + ).set_deployment_config("config-2"), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) @patch( "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", @@ -715,11 +824,46 @@ def test_display_benchmark_metrics( ) mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri - mock_pre_trained_model.return_value.display_benchmark_metrics.side_effect = ( - lambda *args, **kwargs: "metric data" + mock_pre_trained_model.return_value.list_deployment_configs.side_effect = ( + lambda: DEPLOYMENT_CONFIGS ) - model = builder.build() - builder.serve_settings.telemetry_opt_out = True + builder.list_deployment_configs() + + builder.display_benchmark_metrics() + + mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once() + + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_display_benchmark_metrics_initial( + 
self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + builder.display_benchmark_metrics() - model.display_benchmark_metrics() + mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once()
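
---

Usage notes (not part of the patch):

The diff above reshapes the deployment-config surface of `JumpStartModel`: configs are keyed by `DeploymentConfigName`, deployment arguments live under `DeploymentArgs`, and a new `deployment_config` property exposes the currently applied config. A minimal usage sketch follows; the model ID is borrowed from the unit tests and is illustrative only.

```python
# Usage sketch for the JumpStartModel deployment-config API touched by this
# diff. The model ID is illustrative (taken from the unit tests); any
# JumpStart model that ships deployment configs behaves the same way.
from sagemaker.jumpstart.model import JumpStartModel

model = JumpStartModel(model_id="pytorch-eqa-bert-base-cased")

# Each entry carries DeploymentConfigName, DeploymentArgs, AccelerationConfigs,
# and BenchmarkMetrics, matching the reshaped DEPLOYMENT_CONFIGS fixtures.
configs = model.list_deployment_configs()

# benchmark_metrics is a pandas DataFrame; display_benchmark_metrics() renders it.
print(model.benchmark_metrics)

# Pin a config by name, read it back through the new property, then unset it.
model.set_deployment_config(configs[0]["DeploymentConfigName"])
print(model.deployment_config)  # the full config dict for the pinned name

model.set_deployment_config(None)
assert model.deployment_config is None
```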
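The `extract_metrics_from_deployment_configs` change adds an `Accelerated` column that consults only the first entry of `AccelerationConfigs` and is dropped again when no config is accelerated. A small sketch of the resulting table shape, assuming a single fixture-style entry:

```python
# Sketch of the dict-of-lists table produced by the updated
# extract_metrics_from_deployment_configs. The input mirrors the test
# fixtures; the "Accelerated" column survives only because Enabled is True.
from sagemaker.jumpstart.utils import extract_metrics_from_deployment_configs

configs = [
    {
        "DeploymentConfigName": "neuron-inference",
        "DeploymentArgs": {"InstanceType": "ml.p2.xlarge"},
        "AccelerationConfigs": [
            {"Type": "Speculative-Decoding", "Enabled": True, "Spec": {"Version": "0.1"}}
        ],
        "BenchmarkMetrics": [
            {"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}
        ],
    }
]

table = extract_metrics_from_deployment_configs(configs, "neuron-inference")
# Expected shape, per the unit tests:
# {"Config Name": ["neuron-inference"], "Instance Type": ["ml.p2.xlarge"],
#  "Selected": ["Yes"], "Accelerated": ["Yes"],
#  "Instance Rate (USD/Hrs)": ["0.0083000000"]}
```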
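On the builder side, `list_deployment_configs`, `get_deployment_config`, and `display_benchmark_metrics` now lazily create the underlying JumpStart model, while `set_deployment_config` raises on an uninitialized model. A sketch under the assumption that the model ID resolves to a JumpStart model; the ID, import paths, and `SchemaBuilder` payloads are illustrative assumptions, not fixtures from the diff.

```python
# ModelBuilder flow for the builder-side API added in this diff. The model ID
# and sample payloads are assumptions for illustration only.
from sagemaker.serve.builder.model_builder import ModelBuilder
from sagemaker.serve.builder.schema_builder import SchemaBuilder

builder = ModelBuilder(
    model="huggingface-llm-falcon-7b-bf16",
    schema_builder=SchemaBuilder("sample input", "sample output"),
)

# These lazily initialize the underlying JumpStart model if needed.
print(builder.list_deployment_configs())
builder.display_benchmark_metrics()

# set_deployment_config requires an initialized model; calling it on a fresh
# builder raises "Cannot set deployment config to an uninitialized model."
builder.build()
builder.set_deployment_config("neuron-inference")
print(builder.get_deployment_config())
```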