diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 980a720ac1..f4bc0fbfb3 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -2033,6 +2033,45 @@ def create_tuning_job( "Only one of training_config and training_config_list should be provided." ) + tune_request = self._get_tuning_request( + job_name=job_name, + tuning_config=tuning_config, + training_config=training_config, + training_config_list=training_config_list, + warm_start_config=warm_start_config, + tags=tags, + ) + + LOGGER.info("Creating hyperparameter tuning job with name: %s", job_name) + LOGGER.debug("tune request: %s", json.dumps(tune_request, indent=4)) + self.sagemaker_client.create_hyper_parameter_tuning_job(**tune_request) + + def _get_tuning_request( + self, + job_name, + tuning_config, + training_config=None, + training_config_list=None, + warm_start_config=None, + tags=None, + ): + """Construct CreateHyperParameterTuningJob request + + Args: + job_name (str): Name of the tuning job being created. + tuning_config (dict): Configuration to launch the tuning job. + training_config (dict): Configuration to launch training jobs under the tuning job + using a single algorithm. + training_config_list (list[dict]): A list of configurations to launch training jobs + under the tuning job using one or multiple algorithms. Either training_config + or training_config_list should be provided, but not both. + warm_start_config (dict): Configuration defining the type of warm start and + other required configurations. + tags (list[dict]): List of tags for labeling the tuning job. For more, see + https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + Returns: + dict: A dictionary for CreateHyperParameterTuningJob request + """ tune_request = { "HyperParameterTuningJobName": job_name, "HyperParameterTuningJobConfig": self._map_tuning_config(**tuning_config), @@ -2053,9 +2092,7 @@ def create_tuning_job( if tags is not None: tune_request["Tags"] = tags - LOGGER.info("Creating hyperparameter tuning job with name: %s", job_name) - LOGGER.debug("tune request: %s", json.dumps(tune_request, indent=4)) - self.sagemaker_client.create_hyper_parameter_tuning_job(**tune_request) + return tune_request def describe_tuning_job(self, job_name): """Calls DescribeHyperParameterTuningJob API for the given job name, returns the response. diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 57f81648c2..ac363e0c68 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -346,7 +346,9 @@ def _prepare_static_hyperparameters_for_tuning(self, include_cls_metadata=False) estimator_name: self._prepare_static_hyperparameters( estimator, self._hyperparameter_ranges_dict[estimator_name], - include_cls_metadata.get(estimator_name, False), + include_cls_metadata.get(estimator_name, False) + if isinstance(include_cls_metadata, dict) + else include_cls_metadata, ) for (estimator_name, estimator) in self.estimator_dict.items() } @@ -1460,6 +1462,23 @@ def start_new(cls, tuner, inputs): sagemaker.tuner._TuningJob: Constructed object that captures all information about the started job. 
""" + tuner_args = cls._get_tuner_args(tuner, inputs) + tuner.sagemaker_session.create_tuning_job(**tuner_args) + + return cls(tuner.sagemaker_session, tuner._current_job_name) + + @classmethod + def _get_tuner_args(cls, tuner, inputs): + """Gets a dict of arguments for a new Amazon SageMaker tuning job from the tuner + + Args: + tuner (:class:`~sagemaker.tuner.HyperparameterTuner`): + The ``HyperparameterTuner`` instance that started the job. + inputs: Information about the training data. Please refer to the + ``fit()`` method of the associated estimator. + Returns: + Dict: dict for `sagemaker.session.Session.tune` method + """ warm_start_config_req = None if tuner.warm_start_config: warm_start_config_req = tuner.warm_start_config.to_input_req() @@ -1506,8 +1525,7 @@ def start_new(cls, tuner, inputs): for estimator_name in sorted(tuner.estimator_dict.keys()) ] - tuner.sagemaker_session.create_tuning_job(**tuner_args) - return cls(tuner.sagemaker_session, tuner._current_job_name) + return tuner_args @staticmethod def _prepare_training_config( diff --git a/src/sagemaker/workflow/properties.py b/src/sagemaker/workflow/properties.py index 7d7c42a9ee..2e029fc333 100644 --- a/src/sagemaker/workflow/properties.py +++ b/src/sagemaker/workflow/properties.py @@ -13,7 +13,7 @@ """The properties definitions for workflow.""" from __future__ import absolute_import -from typing import Dict, Union +from typing import Dict, Union, List import attr @@ -40,27 +40,35 @@ def __new__(mcs, *args, **kwargs): class Properties(metaclass=PropertiesMeta): """Properties for use in workflow expressions.""" - def __init__(self, path: str, shape_name: str = None): + def __init__( + self, + path: str, + shape_name: str = None, + shape_names: List[str] = None, + ): """Create a Properties instance representing the given shape. Args: path (str): The parent path of the Properties instance. shape_name (str): The botocore sagemaker service model shape name. + shape_names (str): A List of the botocore sagemaker service model shape name. """ self._path = path - self._shape_name = shape_name - - shape = Properties._shapes.get(self._shape_name, {}) - shape_type = shape.get("type") - if shape_type in Properties._primitive_types: - self.__str__ = shape_name - elif shape_type == "structure": - members = shape["members"] - for key, info in members.items(): - if Properties._shapes.get(info["shape"], {}).get("type") == "list": - self.__dict__[key] = PropertiesList(f"{path}.{key}", info["shape"]) - else: - self.__dict__[key] = Properties(f"{path}.{key}", info["shape"]) + shape_names = [] if shape_names is None else shape_names + self._shape_names = shape_names if shape_name is None else [shape_name] + shape_names + + for name in self._shape_names: + shape = Properties._shapes.get(name, {}) + shape_type = shape.get("type") + if shape_type in Properties._primitive_types: + self.__str__ = name + elif shape_type == "structure": + members = shape["members"] + for key, info in members.items(): + if Properties._shapes.get(info["shape"], {}).get("type") == "list": + self.__dict__[key] = PropertiesList(f"{path}.{key}", info["shape"]) + else: + self.__dict__[key] = Properties(f"{path}.{key}", info["shape"]) @property def expr(self): @@ -77,8 +85,10 @@ def __init__(self, path: str, shape_name: str = None): Args: path (str): The parent path of the PropertiesList instance. shape_name (str): The botocore sagemaker service model shape name. + root_shape_name (str): The botocore sagemaker service model shape name. 
""" super(PropertiesList, self).__init__(path, shape_name) + self.shape_name = shape_name self._items: Dict[Union[int, str], Properties] = dict() def __getitem__(self, item: Union[int, str]): @@ -88,7 +98,7 @@ def __getitem__(self, item: Union[int, str]): item (Union[int, str]): The index of the item in sequence. """ if item not in self._items.keys(): - shape = Properties._shapes.get(self._shape_name) + shape = Properties._shapes.get(self.shape_name) member = shape["member"]["shape"] if isinstance(item, str): property_item = Properties(f"{self._path}['{item}']", member) diff --git a/src/sagemaker/workflow/steps.py b/src/sagemaker/workflow/steps.py index 5e36392b70..4b7021bd81 100644 --- a/src/sagemaker/workflow/steps.py +++ b/src/sagemaker/workflow/steps.py @@ -30,6 +30,7 @@ Processor, ) from sagemaker.transformer import Transformer, _TransformJob +from sagemaker.tuner import HyperparameterTuner, _TuningJob from sagemaker.workflow.entities import ( DefaultEnumMeta, Entity, @@ -39,6 +40,7 @@ PropertyFile, Properties, ) +from sagemaker.workflow.functions import Join class StepTypeEnum(Enum, metaclass=DefaultEnumMeta): @@ -51,6 +53,7 @@ class StepTypeEnum(Enum, metaclass=DefaultEnumMeta): TRAINING = "Training" TRANSFORM = "Transform" CALLBACK = "Callback" + TUNING = "Tuning" @attr.s @@ -92,6 +95,7 @@ def add_depends_on(self, step_names: List[str]): """Add step names to the current step depends on list""" if not step_names: return + if not self.depends_on: self.depends_on = [] self.depends_on.extend(step_names) @@ -429,3 +433,132 @@ def to_request(self) -> RequestType: property_file.expr for property_file in self.property_files ] return request_dict + + +class TuningStep(Step): + """Tuning step for workflow.""" + + def __init__( + self, + name: str, + tuner: HyperparameterTuner, + inputs=None, + job_arguments: List[str] = None, + cache_config: CacheConfig = None, + depends_on: List[str] = None, + ): + """Construct a TuningStep, given a `HyperparameterTuner` instance. + + In addition to the tuner instance, the other arguments are those that are supplied to + the `fit` method of the `sagemaker.tuner.HyperparameterTuner`. + + Args: + name (str): The name of the tuning step. + tuner (HyperparameterTuner): A `sagemaker.tuner.HyperparameterTuner` instance. + inputs: Information about the training data. Please refer to the + ``fit()`` method of the associated estimator, as this can take + any of the following forms: + + * (str) - The S3 location where training data is saved. + * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - + If using multiple channels for training data, you can specify + a dict mapping channel names to strings or + :func:`~sagemaker.inputs.TrainingInput` objects. + * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources + that can provide additional information about the training dataset. + See :func:`sagemaker.inputs.TrainingInput` for full details. + * (sagemaker.session.FileSystemInput) - channel configuration for + a file system data source that can provide additional information as well as + the path to the training dataset. + * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of + Amazon :class:~`Record` objects serialized and stored in S3. + For use with an estimator for an Amazon algorithm. + * (sagemaker.amazon.amazon_estimator.FileSystemRecordSet) - + Amazon SageMaker channel configuration for a file system data source for + Amazon algorithms. 
+                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
+                  :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects,
+                  where each instance is a different channel of training data.
+                * (list[sagemaker.amazon.amazon_estimator.FileSystemRecordSet]) - A list of
+                  :class:~`sagemaker.amazon.amazon_estimator.FileSystemRecordSet` objects,
+                  where each instance is a different channel of training data.
+            job_arguments (List[str]): A list of strings to be passed into the tuning job.
+                Defaults to `None`.
+            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
+            depends_on (List[str]): A list of step names this `sagemaker.workflow.steps.TuningStep`
+                depends on.
+        """
+        super(TuningStep, self).__init__(name, StepTypeEnum.TUNING, depends_on)
+        self.tuner = tuner
+        self.inputs = inputs
+        self.job_arguments = job_arguments
+        self._properties = Properties(
+            path=f"Steps.{name}",
+            shape_names=[
+                "DescribeHyperParameterTuningJobResponse",
+                "ListTrainingJobsForHyperParameterTuningJobResponse",
+            ],
+        )
+        self.cache_config = cache_config
+
+    @property
+    def arguments(self) -> RequestType:
+        """The arguments dict that is used to call `create_hyper_parameter_tuning_job`.
+
+        NOTE: The CreateHyperParameterTuningJob request is not quite the
+        args list that workflow needs; the HyperParameterTuningJobName
+        attribute cannot be included.
+        """
+        if self.tuner.estimator is not None:
+            self.tuner.estimator._prepare_for_training()
+        else:
+            for _, estimator in self.tuner.estimator_dict.items():
+                estimator._prepare_for_training()
+
+        self.tuner._prepare_for_tuning()
+        tuner_args = _TuningJob._get_tuner_args(self.tuner, self.inputs)
+        request_dict = self.tuner.sagemaker_session._get_tuning_request(**tuner_args)
+        request_dict.pop("HyperParameterTuningJobName")
+
+        return request_dict
+
+    @property
+    def properties(self):
+        """A Properties object representing the tuning job response data models.
+
+        Covers both `DescribeHyperParameterTuningJobResponse` and
+        `ListTrainingJobsForHyperParameterTuningJobResponse`.
+        """
+        return self._properties
+
+    def to_request(self) -> RequestType:
+        """Updates the dictionary with cache configuration."""
+        request_dict = super().to_request()
+        if self.cache_config:
+            request_dict.update(self.cache_config.config)
+
+        return request_dict
+
+    def get_top_model_s3_uri(self, top_k: int, s3_bucket: str, prefix: str = ""):
+        """Get the model artifact S3 URI from one of the top performing training jobs.
+
+        Args:
+            top_k (int): The index of the training job, ranked by objective metric.
+                The tuning step stores up to 50 top performing training jobs;
+                a valid top_k value is therefore from 0 to 49, with the best
+                training job's model at index 0.
+            s3_bucket (str): The S3 bucket where the training job stores output artifacts.
+            prefix (str): The S3 key prefix under which the output artifacts are stored.
+        """
+        values = ["s3:/", s3_bucket]  # joined on "/", "s3:/" yields the "s3://" scheme
+        if prefix is not None and prefix != "":
+            values.append(prefix)
+
+        return Join(
+            on="/",
+            values=values
+            + [
+                self.properties.TrainingJobSummaries[top_k].TrainingJobName,
+                "output/model.tar.gz",
+            ],
+        )
diff --git a/tests/data/pytorch_mnist/mnist.py b/tests/data/pytorch_mnist/mnist.py
index f22bd0f315..ef1c15ae60 100644
--- a/tests/data/pytorch_mnist/mnist.py
+++ b/tests/data/pytorch_mnist/mnist.py
@@ -182,7 +182,7 @@ def train(args):
         accuracy = test(model, test_loader, device)
     save_model(model, args.model_dir)
 
-    logger.debug("Overall test accuracy: {}".format(accuracy))
+    logger.debug("Overall test accuracy: {};".format(accuracy))
 
 
 def test(model, test_loader, device):
diff --git a/tests/integ/test_workflow.py b/tests/integ/test_workflow.py
index f88e8cc1b8..f108e74e5d 100644
--- a/tests/integ/test_workflow.py
+++ b/tests/integ/test_workflow.py
@@ -38,6 +38,7 @@
 from sagemaker.model import Model
 from sagemaker.processing import ProcessingInput, ProcessingOutput, FeatureStoreOutput
 from sagemaker.pytorch.estimator import PyTorch
+from sagemaker.tuner import HyperparameterTuner, IntegerParameter
 from sagemaker.s3 import S3Uploader
 from sagemaker.session import get_execution_role
 from sagemaker.sklearn.estimator import SKLearn
@@ -60,6 +61,7 @@
     ProcessingStep,
     TrainingStep,
     CacheConfig,
+    TuningStep,
 )
 from sagemaker.workflow.step_collections import RegisterModel
 from sagemaker.workflow.pipeline import Pipeline
@@ -899,6 +901,116 @@ def test_conditional_pytorch_training_model_registration(
         pass
 
 
+def test_tuning(
+    sagemaker_session,
+    role,
+    cpu_instance_type,
+    pipeline_name,
+    region_name,
+):
+    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
+    entry_point = os.path.join(base_dir, "mnist.py")
+    input_path = sagemaker_session.upload_data(
+        path=os.path.join(base_dir, "training"),
+        key_prefix="integ-test-data/pytorch_mnist/training",
+    )
+    inputs = TrainingInput(s3_data=input_path)
+
+    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+
+    pytorch_estimator = PyTorch(
+        entry_point=entry_point,
+        role=role,
+        framework_version="1.5.0",
+        py_version="py3",
+        instance_count=1,
+        instance_type="ml.m5.xlarge",
+        sagemaker_session=sagemaker_session,
+        enable_sagemaker_metrics=True,
+    )
+
+    hyperparameter_ranges = {
+        "batch-size": IntegerParameter(64, 128),
+    }
+
+    tuner = HyperparameterTuner(
+        estimator=pytorch_estimator,
+        objective_metric_name="test:acc",
+        objective_type="Maximize",
+        hyperparameter_ranges=hyperparameter_ranges,
+        metric_definitions=[{"Name": "test:acc", "Regex": "Overall test accuracy: (.*?);"}],
+        max_jobs=2,
+        max_parallel_jobs=2,
+    )
+
+    step_tune = TuningStep(
+        name="my-tuning-step",
+        tuner=tuner,
+        inputs=inputs,
+    )
+
+    best_model = Model(
+        image_uri=pytorch_estimator.training_image_uri(),
+        model_data=step_tune.get_top_model_s3_uri(
+            top_k=0,
+            s3_bucket=sagemaker_session.default_bucket(),
+        ),
+        sagemaker_session=sagemaker_session,
+        role=role,
+    )
+    model_inputs = CreateModelInput(
+        instance_type="ml.m5.large",
+        accelerator_type="ml.eia1.medium",
+    )
+    step_best_model = CreateModelStep(
+        name="1st-model",
+        model=best_model,
+        inputs=model_inputs,
+    )
+
+    second_best_model = Model(
+        
image_uri=pytorch_estimator.training_image_uri(), + model_data=step_tune.get_top_model_s3_uri( + top_k=1, + s3_bucket=sagemaker_session.default_bucket(), + ), + sagemaker_session=sagemaker_session, + role=role, + ) + + step_second_best_model = CreateModelStep( + name="2nd-best-model", + model=second_best_model, + inputs=model_inputs, + ) + + pipeline = Pipeline( + name=pipeline_name, + parameters=[instance_count, instance_type], + steps=[step_tune, step_best_model, step_second_best_model], + sagemaker_session=sagemaker_session, + ) + + try: + response = pipeline.create(role) + create_arn = response["PipelineArn"] + assert re.match( + fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}", create_arn + ) + + execution = pipeline.start(parameters={}) + assert re.match( + fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/", + execution.arn, + ) + finally: + try: + pipeline.delete() + except Exception: + pass + + def test_model_registration_with_model_repack( sagemaker_session, role, diff --git a/tests/unit/sagemaker/workflow/test_properties.py b/tests/unit/sagemaker/workflow/test_properties.py index 6b5591931d..ff163626c2 100644 --- a/tests/unit/sagemaker/workflow/test_properties.py +++ b/tests/unit/sagemaker/workflow/test_properties.py @@ -36,3 +36,34 @@ def test_properties_describe_processing_job_response(): assert prop.ProcessingOutputConfig.Outputs["MyOutputName"].S3Output.S3Uri.expr == { "Get": "Steps.MyStep.ProcessingOutputConfig.Outputs['MyOutputName'].S3Output.S3Uri" } + + +def test_properties_tuning_job(): + prop = Properties( + "Steps.MyStep", + shape_names=[ + "DescribeHyperParameterTuningJobResponse", + "ListTrainingJobsForHyperParameterTuningJobResponse", + ], + ) + some_prop_names = [ + "BestTrainingJob", + "HyperParameterTuningJobConfig", + "ObjectiveStatusCounters", + "TrainingJobSummaries", + ] + for name in some_prop_names: + assert name in prop.__dict__.keys() + + assert prop.HyperParameterTuningJobName.expr == { + "Get": "Steps.MyStep.HyperParameterTuningJobName" + } + assert prop.HyperParameterTuningJobConfig.HyperParameterTuningJobObjective.Type.expr == { + "Get": "Steps.MyStep.HyperParameterTuningJobConfig.HyperParameterTuningJobObjective.Type" + } + assert prop.ObjectiveStatusCounters.Succeeded.expr == { + "Get": "Steps.MyStep.ObjectiveStatusCounters.Succeeded" + } + assert prop.TrainingJobSummaries[0].TrainingJobName.expr == { + "Get": "Steps.MyStep.TrainingJobSummaries[0].TrainingJobName" + } diff --git a/tests/unit/sagemaker/workflow/test_steps.py b/tests/unit/sagemaker/workflow/test_steps.py index 34f1856c19..25d2b9f20e 100644 --- a/tests/unit/sagemaker/workflow/test_steps.py +++ b/tests/unit/sagemaker/workflow/test_steps.py @@ -34,6 +34,12 @@ ProcessingOutput, ScriptProcessor, ) +from sagemaker.tuner import ( + HyperparameterTuner, + ContinuousParameter, + WarmStartConfig, + WarmStartTypes, +) from sagemaker.network import NetworkConfig from sagemaker.transformer import Transformer from sagemaker.workflow.properties import Properties @@ -45,6 +51,7 @@ TrainingStep, TransformStep, CreateModelStep, + TuningStep, CacheConfig, ) from tests.unit import DATA_DIR @@ -488,3 +495,367 @@ def test_properties_describe_processing_job_response(): assert prop.ProcessingOutputConfig.Outputs["MyOutputName"].S3Output.S3Uri.expr == { "Get": "Steps.MyStep.ProcessingOutputConfig.Outputs['MyOutputName'].S3Output.S3Uri" } + + +def test_single_algo_tuning_step(sagemaker_session): + data_source_uri_parameter = ParameterString( + 
name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest" + ) + estimator = Estimator( + image_uri=IMAGE_URI, + role=ROLE, + instance_count=1, + instance_type="ml.c5.4xlarge", + profiler_config=ProfilerConfig(system_monitor_interval_millis=500), + rules=[], + sagemaker_session=sagemaker_session, + ) + estimator.set_hyperparameters( + num_layers=18, + image_shape="3,224,224", + num_classes=257, + num_training_samples=15420, + mini_batch_size=128, + epochs=10, + optimizer="sgd", + top_k="2", + precision_dtype="float32", + augmentation_type="crop", + ) + + hyperparameter_ranges = { + "learning_rate": ContinuousParameter(0.0001, 0.05), + "momentum": ContinuousParameter(0.0, 0.99), + "weight_decay": ContinuousParameter(0.0, 0.99), + } + + tuner = HyperparameterTuner( + estimator=estimator, + objective_metric_name="val:accuracy", + hyperparameter_ranges=hyperparameter_ranges, + objective_type="Maximize", + max_jobs=5, + max_parallel_jobs=2, + early_stopping_type="OFF", + strategy="Bayesian", + warm_start_config=WarmStartConfig( + warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM, + parents=set(["parent-hpo"]), + ), + ) + + inputs = TrainingInput(s3_data=data_source_uri_parameter) + + tuning_step = TuningStep( + name="MyTuningStep", + tuner=tuner, + inputs=inputs, + ) + + assert tuning_step.to_request() == { + "Name": "MyTuningStep", + "Type": "Tuning", + "Arguments": { + "HyperParameterTuningJobConfig": { + "Strategy": "Bayesian", + "ResourceLimits": {"MaxNumberOfTrainingJobs": 5, "MaxParallelTrainingJobs": 2}, + "TrainingJobEarlyStoppingType": "OFF", + "HyperParameterTuningJobObjective": { + "Type": "Maximize", + "MetricName": "val:accuracy", + }, + "ParameterRanges": { + "ContinuousParameterRanges": [ + { + "Name": "learning_rate", + "MinValue": "0.0001", + "MaxValue": "0.05", + "ScalingType": "Auto", + }, + { + "Name": "momentum", + "MinValue": "0.0", + "MaxValue": "0.99", + "ScalingType": "Auto", + }, + { + "Name": "weight_decay", + "MinValue": "0.0", + "MaxValue": "0.99", + "ScalingType": "Auto", + }, + ], + "CategoricalParameterRanges": [], + "IntegerParameterRanges": [], + }, + }, + "TrainingJobDefinition": { + "StaticHyperParameters": { + "num_layers": "18", + "image_shape": "3,224,224", + "num_classes": "257", + "num_training_samples": "15420", + "mini_batch_size": "128", + "epochs": "10", + "optimizer": "sgd", + "top_k": "2", + "precision_dtype": "float32", + "augmentation_type": "crop", + }, + "RoleArn": "DummyRole", + "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"}, + "ResourceConfig": { + "InstanceCount": 1, + "InstanceType": "ml.c5.4xlarge", + "VolumeSizeInGB": 30, + }, + "StoppingCondition": {"MaxRuntimeInSeconds": 86400}, + "AlgorithmSpecification": { + "TrainingInputMode": "File", + "TrainingImage": "fakeimage", + }, + "InputDataConfig": [ + { + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": data_source_uri_parameter, + "S3DataDistributionType": "FullyReplicated", + } + }, + "ChannelName": "training", + } + ], + }, + "WarmStartConfig": { + "WarmStartType": "IdenticalDataAndAlgorithm", + "ParentHyperParameterTuningJobs": [ + { + "HyperParameterTuningJobName": "parent-hpo", + } + ], + }, + }, + } + + assert tuning_step.properties.HyperParameterTuningJobName.expr == { + "Get": "Steps.MyTuningStep.HyperParameterTuningJobName" + } + assert tuning_step.properties.TrainingJobSummaries[0].TrainingJobName.expr == { + "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName" + } + assert 
tuning_step.get_top_model_s3_uri(0, "my-bucket", "my-prefix").expr == { + "Std:Join": { + "On": "/", + "Values": [ + "s3:/", + "my-bucket", + "my-prefix", + {"Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"}, + "output/model.tar.gz", + ], + } + } + + +def test_multi_algo_tuning_step(sagemaker_session): + data_source_uri_parameter = ParameterString( + name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest" + ) + estimator = Estimator( + image_uri=IMAGE_URI, + role=ROLE, + instance_count=1, + instance_type="ml.c5.4xlarge", + profiler_config=ProfilerConfig(system_monitor_interval_millis=500), + rules=[], + sagemaker_session=sagemaker_session, + ) + + estimator.set_hyperparameters( + num_layers=18, + image_shape="3,224,224", + num_classes=257, + num_training_samples=15420, + mini_batch_size=128, + epochs=10, + optimizer="sgd", + top_k="2", + precision_dtype="float32", + augmentation_type="crop", + ) + + hyperparameter_ranges = { + "learning_rate": ContinuousParameter(0.0001, 0.05), + "momentum": ContinuousParameter(0.0, 0.99), + "weight_decay": ContinuousParameter(0.0, 0.99), + } + + tuner = HyperparameterTuner.create( + estimator_dict={ + "estimator-1": estimator, + "estimator-2": estimator, + }, + objective_type="Minimize", + objective_metric_name_dict={ + "estimator-1": "val:loss", + "estimator-2": "val:loss", + }, + hyperparameter_ranges_dict={ + "estimator-1": hyperparameter_ranges, + "estimator-2": hyperparameter_ranges, + }, + ) + + inputs = TrainingInput(s3_data=data_source_uri_parameter) + + tuning_step = TuningStep( + name="MyTuningStep", + tuner=tuner, + inputs={ + "estimator-1": inputs, + "estimator-2": inputs, + }, + ) + + assert tuning_step.to_request() == { + "Name": "MyTuningStep", + "Type": "Tuning", + "Arguments": { + "HyperParameterTuningJobConfig": { + "Strategy": "Bayesian", + "ResourceLimits": {"MaxNumberOfTrainingJobs": 1, "MaxParallelTrainingJobs": 1}, + "TrainingJobEarlyStoppingType": "Off", + }, + "TrainingJobDefinitions": [ + { + "StaticHyperParameters": { + "num_layers": "18", + "image_shape": "3,224,224", + "num_classes": "257", + "num_training_samples": "15420", + "mini_batch_size": "128", + "epochs": "10", + "optimizer": "sgd", + "top_k": "2", + "precision_dtype": "float32", + "augmentation_type": "crop", + }, + "RoleArn": "DummyRole", + "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"}, + "ResourceConfig": { + "InstanceCount": 1, + "InstanceType": "ml.c5.4xlarge", + "VolumeSizeInGB": 30, + }, + "StoppingCondition": {"MaxRuntimeInSeconds": 86400}, + "AlgorithmSpecification": { + "TrainingInputMode": "File", + "TrainingImage": "fakeimage", + }, + "InputDataConfig": [ + { + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": data_source_uri_parameter, + "S3DataDistributionType": "FullyReplicated", + } + }, + "ChannelName": "training", + } + ], + "DefinitionName": "estimator-1", + "TuningObjective": {"Type": "Minimize", "MetricName": "val:loss"}, + "HyperParameterRanges": { + "ContinuousParameterRanges": [ + { + "Name": "learning_rate", + "MinValue": "0.0001", + "MaxValue": "0.05", + "ScalingType": "Auto", + }, + { + "Name": "momentum", + "MinValue": "0.0", + "MaxValue": "0.99", + "ScalingType": "Auto", + }, + { + "Name": "weight_decay", + "MinValue": "0.0", + "MaxValue": "0.99", + "ScalingType": "Auto", + }, + ], + "CategoricalParameterRanges": [], + "IntegerParameterRanges": [], + }, + }, + { + "StaticHyperParameters": { + "num_layers": "18", + "image_shape": "3,224,224", + "num_classes": "257", + 
"num_training_samples": "15420", + "mini_batch_size": "128", + "epochs": "10", + "optimizer": "sgd", + "top_k": "2", + "precision_dtype": "float32", + "augmentation_type": "crop", + }, + "RoleArn": "DummyRole", + "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"}, + "ResourceConfig": { + "InstanceCount": 1, + "InstanceType": "ml.c5.4xlarge", + "VolumeSizeInGB": 30, + }, + "StoppingCondition": {"MaxRuntimeInSeconds": 86400}, + "AlgorithmSpecification": { + "TrainingInputMode": "File", + "TrainingImage": "fakeimage", + }, + "InputDataConfig": [ + { + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": data_source_uri_parameter, + "S3DataDistributionType": "FullyReplicated", + } + }, + "ChannelName": "training", + } + ], + "DefinitionName": "estimator-2", + "TuningObjective": {"Type": "Minimize", "MetricName": "val:loss"}, + "HyperParameterRanges": { + "ContinuousParameterRanges": [ + { + "Name": "learning_rate", + "MinValue": "0.0001", + "MaxValue": "0.05", + "ScalingType": "Auto", + }, + { + "Name": "momentum", + "MinValue": "0.0", + "MaxValue": "0.99", + "ScalingType": "Auto", + }, + { + "Name": "weight_decay", + "MinValue": "0.0", + "MaxValue": "0.99", + "ScalingType": "Auto", + }, + ], + "CategoricalParameterRanges": [], + "IntegerParameterRanges": [], + }, + }, + ], + }, + }