diff --git a/doc/frameworks/tensorflow/upgrade_from_legacy.rst b/doc/frameworks/tensorflow/upgrade_from_legacy.rst index 7805c9579b..51b80e8d59 100644 --- a/doc/frameworks/tensorflow/upgrade_from_legacy.rst +++ b/doc/frameworks/tensorflow/upgrade_from_legacy.rst @@ -104,6 +104,7 @@ the difference in code would be as follows: ... source_dir="code", framework_version="1.10.0", + py_version="py2", train_instance_type="ml.m4.xlarge", image_name="520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:1.10.0-cpu-py2", hyperparameters={ diff --git a/doc/frameworks/tensorflow/using_tf.rst b/doc/frameworks/tensorflow/using_tf.rst index ae3a56a183..651b3562c5 100644 --- a/doc/frameworks/tensorflow/using_tf.rst +++ b/doc/frameworks/tensorflow/using_tf.rst @@ -319,7 +319,7 @@ To run training job with Pipe input mode, pass in ``input_mode='Pipe'`` to your tf_estimator = TensorFlow(entry_point='tf-train-with-pipemodedataset.py', role='SageMakerRole', training_steps=10000, evaluation_steps=100, train_instance_count=1, train_instance_type='ml.p2.xlarge', - framework_version='1.10.0', input_mode='Pipe') + framework_version='1.10.0', py_version='py3', input_mode='Pipe') tf_estimator.fit('s3://bucket/path/to/training/data') @@ -383,7 +383,8 @@ estimator object to create a SageMaker Endpoint: from sagemaker.tensorflow import TensorFlow estimator = TensorFlow(entry_point='tf-train.py', ..., train_instance_count=1, - train_instance_type='ml.c4.xlarge', framework_version='1.11') + train_instance_type='ml.c4.xlarge', framework_version='1.11', + py_version='py3') estimator.fit(inputs) diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 70c120c369..e1c01e99ce 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -56,11 +56,11 @@ def __init__( Args: py_version (str): Python version you want to use for executing your model training - code (default: 'py2'). + code. Defaults to ``None``. Required unless ``image_name`` is provided. framework_version (str): TensorFlow version you want to use for executing your model - training code. List of supported versions + training code. Defaults to ``None``. Required unless ``image_name`` is provided. + List of supported versions: https://github.com/aws/sagemaker-python-sdk#tensorflow-sagemaker-estimators. - If not specified, this will default to 1.11. model_dir (str): S3 location where the checkpoint data and models can be exported to during training (default: None). It will be passed in the training script as one of the command line arguments. If not specified, one is provided based on @@ -81,6 +81,10 @@ def __init__( Examples: 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 custom-image:latest. + + If ``framework_version`` or ``py_version`` are ``None``, then + ``image_name`` is required. If also ``None``, then a ``ValueError`` + will be raised. distributions (dict): A dictionary with information on how to run distributed training (default: None). Currently we support distributed training with parameter servers and MPI. @@ -114,18 +118,13 @@ def __init__( :class:`~sagemaker.estimator.Framework` and :class:`~sagemaker.estimator.EstimatorBase`. """ - if framework_version is None: - logger.warning( - fw.empty_framework_version_warning(defaults.TF_VERSION, self.LATEST_VERSION) - ) - self.framework_version = framework_version or defaults.TF_VERSION - - if not py_version: - py_version = "py3" if self._only_python_3_supported() else "py2" + fw.validate_version_or_image_args(framework_version, py_version, image_name) if py_version == "py2": logger.warning( fw.python_deprecation_warning(self.__framework_name__, defaults.LATEST_PY2_VERSION) ) + self.framework_version = framework_version + self.py_version = py_version if distributions is not None: logger.warning(fw.parameter_v2_rename_warning("distribution", distributions)) @@ -136,24 +135,19 @@ def __init__( if "enable_sagemaker_metrics" not in kwargs: # enable sagemaker metrics for TF v1.15 or greater: - if fw.is_version_equal_or_higher([1, 15], self.framework_version): + if framework_version and fw.is_version_equal_or_higher([1, 15], framework_version): kwargs["enable_sagemaker_metrics"] = True super(TensorFlow, self).__init__(image_name=image_name, **kwargs) - self.py_version = py_version self.model_dir = model_dir self.distributions = distributions or {} - self._validate_args(py_version=py_version, framework_version=self.framework_version) + self._validate_args(py_version=py_version) - def _validate_args(self, py_version, framework_version): + def _validate_args(self, py_version): """Placeholder docstring""" - if py_version == "py3": - if framework_version is None: - raise AttributeError(fw.EMPTY_FRAMEWORK_VERSION_ERROR) - if py_version == "py2" and self._only_python_3_supported(): msg = ( "Python 2 containers are only available with {} and lower versions. " @@ -161,7 +155,7 @@ def _validate_args(self, py_version, framework_version): ) raise AttributeError(msg) - if self._only_legacy_mode_supported() and self.image_name is None: + if self.image_name is None and self._only_legacy_mode_supported(): legacy_image_uri = fw.create_image_uri( self.sagemaker_session.boto_region_name, "tensorflow", diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 4446dd5d1b..6c1e15fda0 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -19,7 +19,6 @@ from sagemaker.content_types import CONTENT_TYPE_JSON from sagemaker.fw_utils import create_image_uri from sagemaker.predictor import json_serializer, json_deserializer -from sagemaker.tensorflow.defaults import TF_VERSION class TensorFlowPredictor(sagemaker.RealTimePredictor): @@ -138,7 +137,7 @@ def __init__( role, entry_point=None, image=None, - framework_version=TF_VERSION, + framework_version=None, container_log_level=None, predictor_cls=TensorFlowPredictor, **kwargs @@ -158,9 +157,12 @@ def __init__( hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. image (str): A Docker image URI (default: None). If not specified, a - default image for TensorFlow Serving will be used. + default image for TensorFlow Serving will be used. If + ``framework_version`` is ``None``, then ``image`` is required. + If also ``None``, then a ``ValueError`` will be raised. framework_version (str): Optional. TensorFlow Serving version you - want to use. + want to use. Defaults to ``None``. Required unless ``image`` is + provided. container_log_level (int): Log level to use within the container (default: logging.ERROR). Valid values are defined in the Python logging module. @@ -176,6 +178,13 @@ def __init__( :class:`~sagemaker.model.FrameworkModel` and :class:`~sagemaker.model.Model`. """ + if framework_version is None and image is None: + raise ValueError( + "Both framework_version and image were None. " + "Either specify framework_version or specify image_name." + ) + self.framework_version = framework_version + super(TensorFlowModel, self).__init__( model_data=model_data, role=role, @@ -184,7 +193,6 @@ def __init__( entry_point=entry_point, **kwargs ) - self.framework_version = framework_version self._container_log_level = container_log_level def deploy( diff --git a/tests/conftest.py b/tests/conftest.py index 327250753b..b68f40bd4e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -226,6 +226,16 @@ def tf_version(request): return request.param +@pytest.fixture(scope="module", params=["py2", "py3"]) +def tf_py_version(tf_version, request): + version = [int(val) for val in tf_version.split(".")] + if version < [1, 11]: + return "py2" + if version < [2, 2]: + return request.param + return "py37" + + @pytest.fixture(scope="module", params=["0.10.1", "0.10.1", "0.11", "0.11.0", "0.11.1"]) def rl_coach_tf_version(request): return request.param @@ -290,6 +300,23 @@ def tf_full_version(request): return tf_version +@pytest.fixture(scope="module") +def tf_full_py_version(tf_full_version, request): + """fixture to match tf_full_version + + Fixture exists as such, since tf_full_version may be overridden --tf-full-version. + Otherwise, this would simply be py37 to match the latest version support. + + TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code. + """ + version = [int(val) for val in tf_full_version.split(".")] + if version < [1, 11]: + return "py2" + if tf_full_version in [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION]: + return "py37" + return "py3" + + @pytest.fixture(scope="module", params=["1.15.0", "2.0.0"]) def ei_tf_full_version(request): tf_ei_version = request.config.getoption("--ei-tf-full-version") diff --git a/tests/integ/test_airflow_config.py b/tests/integ/test_airflow_config.py index 03011115de..87953b7d33 100644 --- a/tests/integ/test_airflow_config.py +++ b/tests/integ/test_airflow_config.py @@ -562,7 +562,7 @@ def test_tf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_inst train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, framework_version=TensorFlow.LATEST_VERSION, - py_version=PYTHON_VERSION, + py_version="py37", # only version available with 2.2.0 metric_definitions=[ {"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"} ], diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py index e3e030f799..b4cdbe7f1c 100644 --- a/tests/integ/test_tf.py +++ b/tests/integ/test_tf.py @@ -23,8 +23,7 @@ from sagemaker.utils import unique_name_from_base, sagemaker_timestamp import tests.integ -from tests.integ import timeout -from tests.integ import kms_utils +from tests.integ import kms_utils, timeout, PYTHON_VERSION from tests.integ.retry import retries from tests.integ.s3_utils import assert_s3_files_exist @@ -40,13 +39,8 @@ TAGS = [{"Key": "some-key", "Value": "some-value"}] -@pytest.fixture(scope="module") -def py_version(tf_full_version, tf_serving_version): - return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION - - def test_mnist_with_checkpoint_config( - sagemaker_session, instance_type, tf_full_version, py_version + sagemaker_session, instance_type, tf_full_version, tf_full_py_version ): checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format( sagemaker_session.default_bucket(), sagemaker_timestamp() @@ -59,7 +53,7 @@ def test_mnist_with_checkpoint_config( train_instance_type=instance_type, sagemaker_session=sagemaker_session, framework_version=tf_full_version, - py_version="py37", + py_version=tf_full_py_version, metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}], checkpoint_s3_uri=checkpoint_s3_uri, checkpoint_local_path=checkpoint_local_path, @@ -89,7 +83,7 @@ def test_mnist_with_checkpoint_config( assert actual_training_checkpoint_config == expected_training_checkpoint_config -def test_server_side_encryption(sagemaker_session, tf_serving_version, py_version): +def test_server_side_encryption(sagemaker_session, tf_serving_version): with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key): output_path = os.path.join( bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M") @@ -103,7 +97,7 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version, py_versio train_instance_type="ml.c5.xlarge", sagemaker_session=sagemaker_session, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, code_location=output_path, output_path=output_path, model_dir="/opt/ml/model", @@ -130,15 +124,15 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version, py_versio @pytest.mark.canary_quick -def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py_version): +def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf_full_py_version): estimator = TensorFlow( entry_point=SCRIPT, role=ROLE, train_instance_count=2, train_instance_type=instance_type, sagemaker_session=sagemaker_session, - py_version="py37", framework_version=tf_full_version, + py_version=tf_full_py_version, distributions=PARAMETER_SERVER_DISTRIBUTION, ) inputs = estimator.sagemaker_session.upload_data( @@ -154,13 +148,13 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py ) -def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, py_version): +def test_mnist_async(sagemaker_session, cpu_instance_type): estimator = TensorFlow( entry_point=SCRIPT, role=ROLE, train_instance_count=1, train_instance_type="ml.c5.4xlarge", - py_version=tests.integ.PYTHON_VERSION, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, # testing py-sdk functionality, no need to run against all TF versions framework_version=LATEST_SERVING_VERSION, @@ -195,18 +189,16 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, py_v _assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name) -def test_deploy_with_input_handlers( - sagemaker_session, instance_type, tf_serving_version, py_version -): +def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version): estimator = TensorFlow( entry_point="training.py", source_dir=TFS_RESOURCE_PATH, role=ROLE, train_instance_count=1, train_instance_type=instance_type, - py_version=py_version, - sagemaker_session=sagemaker_session, framework_version=tf_serving_version, + py_version=PYTHON_VERSION, + sagemaker_session=sagemaker_session, tags=TAGS, ) diff --git a/tests/integ/test_tf_efs_fsx.py b/tests/integ/test_tf_efs_fsx.py index fb085cfe1f..d805930023 100644 --- a/tests/integ/test_tf_efs_fsx.py +++ b/tests/integ/test_tf_efs_fsx.py @@ -36,7 +36,7 @@ FSX_DIR_PATH = "/fsx/tensorflow" MAX_JOBS = 2 MAX_PARALLEL_JOBS = 2 -PY_VERSION = "py3" +PY_VERSION = "py37" @pytest.fixture(scope="module") @@ -139,8 +139,8 @@ def test_tuning_tf_efs(efs_fsx_setup, sagemaker_session, cpu_instance_type): train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - py_version=PY_VERSION, framework_version=TensorFlow.LATEST_VERSION, + py_version=PY_VERSION, subnets=subnets, security_group_ids=security_group_ids, ) @@ -186,8 +186,8 @@ def test_tuning_tf_lustre(efs_fsx_setup, sagemaker_session, cpu_instance_type): train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - py_version=PY_VERSION, framework_version=TensorFlow.LATEST_VERSION, + py_version=PY_VERSION, subnets=subnets, security_group_ids=security_group_ids, ) diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py index 485e94d4ba..33a63c19bc 100644 --- a/tests/integ/test_tuner.py +++ b/tests/integ/test_tuner.py @@ -37,7 +37,6 @@ from sagemaker.predictor import json_deserializer from sagemaker.pytorch import PyTorch from sagemaker.tensorflow import TensorFlow -from sagemaker.tensorflow.defaults import LATEST_VERSION from sagemaker.tuner import ( IntegerParameter, ContinuousParameter, @@ -52,13 +51,6 @@ DATA_PATH = os.path.join(DATA_DIR, "iris", "data") -PY37_SUPPORTED_FRAMEWORK_VERSION = [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION] - - -@pytest.fixture(scope="module") -def py_version(tf_full_version): - return "py37" if tf_full_version in PY37_SUPPORTED_FRAMEWORK_VERSION else PYTHON_VERSION - @pytest.fixture(scope="module") def kmeans_train_set(sagemaker_session): @@ -598,7 +590,9 @@ def test_tuning_mxnet(sagemaker_session, mxnet_full_version, cpu_instance_type): @pytest.mark.canary_quick -def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_version, py_version): +def test_tuning_tf_script_mode( + sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version +): resource_path = os.path.join(DATA_DIR, "tensorflow_mnist") script_path = os.path.join(resource_path, "mnist.py") @@ -608,8 +602,8 @@ def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_ver train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - py_version=py_version, framework_version=tf_full_version, + py_version=tf_full_py_version, ) hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)} @@ -639,60 +633,6 @@ def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_ver tuner.wait() -@pytest.mark.canary_quick -@pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") -def test_tuning_tf(sagemaker_session, cpu_instance_type): - with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES): - script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py") - - estimator = TensorFlow( - entry_point=script_path, - role="SageMakerRole", - training_steps=1, - evaluation_steps=1, - hyperparameters={"input_tensor_name": "inputs"}, - train_instance_count=1, - train_instance_type=cpu_instance_type, - sagemaker_session=sagemaker_session, - ) - - inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") - hyperparameter_ranges = {"learning_rate": ContinuousParameter(0.05, 0.2)} - - objective_metric_name = "loss" - metric_definitions = [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}] - - tuner = HyperparameterTuner( - estimator, - objective_metric_name, - hyperparameter_ranges, - metric_definitions, - objective_type="Minimize", - max_jobs=2, - max_parallel_jobs=2, - ) - - tuning_job_name = unique_name_from_base("tune-tf", max_length=32) - tuner.fit(inputs, job_name=tuning_job_name) - - print("Started hyperparameter tuning job with name:" + tuning_job_name) - - time.sleep(15) - tuner.wait() - - best_training_job = tuner.best_training_job() - with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session): - predictor = tuner.deploy(1, cpu_instance_type) - - features = [6.4, 3.2, 4.5, 1.5] - dict_result = predictor.predict({"inputs": features}) - print("predict result: {}".format(dict_result)) - list_result = predictor.predict(features) - print("predict result: {}".format(list_result)) - - assert dict_result == list_result - - @pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type): """Test Tensorflow multi-instance using the same VpcConfig for training and inference""" @@ -718,6 +658,8 @@ def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type): subnets=subnet_ids, security_group_ids=[security_group_id], encrypt_inter_container_traffic=True, + framework_version="1.11", + py_version=PYTHON_VERSION, ) inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index a94934a1bc..3c91d0b5a1 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -39,3 +39,5 @@ "cn-northwest-1", "us-gov-west-1", ] + +PY_VERSION = "py3" diff --git a/tests/unit/sagemaker/tensorflow/test_estimator.py b/tests/unit/sagemaker/tensorflow/test_estimator.py index 076c849a76..50005490fb 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator.py @@ -21,7 +21,7 @@ import pytest from sagemaker.estimator import _TrainingJob -from sagemaker.tensorflow import defaults, model, TensorFlow +from sagemaker.tensorflow import model, TensorFlow from tests.unit import DATA_DIR SCRIPT_FILE = "dummy_script.py" @@ -144,7 +144,8 @@ def _create_train_job(tf_version, horovod=False, ps=False, py_version="py2"): def _build_tf( sagemaker_session, - framework_version=defaults.TF_VERSION, + framework_version=None, + py_version=None, train_instance_type=None, base_job_name=None, **kwargs @@ -152,6 +153,7 @@ def _build_tf( return TensorFlow( entry_point=SCRIPT_PATH, framework_version=framework_version, + py_version=py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -161,7 +163,7 @@ def _build_tf( ) -def test_create_model(sagemaker_session, tf_version): +def test_create_model(sagemaker_session, tf_version, tf_py_version): if version.Version(tf_version) < version.Version("1.11"): pytest.skip( "Legacy TF version requires explicit image URI, and " @@ -172,11 +174,12 @@ def test_create_model(sagemaker_session, tf_version): source_dir = "s3://mybucket/source" tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, - framework_version=tf_version, container_log_level=container_log_level, base_job_name="job", source_dir=source_dir, @@ -198,12 +201,20 @@ def test_create_model(sagemaker_session, tf_version): assert model.enable_network_isolation() -def test_create_model_with_optional_params(sagemaker_session): +def test_create_model_with_optional_params(sagemaker_session, tf_version, tf_py_version): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" enable_cloudwatch_metrics = "true" tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -259,12 +270,22 @@ def test_create_model_with_custom_image(sagemaker_session): @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") -def test_transformer_creation_with_optional_args(create_model, sagemaker_session): +def test_transformer_creation_with_optional_args( + create_model, sagemaker_session, tf_version, tf_py_version +): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + model = Mock() create_model.return_value = model tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -329,12 +350,22 @@ def test_transformer_creation_with_optional_args(create_model, sagemaker_session @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") -def test_transformer_creation_without_optional_args(create_model, sagemaker_session): +def test_transformer_creation_without_optional_args( + create_model, sagemaker_session, tf_version, tf_py_version +): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + model = Mock() create_model.return_value = model tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -368,7 +399,10 @@ def test_transformer_creation_without_optional_args(create_model, sagemaker_sess def test_script_mode_create_model(sagemaker_session): tf = _build_tf( - sagemaker_session=sagemaker_session, py_version="py3", enable_network_isolation=True + sagemaker_session=sagemaker_session, + framework_version="1.11", + py_version="py2", + enable_network_isolation=True, ) tf._prepare_for_training() # set output_path and job name as if training happened @@ -391,12 +425,12 @@ def test_script_mode_create_model(sagemaker_session): def test_fit(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, + framework_version="1.11", + py_version="py2", role=ROLE, sagemaker_session=sagemaker_session, - py_version="py3", train_instance_type=INSTANCE_TYPE, train_instance_count=1, - framework_version="1.11", source_dir=DATA_DIR, ) @@ -406,7 +440,7 @@ def test_fit(time, strftime, sagemaker_session): call_names = [c[0] for c in sagemaker_session.method_calls] assert call_names == ["train", "logs_for_job"] - expected_train_args = _create_train_job("1.11", py_version="py3") + expected_train_args = _create_train_job("1.11", py_version="py2") expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs actual_train_args = sagemaker_session.method_calls[0][2] @@ -419,12 +453,12 @@ def test_fit(time, strftime, sagemaker_session): def test_fit_ps(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, + framework_version="1.11", + py_version="py2", role=ROLE, sagemaker_session=sagemaker_session, - py_version="py3", train_instance_type=INSTANCE_TYPE, train_instance_count=1, - framework_version="1.11", source_dir=DATA_DIR, distributions=DISTRIBUTION_ENABLED, ) @@ -435,7 +469,7 @@ def test_fit_ps(time, strftime, sagemaker_session): call_names = [c[0] for c in sagemaker_session.method_calls] assert call_names == ["train", "logs_for_job"] - expected_train_args = _create_train_job("1.11", ps=True, py_version="py3") + expected_train_args = _create_train_job("1.11", ps=True, py_version="py2") expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs expected_train_args["hyperparameters"][TensorFlow.LAUNCH_PS_ENV_NAME] = json.dumps(True) @@ -449,12 +483,12 @@ def test_fit_ps(time, strftime, sagemaker_session): def test_fit_mpi(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, + framework_version="1.11", + py_version="py2", role=ROLE, sagemaker_session=sagemaker_session, - py_version="py3", train_instance_type=INSTANCE_TYPE, train_instance_count=1, - framework_version="1.11", source_dir=DATA_DIR, distributions=DISTRIBUTION_MPI_ENABLED, ) @@ -465,7 +499,7 @@ def test_fit_mpi(time, strftime, sagemaker_session): call_names = [c[0] for c in sagemaker_session.method_calls] assert call_names == ["train", "logs_for_job"] - expected_train_args = _create_train_job("1.11", horovod=True, py_version="py3") + expected_train_args = _create_train_job("1.11", horovod=True, py_version="py2") expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs expected_train_args["hyperparameters"][TensorFlow.LAUNCH_MPI_ENV_NAME] = json.dumps(True) expected_train_args["hyperparameters"][TensorFlow.MPI_NUM_PROCESSES_PER_HOST] = json.dumps(2) @@ -477,18 +511,20 @@ def test_fit_mpi(time, strftime, sagemaker_session): assert actual_train_args == expected_train_args -def test_hyperparameters_no_model_dir(sagemaker_session): - tf = _build_tf(sagemaker_session, model_dir=False) +def test_hyperparameters_no_model_dir(sagemaker_session, tf_version, tf_py_version): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + + tf = _build_tf( + sagemaker_session, framework_version=tf_version, py_version=tf_py_version, model_dir=False + ) hyperparameters = tf.hyperparameters() assert "model_dir" not in hyperparameters -def test_train_image_default(sagemaker_session): - tf = _build_tf(sagemaker_session) - expected_image = _image_uri(defaults.TF_VERSION, "py2") - assert expected_image == tf.train_image() - - def test_train_image_custom_image(sagemaker_session): custom_image = "tensorflow:latest" tf = _build_tf(sagemaker_session, image_name=custom_image) diff --git a/tests/unit/sagemaker/tensorflow/test_estimator_init.py b/tests/unit/sagemaker/tensorflow/test_estimator_init.py index 374e9e2fe4..86b6df3065 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator_init.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator_init.py @@ -16,7 +16,7 @@ from packaging import version import pytest -from sagemaker.tensorflow import defaults, TensorFlow +from sagemaker.tensorflow import TensorFlow REGION = "us-west-2" @@ -37,17 +37,9 @@ def _build_tf(sagemaker_session, **kwargs): ) -@patch("sagemaker.fw_utils.empty_framework_version_warning") -def test_empty_framework_version(warning, sagemaker_session): - estimator = _build_tf(sagemaker_session, framework_version=None) - - assert estimator.framework_version == defaults.TF_VERSION - warning.assert_called_with(defaults.TF_VERSION, estimator.LATEST_VERSION) - - @patch("sagemaker.fw_utils.python_deprecation_warning") def test_estimator_py2_deprecation_warning(warning, sagemaker_session): - estimator = _build_tf(sagemaker_session, py_version="py2") + estimator = _build_tf(sagemaker_session, framework_version="2.1.0", py_version="py2") assert estimator.py_version == "py2" warning.assert_called_with("tensorflow", "2.1.0") @@ -71,48 +63,62 @@ def test_py2_version_is_not_deprecated(sagemaker_session): assert estimator.py_version == "py2" -def test_py2_is_default_version_before_tf1_14(sagemaker_session): - estimator = _build_tf(sagemaker_session, framework_version="1.13") - assert estimator.py_version == "py2" - - def test_framework_name(sagemaker_session): - tf = _build_tf(sagemaker_session, framework_version="1.15.2") + tf = _build_tf(sagemaker_session, framework_version="1.15.2", py_version="py3") assert tf.__framework_name__ == "tensorflow" def test_enable_sm_metrics(sagemaker_session): - tf = _build_tf(sagemaker_session, enable_sagemaker_metrics=True) + tf = _build_tf( + sagemaker_session, + framework_version="1.15.2", + py_version="py3", + enable_sagemaker_metrics=True, + ) assert tf.enable_sagemaker_metrics def test_disable_sm_metrics(sagemaker_session): - tf = _build_tf(sagemaker_session, enable_sagemaker_metrics=False) + tf = _build_tf( + sagemaker_session, + framework_version="1.15.2", + py_version="py3", + enable_sagemaker_metrics=False, + ) assert not tf.enable_sagemaker_metrics -def test_disable_sm_metrics_if_fw_ver_is_less_than_1_15(sagemaker_session, tf_version): +def test_disable_sm_metrics_if_fw_ver_is_less_than_1_15( + sagemaker_session, tf_version, tf_py_version +): if version.Version(tf_version) > version.Version("1.14"): pytest.skip("This test is for TF 1.14 and lower.") - tf = _build_tf(sagemaker_session, framework_version=tf_version, image_name="old-image") + tf = _build_tf( + sagemaker_session, + framework_version=tf_version, + py_version=tf_py_version, + image_name="old-image", + ) assert tf.enable_sagemaker_metrics is None -def test_enable_sm_metrics_if_fw_ver_is_at_least_1_15(sagemaker_session, tf_version): +def test_enable_sm_metrics_if_fw_ver_is_at_least_1_15(sagemaker_session, tf_version, tf_py_version): if version.Version(tf_version) < version.Version("1.15"): pytest.skip("This test is for TF 1.15 and higher.") - tf = _build_tf(sagemaker_session, framework_version=tf_version) + tf = _build_tf(sagemaker_session, framework_version=tf_version, py_version=tf_py_version) assert tf.enable_sagemaker_metrics -def test_require_image_name_if_fw_ver_is_less_than_1_11(sagemaker_session, tf_version): +def test_require_image_name_if_fw_ver_is_less_than_1_11( + sagemaker_session, tf_version, tf_py_version +): if version.Version(tf_version) > version.Version("1.10"): pytest.skip("This test is for TF 1.10 and lower.") with pytest.raises(ValueError) as e: - _build_tf(sagemaker_session, framework_version=tf_version) + _build_tf(sagemaker_session, framework_version=tf_version, py_version=tf_py_version) expected_msg = ( "TF {version} supports only legacy mode. Please supply the image URI directly with "