diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index f0af0424ed..8b319b4b8c 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -36,6 +36,7 @@ "0.11.0": {"tensorflow": "1.11", "mxnet": "1.3"}, "0.11.1": {"tensorflow": "1.12"}, "0.11": {"tensorflow": "1.12", "mxnet": "1.3"}, + "1.0.0": {"tensorflow": "1.12"}, }, "ray": { "0.5.3": {"tensorflow": "1.11"}, @@ -68,7 +69,7 @@ class RLEstimator(Framework): COACH_LATEST_VERSION_TF = "0.11.1" COACH_LATEST_VERSION_MXNET = "0.11.0" - RAY_LATEST_VERSION = "0.6.5" + RAY_LATEST_VERSION = "0.8.5" def __init__( self, diff --git a/tests/conftest.py b/tests/conftest.py index f29ee7feee..fac9be9cb1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,6 @@ from sagemaker import Session, image_uris, utils from sagemaker.local import LocalSession -from sagemaker.rl import RLEstimator import tests.integ DEFAULT_REGION = "us-west-2" @@ -41,6 +40,8 @@ FRAMEWORKS_FOR_GENERATED_VERSION_FIXTURES = ( "chainer", + "coach_mxnet", + "coach_tensorflow", "inferentia_mxnet", "inferentia_tensorflow", "mxnet", @@ -48,8 +49,11 @@ "neo_pytorch", "neo_tensorflow", "pytorch", + "ray_pytorch", + "ray_tensorflow", "sklearn", "tensorflow", + "vw", "xgboost", ) @@ -181,46 +185,6 @@ def _tf_py_version(tf_version, request): return "py37" -@pytest.fixture(scope="module", params=["0.10.1", "0.10.1", "0.11", "0.11.0", "0.11.1"]) -def rl_coach_tf_version(request): - return request.param - - -@pytest.fixture(scope="module", params=["0.11", "0.11.0"]) -def rl_coach_mxnet_version(request): - return request.param - - -@pytest.fixture(scope="module", params=["0.5", "0.5.3", "0.6", "0.6.5", "0.8.2", "0.8.5"]) -def rl_ray_tf_version(request): - return request.param - - -@pytest.fixture(scope="module", params=["0.8.5"]) -def rl_ray_pytorch_version(request): - return request.param - - -@pytest.fixture(scope="module", params=["8.7.0"]) -def rl_vw_version(request): - return request.param - - -@pytest.fixture(scope="module") -def rl_coach_mxnet_full_version(): - return RLEstimator.COACH_LATEST_VERSION_MXNET - - -@pytest.fixture(scope="module") -def rl_coach_tf_full_version(): - return RLEstimator.COACH_LATEST_VERSION_TF - - -@pytest.fixture(scope="module") -def rl_ray_full_version(): - return RLEstimator.RAY_LATEST_VERSION - - @pytest.fixture(scope="module") def tf_full_version(tensorflow_training_latest_version, tensorflow_inference_latest_version): """Fixture for TF tests that test both training and inference. diff --git a/tests/data/ray_cartpole/train_ray.py b/tests/data/ray_cartpole/train_ray.py index aea02f621c..d19d625b65 100644 --- a/tests/data/ray_cartpole/train_ray.py +++ b/tests/data/ray_cartpole/train_ray.py @@ -5,12 +5,12 @@ from ray.tune.logger import pretty_print # Based on https://github.com/ray-project/ray/blob/master/doc/source/rllib-training.rst#python-api -ray.init(log_to_driver=False) +ray.init(log_to_driver=False, webui_host="127.0.0.1") config = ppo.DEFAULT_CONFIG.copy() config["num_gpus"] = int(os.environ.get("SM_NUM_GPUS", 0)) checkpoint_dir = os.environ.get("SM_MODEL_DIR", "/Users/nadzeya/gym") config["num_workers"] = 1 -agent = ppo.PPOAgent(config=config, env="CartPole-v0") +agent = ppo.PPOTrainer(config=config, env="CartPole-v0") # Can optionally call agent.restore(path) to load a checkpoint. diff --git a/tests/integ/test_rl.py b/tests/integ/test_rl.py index 8104f7a631..ab8cc3048c 100644 --- a/tests/integ/test_rl.py +++ b/tests/integ/test_rl.py @@ -24,9 +24,9 @@ @pytest.mark.canary_quick -def test_coach_mxnet(sagemaker_session, rl_coach_mxnet_full_version, cpu_instance_type): +def test_coach_mxnet(sagemaker_session, coach_mxnet_latest_version, cpu_instance_type): estimator = _test_coach( - sagemaker_session, RLFramework.MXNET, rl_coach_mxnet_full_version, cpu_instance_type + sagemaker_session, RLFramework.MXNET, coach_mxnet_latest_version, cpu_instance_type ) job_name = unique_name_from_base("test-coach-mxnet") @@ -51,9 +51,12 @@ def test_coach_mxnet(sagemaker_session, rl_coach_mxnet_full_version, cpu_instanc assert 0 < action[0][1] < 1 -def test_coach_tf(sagemaker_session, rl_coach_tf_full_version, cpu_instance_type): +def test_coach_tf(sagemaker_session, coach_tensorflow_latest_version, cpu_instance_type): estimator = _test_coach( - sagemaker_session, RLFramework.TENSORFLOW, rl_coach_tf_full_version, cpu_instance_type + sagemaker_session, + RLFramework.TENSORFLOW, + coach_tensorflow_latest_version, + cpu_instance_type, ) job_name = unique_name_from_base("test-coach-tf") @@ -96,7 +99,7 @@ def _test_coach(sagemaker_session, rl_framework, rl_coach_version, cpu_instance_ @pytest.mark.canary_quick -def test_ray_tf(sagemaker_session, rl_ray_full_version, cpu_instance_type): +def test_ray_tf(sagemaker_session, ray_tensorflow_latest_version, cpu_instance_type): source_dir = os.path.join(DATA_DIR, "ray_cartpole") cartpole = "train_ray.py" @@ -105,7 +108,7 @@ def test_ray_tf(sagemaker_session, rl_ray_full_version, cpu_instance_type): source_dir=source_dir, toolkit=RLToolkit.RAY, framework=RLFramework.TENSORFLOW, - toolkit_version=rl_ray_full_version, + toolkit_version=ray_tensorflow_latest_version, sagemaker_session=sagemaker_session, role="SageMakerRole", instance_type=cpu_instance_type, diff --git a/tests/unit/sagemaker/image_uris/test_rl.py b/tests/unit/sagemaker/image_uris/test_rl.py index b32c9820ec..daa8ab2413 100644 --- a/tests/unit/sagemaker/image_uris/test_rl.py +++ b/tests/unit/sagemaker/image_uris/test_rl.py @@ -31,12 +31,15 @@ def _version_for_tag(toolkit, toolkit_version, framework, framework_in_tag=False return "{}{}".format(toolkit, toolkit_version) -def test_coach_tf(rl_coach_tf_version): +def test_coach_tf(coach_tensorflow_version): for instance_type, processor in INSTANCE_TYPES_AND_PROCESSORS: uri = image_uris.retrieve( - "coach-tensorflow", REGION, version=rl_coach_tf_version, instance_type=instance_type + "coach-tensorflow", + REGION, + version=coach_tensorflow_version, + instance_type=instance_type, ) - assert _expected_coach_tf_uri(rl_coach_tf_version, processor) == uri + assert _expected_coach_tf_uri(coach_tensorflow_version, processor) == uri def _expected_coach_tf_uri(coach_tf_version, processor): @@ -58,15 +61,15 @@ def _expected_coach_tf_uri(coach_tf_version, processor): ) -def test_coach_mxnet(rl_coach_mxnet_version): +def test_coach_mxnet(coach_mxnet_version): for instance_type, processor in INSTANCE_TYPES_AND_PROCESSORS: uri = image_uris.retrieve( - "coach-mxnet", REGION, version=rl_coach_mxnet_version, instance_type=instance_type + "coach-mxnet", REGION, version=coach_mxnet_version, instance_type=instance_type ) expected = expected_uris.framework_uri( "sagemaker-rl-mxnet", - "coach{}".format(rl_coach_mxnet_version), + "coach{}".format(coach_mxnet_version), SAGEMAKER_ACCOUNT, py_version="py3", processor=processor, @@ -74,12 +77,12 @@ def test_coach_mxnet(rl_coach_mxnet_version): assert expected == uri -def test_ray_tf(rl_ray_tf_version): +def test_ray_tf(ray_tensorflow_version): for instance_type, processor in INSTANCE_TYPES_AND_PROCESSORS: uri = image_uris.retrieve( - "ray-tensorflow", REGION, version=rl_ray_tf_version, instance_type=instance_type + "ray-tensorflow", REGION, version=ray_tensorflow_version, instance_type=instance_type ) - assert _expected_ray_tf_uri(rl_ray_tf_version, processor) == uri + assert _expected_ray_tf_uri(ray_tensorflow_version, processor) == uri def _expected_ray_tf_uri(ray_tf_version, processor): @@ -101,15 +104,15 @@ def _expected_ray_tf_uri(ray_tf_version, processor): ) -def test_ray_pytorch(rl_ray_pytorch_version): +def test_ray_pytorch(ray_pytorch_version): for instance_type, processor in INSTANCE_TYPES_AND_PROCESSORS: uri = image_uris.retrieve( - "ray-pytorch", REGION, version=rl_ray_pytorch_version, instance_type=instance_type + "ray-pytorch", REGION, version=ray_pytorch_version, instance_type=instance_type ) expected = expected_uris.framework_uri( "sagemaker-rl-ray-container", - "ray-{}-torch".format(rl_ray_pytorch_version), + "ray-{}-torch".format(ray_pytorch_version), RL_ACCOUNT, py_version="py36", processor=processor, @@ -118,8 +121,8 @@ def test_ray_pytorch(rl_ray_pytorch_version): assert expected == uri -def test_vw(rl_vw_version): - version = "vw-{}".format(rl_vw_version) +def test_vw(vw_version): + version = "vw-{}".format(vw_version) uri = image_uris.retrieve("vw", REGION, version=version, instance_type="ml.c4.xlarge") expected = expected_uris.framework_uri("sagemaker-rl-vw-container", version, RL_ACCOUNT) diff --git a/tests/unit/test_rl.py b/tests/unit/test_rl.py index f3743ddfea..73e9e4357b 100644 --- a/tests/unit/test_rl.py +++ b/tests/unit/test_rl.py @@ -155,7 +155,7 @@ def _create_train_job(toolkit, toolkit_version, framework): @patch("sagemaker.estimator.name_from_base") -def test_create_tf_model(name_from_base, sagemaker_session, rl_coach_tf_version): +def test_create_tf_model(name_from_base, sagemaker_session, coach_tensorflow_version): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" rl = RLEstimator( @@ -165,7 +165,7 @@ def test_create_tf_model(name_from_base, sagemaker_session, rl_coach_tf_version) instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, toolkit=RLToolkit.COACH, - toolkit_version=rl_coach_tf_version, + toolkit_version=coach_tensorflow_version, framework=RLFramework.TENSORFLOW, container_log_level=container_log_level, source_dir=source_dir, @@ -178,7 +178,7 @@ def test_create_tf_model(name_from_base, sagemaker_session, rl_coach_tf_version) model = rl.create_model() supported_versions = TOOLKIT_FRAMEWORK_VERSION_MAP[RLToolkit.COACH.value] - framework_version = supported_versions[rl_coach_tf_version][RLFramework.TENSORFLOW.value] + framework_version = supported_versions[coach_tensorflow_version][RLFramework.TENSORFLOW.value] assert isinstance(model, TensorFlowModel) assert model.sagemaker_session == sagemaker_session @@ -188,11 +188,12 @@ def test_create_tf_model(name_from_base, sagemaker_session, rl_coach_tf_version) assert model._container_log_level == container_log_level assert model.vpc_config is None - name_from_base.assert_called_with("sagemaker-rl-tensorflow") + call_args = name_from_base.call_args_list[0][0] + assert call_args[0] in ("sagemaker-rl-tensorflow", "sagemaker-rl-coach-container") @patch("sagemaker.estimator.name_from_base") -def test_create_mxnet_model(name_from_base, sagemaker_session, rl_coach_mxnet_version): +def test_create_mxnet_model(name_from_base, sagemaker_session, coach_mxnet_version): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" rl = RLEstimator( @@ -202,7 +203,7 @@ def test_create_mxnet_model(name_from_base, sagemaker_session, rl_coach_mxnet_ve instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, toolkit=RLToolkit.COACH, - toolkit_version=rl_coach_mxnet_version, + toolkit_version=coach_mxnet_version, framework=RLFramework.MXNET, container_log_level=container_log_level, source_dir=source_dir, @@ -215,7 +216,7 @@ def test_create_mxnet_model(name_from_base, sagemaker_session, rl_coach_mxnet_ve model = rl.create_model() supported_versions = TOOLKIT_FRAMEWORK_VERSION_MAP[RLToolkit.COACH.value] - framework_version = supported_versions[rl_coach_mxnet_version][RLFramework.MXNET.value] + framework_version = supported_versions[coach_mxnet_version][RLFramework.MXNET.value] assert isinstance(model, MXNetModel) assert model.sagemaker_session == sagemaker_session @@ -231,7 +232,7 @@ def test_create_mxnet_model(name_from_base, sagemaker_session, rl_coach_mxnet_ve name_from_base.assert_called_with("sagemaker-rl-mxnet") -def test_create_model_with_optional_params(sagemaker_session, rl_coach_mxnet_version): +def test_create_model_with_optional_params(sagemaker_session, coach_mxnet_version): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" rl = RLEstimator( @@ -241,7 +242,7 @@ def test_create_model_with_optional_params(sagemaker_session, rl_coach_mxnet_ver instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, toolkit=RLToolkit.COACH, - toolkit_version=rl_coach_mxnet_version, + toolkit_version=coach_mxnet_version, framework=RLFramework.MXNET, container_log_level=container_log_level, source_dir=source_dir, @@ -300,7 +301,7 @@ def test_create_model_with_custom_image(name_from_base, sagemaker_session): @patch("sagemaker.utils.create_tar_file", MagicMock()) @patch("time.strftime", return_value=TIMESTAMP) -def test_rl(strftime, sagemaker_session, rl_coach_mxnet_version): +def test_rl(strftime, sagemaker_session, coach_mxnet_version): rl = RLEstimator( entry_point=SCRIPT_PATH, role=ROLE, @@ -308,7 +309,7 @@ def test_rl(strftime, sagemaker_session, rl_coach_mxnet_version): instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, toolkit=RLToolkit.COACH, - toolkit_version=rl_coach_mxnet_version, + toolkit_version=coach_mxnet_version, framework=RLFramework.MXNET, ) @@ -322,7 +323,7 @@ def test_rl(strftime, sagemaker_session, rl_coach_mxnet_version): assert boto_call_names == ["resource"] expected_train_args = _create_train_job( - RLToolkit.COACH.value, rl_coach_mxnet_version, RLFramework.MXNET.value + RLToolkit.COACH.value, coach_mxnet_version, RLFramework.MXNET.value ) expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs expected_train_args["experiment_config"] = EXPERIMENT_CONFIG @@ -332,7 +333,7 @@ def test_rl(strftime, sagemaker_session, rl_coach_mxnet_version): model = rl.create_model() supported_versions = TOOLKIT_FRAMEWORK_VERSION_MAP[RLToolkit.COACH.value] - framework_version = supported_versions[rl_coach_mxnet_version][RLFramework.MXNET.value] + framework_version = supported_versions[coach_mxnet_version][RLFramework.MXNET.value] expected_image_base = "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:{}-gpu-py3" submit_dir = "s3://notmybucket/sagemaker-rl-mxnet-{}/source/sourcedir.tar.gz".format(TIMESTAMP) @@ -351,11 +352,11 @@ def test_rl(strftime, sagemaker_session, rl_coach_mxnet_version): @patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_deploy_mxnet(sagemaker_session, rl_coach_mxnet_version): +def test_deploy_mxnet(sagemaker_session, coach_mxnet_version): rl = _rl_estimator( sagemaker_session, RLToolkit.COACH, - rl_coach_mxnet_version, + coach_mxnet_version, RLFramework.MXNET, instance_type="ml.g2.2xlarge", ) @@ -365,11 +366,11 @@ def test_deploy_mxnet(sagemaker_session, rl_coach_mxnet_version): @patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_deploy_tfs(sagemaker_session, rl_coach_tf_version): +def test_deploy_tfs(sagemaker_session, coach_tensorflow_version): rl = _rl_estimator( sagemaker_session, RLToolkit.COACH, - rl_coach_tf_version, + coach_tensorflow_version, RLFramework.TENSORFLOW, instance_type="ml.g2.2xlarge", ) @@ -379,11 +380,11 @@ def test_deploy_tfs(sagemaker_session, rl_coach_tf_version): @patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_deploy_ray(sagemaker_session, rl_ray_tf_version): +def test_deploy_ray(sagemaker_session, ray_tensorflow_version): rl = _rl_estimator( sagemaker_session, RLToolkit.RAY, - rl_ray_tf_version, + ray_tensorflow_version, RLFramework.TENSORFLOW, instance_type="ml.g2.2xlarge", ) @@ -394,31 +395,38 @@ def test_deploy_ray(sagemaker_session, rl_ray_tf_version): @patch("sagemaker.image_uris.retrieve") -def test_train_image(retrieve_image_uri, sagemaker_session, rl_ray_tf_version): +def test_train_image(retrieve_image_uri, sagemaker_session, ray_tensorflow_version): toolkit = RLToolkit.RAY framework = RLFramework.TENSORFLOW image = "custom-image:latest" rl = _rl_estimator( - sagemaker_session, toolkit, rl_ray_tf_version, framework, instance_type=CPU, image_uri=image + sagemaker_session, + toolkit, + ray_tensorflow_version, + framework, + instance_type=CPU, + image_uri=image, ) assert image == rl.train_image() retrieve_image_uri.assert_not_called() - rl = _rl_estimator(sagemaker_session, toolkit, rl_ray_tf_version, framework, instance_type=CPU) + rl = _rl_estimator( + sagemaker_session, toolkit, ray_tensorflow_version, framework, instance_type=CPU + ) assert retrieve_image_uri.return_value == rl.train_image() retrieve_image_uri.assert_called_with( - "ray-tensorflow", REGION, version=rl_ray_tf_version, instance_type=CPU + "ray-tensorflow", REGION, version=ray_tensorflow_version, instance_type=CPU ) -def test_attach(sagemaker_session, rl_coach_mxnet_version): +def test_attach(sagemaker_session, coach_mxnet_version): training_image = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-rl-{}:{}{}-cpu-py3".format( - RLFramework.MXNET.value, RLToolkit.COACH.value, rl_coach_mxnet_version + RLFramework.MXNET.value, RLToolkit.COACH.value, coach_mxnet_version ) supported_versions = TOOLKIT_FRAMEWORK_VERSION_MAP[RLToolkit.COACH.value] - framework_version = supported_versions[rl_coach_mxnet_version][RLFramework.MXNET.value] + framework_version = supported_versions[coach_mxnet_version][RLFramework.MXNET.value] returned_job_description = { "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image}, "HyperParameters": { @@ -451,7 +459,7 @@ def test_attach(sagemaker_session, rl_coach_mxnet_version): assert estimator.framework == RLFramework.MXNET.value assert estimator.toolkit == RLToolkit.COACH.value assert estimator.framework_version == framework_version - assert estimator.toolkit_version == rl_coach_mxnet_version + assert estimator.toolkit_version == coach_mxnet_version assert estimator.role == "arn:aws:iam::366:role/SageMakerRole" assert estimator.instance_count == 1 assert estimator.max_run == 24 * 60 * 60