Skip to content

Commit 3cfd5c1

Browse files
goutamvenkat-anyscale and alexeykudinkin
authored and committed
[Data] - Update Pyarrow version to 23.0 for release tests + Update moto to 5.x.x (ray-project#59489)
## Description

PyArrow 22 uses a newer AWS SDK that sends S3 requests with HTTP chunked transfer encoding and trailer checksums (`x-amz-checksum-crc64nvme`). Our old moto version (4.2.12) doesn't properly parse this protocol, causing the raw HTTP wire format to leak into test responses:

```
Expected: b'spam'
Got: b'4\r\nspam\r\n0\r\nx-amz-checksum-crc64nvme:...\r\n\r\n'
```

Related issue from moto: getmoto/moto#7198

## Related issues

> Link related issues: "Fixes ray-project#1234", "Closes ray-project#1234", or "Related to ray-project#1234".

## Additional information

> Optional: Add implementation details, API changes, usage examples, screenshots, etc.

---------

Signed-off-by: Alexey Kudinkin <ak@anyscale.com>
Signed-off-by: Goutam <goutam@anyscale.com>
Co-authored-by: Alexey Kudinkin <ak@anyscale.com>
Signed-off-by: Adel Nour <ans9868@nyu.edu>
1 parent 287661f commit 3cfd5c1

File tree

8 files changed

+45
-61
lines changed

8 files changed

+45
-61
lines changed

.buildkite/data.rayci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ steps:
9999
--only-tags data_non_parallel
100100
depends_on: data9build-multipy
101101

102-
- label: ":database: data: arrow v21 tests"
102+
- label: ":database: data: arrow v23 tests"
103103
tags:
104104
- python
105105
- data
@@ -113,7 +113,7 @@ steps:
113113
--except-tags data_integration,doctest,data_non_parallel,dask,needs_credentials,tensorflow_datasets
114114
depends_on: datalbuild-multipy
115115

116-
- label: ":database: data: arrow v21 tests (data_non_parallel)"
116+
- label: ":database: data: arrow v23 tests (data_non_parallel)"
117117
tags:
118118
- python
119119
- data
@@ -128,7 +128,7 @@ steps:
128128
--only-tags data_non_parallel
129129
depends_on: datalbuild-multipy
130130

131-
- label: ":database: data: arrow v21 py{{matrix.python}} tests ({{matrix.worker_id}})"
131+
- label: ":database: data: arrow v23 py{{matrix.python}} tests ({{matrix.worker_id}})"
132132
key: datal_python_tests
133133
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
134134
tags:
@@ -145,7 +145,7 @@ steps:
145145
python: ["3.12"]
146146
worker_id: ["0", "1"]
147147

148-
- label: ":database: data: arrow v21 py{{matrix.python}} tests (data_non_parallel)"
148+
- label: ":database: data: arrow v23 py{{matrix.python}} tests (data_non_parallel)"
149149
key: datal_python_non_parallel_tests
150150
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
151151
tags:

ci/docker/datal.build.wanda.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ srcs:
1111
- python/requirements/ml/data-test-requirements.txt
1212
build_args:
1313
- DOCKER_IMAGE_BASE_BUILD=cr.ray.io/rayproject/oss-ci-base_ml-py$PYTHON
14-
- ARROW_VERSION=21.*
14+
- ARROW_VERSION=23.*
1515
tags:
1616
- cr.ray.io/rayproject/databuild-py$PYTHON

python/ray/data/tests/mock_server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ def start_service(service_name, host, port):
6060
# Always use port conflict resolution to be safe
6161
port = _find_available_port(host, port)
6262

63-
args = [moto_svr_path, service_name, "-H", host, "-p", str(port)]
63+
# moto 5.x no longer accepts a service name argument - all services
64+
# are served on a single endpoint
65+
args = [moto_svr_path, "-H", host, "-p", str(port)]
6466
# For debugging
6567
# args = '{0} -H {1} -p {2} 2>&1 | \
6668
# tee -a /tmp/moto.log'.format(moto_svr_path, host, port)

python/ray/tests/mock_s3_server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818

1919
def start_service(service_name, host, port):
2020
moto_svr_path = shutil.which("moto_server")
21-
args = [moto_svr_path, service_name, "-H", host, "-p", str(port)]
21+
# moto 5.x no longer accepts a service name argument - all services
22+
# are served on a single endpoint
23+
args = [moto_svr_path, "-H", host, "-p", str(port)]
2224
process = sp.Popen(
2325
args, stdin=sp.PIPE, stdout=sp.DEVNULL, stderr=sp.DEVNULL
2426
) # shell=True

python/ray/tests/test_cli.py

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,10 @@
3535
from unittest import mock
3636
from unittest.mock import MagicMock, patch
3737

38-
import moto
3938
import pytest
4039
import yaml
4140
from click.testing import CliRunner
42-
from moto import mock_ec2, mock_iam
41+
from moto import mock_aws
4342
from testfixtures import Replacer
4443
from testfixtures.popen import MockPopen, PopenBehaviour
4544

@@ -113,18 +112,25 @@ def configure_aws():
113112
os.environ["AWS_SESSION_TOKEN"] = "testing"
114113

115114
# moto (boto3 mock) only allows a hardcoded set of AMIs
116-
dlami = (
117-
moto.ec2.models.ec2_backends["us-west-2"]["us-west-2"]
118-
.describe_images(filters={"name": "Deep Learning AMI Ubuntu*"})[0]
119-
.id
120-
)
121-
aws_config.DEFAULT_AMI["us-west-2"] = dlami
122-
list_instances_mock = MagicMock(return_value=boto3_list)
123-
with patch(
124-
"ray.autoscaler._private.aws.node_provider.list_ec2_instances",
125-
list_instances_mock,
126-
):
127-
yield
115+
# Use mock_aws context manager and boto3 to find the AMI
116+
import boto3
117+
118+
# In moto 5.x, AWS managed policies (e.g., AmazonEC2FullAccess) are not
119+
# loaded by default for performance. Enable them since the autoscaler
120+
# attaches these policies to the IAM role.
121+
with mock_aws(config={"iam": {"load_aws_managed_policies": True}}):
122+
ec2_client = boto3.client("ec2", region_name="us-west-2")
123+
images = ec2_client.describe_images(
124+
Filters=[{"Name": "name", "Values": ["Deep Learning AMI Ubuntu*"]}]
125+
)["Images"]
126+
dlami = images[0]["ImageId"]
127+
aws_config.DEFAULT_AMI["us-west-2"] = dlami
128+
list_instances_mock = MagicMock(return_value=boto3_list)
129+
with patch(
130+
"ray.autoscaler._private.aws.node_provider.list_ec2_instances",
131+
list_instances_mock,
132+
):
133+
yield
128134

129135

130136
@pytest.fixture(scope="function")
@@ -636,8 +642,6 @@ def test_ray_start_block_and_stop(configure_lang, monkeypatch, tmp_path, cleanup
636642
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
637643
reason=("Mac builds don't provide proper locale support"),
638644
)
639-
@mock_ec2
640-
@mock_iam
641645
def test_ray_up(
642646
configure_lang, _unlink_test_ssh_key, configure_aws, monkeypatch, tmp_path
643647
):
@@ -677,8 +681,6 @@ def commands_mock(command, stdin):
677681
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
678682
reason=("Mac builds don't provide proper locale support"),
679683
)
680-
@mock_ec2
681-
@mock_iam
682684
def test_ray_up_docker(
683685
configure_lang, _unlink_test_ssh_key, configure_aws, monkeypatch, tmp_path
684686
):
@@ -720,8 +722,6 @@ def commands_mock(command, stdin):
720722
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
721723
reason=("Mac builds don't provide proper locale support"),
722724
)
723-
@mock_ec2
724-
@mock_iam
725725
def test_ray_up_record(
726726
configure_lang, _unlink_test_ssh_key, configure_aws, monkeypatch, tmp_path
727727
):
@@ -754,8 +754,6 @@ def commands_mock(command, stdin):
754754
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
755755
reason=("Mac builds don't provide proper locale support"),
756756
)
757-
@mock_ec2
758-
@mock_iam
759757
def test_ray_attach(configure_lang, configure_aws, _unlink_test_ssh_key):
760758
def commands_mock(command, stdin):
761759
# TODO(maximsmol): this is a hack since stdout=sys.stdout
@@ -796,8 +794,6 @@ def commands_mock(command, stdin):
796794
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
797795
reason=("Mac builds don't provide proper locale support"),
798796
)
799-
@mock_ec2
800-
@mock_iam
801797
def test_ray_attach_with_ip(configure_lang, configure_aws, _unlink_test_ssh_key):
802798
from ray.autoscaler._private.commands import get_worker_node_ips
803799

@@ -876,8 +872,6 @@ def commands_verifier(calls):
876872
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
877873
reason=("Mac builds don't provide proper locale support"),
878874
)
879-
@mock_ec2
880-
@mock_iam
881875
def test_ray_dashboard(configure_lang, configure_aws, _unlink_test_ssh_key):
882876
def commands_mock(command, stdin):
883877
# TODO(maximsmol): this is a hack since stdout=sys.stdout
@@ -910,8 +904,6 @@ def commands_mock(command, stdin):
910904
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
911905
reason=("Mac builds don't provide proper locale support"),
912906
)
913-
@mock_ec2
914-
@mock_iam
915907
def test_ray_exec(configure_lang, configure_aws, _unlink_test_ssh_key):
916908
def commands_mock(command, stdin):
917909
# TODO(maximsmol): this is a hack since stdout=sys.stdout
@@ -963,8 +955,6 @@ def commands_verifier(calls):
963955
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
964956
reason=("Mac builds don't provide proper locale support"),
965957
)
966-
@mock_ec2
967-
@mock_iam
968958
def test_ray_submit(configure_lang, configure_aws, _unlink_test_ssh_key):
969959
def commands_mock(command, stdin):
970960
# TODO(maximsmol): this is a hack since stdout=sys.stdout
@@ -1355,8 +1345,6 @@ def test_ray_drain_node(monkeypatch):
13551345
sys.platform == "darwin" and "travis" in os.environ.get("USER", ""),
13561346
reason=("Mac builds don't provide proper locale support"),
13571347
)
1358-
@mock_ec2
1359-
@mock_iam
13601348
def test_ray_cluster_dump(configure_lang, configure_aws, _unlink_test_ssh_key):
13611349
def commands_mock(command, stdin):
13621350
print("This is a test!")

python/requirements/ml/data-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ modin==0.22.2; python_version < '3.12'
1414
pandas==1.5.3; python_version < '3.12'
1515
modin==0.31.0; python_version >= '3.12'
1616
pandas==2.2.2; python_version >= '3.12'
17-
responses==0.13.4
17+
responses>=0.15.0
1818
pymars>=0.8.3; python_version < "3.12"

python/requirements/test-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jsonpatch==1.32
3131
kubernetes==24.2.0
3232
llvmlite==0.42.0
3333
lxml>=6.0.2
34-
moto[s3,server]==4.2.12
34+
moto[s3,server]==5.1.18
3535
mypy==1.7.0
3636
numba==0.59.1
3737
openpyxl==3.0.10

python/requirements_compiled.txt

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ annotated-types==0.6.0
8888
antlr4-python3-runtime==4.11.1
8989
# via
9090
# fugue-sql-antlr
91+
# moto
9192
# qpd
9293
anyio==4.12.0
9394
# via
@@ -392,14 +393,13 @@ cryptography==44.0.3
392393
# azure-cli-core
393394
# azure-identity
394395
# azure-storage-blob
396+
# joserfc
395397
# moto
396398
# msal
397399
# paramiko
398400
# pyjwt
399401
# pyopenssl
400-
# python-jose
401402
# snowflake-connector-python
402-
# sshpubkeys
403403
# trustme
404404
cupy-cuda12x==13.4.0 ; sys_platform != "darwin"
405405
# via
@@ -484,11 +484,6 @@ docutils==0.19
484484
# sphinx
485485
dulwich==0.21.6
486486
# via comet-ml
487-
ecdsa==0.18.0
488-
# via
489-
# moto
490-
# python-jose
491-
# sshpubkeys
492487
entrypoints==0.4
493488
# via nbconvert
494489
et-xmlfile==1.1.0
@@ -889,17 +884,19 @@ joblib==1.2.0
889884
# via
890885
# -r python/requirements/test-requirements.txt
891886
# scikit-learn
887+
joserfc==1.5.0
888+
# via moto
892889
jschema-to-python==1.2.3
893890
# via cfn-lint
894891
json5==0.9.14
895892
# via jupyterlab-server
896-
jsondiff==2.0.0
897-
# via moto
898893
jsonpatch==1.32
899894
# via
900895
# -r python/requirements/cloud-requirements.txt
901896
# -r python/requirements/test-requirements.txt
902897
# cfn-lint
898+
jsonpath-ng==1.7.0
899+
# via moto
903900
jsonpickle==3.0.2
904901
# via jschema-to-python
905902
jsonpointer==2.4
@@ -1089,7 +1086,7 @@ more-itertools==10.7.0
10891086
# via configspace
10901087
mosaicml==0.3.1 ; python_version < "3.12"
10911088
# via -r python/requirements/ml/train-test-requirements.txt
1092-
moto==4.2.12
1089+
moto==5.1.18
10931090
# via -r python/requirements/test-requirements.txt
10941091
moviepy==0.2.3.1
10951092
# via -r python/requirements/ml/rllib-test-requirements.txt
@@ -1497,6 +1494,8 @@ plotly==5.23.0
14971494
# via ax-platform
14981495
pluggy==1.3.0
14991496
# via pytest
1497+
ply==3.11
1498+
# via jsonpath-ng
15001499
polars==1.36.1
15011500
# via -r python/requirements/test-requirements.txt
15021501
polars-runtime-32==1.36.1
@@ -1579,7 +1578,7 @@ py==1.11.0
15791578
# via pytest-forked
15801579
py-cpuinfo==9.0.0
15811580
# via deepspeed
1582-
py-partiql-parser==0.5.0
1581+
py-partiql-parser==0.6.3
15831582
# via moto
15841583
py-spy==0.4.0 ; python_version < "3.12"
15851584
# via -r python/requirements.txt
@@ -1605,7 +1604,6 @@ pyasn1==0.5.1
16051604
# via
16061605
# oauth2client
16071606
# pyasn1-modules
1608-
# python-jose
16091607
# rsa
16101608
pyasn1-modules==0.3.0
16111609
# via
@@ -1786,8 +1784,6 @@ python-dateutil==2.8.2
17861784
# strictyaml
17871785
python-dotenv==1.2.1
17881786
# via testcontainers
1789-
python-jose==3.3.0
1790-
# via moto
17911787
python-json-logger==2.0.7
17921788
# via jupyter-events
17931789
python-lsp-jsonrpc==1.0.0
@@ -1842,6 +1838,7 @@ pyyaml==6.0.3
18421838
# pymars
18431839
# pytorch-lightning
18441840
# ray
1841+
# responses
18451842
# timm
18461843
# transformers
18471844
# wandb
@@ -1926,7 +1923,7 @@ requests-oauthlib==2.0.0
19261923
# msrest
19271924
requests-toolbelt==1.0.0
19281925
# via comet-ml
1929-
responses==0.13.4
1926+
responses==0.25.8
19301927
# via
19311928
# -r python/requirements/ml/data-requirements.txt
19321929
# moto
@@ -1965,7 +1962,6 @@ rsa==4.7.2
19651962
# gcs-oauth2-boto-plugin
19661963
# google-auth
19671964
# oauth2client
1968-
# python-jose
19691965
ruamel-yaml==0.17.40
19701966
# via
19711967
# semgrep
@@ -2058,7 +2054,6 @@ six==1.16.0
20582054
# azure-core
20592055
# bleach
20602056
# configobj
2061-
# ecdsa
20622057
# fs
20632058
# gcs-oauth2-boto-plugin
20642059
# google-apitools
@@ -2080,7 +2075,6 @@ six==1.16.0
20802075
# python-dateutil
20812076
# pyu2f
20822077
# pyvmomi
2083-
# responses
20842078
# rfc3339-validator
20852079
# tensorboard
20862080
# tensorflow
@@ -2139,8 +2133,6 @@ sqlglot==25.6.1
21392133
# via fugue
21402134
sqlparse==0.5.1
21412135
# via mlflow-skinny
2142-
sshpubkeys==3.3.1
2143-
# via moto
21442136
stack-data==0.6.3
21452137
# via ipython
21462138
stanio==0.3.0

0 commit comments

Comments
 (0)