diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index 002d0020c2df1..b667075e87144 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -9,20 +9,9 @@ inputs: extra-specs: description: Extra packages to install required: false - pyarrow-version: - description: If set, overrides the PyArrow version in the Conda environment to the given string. - required: false runs: using: composite steps: - - name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }} - run: | - grep -q ' - pyarrow' ${{ inputs.environment-file }} - sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }} - cat ${{ inputs.environment-file }} - shell: bash - if: ${{ inputs.pyarrow-version }} - - name: Install ${{ inputs.environment-file }} uses: mamba-org/provision-with-micromamba@v12 with: diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml index 849fc92082f0e..fd6560d61b160 100644 --- a/.github/workflows/macos-windows.yml +++ b/.github/workflows/macos-windows.yml @@ -51,7 +51,6 @@ jobs: uses: ./.github/actions/setup-conda with: environment-file: ci/deps/${{ matrix.env_file }} - pyarrow-version: ${{ matrix.os == 'macos-latest' && '9' || '' }} - name: Build Pandas uses: ./.github/actions/build_pandas diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index a5187542f0514..1929b81df4709 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -27,7 +27,6 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] # Prevent the include jobs from overriding other jobs pattern: [""] - pyarrow_version: ["8", "9", "10"] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml @@ -75,21 +74,11 @@ jobs: # TODO(cython3): Re-enable once next-beta(after beta 1) comes out # There are some warnings failing the build with -werror pandas_ci: "0" - exclude: - - env_file: actions-38.yaml - pyarrow_version: "8" - - env_file: actions-38.yaml - pyarrow_version: "9" - - env_file: actions-39.yaml - pyarrow_version: "8" - - env_file: actions-39.yaml - pyarrow_version: "9" - - env_file: actions-310.yaml - pyarrow_version: "8" - - env_file: actions-310.yaml - pyarrow_version: "9" + - name: "Pyarrow Nightly" + env_file: actions-311-pyarrownightly.yaml + pattern: "not slow and not network and not single_cpu" fail-fast: false - name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }} + name: ${{ matrix.name || matrix.env_file }} env: ENV_FILE: ci/deps/${{ matrix.env_file }} PATTERN: ${{ matrix.pattern }} @@ -107,7 +96,7 @@ jobs: COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }} concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.pyarrow_version || '' }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }} + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }} cancel-in-progress: true services: @@ -166,7 +155,6 @@ jobs: uses: ./.github/actions/setup-conda with: environment-file: ${{ env.ENV_FILE }} - pyarrow-version: ${{ matrix.pyarrow_version }} - name: Build Pandas id: build diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 5238bc4966e6b..5b9919d8e4c1f 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -41,7 +41,7 @@ dependencies: - psycopg2 - pymysql - pytables - - pyarrow + - pyarrow>=7.0.0 - pyreadstat - python-snappy - pyxlsb diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml new file mode 100644 index 0000000000000..77e4fc9d2c2d9 --- /dev/null +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -0,0 +1,29 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.11 + + # build dependencies + - versioneer[toml] + - cython>=0.29.33 + + # test dependencies + - pytest>=7.0.0 + - pytest-cov + - pytest-xdist>=2.2.0 + - hypothesis>=6.34.2 + - pytest-asyncio>=0.17.0 + + # required dependencies + - python-dateutil + - numpy + - pytz + - pip + + - pip: + - "tzdata>=2022.1" + - "--extra-index-url https://pypi.fury.io/arrow-nightlies/" + - "--prefer-binary" + - "--pre" + - "pyarrow" diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 72417b8ff6dea..ed01238216e9e 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -41,7 +41,7 @@ dependencies: - psycopg2 - pymysql # - pytables>=3.8.0 # first version that supports 3.11 - - pyarrow + - pyarrow>=7.0.0 - pyreadstat - python-snappy - pyxlsb diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml index ddca181f3e584..3c498663c04df 100644 --- a/ci/deps/actions-38-downstream_compat.yaml +++ b/ci/deps/actions-38-downstream_compat.yaml @@ -39,7 +39,7 @@ dependencies: - openpyxl<3.1.1 - odfpy - psycopg2 - - pyarrow + - pyarrow>=7.0.0 - pymysql - pyreadstat - pytables diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index 18ce9dbfcd629..2a968f059952e 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -39,7 +39,7 @@ dependencies: - odfpy - pandas-gbq - psycopg2 - - pyarrow + - pyarrow>=7.0.0 - pymysql - pyreadstat - pytables diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 6b4331b8a1802..a1fba778bfc70 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -40,7 +40,7 @@ dependencies: - pandas-gbq - psycopg2 - pymysql - - pyarrow + - pyarrow>=7.0.0 - pyreadstat - pytables - python-snappy diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml index 48cef11ef6edf..7bc71483be34a 100644 --- a/ci/deps/circle-38-arm64.yaml +++ b/ci/deps/circle-38-arm64.yaml @@ -39,7 +39,7 @@ dependencies: - odfpy - pandas-gbq - psycopg2 - - pyarrow + - pyarrow>=7.0.0 - pymysql # Not provided on ARM #- pyreadstat diff --git a/environment.yml b/environment.yml index f108b429102ea..3ecc9763e5953 100644 --- a/environment.yml +++ b/environment.yml @@ -42,7 +42,7 @@ dependencies: - odfpy - py - psycopg2 - - pyarrow + - pyarrow>=7.0.0 - pymysql - pyreadstat - pytables diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index dd0b43c116266..7e4869589cee6 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -12,6 +12,7 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.util.version import Version @pytest.fixture @@ -406,15 +407,14 @@ def test_fillna_args(dtype, request): arr.fillna(value=1) -@td.skip_if_no("pyarrow") def test_arrow_array(dtype): # protocol added in 0.15.0 - import pyarrow as pa + pa = pytest.importorskip("pyarrow") data = pd.array(["a", "b", "c"], dtype=dtype) arr = pa.array(data) expected = pa.array(list(data), type=pa.string(), from_pandas=True) - if dtype.storage == "pyarrow": + if dtype.storage == "pyarrow" and Version(pa.__version__) <= Version("11.0.0"): expected = pa.chunked_array(expected) assert arr.equals(expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b9efcecb0c3eb..92c86e32ed580 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1019,7 +1019,10 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa): {"a": [1, 2]}, index=pd.Index([3, 4], name="test"), dtype="int64[pyarrow]" ) expected = df.copy() + import pyarrow + if Version(pyarrow.__version__) > Version("11.0.0"): + expected.index = expected.index.astype("int64[pyarrow]") check_round_trip( df, engine=pa, diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 8ff78cc073acf..714588d179aef 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -65,7 +65,7 @@ def test_show_versions_console(capsys): assert re.search(r"numpy\s*:\s[0-9]+\..*\n", result) # check optional dependency - assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) + assert re.search(r"pyarrow\s*:\s([0-9]+.*|None)\n", result) def test_json_output_match(capsys, tmpdir): diff --git a/requirements-dev.txt b/requirements-dev.txt index 1ad6c218fcdfb..2d263a07d53f4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -31,7 +31,7 @@ openpyxl<3.1.1 odfpy py psycopg2-binary -pyarrow +pyarrow>=7.0.0 pymysql pyreadstat tables