From 7051521202d301f81a0a9e05b5baf0634997dbe0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 22 May 2020 14:26:44 +0200 Subject: [PATCH] CLN: consolidate arrow roundtrip tests for nullable dtypes in base masked tests --- .../tests/arrays/boolean/test_construction.py | 30 ----------- .../tests/arrays/integer/test_construction.py | 39 -------------- pandas/tests/arrays/masked/__init__.py | 0 .../tests/arrays/masked/test_arrow_compat.py | 53 +++++++++++++++++++ 4 files changed, 53 insertions(+), 69 deletions(-) create mode 100644 pandas/tests/arrays/masked/__init__.py create mode 100644 pandas/tests/arrays/masked/test_arrow_compat.py diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index bf1aba190f3e2..f7354a089df3b 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.arrays import BooleanArray @@ -346,31 +344,3 @@ def test_to_numpy_copy(): # mask = pd.array([True, False, True, None], dtype="boolean") # with pytest.raises(IndexError): # result = arr[mask] - - -@td.skip_if_no("pyarrow", min_version="0.15.0") -def test_arrow_array(data): - # protocol added in 0.15.0 - import pyarrow as pa - - arr = pa.array(data) - - # TODO use to_numpy(na_value=None) here - data_object = np.array(data, dtype=object) - data_object[data.isna()] = None - expected = pa.array(data_object, type=pa.bool_(), from_pandas=True) - assert arr.equals(expected) - - -@td.skip_if_no("pyarrow", min_version="0.15.1.dev") -def test_arrow_roundtrip(): - # roundtrip possible from arrow 1.0.0 - import pyarrow as pa - - data = pd.array([True, False, None], dtype="boolean") - df = pd.DataFrame({"a": data}) - table = pa.table(df) - assert table.field("a").type == "bool" - result = table.to_pandas() - assert isinstance(result["a"].dtype, pd.BooleanDtype) - tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 43936d8b95bd6..1893c4554bfbf 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.api.types import is_integer @@ -199,40 +197,3 @@ def test_to_integer_array(values, to_dtype, result_dtype): assert result.dtype == result_dtype() expected = integer_array(values, dtype=result_dtype()) tm.assert_extension_array_equal(result, expected) - - -@td.skip_if_no("pyarrow", min_version="0.15.0") -def test_arrow_array(data): - # protocol added in 0.15.0 - import pyarrow as pa - - arr = pa.array(data) - expected = np.array(data, dtype=object) - expected[data.isna()] = None - expected = pa.array(expected, type=data.dtype.name.lower(), from_pandas=True) - assert arr.equals(expected) - - -@td.skip_if_no("pyarrow", min_version="0.16.0") -def test_arrow_roundtrip(data): - # roundtrip possible from arrow 0.16.0 - import pyarrow as pa - - df = pd.DataFrame({"a": data}) - table = pa.table(df) - assert table.field("a").type == str(data.dtype.numpy_dtype) - result = table.to_pandas() - tm.assert_frame_equal(result, df) - - -@td.skip_if_no("pyarrow", min_version="0.16.0") -def test_arrow_from_arrow_uint(): - # https://github.com/pandas-dev/pandas/issues/31896 - # possible mismatch in types - import pyarrow as pa - - dtype = pd.UInt32Dtype() - result = dtype.__from_arrow__(pa.array([1, 2, 3, 4, None], type="int64")) - expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") - - tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/masked/__init__.py b/pandas/tests/arrays/masked/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py new file mode 100644 index 0000000000000..b63bb0fbd9a3b --- /dev/null +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -0,0 +1,53 @@ +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_EA_INT_DTYPES] +arrays += [pd.array([True, False, True, None], dtype="boolean")] + + +@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays]) +def data(request): + return request.param + + +@td.skip_if_no("pyarrow", min_version="0.15.0") +def test_arrow_array(data): + # protocol added in 0.15.0 + import pyarrow as pa + + arr = pa.array(data) + expected = pa.array( + data.to_numpy(object, na_value=None), + type=pa.from_numpy_dtype(data.dtype.numpy_dtype), + ) + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow", min_version="0.16.0") +def test_arrow_roundtrip(data): + # roundtrip possible from arrow 0.16.0 + import pyarrow as pa + + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == str(data.dtype.numpy_dtype) + result = table.to_pandas() + assert result["a"].dtype == data.dtype + tm.assert_frame_equal(result, df) + + +@td.skip_if_no("pyarrow", min_version="0.16.0") +def test_arrow_from_arrow_uint(): + # https://github.com/pandas-dev/pandas/issues/31896 + # possible mismatch in types + import pyarrow as pa + + dtype = pd.UInt32Dtype() + result = dtype.__from_arrow__(pa.array([1, 2, 3, 4, None], type="int64")) + expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") + + tm.assert_extension_array_equal(result, expected)