From 04ae6ad2369822e56fa90e26b08c62ecaf195c1d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Sep 2024 14:53:57 +0200 Subject: [PATCH] TST (string dtype): resolve all infer_string TODO/xfails in pandas/tests/arrays --- pandas/core/arrays/string_arrow.py | 6 +++++- .../arrays/categorical/test_analytics.py | 20 +++++++++---------- pandas/tests/arrays/integer/test_reduction.py | 7 +------ 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index e8e74b0ba1215..80cf7571ecec2 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -488,9 +488,13 @@ def _reduce( arr = pc.or_kleene(nas, pc.not_equal(self._pa_array, "")) else: arr = pc.not_equal(self._pa_array, "") - return ArrowExtensionArray(arr)._reduce( + result = ArrowExtensionArray(arr)._reduce( name, skipna=skipna, keepdims=keepdims, **kwargs ) + if keepdims: + # ArrowExtensionArray will return a length-1 bool[pyarrow] array + return result.astype(np.bool_) + return result result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs) if name in ("argmin", "argmax") and isinstance(result, pa.Array): diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 52fd80cd196e0..47fa354e12393 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -4,12 +4,7 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import ( - HAS_PYARROW, - PYPY, -) +from pandas.compat import PYPY from pandas import ( Categorical, @@ -299,10 +294,7 @@ def test_nbytes(self): exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories assert cat.nbytes == exp - @pytest.mark.xfail( - using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" - ) - def test_memory_usage(self): + def test_memory_usage(self, using_infer_string): cat = Categorical([1, 2, 3]) # .categories is an index, so we include the hashtable @@ -310,7 +302,13 @@ def test_memory_usage(self): assert 0 < cat.nbytes <= cat.memory_usage(deep=True) cat = Categorical(["foo", "foo", "bar"]) - assert cat.memory_usage(deep=True) > cat.nbytes + if using_infer_string: + if cat.categories.dtype.storage == "python": + assert cat.memory_usage(deep=True) > cat.nbytes + else: + assert cat.memory_usage(deep=True) >= cat.nbytes + else: + assert cat.memory_usage(deep=True) > cat.nbytes if not PYPY: # sys.getsizeof will call the .memory_usage with diff --git a/pandas/tests/arrays/integer/test_reduction.py b/pandas/tests/arrays/integer/test_reduction.py index e485c7f79b475..1c91cd25ba69c 100644 --- a/pandas/tests/arrays/integer/test_reduction.py +++ b/pandas/tests/arrays/integer/test_reduction.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.compat import HAS_PYARROW - import pandas as pd from pandas import ( DataFrame, @@ -104,10 +102,7 @@ def test_groupby_reductions(op, expected): ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], ], ) -def test_mixed_reductions(request, op, expected, using_infer_string): - if op in ["any", "all"] and using_infer_string and HAS_PYARROW: - # TODO(infer_string) inconsistent result type - request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) +def test_mixed_reductions(op, expected): df = DataFrame( { "A": ["a", "b", "b"],