Skip to content

Commit 9a9c203

Browse files
committed
BUG: Fix Series.str.zfill for ArrowDtype string arrays #61485
1 parent cfe54bd commit 9a9c203

File tree

6 files changed

+32
-3
lines changed

6 files changed

+32
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,8 +725,8 @@ Conversion
725725

726726
Strings
727727
^^^^^^^
728+
- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for ``ArrowDtype(pa.string())``; it is now supported through the ``_str_zfill`` implementation in ``ArrowExtensionArray`` (:issue:`61485`)
728729
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
729-
-
730730

731731
Interval
732732
^^^^^^^^

pandas/core/arrays/arrow/array.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2601,6 +2601,13 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
26012601
result = self._apply_elementwise(predicate)
26022602
return type(self)(pa.chunked_array(result))
26032603

2604+
def _str_zfill(self, width: int) -> Self:
    """
    Call ``zfill(width)`` on the elements of this array.

    The per-element call is routed through ``self._apply_elementwise`` and
    the result is wrapped in a new array of the same type as ``self``.

    Parameters
    ----------
    width : int
        Value forwarded to each element's ``zfill`` method.

    Returns
    -------
    Self
        New array built from ``pa.chunked_array`` of the processed values.
    """
    # TODO: Replace with pc.utf8_zfill when supported by arrow
    # Arrow ENH - https://github.com/apache/arrow/issues/46683
    padded_chunks = self._apply_elementwise(lambda val: val.zfill(width))
    return type(self)(pa.chunked_array(padded_chunks))
2610+
26042611
@property
26052612
def _dt_days(self) -> Self:
26062613
return type(self)(

pandas/core/arrays/string_.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,9 @@ def _cmp_method(self, other, op):
10771077

10781078
_arith_method = _cmp_method
10791079

1080+
def _str_zfill(self, width: int) -> Self:
    """
    Apply ``zfill(width)`` to the elements of this array via ``_str_map``.

    Parameters
    ----------
    width : int
        Value forwarded to each element's ``zfill`` method.

    Returns
    -------
    Self
        Whatever ``self._str_map`` returns for the padding function.
    """

    def pad(x):
        # Delegate to the element's own zfill implementation.
        return x.zfill(width)

    return self._str_map(pad)
1082+
10801083

10811084
class StringArrayNumpySemantics(StringArray):
10821085
_storage = "python"

pandas/core/strings/accessor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1938,8 +1938,8 @@ def zfill(self, width: int):
19381938
if not is_integer(width):
19391939
msg = f"width must be of integer type, not {type(width).__name__}"
19401940
raise TypeError(msg)
1941-
f = lambda x: x.zfill(width)
1942-
result = self._data.array._str_map(f)
1941+
1942+
result = self._data.array._str_zfill(width)
19431943
return self._wrap_result(result)
19441944

19451945
def slice(self, start=None, stop=None, step=None):

pandas/core/strings/object_array.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,3 +537,6 @@ def f(x):
537537
return empty_row
538538

539539
return [f(val) for val in np.asarray(self)]
540+
541+
def _str_zfill(self, width: int):
    """
    Apply ``zfill(width)`` to the elements of this array via ``_str_map``.

    Parameters
    ----------
    width : int
        Value forwarded to each element's ``zfill`` method.
    """
    # Bind the per-element callable first, then hand it to the mapper.
    zero_fill = lambda x: x.zfill(width)  # noqa: E731
    return self._str_map(zero_fill)

pandas/tests/strings/test_string_array.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,19 @@ def test_string_array_extract(nullable_string_dtype):
110110

111111
result = result.astype(object)
112112
tm.assert_equal(result, expected)
113+
114+
115+
@pytest.mark.parametrize(
    "values, width, expected",
    [
        # Shorter strings gain leading zeros; the missing value stays missing.
        (["a", "ab", "abc", None], 4, ["000a", "00ab", "0abc", None]),
        # A leading sign character stays in front of the inserted zeros.
        (["1", "-1", "+1", None], 4, ["0001", "-001", "+001", None]),
        # Strings already longer than ``width`` come back unchanged.
        (["1234", "-1234"], 3, ["1234", "-1234"]),
    ],
)
def test_string_array_zfill(nullable_string_dtype, values, width, expected):
    """Check ``Series.str.zfill`` output for the nullable string dtypes."""
    # GH #61485
    padded = Series(values, dtype=nullable_string_dtype).str.zfill(width)
    tm.assert_series_equal(padded, Series(expected, dtype=nullable_string_dtype))

0 commit comments

Comments
 (0)