Skip to content

Commit 9ead4ff

Browse files
[2.3.x] Only use new string dtype repr for the new (NaN-based) string dtype
1 parent 4bdac7c commit 9ead4ff

File tree

5 files changed: +21 -16 lines changed

pandas/core/arrays/string_.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -119,10 +119,10 @@ class StringDtype(StorageExtensionDtype):
119119
Examples
120120
--------
121121
>>> pd.StringDtype()
122-
<StringDtype(storage='python', na_value=<NA>)>
122+
string[python]
123123
124124
>>> pd.StringDtype(storage="pyarrow")
125-
<StringDtype(na_value=<NA>)>
125+
string[pyarrow]
126126
"""
127127

128128
@property
@@ -194,8 +194,11 @@ def __init__(
194194
self._na_value = na_value
195195

196196
def __repr__(self) -> str:
197-
storage = "" if self.storage == "pyarrow" else "storage='python', "
198-
return f"<StringDtype({storage}na_value={self._na_value})>"
197+
if self._na_value is libmissing.NA:
198+
return f"{self.name}[{self.storage}]"
199+
else:
200+
storage = "" if self.storage == "pyarrow" else "storage='python', "
201+
return f"<StringDtype({storage}na_value={self._na_value})>"
199202

200203
def __eq__(self, other: object) -> bool:
201204
# we need to override the base class __eq__ because na_value (NA or NaN)

pandas/core/generic.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7021,13 +7021,12 @@ def convert_dtypes(
70217021
2 3 z <NA> <NA> 20 200.0
70227022
70237023
>>> dfn.dtypes
7024-
a Int32
7025-
b string
7026-
c boolean
7027-
d string
7028-
e Int64
7029-
f Float64
7030-
dtype: object
7024+
a Int32
7025+
b string[python]
7026+
c boolean
7027+
d string[python]
7028+
e Int64
7029+
f Float64
70317030
70327031
Start with a Series of strings and missing data represented by ``np.nan``.
70337032

pandas/io/formats/format.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@
6666
ExtensionArray,
6767
TimedeltaArray,
6868
)
69+
from pandas.core.arrays.string_ import StringDtype
6970
from pandas.core.base import PandasObject
7071
import pandas.core.common as com
7172
from pandas.core.indexes.api import (
@@ -1231,6 +1232,8 @@ def _format(x):
12311232
return self.na_rep
12321233
elif isinstance(x, PandasObject):
12331234
return str(x)
1235+
elif isinstance(x, StringDtype) and x.na_value is NA:
1236+
return repr(x)
12341237
else:
12351238
# object dtype
12361239
return str(formatter(x))

pandas/tests/arrays/string_/test_string.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,11 +126,11 @@ def test_repr(dtype):
126126
def test_dtype_repr(dtype):
127127
if dtype.storage == "pyarrow":
128128
if dtype.na_value is pd.NA:
129-
assert repr(dtype) == "<StringDtype(na_value=<NA>)>"
129+
assert repr(dtype) == "string[pyarrow]"
130130
else:
131131
assert repr(dtype) == "<StringDtype(na_value=nan)>"
132132
elif dtype.na_value is pd.NA:
133-
assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>"
133+
assert repr(dtype) == "string[python]"
134134
else:
135135
assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
136136

pandas/tests/io/formats/test_to_string.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -758,9 +758,9 @@ def test_to_string_string_dtype(self):
758758
result = df.dtypes.to_string()
759759
expected = dedent(
760760
"""\
761-
x string
762-
y string
763-
z int64[pyarrow]"""
761+
x string[pyarrow]
762+
y string[python]
763+
z int64[pyarrow]"""
764764
)
765765
assert result == expected
766766

0 commit comments

Comments (0)