Skip to content

Commit 1e8d4b7

Browse files
authored
Reorder to_pandas extension dtype mapping
Addresses pandas-dev/pandas#53011. `types_mapper` always had the highest priority, because it ran last and overrode whatever had been set before. Switching the logical ordering so that it is applied first means that we don't need to call `_pandas_api.pandas_dtype(dtype)` when using the pyarrow backend, which resolves the issue of a complex `dtype` with `list` or `struct`.
1 parent d7bc378 commit 1e8d4b7

File tree

1 file changed

+20
-20
lines changed

1 file changed

+20
-20
lines changed

python/pyarrow/pandas_compat.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,25 @@ def _get_extension_dtypes(table, columns_metadata, types_mapper=None):
848848
if _pandas_api.extension_dtype is None:
849849
return ext_columns
850850

851+
# use the specified mapping of built-in arrow types to pandas dtypes
852+
if types_mapper:
853+
for field in table.schema:
854+
typ = field.type
855+
pandas_dtype = types_mapper(typ)
856+
if pandas_dtype is not None:
857+
ext_columns[field.name] = pandas_dtype
858+
859+
# infer from extension type in the schema
860+
for field in table.schema:
861+
typ = field.type
862+
if field.name not in ext_columns and isinstance(typ, pa.BaseExtensionType):
863+
try:
864+
pandas_dtype = typ.to_pandas_dtype()
865+
except NotImplementedError:
866+
pass
867+
else:
868+
ext_columns[field.name] = pandas_dtype
869+
851870
# infer the extension columns from the pandas metadata
852871
for col_meta in columns_metadata:
853872
try:
@@ -856,33 +875,14 @@ def _get_extension_dtypes(table, columns_metadata, types_mapper=None):
856875
name = col_meta['name']
857876
dtype = col_meta['numpy_type']
858877

859-
if dtype not in _pandas_supported_numpy_types:
878+
if name not in ext_columns and dtype not in _pandas_supported_numpy_types:
860879
# pandas_dtype is expensive, so avoid doing this for types
861880
# that are certainly numpy dtypes
862881
pandas_dtype = _pandas_api.pandas_dtype(dtype)
863882
if isinstance(pandas_dtype, _pandas_api.extension_dtype):
864883
if hasattr(pandas_dtype, "__from_arrow__"):
865884
ext_columns[name] = pandas_dtype
866885

867-
# infer from extension type in the schema
868-
for field in table.schema:
869-
typ = field.type
870-
if isinstance(typ, pa.BaseExtensionType):
871-
try:
872-
pandas_dtype = typ.to_pandas_dtype()
873-
except NotImplementedError:
874-
pass
875-
else:
876-
ext_columns[field.name] = pandas_dtype
877-
878-
# use the specified mapping of built-in arrow types to pandas dtypes
879-
if types_mapper:
880-
for field in table.schema:
881-
typ = field.type
882-
pandas_dtype = types_mapper(typ)
883-
if pandas_dtype is not None:
884-
ext_columns[field.name] = pandas_dtype
885-
886886
return ext_columns
887887

888888

0 commit comments

Comments
 (0)