aws · kukushking · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025
diff --git a/awswrangler/s3/_read.py b/awswrangler/s3/_read.py
@@ -116,7 +116,8 @@ def _extract_partitions_dtypes_from_table_details(response: "GetTableResponseTyp
     return dtypes
 
 
-def _union(dfs: list[pd.DataFrame], ignore_index: bool) -> pd.DataFrame:
+def _concat_union_categoricals(dfs: list[pd.DataFrame], ignore_index: bool) -> pd.DataFrame:
+    """Concatenate dataframes with union of categorical columns."""
     cats: tuple[set[str], ...] = tuple(set(df.select_dtypes(include="category").columns) for df in dfs)
     for col in set.intersection(*cats):
         cat = union_categoricals([df[col] for df in dfs])

diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
@@ -33,6 +33,7 @@
 from awswrangler.s3._read import (
     _apply_partition_filter,
     _check_version_id,
+    _concat_union_categoricals,
     _extract_partitions_dtypes_from_table_details,
     _get_num_output_blocks,
     _get_path_ignore_suffix,
@@ -264,7 +265,7 @@ def _read_parquet_chunked(
                         yield df
                     else:
                         if next_slice is not None:
-                            df = pd.concat(objs=[next_slice, df], sort=False, copy=False)
+                            df = _concat_union_categoricals(dfs=[next_slice, df], ignore_index=False)
                         while len(df.index) >= chunked:
                             yield df.iloc[:chunked, :].copy()
                             df = df.iloc[chunked:, :]

diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py
@@ -19,10 +19,10 @@
 from awswrangler.s3._read import (
     _apply_partition_filter,
     _check_version_id,
+    _concat_union_categoricals,
     _get_num_output_blocks,
     _get_path_ignore_suffix,
     _get_path_root,
-    _union,
 )
 from awswrangler.s3._read_text_core import _read_text_file, _read_text_files_chunked
 from awswrangler.typing import RaySettings
@@ -70,7 +70,7 @@ def _read_text(
         itertools.repeat(s3_additional_kwargs),
         itertools.repeat(dataset),
     )
-    return _union(dfs=tables, ignore_index=ignore_index)
+    return _concat_union_categoricals(dfs=tables, ignore_index=ignore_index)
 
 
 def _read_text_format(