From 07fec060050df00a478a57b48051fecee1d7b0d0 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 20 Sep 2020 04:23:13 +0000 Subject: [PATCH 1/2] TST: DataFrame.to_parquet accepts pathlib.Path with partition_cols defined --- pandas/tests/io/test_parquet.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 35a400cba8671..dd665337f3cf1 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -3,6 +3,7 @@ from distutils.version import LooseVersion from io import BytesIO import os +import pathlib from warnings import catch_warnings import numpy as np @@ -663,6 +664,20 @@ def test_partition_cols_string(self, pa, df_full): assert len(dataset.partitions.partition_names) == 1 assert dataset.partitions.partition_names == set(partition_cols_list) + def test_partition_cols_pathlib(self, pa, df_compat): + # GH 35902 + + partition_cols = "B" + partition_cols_list = [partition_cols] + df = df_compat + + with tm.ensure_clean_dir() as path_str: + df.to_parquet(path_str, partition_cols=partition_cols_list) + + with tm.ensure_clean_dir() as path_str: + path_posix = pathlib.Path(path_str) + df_compat.to_parquet(path_posix, partition_cols=partition_cols_list) + def test_empty_dataframe(self, pa): # GH #27339 df = pd.DataFrame() From f1c7e67115e8d53b058cc87ae7c92dd173eb1460 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 22 Sep 2020 16:34:45 +0000 Subject: [PATCH 2/2] feedback --- pandas/tests/io/test_parquet.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index dd665337f3cf1..cf8b34a52139e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -664,7 +664,10 @@ def test_partition_cols_string(self, pa, df_full): assert len(dataset.partitions.partition_names) == 1 assert dataset.partitions.partition_names == set(partition_cols_list) - def test_partition_cols_pathlib(self, pa, df_compat): + @pytest.mark.parametrize( + "path_type", [lambda path: path, lambda path: pathlib.Path(path)] + ) + def test_partition_cols_pathlib(self, pa, df_compat, path_type): # GH 35902 partition_cols = "B" @@ -672,11 +675,8 @@ def test_partition_cols_pathlib(self, pa, df_compat): df = df_compat with tm.ensure_clean_dir() as path_str: - df.to_parquet(path_str, partition_cols=partition_cols_list) - - with tm.ensure_clean_dir() as path_str: - path_posix = pathlib.Path(path_str) - df_compat.to_parquet(path_posix, partition_cols=partition_cols_list) + path = path_type(path_str) + df.to_parquet(path, partition_cols=partition_cols_list) def test_empty_dataframe(self, pa): # GH #27339