diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 358d9447b131d..82694de531db6 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -500,6 +500,7 @@ Conversion - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) - Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) - Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`) +- Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`) Strings ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ceede5fdb5577..061b01b6d169a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1913,7 +1913,9 @@ def to_dict(self, orient: str = "dict", into=dict): return into_c((k, v.to_dict(into)) for k, v in self.items()) elif orient == "list": - return into_c((k, v.tolist()) for k, v in self.items()) + return into_c( + (k, list(map(maybe_box_native, v.tolist()))) for k, v in self.items() + ) elif orient == "split": return into_c( @@ -1964,7 +1966,7 @@ def to_dict(self, orient: str = "dict", into=dict): if not self.index.is_unique: raise ValueError("DataFrame index must be unique for orient='index'.") return into_c( - (t[0], dict(zip(self.columns, t[1:]))) + (t[0], dict(zip(self.columns, map(maybe_box_native, t[1:])))) for t in self.itertuples(name=None) ) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 31ea3e582eeb2..6d5c32cae7368 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -344,3 +344,80 @@ def test_to_dict_orient_tight(self, index, columns): roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight") tm.assert_frame_equal(df, roundtrip) + + @pytest.mark.parametrize( + "orient", + ["dict", "list", "split", "records", "index", "tight"], + ) + @pytest.mark.parametrize( + "data,expected_types", + ( + ( + { + "a": [np.int64(1), 1, np.int64(3)], + "b": [np.float64(1.0), 2.0, np.float64(3.0)], + "c": [np.float64(1.0), 2, np.int64(3)], + "d": [np.float64(1.0), "a", np.int64(3)], + "e": [np.float64(1.0), ["a"], np.int64(3)], + "f": [np.float64(1.0), ("a",), np.int64(3)], + }, + { + "a": [int, int, int], + "b": [float, float, float], + "c": [float, float, float], + "d": [float, str, int], + "e": [float, list, int], + "f": [float, tuple, int], + }, + ), + ( + { + "a": [1, 2, 3], + "b": [1.1, 2.2, 3.3], + }, + { + "a": [int, int, int], + "b": [float, float, float], + }, + ), + ), + ) + def test_to_dict_returns_native_types(self, orient, data, expected_types): + # GH 46751 + # Tests we get back native types for all orient types + df = DataFrame(data) + result = df.to_dict(orient) + if orient == "dict": + assertion_iterator = ( + (i, key, value) + for key, index_value_map in result.items() + for i, value in index_value_map.items() + ) + elif orient == "list": + assertion_iterator = ( + (i, key, value) + for key, values in result.items() + for i, value in enumerate(values) + ) + elif orient in {"split", "tight"}: + assertion_iterator = ( + (i, key, result["data"][i][j]) + for i in result["index"] + for j, key in enumerate(result["columns"]) + ) + elif orient == "records": + assertion_iterator = ( + (i, key, value) + for i, record in enumerate(result) + for key, value in record.items() + ) + elif orient == "index": + assertion_iterator = ( + (i, key, value) + for i, record in result.items() + for key, value in record.items() + ) + + for i, key, value in assertion_iterator: + assert value == data[key][i] + assert type(value) is expected_types[key][i]