Skip to content

Commit 9e5a62f

Browse files
authored
PERF: Fix reference leak in read_hdf (#50714)
* PERF: Fix reference leak in read_hdf * address comments
1 parent 89a76f6 commit 9e5a62f

File tree

4 files changed

+26
-1
lines changed

4 files changed

+26
-1
lines changed

asv_bench/benchmarks/io/hdf.py

+8
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,17 @@ def setup(self, format):
128128
self.df["object"] = tm.makeStringIndex(N)
129129
self.df.to_hdf(self.fname, "df", format=format)
130130

131+
# Numeric df
132+
self.df1 = self.df.copy()
133+
self.df1 = self.df1.reset_index()
134+
self.df1.to_hdf(self.fname, "df1", format=format)
135+
131136
def time_read_hdf(self, format):
132137
read_hdf(self.fname, "df")
133138

139+
def peakmem_read_hdf(self, format):
140+
read_hdf(self.fname, "df")
141+
134142
def time_write_hdf(self, format):
135143
self.df.to_hdf(self.fname, "df", format=format)
136144

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,7 @@ Performance improvements
869869
- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`)
870870
- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsets (:issue:`35296`)
871871
- Performance improvement in :meth:`.SeriesGroupBy.value_counts` with categorical dtype (:issue:`46202`)
872+
- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
872873

873874
.. ---------------------------------------------------------------------------
874875
.. _whatsnew_200.bug_fixes:

pandas/io/pytables.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2057,7 +2057,9 @@ def convert(
20572057

20582058
# values is a recarray
20592059
if values.dtype.fields is not None:
2060-
values = values[self.cname]
2060+
# Copy, otherwise values will be a view
2061+
# preventing the original recarray from being freed
2062+
values = values[self.cname].copy()
20612063

20622064
val_kind = _ensure_decoded(self.kind)
20632065
values = _maybe_convert(values, val_kind, encoding, errors)

pandas/tests/io/pytables/test_read.py

+14
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,20 @@ def test_read_hdf_open_store(tmp_path, setup_path):
214214
assert store.is_open
215215

216216

217+
def test_read_hdf_index_not_view(tmp_path, setup_path):
218+
# GH 37441
219+
# Ensure that the index of the DataFrame is not a view
220+
# into the original recarray that pytables reads in
221+
df = DataFrame(np.random.rand(4, 5), index=[0, 1, 2, 3], columns=list("ABCDE"))
222+
223+
path = tmp_path / setup_path
224+
df.to_hdf(path, "df", mode="w", format="table")
225+
226+
df2 = read_hdf(path, "df")
227+
assert df2.index._data.base is None
228+
tm.assert_frame_equal(df, df2)
229+
230+
217231
def test_read_hdf_iterator(tmp_path, setup_path):
218232
df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
219233
df.index.name = "letters"

0 commit comments

Comments
 (0)