Skip to content

Commit dbc7817

Browse files
committed
Fix memory leaks: 1) move the allocation of na_hashset below the early `continue` so it is no longer leaked when a column is skipped, 2) free na_hashset in a `finally` block if _convert_tokens raises an exception, 3) destroy the hash table in kset_from_list before raising ValueError
1 parent 1a61e26 commit dbc7817

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

pandas/_libs/parsers.pyx

+20 -18
Original file line numberDiff line numberDiff line change
@@ -1058,18 +1058,6 @@ cdef class TextReader:
10581058

10591059
conv = self._get_converter(i, name)
10601060

1061-
# XXX
1062-
na_flist = set()
1063-
if self.na_filter:
1064-
na_list, na_flist = self._get_na_list(i, name)
1065-
if na_list is None:
1066-
na_filter = 0
1067-
else:
1068-
na_filter = 1
1069-
na_hashset = kset_from_list(na_list)
1070-
else:
1071-
na_filter = 0
1072-
10731061
col_dtype = None
10741062
if self.dtype is not None:
10751063
if isinstance(self.dtype, dict):
@@ -1094,13 +1082,26 @@ cdef class TextReader:
10941082
self.c_encoding)
10951083
continue
10961084

1097-
# Should return as the desired dtype (inferred or specified)
1098-
col_res, na_count = self._convert_tokens(
1099-
i, start, end, name, na_filter, na_hashset,
1100-
na_flist, col_dtype)
1085+
# XXX
1086+
na_flist = set()
1087+
if self.na_filter:
1088+
na_list, na_flist = self._get_na_list(i, name)
1089+
if na_list is None:
1090+
na_filter = 0
1091+
else:
1092+
na_filter = 1
1093+
na_hashset = kset_from_list(na_list)
1094+
else:
1095+
na_filter = 0
11011096

1102-
if na_filter:
1103-
self._free_na_set(na_hashset)
1097+
try:
1098+
# Should return as the desired dtype (inferred or specified)
1099+
col_res, na_count = self._convert_tokens(
1100+
i, start, end, name, na_filter, na_hashset,
1101+
na_flist, col_dtype)
1102+
finally:
1103+
if na_filter:
1104+
self._free_na_set(na_hashset)
11041105

11051106
if upcast_na and na_count > 0:
11061107
col_res = _maybe_upcast(col_res)
@@ -2047,6 +2048,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
20472048

20482049
# None creeps in sometimes, which isn't possible here
20492050
if not isinstance(val, bytes):
2051+
kh_destroy_str(table)
20502052
raise ValueError('Must be all encoded bytes')
20512053

20522054
k = kh_put_str(table, PyBytes_AsString(val), &ret)

0 commit comments

Comments
 (0)