Skip to content

Commit 74f01a8

Browse files
BUG: Fix potential segfault after pd.Categorical(pd.Series(...), categories=...)
1 parent f9cb581 commit 74f01a8

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

doc/source/whatsnew/v0.24.2.rst

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ Fixed Regressions
2727
- Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. (:issue:`25184`)
2828
- Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ``Categorical`` data (:issue:`25299`)
2929

30+
- Fixed regression in ``Categorical``, where supplying a ``Series`` in the constructor could create a broken object which could cause segfaults (:issue:`25318`)
31+
3032
.. _whatsnew_0242.enhancements:
3133

3234
Enhancements

pandas/core/arrays/categorical.py

+3-8
Original file line numberDiff line numberDiff line change
@@ -323,14 +323,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
323323
# we may have dtype.categories be None, and we need to
324324
# infer categories in a factorization step further below
325325

326-
if is_categorical(values):
327-
# GH23814, for perf, if values._values already an instance of
328-
# Categorical, set values to codes, and run fastpath
329-
if (isinstance(values, (ABCSeries, ABCIndexClass)) and
330-
isinstance(values._values, type(self))):
331-
values = values._values.codes.copy()
332-
fastpath = True
333-
334326
if fastpath:
335327
self._codes = coerce_indexer_dtype(values, dtype.categories)
336328
self._dtype = self._dtype.update_dtype(dtype)
@@ -2625,6 +2617,9 @@ def _recode_for_categories(codes, old_categories, new_categories):
26252617
if len(old_categories) == 0:
26262618
# All null anyway, so just retain the nulls
26272619
return codes.copy()
2620+
elif new_categories.equals(old_categories):
2621+
# Same categories, so no need to actually recode
2622+
return codes.copy()
26282623
indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories),
26292624
new_categories)
26302625
new_codes = take_1d(indexer, codes.copy(), fill_value=-1)

pandas/tests/arrays/categorical/test_constructors.py

+10
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,16 @@ def test_constructor(self):
149149
categories=["a", "b", "c", "d"])
150150
tm.assert_categorical_equal(c1, c2)
151151

152+
# GH25318
153+
c0 = Categorical(["a", "b", "c", "a"])
154+
c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"])
155+
156+
c2 = Categorical(c0, categories=c1.categories)
157+
tm.assert_categorical_equal(c1, c2)
158+
159+
c3 = Categorical(Series(c0), categories=c1.categories)
160+
tm.assert_categorical_equal(c1, c3)
161+
152162
# This should result in integer categories, not float!
153163
cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
154164
assert is_integer_dtype(cat.categories)

0 commit comments

Comments
 (0)