Skip to content

Commit 55d1b7a

Browse files
committed
gh-107137: Add _PyTuple_NewNoTrack() internal C API
Fix a crash in PySequence_Tuple() when the tuple which is being initialized was accessed via GC introspection, like the gc.get_referrers() function. Now the function only tracks the tuple by the GC once it's fully initialized. Add _PyTuple_NewNoTrack() and _PyTuple_ResizeNoTrack() functions to the internal C API: similar to PyTuple_New() and _PyTuple_Resize(), but don't track the tuple by the GC. Modify PySequence_Tuple() to use these functions.
1 parent 032f480 commit 55d1b7a

File tree

5 files changed

+103
-29
lines changed

5 files changed

+103
-29
lines changed

Include/internal/pycore_tuple.h

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ extern "C" {
1111
extern void _PyTuple_MaybeUntrack(PyObject *);
1212
extern void _PyTuple_DebugMallocStats(FILE *out);
1313

14+
// Similar to PyTuple_New() and _PyTuple_Resize(), but don't track the tuple
15+
// by the GC.
16+
extern PyObject* _PyTuple_NewNoTrack(Py_ssize_t size);
17+
extern int _PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize);
18+
1419
/* runtime lifecycle */
1520

1621
extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *);

Lib/test/test_tuple.py

+23
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,29 @@ def test_lexicographic_ordering(self):
416416
self.assertLess(a, b)
417417
self.assertLess(b, c)
418418

419+
def test_sequence_tuple(self):
420+
# gh-39117, gh-59313, gh-107137: PySequence_Tuple() must not track
421+
# the tuple by the GC before it's fully initialized.
422+
423+
# TAG object used to find the tuple in objects tracked by the GC
424+
TAG = object()
425+
426+
def my_iter():
427+
yield TAG
428+
429+
for ref in gc.get_referrers(TAG):
430+
if isinstance(ref, tuple):
431+
raise Exception("PySequence_Tuple() leaks the tuple")
432+
433+
# Add 100 items to trigger _PyTuple_Resize() calls.
434+
# PySequence_Tuple() started by allocating a tuple of 10 items.
435+
for num in range(100):
436+
yield num
437+
438+
seq = tuple(my_iter())
439+
self.assertEqual(seq, (TAG, *range(100)))
440+
441+
419442
# Notes on testing hash codes. The primary thing is that Python doesn't
420443
# care about "random" hash codes. To the contrary, we like them to be
421444
# very regular when possible, so that the low-order bits are as evenly
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a crash in :c:func:`PySequence_Tuple` when the tuple which is being
2+
initialized was accessed via GC introspection, like the
3+
:func:`gc.get_referrers` function. Now the function only tracks the tuple by
4+
the GC once it's fully initialized. Patch by Victor Stinner.

Objects/abstract.c

+27-19
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "pycore_long.h" // _Py_IsNegative
99
#include "pycore_pyerrors.h" // _PyErr_Occurred()
1010
#include "pycore_pystate.h" // _PyThreadState_GET()
11+
#include "pycore_tuple.h" // _PyTuple_NewNoTrack()
1112
#include "pycore_unionobject.h" // _PyUnion_Check()
1213
#include <ctype.h>
1314
#include <stddef.h> // offsetof()
@@ -2074,11 +2075,6 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2)
20742075
PyObject *
20752076
PySequence_Tuple(PyObject *v)
20762077
{
2077-
PyObject *it; /* iter(v) */
2078-
Py_ssize_t n; /* guess for result tuple size */
2079-
PyObject *result = NULL;
2080-
Py_ssize_t j;
2081-
20822078
if (v == NULL) {
20832079
return null_error();
20842080
}
@@ -2091,28 +2087,36 @@ PySequence_Tuple(PyObject *v)
20912087
a copy, so there's no need for exactness below. */
20922088
return Py_NewRef(v);
20932089
}
2094-
if (PyList_CheckExact(v))
2090+
if (PyList_CheckExact(v)) {
20952091
return PyList_AsTuple(v);
2092+
}
20962093

20972094
/* Get iterator. */
2098-
it = PyObject_GetIter(v);
2099-
if (it == NULL)
2095+
PyObject *it = PyObject_GetIter(v); // iter(v)
2096+
if (it == NULL) {
21002097
return NULL;
2098+
}
21012099

21022100
/* Guess result size and allocate space. */
2103-
n = PyObject_LengthHint(v, 10);
2104-
if (n == -1)
2105-
goto Fail;
2106-
result = PyTuple_New(n);
2107-
if (result == NULL)
2101+
Py_ssize_t n = PyObject_LengthHint(v, 10); // guess for result tuple size
2102+
if (n == -1) {
2103+
Py_DECREF(it);
2104+
return NULL;
2105+
}
2106+
2107+
PyObject *result = _PyTuple_NewNoTrack(n);
2108+
if (result == NULL) {
21082109
goto Fail;
2110+
}
21092111

21102112
/* Fill the tuple. */
2111-
for (j = 0; ; ++j) {
2113+
Py_ssize_t j = 0;
2114+
for (; ; ++j) {
21122115
PyObject *item = PyIter_Next(it);
21132116
if (item == NULL) {
2114-
if (PyErr_Occurred())
2117+
if (PyErr_Occurred()) {
21152118
goto Fail;
2119+
}
21162120
break;
21172121
}
21182122
if (j >= n) {
@@ -2132,7 +2136,7 @@ PySequence_Tuple(PyObject *v)
21322136
goto Fail;
21332137
}
21342138
n = (Py_ssize_t)newn;
2135-
if (_PyTuple_Resize(&result, n) != 0) {
2139+
if (_PyTuple_ResizeNoTrack(&result, n) != 0) {
21362140
Py_DECREF(item);
21372141
goto Fail;
21382142
}
@@ -2141,11 +2145,15 @@ PySequence_Tuple(PyObject *v)
21412145
}
21422146

21432147
/* Cut tuple back if guess was too large. */
2144-
if (j < n &&
2145-
_PyTuple_Resize(&result, j) != 0)
2148+
if (j < n && _PyTuple_ResizeNoTrack(&result, j) != 0) {
21462149
goto Fail;
2147-
2150+
}
21482151
Py_DECREF(it);
2152+
2153+
// No need to track the empty tuple singleton
2154+
if (j != 0) {
2155+
PyObject_GC_Track(result);
2156+
}
21492157
return result;
21502158

21512159
Fail:

Objects/tupleobject.c

+44-10
Original file line numberDiff line numberDiff line change
@@ -65,24 +65,38 @@ tuple_get_empty(void)
6565
return Py_NewRef(&_Py_SINGLETON(tuple_empty));
6666
}
6767

68-
PyObject *
69-
PyTuple_New(Py_ssize_t size)
68+
69+
static inline PyObject *
70+
tuple_new_capi(Py_ssize_t size, int track)
7071
{
71-
PyTupleObject *op;
7272
if (size == 0) {
7373
return tuple_get_empty();
7474
}
75-
op = tuple_alloc(size);
75+
PyTupleObject *op = tuple_alloc(size);
7676
if (op == NULL) {
7777
return NULL;
7878
}
7979
for (Py_ssize_t i = 0; i < size; i++) {
8080
op->ob_item[i] = NULL;
8181
}
82-
_PyObject_GC_TRACK(op);
82+
if (track) {
83+
_PyObject_GC_TRACK(op);
84+
}
8385
return (PyObject *) op;
8486
}
8587

88+
PyObject *
89+
_PyTuple_NewNoTrack(Py_ssize_t size)
90+
{
91+
return tuple_new_capi(size, 0);
92+
}
93+
94+
PyObject *
95+
PyTuple_New(Py_ssize_t size)
96+
{
97+
return tuple_new_capi(size, 1);
98+
}
99+
86100
Py_ssize_t
87101
PyTuple_Size(PyObject *op)
88102
{
@@ -894,8 +908,8 @@ PyTypeObject PyTuple_Type = {
894908
efficiently. In any case, don't use this if the tuple may already be
895909
known to some other part of the code. */
896910

897-
int
898-
_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
911+
static int
912+
tuple_resize(PyObject **pv, Py_ssize_t newsize, int track)
899913
{
900914
PyTupleObject *v;
901915
PyTupleObject *sv;
@@ -927,7 +941,12 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
927941
/* The empty tuple is statically allocated so we never
928942
resize it in-place. */
929943
Py_DECREF(v);
930-
*pv = PyTuple_New(newsize);
944+
if (track) {
945+
*pv = PyTuple_New(newsize);
946+
}
947+
else {
948+
*pv = _PyTuple_NewNoTrack(newsize);
949+
}
931950
return *pv == NULL ? -1 : 0;
932951
}
933952

@@ -952,14 +971,29 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
952971
}
953972
_Py_NewReferenceNoTotal((PyObject *) sv);
954973
/* Zero out items added by growing */
955-
if (newsize > oldsize)
974+
if (newsize > oldsize) {
956975
memset(&sv->ob_item[oldsize], 0,
957976
sizeof(*sv->ob_item) * (newsize - oldsize));
977+
}
958978
*pv = (PyObject *) sv;
959-
_PyObject_GC_TRACK(sv);
979+
if (track) {
980+
_PyObject_GC_TRACK(*pv);
981+
}
960982
return 0;
961983
}
962984

985+
int
986+
_PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize)
987+
{
988+
return tuple_resize(pv, newsize, 0);
989+
}
990+
991+
int
992+
_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
993+
{
994+
return tuple_resize(pv, newsize, 1);
995+
}
996+
963997

964998
static void maybe_freelist_clear(PyInterpreterState *, int);
965999

0 commit comments

Comments
 (0)