Skip to content

Commit d53784d

Browse files
colesbury authored and nascheme committed
pythongh-112529: Use atomic operations for gcstate->collecting
The `collecting` field in `GCState` is used to prevent overlapping garbage collections within the same interpreter. This is updated to use atomic operations in order to be thread-safe in `--disable-gil` builds. The GC code is refactored a bit to support this. More of the logic is pushed down to `gc_collect_main()` so that we can safely order the logic setting `collecting`, the selection of the generation, and the invocation of callbacks with respect to the atomic operations and the (future) stop-the-world pauses. The change uses atomic operations for both `--disable-gil` and the default build (with the GIL) to avoid extra `#ifdef` guards and ease the maintenance burden.
1 parent d4a6229 commit d53784d

File tree

1 file changed

+73
-79
lines changed

1 file changed

+73
-79
lines changed

Modules/gcmodule.c

Lines changed: 73 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,20 @@ module gc
7474
#define AS_GC(op) _Py_AS_GC(op)
7575
#define FROM_GC(gc) _Py_FROM_GC(gc)
7676

77+
// Automatically choose the generation that needs collecting.
78+
#define GENERATION_AUTO (-1)
79+
80+
typedef enum {
81+
// GC was triggered by heap allocation
82+
_Py_GC_REASON_HEAP,
83+
84+
// GC was called during shutdown
85+
_Py_GC_REASON_SHUTDOWN,
86+
87+
// GC was called by gc.collect() or PyGC_Collect()
88+
_Py_GC_REASON_MANUAL
89+
} _PyGC_Reason;
90+
7791

7892
static inline int
7993
gc_is_collecting(PyGC_Head *g)
@@ -1192,14 +1206,20 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
11921206
gc_list_merge(resurrected, old_generation);
11931207
}
11941208

1209+
1210+
static void
1211+
invoke_gc_callback(PyThreadState *tstate, const char *phase,
1212+
int generation, Py_ssize_t collected,
1213+
Py_ssize_t uncollectable);
1214+
1215+
static int
1216+
gc_select_generation(GCState *gcstate);
1217+
11951218
/* This is the main function. Read this to understand how the
11961219
* collection process works. */
11971220
static Py_ssize_t
1198-
gc_collect_main(PyThreadState *tstate, int generation,
1199-
Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
1200-
int nofail)
1221+
gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
12011222
{
1202-
GC_STAT_ADD(generation, collections, 1);
12031223
#ifdef Py_STATS
12041224
if (_Py_stats) {
12051225
_Py_stats->object_stats.object_visits = 0;
@@ -1221,6 +1241,31 @@ gc_collect_main(PyThreadState *tstate, int generation,
12211241
assert(gcstate->garbage != NULL);
12221242
assert(!_PyErr_Occurred(tstate));
12231243

1244+
int expected = 0;
1245+
if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
1246+
// Don't start a garbage collection if one is already in progress.
1247+
return 0;
1248+
}
1249+
1250+
if (generation == GENERATION_AUTO) {
1251+
// Select the oldest generation that needs collecting. We will collect
1252+
// objects from that generation and all generations younger than it.
1253+
generation = gc_select_generation(gcstate);
1254+
if (generation < 0) {
1255+
// No generation needs to be collected.
1256+
_Py_atomic_store_int(&gcstate->collecting, 0);
1257+
return 0;
1258+
}
1259+
}
1260+
1261+
assert(generation >= 0 && generation < NUM_GENERATIONS);
1262+
1263+
GC_STAT_ADD(generation, collections, 1);
1264+
1265+
if (reason != _Py_GC_REASON_SHUTDOWN) {
1266+
invoke_gc_callback(tstate, "start", generation, 0, 0);
1267+
}
1268+
12241269
if (gcstate->debug & DEBUG_STATS) {
12251270
PySys_WriteStderr("gc: collecting generation %d...\n", generation);
12261271
show_stats_each_generations(gcstate);
@@ -1340,7 +1385,7 @@ gc_collect_main(PyThreadState *tstate, int generation,
13401385
}
13411386

13421387
if (_PyErr_Occurred(tstate)) {
1343-
if (nofail) {
1388+
if (reason == _Py_GC_REASON_SHUTDOWN) {
13441389
_PyErr_Clear(tstate);
13451390
}
13461391
else {
@@ -1349,13 +1394,6 @@ gc_collect_main(PyThreadState *tstate, int generation,
13491394
}
13501395

13511396
/* Update stats */
1352-
if (n_collected) {
1353-
*n_collected = m;
1354-
}
1355-
if (n_uncollectable) {
1356-
*n_uncollectable = n;
1357-
}
1358-
13591397
struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
13601398
stats->collections++;
13611399
stats->collected += m;
@@ -1374,7 +1412,12 @@ gc_collect_main(PyThreadState *tstate, int generation,
13741412
PyDTrace_GC_DONE(n + m);
13751413
}
13761414

1415+
if (reason != _Py_GC_REASON_SHUTDOWN) {
1416+
invoke_gc_callback(tstate, "stop", generation, m, n);
1417+
}
1418+
13771419
assert(!_PyErr_Occurred(tstate));
1420+
_Py_atomic_store_int(&gcstate->collecting, 0);
13781421
return n + m;
13791422
}
13801423

@@ -1433,29 +1476,12 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
14331476
assert(!_PyErr_Occurred(tstate));
14341477
}
14351478

1436-
/* Perform garbage collection of a generation and invoke
1437-
* progress callbacks.
1438-
*/
1439-
static Py_ssize_t
1440-
gc_collect_with_callback(PyThreadState *tstate, int generation)
1441-
{
1442-
assert(!_PyErr_Occurred(tstate));
1443-
Py_ssize_t result, collected, uncollectable;
1444-
invoke_gc_callback(tstate, "start", generation, 0, 0);
1445-
result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
1446-
invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1447-
assert(!_PyErr_Occurred(tstate));
1448-
return result;
1449-
}
1450-
1451-
static Py_ssize_t
1452-
gc_collect_generations(PyThreadState *tstate)
1479+
/* Find the oldest generation (highest numbered) where the count
1480+
* exceeds the threshold. Objects in that generation and
1481+
* generations younger than it will be collected. */
1482+
static int
1483+
gc_select_generation(GCState *gcstate)
14531484
{
1454-
GCState *gcstate = &tstate->interp->gc;
1455-
/* Find the oldest generation (highest numbered) where the count
1456-
* exceeds the threshold. Objects in the that generation and
1457-
* generations younger than it will be collected. */
1458-
Py_ssize_t n = 0;
14591485
for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
14601486
if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
14611487
/* Avoid quadratic performance degradation in number
@@ -1497,13 +1523,16 @@ gc_collect_generations(PyThreadState *tstate)
14971523
if (i == NUM_GENERATIONS - 1
14981524
&& gcstate->long_lived_pending < gcstate->long_lived_total / 4)
14991525
continue;
1500-
n = gc_collect_with_callback(tstate, i);
1501-
break;
1526+
return i;
15021527
}
15031528
}
1504-
return n;
1529+
return -1;
15051530
}
15061531

1532+
1533+
1534+
1535+
15071536
#include "clinic/gcmodule.c.h"
15081537

15091538
/*[clinic input]
@@ -1572,18 +1601,7 @@ gc_collect_impl(PyObject *module, int generation)
15721601
return -1;
15731602
}
15741603

1575-
GCState *gcstate = &tstate->interp->gc;
1576-
Py_ssize_t n;
1577-
if (gcstate->collecting) {
1578-
/* already collecting, don't do anything */
1579-
n = 0;
1580-
}
1581-
else {
1582-
gcstate->collecting = 1;
1583-
n = gc_collect_with_callback(tstate, generation);
1584-
gcstate->collecting = 0;
1585-
}
1586-
return n;
1604+
return gc_collect_main(tstate, generation, _Py_GC_REASON_MANUAL);
15871605
}
15881606

15891607
/*[clinic input]
@@ -2120,17 +2138,9 @@ PyGC_Collect(void)
21202138
}
21212139

21222140
Py_ssize_t n;
2123-
if (gcstate->collecting) {
2124-
/* already collecting, don't do anything */
2125-
n = 0;
2126-
}
2127-
else {
2128-
gcstate->collecting = 1;
2129-
PyObject *exc = _PyErr_GetRaisedException(tstate);
2130-
n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
2131-
_PyErr_SetRaisedException(tstate, exc);
2132-
gcstate->collecting = 0;
2133-
}
2141+
PyObject *exc = _PyErr_GetRaisedException(tstate);
2142+
n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
2143+
_PyErr_SetRaisedException(tstate, exc);
21342144

21352145
return n;
21362146
}
@@ -2144,16 +2154,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
21442154
during interpreter shutdown (and then never finish it).
21452155
See http://bugs.python.org/issue8713#msg195178 for an example.
21462156
*/
2147-
GCState *gcstate = &tstate->interp->gc;
2148-
if (gcstate->collecting) {
2149-
return 0;
2150-
}
2151-
2152-
Py_ssize_t n;
2153-
gcstate->collecting = 1;
2154-
n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2155-
gcstate->collecting = 0;
2156-
return n;
2157+
return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
21572158
}
21582159

21592160
void
@@ -2271,10 +2272,6 @@ PyObject_IS_GC(PyObject *obj)
22712272
void
22722273
_Py_ScheduleGC(PyInterpreterState *interp)
22732274
{
2274-
GCState *gcstate = &interp->gc;
2275-
if (gcstate->collecting == 1) {
2276-
return;
2277-
}
22782275
_Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
22792276
}
22802277

@@ -2292,7 +2289,7 @@ _PyObject_GC_Link(PyObject *op)
22922289
if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
22932290
gcstate->enabled &&
22942291
gcstate->generations[0].threshold &&
2295-
!gcstate->collecting &&
2292+
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
22962293
!_PyErr_Occurred(tstate))
22972294
{
22982295
_Py_ScheduleGC(tstate->interp);
@@ -2302,10 +2299,7 @@ _PyObject_GC_Link(PyObject *op)
23022299
void
23032300
_Py_RunGC(PyThreadState *tstate)
23042301
{
2305-
GCState *gcstate = &tstate->interp->gc;
2306-
gcstate->collecting = 1;
2307-
gc_collect_generations(tstate);
2308-
gcstate->collecting = 0;
2302+
gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
23092303
}
23102304

23112305
static PyObject *

0 commit comments

Comments
 (0)