Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/gc.c
Line
Count
Source
1
//  This implements the reference cycle garbage collector.
2
//  The Python module interface to the collector is in gcmodule.c.
3
//  See InternalDocs/garbage_collector.md for more infromation.
4
5
#include "Python.h"
6
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
7
#include "pycore_dict.h"          // _PyInlineValuesSize()
8
#include "pycore_initconfig.h"    // _PyStatus_OK()
9
#include "pycore_context.h"
10
#include "pycore_interp.h"        // PyInterpreterState.gc
11
#include "pycore_interpframe.h"   // _PyFrame_GetLocalsArray()
12
#include "pycore_object.h"
13
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
14
#include "pycore_pyerrors.h"
15
#include "pycore_pystate.h"       // _PyThreadState_GET()
16
#include "pycore_tuple.h"         // _PyTuple_MaybeUntrack()
17
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
18
#include "pydtrace.h"
19
20
#if !defined(Py_GIL_DISABLED)
21
22
typedef struct _gc_runtime_state GCState;
23
24
#ifdef Py_DEBUG
25
#  define GC_DEBUG
26
#endif
27
28
1.33G
#define GC_NEXT _PyGCHead_NEXT
29
114M
#define GC_PREV _PyGCHead_PREV
30
31
// update_refs() set this bit for all objects in current generation.
32
// subtract_refs() and move_unreachable() uses this to distinguish
33
// visited object is in GCing or not.
34
//
35
// move_unreachable() removes this flag from reachable objects.
36
// Only unreachable objects have this flag.
37
//
38
// No objects in interpreter have this flag after GC ends.
39
1.89G
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
40
41
// Lowest bit of _gc_next is used for UNREACHABLE flag.
42
//
43
// This flag represents the object is in unreachable list in move_unreachable()
44
//
45
// Although this flag is used only in move_unreachable(), move_unreachable()
46
// doesn't clear this flag to skip unnecessary iteration.
47
// move_legacy_finalizers() removes this flag instead.
48
// Between them, unreachable list is not normal list and we can not use
49
// most gc_list_* functions for it.
50
392M
#define NEXT_MASK_UNREACHABLE  (1)
51
52
2.40G
#define AS_GC(op) _Py_AS_GC(op)
53
1.45G
#define FROM_GC(gc) _Py_FROM_GC(gc)
54
55
// Automatically choose the generation that needs collecting.
56
204k
#define GENERATION_AUTO (-1)
57
58
static inline int
59
gc_is_collecting(PyGC_Head *g)
60
1.17G
{
61
1.17G
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
62
1.17G
}
63
64
static inline void
65
gc_clear_collecting(PyGC_Head *g)
66
357M
{
67
357M
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
68
357M
}
69
70
static inline Py_ssize_t
71
gc_get_refs(PyGC_Head *g)
72
975M
{
73
975M
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
74
975M
}
75
76
static inline void
77
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
78
205M
{
79
205M
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
80
205M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
81
205M
}
82
83
static inline void
84
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
85
363M
{
86
363M
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
87
363M
        | PREV_MASK_COLLECTING
88
363M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
89
363M
}
90
91
static inline void
92
gc_decref(PyGC_Head *g)
93
310M
{
94
310M
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
95
310M
                              gc_get_refs(g) > 0,
96
310M
                              "refcount is too small");
97
310M
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
98
310M
}
99
100
101
192k
#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
102
103
104
static GCState *
105
get_gc_state(void)
106
612M
{
107
612M
    PyInterpreterState *interp = _PyInterpreterState_GET();
108
612M
    return &interp->gc;
109
612M
}
110
111
112
void
113
_PyGC_InitState(GCState *gcstate)
114
36
{
115
36
#define INIT_HEAD(GEN) \
116
144
    do { \
117
144
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
118
144
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
119
144
    } while (0)
120
121
144
    for (int i = 0; i < NUM_GENERATIONS; i++) {
122
108
        assert(gcstate->generations[i].count == 0);
123
108
        INIT_HEAD(gcstate->generations[i]);
124
108
    };
125
36
    gcstate->generation0 = GEN_HEAD(gcstate, 0);
126
36
    INIT_HEAD(gcstate->permanent_generation);
127
128
36
#undef INIT_HEAD
129
36
}
130
131
132
PyStatus
133
_PyGC_Init(PyInterpreterState *interp)
134
36
{
135
36
    GCState *gcstate = &interp->gc;
136
137
36
    gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats));
138
36
    if (gcstate->generation_stats == NULL) {
139
0
        return _PyStatus_NO_MEMORY();
140
0
    }
141
142
36
    gcstate->garbage = PyList_New(0);
143
36
    if (gcstate->garbage == NULL) {
144
0
        return _PyStatus_NO_MEMORY();
145
0
    }
146
147
36
    gcstate->callbacks = PyList_New(0);
148
36
    if (gcstate->callbacks == NULL) {
149
0
        return _PyStatus_NO_MEMORY();
150
0
    }
151
152
36
    return _PyStatus_OK();
153
36
}
154
155
156
/*
157
_gc_prev values
158
---------------
159
160
Between collections, _gc_prev is used for doubly linked list.
161
162
Lowest two bits of _gc_prev are used for flags.
163
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
164
or _PyObject_GC_UNTRACK() is called.
165
166
During a collection, _gc_prev is temporary used for gc_refs, and the gc list
167
is singly linked until _gc_prev is restored.
168
169
gc_refs
170
    At the start of a collection, update_refs() copies the true refcount
171
    to gc_refs, for each object in the generation being collected.
172
    subtract_refs() then adjusts gc_refs so that it equals the number of
173
    times an object is referenced directly from outside the generation
174
    being collected.
175
176
PREV_MASK_COLLECTING
177
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
178
    update_refs().
179
180
181
_gc_next values
182
---------------
183
184
_gc_next takes these values:
185
186
0
187
    The object is not tracked
188
189
!= 0
190
    Pointer to the next object in the GC list.
191
    Additionally, lowest bit is used temporary for
192
    NEXT_MASK_UNREACHABLE flag described below.
193
194
NEXT_MASK_UNREACHABLE
195
    move_unreachable() then moves objects not reachable (whether directly or
196
    indirectly) from outside the generation into an "unreachable" set and
197
    set this flag.
198
199
    Objects that are found to be reachable have gc_refs set to 1.
200
    When this flag is set for the reachable object, the object must be in
201
    "unreachable" set.
202
    The flag is unset and the object is moved back to "reachable" set.
203
204
    move_legacy_finalizers() will remove this flag from "unreachable" set.
205
*/
206
207
/*** list functions ***/
208
209
static inline void
210
gc_list_init(PyGC_Head *list)
211
801k
{
212
    // List header must not have flags.
213
    // We can assign pointer by simple cast.
214
801k
    list->_gc_prev = (uintptr_t)list;
215
801k
    list->_gc_next = (uintptr_t)list;
216
801k
}
217
218
static inline int
219
gc_list_is_empty(PyGC_Head *list)
220
14.8M
{
221
14.8M
    return (list->_gc_next == (uintptr_t)list);
222
14.8M
}
223
224
/* Append `node` to `list`. */
225
static inline void
226
gc_list_append(PyGC_Head *node, PyGC_Head *list)
227
40.6M
{
228
40.6M
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
229
230
    // last <-> node
231
40.6M
    _PyGCHead_SET_PREV(node, last);
232
40.6M
    _PyGCHead_SET_NEXT(last, node);
233
234
    // node <-> list
235
40.6M
    _PyGCHead_SET_NEXT(node, list);
236
40.6M
    list->_gc_prev = (uintptr_t)node;
237
40.6M
}
238
239
/* Remove `node` from the gc list it's currently in. */
240
static inline void
241
gc_list_remove(PyGC_Head *node)
242
0
{
243
0
    PyGC_Head *prev = GC_PREV(node);
244
0
    PyGC_Head *next = GC_NEXT(node);
245
246
0
    _PyGCHead_SET_NEXT(prev, next);
247
0
    _PyGCHead_SET_PREV(next, prev);
248
249
0
    node->_gc_next = 0; /* object is not currently tracked */
250
0
}
251
252
/* Move `node` from the gc list it's currently in (which is not explicitly
253
 * named here) to the end of `list`.  This is semantically the same as
254
 * gc_list_remove(node) followed by gc_list_append(node, list).
255
 */
256
static void
257
gc_list_move(PyGC_Head *node, PyGC_Head *list)
258
13.5M
{
259
    /* Unlink from current list. */
260
13.5M
    PyGC_Head *from_prev = GC_PREV(node);
261
13.5M
    PyGC_Head *from_next = GC_NEXT(node);
262
13.5M
    _PyGCHead_SET_NEXT(from_prev, from_next);
263
13.5M
    _PyGCHead_SET_PREV(from_next, from_prev);
264
265
    /* Relink at end of new list. */
266
    // list must not have flags.  So we can skip macros.
267
13.5M
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
268
13.5M
    _PyGCHead_SET_PREV(node, to_prev);
269
13.5M
    _PyGCHead_SET_NEXT(to_prev, node);
270
13.5M
    list->_gc_prev = (uintptr_t)node;
271
13.5M
    _PyGCHead_SET_NEXT(node, list);
272
13.5M
}
273
274
/* append list `from` onto list `to`; `from` becomes an empty list */
275
static void
276
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
277
360k
{
278
360k
    assert(from != to);
279
360k
    if (!gc_list_is_empty(from)) {
280
95.9k
        PyGC_Head *to_tail = GC_PREV(to);
281
95.9k
        PyGC_Head *from_head = GC_NEXT(from);
282
95.9k
        PyGC_Head *from_tail = GC_PREV(from);
283
95.9k
        assert(from_head != from);
284
95.9k
        assert(from_tail != from);
285
286
95.9k
        _PyGCHead_SET_NEXT(to_tail, from_head);
287
95.9k
        _PyGCHead_SET_PREV(from_head, to_tail);
288
289
95.9k
        _PyGCHead_SET_NEXT(from_tail, to);
290
95.9k
        _PyGCHead_SET_PREV(to, from_tail);
291
95.9k
    }
292
360k
    gc_list_init(from);
293
360k
}
294
295
static Py_ssize_t
296
gc_list_size(PyGC_Head *list)
297
96.0k
{
298
96.0k
    PyGC_Head *gc;
299
96.0k
    Py_ssize_t n = 0;
300
201M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
301
201M
        n++;
302
201M
    }
303
96.0k
    return n;
304
96.0k
}
305
306
/* Walk the list and mark all objects as non-collecting */
307
static inline void
308
gc_list_clear_collecting(PyGC_Head *collectable)
309
88.1k
{
310
88.1k
    PyGC_Head *gc;
311
9.76M
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
312
9.68M
        gc_clear_collecting(gc);
313
9.68M
    }
314
88.1k
}
315
316
/* Append objects in a GC list to a Python list.
317
 * Return 0 if all OK, < 0 if error (out of memory for list)
318
 */
319
static int
320
append_objects(PyObject *py_list, PyGC_Head *gc_list)
321
0
{
322
0
    PyGC_Head *gc;
323
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
324
0
        PyObject *op = FROM_GC(gc);
325
0
        if (op != py_list) {
326
0
            if (PyList_Append(py_list, op)) {
327
0
                return -1; /* exception */
328
0
            }
329
0
        }
330
0
    }
331
0
    return 0;
332
0
}
333
334
// Constants for validate_list's flags argument.
335
enum flagstates {collecting_clear_unreachable_clear,
336
                 collecting_clear_unreachable_set,
337
                 collecting_set_unreachable_clear,
338
                 collecting_set_unreachable_set};
339
340
#ifdef GC_DEBUG
341
// validate_list checks list consistency.  And it works as document
342
// describing when flags are expected to be set / unset.
343
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
344
// the prev and next pointers are "polluted" with flags.
345
// What's checked:
346
// - The `head` pointers are not polluted.
347
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
348
//   `set or clear, as specified by the 'flags' argument.
349
// - The prev and next pointers are mutually consistent.
350
static void
351
validate_list(PyGC_Head *head, enum flagstates flags)
352
{
353
    assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
354
    assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
355
    uintptr_t prev_value = 0, next_value = 0;
356
    switch (flags) {
357
        case collecting_clear_unreachable_clear:
358
            break;
359
        case collecting_set_unreachable_clear:
360
            prev_value = PREV_MASK_COLLECTING;
361
            break;
362
        case collecting_clear_unreachable_set:
363
            next_value = NEXT_MASK_UNREACHABLE;
364
            break;
365
        case collecting_set_unreachable_set:
366
            prev_value = PREV_MASK_COLLECTING;
367
            next_value = NEXT_MASK_UNREACHABLE;
368
            break;
369
        default:
370
            assert(! "bad internal flags argument");
371
    }
372
    PyGC_Head *prev = head;
373
    PyGC_Head *gc = GC_NEXT(head);
374
    while (gc != head) {
375
        PyGC_Head *trueprev = GC_PREV(gc);
376
        PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
377
        assert(truenext != NULL);
378
        assert(trueprev == prev);
379
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
380
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
381
        prev = gc;
382
        gc = truenext;
383
    }
384
    assert(prev == GC_PREV(head));
385
}
386
#else
387
1.14M
#define validate_list(x, y) do{}while(0)
388
#endif
389
390
/*** end of list stuff ***/
391
392
393
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
394
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
395
 */
396
static Py_ssize_t
397
update_refs(PyGC_Head *containers)
398
176k
{
399
176k
    PyGC_Head *next;
400
176k
    PyGC_Head *gc = GC_NEXT(containers);
401
176k
    Py_ssize_t candidates = 0;
402
403
363M
    while (gc != containers) {
404
363M
        next = GC_NEXT(gc);
405
363M
        PyObject *op = FROM_GC(gc);
406
363M
        if (_Py_IsImmortal(op)) {
407
0
            assert(!_Py_IsStaticImmortal(op));
408
0
            _PyObject_GC_UNTRACK(op);
409
0
            gc = next;
410
0
            continue;
411
0
        }
412
363M
        gc_reset_refs(gc, Py_REFCNT(op));
413
        /* Python's cyclic gc should never see an incoming refcount
414
         * of 0:  if something decref'ed to 0, it should have been
415
         * deallocated immediately at that time.
416
         * Possible cause (if the assert triggers):  a tp_dealloc
417
         * routine left a gc-aware object tracked during its teardown
418
         * phase, and did something-- or allowed something to happen --
419
         * that called back into Python.  gc can trigger then, and may
420
         * see the still-tracked dying object.  Before this assert
421
         * was added, such mistakes went on to allow gc to try to
422
         * delete the object again.  In a debug build, that caused
423
         * a mysterious segfault, when _Py_ForgetReference tried
424
         * to remove the object from the doubly-linked list of all
425
         * objects a second time.  In a release build, an actual
426
         * double deallocation occurred, which leads to corruption
427
         * of the allocator's internal bookkeeping pointers.  That's
428
         * so serious that maybe this should be a release-build
429
         * check instead of an assert?
430
         */
431
363M
        _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
432
363M
        gc = next;
433
363M
        candidates++;
434
363M
    }
435
176k
    return candidates;
436
176k
}
437
438
/* A traversal callback for subtract_refs. */
439
static int
440
visit_decref(PyObject *op, void *parent)
441
1.19G
{
442
1.19G
    OBJECT_STAT_INC(object_visits);
443
1.19G
    _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
444
445
1.19G
    if (_PyObject_IS_GC(op)) {
446
605M
        PyGC_Head *gc = AS_GC(op);
447
        /* We're only interested in gc_refs for objects in the
448
         * generation being collected, which can be recognized
449
         * because only they have positive gc_refs.
450
         */
451
605M
        if (gc_is_collecting(gc)) {
452
310M
            gc_decref(gc);
453
310M
        }
454
605M
    }
455
1.19G
    return 0;
456
1.19G
}
457
458
int
459
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
460
4.61M
{
461
    // This is a bit tricky! We want to ignore stackrefs with embedded
462
    // refcounts when computing the incoming references, but otherwise treat
463
    // them like normal.
464
4.61M
    assert(!PyStackRef_IsTaggedInt(*ref));
465
4.61M
    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
466
301k
        return 0;
467
301k
    }
468
4.30M
    Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
469
4.30M
    return 0;
470
4.30M
}
471
472
int
473
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
474
781k
{
475
781k
    _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
476
    /* locals and stack */
477
5.26M
    for (; ref < frame->stackpointer; ref++) {
478
4.48M
        if (!PyStackRef_IsTaggedInt(*ref)) {
479
4.47M
            _Py_VISIT_STACKREF(*ref);
480
4.47M
        }
481
4.48M
    }
482
781k
    return 0;
483
781k
}
484
485
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
486
 * for all objects in containers. The ones with gc_refs > 0 are directly
487
 * reachable from outside containers, and so can't be collected.
488
 */
489
static void
490
subtract_refs(PyGC_Head *containers)
491
176k
{
492
176k
    traverseproc traverse;
493
176k
    PyGC_Head *gc = GC_NEXT(containers);
494
363M
    for (; gc != containers; gc = GC_NEXT(gc)) {
495
363M
        PyObject *op = FROM_GC(gc);
496
363M
        traverse = Py_TYPE(op)->tp_traverse;
497
363M
        (void) traverse(op,
498
363M
                        visit_decref,
499
363M
                        op);
500
363M
    }
501
176k
}
502
503
/* A traversal callback for move_unreachable. */
504
static int
505
visit_reachable(PyObject *op, void *arg)
506
1.11G
{
507
1.11G
    PyGC_Head *reachable = arg;
508
1.11G
    OBJECT_STAT_INC(object_visits);
509
1.11G
    if (!_PyObject_IS_GC(op)) {
510
547M
        return 0;
511
547M
    }
512
513
571M
    PyGC_Head *gc = AS_GC(op);
514
571M
    const Py_ssize_t gc_refs = gc_get_refs(gc);
515
516
    // Ignore objects in other generation.
517
    // This also skips objects "to the left" of the current position in
518
    // move_unreachable's scan of the 'young' list - they've already been
519
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
520
571M
    if (! gc_is_collecting(gc)) {
521
358M
        return 0;
522
358M
    }
523
    // It would be a logic error elsewhere if the collecting flag were set on
524
    // an untracked object.
525
212M
    _PyObject_ASSERT(op, gc->_gc_next != 0);
526
527
212M
    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
528
        /* This had gc_refs = 0 when move_unreachable got
529
         * to it, but turns out it's reachable after all.
530
         * Move it back to move_unreachable's 'young' list,
531
         * and move_unreachable will eventually get to it
532
         * again.
533
         */
534
        // Manually unlink gc from unreachable list because the list functions
535
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
536
40.6M
        PyGC_Head *prev = GC_PREV(gc);
537
40.6M
        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
538
40.6M
        _PyObject_ASSERT(FROM_GC(prev),
539
40.6M
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
540
40.6M
        _PyObject_ASSERT(FROM_GC(next),
541
40.6M
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
542
40.6M
        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
543
40.6M
        _PyGCHead_SET_PREV(next, prev);
544
545
40.6M
        gc_list_append(gc, reachable);
546
40.6M
        gc_set_refs(gc, 1);
547
40.6M
    }
548
172M
    else if (gc_refs == 0) {
549
        /* This is in move_unreachable's 'young' list, but
550
         * the traversal hasn't yet gotten to it.  All
551
         * we need to do is tell move_unreachable that it's
552
         * reachable.
553
         */
554
165M
        gc_set_refs(gc, 1);
555
165M
    }
556
    /* Else there's nothing to do.
557
     * If gc_refs > 0, it must be in move_unreachable's 'young'
558
     * list, and move_unreachable will eventually get to it.
559
     */
560
6.99M
    else {
561
6.99M
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
562
6.99M
    }
563
212M
    return 0;
564
571M
}
565
566
/* Move the unreachable objects from young to unreachable.  After this,
567
 * all objects in young don't have PREV_MASK_COLLECTING flag and
568
 * unreachable have the flag.
569
 * All objects in young after this are directly or indirectly reachable
570
 * from outside the original young; and all objects in unreachable are
571
 * not.
572
 *
573
 * This function restores _gc_prev pointer.  young and unreachable are
574
 * doubly linked list after this function.
575
 * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag.
576
 * So we can not gc_list_* functions for unreachable until we remove the flag.
577
 */
578
static void
579
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
580
176k
{
581
    // previous elem in the young list, used for restore gc_prev.
582
176k
    PyGC_Head *prev = young;
583
176k
    PyGC_Head *gc = GC_NEXT(young);
584
585
    /* Invariants:  all objects "to the left" of us in young are reachable
586
     * (directly or indirectly) from outside the young list as it was at entry.
587
     *
588
     * All other objects from the original young "to the left" of us are in
589
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
590
     * left of us in 'young' now have been scanned, and no objects here
591
     * or to the right have been scanned yet.
592
     */
593
594
404M
    while (gc != young) {
595
404M
        if (gc_get_refs(gc)) {
596
            /* gc is definitely reachable from outside the
597
             * original 'young'.  Mark it as such, and traverse
598
             * its pointers to find any other objects that may
599
             * be directly reachable from it.  Note that the
600
             * call to tp_traverse may append objects to young,
601
             * so we have to wait until it returns to determine
602
             * the next object to visit.
603
             */
604
344M
            PyObject *op = FROM_GC(gc);
605
344M
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
606
344M
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
607
344M
                                      "refcount is too small");
608
            // NOTE: visit_reachable may change gc->_gc_next when
609
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
610
344M
            (void) traverse(op,
611
344M
                    visit_reachable,
612
344M
                    (void *)young);
613
            // relink gc_prev to prev element.
614
344M
            _PyGCHead_SET_PREV(gc, prev);
615
            // gc is not COLLECTING state after here.
616
344M
            gc_clear_collecting(gc);
617
344M
            prev = gc;
618
344M
        }
619
60.0M
        else {
620
            /* This *may* be unreachable.  To make progress,
621
             * assume it is.  gc isn't directly reachable from
622
             * any object we've already traversed, but may be
623
             * reachable from an object we haven't gotten to yet.
624
             * visit_reachable will eventually move gc back into
625
             * young if that's so, and we'll see it again.
626
             */
627
            // Move gc to unreachable.
628
            // No need to gc->next->prev = prev because it is single linked.
629
60.0M
            prev->_gc_next = gc->_gc_next;
630
631
            // We can't use gc_list_append() here because we use
632
            // NEXT_MASK_UNREACHABLE here.
633
60.0M
            PyGC_Head *last = GC_PREV(unreachable);
634
            // NOTE: Since all objects in unreachable set has
635
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
636
            // But this may pollute the unreachable list head's 'next' pointer
637
            // too. That's semantically senseless but expedient here - the
638
            // damage is repaired when this function ends.
639
60.0M
            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
640
60.0M
            _PyGCHead_SET_PREV(gc, last);
641
60.0M
            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
642
60.0M
            unreachable->_gc_prev = (uintptr_t)gc;
643
60.0M
        }
644
404M
        gc = (PyGC_Head*)prev->_gc_next;
645
404M
    }
646
    // young->_gc_prev must be last element remained in the list.
647
176k
    young->_gc_prev = (uintptr_t)prev;
648
    // don't let the pollution of the list head's next pointer leak
649
176k
    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
650
176k
}
651
652
/* In theory, all tuples should be younger than the
653
* objects they refer to, as tuples are immortal.
654
* Therefore, untracking tuples in oldest-first order in the
655
* young generation before promoting them should have tracked
656
* all the tuples that can be untracked.
657
*
658
* Unfortunately, the C API allows tuples to be created
659
* and then filled in. So this won't untrack all tuples
660
* that can be untracked. It should untrack most of them
661
* and is much faster than a more complex approach that
662
* would untrack all relevant tuples.
663
*/
664
static void
665
untrack_tuples(PyGC_Head *head)
666
88.1k
{
667
88.1k
    PyGC_Head *gc = GC_NEXT(head);
668
344M
    while (gc != head) {
669
344M
        PyObject *op = FROM_GC(gc);
670
344M
        PyGC_Head *next = GC_NEXT(gc);
671
344M
        if (PyTuple_CheckExact(op)) {
672
174M
            _PyTuple_MaybeUntrack(op);
673
174M
        }
674
344M
        gc = next;
675
344M
    }
676
88.1k
}
677
678
/* Return true if object has a pre-PEP 442 finalization method. */
679
static int
680
has_legacy_finalizer(PyObject *op)
681
9.68M
{
682
9.68M
    return Py_TYPE(op)->tp_del != NULL;
683
9.68M
}
684
685
/* Move the objects in unreachable with tp_del slots into `finalizers`.
686
 *
687
 * This function also removes NEXT_MASK_UNREACHABLE flag
688
 * from _gc_next in unreachable.
689
 */
690
static void
691
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
692
88.1k
{
693
88.1k
    PyGC_Head *gc, *next;
694
88.1k
    _PyObject_ASSERT(
695
88.1k
        FROM_GC(unreachable),
696
88.1k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
697
698
    /* March over unreachable.  Move objects with finalizers into
699
     * `finalizers`.
700
     */
701
9.76M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
702
9.68M
        PyObject *op = FROM_GC(gc);
703
704
9.68M
        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
705
9.68M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
706
9.68M
        next = (PyGC_Head*)gc->_gc_next;
707
708
9.68M
        if (has_legacy_finalizer(op)) {
709
0
            gc_clear_collecting(gc);
710
0
            gc_list_move(gc, finalizers);
711
0
        }
712
9.68M
    }
713
88.1k
}
714
715
static inline void
716
clear_unreachable_mask(PyGC_Head *unreachable)
717
88.1k
{
718
    /* Check that the list head does not have the unreachable bit set */
719
88.1k
    _PyObject_ASSERT(
720
88.1k
        FROM_GC(unreachable),
721
88.1k
        ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
722
88.1k
    _PyObject_ASSERT(
723
88.1k
        FROM_GC(unreachable),
724
88.1k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
725
726
88.1k
    PyGC_Head *gc, *next;
727
9.76M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
728
9.68M
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
729
9.68M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
730
9.68M
        next = (PyGC_Head*)gc->_gc_next;
731
9.68M
    }
732
88.1k
    validate_list(unreachable, collecting_set_unreachable_clear);
733
88.1k
}
734
735
/* A traversal callback for move_legacy_finalizer_reachable. */
736
static int
737
visit_move(PyObject *op, void *arg)
738
0
{
739
0
    PyGC_Head *tolist = arg;
740
0
    OBJECT_STAT_INC(object_visits);
741
0
    if (_PyObject_IS_GC(op)) {
742
0
        PyGC_Head *gc = AS_GC(op);
743
0
        if (gc_is_collecting(gc)) {
744
0
            gc_list_move(gc, tolist);
745
0
            gc_clear_collecting(gc);
746
0
        }
747
0
    }
748
0
    return 0;
749
0
}
750
751
/* Move objects that are reachable from finalizers, from the unreachable set
752
 * into finalizers set.
753
 */
754
static void
755
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
756
88.1k
{
757
88.1k
    traverseproc traverse;
758
88.1k
    PyGC_Head *gc = GC_NEXT(finalizers);
759
88.1k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
760
        /* Note that the finalizers list may grow during this. */
761
0
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
762
0
        (void) traverse(FROM_GC(gc),
763
0
                        visit_move,
764
0
                        (void *)finalizers);
765
0
    }
766
88.1k
}
767
768
/* Handle weakref callbacks.  Note that it's possible for such weakrefs to be
769
 * outside the unreachable set -- indeed, those are precisely the weakrefs
770
 * whose callbacks must be invoked.  See gc_weakref.txt for overview & some
771
 * details.
772
 *
773
 * The clearing of weakrefs is suble and must be done carefully, as there was
774
 * previous bugs related to this.  First, weakrefs to the unreachable set of
775
 * objects must be cleared before we start calling `tp_clear`.  If we don't,
776
 * those weakrefs can reveal unreachable objects to Python-level code and that
777
 * is not safe.  Many objects are not usable after `tp_clear` has been called
778
 * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
779
 * example bugs).
780
 *
781
 * Weakrefs with callbacks always need to be cleared before executing the
782
 * callback.  That's because sometimes a callback will call the ref object,
783
 * to check if the reference is actually dead (KeyedRef does this, for
784
 * example).  We want to indicate that it is dead, even though it is possible
785
 * a finalizer might resurrect it.  Clearing also prevents the callback from
786
 * executing more than once.
787
 *
788
 * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
789
 * calling finalizers.  However, since tp_subclasses started being necessary
790
 * to invalidate caches (e.g. by PyType_Modified), that clearing has created
791
 * a bug.  If the weakref to the subclass is cleared before a finalizer is
792
 * called, the cache may not be correctly invalidated.  That can lead to
793
 * segfaults since the caches can refer to deallocated objects (GH-135552
794
 * is an example).  Now, we delay the clear of weakrefs without callbacks
795
 * until *after* finalizers have been executed.  That means weakrefs without
796
 * callbacks are still usable while finalizers are being executed.
797
 *
798
 * The weakrefs that are inside the unreachable set must also be cleared.
799
 * The reason this is required is not immediately obvious.  If the weakref
800
 * refers to an object outside of the unreachable set, that object might go
801
 * away when we start clearing objects.  Normally, the object should also be
802
 * part of the unreachable set but that's not true in the case of incomplete
803
 * or missing `tp_traverse` methods.  When that object goes away, the callback
804
 * for weakref can be executed and that could reveal unreachable objects to
805
 * Python-level code.  See bpo-38006 as an example bug.
806
 */
807
static int
808
handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
809
88.1k
{
810
88.1k
    PyGC_Head *gc;
811
88.1k
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
812
88.1k
    PyGC_Head *next;
813
88.1k
    int num_freed = 0;
814
815
88.1k
    gc_list_init(&wrcb_to_call);
816
817
    /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the
818
     * callback needs to be invoked. We make another pass over wrcb_to_call,
819
     * invoking callbacks, after this pass completes.
820
     */
821
9.76M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
822
9.68M
        PyWeakReference **wrlist;
823
824
9.68M
        PyObject *op = FROM_GC(gc);
825
9.68M
        next = GC_NEXT(gc);
826
827
9.68M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
828
7.45M
            continue;
829
7.45M
        }
830
831
        /* It supports weakrefs.  Does it have any?
832
         *
833
         * This is never triggered for static types so we can avoid the
834
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
835
         */
836
2.22M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
837
838
        /* `op` may have some weakrefs.  March over the list and move the
839
         * weakrefs with callbacks that must be called into wrcb_to_call.
840
         */
841
2.22M
        PyWeakReference *next_wr;
842
2.50M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
843
            // Get the next list element to get iterator progress if we omit
844
            // clearing of the weakref (because _PyWeakref_ClearRef changes
845
            // next pointer in the wrlist).
846
280k
            next_wr = wr->wr_next;
847
848
280k
            if (wr->wr_callback == NULL) {
849
                /* no callback */
850
237k
                continue;
851
237k
            }
852
853
            // Clear the weakref.  See the comments above this function for
854
            // reasons why we need to clear weakrefs that have callbacks.
855
            // Note that _PyWeakref_ClearRef clears the weakref but leaves the
856
            // callback pointer intact.  Obscure: it also changes *wrlist.
857
43.5k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
858
43.5k
            _PyWeakref_ClearRef(wr);
859
43.5k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
860
861
            /* Headache time.  `op` is going away, and is weakly referenced by
862
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
863
             * is also trash, no:
864
             *
865
             * 1. There's no need to call it.  The object and the weakref are
866
             *    both going away, so it's legitimate to pretend the weakref is
867
             *    going away first.  The user has to ensure a weakref outlives its
868
             *    referent if they want a guarantee that the wr callback will get
869
             *    invoked.
870
             *
871
             * 2. It may be catastrophic to call it.  If the callback is also in
872
             *    cyclic trash (CT), then although the CT is unreachable from
873
             *    outside the current generation, CT may be reachable from the
874
             *    callback.  Then the callback could resurrect insane objects.
875
             *
876
             * Since the callback is never needed and may be unsafe in this
877
             * case, wr is simply left in the unreachable set.  Note that
878
             * clear_weakrefs() will ensure its callback will not trigger
879
             * inside delete_garbage().
880
             *
881
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
882
             * weakref outlived the trash.  Note that since wr isn't CT in this
883
             * case, its callback can't be CT either -- wr acted as an external
884
             * root to this generation, and therefore its callback did too.  So
885
             * nothing in CT is reachable from the callback either, so it's hard
886
             * to imagine how calling it later could create a problem for us.  wr
887
             * is moved to wrcb_to_call in this case.
888
             */
889
43.5k
            if (gc_is_collecting(AS_GC((PyObject *)wr))) {
890
0
                continue;
891
0
            }
892
893
            /* Create a new reference so that wr can't go away
894
             * before we can process it again.
895
             */
896
43.5k
            Py_INCREF(wr);
897
898
            /* Move wr to wrcb_to_call, for the next pass. */
899
43.5k
            PyGC_Head *wrasgc = AS_GC((PyObject *)wr);
900
            // wrasgc is reachable, but next isn't, so they can't be the same
901
43.5k
            _PyObject_ASSERT((PyObject *)wr, wrasgc != next);
902
43.5k
            gc_list_move(wrasgc, &wrcb_to_call);
903
43.5k
        }
904
2.22M
    }
905
906
    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
907
     * because they can't reference unreachable objects.
908
     */
909
131k
    while (! gc_list_is_empty(&wrcb_to_call)) {
910
43.5k
        PyObject *temp;
911
43.5k
        PyObject *callback;
912
913
43.5k
        gc = (PyGC_Head*)wrcb_to_call._gc_next;
914
43.5k
        PyObject *op = FROM_GC(gc);
915
43.5k
        _PyObject_ASSERT(op, PyWeakref_Check(op));
916
43.5k
        PyWeakReference *wr = (PyWeakReference *)op;
917
43.5k
        callback = wr->wr_callback;
918
43.5k
        _PyObject_ASSERT(op, callback != NULL);
919
920
        /* copy-paste of weakrefobject.c's handle_callback() */
921
43.5k
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
922
43.5k
        if (temp == NULL) {
923
0
            PyErr_FormatUnraisable("Exception ignored on "
924
0
                                   "calling weakref callback %R", callback);
925
0
        }
926
43.5k
        else {
927
43.5k
            Py_DECREF(temp);
928
43.5k
        }
929
930
        /* Give up the reference we created in the first pass.  When
931
         * op's refcount hits 0 (which it may or may not do right now),
932
         * op's tp_dealloc will decref op->wr_callback too.  Note
933
         * that the refcount probably will hit 0 now, and because this
934
         * weakref was reachable to begin with, gc didn't already
935
         * add it to its count of freed objects.  Example:  a reachable
936
         * weak value dict maps some key to this reachable weakref.
937
         * The callback removes this key->weakref mapping from the
938
         * dict, leaving no other references to the weakref (excepting
939
         * ours).
940
         */
941
43.5k
        Py_DECREF(op);
942
43.5k
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
943
            /* object is still alive -- move it */
944
0
            gc_list_move(gc, old);
945
0
        }
946
43.5k
        else {
947
43.5k
            ++num_freed;
948
43.5k
        }
949
43.5k
    }
950
951
88.1k
    return num_freed;
952
88.1k
}
953
954
/* Clear all weakrefs to unreachable objects.  When this returns, no object in
955
 * `unreachable` is weakly referenced anymore.  See the comments above
956
 * handle_weakref_callbacks() for why these weakrefs need to be cleared.
957
 */
958
static void
959
clear_weakrefs(PyGC_Head *unreachable)
960
88.1k
{
961
88.1k
    PyGC_Head *gc;
962
88.1k
    PyGC_Head *next;
963
964
9.76M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
965
9.68M
        PyWeakReference **wrlist;
966
967
9.68M
        PyObject *op = FROM_GC(gc);
968
9.68M
        next = GC_NEXT(gc);
969
970
9.68M
        if (PyWeakref_Check(op)) {
971
            /* A weakref inside the unreachable set is always cleared. See
972
             * the comments above handle_weakref_callbacks() for why these
973
             * must be cleared.
974
             */
975
8
            _PyWeakref_ClearRef((PyWeakReference *)op);
976
8
        }
977
978
9.68M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
979
7.45M
            continue;
980
7.45M
        }
981
982
        /* It supports weakrefs.  Does it have any?
983
         *
984
         * This is never triggered for static types so we can avoid the
985
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
986
         */
987
2.22M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
988
989
        /* `op` may have some weakrefs.  March over the list, clear
990
         * all the weakrefs.
991
         */
992
2.46M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
993
            /* _PyWeakref_ClearRef clears the weakref but leaves
994
             * the callback pointer intact.  Obscure:  it also
995
             * changes *wrlist.
996
             */
997
237k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
998
237k
            _PyWeakref_ClearRef(wr);
999
237k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1000
237k
        }
1001
2.22M
    }
1002
88.1k
}
1003
1004
static void
1005
debug_cycle(const char *msg, PyObject *op)
1006
0
{
1007
0
    PySys_FormatStderr("gc: %s <%s %p>\n",
1008
0
                       msg, Py_TYPE(op)->tp_name, op);
1009
0
}
1010
1011
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
1012
 * only from such cycles).
1013
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
1014
 * garbage list (a Python list), else only the objects in finalizers with
1015
 * __del__ methods are appended to garbage.  All objects in finalizers are
1016
 * merged into the old list regardless.
1017
 */
1018
static void
1019
handle_legacy_finalizers(PyThreadState *tstate,
1020
                         GCState *gcstate,
1021
                         PyGC_Head *finalizers, PyGC_Head *old)
1022
88.1k
{
1023
88.1k
    assert(!_PyErr_Occurred(tstate));
1024
88.1k
    assert(gcstate->garbage != NULL);
1025
1026
88.1k
    PyGC_Head *gc = GC_NEXT(finalizers);
1027
88.1k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
1028
0
        PyObject *op = FROM_GC(gc);
1029
1030
0
        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
1031
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1032
0
                _PyErr_Clear(tstate);
1033
0
                break;
1034
0
            }
1035
0
        }
1036
0
    }
1037
1038
88.1k
    gc_list_merge(finalizers, old);
1039
88.1k
}
1040
1041
/* Run first-time finalizers (if any) on all the objects in collectable.
1042
 * Note that this may remove some (or even all) of the objects from the
1043
 * list, due to refcounts falling to 0.
1044
 */
1045
static void
1046
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
1047
88.1k
{
1048
88.1k
    destructor finalize;
1049
88.1k
    PyGC_Head seen;
1050
1051
    /* While we're going through the loop, `finalize(op)` may cause op, or
1052
     * other objects, to be reclaimed via refcounts falling to zero.  So
1053
     * there's little we can rely on about the structure of the input
1054
     * `collectable` list across iterations.  For safety, we always take the
1055
     * first object in that list and move it to a temporary `seen` list.
1056
     * If objects vanish from the `collectable` and `seen` lists we don't
1057
     * care.
1058
     */
1059
88.1k
    gc_list_init(&seen);
1060
1061
9.76M
    while (!gc_list_is_empty(collectable)) {
1062
9.68M
        PyGC_Head *gc = GC_NEXT(collectable);
1063
9.68M
        PyObject *op = FROM_GC(gc);
1064
9.68M
        gc_list_move(gc, &seen);
1065
9.68M
        if (!_PyGC_FINALIZED(op) &&
1066
9.68M
            (finalize = Py_TYPE(op)->tp_finalize) != NULL)
1067
1
        {
1068
1
            _PyGC_SET_FINALIZED(op);
1069
1
            Py_INCREF(op);
1070
1
            finalize(op);
1071
1
            assert(!_PyErr_Occurred(tstate));
1072
1
            Py_DECREF(op);
1073
1
        }
1074
9.68M
    }
1075
88.1k
    gc_list_merge(&seen, collectable);
1076
88.1k
}
1077
1078
/* Break reference cycles by clearing the containers involved.  This is
1079
 * tricky business as the lists can be changing and we don't know which
1080
 * objects may be freed.  It is possible I screwed something up here.
1081
 */
1082
static void
1083
delete_garbage(PyThreadState *tstate, GCState *gcstate,
1084
               PyGC_Head *collectable, PyGC_Head *old)
1085
88.1k
{
1086
88.1k
    assert(!_PyErr_Occurred(tstate));
1087
1088
4.55M
    while (!gc_list_is_empty(collectable)) {
1089
4.46M
        PyGC_Head *gc = GC_NEXT(collectable);
1090
4.46M
        PyObject *op = FROM_GC(gc);
1091
1092
4.46M
        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1093
4.46M
                                  "refcount is too small");
1094
1095
4.46M
        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
1096
0
            assert(gcstate->garbage != NULL);
1097
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1098
0
                _PyErr_Clear(tstate);
1099
0
            }
1100
0
        }
1101
4.46M
        else {
1102
4.46M
            inquiry clear;
1103
4.46M
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1104
4.08M
                Py_INCREF(op);
1105
4.08M
                (void) clear(op);
1106
4.08M
                if (_PyErr_Occurred(tstate)) {
1107
0
                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
1108
0
                                           Py_TYPE(op)->tp_name);
1109
0
                }
1110
4.08M
                Py_DECREF(op);
1111
4.08M
            }
1112
4.46M
        }
1113
4.46M
        if (GC_NEXT(collectable) == gc) {
1114
            /* object is still alive, move it, it may die later */
1115
3.80M
            gc_clear_collecting(gc);
1116
3.80M
            gc_list_move(gc, old);
1117
3.80M
        }
1118
4.46M
    }
1119
88.1k
}
1120
1121
1122
// Show stats for objects in each generations
1123
static void
1124
show_stats_each_generations(GCState *gcstate)
1125
0
{
1126
0
    char buf[100];
1127
0
    size_t pos = 0;
1128
1129
0
    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1130
0
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1131
0
                             " %zd",
1132
0
                             gc_list_size(GEN_HEAD(gcstate, i)));
1133
0
    }
1134
1135
0
    PySys_FormatStderr(
1136
0
        "gc: objects in each generation:%s\n"
1137
0
        "gc: objects in permanent generation: %zd\n",
1138
0
        buf, gc_list_size(&gcstate->permanent_generation.head));
1139
0
}
1140
1141
/* Deduce which objects among "base" are unreachable from outside the list
1142
   and move them to 'unreachable'. The process consist in the following steps:
1143
1144
1. Copy all reference counts to a different field (gc_prev is used to hold
1145
   this copy to save memory).
1146
2. Traverse all objects in "base" and visit all referred objects using
1147
   "tp_traverse" and for every visited object, subtract 1 to the reference
1148
   count (the one that we copied in the previous step). After this step, all
1149
   objects that can be reached directly from outside must have strictly positive
1150
   reference count, while all unreachable objects must have a count of exactly 0.
1151
3. Identify all unreachable objects (the ones with 0 reference count) and move
1152
   them to the "unreachable" list. This step also needs to move back to "base" all
1153
   objects that were initially marked as unreachable but are referred transitively
1154
   by the reachable objects (the ones with strictly positive reference count).
1155
1156
Contracts:
1157
1158
    * The "base" has to be a valid list with no mask set.
1159
1160
    * The "unreachable" list must be uninitialized (this function calls
1161
      gc_list_init over 'unreachable').
1162
1163
IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1164
flag set but it does not clear it to skip unnecessary iteration. Before the
1165
flag is cleared (for example, by using 'clear_unreachable_mask' function or
1166
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1167
list and we can not use most gc_list_* functions for it. */
1168
static inline Py_ssize_t
1169
176k
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1170
176k
    validate_list(base, collecting_clear_unreachable_clear);
1171
    /* Using ob_refcnt and gc_refs, calculate which objects in the
1172
     * container set are reachable from outside the set (i.e., have a
1173
     * refcount greater than 0 when all the references within the
1174
     * set are taken into account).
1175
     */
1176
176k
    Py_ssize_t candidates = update_refs(base);  // gc_prev is used for gc_refs
1177
176k
    subtract_refs(base);
1178
1179
    /* Leave everything reachable from outside base in base, and move
1180
     * everything else (in base) to unreachable.
1181
     *
1182
     * NOTE:  This used to move the reachable objects into a reachable
1183
     * set instead.  But most things usually turn out to be reachable,
1184
     * so it's more efficient to move the unreachable things.  It "sounds slick"
1185
     * to move the unreachable objects, until you think about it - the reason it
1186
     * pays isn't actually obvious.
1187
     *
1188
     * Suppose we create objects A, B, C in that order.  They appear in the young
1189
     * generation in the same order.  If B points to A, and C to B, and C is
1190
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1191
     * respectively.
1192
     *
1193
     * When move_unreachable finds A, A is moved to the unreachable list.  The
1194
     * same for B when it's first encountered.  Then C is traversed, B is moved
1195
     * _back_ to the reachable list.  B is eventually traversed, and then A is
1196
     * moved back to the reachable list.
1197
     *
1198
     * So instead of not moving at all, the reachable objects B and A are moved
1199
     * twice each.  Why is this a win?  A straightforward algorithm to move the
1200
     * reachable objects instead would move A, B, and C once each.
1201
     *
1202
     * The key is that this dance leaves the objects in order C, B, A - it's
1203
     * reversed from the original order.  On all _subsequent_ scans, none of
1204
     * them will move.  Since most objects aren't in cycles, this can save an
1205
     * unbounded number of moves across an unbounded number of later collections.
1206
     * It can cost more only the first time the chain is scanned.
1207
     *
1208
     * Drawback:  move_unreachable is also used to find out what's still trash
1209
     * after finalizers may resurrect objects.  In _that_ case most unreachable
1210
     * objects will remain unreachable, so it would be more efficient to move
1211
     * the reachable objects instead.  But this is a one-time cost, probably not
1212
     * worth complicating the code to speed just a little.
1213
     */
1214
176k
    gc_list_init(unreachable);
1215
176k
    move_unreachable(base, unreachable);  // gc_prev is pointer again
1216
176k
    validate_list(base, collecting_clear_unreachable_clear);
1217
176k
    validate_list(unreachable, collecting_set_unreachable_set);
1218
176k
    return candidates;
1219
176k
}
1220
1221
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1222
   them to 'old_generation' and placing the rest on 'still_unreachable'.
1223
1224
   Contracts:
1225
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1226
         will contain the objects that did not resurrect.
1227
1228
       * The "still_unreachable" list must be uninitialized (this function calls
1229
         gc_list_init over 'still_unreachable').
1230
1231
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1232
PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so
1233
we can skip the expense of clearing the flag to avoid extra iteration. */
1234
static inline void
1235
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1236
                           PyGC_Head *old_generation)
1237
88.1k
{
1238
    // Remove the PREV_MASK_COLLECTING from unreachable
1239
    // to prepare it for a new call to 'deduce_unreachable'
1240
88.1k
    gc_list_clear_collecting(unreachable);
1241
1242
    // After the call to deduce_unreachable, the 'still_unreachable' set will
1243
    // have the PREV_MARK_COLLECTING set, but the objects are going to be
1244
    // removed so we can skip the expense of clearing the flag.
1245
88.1k
    PyGC_Head* resurrected = unreachable;
1246
88.1k
    deduce_unreachable(resurrected, still_unreachable);
1247
88.1k
    clear_unreachable_mask(still_unreachable);
1248
1249
    // Move the resurrected objects to the old generation for future collection.
1250
88.1k
    gc_list_merge(resurrected, old_generation);
1251
88.1k
}
1252
1253
1254
/* Invoke progress callbacks to notify clients that garbage collection
1255
 * is starting or stopping
1256
 */
1257
static void
1258
invoke_gc_callback(PyThreadState *tstate, const char *phase,
1259
                   int generation, struct gc_generation_stats *stats)
1260
176k
{
1261
176k
    assert(!_PyErr_Occurred(tstate));
1262
1263
    /* we may get called very early */
1264
176k
    GCState *gcstate = &tstate->interp->gc;
1265
176k
    if (gcstate->callbacks == NULL) {
1266
0
        return;
1267
0
    }
1268
1269
    /* The local variable cannot be rebound, check it for sanity */
1270
176k
    assert(PyList_CheckExact(gcstate->callbacks));
1271
176k
    PyObject *info = NULL;
1272
176k
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1273
6.50k
        info = Py_BuildValue("{sisnsnsnsd}",
1274
6.50k
            "generation", generation,
1275
6.50k
            "collected", stats->collected,
1276
6.50k
            "uncollectable", stats->uncollectable,
1277
6.50k
            "candidates", stats->candidates,
1278
6.50k
            "duration", stats->duration);
1279
6.50k
        if (info == NULL) {
1280
0
            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
1281
0
            return;
1282
0
        }
1283
6.50k
    }
1284
1285
176k
    PyObject *phase_obj = PyUnicode_FromString(phase);
1286
176k
    if (phase_obj == NULL) {
1287
0
        Py_XDECREF(info);
1288
0
        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
1289
0
        return;
1290
0
    }
1291
1292
176k
    PyObject *stack[] = {phase_obj, info};
1293
182k
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1294
6.50k
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1295
6.50k
        Py_INCREF(cb); /* make sure cb doesn't go away */
1296
6.50k
        r = PyObject_Vectorcall(cb, stack, 2, NULL);
1297
6.50k
        if (r == NULL) {
1298
0
            PyErr_FormatUnraisable("Exception ignored while "
1299
0
                                   "calling GC callback %R", cb);
1300
0
        }
1301
6.50k
        else {
1302
6.50k
            Py_DECREF(r);
1303
6.50k
        }
1304
6.50k
        Py_DECREF(cb);
1305
6.50k
    }
1306
176k
    Py_DECREF(phase_obj);
1307
176k
    Py_XDECREF(info);
1308
176k
    assert(!_PyErr_Occurred(tstate));
1309
176k
}
1310
1311
1312
/* Find the oldest generation (highest numbered) where the count
1313
 * exceeds the threshold.  Objects in the that generation and
1314
 * generations younger than it will be collected. */
1315
static int
1316
gc_select_generation(GCState *gcstate)
1317
102k
{
1318
312k
    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1319
298k
        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1320
            /* Avoid quadratic performance degradation in number
1321
               of tracked objects (see also issue #4074):
1322
1323
               To limit the cost of garbage collection, there are two strategies;
1324
                 - make each collection faster, e.g. by scanning fewer objects
1325
                 - do less collections
1326
               This heuristic is about the latter strategy.
1327
1328
               In addition to the various configurable thresholds, we only trigger a
1329
               full collection if the ratio
1330
1331
                long_lived_pending / long_lived_total
1332
1333
               is above a given value (hardwired to 25%).
1334
1335
               The reason is that, while "non-full" collections (i.e., collections of
1336
               the young and middle generations) will always examine roughly the same
1337
               number of objects -- determined by the aforementioned thresholds --,
1338
               the cost of a full collection is proportional to the total number of
1339
               long-lived objects, which is virtually unbounded.
1340
1341
               Indeed, it has been remarked that doing a full collection every
1342
               <constant number> of object creations entails a dramatic performance
1343
               degradation in workloads which consist in creating and storing lots of
1344
               long-lived objects (e.g. building a large list of GC-tracked objects would
1345
               show quadratic performance, instead of linear as expected: see issue #4074).
1346
1347
               Using the above ratio, instead, yields amortized linear performance in
1348
               the total number of objects (the effect of which can be summarized
1349
               thusly: "each full garbage collection is more and more costly as the
1350
               number of objects grows, but we do fewer and fewer of them").
1351
1352
               This heuristic was suggested by Martin von Löwis on python-dev in
1353
               June 2008. His original analysis and proposal can be found at:
1354
               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1355
            */
1356
97.0k
            if (i == NUM_GENERATIONS - 1
1357
9.49k
                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1358
8.89k
            {
1359
8.89k
                continue;
1360
8.89k
            }
1361
88.1k
            return i;
1362
97.0k
        }
1363
298k
    }
1364
14.0k
    return -1;
1365
102k
}
1366
1367
static struct gc_generation_stats *
1368
gc_get_stats(GCState *gcstate, int gen)
1369
88.1k
{
1370
88.1k
    if (gen == 0) {
1371
80.2k
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1372
80.2k
        buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE;
1373
80.2k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1374
80.2k
        return stats;
1375
80.2k
    }
1376
7.87k
    else {
1377
7.87k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1378
7.87k
        buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE;
1379
7.87k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1380
7.87k
        return stats;
1381
7.87k
    }
1382
88.1k
}
1383
1384
static struct gc_generation_stats *
1385
gc_get_prev_stats(GCState *gcstate, int gen)
1386
88.1k
{
1387
88.1k
    if (gen == 0) {
1388
80.2k
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1389
80.2k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1390
80.2k
        return stats;
1391
80.2k
    }
1392
7.87k
    else {
1393
7.87k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1394
7.87k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1395
7.87k
        return stats;
1396
7.87k
    }
1397
88.1k
}
1398
1399
static void
1400
add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
1401
88.1k
{
1402
88.1k
    struct gc_generation_stats *prev_stats = gc_get_prev_stats(gcstate, gen);
1403
88.1k
    struct gc_generation_stats *cur_stats = gc_get_stats(gcstate, gen);
1404
1405
88.1k
    memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
1406
1407
88.1k
    cur_stats->ts_start = stats->ts_start;
1408
88.1k
    cur_stats->collections += 1;
1409
88.1k
    cur_stats->collected += stats->collected;
1410
88.1k
    cur_stats->uncollectable += stats->uncollectable;
1411
88.1k
    cur_stats->candidates += stats->candidates;
1412
1413
88.1k
    cur_stats->duration += stats->duration;
1414
88.1k
    cur_stats->heap_size = stats->heap_size;
1415
    /* Publish ts_stop last so remote readers do not select a partially
1416
       updated stats record as the latest collection. */
1417
88.1k
    cur_stats->ts_stop = stats->ts_stop;
1418
88.1k
}
1419
1420
/* This is the main function.  Read this to understand how the
1421
 * collection process works. */
1422
static Py_ssize_t
1423
gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1424
102k
{
1425
102k
    int i;
1426
102k
    PyGC_Head *young; /* the generation we are examining */
1427
102k
    PyGC_Head *old; /* next older generation */
1428
102k
    PyGC_Head unreachable; /* non-problematic unreachable trash */
1429
102k
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1430
102k
    PyGC_Head *gc;
1431
102k
    GCState *gcstate = &tstate->interp->gc;
1432
1433
    // gc_collect_main() must not be called before _PyGC_Init
1434
    // or after _PyGC_Fini()
1435
102k
    assert(gcstate->garbage != NULL);
1436
102k
    assert(!_PyErr_Occurred(tstate));
1437
1438
102k
    int expected = 0;
1439
102k
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
1440
        // Don't start a garbage collection if one is already in progress.
1441
0
        return 0;
1442
0
    }
1443
102k
    gcstate->frame = tstate->current_frame;
1444
1445
102k
    if (generation == GENERATION_AUTO) {
1446
        // Select the oldest generation that needs collecting. We will collect
1447
        // objects from that generation and all generations younger than it.
1448
102k
        generation = gc_select_generation(gcstate);
1449
102k
        if (generation < 0) {
1450
            // No generation needs to be collected.
1451
14.0k
            _Py_atomic_store_int(&gcstate->collecting, 0);
1452
14.0k
            return 0;
1453
14.0k
        }
1454
102k
    }
1455
1456
102k
    assert(generation >= 0 && generation < NUM_GENERATIONS);
1457
1458
#ifdef Py_STATS
1459
    {
1460
        PyStats *s = _PyStats_GET();
1461
        if (s) {
1462
            s->object_stats.object_visits = 0;
1463
        }
1464
    }
1465
#endif
1466
1467
88.1k
    GC_STAT_ADD(generation, collections, 1);
1468
1469
88.1k
    struct gc_generation_stats stats = { 0 };
1470
88.1k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
1471
88.1k
        invoke_gc_callback(tstate, "start", generation, &stats);
1472
88.1k
    }
1473
1474
88.1k
    stats.heap_size = gcstate->heap_size;
1475
    // ignore error: don't interrupt the GC if reading the clock fails
1476
88.1k
    (void)PyTime_PerfCounterRaw(&stats.ts_start);
1477
88.1k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
1478
0
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1479
0
        show_stats_each_generations(gcstate);
1480
0
    }
1481
1482
88.1k
    if (PyDTrace_GC_START_ENABLED()) {
1483
0
        PyDTrace_GC_START(generation);
1484
0
    }
1485
1486
    /* update collection and allocation counters */
1487
88.1k
    if (generation+1 < NUM_GENERATIONS) {
1488
87.5k
        gcstate->generations[generation+1].count += 1;
1489
87.5k
    }
1490
184k
    for (i = 0; i <= generation; i++) {
1491
96.6k
        gcstate->generations[i].count = 0;
1492
96.6k
    }
1493
1494
    /* merge younger generations with one we are currently collecting */
1495
96.6k
    for (i = 0; i < generation; i++) {
1496
8.47k
        gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1497
8.47k
    }
1498
1499
    /* handy references */
1500
88.1k
    young = GEN_HEAD(gcstate, generation);
1501
88.1k
    if (generation < NUM_GENERATIONS-1) {
1502
87.5k
        old = GEN_HEAD(gcstate, generation+1);
1503
87.5k
    }
1504
597
    else {
1505
597
        old = young;
1506
597
    }
1507
88.1k
    validate_list(old, collecting_clear_unreachable_clear);
1508
1509
88.1k
    stats.candidates = deduce_unreachable(young, &unreachable);
1510
1511
88.1k
    untrack_tuples(young);
1512
    /* Move reachable objects to next generation. */
1513
88.1k
    if (young != old) {
1514
87.5k
        if (generation == NUM_GENERATIONS - 2) {
1515
7.28k
            gcstate->long_lived_pending += gc_list_size(young);
1516
7.28k
        }
1517
87.5k
        gc_list_merge(young, old);
1518
87.5k
    }
1519
597
    else {
1520
        // In Python <= 3.13, we called untrack_dicts(young) here to untrack
1521
        // atomic-only dicts (see issue #14775). Python 3.14 removed the lazy
1522
        // dict tracking machinery entirely (GH-127010) -- dicts are always
1523
        // tracked from creation and never untracked by GC. That way, we don't
1524
        // have to restore MAINTAIN_TRACKING across every PyDict_SetItem call
1525
        // site; the cost is slightly more work for full collections on dicts
1526
        // with only atomic values.
1527
597
        gcstate->long_lived_pending = 0;
1528
597
        gcstate->long_lived_total = gc_list_size(young);
1529
597
    }
1530
1531
    /* All objects in unreachable are trash, but objects reachable from
1532
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
1533
     */
1534
88.1k
    gc_list_init(&finalizers);
1535
    // NEXT_MASK_UNREACHABLE is cleared here.
1536
    // After move_legacy_finalizers(), unreachable is normal list.
1537
88.1k
    move_legacy_finalizers(&unreachable, &finalizers);
1538
    /* finalizers contains the unreachable objects with a legacy finalizer;
1539
     * unreachable objects reachable *from* those are also uncollectable,
1540
     * and we move those into the finalizers list too.
1541
     */
1542
88.1k
    move_legacy_finalizer_reachable(&finalizers);
1543
1544
88.1k
    validate_list(&finalizers, collecting_clear_unreachable_clear);
1545
88.1k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1546
1547
    /* Print debugging information. */
1548
88.1k
    if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
1549
0
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1550
0
            debug_cycle("collectable", FROM_GC(gc));
1551
0
        }
1552
0
    }
1553
1554
    /* Clear weakrefs and invoke callbacks as necessary. */
1555
88.1k
    stats.collected += handle_weakref_callbacks(&unreachable, old);
1556
88.1k
    validate_list(old, collecting_clear_unreachable_clear);
1557
88.1k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1558
1559
    /* Call tp_finalize on objects which have one. */
1560
88.1k
    finalize_garbage(tstate, &unreachable);
1561
1562
    /* Handle any objects that may have resurrected after the call
1563
     * to 'finalize_garbage' and continue the collection with the
1564
     * objects that are still unreachable */
1565
88.1k
    PyGC_Head final_unreachable;
1566
88.1k
    handle_resurrected_objects(&unreachable, &final_unreachable, old);
1567
1568
    /* Clear weakrefs to objects in the unreachable set.  No Python-level
1569
     * code must be allowed to access those unreachable objects.  During
1570
     * delete_garbage(), finalizers outside the unreachable set might run
1571
     * and create new weakrefs.  If those weakrefs were not cleared, they
1572
     * could reveal unreachable objects.  Callbacks are not executed.
1573
     */
1574
88.1k
    clear_weakrefs(&final_unreachable);
1575
1576
    /* Call tp_clear on objects in the final_unreachable set.  This will cause
1577
    * the reference cycles to be broken.  It may also cause some objects
1578
    * in finalizers to be freed.
1579
    */
1580
88.1k
    stats.collected += gc_list_size(&final_unreachable);
1581
88.1k
    delete_garbage(tstate, gcstate, &final_unreachable, old);
1582
1583
    /* Collect statistics on uncollectable objects found and print
1584
     * debugging information. */
1585
88.1k
    Py_ssize_t n = 0;
1586
88.1k
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1587
0
        n++;
1588
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
1589
0
            debug_cycle("uncollectable", FROM_GC(gc));
1590
0
    }
1591
88.1k
    stats.uncollectable = n;
1592
88.1k
    (void)PyTime_PerfCounterRaw(&stats.ts_stop);
1593
88.1k
    stats.duration = PyTime_AsSecondsDouble(stats.ts_stop - stats.ts_start);
1594
88.1k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
1595
0
        PySys_WriteStderr(
1596
0
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1597
0
            stats.uncollectable+stats.collected, stats.uncollectable,
1598
0
            stats.duration);
1599
0
    }
1600
1601
    /* Append instances in the uncollectable set to a Python
1602
     * reachable list of garbage.  The programmer has to deal with
1603
     * this if they insist on creating this type of structure.
1604
     */
1605
88.1k
    handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1606
88.1k
    validate_list(old, collecting_clear_unreachable_clear);
1607
1608
    /* Clear free list only during the collection of the highest
1609
     * generation */
1610
88.1k
    if (generation == NUM_GENERATIONS-1) {
1611
597
        _PyGC_ClearAllFreeLists(tstate->interp);
1612
597
    }
1613
1614
88.1k
    if (_PyErr_Occurred(tstate)) {
1615
0
        if (reason == _Py_GC_REASON_SHUTDOWN) {
1616
0
            _PyErr_Clear(tstate);
1617
0
        }
1618
0
        else {
1619
0
            PyErr_FormatUnraisable("Exception ignored in garbage collection");
1620
0
        }
1621
0
    }
1622
1623
    /* Update stats */
1624
88.1k
    add_stats(gcstate, generation, &stats);
1625
88.1k
    GC_STAT_ADD(generation, objects_collected, stats.collected);
1626
1627
#ifdef Py_STATS
1628
    {
1629
        PyStats *s = _PyStats_GET();
1630
        if (s) {
1631
            GC_STAT_ADD(generation, object_visits,
1632
                s->object_stats.object_visits);
1633
            s->object_stats.object_visits = 0;
1634
        }
1635
    }
1636
#endif
1637
1638
88.1k
    if (PyDTrace_GC_DONE_ENABLED()) {
1639
0
        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
1640
0
    }
1641
1642
88.1k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
1643
88.1k
        invoke_gc_callback(tstate, "stop", generation, &stats);
1644
88.1k
    }
1645
1646
88.1k
    assert(!_PyErr_Occurred(tstate));
1647
88.1k
    gcstate->frame = NULL;
1648
88.1k
    _Py_atomic_store_int(&gcstate->collecting, 0);
1649
88.1k
    return stats.uncollectable + stats.collected;
1650
102k
}
1651
1652
static int
1653
referrersvisit(PyObject* obj, void *arg)
1654
0
{
1655
0
    PyObject *objs = arg;
1656
0
    Py_ssize_t i;
1657
0
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
1658
0
        if (PyTuple_GET_ITEM(objs, i) == obj) {
1659
0
            return 1;
1660
0
        }
1661
0
    }
1662
0
    return 0;
1663
0
}
1664
1665
static int
1666
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1667
0
{
1668
0
    PyGC_Head *gc;
1669
0
    PyObject *obj;
1670
0
    traverseproc traverse;
1671
0
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1672
0
        obj = FROM_GC(gc);
1673
0
        traverse = Py_TYPE(obj)->tp_traverse;
1674
0
        if (obj == objs || obj == resultlist) {
1675
0
            continue;
1676
0
        }
1677
0
        if (traverse(obj, referrersvisit, objs)) {
1678
0
            if (PyList_Append(resultlist, obj) < 0) {
1679
0
                return 0; /* error */
1680
0
            }
1681
0
        }
1682
0
    }
1683
0
    return 1; /* no error */
1684
0
}
1685
1686
PyObject *
1687
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
1688
0
{
1689
0
    PyObject *result = PyList_New(0);
1690
0
    if (!result) {
1691
0
        return NULL;
1692
0
    }
1693
1694
0
    GCState *gcstate = &interp->gc;
1695
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1696
0
        if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) {
1697
0
            Py_DECREF(result);
1698
0
            return NULL;
1699
0
        }
1700
0
    }
1701
0
    return result;
1702
0
}
1703
1704
PyObject *
1705
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
1706
0
{
1707
0
    assert(generation >= -1 && generation < NUM_GENERATIONS);
1708
0
    GCState *gcstate = &interp->gc;
1709
1710
0
    PyObject *result = PyList_New(0);
1711
0
    if (result == NULL) {
1712
0
        return NULL;
1713
0
    }
1714
1715
0
    if (generation == -1) {
1716
        /* If generation is -1, get all objects from all generations */
1717
0
        for (int i = 0; i < NUM_GENERATIONS; i++) {
1718
0
            if (append_objects(result, GEN_HEAD(gcstate, i))) {
1719
0
                goto error;
1720
0
            }
1721
0
        }
1722
0
    }
1723
0
    else {
1724
0
        if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1725
0
            goto error;
1726
0
        }
1727
0
    }
1728
1729
0
    return result;
1730
0
error:
1731
0
    Py_DECREF(result);
1732
0
    return NULL;
1733
0
}
1734
1735
void
1736
_PyGC_Freeze(PyInterpreterState *interp)
1737
0
{
1738
0
    GCState *gcstate = &interp->gc;
1739
0
    for (int i = 0; i < NUM_GENERATIONS; ++i) {
1740
0
        gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1741
0
        gcstate->generations[i].count = 0;
1742
0
    }
1743
0
}
1744
1745
void
1746
_PyGC_Unfreeze(PyInterpreterState *interp)
1747
0
{
1748
0
    GCState *gcstate = &interp->gc;
1749
0
    gc_list_merge(&gcstate->permanent_generation.head,
1750
0
                  GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1751
0
}
1752
1753
Py_ssize_t
1754
_PyGC_GetFreezeCount(PyInterpreterState *interp)
1755
0
{
1756
0
    GCState *gcstate = &interp->gc;
1757
0
    return gc_list_size(&gcstate->permanent_generation.head);
1758
0
}
1759
1760
/* C API for controlling the state of the garbage collector */
1761
int
1762
PyGC_Enable(void)
1763
0
{
1764
0
    GCState *gcstate = get_gc_state();
1765
0
    int old_state = gcstate->enabled;
1766
0
    gcstate->enabled = 1;
1767
0
    return old_state;
1768
0
}
1769
1770
int
1771
PyGC_Disable(void)
1772
0
{
1773
0
    GCState *gcstate = get_gc_state();
1774
0
    int old_state = gcstate->enabled;
1775
0
    gcstate->enabled = 0;
1776
0
    return old_state;
1777
0
}
1778
1779
int
1780
PyGC_IsEnabled(void)
1781
0
{
1782
0
    GCState *gcstate = get_gc_state();
1783
0
    return gcstate->enabled;
1784
0
}
1785
1786
/* Public API to invoke gc.collect() from C */
1787
Py_ssize_t
1788
PyGC_Collect(void)
1789
0
{
1790
0
    PyThreadState *tstate = _PyThreadState_GET();
1791
0
    GCState *gcstate = &tstate->interp->gc;
1792
1793
0
    if (!gcstate->enabled) {
1794
0
        return 0;
1795
0
    }
1796
1797
0
    Py_ssize_t n;
1798
0
    PyObject *exc = _PyErr_GetRaisedException(tstate);
1799
0
    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
1800
0
    _PyErr_SetRaisedException(tstate, exc);
1801
1802
0
    return n;
1803
0
}
1804
1805
Py_ssize_t
1806
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1807
0
{
1808
0
    return gc_collect_main(tstate, generation, reason);
1809
0
}
1810
1811
void
1812
_PyGC_CollectNoFail(PyThreadState *tstate)
1813
0
{
1814
    /* Ideally, this function is only called on interpreter shutdown,
1815
       and therefore not recursively.  Unfortunately, when there are daemon
1816
       threads, a daemon thread can start a cyclic garbage collection
1817
       during interpreter shutdown (and then never finish it).
1818
       See http://bugs.python.org/issue8713#msg195178 for an example.
1819
       */
1820
0
    gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
1821
0
}
1822
1823
void
1824
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
1825
0
{
1826
0
    GCState *gcstate = &interp->gc;
1827
0
    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
1828
0
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
1829
0
        const char *message;
1830
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
1831
0
            message = "gc: %zd uncollectable objects at shutdown";
1832
0
        }
1833
0
        else {
1834
0
            message = "gc: %zd uncollectable objects at shutdown; " \
1835
0
                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
1836
0
        }
1837
        /* PyErr_WarnFormat does too many things and we are at shutdown,
1838
           the warnings module's dependencies (e.g. linecache) may be gone
1839
           already. */
1840
0
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
1841
0
                                     "gc", NULL, message,
1842
0
                                     PyList_GET_SIZE(gcstate->garbage)))
1843
0
        {
1844
0
            PyErr_FormatUnraisable("Exception ignored in GC shutdown");
1845
0
        }
1846
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
1847
0
            PyObject *repr = NULL, *bytes = NULL;
1848
0
            repr = PyObject_Repr(gcstate->garbage);
1849
0
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
1850
0
                PyErr_FormatUnraisable("Exception ignored in GC shutdown "
1851
0
                                       "while formatting garbage");
1852
0
            }
1853
0
            else {
1854
0
                PySys_WriteStderr(
1855
0
                    "      %s\n",
1856
0
                    PyBytes_AS_STRING(bytes)
1857
0
                    );
1858
0
            }
1859
0
            Py_XDECREF(repr);
1860
0
            Py_XDECREF(bytes);
1861
0
        }
1862
0
    }
1863
0
}
1864
1865
static void
1866
0
finalize_unlink_gc_head(PyGC_Head *gc) {
1867
0
    PyGC_Head *prev = GC_PREV(gc);
1868
0
    PyGC_Head *next = GC_NEXT(gc);
1869
0
    _PyGCHead_SET_NEXT(prev, next);
1870
0
    _PyGCHead_SET_PREV(next, prev);
1871
0
}
1872
1873
void
1874
_PyGC_Fini(PyInterpreterState *interp)
1875
0
{
1876
0
    GCState *gcstate = &interp->gc;
1877
0
    Py_CLEAR(gcstate->garbage);
1878
0
    Py_CLEAR(gcstate->callbacks);
1879
0
    PyMem_RawFree(gcstate->generation_stats);
1880
0
    gcstate->generation_stats = NULL;
1881
1882
    /* Prevent a subtle bug that affects sub-interpreters that use basic
1883
     * single-phase init extensions (m_size == -1).  Those extensions cause objects
1884
     * to be shared between interpreters, via the PyDict_Update(mdict, m_copy) call
1885
     * in import_find_extension().
1886
     *
1887
     * If they are GC objects, their GC head next or prev links could refer to
1888
     * the interpreter _gc_runtime_state PyGC_Head nodes.  Those nodes go away
1889
     * when the interpreter structure is freed and so pointers to them become
1890
     * invalid.  If those objects are still used by another interpreter and
1891
     * UNTRACK is called on them, a crash will happen.  We untrack the nodes
1892
     * here to avoid that.
1893
     *
1894
     * This bug was originally fixed when reported as gh-90228.  The bug was
1895
     * re-introduced in gh-94673.
1896
     */
1897
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1898
0
        finalize_unlink_gc_head(&gcstate->generations[i].head);
1899
0
    }
1900
0
    finalize_unlink_gc_head(&gcstate->permanent_generation.head);
1901
0
}
1902
1903
/* for debugging */
1904
void
1905
_PyGC_Dump(PyGC_Head *g)
1906
0
{
1907
0
    PyObject_Dump(FROM_GC(g));
1908
0
}
1909
1910
1911
#ifdef Py_DEBUG
1912
static int
1913
visit_validate(PyObject *op, void *parent_raw)
1914
{
1915
    PyObject *parent = _PyObject_CAST(parent_raw);
1916
    if (_PyObject_IsFreed(op)) {
1917
        _PyObject_ASSERT_FAILED_MSG(parent,
1918
                                    "PyObject_GC_Track() object is not valid");
1919
    }
1920
    return 0;
1921
}
1922
#endif
1923
1924
1925
/* extension modules might be compiled with GC support so these
1926
   functions must always be available */
1927
1928
void
1929
PyObject_GC_Track(void *op_raw)
1930
184M
{
1931
184M
    PyObject *op = _PyObject_CAST(op_raw);
1932
184M
    if (_PyObject_GC_IS_TRACKED(op)) {
1933
0
        _PyObject_ASSERT_FAILED_MSG(op,
1934
0
                                    "object already tracked "
1935
0
                                    "by the garbage collector");
1936
0
    }
1937
184M
    _PyObject_GC_TRACK(op);
1938
1939
#ifdef Py_DEBUG
1940
    /* Check that the object is valid: validate objects traversed
1941
       by tp_traverse() */
1942
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
1943
    (void)traverse(op, visit_validate, op);
1944
#endif
1945
184M
}
1946
1947
void
1948
PyObject_GC_UnTrack(void *op_raw)
1949
1.27G
{
1950
1.27G
    PyObject *op = _PyObject_CAST(op_raw);
1951
    /* The code for some objects, such as tuples, is a bit
1952
     * sloppy about when the object is tracked and untracked. */
1953
1.27G
    if (_PyObject_GC_IS_TRACKED(op)) {
1954
1.05G
        _PyObject_GC_UNTRACK(op);
1955
1.05G
    }
1956
1.27G
}
1957
1958
int
1959
PyObject_IS_GC(PyObject *obj)
1960
220M
{
1961
220M
    return _PyObject_IS_GC(obj);
1962
220M
}
1963
1964
void
1965
_Py_ScheduleGC(PyThreadState *tstate)
1966
17.8M
{
1967
17.8M
    if (!_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT))
1968
102k
    {
1969
102k
        _Py_set_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
1970
102k
    }
1971
17.8M
}
1972
1973
void
1974
_PyObject_GC_Link(PyObject *op)
1975
613M
{
1976
613M
    PyGC_Head *gc = AS_GC(op);
1977
    // gc must be correctly aligned
1978
613M
    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
1979
1980
613M
    PyThreadState *tstate = _PyThreadState_GET();
1981
613M
    GCState *gcstate = &tstate->interp->gc;
1982
613M
    gc->_gc_next = 0;
1983
613M
    gc->_gc_prev = 0;
1984
613M
    gcstate->generations[0].count++; /* number of allocated GC objects */
1985
613M
    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
1986
17.8M
        gcstate->enabled &&
1987
17.8M
        gcstate->generations[0].threshold &&
1988
17.8M
        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
1989
17.8M
        !_PyErr_Occurred(tstate))
1990
17.8M
    {
1991
17.8M
        _Py_ScheduleGC(tstate);
1992
17.8M
    }
1993
613M
}
1994
1995
void
1996
_Py_RunGC(PyThreadState *tstate)
1997
102k
{
1998
102k
    GCState *gcstate = get_gc_state();
1999
102k
    if (!gcstate->enabled) {
2000
0
        return;
2001
0
    }
2002
102k
    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
2003
102k
}
2004
2005
static PyObject *
2006
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
2007
495M
{
2008
495M
    PyThreadState *tstate = _PyThreadState_GET();
2009
495M
    if (basicsize > PY_SSIZE_T_MAX - presize) {
2010
0
        return _PyErr_NoMemory(tstate);
2011
0
    }
2012
495M
    size_t size = presize + basicsize;
2013
495M
    char *mem = _PyObject_MallocWithType(tp, size);
2014
495M
    if (mem == NULL) {
2015
0
        return _PyErr_NoMemory(tstate);
2016
0
    }
2017
495M
    ((PyObject **)mem)[0] = NULL;
2018
495M
    ((PyObject **)mem)[1] = NULL;
2019
495M
    PyObject *op = (PyObject *)(mem + presize);
2020
495M
    _PyObject_GC_Link(op);
2021
495M
    return op;
2022
495M
}
2023
2024
2025
PyObject *
2026
_PyObject_GC_New(PyTypeObject *tp)
2027
264M
{
2028
264M
    size_t presize = _PyType_PreHeaderSize(tp);
2029
264M
    size_t size = _PyObject_SIZE(tp);
2030
264M
    if (_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)) {
2031
236
        size += _PyInlineValuesSize(tp);
2032
236
    }
2033
264M
    PyObject *op = gc_alloc(tp, size, presize);
2034
264M
    if (op == NULL) {
2035
0
        return NULL;
2036
0
    }
2037
264M
    _PyObject_Init(op, tp);
2038
264M
    if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
2039
236
        _PyObject_InitInlineValues(op, tp);
2040
236
    }
2041
264M
    return op;
2042
264M
}
2043
2044
PyVarObject *
2045
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2046
230M
{
2047
230M
    PyVarObject *op;
2048
2049
230M
    if (nitems < 0) {
2050
0
        PyErr_BadInternalCall();
2051
0
        return NULL;
2052
0
    }
2053
230M
    size_t presize = _PyType_PreHeaderSize(tp);
2054
230M
    size_t size = _PyObject_VAR_SIZE(tp, nitems);
2055
230M
    op = (PyVarObject *)gc_alloc(tp, size, presize);
2056
230M
    if (op == NULL) {
2057
0
        return NULL;
2058
0
    }
2059
230M
    _PyObject_InitVar(op, tp, nitems);
2060
230M
    return op;
2061
230M
}
2062
2063
PyObject *
2064
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2065
0
{
2066
0
    size_t presize = _PyType_PreHeaderSize(tp);
2067
0
    size_t size = _PyObject_SIZE(tp) + extra_size;
2068
0
    PyObject *op = gc_alloc(tp, size, presize);
2069
0
    if (op == NULL) {
2070
0
        return NULL;
2071
0
    }
2072
0
    memset((char *)op + sizeof(PyObject), 0, size - sizeof(PyObject));
2073
0
    _PyObject_Init(op, tp);
2074
0
    return op;
2075
0
}
2076
2077
PyVarObject *
2078
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2079
40
{
2080
40
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2081
40
    const size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2082
40
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2083
40
    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2084
0
        return (PyVarObject *)PyErr_NoMemory();
2085
0
    }
2086
40
    char *mem = (char *)op - presize;
2087
40
    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
2088
40
    if (mem == NULL) {
2089
0
        return (PyVarObject *)PyErr_NoMemory();
2090
0
    }
2091
40
    op = (PyVarObject *) (mem + presize);
2092
40
    Py_SET_SIZE(op, nitems);
2093
40
    return op;
2094
40
}
2095
2096
void
2097
PyObject_GC_Del(void *op)
2098
612M
{
2099
612M
    size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2100
612M
    PyGC_Head *g = AS_GC(op);
2101
612M
    if (_PyObject_GC_IS_TRACKED(op)) {
2102
0
        gc_list_remove(g);
2103
0
        GCState *gcstate = get_gc_state();
2104
0
        gcstate->heap_size--;
2105
#ifdef Py_DEBUG
2106
        PyObject *exc = PyErr_GetRaisedException();
2107
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2108
                                     "gc", NULL,
2109
                                     "Object of type %s is not untracked "
2110
                                     "before destruction",
2111
                                     Py_TYPE(op)->tp_name))
2112
        {
2113
            PyErr_FormatUnraisable("Exception ignored on object deallocation");
2114
        }
2115
        PyErr_SetRaisedException(exc);
2116
#endif
2117
0
    }
2118
612M
    GCState *gcstate = get_gc_state();
2119
612M
    if (gcstate->generations[0].count > 0) {
2120
426M
        gcstate->generations[0].count--;
2121
426M
    }
2122
612M
    PyObject_Free(((char *)op)-presize);
2123
612M
}
2124
2125
int
2126
PyObject_GC_IsTracked(PyObject* obj)
2127
9.21k
{
2128
9.21k
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2129
32
        return 1;
2130
32
    }
2131
9.18k
    return 0;
2132
9.21k
}
2133
2134
int
2135
PyObject_GC_IsFinalized(PyObject *obj)
2136
0
{
2137
0
    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
2138
0
         return 1;
2139
0
    }
2140
0
    return 0;
2141
0
}
2142
2143
static int
2144
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
2145
0
{
2146
0
    PyGC_Head *gc_list, *gc;
2147
0
    gc_list = &gen->head;
2148
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2149
0
        PyObject *op = FROM_GC(gc);
2150
0
        Py_INCREF(op);
2151
0
        int res = callback(op, arg);
2152
0
        Py_DECREF(op);
2153
0
        if (!res) {
2154
0
            return -1;
2155
0
        }
2156
0
    }
2157
0
    return 0;
2158
0
}
2159
2160
void
2161
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2162
0
{
2163
0
    GCState *gcstate = get_gc_state();
2164
0
    int original_state = gcstate->enabled;
2165
0
    gcstate->enabled = 0;
2166
0
    for (size_t i = 0; i < NUM_GENERATIONS; i++) {
2167
0
        if (visit_generation(callback, arg, &gcstate->generations[i]) < 0) {
2168
0
            goto done;
2169
0
        }
2170
0
    }
2171
0
    visit_generation(callback, arg, &gcstate->permanent_generation);
2172
0
done:
2173
0
    gcstate->enabled = original_state;
2174
0
}
2175
2176
#endif  // !Py_GIL_DISABLED