Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/gc.c
Line
Count
Source
1
//  This implements the reference cycle garbage collector.
2
//  The Python module interface to the collector is in gcmodule.c.
3
//  See InternalDocs/garbage_collector.md for more infromation.
4
5
#include "Python.h"
6
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
7
#include "pycore_dict.h"          // _PyInlineValuesSize()
8
#include "pycore_initconfig.h"    // _PyStatus_OK()
9
#include "pycore_context.h"
10
#include "pycore_interp.h"        // PyInterpreterState.gc
11
#include "pycore_interpframe.h"   // _PyFrame_GetLocalsArray()
12
#include "pycore_object.h"
13
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
14
#include "pycore_pyerrors.h"
15
#include "pycore_pystate.h"       // _PyThreadState_GET()
16
#include "pycore_tuple.h"         // _PyTuple_MaybeUntrack()
17
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
18
#include "pydtrace.h"
19
20
#if !defined(Py_GIL_DISABLED)
21
22
typedef struct _gc_runtime_state GCState;
23
24
#ifdef Py_DEBUG
25
#  define GC_DEBUG
26
#endif
27
28
1.34G
#define GC_NEXT _PyGCHead_NEXT
29
116M
#define GC_PREV _PyGCHead_PREV
30
31
// update_refs() set this bit for all objects in current generation.
32
// subtract_refs() and move_unreachable() uses this to distinguish
33
// visited object is in GCing or not.
34
//
35
// move_unreachable() removes this flag from reachable objects.
36
// Only unreachable objects have this flag.
37
//
38
// No objects in interpreter have this flag after GC ends.
39
1.91G
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
40
41
// Lowest bit of _gc_next is used for UNREACHABLE flag.
42
//
43
// This flag represents the object is in unreachable list in move_unreachable()
44
//
45
// Although this flag is used only in move_unreachable(), move_unreachable()
46
// doesn't clear this flag to skip unnecessary iteration.
47
// move_legacy_finalizers() removes this flag instead.
48
// Between them, unreachable list is not normal list and we can not use
49
// most gc_list_* functions for it.
50
399M
#define NEXT_MASK_UNREACHABLE  (1)
51
52
2.37G
#define AS_GC(op) _Py_AS_GC(op)
53
1.47G
#define FROM_GC(gc) _Py_FROM_GC(gc)
54
55
// Automatically choose the generation that needs collecting.
56
219k
#define GENERATION_AUTO (-1)
57
58
static inline int
59
gc_is_collecting(PyGC_Head *g)
60
1.18G
{
61
1.18G
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
62
1.18G
}
63
64
static inline void
65
gc_clear_collecting(PyGC_Head *g)
66
361M
{
67
361M
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
68
361M
}
69
70
static inline Py_ssize_t
71
gc_get_refs(PyGC_Head *g)
72
986M
{
73
986M
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
74
986M
}
75
76
static inline void
77
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
78
211M
{
79
211M
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
80
211M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
81
211M
}
82
83
static inline void
84
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
85
366M
{
86
366M
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
87
366M
        | PREV_MASK_COLLECTING
88
366M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
89
366M
}
90
91
static inline void
92
gc_decref(PyGC_Head *g)
93
312M
{
94
312M
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
95
312M
                              gc_get_refs(g) > 0,
96
312M
                              "refcount is too small");
97
312M
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
98
312M
}
99
100
101
196k
#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
102
103
104
static GCState *
105
get_gc_state(void)
106
591M
{
107
591M
    PyInterpreterState *interp = _PyInterpreterState_GET();
108
591M
    return &interp->gc;
109
591M
}
110
111
112
void
113
_PyGC_InitState(GCState *gcstate)
114
37
{
115
37
#define INIT_HEAD(GEN) \
116
148
    do { \
117
148
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
118
148
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
119
148
    } while (0)
120
121
148
    for (int i = 0; i < NUM_GENERATIONS; i++) {
122
111
        assert(gcstate->generations[i].count == 0);
123
111
        INIT_HEAD(gcstate->generations[i]);
124
111
    };
125
37
    gcstate->generation0 = GEN_HEAD(gcstate, 0);
126
37
    INIT_HEAD(gcstate->permanent_generation);
127
128
37
#undef INIT_HEAD
129
37
}
130
131
132
PyStatus
133
_PyGC_Init(PyInterpreterState *interp)
134
37
{
135
37
    GCState *gcstate = &interp->gc;
136
137
37
    gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats));
138
37
    if (gcstate->generation_stats == NULL) {
139
0
        return _PyStatus_NO_MEMORY();
140
0
    }
141
142
37
    gcstate->garbage = PyList_New(0);
143
37
    if (gcstate->garbage == NULL) {
144
0
        return _PyStatus_NO_MEMORY();
145
0
    }
146
147
37
    gcstate->callbacks = PyList_New(0);
148
37
    if (gcstate->callbacks == NULL) {
149
0
        return _PyStatus_NO_MEMORY();
150
0
    }
151
152
37
    return _PyStatus_OK();
153
37
}
154
155
156
/*
157
_gc_prev values
158
---------------
159
160
Between collections, _gc_prev is used for doubly linked list.
161
162
Lowest two bits of _gc_prev are used for flags.
163
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
164
or _PyObject_GC_UNTRACK() is called.
165
166
During a collection, _gc_prev is temporary used for gc_refs, and the gc list
167
is singly linked until _gc_prev is restored.
168
169
gc_refs
170
    At the start of a collection, update_refs() copies the true refcount
171
    to gc_refs, for each object in the generation being collected.
172
    subtract_refs() then adjusts gc_refs so that it equals the number of
173
    times an object is referenced directly from outside the generation
174
    being collected.
175
176
PREV_MASK_COLLECTING
177
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
178
    update_refs().
179
180
181
_gc_next values
182
---------------
183
184
_gc_next takes these values:
185
186
0
187
    The object is not tracked
188
189
!= 0
190
    Pointer to the next object in the GC list.
191
    Additionally, lowest bit is used temporary for
192
    NEXT_MASK_UNREACHABLE flag described below.
193
194
NEXT_MASK_UNREACHABLE
195
    move_unreachable() then moves objects not reachable (whether directly or
196
    indirectly) from outside the generation into an "unreachable" set and
197
    set this flag.
198
199
    Objects that are found to be reachable have gc_refs set to 1.
200
    When this flag is set for the reachable object, the object must be in
201
    "unreachable" set.
202
    The flag is unset and the object is moved back to "reachable" set.
203
204
    move_legacy_finalizers() will remove this flag from "unreachable" set.
205
*/
206
207
/*** list functions ***/
208
209
static inline void
210
gc_list_init(PyGC_Head *list)
211
817k
{
212
    // List header must not have flags.
213
    // We can assign pointer by simple cast.
214
817k
    list->_gc_prev = (uintptr_t)list;
215
817k
    list->_gc_next = (uintptr_t)list;
216
817k
}
217
218
static inline int
219
gc_list_is_empty(PyGC_Head *list)
220
14.1M
{
221
14.1M
    return (list->_gc_next == (uintptr_t)list);
222
14.1M
}
223
224
/* Append `node` to `list`. */
225
static inline void
226
gc_list_append(PyGC_Head *node, PyGC_Head *list)
227
42.3M
{
228
42.3M
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
229
230
    // last <-> node
231
42.3M
    _PyGCHead_SET_PREV(node, last);
232
42.3M
    _PyGCHead_SET_NEXT(last, node);
233
234
    // node <-> list
235
42.3M
    _PyGCHead_SET_NEXT(node, list);
236
42.3M
    list->_gc_prev = (uintptr_t)node;
237
42.3M
}
238
239
/* Remove `node` from the gc list it's currently in. */
240
static inline void
241
gc_list_remove(PyGC_Head *node)
242
0
{
243
0
    PyGC_Head *prev = GC_PREV(node);
244
0
    PyGC_Head *next = GC_NEXT(node);
245
246
0
    _PyGCHead_SET_NEXT(prev, next);
247
0
    _PyGCHead_SET_PREV(next, prev);
248
249
0
    node->_gc_next = 0; /* object is not currently tracked */
250
0
}
251
252
/* Move `node` from the gc list it's currently in (which is not explicitly
253
 * named here) to the end of `list`.  This is semantically the same as
254
 * gc_list_remove(node) followed by gc_list_append(node, list).
255
 */
256
static void
257
gc_list_move(PyGC_Head *node, PyGC_Head *list)
258
12.8M
{
259
    /* Unlink from current list. */
260
12.8M
    PyGC_Head *from_prev = GC_PREV(node);
261
12.8M
    PyGC_Head *from_next = GC_NEXT(node);
262
12.8M
    _PyGCHead_SET_NEXT(from_prev, from_next);
263
12.8M
    _PyGCHead_SET_PREV(from_next, from_prev);
264
265
    /* Relink at end of new list. */
266
    // list must not have flags.  So we can skip macros.
267
12.8M
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
268
12.8M
    _PyGCHead_SET_PREV(node, to_prev);
269
12.8M
    _PyGCHead_SET_NEXT(to_prev, node);
270
12.8M
    list->_gc_prev = (uintptr_t)node;
271
12.8M
    _PyGCHead_SET_NEXT(node, list);
272
12.8M
}
273
274
/* append list `from` onto list `to`; `from` becomes an empty list */
275
static void
276
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
277
367k
{
278
367k
    assert(from != to);
279
367k
    if (!gc_list_is_empty(from)) {
280
98.3k
        PyGC_Head *to_tail = GC_PREV(to);
281
98.3k
        PyGC_Head *from_head = GC_NEXT(from);
282
98.3k
        PyGC_Head *from_tail = GC_PREV(from);
283
98.3k
        assert(from_head != from);
284
98.3k
        assert(from_tail != from);
285
286
98.3k
        _PyGCHead_SET_NEXT(to_tail, from_head);
287
98.3k
        _PyGCHead_SET_PREV(from_head, to_tail);
288
289
98.3k
        _PyGCHead_SET_NEXT(from_tail, to);
290
98.3k
        _PyGCHead_SET_PREV(to, from_tail);
291
98.3k
    }
292
367k
    gc_list_init(from);
293
367k
}
294
295
static Py_ssize_t
296
gc_list_size(PyGC_Head *list)
297
98.0k
{
298
98.0k
    PyGC_Head *gc;
299
98.0k
    Py_ssize_t n = 0;
300
200M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
301
200M
        n++;
302
200M
    }
303
98.0k
    return n;
304
98.0k
}
305
306
/* Walk the list and mark all objects as non-collecting */
307
static inline void
308
gc_list_clear_collecting(PyGC_Head *collectable)
309
89.9k
{
310
89.9k
    PyGC_Head *gc;
311
9.38M
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
312
9.29M
        gc_clear_collecting(gc);
313
9.29M
    }
314
89.9k
}
315
316
/* Append objects in a GC list to a Python list.
317
 * Return 0 if all OK, < 0 if error (out of memory for list)
318
 */
319
static int
320
append_objects(PyObject *py_list, PyGC_Head *gc_list)
321
0
{
322
0
    PyGC_Head *gc;
323
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
324
0
        PyObject *op = FROM_GC(gc);
325
0
        if (op != py_list) {
326
0
            if (PyList_Append(py_list, op)) {
327
0
                return -1; /* exception */
328
0
            }
329
0
        }
330
0
    }
331
0
    return 0;
332
0
}
333
334
// Constants for validate_list's flags argument.
335
enum flagstates {collecting_clear_unreachable_clear,
336
                 collecting_clear_unreachable_set,
337
                 collecting_set_unreachable_clear,
338
                 collecting_set_unreachable_set};
339
340
#ifdef GC_DEBUG
341
// validate_list checks list consistency.  And it works as document
342
// describing when flags are expected to be set / unset.
343
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
344
// the prev and next pointers are "polluted" with flags.
345
// What's checked:
346
// - The `head` pointers are not polluted.
347
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
348
//   `set or clear, as specified by the 'flags' argument.
349
// - The prev and next pointers are mutually consistent.
350
static void
351
validate_list(PyGC_Head *head, enum flagstates flags)
352
{
353
    assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
354
    assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
355
    uintptr_t prev_value = 0, next_value = 0;
356
    switch (flags) {
357
        case collecting_clear_unreachable_clear:
358
            break;
359
        case collecting_set_unreachable_clear:
360
            prev_value = PREV_MASK_COLLECTING;
361
            break;
362
        case collecting_clear_unreachable_set:
363
            next_value = NEXT_MASK_UNREACHABLE;
364
            break;
365
        case collecting_set_unreachable_set:
366
            prev_value = PREV_MASK_COLLECTING;
367
            next_value = NEXT_MASK_UNREACHABLE;
368
            break;
369
        default:
370
            assert(! "bad internal flags argument");
371
    }
372
    PyGC_Head *prev = head;
373
    PyGC_Head *gc = GC_NEXT(head);
374
    while (gc != head) {
375
        PyGC_Head *trueprev = GC_PREV(gc);
376
        PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
377
        assert(truenext != NULL);
378
        assert(trueprev == prev);
379
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
380
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
381
        prev = gc;
382
        gc = truenext;
383
    }
384
    assert(prev == GC_PREV(head));
385
}
386
#else
387
1.16M
#define validate_list(x, y) do{}while(0)
388
#endif
389
390
/*** end of list stuff ***/
391
392
393
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
394
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
395
 */
396
static Py_ssize_t
397
update_refs(PyGC_Head *containers)
398
179k
{
399
179k
    PyGC_Head *next;
400
179k
    PyGC_Head *gc = GC_NEXT(containers);
401
179k
    Py_ssize_t candidates = 0;
402
403
367M
    while (gc != containers) {
404
366M
        next = GC_NEXT(gc);
405
366M
        PyObject *op = FROM_GC(gc);
406
366M
        if (_Py_IsImmortal(op)) {
407
0
            assert(!_Py_IsStaticImmortal(op));
408
0
            _PyObject_GC_UNTRACK(op);
409
0
            gc = next;
410
0
            continue;
411
0
        }
412
366M
        gc_reset_refs(gc, Py_REFCNT(op));
413
        /* Python's cyclic gc should never see an incoming refcount
414
         * of 0:  if something decref'ed to 0, it should have been
415
         * deallocated immediately at that time.
416
         * Possible cause (if the assert triggers):  a tp_dealloc
417
         * routine left a gc-aware object tracked during its teardown
418
         * phase, and did something-- or allowed something to happen --
419
         * that called back into Python.  gc can trigger then, and may
420
         * see the still-tracked dying object.  Before this assert
421
         * was added, such mistakes went on to allow gc to try to
422
         * delete the object again.  In a debug build, that caused
423
         * a mysterious segfault, when _Py_ForgetReference tried
424
         * to remove the object from the doubly-linked list of all
425
         * objects a second time.  In a release build, an actual
426
         * double deallocation occurred, which leads to corruption
427
         * of the allocator's internal bookkeeping pointers.  That's
428
         * so serious that maybe this should be a release-build
429
         * check instead of an assert?
430
         */
431
366M
        _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
432
366M
        gc = next;
433
366M
        candidates++;
434
366M
    }
435
179k
    return candidates;
436
179k
}
437
438
/* A traversal callback for subtract_refs. */
439
static int
440
visit_decref(PyObject *op, void *parent)
441
1.19G
{
442
1.19G
    OBJECT_STAT_INC(object_visits);
443
1.19G
    _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
444
445
1.19G
    if (_PyObject_IS_GC(op)) {
446
611M
        PyGC_Head *gc = AS_GC(op);
447
        /* We're only interested in gc_refs for objects in the
448
         * generation being collected, which can be recognized
449
         * because only they have positive gc_refs.
450
         */
451
611M
        if (gc_is_collecting(gc)) {
452
312M
            gc_decref(gc);
453
312M
        }
454
611M
    }
455
1.19G
    return 0;
456
1.19G
}
457
458
int
459
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
460
4.50M
{
461
    // This is a bit tricky! We want to ignore stackrefs with embedded
462
    // refcounts when computing the incoming references, but otherwise treat
463
    // them like normal.
464
4.50M
    assert(!PyStackRef_IsTaggedInt(*ref));
465
4.50M
    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
466
236k
        return 0;
467
236k
    }
468
4.27M
    Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
469
4.27M
    return 0;
470
4.27M
}
471
472
int
473
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
474
789k
{
475
789k
    _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
476
    /* locals and stack */
477
5.10M
    for (; ref < frame->stackpointer; ref++) {
478
4.31M
        if (!PyStackRef_IsTaggedInt(*ref)) {
479
4.31M
            _Py_VISIT_STACKREF(*ref);
480
4.31M
        }
481
4.31M
    }
482
789k
    return 0;
483
789k
}
484
485
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
486
 * for all objects in containers. The ones with gc_refs > 0 are directly
487
 * reachable from outside containers, and so can't be collected.
488
 */
489
static void
490
subtract_refs(PyGC_Head *containers)
491
179k
{
492
179k
    traverseproc traverse;
493
179k
    PyGC_Head *gc = GC_NEXT(containers);
494
367M
    for (; gc != containers; gc = GC_NEXT(gc)) {
495
366M
        PyObject *op = FROM_GC(gc);
496
366M
        traverse = Py_TYPE(op)->tp_traverse;
497
366M
        (void) traverse(op,
498
366M
                        visit_decref,
499
366M
                        op);
500
366M
    }
501
179k
}
502
503
/* A traversal callback for move_unreachable. */
504
static int
505
visit_reachable(PyObject *op, void *arg)
506
1.12G
{
507
1.12G
    PyGC_Head *reachable = arg;
508
1.12G
    OBJECT_STAT_INC(object_visits);
509
1.12G
    if (!_PyObject_IS_GC(op)) {
510
543M
        return 0;
511
543M
    }
512
513
577M
    PyGC_Head *gc = AS_GC(op);
514
577M
    const Py_ssize_t gc_refs = gc_get_refs(gc);
515
516
    // Ignore objects in other generation.
517
    // This also skips objects "to the left" of the current position in
518
    // move_unreachable's scan of the 'young' list - they've already been
519
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
520
577M
    if (! gc_is_collecting(gc)) {
521
361M
        return 0;
522
361M
    }
523
    // It would be a logic error elsewhere if the collecting flag were set on
524
    // an untracked object.
525
216M
    _PyObject_ASSERT(op, gc->_gc_next != 0);
526
527
216M
    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
528
        /* This had gc_refs = 0 when move_unreachable got
529
         * to it, but turns out it's reachable after all.
530
         * Move it back to move_unreachable's 'young' list,
531
         * and move_unreachable will eventually get to it
532
         * again.
533
         */
534
        // Manually unlink gc from unreachable list because the list functions
535
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
536
42.3M
        PyGC_Head *prev = GC_PREV(gc);
537
42.3M
        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
538
42.3M
        _PyObject_ASSERT(FROM_GC(prev),
539
42.3M
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
540
42.3M
        _PyObject_ASSERT(FROM_GC(next),
541
42.3M
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
542
42.3M
        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
543
42.3M
        _PyGCHead_SET_PREV(next, prev);
544
545
42.3M
        gc_list_append(gc, reachable);
546
42.3M
        gc_set_refs(gc, 1);
547
42.3M
    }
548
174M
    else if (gc_refs == 0) {
549
        /* This is in move_unreachable's 'young' list, but
550
         * the traversal hasn't yet gotten to it.  All
551
         * we need to do is tell move_unreachable that it's
552
         * reachable.
553
         */
554
169M
        gc_set_refs(gc, 1);
555
169M
    }
556
    /* Else there's nothing to do.
557
     * If gc_refs > 0, it must be in move_unreachable's 'young'
558
     * list, and move_unreachable will eventually get to it.
559
     */
560
4.66M
    else {
561
4.66M
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
562
4.66M
    }
563
216M
    return 0;
564
577M
}
565
566
/* Move the unreachable objects from young to unreachable.  After this,
567
 * all objects in young don't have PREV_MASK_COLLECTING flag and
568
 * unreachable have the flag.
569
 * All objects in young after this are directly or indirectly reachable
570
 * from outside the original young; and all objects in unreachable are
571
 * not.
572
 *
573
 * This function restores _gc_prev pointer.  young and unreachable are
574
 * doubly linked list after this function.
575
 * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag.
576
 * So we can not gc_list_* functions for unreachable until we remove the flag.
577
 */
578
static void
579
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
580
179k
{
581
    // previous elem in the young list, used for restore gc_prev.
582
179k
    PyGC_Head *prev = young;
583
179k
    PyGC_Head *gc = GC_NEXT(young);
584
585
    /* Invariants:  all objects "to the left" of us in young are reachable
586
     * (directly or indirectly) from outside the young list as it was at entry.
587
     *
588
     * All other objects from the original young "to the left" of us are in
589
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
590
     * left of us in 'young' now have been scanned, and no objects here
591
     * or to the right have been scanned yet.
592
     */
593
594
409M
    while (gc != young) {
595
409M
        if (gc_get_refs(gc)) {
596
            /* gc is definitely reachable from outside the
597
             * original 'young'.  Mark it as such, and traverse
598
             * its pointers to find any other objects that may
599
             * be directly reachable from it.  Note that the
600
             * call to tp_traverse may append objects to young,
601
             * so we have to wait until it returns to determine
602
             * the next object to visit.
603
             */
604
348M
            PyObject *op = FROM_GC(gc);
605
348M
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
606
348M
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
607
348M
                                      "refcount is too small");
608
            // NOTE: visit_reachable may change gc->_gc_next when
609
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
610
348M
            (void) traverse(op,
611
348M
                    visit_reachable,
612
348M
                    (void *)young);
613
            // relink gc_prev to prev element.
614
348M
            _PyGCHead_SET_PREV(gc, prev);
615
            // gc is not COLLECTING state after here.
616
348M
            gc_clear_collecting(gc);
617
348M
            prev = gc;
618
348M
        }
619
60.9M
        else {
620
            /* This *may* be unreachable.  To make progress,
621
             * assume it is.  gc isn't directly reachable from
622
             * any object we've already traversed, but may be
623
             * reachable from an object we haven't gotten to yet.
624
             * visit_reachable will eventually move gc back into
625
             * young if that's so, and we'll see it again.
626
             */
627
            // Move gc to unreachable.
628
            // No need to gc->next->prev = prev because it is single linked.
629
60.9M
            prev->_gc_next = gc->_gc_next;
630
631
            // We can't use gc_list_append() here because we use
632
            // NEXT_MASK_UNREACHABLE here.
633
60.9M
            PyGC_Head *last = GC_PREV(unreachable);
634
            // NOTE: Since all objects in unreachable set has
635
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
636
            // But this may pollute the unreachable list head's 'next' pointer
637
            // too. That's semantically senseless but expedient here - the
638
            // damage is repaired when this function ends.
639
60.9M
            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
640
60.9M
            _PyGCHead_SET_PREV(gc, last);
641
60.9M
            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
642
60.9M
            unreachable->_gc_prev = (uintptr_t)gc;
643
60.9M
        }
644
409M
        gc = (PyGC_Head*)prev->_gc_next;
645
409M
    }
646
    // young->_gc_prev must be last element remained in the list.
647
179k
    young->_gc_prev = (uintptr_t)prev;
648
    // don't let the pollution of the list head's next pointer leak
649
179k
    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
650
179k
}
651
652
/* In theory, all tuples should be younger than the
653
* objects they refer to, as tuples are immortal.
654
* Therefore, untracking tuples in oldest-first order in the
655
* young generation before promoting them should have tracked
656
* all the tuples that can be untracked.
657
*
658
* Unfortunately, the C API allows tuples to be created
659
* and then filled in. So this won't untrack all tuples
660
* that can be untracked. It should untrack most of them
661
* and is much faster than a more complex approach that
662
* would untrack all relevant tuples.
663
*/
664
static void
665
untrack_tuples(PyGC_Head *head)
666
89.9k
{
667
89.9k
    PyGC_Head *gc = GC_NEXT(head);
668
348M
    while (gc != head) {
669
348M
        PyObject *op = FROM_GC(gc);
670
348M
        PyGC_Head *next = GC_NEXT(gc);
671
348M
        if (PyTuple_CheckExact(op)) {
672
179M
            _PyTuple_MaybeUntrack(op);
673
179M
        }
674
348M
        gc = next;
675
348M
    }
676
89.9k
}
677
678
/* Return true if object has a pre-PEP 442 finalization method. */
679
static int
680
has_legacy_finalizer(PyObject *op)
681
9.29M
{
682
9.29M
    return Py_TYPE(op)->tp_del != NULL;
683
9.29M
}
684
685
/* Move the objects in unreachable with tp_del slots into `finalizers`.
686
 *
687
 * This function also removes NEXT_MASK_UNREACHABLE flag
688
 * from _gc_next in unreachable.
689
 */
690
static void
691
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
692
89.9k
{
693
89.9k
    PyGC_Head *gc, *next;
694
89.9k
    _PyObject_ASSERT(
695
89.9k
        FROM_GC(unreachable),
696
89.9k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
697
698
    /* March over unreachable.  Move objects with finalizers into
699
     * `finalizers`.
700
     */
701
9.38M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
702
9.29M
        PyObject *op = FROM_GC(gc);
703
704
9.29M
        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
705
9.29M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
706
9.29M
        next = (PyGC_Head*)gc->_gc_next;
707
708
9.29M
        if (has_legacy_finalizer(op)) {
709
0
            gc_clear_collecting(gc);
710
0
            gc_list_move(gc, finalizers);
711
0
        }
712
9.29M
    }
713
89.9k
}
714
715
static inline void
716
clear_unreachable_mask(PyGC_Head *unreachable)
717
89.9k
{
718
    /* Check that the list head does not have the unreachable bit set */
719
89.9k
    _PyObject_ASSERT(
720
89.9k
        FROM_GC(unreachable),
721
89.9k
        ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
722
89.9k
    _PyObject_ASSERT(
723
89.9k
        FROM_GC(unreachable),
724
89.9k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
725
726
89.9k
    PyGC_Head *gc, *next;
727
9.38M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
728
9.29M
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
729
9.29M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
730
9.29M
        next = (PyGC_Head*)gc->_gc_next;
731
9.29M
    }
732
89.9k
    validate_list(unreachable, collecting_set_unreachable_clear);
733
89.9k
}
734
735
/* A traversal callback for move_legacy_finalizer_reachable. */
736
static int
737
visit_move(PyObject *op, void *arg)
738
0
{
739
0
    PyGC_Head *tolist = arg;
740
0
    OBJECT_STAT_INC(object_visits);
741
0
    if (_PyObject_IS_GC(op)) {
742
0
        PyGC_Head *gc = AS_GC(op);
743
0
        if (gc_is_collecting(gc)) {
744
0
            gc_list_move(gc, tolist);
745
0
            gc_clear_collecting(gc);
746
0
        }
747
0
    }
748
0
    return 0;
749
0
}
750
751
/* Move objects that are reachable from finalizers, from the unreachable set
752
 * into finalizers set.
753
 */
754
static void
755
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
756
89.9k
{
757
89.9k
    traverseproc traverse;
758
89.9k
    PyGC_Head *gc = GC_NEXT(finalizers);
759
89.9k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
760
        /* Note that the finalizers list may grow during this. */
761
0
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
762
0
        (void) traverse(FROM_GC(gc),
763
0
                        visit_move,
764
0
                        (void *)finalizers);
765
0
    }
766
89.9k
}
767
768
/* Handle weakref callbacks.  Note that it's possible for such weakrefs to be
769
 * outside the unreachable set -- indeed, those are precisely the weakrefs
770
 * whose callbacks must be invoked.  See gc_weakref.txt for overview & some
771
 * details.
772
 *
773
 * The clearing of weakrefs is suble and must be done carefully, as there was
774
 * previous bugs related to this.  First, weakrefs to the unreachable set of
775
 * objects must be cleared before we start calling `tp_clear`.  If we don't,
776
 * those weakrefs can reveal unreachable objects to Python-level code and that
777
 * is not safe.  Many objects are not usable after `tp_clear` has been called
778
 * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
779
 * example bugs).
780
 *
781
 * Weakrefs with callbacks always need to be cleared before executing the
782
 * callback.  That's because sometimes a callback will call the ref object,
783
 * to check if the reference is actually dead (KeyedRef does this, for
784
 * example).  We want to indicate that it is dead, even though it is possible
785
 * a finalizer might resurrect it.  Clearing also prevents the callback from
786
 * executing more than once.
787
 *
788
 * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
789
 * calling finalizers.  However, since tp_subclasses started being necessary
790
 * to invalidate caches (e.g. by PyType_Modified), that clearing has created
791
 * a bug.  If the weakref to the subclass is cleared before a finalizer is
792
 * called, the cache may not be correctly invalidated.  That can lead to
793
 * segfaults since the caches can refer to deallocated objects (GH-135552
794
 * is an example).  Now, we delay the clear of weakrefs without callbacks
795
 * until *after* finalizers have been executed.  That means weakrefs without
796
 * callbacks are still usable while finalizers are being executed.
797
 *
798
 * The weakrefs that are inside the unreachable set must also be cleared.
799
 * The reason this is required is not immediately obvious.  If the weakref
800
 * refers to an object outside of the unreachable set, that object might go
801
 * away when we start clearing objects.  Normally, the object should also be
802
 * part of the unreachable set but that's not true in the case of incomplete
803
 * or missing `tp_traverse` methods.  When that object goes away, the callback
804
 * for weakref can be executed and that could reveal unreachable objects to
805
 * Python-level code.  See bpo-38006 as an example bug.
806
 */
807
static int
808
handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
809
89.9k
{
810
89.9k
    PyGC_Head *gc;
811
89.9k
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
812
89.9k
    PyGC_Head *next;
813
89.9k
    int num_freed = 0;
814
815
89.9k
    gc_list_init(&wrcb_to_call);
816
817
    /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the
818
     * callback needs to be invoked. We make another pass over wrcb_to_call,
819
     * invoking callbacks, after this pass completes.
820
     */
821
9.38M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
822
9.29M
        PyWeakReference **wrlist;
823
824
9.29M
        PyObject *op = FROM_GC(gc);
825
9.29M
        next = GC_NEXT(gc);
826
827
9.29M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
828
6.49M
            continue;
829
6.49M
        }
830
831
        /* It supports weakrefs.  Does it have any?
832
         *
833
         * This is never triggered for static types so we can avoid the
834
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
835
         */
836
2.79M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
837
838
        /* `op` may have some weakrefs.  March over the list and move the
839
         * weakrefs with callbacks that must be called into wrcb_to_call.
840
         */
841
2.79M
        PyWeakReference *next_wr;
842
3.06M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
843
            // Get the next list element to get iterator progress if we omit
844
            // clearing of the weakref (because _PyWeakref_ClearRef changes
845
            // next pointer in the wrlist).
846
267k
            next_wr = wr->wr_next;
847
848
267k
            if (wr->wr_callback == NULL) {
849
                /* no callback */
850
227k
                continue;
851
227k
            }
852
853
            // Clear the weakref.  See the comments above this function for
854
            // reasons why we need to clear weakrefs that have callbacks.
855
            // Note that _PyWeakref_ClearRef clears the weakref but leaves the
856
            // callback pointer intact.  Obscure: it also changes *wrlist.
857
39.9k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
858
39.9k
            _PyWeakref_ClearRef(wr);
859
39.9k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
860
861
            /* Headache time.  `op` is going away, and is weakly referenced by
862
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
863
             * is also trash, no:
864
             *
865
             * 1. There's no need to call it.  The object and the weakref are
866
             *    both going away, so it's legitimate to pretend the weakref is
867
             *    going away first.  The user has to ensure a weakref outlives its
868
             *    referent if they want a guarantee that the wr callback will get
869
             *    invoked.
870
             *
871
             * 2. It may be catastrophic to call it.  If the callback is also in
872
             *    cyclic trash (CT), then although the CT is unreachable from
873
             *    outside the current generation, CT may be reachable from the
874
             *    callback.  Then the callback could resurrect insane objects.
875
             *
876
             * Since the callback is never needed and may be unsafe in this
877
             * case, wr is simply left in the unreachable set.  Note that
878
             * clear_weakrefs() will ensure its callback will not trigger
879
             * inside delete_garbage().
880
             *
881
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
882
             * weakref outlived the trash.  Note that since wr isn't CT in this
883
             * case, its callback can't be CT either -- wr acted as an external
884
             * root to this generation, and therefore its callback did too.  So
885
             * nothing in CT is reachable from the callback either, so it's hard
886
             * to imagine how calling it later could create a problem for us.  wr
887
             * is moved to wrcb_to_call in this case.
888
             */
889
39.9k
            if (gc_is_collecting(AS_GC((PyObject *)wr))) {
890
0
                continue;
891
0
            }
892
893
            /* Create a new reference so that wr can't go away
894
             * before we can process it again.
895
             */
896
39.9k
            Py_INCREF(wr);
897
898
            /* Move wr to wrcb_to_call, for the next pass. */
899
39.9k
            PyGC_Head *wrasgc = AS_GC((PyObject *)wr);
900
            // wrasgc is reachable, but next isn't, so they can't be the same
901
39.9k
            _PyObject_ASSERT((PyObject *)wr, wrasgc != next);
902
39.9k
            gc_list_move(wrasgc, &wrcb_to_call);
903
39.9k
        }
904
2.79M
    }
905
906
    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
907
     * because they can't reference unreachable objects.
908
     */
909
129k
    while (! gc_list_is_empty(&wrcb_to_call)) {
910
39.9k
        PyObject *temp;
911
39.9k
        PyObject *callback;
912
913
39.9k
        gc = (PyGC_Head*)wrcb_to_call._gc_next;
914
39.9k
        PyObject *op = FROM_GC(gc);
915
39.9k
        _PyObject_ASSERT(op, PyWeakref_Check(op));
916
39.9k
        PyWeakReference *wr = (PyWeakReference *)op;
917
39.9k
        callback = wr->wr_callback;
918
39.9k
        _PyObject_ASSERT(op, callback != NULL);
919
920
        /* copy-paste of weakrefobject.c's handle_callback() */
921
39.9k
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
922
39.9k
        if (temp == NULL) {
923
0
            PyErr_FormatUnraisable("Exception ignored on "
924
0
                                   "calling weakref callback %R", callback);
925
0
        }
926
39.9k
        else {
927
39.9k
            Py_DECREF(temp);
928
39.9k
        }
929
930
        /* Give up the reference we created in the first pass.  When
931
         * op's refcount hits 0 (which it may or may not do right now),
932
         * op's tp_dealloc will decref op->wr_callback too.  Note
933
         * that the refcount probably will hit 0 now, and because this
934
         * weakref was reachable to begin with, gc didn't already
935
         * add it to its count of freed objects.  Example:  a reachable
936
         * weak value dict maps some key to this reachable weakref.
937
         * The callback removes this key->weakref mapping from the
938
         * dict, leaving no other references to the weakref (excepting
939
         * ours).
940
         */
941
39.9k
        Py_DECREF(op);
942
39.9k
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
943
            /* object is still alive -- move it */
944
0
            gc_list_move(gc, old);
945
0
        }
946
39.9k
        else {
947
39.9k
            ++num_freed;
948
39.9k
        }
949
39.9k
    }
950
951
89.9k
    return num_freed;
952
89.9k
}
953
954
/* Clear all weakrefs to unreachable objects.  When this returns, no object in
955
 * `unreachable` is weakly referenced anymore.  See the comments above
956
 * handle_weakref_callbacks() for why these weakrefs need to be cleared.
957
 */
958
static void
959
clear_weakrefs(PyGC_Head *unreachable)
960
89.9k
{
961
89.9k
    PyGC_Head *gc;
962
89.9k
    PyGC_Head *next;
963
964
9.38M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
965
9.29M
        PyWeakReference **wrlist;
966
967
9.29M
        PyObject *op = FROM_GC(gc);
968
9.29M
        next = GC_NEXT(gc);
969
970
9.29M
        if (PyWeakref_Check(op)) {
971
            /* A weakref inside the unreachable set is always cleared. See
972
             * the comments above handle_weakref_callbacks() for why these
973
             * must be cleared.
974
             */
975
8
            _PyWeakref_ClearRef((PyWeakReference *)op);
976
8
        }
977
978
9.29M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
979
6.49M
            continue;
980
6.49M
        }
981
982
        /* It supports weakrefs.  Does it have any?
983
         *
984
         * This is never triggered for static types so we can avoid the
985
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
986
         */
987
2.79M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
988
989
        /* `op` may have some weakrefs.  March over the list, clear
990
         * all the weakrefs.
991
         */
992
3.02M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
993
            /* _PyWeakref_ClearRef clears the weakref but leaves
994
             * the callback pointer intact.  Obscure:  it also
995
             * changes *wrlist.
996
             */
997
227k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
998
227k
            _PyWeakref_ClearRef(wr);
999
227k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1000
227k
        }
1001
2.79M
    }
1002
89.9k
}
1003
1004
static void
1005
debug_cycle(const char *msg, PyObject *op)
1006
0
{
1007
0
    PySys_FormatStderr("gc: %s <%s %p>\n",
1008
0
                       msg, Py_TYPE(op)->tp_name, op);
1009
0
}
1010
1011
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
1012
 * only from such cycles).
1013
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
1014
 * garbage list (a Python list), else only the objects in finalizers with
1015
 * __del__ methods are appended to garbage.  All objects in finalizers are
1016
 * merged into the old list regardless.
1017
 */
1018
static void
1019
handle_legacy_finalizers(PyThreadState *tstate,
1020
                         GCState *gcstate,
1021
                         PyGC_Head *finalizers, PyGC_Head *old)
1022
89.9k
{
1023
89.9k
    assert(!_PyErr_Occurred(tstate));
1024
89.9k
    assert(gcstate->garbage != NULL);
1025
1026
89.9k
    PyGC_Head *gc = GC_NEXT(finalizers);
1027
89.9k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
1028
0
        PyObject *op = FROM_GC(gc);
1029
1030
0
        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
1031
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1032
0
                _PyErr_Clear(tstate);
1033
0
                break;
1034
0
            }
1035
0
        }
1036
0
    }
1037
1038
89.9k
    gc_list_merge(finalizers, old);
1039
89.9k
}
1040
1041
/* Run first-time finalizers (if any) on all the objects in collectable.
1042
 * Note that this may remove some (or even all) of the objects from the
1043
 * list, due to refcounts falling to 0.
1044
 */
1045
static void
1046
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
1047
89.9k
{
1048
89.9k
    destructor finalize;
1049
89.9k
    PyGC_Head seen;
1050
1051
    /* While we're going through the loop, `finalize(op)` may cause op, or
1052
     * other objects, to be reclaimed via refcounts falling to zero.  So
1053
     * there's little we can rely on about the structure of the input
1054
     * `collectable` list across iterations.  For safety, we always take the
1055
     * first object in that list and move it to a temporary `seen` list.
1056
     * If objects vanish from the `collectable` and `seen` lists we don't
1057
     * care.
1058
     */
1059
89.9k
    gc_list_init(&seen);
1060
1061
9.38M
    while (!gc_list_is_empty(collectable)) {
1062
9.29M
        PyGC_Head *gc = GC_NEXT(collectable);
1063
9.29M
        PyObject *op = FROM_GC(gc);
1064
9.29M
        gc_list_move(gc, &seen);
1065
9.29M
        if (!_PyGC_FINALIZED(op) &&
1066
9.29M
            (finalize = Py_TYPE(op)->tp_finalize) != NULL)
1067
3
        {
1068
3
            _PyGC_SET_FINALIZED(op);
1069
3
            Py_INCREF(op);
1070
3
            finalize(op);
1071
3
            assert(!_PyErr_Occurred(tstate));
1072
3
            Py_DECREF(op);
1073
3
        }
1074
9.29M
    }
1075
89.9k
    gc_list_merge(&seen, collectable);
1076
89.9k
}
1077
1078
/* Break reference cycles by clearing the containers involved.  This is
1079
 * tricky business as the lists can be changing and we don't know which
1080
 * objects may be freed.  It is possible I screwed something up here.
1081
 */
1082
static void
1083
delete_garbage(PyThreadState *tstate, GCState *gcstate,
1084
               PyGC_Head *collectable, PyGC_Head *old)
1085
89.9k
{
1086
89.9k
    assert(!_PyErr_Occurred(tstate));
1087
1088
4.29M
    while (!gc_list_is_empty(collectable)) {
1089
4.20M
        PyGC_Head *gc = GC_NEXT(collectable);
1090
4.20M
        PyObject *op = FROM_GC(gc);
1091
1092
4.20M
        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1093
4.20M
                                  "refcount is too small");
1094
1095
4.20M
        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
1096
0
            assert(gcstate->garbage != NULL);
1097
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1098
0
                _PyErr_Clear(tstate);
1099
0
            }
1100
0
        }
1101
4.20M
        else {
1102
4.20M
            inquiry clear;
1103
4.20M
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1104
3.84M
                Py_INCREF(op);
1105
3.84M
                (void) clear(op);
1106
3.84M
                if (_PyErr_Occurred(tstate)) {
1107
0
                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
1108
0
                                           Py_TYPE(op)->tp_name);
1109
0
                }
1110
3.84M
                Py_DECREF(op);
1111
3.84M
            }
1112
4.20M
        }
1113
4.20M
        if (GC_NEXT(collectable) == gc) {
1114
            /* object is still alive, move it, it may die later */
1115
3.51M
            gc_clear_collecting(gc);
1116
3.51M
            gc_list_move(gc, old);
1117
3.51M
        }
1118
4.20M
    }
1119
89.9k
}
1120
1121
1122
// Show stats for objects in each generations
1123
static void
1124
show_stats_each_generations(GCState *gcstate)
1125
0
{
1126
0
    char buf[100];
1127
0
    size_t pos = 0;
1128
1129
0
    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1130
0
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1131
0
                             " %zd",
1132
0
                             gc_list_size(GEN_HEAD(gcstate, i)));
1133
0
    }
1134
1135
0
    PySys_FormatStderr(
1136
0
        "gc: objects in each generation:%s\n"
1137
0
        "gc: objects in permanent generation: %zd\n",
1138
0
        buf, gc_list_size(&gcstate->permanent_generation.head));
1139
0
}
1140
1141
/* Deduce which objects among "base" are unreachable from outside the list
1142
   and move them to 'unreachable'. The process consist in the following steps:
1143
1144
1. Copy all reference counts to a different field (gc_prev is used to hold
1145
   this copy to save memory).
1146
2. Traverse all objects in "base" and visit all referred objects using
1147
   "tp_traverse" and for every visited object, subtract 1 to the reference
1148
   count (the one that we copied in the previous step). After this step, all
1149
   objects that can be reached directly from outside must have strictly positive
1150
   reference count, while all unreachable objects must have a count of exactly 0.
1151
3. Identify all unreachable objects (the ones with 0 reference count) and move
1152
   them to the "unreachable" list. This step also needs to move back to "base" all
1153
   objects that were initially marked as unreachable but are referred transitively
1154
   by the reachable objects (the ones with strictly positive reference count).
1155
1156
Contracts:
1157
1158
    * The "base" has to be a valid list with no mask set.
1159
1160
    * The "unreachable" list must be uninitialized (this function calls
1161
      gc_list_init over 'unreachable').
1162
1163
IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1164
flag set but it does not clear it to skip unnecessary iteration. Before the
1165
flag is cleared (for example, by using 'clear_unreachable_mask' function or
1166
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1167
list and we can not use most gc_list_* functions for it. */
1168
static inline Py_ssize_t
1169
179k
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1170
179k
    validate_list(base, collecting_clear_unreachable_clear);
1171
    /* Using ob_refcnt and gc_refs, calculate which objects in the
1172
     * container set are reachable from outside the set (i.e., have a
1173
     * refcount greater than 0 when all the references within the
1174
     * set are taken into account).
1175
     */
1176
179k
    Py_ssize_t candidates = update_refs(base);  // gc_prev is used for gc_refs
1177
179k
    subtract_refs(base);
1178
1179
    /* Leave everything reachable from outside base in base, and move
1180
     * everything else (in base) to unreachable.
1181
     *
1182
     * NOTE:  This used to move the reachable objects into a reachable
1183
     * set instead.  But most things usually turn out to be reachable,
1184
     * so it's more efficient to move the unreachable things.  It "sounds slick"
1185
     * to move the unreachable objects, until you think about it - the reason it
1186
     * pays isn't actually obvious.
1187
     *
1188
     * Suppose we create objects A, B, C in that order.  They appear in the young
1189
     * generation in the same order.  If B points to A, and C to B, and C is
1190
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1191
     * respectively.
1192
     *
1193
     * When move_unreachable finds A, A is moved to the unreachable list.  The
1194
     * same for B when it's first encountered.  Then C is traversed, B is moved
1195
     * _back_ to the reachable list.  B is eventually traversed, and then A is
1196
     * moved back to the reachable list.
1197
     *
1198
     * So instead of not moving at all, the reachable objects B and A are moved
1199
     * twice each.  Why is this a win?  A straightforward algorithm to move the
1200
     * reachable objects instead would move A, B, and C once each.
1201
     *
1202
     * The key is that this dance leaves the objects in order C, B, A - it's
1203
     * reversed from the original order.  On all _subsequent_ scans, none of
1204
     * them will move.  Since most objects aren't in cycles, this can save an
1205
     * unbounded number of moves across an unbounded number of later collections.
1206
     * It can cost more only the first time the chain is scanned.
1207
     *
1208
     * Drawback:  move_unreachable is also used to find out what's still trash
1209
     * after finalizers may resurrect objects.  In _that_ case most unreachable
1210
     * objects will remain unreachable, so it would be more efficient to move
1211
     * the reachable objects instead.  But this is a one-time cost, probably not
1212
     * worth complicating the code to speed just a little.
1213
     */
1214
179k
    gc_list_init(unreachable);
1215
179k
    move_unreachable(base, unreachable);  // gc_prev is pointer again
1216
179k
    validate_list(base, collecting_clear_unreachable_clear);
1217
179k
    validate_list(unreachable, collecting_set_unreachable_set);
1218
179k
    return candidates;
1219
179k
}
1220
1221
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1222
   them to 'old_generation' and placing the rest on 'still_unreachable'.
1223
1224
   Contracts:
1225
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1226
         will contain the objects that did not resurrect.
1227
1228
       * The "still_unreachable" list must be uninitialized (this function calls
1229
         gc_list_init over 'still_unreachable').
1230
1231
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1232
PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so
1233
we can skip the expense of clearing the flag to avoid extra iteration. */
1234
static inline void
1235
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1236
                           PyGC_Head *old_generation)
1237
89.9k
{
1238
    // Remove the PREV_MASK_COLLECTING from unreachable
1239
    // to prepare it for a new call to 'deduce_unreachable'
1240
89.9k
    gc_list_clear_collecting(unreachable);
1241
1242
    // After the call to deduce_unreachable, the 'still_unreachable' set will
1243
    // have the PREV_MARK_COLLECTING set, but the objects are going to be
1244
    // removed so we can skip the expense of clearing the flag.
1245
89.9k
    PyGC_Head* resurrected = unreachable;
1246
89.9k
    deduce_unreachable(resurrected, still_unreachable);
1247
89.9k
    clear_unreachable_mask(still_unreachable);
1248
1249
    // Move the resurrected objects to the old generation for future collection.
1250
89.9k
    gc_list_merge(resurrected, old_generation);
1251
89.9k
}
1252
1253
1254
/* Invoke progress callbacks to notify clients that garbage collection
1255
 * is starting or stopping
1256
 */
1257
static void
1258
invoke_gc_callback(PyThreadState *tstate, const char *phase,
1259
                   int generation, struct gc_generation_stats *stats)
1260
179k
{
1261
179k
    assert(!_PyErr_Occurred(tstate));
1262
1263
    /* we may get called very early */
1264
179k
    GCState *gcstate = &tstate->interp->gc;
1265
179k
    if (gcstate->callbacks == NULL) {
1266
0
        return;
1267
0
    }
1268
1269
    /* The local variable cannot be rebound, check it for sanity */
1270
179k
    assert(PyList_CheckExact(gcstate->callbacks));
1271
179k
    PyObject *info = NULL;
1272
179k
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1273
5.96k
        info = Py_BuildValue("{sisnsnsnsd}",
1274
5.96k
            "generation", generation,
1275
5.96k
            "collected", stats->collected,
1276
5.96k
            "uncollectable", stats->uncollectable,
1277
5.96k
            "candidates", stats->candidates,
1278
5.96k
            "duration", stats->duration);
1279
5.96k
        if (info == NULL) {
1280
0
            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
1281
0
            return;
1282
0
        }
1283
5.96k
    }
1284
1285
179k
    PyObject *phase_obj = PyUnicode_FromString(phase);
1286
179k
    if (phase_obj == NULL) {
1287
0
        Py_XDECREF(info);
1288
0
        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
1289
0
        return;
1290
0
    }
1291
1292
179k
    PyObject *stack[] = {phase_obj, info};
1293
185k
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1294
5.96k
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1295
5.96k
        Py_INCREF(cb); /* make sure cb doesn't go away */
1296
5.96k
        r = PyObject_Vectorcall(cb, stack, 2, NULL);
1297
5.96k
        if (r == NULL) {
1298
0
            PyErr_FormatUnraisable("Exception ignored while "
1299
0
                                   "calling GC callback %R", cb);
1300
0
        }
1301
5.96k
        else {
1302
5.96k
            Py_DECREF(r);
1303
5.96k
        }
1304
5.96k
        Py_DECREF(cb);
1305
5.96k
    }
1306
179k
    Py_DECREF(phase_obj);
1307
179k
    Py_XDECREF(info);
1308
179k
    assert(!_PyErr_Occurred(tstate));
1309
179k
}
1310
1311
1312
/* Find the oldest generation (highest numbered) where the count
1313
 * exceeds the threshold.  Objects in the that generation and
1314
 * generations younger than it will be collected. */
1315
static int
1316
gc_select_generation(GCState *gcstate)
1317
109k
{
1318
340k
    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1319
320k
        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1320
            /* Avoid quadratic performance degradation in number
1321
               of tracked objects (see also issue #4074):
1322
1323
               To limit the cost of garbage collection, there are two strategies;
1324
                 - make each collection faster, e.g. by scanning fewer objects
1325
                 - do less collections
1326
               This heuristic is about the latter strategy.
1327
1328
               In addition to the various configurable thresholds, we only trigger a
1329
               full collection if the ratio
1330
1331
                long_lived_pending / long_lived_total
1332
1333
               is above a given value (hardwired to 25%).
1334
1335
               The reason is that, while "non-full" collections (i.e., collections of
1336
               the young and middle generations) will always examine roughly the same
1337
               number of objects -- determined by the aforementioned thresholds --,
1338
               the cost of a full collection is proportional to the total number of
1339
               long-lived objects, which is virtually unbounded.
1340
1341
               Indeed, it has been remarked that doing a full collection every
1342
               <constant number> of object creations entails a dramatic performance
1343
               degradation in workloads which consist in creating and storing lots of
1344
               long-lived objects (e.g. building a large list of GC-tracked objects would
1345
               show quadratic performance, instead of linear as expected: see issue #4074).
1346
1347
               Using the above ratio, instead, yields amortized linear performance in
1348
               the total number of objects (the effect of which can be summarized
1349
               thusly: "each full garbage collection is more and more costly as the
1350
               number of objects grows, but we do fewer and fewer of them").
1351
1352
               This heuristic was suggested by Martin von Löwis on python-dev in
1353
               June 2008. His original analysis and proposal can be found at:
1354
               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1355
            */
1356
98.7k
            if (i == NUM_GENERATIONS - 1
1357
9.36k
                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1358
8.75k
            {
1359
8.75k
                continue;
1360
8.75k
            }
1361
89.9k
            return i;
1362
98.7k
        }
1363
320k
    }
1364
19.7k
    return -1;
1365
109k
}
1366
1367
static struct gc_generation_stats *
1368
gc_get_stats(GCState *gcstate, int gen)
1369
89.9k
{
1370
89.9k
    if (gen == 0) {
1371
81.9k
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1372
81.9k
        buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE;
1373
81.9k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1374
81.9k
        return stats;
1375
81.9k
    }
1376
8.04k
    else {
1377
8.04k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1378
8.04k
        buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE;
1379
8.04k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1380
8.04k
        return stats;
1381
8.04k
    }
1382
89.9k
}
1383
1384
static struct gc_generation_stats *
1385
gc_get_prev_stats(GCState *gcstate, int gen)
1386
89.9k
{
1387
89.9k
    if (gen == 0) {
1388
81.9k
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1389
81.9k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1390
81.9k
        return stats;
1391
81.9k
    }
1392
8.04k
    else {
1393
8.04k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1394
8.04k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1395
8.04k
        return stats;
1396
8.04k
    }
1397
89.9k
}
1398
1399
static void
1400
add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
1401
89.9k
{
1402
89.9k
    struct gc_generation_stats *prev_stats = gc_get_prev_stats(gcstate, gen);
1403
89.9k
    struct gc_generation_stats *cur_stats = gc_get_stats(gcstate, gen);
1404
1405
89.9k
    memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
1406
1407
89.9k
    cur_stats->ts_start = stats->ts_start;
1408
89.9k
    cur_stats->collections += 1;
1409
89.9k
    cur_stats->collected += stats->collected;
1410
89.9k
    cur_stats->uncollectable += stats->uncollectable;
1411
89.9k
    cur_stats->candidates += stats->candidates;
1412
1413
89.9k
    cur_stats->duration += stats->duration;
1414
89.9k
    cur_stats->heap_size = stats->heap_size;
1415
    /* Publish ts_stop last so remote readers do not select a partially
1416
       updated stats record as the latest collection. */
1417
89.9k
    cur_stats->ts_stop = stats->ts_stop;
1418
89.9k
}
1419
1420
/* This is the main function.  Read this to understand how the
1421
 * collection process works. */
1422
static Py_ssize_t
1423
gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1424
109k
{
1425
109k
    int i;
1426
109k
    PyGC_Head *young; /* the generation we are examining */
1427
109k
    PyGC_Head *old; /* next older generation */
1428
109k
    PyGC_Head unreachable; /* non-problematic unreachable trash */
1429
109k
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1430
109k
    PyGC_Head *gc;
1431
109k
    GCState *gcstate = &tstate->interp->gc;
1432
1433
    // gc_collect_main() must not be called before _PyGC_Init
1434
    // or after _PyGC_Fini()
1435
109k
    assert(gcstate->garbage != NULL);
1436
109k
    assert(!_PyErr_Occurred(tstate));
1437
1438
109k
    int expected = 0;
1439
109k
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
1440
        // Don't start a garbage collection if one is already in progress.
1441
0
        return 0;
1442
0
    }
1443
109k
    gcstate->frame = tstate->current_frame;
1444
1445
109k
    if (generation == GENERATION_AUTO) {
1446
        // Select the oldest generation that needs collecting. We will collect
1447
        // objects from that generation and all generations younger than it.
1448
109k
        generation = gc_select_generation(gcstate);
1449
109k
        if (generation < 0) {
1450
            // No generation needs to be collected.
1451
19.7k
            _Py_atomic_store_int(&gcstate->collecting, 0);
1452
19.7k
            return 0;
1453
19.7k
        }
1454
109k
    }
1455
1456
109k
    assert(generation >= 0 && generation < NUM_GENERATIONS);
1457
1458
#ifdef Py_STATS
1459
    {
1460
        PyStats *s = _PyStats_GET();
1461
        if (s) {
1462
            s->object_stats.object_visits = 0;
1463
        }
1464
    }
1465
#endif
1466
1467
89.9k
    GC_STAT_ADD(generation, collections, 1);
1468
1469
89.9k
    struct gc_generation_stats stats = { 0 };
1470
89.9k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
1471
89.9k
        invoke_gc_callback(tstate, "start", generation, &stats);
1472
89.9k
    }
1473
1474
89.9k
    stats.heap_size = gcstate->heap_size;
1475
    // ignore error: don't interrupt the GC if reading the clock fails
1476
89.9k
    (void)PyTime_PerfCounterRaw(&stats.ts_start);
1477
89.9k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
1478
0
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1479
0
        show_stats_each_generations(gcstate);
1480
0
    }
1481
1482
89.9k
    if (PyDTrace_GC_START_ENABLED()) {
1483
0
        PyDTrace_GC_START(generation);
1484
0
    }
1485
1486
    /* update collection and allocation counters */
1487
89.9k
    if (generation+1 < NUM_GENERATIONS) {
1488
89.3k
        gcstate->generations[generation+1].count += 1;
1489
89.3k
    }
1490
188k
    for (i = 0; i <= generation; i++) {
1491
98.6k
        gcstate->generations[i].count = 0;
1492
98.6k
    }
1493
1494
    /* merge younger generations with one we are currently collecting */
1495
98.6k
    for (i = 0; i < generation; i++) {
1496
8.65k
        gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1497
8.65k
    }
1498
1499
    /* handy references */
1500
89.9k
    young = GEN_HEAD(gcstate, generation);
1501
89.9k
    if (generation < NUM_GENERATIONS-1) {
1502
89.3k
        old = GEN_HEAD(gcstate, generation+1);
1503
89.3k
    }
1504
611
    else {
1505
611
        old = young;
1506
611
    }
1507
89.9k
    validate_list(old, collecting_clear_unreachable_clear);
1508
1509
89.9k
    stats.candidates = deduce_unreachable(young, &unreachable);
1510
1511
89.9k
    untrack_tuples(young);
1512
    /* Move reachable objects to next generation. */
1513
89.9k
    if (young != old) {
1514
89.3k
        if (generation == NUM_GENERATIONS - 2) {
1515
7.43k
            gcstate->long_lived_pending += gc_list_size(young);
1516
7.43k
        }
1517
89.3k
        gc_list_merge(young, old);
1518
89.3k
    }
1519
611
    else {
1520
        // In Python <= 3.13, we called untrack_dicts(young) here to untrack
1521
        // atomic-only dicts (see issue #14775). Python 3.14 removed the lazy
1522
        // dict tracking machinery entirely (GH-127010) -- dicts are always
1523
        // tracked from creation and never untracked by GC. That way, we don't
1524
        // have to restore MAINTAIN_TRACKING across every PyDict_SetItem call
1525
        // site; the cost is slightly more work for full collections on dicts
1526
        // with only atomic values.
1527
611
        gcstate->long_lived_pending = 0;
1528
611
        gcstate->long_lived_total = gc_list_size(young);
1529
611
    }
1530
1531
    /* All objects in unreachable are trash, but objects reachable from
1532
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
1533
     */
1534
89.9k
    gc_list_init(&finalizers);
1535
    // NEXT_MASK_UNREACHABLE is cleared here.
1536
    // After move_legacy_finalizers(), unreachable is normal list.
1537
89.9k
    move_legacy_finalizers(&unreachable, &finalizers);
1538
    /* finalizers contains the unreachable objects with a legacy finalizer;
1539
     * unreachable objects reachable *from* those are also uncollectable,
1540
     * and we move those into the finalizers list too.
1541
     */
1542
89.9k
    move_legacy_finalizer_reachable(&finalizers);
1543
1544
89.9k
    validate_list(&finalizers, collecting_clear_unreachable_clear);
1545
89.9k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1546
1547
    /* Print debugging information. */
1548
89.9k
    if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
1549
0
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1550
0
            debug_cycle("collectable", FROM_GC(gc));
1551
0
        }
1552
0
    }
1553
1554
    /* Clear weakrefs and invoke callbacks as necessary. */
1555
89.9k
    stats.collected += handle_weakref_callbacks(&unreachable, old);
1556
89.9k
    validate_list(old, collecting_clear_unreachable_clear);
1557
89.9k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1558
1559
    /* Call tp_finalize on objects which have one. */
1560
89.9k
    finalize_garbage(tstate, &unreachable);
1561
1562
    /* Handle any objects that may have resurrected after the call
1563
     * to 'finalize_garbage' and continue the collection with the
1564
     * objects that are still unreachable */
1565
89.9k
    PyGC_Head final_unreachable;
1566
89.9k
    handle_resurrected_objects(&unreachable, &final_unreachable, old);
1567
1568
    /* Clear weakrefs to objects in the unreachable set.  No Python-level
1569
     * code must be allowed to access those unreachable objects.  During
1570
     * delete_garbage(), finalizers outside the unreachable set might run
1571
     * and create new weakrefs.  If those weakrefs were not cleared, they
1572
     * could reveal unreachable objects.  Callbacks are not executed.
1573
     */
1574
89.9k
    clear_weakrefs(&final_unreachable);
1575
1576
    /* Call tp_clear on objects in the final_unreachable set.  This will cause
1577
    * the reference cycles to be broken.  It may also cause some objects
1578
    * in finalizers to be freed.
1579
    */
1580
89.9k
    stats.collected += gc_list_size(&final_unreachable);
1581
89.9k
    delete_garbage(tstate, gcstate, &final_unreachable, old);
1582
1583
    /* Collect statistics on uncollectable objects found and print
1584
     * debugging information. */
1585
89.9k
    Py_ssize_t n = 0;
1586
89.9k
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1587
0
        n++;
1588
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
1589
0
            debug_cycle("uncollectable", FROM_GC(gc));
1590
0
    }
1591
89.9k
    stats.uncollectable = n;
1592
89.9k
    (void)PyTime_PerfCounterRaw(&stats.ts_stop);
1593
89.9k
    stats.duration = PyTime_AsSecondsDouble(stats.ts_stop - stats.ts_start);
1594
89.9k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
1595
0
        PySys_WriteStderr(
1596
0
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1597
0
            stats.uncollectable+stats.collected, stats.uncollectable,
1598
0
            stats.duration);
1599
0
    }
1600
1601
    /* Append instances in the uncollectable set to a Python
1602
     * reachable list of garbage.  The programmer has to deal with
1603
     * this if they insist on creating this type of structure.
1604
     */
1605
89.9k
    handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1606
89.9k
    validate_list(old, collecting_clear_unreachable_clear);
1607
1608
    /* Clear free list only during the collection of the highest
1609
     * generation */
1610
89.9k
    if (generation == NUM_GENERATIONS-1) {
1611
611
        _PyGC_ClearAllFreeLists(tstate->interp);
1612
611
    }
1613
1614
89.9k
    if (_PyErr_Occurred(tstate)) {
1615
0
        if (reason == _Py_GC_REASON_SHUTDOWN) {
1616
0
            _PyErr_Clear(tstate);
1617
0
        }
1618
0
        else {
1619
0
            PyErr_FormatUnraisable("Exception ignored in garbage collection");
1620
0
        }
1621
0
    }
1622
1623
    /* Update stats */
1624
89.9k
    add_stats(gcstate, generation, &stats);
1625
89.9k
    GC_STAT_ADD(generation, objects_collected, stats.collected);
1626
1627
#ifdef Py_STATS
1628
    {
1629
        PyStats *s = _PyStats_GET();
1630
        if (s) {
1631
            GC_STAT_ADD(generation, object_visits,
1632
                s->object_stats.object_visits);
1633
            s->object_stats.object_visits = 0;
1634
        }
1635
    }
1636
#endif
1637
1638
89.9k
    if (PyDTrace_GC_DONE_ENABLED()) {
1639
0
        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
1640
0
    }
1641
1642
89.9k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
1643
89.9k
        invoke_gc_callback(tstate, "stop", generation, &stats);
1644
89.9k
    }
1645
1646
89.9k
    assert(!_PyErr_Occurred(tstate));
1647
89.9k
    gcstate->frame = NULL;
1648
89.9k
    _Py_atomic_store_int(&gcstate->collecting, 0);
1649
89.9k
    return stats.uncollectable + stats.collected;
1650
109k
}
1651
1652
static int
1653
referrersvisit(PyObject* obj, void *arg)
1654
0
{
1655
0
    PyObject *objs = arg;
1656
0
    Py_ssize_t i;
1657
0
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
1658
0
        if (PyTuple_GET_ITEM(objs, i) == obj) {
1659
0
            return 1;
1660
0
        }
1661
0
    }
1662
0
    return 0;
1663
0
}
1664
1665
static int
1666
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1667
0
{
1668
0
    PyGC_Head *gc;
1669
0
    PyObject *obj;
1670
0
    traverseproc traverse;
1671
0
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1672
0
        obj = FROM_GC(gc);
1673
0
        traverse = Py_TYPE(obj)->tp_traverse;
1674
0
        if (obj == objs || obj == resultlist) {
1675
0
            continue;
1676
0
        }
1677
0
        if (traverse(obj, referrersvisit, objs)) {
1678
0
            if (PyList_Append(resultlist, obj) < 0) {
1679
0
                return 0; /* error */
1680
0
            }
1681
0
        }
1682
0
    }
1683
0
    return 1; /* no error */
1684
0
}
1685
1686
PyObject *
1687
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
1688
0
{
1689
0
    PyObject *result = PyList_New(0);
1690
0
    if (!result) {
1691
0
        return NULL;
1692
0
    }
1693
1694
0
    GCState *gcstate = &interp->gc;
1695
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1696
0
        if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) {
1697
0
            Py_DECREF(result);
1698
0
            return NULL;
1699
0
        }
1700
0
    }
1701
0
    return result;
1702
0
}
1703
1704
PyObject *
1705
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
1706
0
{
1707
0
    assert(generation >= -1 && generation < NUM_GENERATIONS);
1708
0
    GCState *gcstate = &interp->gc;
1709
1710
0
    PyObject *result = PyList_New(0);
1711
0
    if (result == NULL) {
1712
0
        return NULL;
1713
0
    }
1714
1715
0
    if (generation == -1) {
1716
        /* If generation is -1, get all objects from all generations */
1717
0
        for (int i = 0; i < NUM_GENERATIONS; i++) {
1718
0
            if (append_objects(result, GEN_HEAD(gcstate, i))) {
1719
0
                goto error;
1720
0
            }
1721
0
        }
1722
0
    }
1723
0
    else {
1724
0
        if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1725
0
            goto error;
1726
0
        }
1727
0
    }
1728
1729
0
    return result;
1730
0
error:
1731
0
    Py_DECREF(result);
1732
0
    return NULL;
1733
0
}
1734
1735
void
1736
_PyGC_Freeze(PyInterpreterState *interp)
1737
0
{
1738
0
    GCState *gcstate = &interp->gc;
1739
0
    for (int i = 0; i < NUM_GENERATIONS; ++i) {
1740
0
        gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1741
0
        gcstate->generations[i].count = 0;
1742
0
    }
1743
0
}
1744
1745
void
1746
_PyGC_Unfreeze(PyInterpreterState *interp)
1747
0
{
1748
0
    GCState *gcstate = &interp->gc;
1749
0
    gc_list_merge(&gcstate->permanent_generation.head,
1750
0
                  GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1751
0
}
1752
1753
Py_ssize_t
1754
_PyGC_GetFreezeCount(PyInterpreterState *interp)
1755
0
{
1756
0
    GCState *gcstate = &interp->gc;
1757
0
    return gc_list_size(&gcstate->permanent_generation.head);
1758
0
}
1759
1760
/* C API for controlling the state of the garbage collector */
1761
int
1762
PyGC_Enable(void)
1763
0
{
1764
0
    GCState *gcstate = get_gc_state();
1765
0
    int old_state = gcstate->enabled;
1766
0
    gcstate->enabled = 1;
1767
0
    return old_state;
1768
0
}
1769
1770
int
1771
PyGC_Disable(void)
1772
0
{
1773
0
    GCState *gcstate = get_gc_state();
1774
0
    int old_state = gcstate->enabled;
1775
0
    gcstate->enabled = 0;
1776
0
    return old_state;
1777
0
}
1778
1779
int
1780
PyGC_IsEnabled(void)
1781
0
{
1782
0
    GCState *gcstate = get_gc_state();
1783
0
    return gcstate->enabled;
1784
0
}
1785
1786
/* Public API to invoke gc.collect() from C */
1787
Py_ssize_t
1788
PyGC_Collect(void)
1789
0
{
1790
0
    PyThreadState *tstate = _PyThreadState_GET();
1791
0
    GCState *gcstate = &tstate->interp->gc;
1792
1793
0
    if (!gcstate->enabled) {
1794
0
        return 0;
1795
0
    }
1796
1797
0
    Py_ssize_t n;
1798
0
    PyObject *exc = _PyErr_GetRaisedException(tstate);
1799
0
    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
1800
0
    _PyErr_SetRaisedException(tstate, exc);
1801
1802
0
    return n;
1803
0
}
1804
1805
Py_ssize_t
1806
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1807
0
{
1808
0
    return gc_collect_main(tstate, generation, reason);
1809
0
}
1810
1811
void
1812
_PyGC_CollectNoFail(PyThreadState *tstate)
1813
0
{
1814
    /* Ideally, this function is only called on interpreter shutdown,
1815
       and therefore not recursively.  Unfortunately, when there are daemon
1816
       threads, a daemon thread can start a cyclic garbage collection
1817
       during interpreter shutdown (and then never finish it).
1818
       See http://bugs.python.org/issue8713#msg195178 for an example.
1819
       */
1820
0
    gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
1821
0
}
1822
1823
void
1824
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
1825
0
{
1826
0
    GCState *gcstate = &interp->gc;
1827
0
    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
1828
0
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
1829
0
        const char *message;
1830
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
1831
0
            message = "gc: %zd uncollectable objects at shutdown";
1832
0
        }
1833
0
        else {
1834
0
            message = "gc: %zd uncollectable objects at shutdown; " \
1835
0
                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
1836
0
        }
1837
        /* PyErr_WarnFormat does too many things and we are at shutdown,
1838
           the warnings module's dependencies (e.g. linecache) may be gone
1839
           already. */
1840
0
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
1841
0
                                     "gc", NULL, message,
1842
0
                                     PyList_GET_SIZE(gcstate->garbage)))
1843
0
        {
1844
0
            PyErr_FormatUnraisable("Exception ignored in GC shutdown");
1845
0
        }
1846
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
1847
0
            PyObject *repr = NULL, *bytes = NULL;
1848
0
            repr = PyObject_Repr(gcstate->garbage);
1849
0
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
1850
0
                PyErr_FormatUnraisable("Exception ignored in GC shutdown "
1851
0
                                       "while formatting garbage");
1852
0
            }
1853
0
            else {
1854
0
                PySys_WriteStderr(
1855
0
                    "      %s\n",
1856
0
                    PyBytes_AS_STRING(bytes)
1857
0
                    );
1858
0
            }
1859
0
            Py_XDECREF(repr);
1860
0
            Py_XDECREF(bytes);
1861
0
        }
1862
0
    }
1863
0
}
1864
1865
static void
1866
0
finalize_unlink_gc_head(PyGC_Head *gc) {
1867
0
    PyGC_Head *prev = GC_PREV(gc);
1868
0
    PyGC_Head *next = GC_NEXT(gc);
1869
0
    _PyGCHead_SET_NEXT(prev, next);
1870
0
    _PyGCHead_SET_PREV(next, prev);
1871
0
}
1872
1873
void
1874
_PyGC_Fini(PyInterpreterState *interp)
1875
0
{
1876
0
    GCState *gcstate = &interp->gc;
1877
0
    Py_CLEAR(gcstate->garbage);
1878
0
    Py_CLEAR(gcstate->callbacks);
1879
1880
    /* Prevent a subtle bug that affects sub-interpreters that use basic
1881
     * single-phase init extensions (m_size == -1).  Those extensions cause objects
1882
     * to be shared between interpreters, via the PyDict_Update(mdict, m_copy) call
1883
     * in import_find_extension().
1884
     *
1885
     * If they are GC objects, their GC head next or prev links could refer to
1886
     * the interpreter _gc_runtime_state PyGC_Head nodes.  Those nodes go away
1887
     * when the interpreter structure is freed and so pointers to them become
1888
     * invalid.  If those objects are still used by another interpreter and
1889
     * UNTRACK is called on them, a crash will happen.  We untrack the nodes
1890
     * here to avoid that.
1891
     *
1892
     * This bug was originally fixed when reported as gh-90228.  The bug was
1893
     * re-introduced in gh-94673.
1894
     */
1895
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1896
0
        finalize_unlink_gc_head(&gcstate->generations[i].head);
1897
0
    }
1898
0
    finalize_unlink_gc_head(&gcstate->permanent_generation.head);
1899
0
}
1900
1901
/* for debugging */
1902
void
1903
_PyGC_Dump(PyGC_Head *g)
1904
0
{
1905
0
    PyObject_Dump(FROM_GC(g));
1906
0
}
1907
1908
1909
#ifdef Py_DEBUG
1910
static int
1911
visit_validate(PyObject *op, void *parent_raw)
1912
{
1913
    PyObject *parent = _PyObject_CAST(parent_raw);
1914
    if (_PyObject_IsFreed(op)) {
1915
        _PyObject_ASSERT_FAILED_MSG(parent,
1916
                                    "PyObject_GC_Track() object is not valid");
1917
    }
1918
    return 0;
1919
}
1920
#endif
1921
1922
1923
/* extension modules might be compiled with GC support so these
1924
   functions must always be available */
1925
1926
void
1927
PyObject_GC_Track(void *op_raw)
1928
176M
{
1929
176M
    PyObject *op = _PyObject_CAST(op_raw);
1930
176M
    if (_PyObject_GC_IS_TRACKED(op)) {
1931
0
        _PyObject_ASSERT_FAILED_MSG(op,
1932
0
                                    "object already tracked "
1933
0
                                    "by the garbage collector");
1934
0
    }
1935
176M
    _PyObject_GC_TRACK(op);
1936
1937
#ifdef Py_DEBUG
1938
    /* Check that the object is valid: validate objects traversed
1939
       by tp_traverse() */
1940
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
1941
    (void)traverse(op, visit_validate, op);
1942
#endif
1943
176M
}
1944
1945
void
1946
PyObject_GC_UnTrack(void *op_raw)
1947
1.27G
{
1948
1.27G
    PyObject *op = _PyObject_CAST(op_raw);
1949
    /* The code for some objects, such as tuples, is a bit
1950
     * sloppy about when the object is tracked and untracked. */
1951
1.27G
    if (_PyObject_GC_IS_TRACKED(op)) {
1952
1.04G
        _PyObject_GC_UNTRACK(op);
1953
1.04G
    }
1954
1.27G
}
1955
1956
int
1957
PyObject_IS_GC(PyObject *obj)
1958
229M
{
1959
229M
    return _PyObject_IS_GC(obj);
1960
229M
}
1961
1962
void
1963
_Py_ScheduleGC(PyThreadState *tstate)
1964
18.3M
{
1965
18.3M
    if (!_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT))
1966
109k
    {
1967
109k
        _Py_set_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
1968
109k
    }
1969
18.3M
}
1970
1971
void
1972
_PyObject_GC_Link(PyObject *op)
1973
592M
{
1974
592M
    PyGC_Head *gc = AS_GC(op);
1975
    // gc must be correctly aligned
1976
592M
    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
1977
1978
592M
    PyThreadState *tstate = _PyThreadState_GET();
1979
592M
    GCState *gcstate = &tstate->interp->gc;
1980
592M
    gc->_gc_next = 0;
1981
592M
    gc->_gc_prev = 0;
1982
592M
    gcstate->generations[0].count++; /* number of allocated GC objects */
1983
592M
    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
1984
18.3M
        gcstate->enabled &&
1985
18.3M
        gcstate->generations[0].threshold &&
1986
18.3M
        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
1987
18.3M
        !_PyErr_Occurred(tstate))
1988
18.3M
    {
1989
18.3M
        _Py_ScheduleGC(tstate);
1990
18.3M
    }
1991
592M
}
1992
1993
void
1994
_Py_RunGC(PyThreadState *tstate)
1995
109k
{
1996
109k
    GCState *gcstate = get_gc_state();
1997
109k
    if (!gcstate->enabled) {
1998
0
        return;
1999
0
    }
2000
109k
    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
2001
109k
}
2002
2003
static PyObject *
2004
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
2005
475M
{
2006
475M
    PyThreadState *tstate = _PyThreadState_GET();
2007
475M
    if (basicsize > PY_SSIZE_T_MAX - presize) {
2008
0
        return _PyErr_NoMemory(tstate);
2009
0
    }
2010
475M
    size_t size = presize + basicsize;
2011
475M
    char *mem = _PyObject_MallocWithType(tp, size);
2012
475M
    if (mem == NULL) {
2013
0
        return _PyErr_NoMemory(tstate);
2014
0
    }
2015
475M
    ((PyObject **)mem)[0] = NULL;
2016
475M
    ((PyObject **)mem)[1] = NULL;
2017
475M
    PyObject *op = (PyObject *)(mem + presize);
2018
475M
    _PyObject_GC_Link(op);
2019
475M
    return op;
2020
475M
}
2021
2022
2023
PyObject *
2024
_PyObject_GC_New(PyTypeObject *tp)
2025
245M
{
2026
245M
    size_t presize = _PyType_PreHeaderSize(tp);
2027
245M
    size_t size = _PyObject_SIZE(tp);
2028
245M
    if (_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)) {
2029
216
        size += _PyInlineValuesSize(tp);
2030
216
    }
2031
245M
    PyObject *op = gc_alloc(tp, size, presize);
2032
245M
    if (op == NULL) {
2033
0
        return NULL;
2034
0
    }
2035
245M
    _PyObject_Init(op, tp);
2036
245M
    if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
2037
216
        _PyObject_InitInlineValues(op, tp);
2038
216
    }
2039
245M
    return op;
2040
245M
}
2041
2042
PyVarObject *
2043
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2044
230M
{
2045
230M
    PyVarObject *op;
2046
2047
230M
    if (nitems < 0) {
2048
0
        PyErr_BadInternalCall();
2049
0
        return NULL;
2050
0
    }
2051
230M
    size_t presize = _PyType_PreHeaderSize(tp);
2052
230M
    size_t size = _PyObject_VAR_SIZE(tp, nitems);
2053
230M
    op = (PyVarObject *)gc_alloc(tp, size, presize);
2054
230M
    if (op == NULL) {
2055
0
        return NULL;
2056
0
    }
2057
230M
    _PyObject_InitVar(op, tp, nitems);
2058
230M
    return op;
2059
230M
}
2060
2061
PyObject *
2062
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2063
0
{
2064
0
    size_t presize = _PyType_PreHeaderSize(tp);
2065
0
    size_t size = _PyObject_SIZE(tp) + extra_size;
2066
0
    PyObject *op = gc_alloc(tp, size, presize);
2067
0
    if (op == NULL) {
2068
0
        return NULL;
2069
0
    }
2070
0
    memset((char *)op + sizeof(PyObject), 0, size - sizeof(PyObject));
2071
0
    _PyObject_Init(op, tp);
2072
0
    return op;
2073
0
}
2074
2075
PyVarObject *
2076
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2077
41
{
2078
41
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2079
41
    const size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2080
41
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2081
41
    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2082
0
        return (PyVarObject *)PyErr_NoMemory();
2083
0
    }
2084
41
    char *mem = (char *)op - presize;
2085
41
    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
2086
41
    if (mem == NULL) {
2087
0
        return (PyVarObject *)PyErr_NoMemory();
2088
0
    }
2089
41
    op = (PyVarObject *) (mem + presize);
2090
41
    Py_SET_SIZE(op, nitems);
2091
41
    return op;
2092
41
}
2093
2094
void
2095
PyObject_GC_Del(void *op)
2096
591M
{
2097
591M
    size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2098
591M
    PyGC_Head *g = AS_GC(op);
2099
591M
    if (_PyObject_GC_IS_TRACKED(op)) {
2100
0
        gc_list_remove(g);
2101
0
        GCState *gcstate = get_gc_state();
2102
0
        gcstate->heap_size--;
2103
#ifdef Py_DEBUG
2104
        PyObject *exc = PyErr_GetRaisedException();
2105
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2106
                                     "gc", NULL,
2107
                                     "Object of type %s is not untracked "
2108
                                     "before destruction",
2109
                                     Py_TYPE(op)->tp_name))
2110
        {
2111
            PyErr_FormatUnraisable("Exception ignored on object deallocation");
2112
        }
2113
        PyErr_SetRaisedException(exc);
2114
#endif
2115
0
    }
2116
591M
    GCState *gcstate = get_gc_state();
2117
591M
    if (gcstate->generations[0].count > 0) {
2118
402M
        gcstate->generations[0].count--;
2119
402M
    }
2120
591M
    PyObject_Free(((char *)op)-presize);
2121
591M
}
2122
2123
int
2124
PyObject_GC_IsTracked(PyObject* obj)
2125
9.31k
{
2126
9.31k
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2127
32
        return 1;
2128
32
    }
2129
9.27k
    return 0;
2130
9.31k
}
2131
2132
int
2133
PyObject_GC_IsFinalized(PyObject *obj)
2134
0
{
2135
0
    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
2136
0
         return 1;
2137
0
    }
2138
0
    return 0;
2139
0
}
2140
2141
static int
2142
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
2143
0
{
2144
0
    PyGC_Head *gc_list, *gc;
2145
0
    gc_list = &gen->head;
2146
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2147
0
        PyObject *op = FROM_GC(gc);
2148
0
        Py_INCREF(op);
2149
0
        int res = callback(op, arg);
2150
0
        Py_DECREF(op);
2151
0
        if (!res) {
2152
0
            return -1;
2153
0
        }
2154
0
    }
2155
0
    return 0;
2156
0
}
2157
2158
void
2159
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2160
0
{
2161
0
    GCState *gcstate = get_gc_state();
2162
0
    int original_state = gcstate->enabled;
2163
0
    gcstate->enabled = 0;
2164
0
    for (size_t i = 0; i < NUM_GENERATIONS; i++) {
2165
0
        if (visit_generation(callback, arg, &gcstate->generations[i]) < 0) {
2166
0
            goto done;
2167
0
        }
2168
0
    }
2169
0
    visit_generation(callback, arg, &gcstate->permanent_generation);
2170
0
done:
2171
0
    gcstate->enabled = original_state;
2172
0
}
2173
2174
#endif  // !Py_GIL_DISABLED