Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/gc.c
Line
Count
Source
1
//  This implements the reference cycle garbage collector.
2
//  The Python module interface to the collector is in gcmodule.c.
3
//  See InternalDocs/garbage_collector.md for more infromation.
4
5
#include "Python.h"
6
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
7
#include "pycore_dict.h"          // _PyInlineValuesSize()
8
#include "pycore_initconfig.h"    // _PyStatus_OK()
9
#include "pycore_context.h"
10
#include "pycore_interp.h"        // PyInterpreterState.gc
11
#include "pycore_interpframe.h"   // _PyFrame_GetLocalsArray()
12
#include "pycore_object.h"
13
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
14
#include "pycore_pyerrors.h"
15
#include "pycore_pystate.h"       // _PyThreadState_GET()
16
#include "pycore_tuple.h"         // _PyTuple_MaybeUntrack()
17
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
18
#include "pydtrace.h"
19
20
#if !defined(Py_GIL_DISABLED)
21
22
typedef struct _gc_runtime_state GCState;
23
24
#ifdef Py_DEBUG
25
#  define GC_DEBUG
26
#endif
27
28
1.37G
#define GC_NEXT _PyGCHead_NEXT
29
117M
#define GC_PREV _PyGCHead_PREV
30
31
// update_refs() set this bit for all objects in current generation.
32
// subtract_refs() and move_unreachable() uses this to distinguish
33
// visited object is in GCing or not.
34
//
35
// move_unreachable() removes this flag from reachable objects.
36
// Only unreachable objects have this flag.
37
//
38
// No objects in interpreter have this flag after GC ends.
39
1.96G
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
40
41
// Lowest bit of _gc_next is used for UNREACHABLE flag.
42
//
43
// This flag represents the object is in unreachable list in move_unreachable()
44
//
45
// Although this flag is used only in move_unreachable(), move_unreachable()
46
// doesn't clear this flag to skip unnecessary iteration.
47
// move_legacy_finalizers() removes this flag instead.
48
// Between them, unreachable list is not normal list and we can not use
49
// most gc_list_* functions for it.
50
409M
#define NEXT_MASK_UNREACHABLE  (1)
51
52
2.44G
#define AS_GC(op) _Py_AS_GC(op)
53
1.50G
#define FROM_GC(gc) _Py_FROM_GC(gc)
54
55
// Automatically choose the generation that needs collecting.
56
226k
#define GENERATION_AUTO (-1)
57
58
static inline int
59
gc_is_collecting(PyGC_Head *g)
60
1.21G
{
61
1.21G
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
62
1.21G
}
63
64
static inline void
65
gc_clear_collecting(PyGC_Head *g)
66
368M
{
67
368M
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
68
368M
}
69
70
static inline Py_ssize_t
71
gc_get_refs(PyGC_Head *g)
72
1.00G
{
73
1.00G
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
74
1.00G
}
75
76
static inline void
77
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
78
216M
{
79
216M
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
80
216M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
81
216M
}
82
83
static inline void
84
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
85
375M
{
86
375M
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
87
375M
        | PREV_MASK_COLLECTING
88
375M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
89
375M
}
90
91
static inline void
92
gc_decref(PyGC_Head *g)
93
323M
{
94
323M
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
95
323M
                              gc_get_refs(g) > 0,
96
323M
                              "refcount is too small");
97
323M
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
98
323M
}
99
100
101
202k
#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
102
103
104
static GCState *
105
get_gc_state(void)
106
612M
{
107
612M
    PyInterpreterState *interp = _PyInterpreterState_GET();
108
612M
    return &interp->gc;
109
612M
}
110
111
112
void
113
_PyGC_InitState(GCState *gcstate)
114
37
{
115
37
#define INIT_HEAD(GEN) \
116
148
    do { \
117
148
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
118
148
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
119
148
    } while (0)
120
121
148
    for (int i = 0; i < NUM_GENERATIONS; i++) {
122
111
        assert(gcstate->generations[i].count == 0);
123
111
        INIT_HEAD(gcstate->generations[i]);
124
111
    };
125
37
    gcstate->generation0 = GEN_HEAD(gcstate, 0);
126
37
    INIT_HEAD(gcstate->permanent_generation);
127
128
37
#undef INIT_HEAD
129
37
}
130
131
132
PyStatus
133
_PyGC_Init(PyInterpreterState *interp)
134
37
{
135
37
    GCState *gcstate = &interp->gc;
136
137
37
    gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats));
138
37
    if (gcstate->generation_stats == NULL) {
139
0
        return _PyStatus_NO_MEMORY();
140
0
    }
141
142
37
    gcstate->garbage = PyList_New(0);
143
37
    if (gcstate->garbage == NULL) {
144
0
        return _PyStatus_NO_MEMORY();
145
0
    }
146
147
37
    gcstate->callbacks = PyList_New(0);
148
37
    if (gcstate->callbacks == NULL) {
149
0
        return _PyStatus_NO_MEMORY();
150
0
    }
151
152
37
    return _PyStatus_OK();
153
37
}
154
155
156
/*
157
_gc_prev values
158
---------------
159
160
Between collections, _gc_prev is used for doubly linked list.
161
162
Lowest two bits of _gc_prev are used for flags.
163
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
164
or _PyObject_GC_UNTRACK() is called.
165
166
During a collection, _gc_prev is temporary used for gc_refs, and the gc list
167
is singly linked until _gc_prev is restored.
168
169
gc_refs
170
    At the start of a collection, update_refs() copies the true refcount
171
    to gc_refs, for each object in the generation being collected.
172
    subtract_refs() then adjusts gc_refs so that it equals the number of
173
    times an object is referenced directly from outside the generation
174
    being collected.
175
176
PREV_MASK_COLLECTING
177
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
178
    update_refs().
179
180
181
_gc_next values
182
---------------
183
184
_gc_next takes these values:
185
186
0
187
    The object is not tracked
188
189
!= 0
190
    Pointer to the next object in the GC list.
191
    Additionally, lowest bit is used temporary for
192
    NEXT_MASK_UNREACHABLE flag described below.
193
194
NEXT_MASK_UNREACHABLE
195
    move_unreachable() then moves objects not reachable (whether directly or
196
    indirectly) from outside the generation into an "unreachable" set and
197
    set this flag.
198
199
    Objects that are found to be reachable have gc_refs set to 1.
200
    When this flag is set for the reachable object, the object must be in
201
    "unreachable" set.
202
    The flag is unset and the object is moved back to "reachable" set.
203
204
    move_legacy_finalizers() will remove this flag from "unreachable" set.
205
*/
206
207
/*** list functions ***/
208
209
static inline void
210
gc_list_init(PyGC_Head *list)
211
842k
{
212
    // List header must not have flags.
213
    // We can assign pointer by simple cast.
214
842k
    list->_gc_prev = (uintptr_t)list;
215
842k
    list->_gc_next = (uintptr_t)list;
216
842k
}
217
218
static inline int
219
gc_list_is_empty(PyGC_Head *list)
220
15.1M
{
221
15.1M
    return (list->_gc_next == (uintptr_t)list);
222
15.1M
}
223
224
/* Append `node` to `list`. */
225
static inline void
226
gc_list_append(PyGC_Head *node, PyGC_Head *list)
227
41.4M
{
228
41.4M
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
229
230
    // last <-> node
231
41.4M
    _PyGCHead_SET_PREV(node, last);
232
41.4M
    _PyGCHead_SET_NEXT(last, node);
233
234
    // node <-> list
235
41.4M
    _PyGCHead_SET_NEXT(node, list);
236
41.4M
    list->_gc_prev = (uintptr_t)node;
237
41.4M
}
238
239
/* Remove `node` from the gc list it's currently in. */
240
static inline void
241
gc_list_remove(PyGC_Head *node)
242
0
{
243
0
    PyGC_Head *prev = GC_PREV(node);
244
0
    PyGC_Head *next = GC_NEXT(node);
245
246
0
    _PyGCHead_SET_NEXT(prev, next);
247
0
    _PyGCHead_SET_PREV(next, prev);
248
249
0
    node->_gc_next = 0; /* object is not currently tracked */
250
0
}
251
252
/* Move `node` from the gc list it's currently in (which is not explicitly
253
 * named here) to the end of `list`.  This is semantically the same as
254
 * gc_list_remove(node) followed by gc_list_append(node, list).
255
 */
256
static void
257
gc_list_move(PyGC_Head *node, PyGC_Head *list)
258
13.7M
{
259
    /* Unlink from current list. */
260
13.7M
    PyGC_Head *from_prev = GC_PREV(node);
261
13.7M
    PyGC_Head *from_next = GC_NEXT(node);
262
13.7M
    _PyGCHead_SET_NEXT(from_prev, from_next);
263
13.7M
    _PyGCHead_SET_PREV(from_next, from_prev);
264
265
    /* Relink at end of new list. */
266
    // list must not have flags.  So we can skip macros.
267
13.7M
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
268
13.7M
    _PyGCHead_SET_PREV(node, to_prev);
269
13.7M
    _PyGCHead_SET_NEXT(to_prev, node);
270
13.7M
    list->_gc_prev = (uintptr_t)node;
271
13.7M
    _PyGCHead_SET_NEXT(node, list);
272
13.7M
}
273
274
/* append list `from` onto list `to`; `from` becomes an empty list */
275
static void
276
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
277
379k
{
278
379k
    assert(from != to);
279
379k
    if (!gc_list_is_empty(from)) {
280
101k
        PyGC_Head *to_tail = GC_PREV(to);
281
101k
        PyGC_Head *from_head = GC_NEXT(from);
282
101k
        PyGC_Head *from_tail = GC_PREV(from);
283
101k
        assert(from_head != from);
284
101k
        assert(from_tail != from);
285
286
101k
        _PyGCHead_SET_NEXT(to_tail, from_head);
287
101k
        _PyGCHead_SET_PREV(from_head, to_tail);
288
289
101k
        _PyGCHead_SET_NEXT(from_tail, to);
290
101k
        _PyGCHead_SET_PREV(to, from_tail);
291
101k
    }
292
379k
    gc_list_init(from);
293
379k
}
294
295
static Py_ssize_t
296
gc_list_size(PyGC_Head *list)
297
101k
{
298
101k
    PyGC_Head *gc;
299
101k
    Py_ssize_t n = 0;
300
203M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
301
203M
        n++;
302
203M
    }
303
101k
    return n;
304
101k
}
305
306
/* Walk the list and mark all objects as non-collecting */
307
static inline void
308
gc_list_clear_collecting(PyGC_Head *collectable)
309
92.7k
{
310
92.7k
    PyGC_Head *gc;
311
10.3M
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
312
10.2M
        gc_clear_collecting(gc);
313
10.2M
    }
314
92.7k
}
315
316
/* Append objects in a GC list to a Python list.
317
 * Return 0 if all OK, < 0 if error (out of memory for list)
318
 */
319
static int
320
append_objects(PyObject *py_list, PyGC_Head *gc_list)
321
0
{
322
0
    PyGC_Head *gc;
323
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
324
0
        PyObject *op = FROM_GC(gc);
325
0
        if (op != py_list) {
326
0
            if (PyList_Append(py_list, op)) {
327
0
                return -1; /* exception */
328
0
            }
329
0
        }
330
0
    }
331
0
    return 0;
332
0
}
333
334
// Constants for validate_list's flags argument.
335
enum flagstates {collecting_clear_unreachable_clear,
336
                 collecting_clear_unreachable_set,
337
                 collecting_set_unreachable_clear,
338
                 collecting_set_unreachable_set};
339
340
#ifdef GC_DEBUG
341
// validate_list checks list consistency.  And it works as document
342
// describing when flags are expected to be set / unset.
343
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
344
// the prev and next pointers are "polluted" with flags.
345
// What's checked:
346
// - The `head` pointers are not polluted.
347
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
348
//   `set or clear, as specified by the 'flags' argument.
349
// - The prev and next pointers are mutually consistent.
350
static void
351
validate_list(PyGC_Head *head, enum flagstates flags)
352
{
353
    assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
354
    assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
355
    uintptr_t prev_value = 0, next_value = 0;
356
    switch (flags) {
357
        case collecting_clear_unreachable_clear:
358
            break;
359
        case collecting_set_unreachable_clear:
360
            prev_value = PREV_MASK_COLLECTING;
361
            break;
362
        case collecting_clear_unreachable_set:
363
            next_value = NEXT_MASK_UNREACHABLE;
364
            break;
365
        case collecting_set_unreachable_set:
366
            prev_value = PREV_MASK_COLLECTING;
367
            next_value = NEXT_MASK_UNREACHABLE;
368
            break;
369
        default:
370
            assert(! "bad internal flags argument");
371
    }
372
    PyGC_Head *prev = head;
373
    PyGC_Head *gc = GC_NEXT(head);
374
    while (gc != head) {
375
        PyGC_Head *trueprev = GC_PREV(gc);
376
        PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
377
        assert(truenext != NULL);
378
        assert(trueprev == prev);
379
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
380
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
381
        prev = gc;
382
        gc = truenext;
383
    }
384
    assert(prev == GC_PREV(head));
385
}
386
#else
387
1.20M
#define validate_list(x, y) do{}while(0)
388
#endif
389
390
/*** end of list stuff ***/
391
392
393
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
394
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
395
 */
396
static Py_ssize_t
397
update_refs(PyGC_Head *containers)
398
185k
{
399
185k
    PyGC_Head *next;
400
185k
    PyGC_Head *gc = GC_NEXT(containers);
401
185k
    Py_ssize_t candidates = 0;
402
403
375M
    while (gc != containers) {
404
375M
        next = GC_NEXT(gc);
405
375M
        PyObject *op = FROM_GC(gc);
406
375M
        if (_Py_IsImmortal(op)) {
407
0
            assert(!_Py_IsStaticImmortal(op));
408
0
            _PyObject_GC_UNTRACK(op);
409
0
            gc = next;
410
0
            continue;
411
0
        }
412
375M
        gc_reset_refs(gc, Py_REFCNT(op));
413
        /* Python's cyclic gc should never see an incoming refcount
414
         * of 0:  if something decref'ed to 0, it should have been
415
         * deallocated immediately at that time.
416
         * Possible cause (if the assert triggers):  a tp_dealloc
417
         * routine left a gc-aware object tracked during its teardown
418
         * phase, and did something-- or allowed something to happen --
419
         * that called back into Python.  gc can trigger then, and may
420
         * see the still-tracked dying object.  Before this assert
421
         * was added, such mistakes went on to allow gc to try to
422
         * delete the object again.  In a debug build, that caused
423
         * a mysterious segfault, when _Py_ForgetReference tried
424
         * to remove the object from the doubly-linked list of all
425
         * objects a second time.  In a release build, an actual
426
         * double deallocation occurred, which leads to corruption
427
         * of the allocator's internal bookkeeping pointers.  That's
428
         * so serious that maybe this should be a release-build
429
         * check instead of an assert?
430
         */
431
375M
        _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
432
375M
        gc = next;
433
375M
        candidates++;
434
375M
    }
435
185k
    return candidates;
436
185k
}
437
438
/* A traversal callback for subtract_refs. */
439
static int
440
visit_decref(PyObject *op, void *parent)
441
1.21G
{
442
1.21G
    OBJECT_STAT_INC(object_visits);
443
1.21G
    _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
444
445
1.21G
    if (_PyObject_IS_GC(op)) {
446
628M
        PyGC_Head *gc = AS_GC(op);
447
        /* We're only interested in gc_refs for objects in the
448
         * generation being collected, which can be recognized
449
         * because only they have positive gc_refs.
450
         */
451
628M
        if (gc_is_collecting(gc)) {
452
323M
            gc_decref(gc);
453
323M
        }
454
628M
    }
455
1.21G
    return 0;
456
1.21G
}
457
458
int
459
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
460
4.32M
{
461
    // This is a bit tricky! We want to ignore stackrefs with embedded
462
    // refcounts when computing the incoming references, but otherwise treat
463
    // them like normal.
464
4.32M
    assert(!PyStackRef_IsTaggedInt(*ref));
465
4.32M
    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
466
249k
        return 0;
467
249k
    }
468
4.07M
    Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
469
4.07M
    return 0;
470
4.07M
}
471
472
int
473
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
474
741k
{
475
741k
    _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
476
    /* locals and stack */
477
4.98M
    for (; ref < frame->stackpointer; ref++) {
478
4.24M
        if (!PyStackRef_IsTaggedInt(*ref)) {
479
4.24M
            _Py_VISIT_STACKREF(*ref);
480
4.24M
        }
481
4.24M
    }
482
741k
    return 0;
483
741k
}
484
485
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
486
 * for all objects in containers. The ones with gc_refs > 0 are directly
487
 * reachable from outside containers, and so can't be collected.
488
 */
489
static void
490
subtract_refs(PyGC_Head *containers)
491
185k
{
492
185k
    traverseproc traverse;
493
185k
    PyGC_Head *gc = GC_NEXT(containers);
494
375M
    for (; gc != containers; gc = GC_NEXT(gc)) {
495
375M
        PyObject *op = FROM_GC(gc);
496
375M
        traverse = Py_TYPE(op)->tp_traverse;
497
375M
        (void) traverse(op,
498
375M
                        visit_decref,
499
375M
                        op);
500
375M
    }
501
185k
}
502
503
/* A traversal callback for move_unreachable. */
504
static int
505
visit_reachable(PyObject *op, void *arg)
506
1.13G
{
507
1.13G
    PyGC_Head *reachable = arg;
508
1.13G
    OBJECT_STAT_INC(object_visits);
509
1.13G
    if (!_PyObject_IS_GC(op)) {
510
542M
        return 0;
511
542M
    }
512
513
590M
    PyGC_Head *gc = AS_GC(op);
514
590M
    const Py_ssize_t gc_refs = gc_get_refs(gc);
515
516
    // Ignore objects in other generation.
517
    // This also skips objects "to the left" of the current position in
518
    // move_unreachable's scan of the 'young' list - they've already been
519
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
520
590M
    if (! gc_is_collecting(gc)) {
521
367M
        return 0;
522
367M
    }
523
    // It would be a logic error elsewhere if the collecting flag were set on
524
    // an untracked object.
525
223M
    _PyObject_ASSERT(op, gc->_gc_next != 0);
526
527
223M
    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
528
        /* This had gc_refs = 0 when move_unreachable got
529
         * to it, but turns out it's reachable after all.
530
         * Move it back to move_unreachable's 'young' list,
531
         * and move_unreachable will eventually get to it
532
         * again.
533
         */
534
        // Manually unlink gc from unreachable list because the list functions
535
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
536
41.4M
        PyGC_Head *prev = GC_PREV(gc);
537
41.4M
        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
538
41.4M
        _PyObject_ASSERT(FROM_GC(prev),
539
41.4M
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
540
41.4M
        _PyObject_ASSERT(FROM_GC(next),
541
41.4M
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
542
41.4M
        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
543
41.4M
        _PyGCHead_SET_PREV(next, prev);
544
545
41.4M
        gc_list_append(gc, reachable);
546
41.4M
        gc_set_refs(gc, 1);
547
41.4M
    }
548
182M
    else if (gc_refs == 0) {
549
        /* This is in move_unreachable's 'young' list, but
550
         * the traversal hasn't yet gotten to it.  All
551
         * we need to do is tell move_unreachable that it's
552
         * reachable.
553
         */
554
175M
        gc_set_refs(gc, 1);
555
175M
    }
556
    /* Else there's nothing to do.
557
     * If gc_refs > 0, it must be in move_unreachable's 'young'
558
     * list, and move_unreachable will eventually get to it.
559
     */
560
7.11M
    else {
561
7.11M
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
562
7.11M
    }
563
223M
    return 0;
564
590M
}
565
566
/* Move the unreachable objects from young to unreachable.  After this,
567
 * all objects in young don't have PREV_MASK_COLLECTING flag and
568
 * unreachable have the flag.
569
 * All objects in young after this are directly or indirectly reachable
570
 * from outside the original young; and all objects in unreachable are
571
 * not.
572
 *
573
 * This function restores _gc_prev pointer.  young and unreachable are
574
 * doubly linked list after this function.
575
 * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag.
576
 * So we can not gc_list_* functions for unreachable until we remove the flag.
577
 */
578
static void
579
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
580
185k
{
581
    // previous elem in the young list, used for restore gc_prev.
582
185k
    PyGC_Head *prev = young;
583
185k
    PyGC_Head *gc = GC_NEXT(young);
584
585
    /* Invariants:  all objects "to the left" of us in young are reachable
586
     * (directly or indirectly) from outside the young list as it was at entry.
587
     *
588
     * All other objects from the original young "to the left" of us are in
589
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
590
     * left of us in 'young' now have been scanned, and no objects here
591
     * or to the right have been scanned yet.
592
     */
593
594
416M
    while (gc != young) {
595
416M
        if (gc_get_refs(gc)) {
596
            /* gc is definitely reachable from outside the
597
             * original 'young'.  Mark it as such, and traverse
598
             * its pointers to find any other objects that may
599
             * be directly reachable from it.  Note that the
600
             * call to tp_traverse may append objects to young,
601
             * so we have to wait until it returns to determine
602
             * the next object to visit.
603
             */
604
354M
            PyObject *op = FROM_GC(gc);
605
354M
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
606
354M
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
607
354M
                                      "refcount is too small");
608
            // NOTE: visit_reachable may change gc->_gc_next when
609
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
610
354M
            (void) traverse(op,
611
354M
                    visit_reachable,
612
354M
                    (void *)young);
613
            // relink gc_prev to prev element.
614
354M
            _PyGCHead_SET_PREV(gc, prev);
615
            // gc is not COLLECTING state after here.
616
354M
            gc_clear_collecting(gc);
617
354M
            prev = gc;
618
354M
        }
619
61.9M
        else {
620
            /* This *may* be unreachable.  To make progress,
621
             * assume it is.  gc isn't directly reachable from
622
             * any object we've already traversed, but may be
623
             * reachable from an object we haven't gotten to yet.
624
             * visit_reachable will eventually move gc back into
625
             * young if that's so, and we'll see it again.
626
             */
627
            // Move gc to unreachable.
628
            // No need to gc->next->prev = prev because it is single linked.
629
61.9M
            prev->_gc_next = gc->_gc_next;
630
631
            // We can't use gc_list_append() here because we use
632
            // NEXT_MASK_UNREACHABLE here.
633
61.9M
            PyGC_Head *last = GC_PREV(unreachable);
634
            // NOTE: Since all objects in unreachable set has
635
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
636
            // But this may pollute the unreachable list head's 'next' pointer
637
            // too. That's semantically senseless but expedient here - the
638
            // damage is repaired when this function ends.
639
61.9M
            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
640
61.9M
            _PyGCHead_SET_PREV(gc, last);
641
61.9M
            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
642
61.9M
            unreachable->_gc_prev = (uintptr_t)gc;
643
61.9M
        }
644
416M
        gc = (PyGC_Head*)prev->_gc_next;
645
416M
    }
646
    // young->_gc_prev must be last element remained in the list.
647
185k
    young->_gc_prev = (uintptr_t)prev;
648
    // don't let the pollution of the list head's next pointer leak
649
185k
    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
650
185k
}
651
652
/* In theory, all tuples should be younger than the
653
* objects they refer to, as tuples are immortal.
654
* Therefore, untracking tuples in oldest-first order in the
655
* young generation before promoting them should have tracked
656
* all the tuples that can be untracked.
657
*
658
* Unfortunately, the C API allows tuples to be created
659
* and then filled in. So this won't untrack all tuples
660
* that can be untracked. It should untrack most of them
661
* and is much faster than a more complex approach that
662
* would untrack all relevant tuples.
663
*/
664
static void
665
untrack_tuples(PyGC_Head *head)
666
92.7k
{
667
92.7k
    PyGC_Head *gc = GC_NEXT(head);
668
354M
    while (gc != head) {
669
354M
        PyObject *op = FROM_GC(gc);
670
354M
        PyGC_Head *next = GC_NEXT(gc);
671
354M
        if (PyTuple_CheckExact(op)) {
672
179M
            _PyTuple_MaybeUntrack(op);
673
179M
        }
674
354M
        gc = next;
675
354M
    }
676
92.7k
}
677
678
/* Return true if object has a pre-PEP 442 finalization method. */
679
static int
680
has_legacy_finalizer(PyObject *op)
681
10.2M
{
682
10.2M
    return Py_TYPE(op)->tp_del != NULL;
683
10.2M
}
684
685
/* Move the objects in unreachable with tp_del slots into `finalizers`.
686
 *
687
 * This function also removes NEXT_MASK_UNREACHABLE flag
688
 * from _gc_next in unreachable.
689
 */
690
static void
691
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
692
92.7k
{
693
92.7k
    PyGC_Head *gc, *next;
694
92.7k
    _PyObject_ASSERT(
695
92.7k
        FROM_GC(unreachable),
696
92.7k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
697
698
    /* March over unreachable.  Move objects with finalizers into
699
     * `finalizers`.
700
     */
701
10.3M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
702
10.2M
        PyObject *op = FROM_GC(gc);
703
704
10.2M
        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
705
10.2M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
706
10.2M
        next = (PyGC_Head*)gc->_gc_next;
707
708
10.2M
        if (has_legacy_finalizer(op)) {
709
0
            gc_clear_collecting(gc);
710
0
            gc_list_move(gc, finalizers);
711
0
        }
712
10.2M
    }
713
92.7k
}
714
715
static inline void
716
clear_unreachable_mask(PyGC_Head *unreachable)
717
92.7k
{
718
    /* Check that the list head does not have the unreachable bit set */
719
92.7k
    _PyObject_ASSERT(
720
92.7k
        FROM_GC(unreachable),
721
92.7k
        ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
722
92.7k
    _PyObject_ASSERT(
723
92.7k
        FROM_GC(unreachable),
724
92.7k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
725
726
92.7k
    PyGC_Head *gc, *next;
727
10.3M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
728
10.2M
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
729
10.2M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
730
10.2M
        next = (PyGC_Head*)gc->_gc_next;
731
10.2M
    }
732
92.7k
    validate_list(unreachable, collecting_set_unreachable_clear);
733
92.7k
}
734
735
/* A traversal callback for move_legacy_finalizer_reachable. */
736
static int
737
visit_move(PyObject *op, void *arg)
738
0
{
739
0
    PyGC_Head *tolist = arg;
740
0
    OBJECT_STAT_INC(object_visits);
741
0
    if (_PyObject_IS_GC(op)) {
742
0
        PyGC_Head *gc = AS_GC(op);
743
0
        if (gc_is_collecting(gc)) {
744
0
            gc_list_move(gc, tolist);
745
0
            gc_clear_collecting(gc);
746
0
        }
747
0
    }
748
0
    return 0;
749
0
}
750
751
/* Move objects that are reachable from finalizers, from the unreachable set
752
 * into finalizers set.
753
 */
754
static void
755
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
756
92.7k
{
757
92.7k
    traverseproc traverse;
758
92.7k
    PyGC_Head *gc = GC_NEXT(finalizers);
759
92.7k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
760
        /* Note that the finalizers list may grow during this. */
761
0
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
762
0
        (void) traverse(FROM_GC(gc),
763
0
                        visit_move,
764
0
                        (void *)finalizers);
765
0
    }
766
92.7k
}
767
768
/* Handle weakref callbacks.  Note that it's possible for such weakrefs to be
769
 * outside the unreachable set -- indeed, those are precisely the weakrefs
770
 * whose callbacks must be invoked.  See gc_weakref.txt for overview & some
771
 * details.
772
 *
773
 * The clearing of weakrefs is suble and must be done carefully, as there was
774
 * previous bugs related to this.  First, weakrefs to the unreachable set of
775
 * objects must be cleared before we start calling `tp_clear`.  If we don't,
776
 * those weakrefs can reveal unreachable objects to Python-level code and that
777
 * is not safe.  Many objects are not usable after `tp_clear` has been called
778
 * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
779
 * example bugs).
780
 *
781
 * Weakrefs with callbacks always need to be cleared before executing the
782
 * callback.  That's because sometimes a callback will call the ref object,
783
 * to check if the reference is actually dead (KeyedRef does this, for
784
 * example).  We want to indicate that it is dead, even though it is possible
785
 * a finalizer might resurrect it.  Clearing also prevents the callback from
786
 * executing more than once.
787
 *
788
 * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
789
 * calling finalizers.  However, since tp_subclasses started being necessary
790
 * to invalidate caches (e.g. by PyType_Modified), that clearing has created
791
 * a bug.  If the weakref to the subclass is cleared before a finalizer is
792
 * called, the cache may not be correctly invalidated.  That can lead to
793
 * segfaults since the caches can refer to deallocated objects (GH-135552
794
 * is an example).  Now, we delay the clear of weakrefs without callbacks
795
 * until *after* finalizers have been executed.  That means weakrefs without
796
 * callbacks are still usable while finalizers are being executed.
797
 *
798
 * The weakrefs that are inside the unreachable set must also be cleared.
799
 * The reason this is required is not immediately obvious.  If the weakref
800
 * refers to an object outside of the unreachable set, that object might go
801
 * away when we start clearing objects.  Normally, the object should also be
802
 * part of the unreachable set but that's not true in the case of incomplete
803
 * or missing `tp_traverse` methods.  When that object goes away, the callback
804
 * for weakref can be executed and that could reveal unreachable objects to
805
 * Python-level code.  See bpo-38006 as an example bug.
806
 */
807
static int
808
handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
809
92.7k
{
810
92.7k
    PyGC_Head *gc;
811
92.7k
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
812
92.7k
    PyGC_Head *next;
813
92.7k
    int num_freed = 0;
814
815
92.7k
    gc_list_init(&wrcb_to_call);
816
817
    /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the
818
     * callback needs to be invoked. We make another pass over wrcb_to_call,
819
     * invoking callbacks, after this pass completes.
820
     */
821
10.3M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
822
10.2M
        PyWeakReference **wrlist;
823
824
10.2M
        PyObject *op = FROM_GC(gc);
825
10.2M
        next = GC_NEXT(gc);
826
827
10.2M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
828
7.42M
            continue;
829
7.42M
        }
830
831
        /* It supports weakrefs.  Does it have any?
832
         *
833
         * This is never triggered for static types so we can avoid the
834
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
835
         */
836
2.78M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
837
838
        /* `op` may have some weakrefs.  March over the list and move the
839
         * weakrefs with callbacks that must be called into wrcb_to_call.
840
         */
841
2.78M
        PyWeakReference *next_wr;
842
3.08M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
843
            // Get the next list element to get iterator progress if we omit
844
            // clearing of the weakref (because _PyWeakref_ClearRef changes
845
            // next pointer in the wrlist).
846
299k
            next_wr = wr->wr_next;
847
848
299k
            if (wr->wr_callback == NULL) {
849
                /* no callback */
850
261k
                continue;
851
261k
            }
852
853
            // Clear the weakref.  See the comments above this function for
854
            // reasons why we need to clear weakrefs that have callbacks.
855
            // Note that _PyWeakref_ClearRef clears the weakref but leaves the
856
            // callback pointer intact.  Obscure: it also changes *wrlist.
857
38.6k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
858
38.6k
            _PyWeakref_ClearRef(wr);
859
38.6k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
860
861
            /* Headache time.  `op` is going away, and is weakly referenced by
862
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
863
             * is also trash, no:
864
             *
865
             * 1. There's no need to call it.  The object and the weakref are
866
             *    both going away, so it's legitimate to pretend the weakref is
867
             *    going away first.  The user has to ensure a weakref outlives its
868
             *    referent if they want a guarantee that the wr callback will get
869
             *    invoked.
870
             *
871
             * 2. It may be catastrophic to call it.  If the callback is also in
872
             *    cyclic trash (CT), then although the CT is unreachable from
873
             *    outside the current generation, CT may be reachable from the
874
             *    callback.  Then the callback could resurrect insane objects.
875
             *
876
             * Since the callback is never needed and may be unsafe in this
877
             * case, wr is simply left in the unreachable set.  Note that
878
             * clear_weakrefs() will ensure its callback will not trigger
879
             * inside delete_garbage().
880
             *
881
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
882
             * weakref outlived the trash.  Note that since wr isn't CT in this
883
             * case, its callback can't be CT either -- wr acted as an external
884
             * root to this generation, and therefore its callback did too.  So
885
             * nothing in CT is reachable from the callback either, so it's hard
886
             * to imagine how calling it later could create a problem for us.  wr
887
             * is moved to wrcb_to_call in this case.
888
             */
889
38.6k
            if (gc_is_collecting(AS_GC((PyObject *)wr))) {
890
0
                continue;
891
0
            }
892
893
            /* Create a new reference so that wr can't go away
894
             * before we can process it again.
895
             */
896
38.6k
            Py_INCREF(wr);
897
898
            /* Move wr to wrcb_to_call, for the next pass. */
899
38.6k
            PyGC_Head *wrasgc = AS_GC((PyObject *)wr);
900
            // wrasgc is reachable, but next isn't, so they can't be the same
901
38.6k
            _PyObject_ASSERT((PyObject *)wr, wrasgc != next);
902
38.6k
            gc_list_move(wrasgc, &wrcb_to_call);
903
38.6k
        }
904
2.78M
    }
905
906
    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
907
     * because they can't reference unreachable objects.
908
     */
909
131k
    while (! gc_list_is_empty(&wrcb_to_call)) {
910
38.6k
        PyObject *temp;
911
38.6k
        PyObject *callback;
912
913
38.6k
        gc = (PyGC_Head*)wrcb_to_call._gc_next;
914
38.6k
        PyObject *op = FROM_GC(gc);
915
38.6k
        _PyObject_ASSERT(op, PyWeakref_Check(op));
916
38.6k
        PyWeakReference *wr = (PyWeakReference *)op;
917
38.6k
        callback = wr->wr_callback;
918
38.6k
        _PyObject_ASSERT(op, callback != NULL);
919
920
        /* copy-paste of weakrefobject.c's handle_callback() */
921
38.6k
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
922
38.6k
        if (temp == NULL) {
923
0
            PyErr_FormatUnraisable("Exception ignored on "
924
0
                                   "calling weakref callback %R", callback);
925
0
        }
926
38.6k
        else {
927
38.6k
            Py_DECREF(temp);
928
38.6k
        }
929
930
        /* Give up the reference we created in the first pass.  When
931
         * op's refcount hits 0 (which it may or may not do right now),
932
         * op's tp_dealloc will decref op->wr_callback too.  Note
933
         * that the refcount probably will hit 0 now, and because this
934
         * weakref was reachable to begin with, gc didn't already
935
         * add it to its count of freed objects.  Example:  a reachable
936
         * weak value dict maps some key to this reachable weakref.
937
         * The callback removes this key->weakref mapping from the
938
         * dict, leaving no other references to the weakref (excepting
939
         * ours).
940
         */
941
38.6k
        Py_DECREF(op);
942
38.6k
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
943
            /* object is still alive -- move it */
944
0
            gc_list_move(gc, old);
945
0
        }
946
38.6k
        else {
947
38.6k
            ++num_freed;
948
38.6k
        }
949
38.6k
    }
950
951
92.7k
    return num_freed;
952
92.7k
}
953
954
/* Clear all weakrefs to unreachable objects.  When this returns, no object in
955
 * `unreachable` is weakly referenced anymore.  See the comments above
956
 * handle_weakref_callbacks() for why these weakrefs need to be cleared.
957
 */
958
static void
959
clear_weakrefs(PyGC_Head *unreachable)
960
92.7k
{
961
92.7k
    PyGC_Head *gc;
962
92.7k
    PyGC_Head *next;
963
964
10.3M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
965
10.2M
        PyWeakReference **wrlist;
966
967
10.2M
        PyObject *op = FROM_GC(gc);
968
10.2M
        next = GC_NEXT(gc);
969
970
10.2M
        if (PyWeakref_Check(op)) {
971
            /* A weakref inside the unreachable set is always cleared. See
972
             * the comments above handle_weakref_callbacks() for why these
973
             * must be cleared.
974
             */
975
8
            _PyWeakref_ClearRef((PyWeakReference *)op);
976
8
        }
977
978
10.2M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
979
7.42M
            continue;
980
7.42M
        }
981
982
        /* It supports weakrefs.  Does it have any?
983
         *
984
         * This is never triggered for static types so we can avoid the
985
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
986
         */
987
2.78M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
988
989
        /* `op` may have some weakrefs.  March over the list, clear
990
         * all the weakrefs.
991
         */
992
3.04M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
993
            /* _PyWeakref_ClearRef clears the weakref but leaves
994
             * the callback pointer intact.  Obscure:  it also
995
             * changes *wrlist.
996
             */
997
261k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
998
261k
            _PyWeakref_ClearRef(wr);
999
261k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1000
261k
        }
1001
2.78M
    }
1002
92.7k
}
1003
1004
static void
1005
debug_cycle(const char *msg, PyObject *op)
1006
0
{
1007
0
    PySys_FormatStderr("gc: %s <%s %p>\n",
1008
0
                       msg, Py_TYPE(op)->tp_name, op);
1009
0
}
1010
1011
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
1012
 * only from such cycles).
1013
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
1014
 * garbage list (a Python list), else only the objects in finalizers with
1015
 * __del__ methods are appended to garbage.  All objects in finalizers are
1016
 * merged into the old list regardless.
1017
 */
1018
static void
1019
handle_legacy_finalizers(PyThreadState *tstate,
1020
                         GCState *gcstate,
1021
                         PyGC_Head *finalizers, PyGC_Head *old)
1022
92.7k
{
1023
92.7k
    assert(!_PyErr_Occurred(tstate));
1024
92.7k
    assert(gcstate->garbage != NULL);
1025
1026
92.7k
    PyGC_Head *gc = GC_NEXT(finalizers);
1027
92.7k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
1028
0
        PyObject *op = FROM_GC(gc);
1029
1030
0
        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
1031
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1032
0
                _PyErr_Clear(tstate);
1033
0
                break;
1034
0
            }
1035
0
        }
1036
0
    }
1037
1038
92.7k
    gc_list_merge(finalizers, old);
1039
92.7k
}
1040
1041
/* Run first-time finalizers (if any) on all the objects in collectable.
1042
 * Note that this may remove some (or even all) of the objects from the
1043
 * list, due to refcounts falling to 0.
1044
 */
1045
static void
1046
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
1047
92.7k
{
1048
92.7k
    destructor finalize;
1049
92.7k
    PyGC_Head seen;
1050
1051
    /* While we're going through the loop, `finalize(op)` may cause op, or
1052
     * other objects, to be reclaimed via refcounts falling to zero.  So
1053
     * there's little we can rely on about the structure of the input
1054
     * `collectable` list across iterations.  For safety, we always take the
1055
     * first object in that list and move it to a temporary `seen` list.
1056
     * If objects vanish from the `collectable` and `seen` lists we don't
1057
     * care.
1058
     */
1059
92.7k
    gc_list_init(&seen);
1060
1061
10.3M
    while (!gc_list_is_empty(collectable)) {
1062
10.2M
        PyGC_Head *gc = GC_NEXT(collectable);
1063
10.2M
        PyObject *op = FROM_GC(gc);
1064
10.2M
        gc_list_move(gc, &seen);
1065
10.2M
        if (!_PyGC_FINALIZED(op) &&
1066
10.2M
            (finalize = Py_TYPE(op)->tp_finalize) != NULL)
1067
4
        {
1068
4
            _PyGC_SET_FINALIZED(op);
1069
4
            Py_INCREF(op);
1070
4
            finalize(op);
1071
4
            assert(!_PyErr_Occurred(tstate));
1072
4
            Py_DECREF(op);
1073
4
        }
1074
10.2M
    }
1075
92.7k
    gc_list_merge(&seen, collectable);
1076
92.7k
}
1077
1078
/* Break reference cycles by clearing the containers involved.  This is
1079
 * tricky business as the lists can be changing and we don't know which
1080
 * objects may be freed.  It is possible I screwed something up here.
1081
 */
1082
static void
1083
delete_garbage(PyThreadState *tstate, GCState *gcstate,
1084
               PyGC_Head *collectable, PyGC_Head *old)
1085
92.7k
{
1086
92.7k
    assert(!_PyErr_Occurred(tstate));
1087
1088
4.30M
    while (!gc_list_is_empty(collectable)) {
1089
4.20M
        PyGC_Head *gc = GC_NEXT(collectable);
1090
4.20M
        PyObject *op = FROM_GC(gc);
1091
1092
4.20M
        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1093
4.20M
                                  "refcount is too small");
1094
1095
4.20M
        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
1096
0
            assert(gcstate->garbage != NULL);
1097
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1098
0
                _PyErr_Clear(tstate);
1099
0
            }
1100
0
        }
1101
4.20M
        else {
1102
4.20M
            inquiry clear;
1103
4.20M
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1104
3.81M
                Py_INCREF(op);
1105
3.81M
                (void) clear(op);
1106
3.81M
                if (_PyErr_Occurred(tstate)) {
1107
0
                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
1108
0
                                           Py_TYPE(op)->tp_name);
1109
0
                }
1110
3.81M
                Py_DECREF(op);
1111
3.81M
            }
1112
4.20M
        }
1113
4.20M
        if (GC_NEXT(collectable) == gc) {
1114
            /* object is still alive, move it, it may die later */
1115
3.49M
            gc_clear_collecting(gc);
1116
3.49M
            gc_list_move(gc, old);
1117
3.49M
        }
1118
4.20M
    }
1119
92.7k
}
1120
1121
1122
// Show stats for objects in each generations
1123
static void
1124
show_stats_each_generations(GCState *gcstate)
1125
0
{
1126
0
    char buf[100];
1127
0
    size_t pos = 0;
1128
1129
0
    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1130
0
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1131
0
                             " %zd",
1132
0
                             gc_list_size(GEN_HEAD(gcstate, i)));
1133
0
    }
1134
1135
0
    PySys_FormatStderr(
1136
0
        "gc: objects in each generation:%s\n"
1137
0
        "gc: objects in permanent generation: %zd\n",
1138
0
        buf, gc_list_size(&gcstate->permanent_generation.head));
1139
0
}
1140
1141
/* Deduce which objects among "base" are unreachable from outside the list
1142
   and move them to 'unreachable'. The process consist in the following steps:
1143
1144
1. Copy all reference counts to a different field (gc_prev is used to hold
1145
   this copy to save memory).
1146
2. Traverse all objects in "base" and visit all referred objects using
1147
   "tp_traverse" and for every visited object, subtract 1 to the reference
1148
   count (the one that we copied in the previous step). After this step, all
1149
   objects that can be reached directly from outside must have strictly positive
1150
   reference count, while all unreachable objects must have a count of exactly 0.
1151
3. Identify all unreachable objects (the ones with 0 reference count) and move
1152
   them to the "unreachable" list. This step also needs to move back to "base" all
1153
   objects that were initially marked as unreachable but are referred transitively
1154
   by the reachable objects (the ones with strictly positive reference count).
1155
1156
Contracts:
1157
1158
    * The "base" has to be a valid list with no mask set.
1159
1160
    * The "unreachable" list must be uninitialized (this function calls
1161
      gc_list_init over 'unreachable').
1162
1163
IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1164
flag set but it does not clear it to skip unnecessary iteration. Before the
1165
flag is cleared (for example, by using 'clear_unreachable_mask' function or
1166
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1167
list and we can not use most gc_list_* functions for it. */
1168
static inline Py_ssize_t
1169
185k
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1170
185k
    validate_list(base, collecting_clear_unreachable_clear);
1171
    /* Using ob_refcnt and gc_refs, calculate which objects in the
1172
     * container set are reachable from outside the set (i.e., have a
1173
     * refcount greater than 0 when all the references within the
1174
     * set are taken into account).
1175
     */
1176
185k
    Py_ssize_t candidates = update_refs(base);  // gc_prev is used for gc_refs
1177
185k
    subtract_refs(base);
1178
1179
    /* Leave everything reachable from outside base in base, and move
1180
     * everything else (in base) to unreachable.
1181
     *
1182
     * NOTE:  This used to move the reachable objects into a reachable
1183
     * set instead.  But most things usually turn out to be reachable,
1184
     * so it's more efficient to move the unreachable things.  It "sounds slick"
1185
     * to move the unreachable objects, until you think about it - the reason it
1186
     * pays isn't actually obvious.
1187
     *
1188
     * Suppose we create objects A, B, C in that order.  They appear in the young
1189
     * generation in the same order.  If B points to A, and C to B, and C is
1190
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1191
     * respectively.
1192
     *
1193
     * When move_unreachable finds A, A is moved to the unreachable list.  The
1194
     * same for B when it's first encountered.  Then C is traversed, B is moved
1195
     * _back_ to the reachable list.  B is eventually traversed, and then A is
1196
     * moved back to the reachable list.
1197
     *
1198
     * So instead of not moving at all, the reachable objects B and A are moved
1199
     * twice each.  Why is this a win?  A straightforward algorithm to move the
1200
     * reachable objects instead would move A, B, and C once each.
1201
     *
1202
     * The key is that this dance leaves the objects in order C, B, A - it's
1203
     * reversed from the original order.  On all _subsequent_ scans, none of
1204
     * them will move.  Since most objects aren't in cycles, this can save an
1205
     * unbounded number of moves across an unbounded number of later collections.
1206
     * It can cost more only the first time the chain is scanned.
1207
     *
1208
     * Drawback:  move_unreachable is also used to find out what's still trash
1209
     * after finalizers may resurrect objects.  In _that_ case most unreachable
1210
     * objects will remain unreachable, so it would be more efficient to move
1211
     * the reachable objects instead.  But this is a one-time cost, probably not
1212
     * worth complicating the code to speed just a little.
1213
     */
1214
185k
    gc_list_init(unreachable);
1215
185k
    move_unreachable(base, unreachable);  // gc_prev is pointer again
1216
185k
    validate_list(base, collecting_clear_unreachable_clear);
1217
185k
    validate_list(unreachable, collecting_set_unreachable_set);
1218
185k
    return candidates;
1219
185k
}
1220
1221
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1222
   them to 'old_generation' and placing the rest on 'still_unreachable'.
1223
1224
   Contracts:
1225
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1226
         will contain the objects that did not resurrect.
1227
1228
       * The "still_unreachable" list must be uninitialized (this function calls
1229
         gc_list_init over 'still_unreachable').
1230
1231
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1232
PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so
1233
we can skip the expense of clearing the flag to avoid extra iteration. */
1234
static inline void
1235
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1236
                           PyGC_Head *old_generation)
1237
92.7k
{
1238
    // Remove the PREV_MASK_COLLECTING from unreachable
1239
    // to prepare it for a new call to 'deduce_unreachable'
1240
92.7k
    gc_list_clear_collecting(unreachable);
1241
1242
    // After the call to deduce_unreachable, the 'still_unreachable' set will
1243
    // have the PREV_MARK_COLLECTING set, but the objects are going to be
1244
    // removed so we can skip the expense of clearing the flag.
1245
92.7k
    PyGC_Head* resurrected = unreachable;
1246
92.7k
    deduce_unreachable(resurrected, still_unreachable);
1247
92.7k
    clear_unreachable_mask(still_unreachable);
1248
1249
    // Move the resurrected objects to the old generation for future collection.
1250
92.7k
    gc_list_merge(resurrected, old_generation);
1251
92.7k
}
1252
1253
1254
/* Invoke progress callbacks to notify clients that garbage collection
1255
 * is starting or stopping
1256
 */
1257
static void
1258
invoke_gc_callback(PyThreadState *tstate, const char *phase,
1259
                   int generation, struct gc_generation_stats *stats)
1260
185k
{
1261
185k
    assert(!_PyErr_Occurred(tstate));
1262
1263
    /* we may get called very early */
1264
185k
    GCState *gcstate = &tstate->interp->gc;
1265
185k
    if (gcstate->callbacks == NULL) {
1266
0
        return;
1267
0
    }
1268
1269
    /* The local variable cannot be rebound, check it for sanity */
1270
185k
    assert(PyList_CheckExact(gcstate->callbacks));
1271
185k
    PyObject *info = NULL;
1272
185k
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1273
5.80k
        info = Py_BuildValue("{sisnsnsnsd}",
1274
5.80k
            "generation", generation,
1275
5.80k
            "collected", stats->collected,
1276
5.80k
            "uncollectable", stats->uncollectable,
1277
5.80k
            "candidates", stats->candidates,
1278
5.80k
            "duration", stats->duration);
1279
5.80k
        if (info == NULL) {
1280
0
            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
1281
0
            return;
1282
0
        }
1283
5.80k
    }
1284
1285
185k
    PyObject *phase_obj = PyUnicode_FromString(phase);
1286
185k
    if (phase_obj == NULL) {
1287
0
        Py_XDECREF(info);
1288
0
        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
1289
0
        return;
1290
0
    }
1291
1292
185k
    PyObject *stack[] = {phase_obj, info};
1293
191k
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1294
5.80k
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1295
5.80k
        Py_INCREF(cb); /* make sure cb doesn't go away */
1296
5.80k
        r = PyObject_Vectorcall(cb, stack, 2, NULL);
1297
5.80k
        if (r == NULL) {
1298
0
            PyErr_FormatUnraisable("Exception ignored while "
1299
0
                                   "calling GC callback %R", cb);
1300
0
        }
1301
5.80k
        else {
1302
5.80k
            Py_DECREF(r);
1303
5.80k
        }
1304
5.80k
        Py_DECREF(cb);
1305
5.80k
    }
1306
185k
    Py_DECREF(phase_obj);
1307
185k
    Py_XDECREF(info);
1308
185k
    assert(!_PyErr_Occurred(tstate));
1309
185k
}
1310
1311
1312
/* Find the oldest generation (highest numbered) where the count
1313
 * exceeds the threshold.  Objects in the that generation and
1314
 * generations younger than it will be collected. */
1315
static int
1316
gc_select_generation(GCState *gcstate)
1317
113k
{
1318
351k
    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1319
330k
        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1320
            /* Avoid quadratic performance degradation in number
1321
               of tracked objects (see also issue #4074):
1322
1323
               To limit the cost of garbage collection, there are two strategies;
1324
                 - make each collection faster, e.g. by scanning fewer objects
1325
                 - do less collections
1326
               This heuristic is about the latter strategy.
1327
1328
               In addition to the various configurable thresholds, we only trigger a
1329
               full collection if the ratio
1330
1331
                long_lived_pending / long_lived_total
1332
1333
               is above a given value (hardwired to 25%).
1334
1335
               The reason is that, while "non-full" collections (i.e., collections of
1336
               the young and middle generations) will always examine roughly the same
1337
               number of objects -- determined by the aforementioned thresholds --,
1338
               the cost of a full collection is proportional to the total number of
1339
               long-lived objects, which is virtually unbounded.
1340
1341
               Indeed, it has been remarked that doing a full collection every
1342
               <constant number> of object creations entails a dramatic performance
1343
               degradation in workloads which consist in creating and storing lots of
1344
               long-lived objects (e.g. building a large list of GC-tracked objects would
1345
               show quadratic performance, instead of linear as expected: see issue #4074).
1346
1347
               Using the above ratio, instead, yields amortized linear performance in
1348
               the total number of objects (the effect of which can be summarized
1349
               thusly: "each full garbage collection is more and more costly as the
1350
               number of objects grows, but we do fewer and fewer of them").
1351
1352
               This heuristic was suggested by Martin von Löwis on python-dev in
1353
               June 2008. His original analysis and proposal can be found at:
1354
               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1355
            */
1356
101k
            if (i == NUM_GENERATIONS - 1
1357
9.27k
                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1358
8.64k
            {
1359
8.64k
                continue;
1360
8.64k
            }
1361
92.7k
            return i;
1362
101k
        }
1363
330k
    }
1364
20.4k
    return -1;
1365
113k
}
1366
1367
static struct gc_generation_stats *
1368
gc_get_stats(GCState *gcstate, int gen)
1369
92.7k
{
1370
92.7k
    if (gen == 0) {
1371
84.4k
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1372
84.4k
        buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE;
1373
84.4k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1374
84.4k
        return stats;
1375
84.4k
    }
1376
8.29k
    else {
1377
8.29k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1378
8.29k
        buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE;
1379
8.29k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1380
8.29k
        return stats;
1381
8.29k
    }
1382
92.7k
}
1383
1384
static struct gc_generation_stats *
1385
gc_get_prev_stats(GCState *gcstate, int gen)
1386
92.7k
{
1387
92.7k
    if (gen == 0) {
1388
84.4k
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1389
84.4k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1390
84.4k
        return stats;
1391
84.4k
    }
1392
8.29k
    else {
1393
8.29k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1394
8.29k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1395
8.29k
        return stats;
1396
8.29k
    }
1397
92.7k
}
1398
1399
static void
1400
add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
1401
92.7k
{
1402
92.7k
    struct gc_generation_stats *prev_stats = gc_get_prev_stats(gcstate, gen);
1403
92.7k
    struct gc_generation_stats *cur_stats = gc_get_stats(gcstate, gen);
1404
1405
92.7k
    memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
1406
1407
92.7k
    cur_stats->ts_start = stats->ts_start;
1408
92.7k
    cur_stats->collections += 1;
1409
92.7k
    cur_stats->collected += stats->collected;
1410
92.7k
    cur_stats->uncollectable += stats->uncollectable;
1411
92.7k
    cur_stats->candidates += stats->candidates;
1412
1413
92.7k
    cur_stats->duration += stats->duration;
1414
92.7k
    cur_stats->heap_size = stats->heap_size;
1415
    /* Publish ts_stop last so remote readers do not select a partially
1416
       updated stats record as the latest collection. */
1417
92.7k
    cur_stats->ts_stop = stats->ts_stop;
1418
92.7k
}
1419
1420
/* This is the main function.  Read this to understand how the
1421
 * collection process works. */
1422
static Py_ssize_t
1423
gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1424
113k
{
1425
113k
    int i;
1426
113k
    PyGC_Head *young; /* the generation we are examining */
1427
113k
    PyGC_Head *old; /* next older generation */
1428
113k
    PyGC_Head unreachable; /* non-problematic unreachable trash */
1429
113k
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1430
113k
    PyGC_Head *gc;
1431
113k
    GCState *gcstate = &tstate->interp->gc;
1432
1433
    // gc_collect_main() must not be called before _PyGC_Init
1434
    // or after _PyGC_Fini()
1435
113k
    assert(gcstate->garbage != NULL);
1436
113k
    assert(!_PyErr_Occurred(tstate));
1437
1438
113k
    int expected = 0;
1439
113k
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
1440
        // Don't start a garbage collection if one is already in progress.
1441
0
        return 0;
1442
0
    }
1443
113k
    gcstate->frame = tstate->current_frame;
1444
1445
113k
    if (generation == GENERATION_AUTO) {
1446
        // Select the oldest generation that needs collecting. We will collect
1447
        // objects from that generation and all generations younger than it.
1448
113k
        generation = gc_select_generation(gcstate);
1449
113k
        if (generation < 0) {
1450
            // No generation needs to be collected.
1451
20.4k
            _Py_atomic_store_int(&gcstate->collecting, 0);
1452
20.4k
            return 0;
1453
20.4k
        }
1454
113k
    }
1455
1456
113k
    assert(generation >= 0 && generation < NUM_GENERATIONS);
1457
1458
#ifdef Py_STATS
1459
    {
1460
        PyStats *s = _PyStats_GET();
1461
        if (s) {
1462
            s->object_stats.object_visits = 0;
1463
        }
1464
    }
1465
#endif
1466
1467
92.7k
    GC_STAT_ADD(generation, collections, 1);
1468
1469
92.7k
    struct gc_generation_stats stats = { 0 };
1470
92.7k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
1471
92.7k
        invoke_gc_callback(tstate, "start", generation, &stats);
1472
92.7k
    }
1473
1474
92.7k
    stats.heap_size = gcstate->heap_size;
1475
    // ignore error: don't interrupt the GC if reading the clock fails
1476
92.7k
    (void)PyTime_PerfCounterRaw(&stats.ts_start);
1477
92.7k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
1478
0
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1479
0
        show_stats_each_generations(gcstate);
1480
0
    }
1481
1482
92.7k
    if (PyDTrace_GC_START_ENABLED()) {
1483
0
        PyDTrace_GC_START(generation);
1484
0
    }
1485
1486
    /* update collection and allocation counters */
1487
92.7k
    if (generation+1 < NUM_GENERATIONS) {
1488
92.1k
        gcstate->generations[generation+1].count += 1;
1489
92.1k
    }
1490
194k
    for (i = 0; i <= generation; i++) {
1491
101k
        gcstate->generations[i].count = 0;
1492
101k
    }
1493
1494
    /* merge younger generations with one we are currently collecting */
1495
101k
    for (i = 0; i < generation; i++) {
1496
8.92k
        gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1497
8.92k
    }
1498
1499
    /* handy references */
1500
92.7k
    young = GEN_HEAD(gcstate, generation);
1501
92.7k
    if (generation < NUM_GENERATIONS-1) {
1502
92.1k
        old = GEN_HEAD(gcstate, generation+1);
1503
92.1k
    }
1504
633
    else {
1505
633
        old = young;
1506
633
    }
1507
92.7k
    validate_list(old, collecting_clear_unreachable_clear);
1508
1509
92.7k
    stats.candidates = deduce_unreachable(young, &unreachable);
1510
1511
92.7k
    untrack_tuples(young);
1512
    /* Move reachable objects to next generation. */
1513
92.7k
    if (young != old) {
1514
92.1k
        if (generation == NUM_GENERATIONS - 2) {
1515
7.66k
            gcstate->long_lived_pending += gc_list_size(young);
1516
7.66k
        }
1517
92.1k
        gc_list_merge(young, old);
1518
92.1k
    }
1519
633
    else {
1520
        // In Python <= 3.13, we called untrack_dicts(young) here to untrack
1521
        // atomic-only dicts (see issue #14775). Python 3.14 removed the lazy
1522
        // dict tracking machinery entirely (GH-127010) -- dicts are always
1523
        // tracked from creation and never untracked by GC. That way, we don't
1524
        // have to restore MAINTAIN_TRACKING across every PyDict_SetItem call
1525
        // site; the cost is slightly more work for full collections on dicts
1526
        // with only atomic values.
1527
633
        gcstate->long_lived_pending = 0;
1528
633
        gcstate->long_lived_total = gc_list_size(young);
1529
633
    }
1530
1531
    /* All objects in unreachable are trash, but objects reachable from
1532
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
1533
     */
1534
92.7k
    gc_list_init(&finalizers);
1535
    // NEXT_MASK_UNREACHABLE is cleared here.
1536
    // After move_legacy_finalizers(), unreachable is normal list.
1537
92.7k
    move_legacy_finalizers(&unreachable, &finalizers);
1538
    /* finalizers contains the unreachable objects with a legacy finalizer;
1539
     * unreachable objects reachable *from* those are also uncollectable,
1540
     * and we move those into the finalizers list too.
1541
     */
1542
92.7k
    move_legacy_finalizer_reachable(&finalizers);
1543
1544
92.7k
    validate_list(&finalizers, collecting_clear_unreachable_clear);
1545
92.7k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1546
1547
    /* Print debugging information. */
1548
92.7k
    if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
1549
0
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1550
0
            debug_cycle("collectable", FROM_GC(gc));
1551
0
        }
1552
0
    }
1553
1554
    /* Clear weakrefs and invoke callbacks as necessary. */
1555
92.7k
    stats.collected += handle_weakref_callbacks(&unreachable, old);
1556
92.7k
    validate_list(old, collecting_clear_unreachable_clear);
1557
92.7k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1558
1559
    /* Call tp_finalize on objects which have one. */
1560
92.7k
    finalize_garbage(tstate, &unreachable);
1561
1562
    /* Handle any objects that may have resurrected after the call
1563
     * to 'finalize_garbage' and continue the collection with the
1564
     * objects that are still unreachable */
1565
92.7k
    PyGC_Head final_unreachable;
1566
92.7k
    handle_resurrected_objects(&unreachable, &final_unreachable, old);
1567
1568
    /* Clear weakrefs to objects in the unreachable set.  No Python-level
1569
     * code must be allowed to access those unreachable objects.  During
1570
     * delete_garbage(), finalizers outside the unreachable set might run
1571
     * and create new weakrefs.  If those weakrefs were not cleared, they
1572
     * could reveal unreachable objects.  Callbacks are not executed.
1573
     */
1574
92.7k
    clear_weakrefs(&final_unreachable);
1575
1576
    /* Call tp_clear on objects in the final_unreachable set.  This will cause
1577
    * the reference cycles to be broken.  It may also cause some objects
1578
    * in finalizers to be freed.
1579
    */
1580
92.7k
    stats.collected += gc_list_size(&final_unreachable);
1581
92.7k
    delete_garbage(tstate, gcstate, &final_unreachable, old);
1582
1583
    /* Collect statistics on uncollectable objects found and print
1584
     * debugging information. */
1585
92.7k
    Py_ssize_t n = 0;
1586
92.7k
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1587
0
        n++;
1588
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
1589
0
            debug_cycle("uncollectable", FROM_GC(gc));
1590
0
    }
1591
92.7k
    stats.uncollectable = n;
1592
92.7k
    (void)PyTime_PerfCounterRaw(&stats.ts_stop);
1593
92.7k
    stats.duration = PyTime_AsSecondsDouble(stats.ts_stop - stats.ts_start);
1594
92.7k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
1595
0
        PySys_WriteStderr(
1596
0
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1597
0
            stats.uncollectable+stats.collected, stats.uncollectable,
1598
0
            stats.duration);
1599
0
    }
1600
1601
    /* Append instances in the uncollectable set to a Python
1602
     * reachable list of garbage.  The programmer has to deal with
1603
     * this if they insist on creating this type of structure.
1604
     */
1605
92.7k
    handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1606
92.7k
    validate_list(old, collecting_clear_unreachable_clear);
1607
1608
    /* Clear free list only during the collection of the highest
1609
     * generation */
1610
92.7k
    if (generation == NUM_GENERATIONS-1) {
1611
633
        _PyGC_ClearAllFreeLists(tstate->interp);
1612
633
    }
1613
1614
92.7k
    if (_PyErr_Occurred(tstate)) {
1615
0
        if (reason == _Py_GC_REASON_SHUTDOWN) {
1616
0
            _PyErr_Clear(tstate);
1617
0
        }
1618
0
        else {
1619
0
            PyErr_FormatUnraisable("Exception ignored in garbage collection");
1620
0
        }
1621
0
    }
1622
1623
    /* Update stats */
1624
92.7k
    add_stats(gcstate, generation, &stats);
1625
92.7k
    GC_STAT_ADD(generation, objects_collected, stats.collected);
1626
1627
#ifdef Py_STATS
1628
    {
1629
        PyStats *s = _PyStats_GET();
1630
        if (s) {
1631
            GC_STAT_ADD(generation, object_visits,
1632
                s->object_stats.object_visits);
1633
            s->object_stats.object_visits = 0;
1634
        }
1635
    }
1636
#endif
1637
1638
92.7k
    if (PyDTrace_GC_DONE_ENABLED()) {
1639
0
        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
1640
0
    }
1641
1642
92.7k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
1643
92.7k
        invoke_gc_callback(tstate, "stop", generation, &stats);
1644
92.7k
    }
1645
1646
92.7k
    assert(!_PyErr_Occurred(tstate));
1647
92.7k
    gcstate->frame = NULL;
1648
92.7k
    _Py_atomic_store_int(&gcstate->collecting, 0);
1649
92.7k
    return stats.uncollectable + stats.collected;
1650
113k
}
1651
1652
static int
1653
referrersvisit(PyObject* obj, void *arg)
1654
0
{
1655
0
    PyObject *objs = arg;
1656
0
    Py_ssize_t i;
1657
0
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
1658
0
        if (PyTuple_GET_ITEM(objs, i) == obj) {
1659
0
            return 1;
1660
0
        }
1661
0
    }
1662
0
    return 0;
1663
0
}
1664
1665
static int
1666
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1667
0
{
1668
0
    PyGC_Head *gc;
1669
0
    PyObject *obj;
1670
0
    traverseproc traverse;
1671
0
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1672
0
        obj = FROM_GC(gc);
1673
0
        traverse = Py_TYPE(obj)->tp_traverse;
1674
0
        if (obj == objs || obj == resultlist) {
1675
0
            continue;
1676
0
        }
1677
0
        if (traverse(obj, referrersvisit, objs)) {
1678
0
            if (PyList_Append(resultlist, obj) < 0) {
1679
0
                return 0; /* error */
1680
0
            }
1681
0
        }
1682
0
    }
1683
0
    return 1; /* no error */
1684
0
}
1685
1686
PyObject *
1687
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
1688
0
{
1689
0
    PyObject *result = PyList_New(0);
1690
0
    if (!result) {
1691
0
        return NULL;
1692
0
    }
1693
1694
0
    GCState *gcstate = &interp->gc;
1695
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1696
0
        if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) {
1697
0
            Py_DECREF(result);
1698
0
            return NULL;
1699
0
        }
1700
0
    }
1701
0
    return result;
1702
0
}
1703
1704
PyObject *
1705
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
1706
0
{
1707
0
    assert(generation >= -1 && generation < NUM_GENERATIONS);
1708
0
    GCState *gcstate = &interp->gc;
1709
1710
0
    PyObject *result = PyList_New(0);
1711
0
    if (result == NULL) {
1712
0
        return NULL;
1713
0
    }
1714
1715
0
    if (generation == -1) {
1716
        /* If generation is -1, get all objects from all generations */
1717
0
        for (int i = 0; i < NUM_GENERATIONS; i++) {
1718
0
            if (append_objects(result, GEN_HEAD(gcstate, i))) {
1719
0
                goto error;
1720
0
            }
1721
0
        }
1722
0
    }
1723
0
    else {
1724
0
        if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1725
0
            goto error;
1726
0
        }
1727
0
    }
1728
1729
0
    return result;
1730
0
error:
1731
0
    Py_DECREF(result);
1732
0
    return NULL;
1733
0
}
1734
1735
void
1736
_PyGC_Freeze(PyInterpreterState *interp)
1737
0
{
1738
0
    GCState *gcstate = &interp->gc;
1739
0
    for (int i = 0; i < NUM_GENERATIONS; ++i) {
1740
0
        gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1741
0
        gcstate->generations[i].count = 0;
1742
0
    }
1743
0
}
1744
1745
void
1746
_PyGC_Unfreeze(PyInterpreterState *interp)
1747
0
{
1748
0
    GCState *gcstate = &interp->gc;
1749
0
    gc_list_merge(&gcstate->permanent_generation.head,
1750
0
                  GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1751
0
}
1752
1753
Py_ssize_t
1754
_PyGC_GetFreezeCount(PyInterpreterState *interp)
1755
0
{
1756
0
    GCState *gcstate = &interp->gc;
1757
0
    return gc_list_size(&gcstate->permanent_generation.head);
1758
0
}
1759
1760
/* C API for controlling the state of the garbage collector */
1761
int
1762
PyGC_Enable(void)
1763
0
{
1764
0
    GCState *gcstate = get_gc_state();
1765
0
    int old_state = gcstate->enabled;
1766
0
    gcstate->enabled = 1;
1767
0
    return old_state;
1768
0
}
1769
1770
int
1771
PyGC_Disable(void)
1772
0
{
1773
0
    GCState *gcstate = get_gc_state();
1774
0
    int old_state = gcstate->enabled;
1775
0
    gcstate->enabled = 0;
1776
0
    return old_state;
1777
0
}
1778
1779
int
1780
PyGC_IsEnabled(void)
1781
0
{
1782
0
    GCState *gcstate = get_gc_state();
1783
0
    return gcstate->enabled;
1784
0
}
1785
1786
/* Public API to invoke gc.collect() from C */
1787
Py_ssize_t
1788
PyGC_Collect(void)
1789
0
{
1790
0
    PyThreadState *tstate = _PyThreadState_GET();
1791
0
    GCState *gcstate = &tstate->interp->gc;
1792
1793
0
    if (!gcstate->enabled) {
1794
0
        return 0;
1795
0
    }
1796
1797
0
    Py_ssize_t n;
1798
0
    PyObject *exc = _PyErr_GetRaisedException(tstate);
1799
0
    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
1800
0
    _PyErr_SetRaisedException(tstate, exc);
1801
1802
0
    return n;
1803
0
}
1804
1805
Py_ssize_t
1806
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1807
0
{
1808
0
    return gc_collect_main(tstate, generation, reason);
1809
0
}
1810
1811
void
1812
_PyGC_CollectNoFail(PyThreadState *tstate)
1813
0
{
1814
    /* Ideally, this function is only called on interpreter shutdown,
1815
       and therefore not recursively.  Unfortunately, when there are daemon
1816
       threads, a daemon thread can start a cyclic garbage collection
1817
       during interpreter shutdown (and then never finish it).
1818
       See http://bugs.python.org/issue8713#msg195178 for an example.
1819
       */
1820
0
    gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
1821
0
}
1822
1823
void
1824
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
1825
0
{
1826
0
    GCState *gcstate = &interp->gc;
1827
0
    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
1828
0
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
1829
0
        const char *message;
1830
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
1831
0
            message = "gc: %zd uncollectable objects at shutdown";
1832
0
        }
1833
0
        else {
1834
0
            message = "gc: %zd uncollectable objects at shutdown; " \
1835
0
                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
1836
0
        }
1837
        /* PyErr_WarnFormat does too many things and we are at shutdown,
1838
           the warnings module's dependencies (e.g. linecache) may be gone
1839
           already. */
1840
0
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
1841
0
                                     "gc", NULL, message,
1842
0
                                     PyList_GET_SIZE(gcstate->garbage)))
1843
0
        {
1844
0
            PyErr_FormatUnraisable("Exception ignored in GC shutdown");
1845
0
        }
1846
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
1847
0
            PyObject *repr = NULL, *bytes = NULL;
1848
0
            repr = PyObject_Repr(gcstate->garbage);
1849
0
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
1850
0
                PyErr_FormatUnraisable("Exception ignored in GC shutdown "
1851
0
                                       "while formatting garbage");
1852
0
            }
1853
0
            else {
1854
0
                PySys_WriteStderr(
1855
0
                    "      %s\n",
1856
0
                    PyBytes_AS_STRING(bytes)
1857
0
                    );
1858
0
            }
1859
0
            Py_XDECREF(repr);
1860
0
            Py_XDECREF(bytes);
1861
0
        }
1862
0
    }
1863
0
}
1864
1865
static void
1866
0
finalize_unlink_gc_head(PyGC_Head *gc) {
1867
0
    PyGC_Head *prev = GC_PREV(gc);
1868
0
    PyGC_Head *next = GC_NEXT(gc);
1869
0
    _PyGCHead_SET_NEXT(prev, next);
1870
0
    _PyGCHead_SET_PREV(next, prev);
1871
0
}
1872
1873
void
1874
_PyGC_Fini(PyInterpreterState *interp)
1875
0
{
1876
0
    GCState *gcstate = &interp->gc;
1877
0
    Py_CLEAR(gcstate->garbage);
1878
0
    Py_CLEAR(gcstate->callbacks);
1879
1880
    /* Prevent a subtle bug that affects sub-interpreters that use basic
1881
     * single-phase init extensions (m_size == -1).  Those extensions cause objects
1882
     * to be shared between interpreters, via the PyDict_Update(mdict, m_copy) call
1883
     * in import_find_extension().
1884
     *
1885
     * If they are GC objects, their GC head next or prev links could refer to
1886
     * the interpreter _gc_runtime_state PyGC_Head nodes.  Those nodes go away
1887
     * when the interpreter structure is freed and so pointers to them become
1888
     * invalid.  If those objects are still used by another interpreter and
1889
     * UNTRACK is called on them, a crash will happen.  We untrack the nodes
1890
     * here to avoid that.
1891
     *
1892
     * This bug was originally fixed when reported as gh-90228.  The bug was
1893
     * re-introduced in gh-94673.
1894
     */
1895
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1896
0
        finalize_unlink_gc_head(&gcstate->generations[i].head);
1897
0
    }
1898
0
    finalize_unlink_gc_head(&gcstate->permanent_generation.head);
1899
0
}
1900
1901
/* for debugging */
1902
void
1903
_PyGC_Dump(PyGC_Head *g)
1904
0
{
1905
0
    PyObject_Dump(FROM_GC(g));
1906
0
}
1907
1908
1909
#ifdef Py_DEBUG
1910
static int
1911
visit_validate(PyObject *op, void *parent_raw)
1912
{
1913
    PyObject *parent = _PyObject_CAST(parent_raw);
1914
    if (_PyObject_IsFreed(op)) {
1915
        _PyObject_ASSERT_FAILED_MSG(parent,
1916
                                    "PyObject_GC_Track() object is not valid");
1917
    }
1918
    return 0;
1919
}
1920
#endif
1921
1922
1923
/* extension modules might be compiled with GC support so these
1924
   functions must always be available */
1925
1926
void
1927
PyObject_GC_Track(void *op_raw)
1928
181M
{
1929
181M
    PyObject *op = _PyObject_CAST(op_raw);
1930
181M
    if (_PyObject_GC_IS_TRACKED(op)) {
1931
0
        _PyObject_ASSERT_FAILED_MSG(op,
1932
0
                                    "object already tracked "
1933
0
                                    "by the garbage collector");
1934
0
    }
1935
181M
    _PyObject_GC_TRACK(op);
1936
1937
#ifdef Py_DEBUG
1938
    /* Check that the object is valid: validate objects traversed
1939
       by tp_traverse() */
1940
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
1941
    (void)traverse(op, visit_validate, op);
1942
#endif
1943
181M
}
1944
1945
void
1946
PyObject_GC_UnTrack(void *op_raw)
1947
1.32G
{
1948
1.32G
    PyObject *op = _PyObject_CAST(op_raw);
1949
    /* The code for some objects, such as tuples, is a bit
1950
     * sloppy about when the object is tracked and untracked. */
1951
1.32G
    if (_PyObject_GC_IS_TRACKED(op)) {
1952
1.09G
        _PyObject_GC_UNTRACK(op);
1953
1.09G
    }
1954
1.32G
}
1955
1956
int
1957
PyObject_IS_GC(PyObject *obj)
1958
229M
{
1959
229M
    return _PyObject_IS_GC(obj);
1960
229M
}
1961
1962
void
1963
_Py_ScheduleGC(PyThreadState *tstate)
1964
17.6M
{
1965
17.6M
    if (!_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT))
1966
113k
    {
1967
113k
        _Py_set_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
1968
113k
    }
1969
17.6M
}
1970
1971
void
1972
_PyObject_GC_Link(PyObject *op)
1973
613M
{
1974
613M
    PyGC_Head *gc = AS_GC(op);
1975
    // gc must be correctly aligned
1976
613M
    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
1977
1978
613M
    PyThreadState *tstate = _PyThreadState_GET();
1979
613M
    GCState *gcstate = &tstate->interp->gc;
1980
613M
    gc->_gc_next = 0;
1981
613M
    gc->_gc_prev = 0;
1982
613M
    gcstate->generations[0].count++; /* number of allocated GC objects */
1983
613M
    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
1984
17.6M
        gcstate->enabled &&
1985
17.6M
        gcstate->generations[0].threshold &&
1986
17.6M
        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
1987
17.6M
        !_PyErr_Occurred(tstate))
1988
17.6M
    {
1989
17.6M
        _Py_ScheduleGC(tstate);
1990
17.6M
    }
1991
613M
}
1992
1993
void
1994
_Py_RunGC(PyThreadState *tstate)
1995
113k
{
1996
113k
    GCState *gcstate = get_gc_state();
1997
113k
    if (!gcstate->enabled) {
1998
0
        return;
1999
0
    }
2000
113k
    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
2001
113k
}
2002
2003
static PyObject *
2004
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
2005
489M
{
2006
489M
    PyThreadState *tstate = _PyThreadState_GET();
2007
489M
    if (basicsize > PY_SSIZE_T_MAX - presize) {
2008
0
        return _PyErr_NoMemory(tstate);
2009
0
    }
2010
489M
    size_t size = presize + basicsize;
2011
489M
    char *mem = _PyObject_MallocWithType(tp, size);
2012
489M
    if (mem == NULL) {
2013
0
        return _PyErr_NoMemory(tstate);
2014
0
    }
2015
489M
    ((PyObject **)mem)[0] = NULL;
2016
489M
    ((PyObject **)mem)[1] = NULL;
2017
489M
    PyObject *op = (PyObject *)(mem + presize);
2018
489M
    _PyObject_GC_Link(op);
2019
489M
    return op;
2020
489M
}
2021
2022
2023
PyObject *
2024
_PyObject_GC_New(PyTypeObject *tp)
2025
253M
{
2026
253M
    size_t presize = _PyType_PreHeaderSize(tp);
2027
253M
    size_t size = _PyObject_SIZE(tp);
2028
253M
    if (_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)) {
2029
236
        size += _PyInlineValuesSize(tp);
2030
236
    }
2031
253M
    PyObject *op = gc_alloc(tp, size, presize);
2032
253M
    if (op == NULL) {
2033
0
        return NULL;
2034
0
    }
2035
253M
    _PyObject_Init(op, tp);
2036
253M
    if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
2037
236
        _PyObject_InitInlineValues(op, tp);
2038
236
    }
2039
253M
    return op;
2040
253M
}
2041
2042
PyVarObject *
2043
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2044
235M
{
2045
235M
    PyVarObject *op;
2046
2047
235M
    if (nitems < 0) {
2048
0
        PyErr_BadInternalCall();
2049
0
        return NULL;
2050
0
    }
2051
235M
    size_t presize = _PyType_PreHeaderSize(tp);
2052
235M
    size_t size = _PyObject_VAR_SIZE(tp, nitems);
2053
235M
    op = (PyVarObject *)gc_alloc(tp, size, presize);
2054
235M
    if (op == NULL) {
2055
0
        return NULL;
2056
0
    }
2057
235M
    _PyObject_InitVar(op, tp, nitems);
2058
235M
    return op;
2059
235M
}
2060
2061
PyObject *
2062
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2063
0
{
2064
0
    size_t presize = _PyType_PreHeaderSize(tp);
2065
0
    size_t size = _PyObject_SIZE(tp) + extra_size;
2066
0
    PyObject *op = gc_alloc(tp, size, presize);
2067
0
    if (op == NULL) {
2068
0
        return NULL;
2069
0
    }
2070
0
    memset((char *)op + sizeof(PyObject), 0, size - sizeof(PyObject));
2071
0
    _PyObject_Init(op, tp);
2072
0
    return op;
2073
0
}
2074
2075
PyVarObject *
2076
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2077
41
{
2078
41
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2079
41
    const size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2080
41
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2081
41
    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2082
0
        return (PyVarObject *)PyErr_NoMemory();
2083
0
    }
2084
41
    char *mem = (char *)op - presize;
2085
41
    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
2086
41
    if (mem == NULL) {
2087
0
        return (PyVarObject *)PyErr_NoMemory();
2088
0
    }
2089
41
    op = (PyVarObject *) (mem + presize);
2090
41
    Py_SET_SIZE(op, nitems);
2091
41
    return op;
2092
41
}
2093
2094
void
2095
PyObject_GC_Del(void *op)
2096
611M
{
2097
611M
    size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2098
611M
    PyGC_Head *g = AS_GC(op);
2099
611M
    if (_PyObject_GC_IS_TRACKED(op)) {
2100
0
        gc_list_remove(g);
2101
0
        GCState *gcstate = get_gc_state();
2102
0
        gcstate->heap_size--;
2103
#ifdef Py_DEBUG
2104
        PyObject *exc = PyErr_GetRaisedException();
2105
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2106
                                     "gc", NULL,
2107
                                     "Object of type %s is not untracked "
2108
                                     "before destruction",
2109
                                     Py_TYPE(op)->tp_name))
2110
        {
2111
            PyErr_FormatUnraisable("Exception ignored on object deallocation");
2112
        }
2113
        PyErr_SetRaisedException(exc);
2114
#endif
2115
0
    }
2116
611M
    GCState *gcstate = get_gc_state();
2117
611M
    if (gcstate->generations[0].count > 0) {
2118
417M
        gcstate->generations[0].count--;
2119
417M
    }
2120
611M
    PyObject_Free(((char *)op)-presize);
2121
611M
}
2122
2123
int
2124
PyObject_GC_IsTracked(PyObject* obj)
2125
9.36k
{
2126
9.36k
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2127
32
        return 1;
2128
32
    }
2129
9.33k
    return 0;
2130
9.36k
}
2131
2132
int
2133
PyObject_GC_IsFinalized(PyObject *obj)
2134
0
{
2135
0
    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
2136
0
         return 1;
2137
0
    }
2138
0
    return 0;
2139
0
}
2140
2141
static int
2142
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
2143
0
{
2144
0
    PyGC_Head *gc_list, *gc;
2145
0
    gc_list = &gen->head;
2146
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2147
0
        PyObject *op = FROM_GC(gc);
2148
0
        Py_INCREF(op);
2149
0
        int res = callback(op, arg);
2150
0
        Py_DECREF(op);
2151
0
        if (!res) {
2152
0
            return -1;
2153
0
        }
2154
0
    }
2155
0
    return 0;
2156
0
}
2157
2158
void
2159
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2160
0
{
2161
0
    GCState *gcstate = get_gc_state();
2162
0
    int original_state = gcstate->enabled;
2163
0
    gcstate->enabled = 0;
2164
0
    for (size_t i = 0; i < NUM_GENERATIONS; i++) {
2165
0
        if (visit_generation(callback, arg, &gcstate->generations[i]) < 0) {
2166
0
            goto done;
2167
0
        }
2168
0
    }
2169
0
    visit_generation(callback, arg, &gcstate->permanent_generation);
2170
0
done:
2171
0
    gcstate->enabled = original_state;
2172
0
}
2173
2174
#endif  // !Py_GIL_DISABLED