Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/gc.c
Line
Count
Source
1
//  This implements the reference cycle garbage collector.
2
//  The Python module interface to the collector is in gcmodule.c.
3
//  See InternalDocs/garbage_collector.md for more information.
4
5
#include "Python.h"
6
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
7
#include "pycore_dict.h"          // _PyInlineValuesSize()
8
#include "pycore_initconfig.h"    // _PyStatus_OK()
9
#include "pycore_interp.h"        // PyInterpreterState.gc
10
#include "pycore_interpframe.h"   // _PyFrame_GetLocalsArray()
11
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
12
#include "pycore_pystate.h"       // _PyThreadState_GET()
13
#include "pycore_tuple.h"         // _PyTuple_MaybeUntrack()
14
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
15
16
#include "pydtrace.h"
17
18
19
#ifndef Py_GIL_DISABLED
20
21
typedef struct _gc_runtime_state GCState;
22
23
#ifdef Py_DEBUG
24
#  define GC_DEBUG
25
#endif
26
27
// Define this when debugging the GC
28
// #define GC_EXTRA_DEBUG
29
30
31
924M
#define GC_NEXT _PyGCHead_NEXT
32
345M
#define GC_PREV _PyGCHead_PREV
33
34
// update_refs() set this bit for all objects in current generation.
35
// subtract_refs() and move_unreachable() use this to distinguish
36
// whether a visited object is being collected or not.
37
//
38
// move_unreachable() removes this flag from reachable objects.
39
// Only unreachable objects have this flag.
40
//
41
// No objects in interpreter have this flag after GC ends.
42
415M
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
43
44
// Lowest bit of _gc_next is used for UNREACHABLE flag.
45
//
46
// This flag represents the object is in unreachable list in move_unreachable()
47
//
48
// Although this flag is used only in move_unreachable(), move_unreachable()
49
// doesn't clear this flag to skip unnecessary iteration.
50
// move_legacy_finalizers() removes this flag instead.
51
// Between them, unreachable list is not normal list and we can not use
52
// most gc_list_* functions for it.
53
60.3M
#define NEXT_MASK_UNREACHABLE  2
54
55
1.86G
#define AS_GC(op) _Py_AS_GC(op)
56
641M
#define FROM_GC(gc) _Py_FROM_GC(gc)
57
58
// Automatically choose the generation that needs collecting.
59
#define GENERATION_AUTO (-1)
60
61
static inline int
62
gc_is_collecting(PyGC_Head *g)
63
249M
{
64
249M
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
65
249M
}
66
67
static inline void
68
gc_clear_collecting(PyGC_Head *g)
69
80.2M
{
70
80.2M
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
71
80.2M
}
72
73
static inline Py_ssize_t
74
gc_get_refs(PyGC_Head *g)
75
206M
{
76
206M
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
77
206M
}
78
79
static inline void
80
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
81
30.6M
{
82
30.6M
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
83
30.6M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
84
30.6M
}
85
86
static inline void
87
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
88
85.5M
{
89
85.5M
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
90
85.5M
        | PREV_MASK_COLLECTING
91
85.5M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
92
85.5M
}
93
94
static inline void
95
gc_decref(PyGC_Head *g)
96
50.8M
{
97
50.8M
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
98
50.8M
                              gc_get_refs(g) > 0,
99
50.8M
                              "refcount is too small");
100
50.8M
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
101
50.8M
}
102
103
static inline int
104
gc_old_space(PyGC_Head *g)
105
274M
{
106
274M
    return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1;
107
274M
}
108
109
static inline int
110
other_space(int space)
111
1.81k
{
112
1.81k
    assert(space == 0 || space == 1);
113
1.81k
    return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1;
114
1.81k
}
115
116
static inline void
117
gc_flip_old_space(PyGC_Head *g)
118
147M
{
119
147M
    g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1;
120
147M
}
121
122
static inline void
123
gc_set_old_space(PyGC_Head *g, int space)
124
75.3M
{
125
75.3M
    assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1);
126
75.3M
    g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1;
127
75.3M
    g->_gc_next |= space;
128
75.3M
}
129
130
static PyGC_Head *
131
GEN_HEAD(GCState *gcstate, int n)
132
0
{
133
0
    assert((gcstate->visited_space & (~1)) == 0);
134
0
    switch(n) {
135
0
        case 0:
136
0
            return &gcstate->young.head;
137
0
        case 1:
138
0
            return &gcstate->old[gcstate->visited_space].head;
139
0
        case 2:
140
0
            return &gcstate->old[gcstate->visited_space^1].head;
141
0
        default:
142
0
            Py_UNREACHABLE();
143
0
    }
144
0
}
145
146
static GCState *
147
get_gc_state(void)
148
21.0k
{
149
21.0k
    PyInterpreterState *interp = _PyInterpreterState_GET();
150
21.0k
    return &interp->gc;
151
21.0k
}
152
153
154
void
155
_PyGC_InitState(GCState *gcstate)
156
36
{
157
36
#define INIT_HEAD(GEN) \
158
144
    do { \
159
144
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
160
144
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
161
144
    } while (0)
162
163
36
    assert(gcstate->young.count == 0);
164
36
    assert(gcstate->old[0].count == 0);
165
36
    assert(gcstate->old[1].count == 0);
166
36
    INIT_HEAD(gcstate->young);
167
36
    INIT_HEAD(gcstate->old[0]);
168
36
    INIT_HEAD(gcstate->old[1]);
169
36
    INIT_HEAD(gcstate->permanent_generation);
170
171
36
#undef INIT_HEAD
172
36
}
173
174
175
PyStatus
176
_PyGC_Init(PyInterpreterState *interp)
177
36
{
178
36
    GCState *gcstate = &interp->gc;
179
180
36
    gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats));
181
36
    if (gcstate->generation_stats == NULL) {
182
0
        return _PyStatus_NO_MEMORY();
183
0
    }
184
185
36
    gcstate->garbage = PyList_New(0);
186
36
    if (gcstate->garbage == NULL) {
187
0
        return _PyStatus_NO_MEMORY();
188
0
    }
189
190
36
    gcstate->callbacks = PyList_New(0);
191
36
    if (gcstate->callbacks == NULL) {
192
0
        return _PyStatus_NO_MEMORY();
193
0
    }
194
36
    gcstate->heap_size = 0;
195
196
36
    return _PyStatus_OK();
197
36
}
198
199
200
/*
201
_gc_prev values
202
---------------
203
204
Between collections, _gc_prev is used for doubly linked list.
205
206
Lowest two bits of _gc_prev are used for flags.
207
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
208
or _PyObject_GC_UNTRACK() is called.
209
210
During a collection, _gc_prev is temporarily used for gc_refs, and the gc list
211
is singly linked until _gc_prev is restored.
212
213
gc_refs
214
    At the start of a collection, update_refs() copies the true refcount
215
    to gc_refs, for each object in the generation being collected.
216
    subtract_refs() then adjusts gc_refs so that it equals the number of
217
    times an object is referenced directly from outside the generation
218
    being collected.
219
220
PREV_MASK_COLLECTING
221
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
222
    update_refs().
223
224
225
_gc_next values
226
---------------
227
228
_gc_next takes these values:
229
230
0
231
    The object is not tracked
232
233
!= 0
234
    Pointer to the next object in the GC list.
235
    Additionally, lowest bit is used temporarily for
236
    NEXT_MASK_UNREACHABLE flag described below.
237
238
NEXT_MASK_UNREACHABLE
239
    move_unreachable() then moves objects not reachable (whether directly or
240
    indirectly) from outside the generation into an "unreachable" set and
241
    set this flag.
242
243
    Objects that are found to be reachable have gc_refs set to 1.
244
    When this flag is set for the reachable object, the object must be in
245
    "unreachable" set.
246
    The flag is unset and the object is moved back to "reachable" set.
247
248
    move_legacy_finalizers() will remove this flag from "unreachable" set.
249
*/
250
251
/*** list functions ***/
252
253
static inline void
254
gc_list_init(PyGC_Head *list)
255
298k
{
256
    // List header must not have flags.
257
    // We can assign pointer by simple cast.
258
298k
    list->_gc_prev = (uintptr_t)list;
259
298k
    list->_gc_next = (uintptr_t)list;
260
298k
}
261
262
static inline int
263
gc_list_is_empty(PyGC_Head *list)
264
158M
{
265
158M
    return (list->_gc_next == (uintptr_t)list);
266
158M
}
267
268
/* Append `node` to `list`. */
269
static inline void
270
gc_list_append(PyGC_Head *node, PyGC_Head *list)
271
11.6M
{
272
11.6M
    assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0);
273
11.6M
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
274
275
    // last <-> node
276
11.6M
    _PyGCHead_SET_PREV(node, last);
277
11.6M
    _PyGCHead_SET_NEXT(last, node);
278
279
    // node <-> list
280
11.6M
    _PyGCHead_SET_NEXT(node, list);
281
11.6M
    list->_gc_prev = (uintptr_t)node;
282
11.6M
}
283
284
/* Remove `node` from the gc list it's currently in. */
285
static inline void
286
gc_list_remove(PyGC_Head *node)
287
0
{
288
0
    PyGC_Head *prev = GC_PREV(node);
289
0
    PyGC_Head *next = GC_NEXT(node);
290
291
0
    _PyGCHead_SET_NEXT(prev, next);
292
0
    _PyGCHead_SET_PREV(next, prev);
293
294
0
    node->_gc_next = 0; /* object is not currently tracked */
295
0
}
296
297
/* Move `node` from the gc list it's currently in (which is not explicitly
298
 * named here) to the end of `list`.  This is semantically the same as
299
 * gc_list_remove(node) followed by gc_list_append(node, list).
300
 */
301
static void
302
gc_list_move(PyGC_Head *node, PyGC_Head *list)
303
305M
{
304
    /* Unlink from current list. */
305
305M
    PyGC_Head *from_prev = GC_PREV(node);
306
305M
    PyGC_Head *from_next = GC_NEXT(node);
307
305M
    _PyGCHead_SET_NEXT(from_prev, from_next);
308
305M
    _PyGCHead_SET_PREV(from_next, from_prev);
309
310
    /* Relink at end of new list. */
311
    // list must not have flags.  So we can skip macros.
312
305M
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
313
305M
    _PyGCHead_SET_PREV(node, to_prev);
314
305M
    _PyGCHead_SET_NEXT(to_prev, node);
315
305M
    list->_gc_prev = (uintptr_t)node;
316
305M
    _PyGCHead_SET_NEXT(node, list);
317
305M
}
318
319
/* append list `from` onto list `to`; `from` becomes an empty list */
320
static void
321
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
322
126k
{
323
126k
    assert(from != to);
324
126k
    if (!gc_list_is_empty(from)) {
325
61.5k
        PyGC_Head *to_tail = GC_PREV(to);
326
61.5k
        PyGC_Head *from_head = GC_NEXT(from);
327
61.5k
        PyGC_Head *from_tail = GC_PREV(from);
328
61.5k
        assert(from_head != from);
329
61.5k
        assert(from_tail != from);
330
61.5k
        assert(gc_list_is_empty(to) ||
331
61.5k
            gc_old_space(to_tail) == gc_old_space(from_tail));
332
333
61.5k
        _PyGCHead_SET_NEXT(to_tail, from_head);
334
61.5k
        _PyGCHead_SET_PREV(from_head, to_tail);
335
336
61.5k
        _PyGCHead_SET_NEXT(from_tail, to);
337
61.5k
        _PyGCHead_SET_PREV(to, from_tail);
338
61.5k
    }
339
126k
    gc_list_init(from);
340
126k
}
341
342
static Py_ssize_t
343
gc_list_size(PyGC_Head *list)
344
42.1k
{
345
42.1k
    PyGC_Head *gc;
346
42.1k
    Py_ssize_t n = 0;
347
83.5M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
348
83.4M
        n++;
349
83.4M
    }
350
42.1k
    return n;
351
42.1k
}
352
353
/* Walk the list and mark all objects as non-collecting */
354
static inline void
355
gc_list_clear_collecting(PyGC_Head *collectable)
356
21.0k
{
357
21.0k
    PyGC_Head *gc;
358
8.65M
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
359
8.63M
        gc_clear_collecting(gc);
360
8.63M
    }
361
21.0k
}
362
363
/* Append objects in a GC list to a Python list.
364
 * Return 0 if all OK, < 0 if error (out of memory for list)
365
 */
366
static int
367
append_objects(PyObject *py_list, PyGC_Head *gc_list)
368
0
{
369
0
    PyGC_Head *gc;
370
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
371
0
        PyObject *op = FROM_GC(gc);
372
0
        if (op != py_list) {
373
0
            if (PyList_Append(py_list, op)) {
374
0
                return -1; /* exception */
375
0
            }
376
0
        }
377
0
    }
378
0
    return 0;
379
0
}
380
381
// Constants for validate_list's flags argument.
382
enum flagstates {collecting_clear_unreachable_clear,
383
                 collecting_clear_unreachable_set,
384
                 collecting_set_unreachable_clear,
385
                 collecting_set_unreachable_set};
386
387
#ifdef GC_DEBUG
388
// validate_list checks list consistency.  And it works as document
389
// describing when flags are expected to be set / unset.
390
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
391
// the prev and next pointers are "polluted" with flags.
392
// What's checked:
393
// - The `head` pointers are not polluted.
394
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
395
//   set or clear, as specified by the 'flags' argument.
396
// - The prev and next pointers are mutually consistent.
397
static void
398
validate_list(PyGC_Head *head, enum flagstates flags)
399
{
400
    assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
401
    assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
402
    uintptr_t prev_value = 0, next_value = 0;
403
    switch (flags) {
404
        case collecting_clear_unreachable_clear:
405
            break;
406
        case collecting_set_unreachable_clear:
407
            prev_value = PREV_MASK_COLLECTING;
408
            break;
409
        case collecting_clear_unreachable_set:
410
            next_value = NEXT_MASK_UNREACHABLE;
411
            break;
412
        case collecting_set_unreachable_set:
413
            prev_value = PREV_MASK_COLLECTING;
414
            next_value = NEXT_MASK_UNREACHABLE;
415
            break;
416
        default:
417
            assert(! "bad internal flags argument");
418
    }
419
    PyGC_Head *prev = head;
420
    PyGC_Head *gc = GC_NEXT(head);
421
    while (gc != head) {
422
        PyGC_Head *trueprev = GC_PREV(gc);
423
        PyGC_Head *truenext = GC_NEXT(gc);
424
        assert(truenext != NULL);
425
        assert(trueprev == prev);
426
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
427
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
428
        prev = gc;
429
        gc = truenext;
430
    }
431
    assert(prev == GC_PREV(head));
432
}
433
434
#else
435
273k
#define validate_list(x, y) do{}while(0)
436
#endif
437
438
#ifdef GC_EXTRA_DEBUG
439
440
441
static void
442
gc_list_validate_space(PyGC_Head *head, int space) {
443
    PyGC_Head *gc = GC_NEXT(head);
444
    while (gc != head) {
445
        assert(gc_old_space(gc) == space);
446
        gc = GC_NEXT(gc);
447
    }
448
}
449
450
static void
451
validate_spaces(GCState *gcstate)
452
{
453
    int visited = gcstate->visited_space;
454
    int not_visited = other_space(visited);
455
    gc_list_validate_space(&gcstate->young.head, not_visited);
456
    for (int space = 0; space < 2; space++) {
457
        gc_list_validate_space(&gcstate->old[space].head, space);
458
    }
459
    gc_list_validate_space(&gcstate->permanent_generation.head, visited);
460
}
461
462
static void
463
validate_consistent_old_space(PyGC_Head *head)
464
{
465
    PyGC_Head *gc = GC_NEXT(head);
466
    if (gc == head) {
467
        return;
468
    }
469
    int old_space = gc_old_space(gc);
470
    while (gc != head) {
471
        PyGC_Head *truenext = GC_NEXT(gc);
472
        assert(truenext != NULL);
473
        assert(gc_old_space(gc) == old_space);
474
        gc = truenext;
475
    }
476
}
477
478
479
#else
480
119k
#define validate_spaces(g) do{}while(0)
481
105k
#define validate_consistent_old_space(l) do{}while(0)
482
108k
#define gc_list_validate_space(l, s) do{}while(0)
483
#endif
484
485
/*** end of list stuff ***/
486
487
488
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
489
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
490
 */
491
static Py_ssize_t
492
update_refs(PyGC_Head *containers)
493
42.1k
{
494
42.1k
    PyGC_Head *next;
495
42.1k
    PyGC_Head *gc = GC_NEXT(containers);
496
42.1k
    Py_ssize_t candidates = 0;
497
498
85.5M
    while (gc != containers) {
499
85.5M
        next = GC_NEXT(gc);
500
85.5M
        PyObject *op = FROM_GC(gc);
501
85.5M
        if (_Py_IsImmortal(op)) {
502
0
            assert(!_Py_IsStaticImmortal(op));
503
0
            _PyObject_GC_UNTRACK(op);
504
0
           gc = next;
505
0
           continue;
506
0
        }
507
85.5M
        gc_reset_refs(gc, Py_REFCNT(op));
508
        /* Python's cyclic gc should never see an incoming refcount
509
         * of 0:  if something decref'ed to 0, it should have been
510
         * deallocated immediately at that time.
511
         * Possible cause (if the assert triggers):  a tp_dealloc
512
         * routine left a gc-aware object tracked during its teardown
513
         * phase, and did something-- or allowed something to happen --
514
         * that called back into Python.  gc can trigger then, and may
515
         * see the still-tracked dying object.  Before this assert
516
         * was added, such mistakes went on to allow gc to try to
517
         * delete the object again.  In a debug build, that caused
518
         * a mysterious segfault, when _Py_ForgetReference tried
519
         * to remove the object from the doubly-linked list of all
520
         * objects a second time.  In a release build, an actual
521
         * double deallocation occurred, which leads to corruption
522
         * of the allocator's internal bookkeeping pointers.  That's
523
         * so serious that maybe this should be a release-build
524
         * check instead of an assert?
525
         */
526
85.5M
        _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
527
85.5M
        gc = next;
528
85.5M
        candidates++;
529
85.5M
    }
530
42.1k
    return candidates;
531
42.1k
}
532
533
struct visit_decref_context {
534
    PyObject *parent;
535
    struct gc_generation_stats *stats;
536
};
537
538
/* A traversal callback for subtract_refs. */
539
static int
540
visit_decref(PyObject *op, void *arg)
541
262M
{
542
262M
    OBJECT_STAT_INC(object_visits);
543
262M
    struct visit_decref_context *ctx = (struct visit_decref_context *)arg;
544
262M
    ctx->stats->object_visits += 1;
545
262M
    _PyObject_ASSERT(ctx->parent, !_PyObject_IsFreed(op));
546
547
262M
    if (_PyObject_IS_GC(op)) {
548
140M
        PyGC_Head *gc = AS_GC(op);
549
        /* We're only interested in gc_refs for objects in the
550
         * generation being collected, which can be recognized
551
         * because only they have positive gc_refs.
552
         */
553
140M
        if (gc_is_collecting(gc)) {
554
50.8M
            gc_decref(gc);
555
50.8M
        }
556
140M
    }
557
262M
    return 0;
558
262M
}
559
560
int
561
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
562
2.46M
{
563
    // This is a bit tricky! We want to ignore stackrefs with embedded
564
    // refcounts when computing the incoming references, but otherwise treat
565
    // them like normal.
566
2.46M
    assert(!PyStackRef_IsTaggedInt(*ref));
567
2.46M
    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
568
348
        return 0;
569
348
    }
570
2.46M
    Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
571
2.46M
    return 0;
572
2.46M
}
573
574
int
575
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
576
460k
{
577
460k
    _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
578
    /* locals and stack */
579
2.62M
    for (; ref < frame->stackpointer; ref++) {
580
2.16M
        if (!PyStackRef_IsTaggedInt(*ref)) {
581
2.16M
            _Py_VISIT_STACKREF(*ref);
582
2.16M
        }
583
2.16M
    }
584
460k
    return 0;
585
460k
}
586
587
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
588
 * for all objects in containers. The ones with gc_refs > 0 are directly
589
 * reachable from outside containers, and so can't be collected.
590
 */
591
static void
592
subtract_refs(PyGC_Head *containers, struct gc_generation_stats *stats)
593
42.1k
{
594
42.1k
    traverseproc traverse;
595
42.1k
    PyGC_Head *gc = GC_NEXT(containers);
596
85.5M
    for (; gc != containers; gc = GC_NEXT(gc)) {
597
85.5M
        PyObject *op = FROM_GC(gc);
598
85.5M
        traverse = Py_TYPE(op)->tp_traverse;
599
85.5M
        struct visit_decref_context ctx = {
600
85.5M
            .parent = op,
601
85.5M
            .stats = stats
602
85.5M
        };
603
85.5M
        (void) traverse(op,
604
85.5M
                        visit_decref,
605
85.5M
                        &ctx);
606
85.5M
    }
607
42.1k
}
608
609
struct visit_reachable_context {
610
    PyGC_Head *head;
611
    struct gc_generation_stats *stats;
612
};
613
614
/* A traversal callback for move_unreachable. */
615
static int
616
visit_reachable(PyObject *op, void *arg)
617
203M
{
618
203M
    struct visit_reachable_context *ctx = (struct visit_reachable_context *)arg;
619
203M
    ctx->stats->object_visits += 1;
620
203M
    PyGC_Head *reachable = ctx->head;
621
203M
    OBJECT_STAT_INC(object_visits);
622
203M
    if (!_PyObject_IS_GC(op)) {
623
94.7M
        return 0;
624
94.7M
    }
625
626
108M
    PyGC_Head *gc = AS_GC(op);
627
108M
    const Py_ssize_t gc_refs = gc_get_refs(gc);
628
629
    // Ignore objects in other generation.
630
    // This also skips objects "to the left" of the current position in
631
    // move_unreachable's scan of the 'young' list - they've already been
632
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
633
108M
    if (! gc_is_collecting(gc)) {
634
77.5M
        return 0;
635
77.5M
    }
636
    // It would be a logic error elsewhere if the collecting flag were set on
637
    // an untracked object.
638
31.3M
    _PyObject_ASSERT(op, gc->_gc_next != 0);
639
640
31.3M
    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
641
        /* This had gc_refs = 0 when move_unreachable got
642
         * to it, but turns out it's reachable after all.
643
         * Move it back to move_unreachable's 'young' list,
644
         * and move_unreachable will eventually get to it
645
         * again.
646
         */
647
        // Manually unlink gc from unreachable list because the list functions
648
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
649
11.6M
        PyGC_Head *prev = GC_PREV(gc);
650
11.6M
        PyGC_Head *next = GC_NEXT(gc);
651
11.6M
        _PyObject_ASSERT(FROM_GC(prev),
652
11.6M
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
653
11.6M
        _PyObject_ASSERT(FROM_GC(next),
654
11.6M
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
655
11.6M
        prev->_gc_next = gc->_gc_next;  // copy flag bits
656
11.6M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
657
11.6M
        _PyGCHead_SET_PREV(next, prev);
658
659
11.6M
        gc_list_append(gc, reachable);
660
11.6M
        gc_set_refs(gc, 1);
661
11.6M
    }
662
19.7M
    else if (gc_refs == 0) {
663
        /* This is in move_unreachable's 'young' list, but
664
         * the traversal hasn't yet gotten to it.  All
665
         * we need to do is tell move_unreachable that it's
666
         * reachable.
667
         */
668
19.0M
        gc_set_refs(gc, 1);
669
19.0M
    }
670
    /* Else there's nothing to do.
671
     * If gc_refs > 0, it must be in move_unreachable's 'young'
672
     * list, and move_unreachable will eventually get to it.
673
     */
674
742k
    else {
675
742k
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
676
742k
    }
677
31.3M
    return 0;
678
108M
}
679
680
/* Move the unreachable objects from young to unreachable.  After this,
681
 * all objects in young don't have PREV_MASK_COLLECTING flag and
682
 * unreachable have the flag.
683
 * All objects in young after this are directly or indirectly reachable
684
 * from outside the original young; and all objects in unreachable are
685
 * not.
686
 *
687
 * This function restores _gc_prev pointer.  young and unreachable are
688
 * doubly linked list after this function.
689
 * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag.
690
 * So we can not use gc_list_* functions for unreachable until we remove the flag.
691
 */
692
static void
693
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, struct gc_generation_stats *stats)
694
42.1k
{
695
    // previous elem in the young list, used for restore gc_prev.
696
42.1k
    PyGC_Head *prev = young;
697
42.1k
    PyGC_Head *gc = GC_NEXT(young);
698
699
    /* Invariants:  all objects "to the left" of us in young are reachable
700
     * (directly or indirectly) from outside the young list as it was at entry.
701
     *
702
     * All other objects from the original young "to the left" of us are in
703
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
704
     * left of us in 'young' now have been scanned, and no objects here
705
     * or to the right have been scanned yet.
706
     */
707
708
42.1k
    struct visit_reachable_context ctx = {
709
42.1k
        .head = young,
710
42.1k
        .stats = stats
711
42.1k
    };
712
713
42.1k
    validate_consistent_old_space(young);
714
    /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */
715
42.1k
    uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1);
716
97.1M
    while (gc != young) {
717
97.1M
        if (gc_get_refs(gc)) {
718
            /* gc is definitely reachable from outside the
719
             * original 'young'.  Mark it as such, and traverse
720
             * its pointers to find any other objects that may
721
             * be directly reachable from it.  Note that the
722
             * call to tp_traverse may append objects to young,
723
             * so we have to wait until it returns to determine
724
             * the next object to visit.
725
             */
726
68.2M
            PyObject *op = FROM_GC(gc);
727
68.2M
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
728
68.2M
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
729
68.2M
                                      "refcount is too small");
730
            // NOTE: visit_reachable may change gc->_gc_next when
731
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
732
68.2M
            (void) traverse(op,
733
68.2M
                    visit_reachable,
734
68.2M
                    &ctx);
735
            // relink gc_prev to prev element.
736
68.2M
            _PyGCHead_SET_PREV(gc, prev);
737
            // gc is not COLLECTING state after here.
738
68.2M
            gc_clear_collecting(gc);
739
68.2M
            prev = gc;
740
68.2M
        }
741
28.8M
        else {
742
            /* This *may* be unreachable.  To make progress,
743
             * assume it is.  gc isn't directly reachable from
744
             * any object we've already traversed, but may be
745
             * reachable from an object we haven't gotten to yet.
746
             * visit_reachable will eventually move gc back into
747
             * young if that's so, and we'll see it again.
748
             */
749
            // Move gc to unreachable.
750
            // No need to gc->next->prev = prev because it is single linked.
751
28.8M
            prev->_gc_next = gc->_gc_next;
752
753
            // We can't use gc_list_append() here because we use
754
            // NEXT_MASK_UNREACHABLE here.
755
28.8M
            PyGC_Head *last = GC_PREV(unreachable);
756
            // NOTE: Since all objects in unreachable set has
757
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
758
            // But this may pollute the unreachable list head's 'next' pointer
759
            // too. That's semantically senseless but expedient here - the
760
            // damage is repaired when this function ends.
761
28.8M
            last->_gc_next = flags | (uintptr_t)gc;
762
28.8M
            _PyGCHead_SET_PREV(gc, last);
763
28.8M
            gc->_gc_next = flags | (uintptr_t)unreachable;
764
28.8M
            unreachable->_gc_prev = (uintptr_t)gc;
765
28.8M
        }
766
97.1M
        gc = _PyGCHead_NEXT(prev);
767
97.1M
    }
768
    // young->_gc_prev must be last element remained in the list.
769
42.1k
    young->_gc_prev = (uintptr_t)prev;
770
42.1k
    young->_gc_next &= _PyGC_PREV_MASK;
771
    // don't let the pollution of the list head's next pointer leak
772
42.1k
    unreachable->_gc_next &= _PyGC_PREV_MASK;
773
42.1k
}
774
775
/* In theory, all tuples should be younger than the
776
* objects they refer to, as tuples are immutable.
777
* Therefore, untracking tuples in oldest-first order in the
778
* young generation before promoting them should have tracked
779
* all the tuples that can be untracked.
780
*
781
* Unfortunately, the C API allows tuples to be created
782
* and then filled in. So this won't untrack all tuples
783
* that can be untracked. It should untrack most of them
784
* and is much faster than a more complex approach that
785
* would untrack all relevant tuples.
786
*/
787
static void
788
untrack_tuples(PyGC_Head *head)
789
43.9k
{
790
43.9k
    PyGC_Head *gc = GC_NEXT(head);
791
216M
    while (gc != head) {
792
216M
        PyObject *op = FROM_GC(gc);
793
216M
        PyGC_Head *next = GC_NEXT(gc);
794
216M
        if (PyTuple_CheckExact(op)) {
795
93.8M
            _PyTuple_MaybeUntrack(op);
796
93.8M
        }
797
216M
        gc = next;
798
216M
    }
799
43.9k
}
800
801
/* Return true if object has a pre-PEP 442 finalization method. */
802
static int
803
has_legacy_finalizer(PyObject *op)
804
8.63M
{
805
8.63M
    return Py_TYPE(op)->tp_del != NULL;
806
8.63M
}
807
808
/* Move the objects in unreachable with tp_del slots into `finalizers`.
809
 *
810
 * This function also removes NEXT_MASK_UNREACHABLE flag
811
 * from _gc_next in unreachable.
812
 */
813
static void
814
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
815
21.0k
{
816
21.0k
    PyGC_Head *gc, *next;
817
21.0k
    _PyObject_ASSERT(
818
21.0k
        FROM_GC(unreachable),
819
21.0k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
820
821
    /* March over unreachable.  Move objects with finalizers into
822
     * `finalizers`.
823
     */
824
8.65M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
825
8.63M
        PyObject *op = FROM_GC(gc);
826
827
8.63M
        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
828
8.63M
        next = GC_NEXT(gc);
829
8.63M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
830
831
8.63M
        if (has_legacy_finalizer(op)) {
832
0
            gc_clear_collecting(gc);
833
0
            gc_list_move(gc, finalizers);
834
0
        }
835
8.63M
    }
836
21.0k
}
837
838
static inline void
839
clear_unreachable_mask(PyGC_Head *unreachable)
840
21.0k
{
841
    /* Check that the list head does not have the unreachable bit set */
842
21.0k
    _PyObject_ASSERT(
843
21.0k
        FROM_GC(unreachable),
844
21.0k
        ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
845
21.0k
    _PyObject_ASSERT(
846
21.0k
        FROM_GC(unreachable),
847
21.0k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
848
849
21.0k
    PyGC_Head *gc, *next;
850
8.65M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
851
8.63M
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
852
8.63M
        next = GC_NEXT(gc);
853
8.63M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
854
8.63M
    }
855
21.0k
    validate_list(unreachable, collecting_set_unreachable_clear);
856
21.0k
}
857
858
/* A traversal callback for move_legacy_finalizer_reachable. */
859
static int
visit_move(PyObject *op, void *arg)
{
    struct visit_reachable_context *ctx = (struct visit_reachable_context *)arg;
    PyGC_Head *tolist = ctx->head;
    ctx->stats->object_visits += 1;
    OBJECT_STAT_INC(object_visits);
    if (_PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* Only objects still marked as being collected are moved; once
         * moved they are cleared so they won't be moved twice. */
        if (gc_is_collecting(gc)) {
            gc_list_move(gc, tolist);
            gc_clear_collecting(gc);
        }
    }
    /* Always continue the traversal. */
    return 0;
}
875
876
/* Move objects that are reachable from finalizers, from the unreachable set
877
 * into finalizers set.
878
 */
879
static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers, struct gc_generation_stats *stats)
{
    struct visit_reachable_context ctx = {
        .head = finalizers,
        .stats = stats
    };
    traverseproc traverse;
    PyGC_Head *gc = GC_NEXT(finalizers);
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
        /* Note that the finalizers list may grow during this. */
        /* visit_move() appends newly found objects to `finalizers`, and
         * the loop condition re-reads GC_NEXT each pass, so additions are
         * traversed too. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        visit_move,
                        &ctx);
    }
}
896
897
/* Handle weakref callbacks.  Note that it's possible for such weakrefs to be
898
 * outside the unreachable set -- indeed, those are precisely the weakrefs
899
 * whose callbacks must be invoked.  See gc_weakref.txt for overview & some
900
 * details.
901
 *
902
 * The clearing of weakrefs is subtle and must be done carefully, as there were
 * previous bugs related to this.  First, weakrefs to the unreachable set of
904
 * objects must be cleared before we start calling `tp_clear`.  If we don't,
905
 * those weakrefs can reveal unreachable objects to Python-level code and that
906
 * is not safe.  Many objects are not usable after `tp_clear` has been called
907
 * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
908
 * example bugs).
909
 *
910
 * Weakrefs with callbacks always need to be cleared before executing the
911
 * callback.  That's because sometimes a callback will call the ref object,
912
 * to check if the reference is actually dead (KeyedRef does this, for
913
 * example).  We want to indicate that it is dead, even though it is possible
914
 * a finalizer might resurrect it.  Clearing also prevents the callback from
915
 * executing more than once.
916
 *
917
 * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
918
 * calling finalizers.  However, since tp_subclasses started being necessary
919
 * to invalidate caches (e.g. by PyType_Modified), that clearing has created
920
 * a bug.  If the weakref to the subclass is cleared before a finalizer is
921
 * called, the cache may not be correctly invalidated.  That can lead to
922
 * segfaults since the caches can refer to deallocated objects (GH-135552
923
 * is an example).  Now, we delay the clear of weakrefs without callbacks
924
 * until *after* finalizers have been executed.  That means weakrefs without
925
 * callbacks are still usable while finalizers are being executed.
926
 *
927
 * The weakrefs that are inside the unreachable set must also be cleared.
928
 * The reason this is required is not immediately obvious.  If the weakref
929
 * refers to an object outside of the unreachable set, that object might go
930
 * away when we start clearing objects.  Normally, the object should also be
931
 * part of the unreachable set but that's not true in the case of incomplete
932
 * or missing `tp_traverse` methods.  When that object goes away, the callback
933
 * for weakref can be executed and that could reveal unreachable objects to
934
 * Python-level code.  See bpo-38006 as an example bug.
935
 */
936
/* Returns the number of weakref objects that died after their callback
 * was invoked (see the comment on num_freed below). */
static int
handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;          /* weakrefs freed as a side effect of their callback */

    gc_list_init(&wrcb_to_call);

    /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the
     * callback needs to be invoked. We make another pass over wrcb_to_call,
     * invoking callbacks, after this pass completes.
     */
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        PyObject *op = FROM_GC(gc);
        next = GC_NEXT(gc);

        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
            continue;
        }

        /* It supports weakrefs.  Does it have any?
         *
         * This is never triggered for static types so we can avoid the
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
         */
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);

        /* `op` may have some weakrefs.  March over the list and move the
         * weakrefs with callbacks that must be called into wrcb_to_call.
         */
        PyWeakReference *next_wr;
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
            // Get the next list element to get iterator progress if we omit
            // clearing of the weakref (because _PyWeakref_ClearRef changes
            // next pointer in the wrlist).
            next_wr = wr->wr_next;

            if (wr->wr_callback == NULL) {
                /* no callback */
                continue;
            }

            // Clear the weakref.  See the comments above this function for
            // reasons why we need to clear weakrefs that have callbacks.
            // Note that _PyWeakref_ClearRef clears the weakref but leaves the
            // callback pointer intact.  Obscure: it also changes *wrlist.
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);

            /* Headache time.  `op` is going away, and is weakly referenced by
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
             * is also trash, no:
             *
             * 1. There's no need to call it.  The object and the weakref are
             *    both going away, so it's legitimate to pretend the weakref is
             *    going away first.  The user has to ensure a weakref outlives its
             *    referent if they want a guarantee that the wr callback will get
             *    invoked.
             *
             * 2. It may be catastrophic to call it.  If the callback is also in
             *    cyclic trash (CT), then although the CT is unreachable from
             *    outside the current generation, CT may be reachable from the
             *    callback.  Then the callback could resurrect insane objects.
             *
             * Since the callback is never needed and may be unsafe in this
             * case, wr is simply left in the unreachable set.  Note that
             * clear_weakrefs() will ensure its callback will not trigger
             * inside delete_garbage().
             *
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
             * weakref outlived the trash.  Note that since wr isn't CT in this
             * case, its callback can't be CT either -- wr acted as an external
             * root to this generation, and therefore its callback did too.  So
             * nothing in CT is reachable from the callback either, so it's hard
             * to imagine how calling it later could create a problem for us.  wr
             * is moved to wrcb_to_call in this case.
             */
            if (gc_is_collecting(AS_GC((PyObject *)wr))) {
                continue;
            }

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            PyGC_Head *wrasgc = AS_GC((PyObject *)wr);
            // wrasgc is reachable, but next isn't, so they can't be the same
            _PyObject_ASSERT((PyObject *)wr, wrasgc != next);
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    int visited_space = get_gc_state()->visited_space;
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = (PyGC_Head*)wrcb_to_call._gc_next;
        PyObject *op = FROM_GC(gc);
        _PyObject_ASSERT(op, PyWeakref_Check(op));
        PyWeakReference *wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        _PyObject_ASSERT(op, callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
        if (temp == NULL) {
            PyErr_FormatUnraisable("Exception ignored on "
                                   "calling weakref callback %R", callback);
        }
        else {
            Py_DECREF(temp);
        }

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        /* If `gc` is still the first element, the weakref survived the
         * decref above. */
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
            /* object is still alive -- move it */
            gc_set_old_space(gc, visited_space);
            gc_list_move(gc, old);
        }
        else {
            ++num_freed;
        }
    }

    return num_freed;
}
1084
1085
/* Clear all weakrefs to unreachable objects.  When this returns, no object in
1086
 * `unreachable` is weakly referenced anymore.  See the comments above
1087
 * handle_weakref_callbacks() for why these weakrefs need to be cleared.
1088
 */
1089
static void
clear_weakrefs(PyGC_Head *unreachable)
{
    PyGC_Head *gc;
    PyGC_Head *next;

    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        PyObject *op = FROM_GC(gc);
        next = GC_NEXT(gc);

        if (PyWeakref_Check(op)) {
            /* A weakref inside the unreachable set is always cleared. See
             * the comments above handle_weakref_callbacks() for why these
             * must be cleared.
             */
            _PyWeakref_ClearRef((PyWeakReference *)op);
        }

        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
            continue;
        }

        /* It supports weakrefs.  Does it have any?
         *
         * This is never triggered for static types so we can avoid the
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
         */
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs.
         *
         * The loop re-reads *wrlist each iteration: clearing a weakref
         * unlinks it from the list, so the head keeps advancing.
         */
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
        }
    }
}
1134
1135
static void
1136
debug_cycle(const char *msg, PyObject *op)
1137
0
{
1138
0
    PySys_FormatStderr("gc: %s <%s %p>\n",
1139
0
                       msg, Py_TYPE(op)->tp_name, op);
1140
0
}
1141
1142
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
1143
 * only from such cycles).
1144
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
1145
 * garbage list (a Python list), else only the objects in finalizers with
1146
 * __del__ methods are appended to garbage.  All objects in finalizers are
1147
 * merged into the old list regardless.
1148
 */
1149
static void
handle_legacy_finalizers(PyThreadState *tstate,
                         GCState *gcstate,
                         PyGC_Head *finalizers, PyGC_Head *old)
{
    assert(!_PyErr_Occurred(tstate));
    assert(gcstate->garbage != NULL);

    PyGC_Head *gc = GC_NEXT(finalizers);
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
        PyObject *op = FROM_GC(gc);

        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
            if (PyList_Append(gcstate->garbage, op) < 0) {
                /* Appending failed; stop appending but still merge
                 * everything into `old` below. */
                _PyErr_Clear(tstate);
                break;
            }
        }
    }

    /* All finalizer-reachable objects go back to the old list regardless
     * of whether they were appended to gc.garbage. */
    gc_list_merge(finalizers, old);
}
1171
1172
/* Run first-time finalizers (if any) on all the objects in collectable.
1173
 * Note that this may remove some (or even all) of the objects from the
1174
 * list, due to refcounts falling to 0.
1175
 */
1176
static void
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
{
    destructor finalize;
    PyGC_Head seen;

    /* While we're going through the loop, `finalize(op)` may cause op, or
     * other objects, to be reclaimed via refcounts falling to zero.  So
     * there's little we can rely on about the structure of the input
     * `collectable` list across iterations.  For safety, we always take the
     * first object in that list and move it to a temporary `seen` list.
     * If objects vanish from the `collectable` and `seen` lists we don't
     * care.
     */
    gc_list_init(&seen);

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = GC_NEXT(collectable);
        PyObject *op = FROM_GC(gc);
        gc_list_move(gc, &seen);
        /* tp_finalize runs at most once per object: the FINALIZED flag
         * guards against repeats. */
        if (!_PyGC_FINALIZED(op) &&
            (finalize = Py_TYPE(op)->tp_finalize) != NULL)
        {
            _PyGC_SET_FINALIZED(op);
            /* Keep op alive for the duration of its finalizer call. */
            Py_INCREF(op);
            finalize(op);
            assert(!_PyErr_Occurred(tstate));
            Py_DECREF(op);
        }
    }
    /* Put the survivors back on `collectable`. */
    gc_list_merge(&seen, collectable);
}
1208
1209
/* Break reference cycles by clearing the containers involved.  This is
1210
 * tricky business as the lists can be changing and we don't know which
1211
 * objects may be freed.  It is possible I screwed something up here.
1212
 */
1213
static void
delete_garbage(PyThreadState *tstate, GCState *gcstate,
               PyGC_Head *collectable, PyGC_Head *old)
{
    assert(!_PyErr_Occurred(tstate));

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = GC_NEXT(collectable);
        PyObject *op = FROM_GC(gc);

        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
                                  "refcount is too small");

        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
            /* DEBUG_SAVEALL: don't clear anything; stash the object in
             * gc.garbage for the user to inspect instead. */
            assert(gcstate->garbage != NULL);
            if (PyList_Append(gcstate->garbage, op) < 0) {
                _PyErr_Clear(tstate);
            }
        }
        else {
            inquiry clear;
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                /* Hold a reference: tp_clear may drop the last refcount
                 * to the object. */
                Py_INCREF(op);
                (void) clear(op);
                if (_PyErr_Occurred(tstate)) {
                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
                                           Py_TYPE(op)->tp_name);
                }
                Py_DECREF(op);
            }
        }
        /* If `gc` is still at the head of the list, clearing it did not
         * deallocate it. */
        if (GC_NEXT(collectable) == gc) {
            /* object is still alive, move it, it may die later */
            gc_clear_collecting(gc);
            gc_list_move(gc, old);
        }
    }
}
1251
1252
1253
/* Deduce which objects among "base" are unreachable from outside the list
1254
   and move them to 'unreachable'. The process consist in the following steps:
1255
1256
1. Copy all reference counts to a different field (gc_prev is used to hold
1257
   this copy to save memory).
1258
2. Traverse all objects in "base" and visit all referred objects using
1259
   "tp_traverse" and for every visited object, subtract 1 to the reference
1260
   count (the one that we copied in the previous step). After this step, all
1261
   objects that can be reached directly from outside must have strictly positive
1262
   reference count, while all unreachable objects must have a count of exactly 0.
1263
3. Identify all unreachable objects (the ones with 0 reference count) and move
1264
   them to the "unreachable" list. This step also needs to move back to "base" all
1265
   objects that were initially marked as unreachable but are referred transitively
1266
   by the reachable objects (the ones with strictly positive reference count).
1267
1268
Contracts:
1269
1270
    * The "base" has to be a valid list with no mask set.
1271
1272
    * The "unreachable" list must be uninitialized (this function calls
1273
      gc_list_init over 'unreachable').
1274
1275
IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1276
flag set but it does not clear it to skip unnecessary iteration. Before the
1277
flag is cleared (for example, by using 'clear_unreachable_mask' function or
1278
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1279
list and we can not use most gc_list_* functions for it. */
1280
/* Returns the candidate count reported by update_refs() (i.e. the number
 * of objects examined in `base`). */
static inline Py_ssize_t
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable, struct gc_generation_stats *stats) {
    validate_list(base, collecting_clear_unreachable_clear);
    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    Py_ssize_t candidates = update_refs(base);  // gc_prev is used for gc_refs
    subtract_refs(base, stats);

    /* Leave everything reachable from outside base in base, and move
     * everything else (in base) to unreachable.
     *
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.  It "sounds slick"
     * to move the unreachable objects, until you think about it - the reason it
     * pays isn't actually obvious.
     *
     * Suppose we create objects A, B, C in that order.  They appear in the young
     * generation in the same order.  If B points to A, and C to B, and C is
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
     * respectively.
     *
     * When move_unreachable finds A, A is moved to the unreachable list.  The
     * same for B when it's first encountered.  Then C is traversed, B is moved
     * _back_ to the reachable list.  B is eventually traversed, and then A is
     * moved back to the reachable list.
     *
     * So instead of not moving at all, the reachable objects B and A are moved
     * twice each.  Why is this a win?  A straightforward algorithm to move the
     * reachable objects instead would move A, B, and C once each.
     *
     * The key is that this dance leaves the objects in order C, B, A - it's
     * reversed from the original order.  On all _subsequent_ scans, none of
     * them will move.  Since most objects aren't in cycles, this can save an
     * unbounded number of moves across an unbounded number of later collections.
     * It can cost more only the first time the chain is scanned.
     *
     * Drawback:  move_unreachable is also used to find out what's still trash
     * after finalizers may resurrect objects.  In _that_ case most unreachable
     * objects will remain unreachable, so it would be more efficient to move
     * the reachable objects instead.  But this is a one-time cost, probably not
     * worth complicating the code to speed just a little.
     */
    move_unreachable(base, unreachable, stats);  // gc_prev is pointer again
    validate_list(base, collecting_clear_unreachable_clear);
    validate_list(unreachable, collecting_set_unreachable_set);
    return candidates;
}
1331
1332
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1333
   them to 'old_generation' and placing the rest on 'still_unreachable'.
1334
1335
   Contracts:
1336
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1337
         will contain the objects that did not resurrect.
1338
1339
       * The "still_unreachable" list must be uninitialized (this function calls
1340
         gc_list_init over 'still_unreachable').
1341
1342
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1343
PREV_MASK_COLLECTING set, but the objects in this set are going to be removed so
1344
we can skip the expense of clearing the flag to avoid extra iteration. */
1345
static inline void
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
                           PyGC_Head *old_generation,
                           struct gc_generation_stats *stats)
{
    // Remove the PREV_MASK_COLLECTING from unreachable
    // to prepare it for a new call to 'deduce_unreachable'
    gc_list_clear_collecting(unreachable);

    // After the call to deduce_unreachable, the 'still_unreachable' set will
    // have the PREV_MASK_COLLECTING set, but the objects are going to be
    // removed so we can skip the expense of clearing the flag.
    PyGC_Head* resurrected = unreachable;
    deduce_unreachable(resurrected, still_unreachable, stats);
    // deduce_unreachable left NEXT_MASK_UNREACHABLE set on the
    // still-unreachable objects; strip it so the list is usable again.
    clear_unreachable_mask(still_unreachable);

    // Move the resurrected objects to the old generation for future collection.
    gc_list_merge(resurrected, old_generation);
}
1364
1365
static void
1366
gc_collect_region(PyThreadState *tstate,
1367
                  PyGC_Head *from,
1368
                  PyGC_Head *to,
1369
                  struct gc_generation_stats *stats);
1370
1371
static inline Py_ssize_t
1372
gc_list_set_space(PyGC_Head *list, int space)
1373
22.8k
{
1374
22.8k
    Py_ssize_t size = 0;
1375
22.8k
    PyGC_Head *gc;
1376
74.8M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1377
74.8M
        gc_set_old_space(gc, space);
1378
74.8M
        size++;
1379
74.8M
    }
1380
22.8k
    return size;
1381
22.8k
}
1382
1383
/* Making progress in the incremental collector
1384
 * In order to eventually collect all cycles
1385
 * the incremental collector must progress through the old
1386
 * space faster than objects are added to the old space.
1387
 *
1388
 * Each young or incremental collection adds a number of
1389
 * objects, S (for survivors) to the old space, and
1390
 * incremental collectors scan I objects from the old space.
1391
 * I > S must be true. We also want I > S * N to be true where
1392
 * N > 1. Higher values of N mean that the old space is
1393
 * scanned more rapidly.
1394
 * The default incremental threshold of 10 translates to
1395
 * N == 1.4 (1 + 4/threshold)
1396
 */
1397
1398
/* Divide by 10, so that the default incremental threshold of 10
1399
 * scans objects at 1% of the heap size */
1400
95.0k
#define SCAN_RATE_DIVISOR 10
1401
1402
static struct gc_generation_stats *
1403
gc_get_stats(GCState *gcstate, int gen)
1404
95.0k
{
1405
95.0k
    if (gen == 0) {
1406
0
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1407
0
        buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE;
1408
0
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1409
0
        return stats;
1410
0
    }
1411
95.0k
    else {
1412
95.0k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1413
95.0k
        buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE;
1414
95.0k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1415
95.0k
        return stats;
1416
95.0k
    }
1417
95.0k
}
1418
1419
static struct gc_generation_stats *
1420
gc_get_prev_stats(GCState *gcstate, int gen)
1421
95.0k
{
1422
95.0k
    if (gen == 0) {
1423
0
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1424
0
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1425
0
        return stats;
1426
0
    }
1427
95.0k
    else {
1428
95.0k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1429
95.0k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1430
95.0k
        return stats;
1431
95.0k
    }
1432
95.0k
}
1433
1434
static void
1435
add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
1436
95.0k
{
1437
95.0k
    struct gc_generation_stats *prev_stats = gc_get_prev_stats(gcstate, gen);
1438
95.0k
    struct gc_generation_stats *cur_stats = gc_get_stats(gcstate, gen);
1439
1440
95.0k
    memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
1441
1442
95.0k
    cur_stats->ts_start = stats->ts_start;
1443
95.0k
    cur_stats->ts_stop = stats->ts_stop;
1444
95.0k
    cur_stats->heap_size = stats->heap_size;
1445
95.0k
    cur_stats->work_to_do = stats->work_to_do;
1446
1447
95.0k
    cur_stats->collections += 1;
1448
95.0k
    cur_stats->object_visits += stats->object_visits;
1449
95.0k
    cur_stats->collected += stats->collected;
1450
95.0k
    cur_stats->uncollectable += stats->uncollectable;
1451
95.0k
    cur_stats->candidates += stats->candidates;
1452
1453
95.0k
    cur_stats->objects_transitively_reachable += stats->objects_transitively_reachable;
1454
95.0k
    cur_stats->objects_not_transitively_reachable += stats->objects_not_transitively_reachable;
1455
1456
95.0k
    cur_stats->duration += stats->duration;
1457
95.0k
}
1458
1459
/* Collect the young generation; survivors are merged into the
 * currently-visited old space. */
static void
gc_collect_young(PyThreadState *tstate,
                 struct gc_generation_stats *stats)
{
    GCState *gcstate = &tstate->interp->gc;
    validate_spaces(gcstate);
    PyGC_Head *young = &gcstate->young.head;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
    /* Untrack tuples that don't need tracking (see _PyTuple_MaybeUntrack). */
    untrack_tuples(young);

    PyGC_Head survivors;
    gc_list_init(&survivors);
    /* Tag everything in `young` with the visited-space bit so survivors
     * end up labeled correctly when merged below. */
    gc_list_set_space(young, gcstate->visited_space);
    gc_collect_region(tstate, young, &survivors, stats);
    gc_list_merge(&survivors, visited);
    validate_spaces(gcstate);
    gcstate->young.count = 0;
    gcstate->old[gcstate->visited_space].count++;
    validate_spaces(gcstate);
}
1479
1480
#ifndef NDEBUG
1481
static inline int
1482
IS_IN_VISITED(PyGC_Head *gc, int visited_space)
1483
{
1484
    assert(visited_space == 0 || other_space(visited_space) == 0);
1485
    return gc_old_space(gc) == visited_space;
1486
}
1487
#endif
1488
1489
/* Argument bundle for visit_add_to_container(). */
struct container_and_flag {
    PyGC_Head *container;    /* list that newly-visited objects are moved onto */
    int visited_space;       /* current visited-space bit of the gc state */
    intptr_t size;           /* running count of objects moved to `container` */
    struct gc_generation_stats *stats;  /* object_visits is bumped per visit */
};
1495
1496
/* A traversal callback for adding to container */
1497
static int
visit_add_to_container(PyObject *op, void *arg)
{
    OBJECT_STAT_INC(object_visits);
    struct container_and_flag *cf = (struct container_and_flag *)arg;
    cf->stats->object_visits += 1;
    int visited = cf->visited_space;
    assert(visited == get_gc_state()->visited_space);
    /* Only mortal, gc-capable objects participate. */
    if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* Move tracked objects that haven't been visited yet: flip their
         * space bit and append them to the work list. */
        if (_PyObject_GC_IS_TRACKED(op) &&
            gc_old_space(gc) != visited) {
            gc_flip_old_space(gc);
            gc_list_move(gc, cf->container);
            cf->size++;
        }
    }
    /* Always continue the traversal. */
    return 0;
}
1516
1517
/* Traverse the objects from `gc` up to `container`, appending every
 * tracked, not-yet-visited object they reference onto `container`.
 * Returns the number of objects added. */
static intptr_t
expand_region_transitively_reachable(PyGC_Head *container,
                                     PyGC_Head *gc,
                                     GCState *gcstate,
                                     struct gc_generation_stats *stats)
{
    struct container_and_flag arg = {
        .container = container,
        .visited_space = gcstate->visited_space,
        .size = 0,
        .stats = stats
    };
    assert(GC_NEXT(gc) == container);
    while (gc != container) {
        /* Survivors will be moved to visited space, so they should
         * have been marked as visited */
        assert(IS_IN_VISITED(gc, gcstate->visited_space));
        PyObject *op = FROM_GC(gc);
        assert(_PyObject_GC_IS_TRACKED(op));
        if (_Py_IsImmortal(op)) {
            /* Immortal objects are parked in the permanent generation
             * instead of being traversed. */
            PyGC_Head *next = GC_NEXT(gc);
            gc_list_move(gc, &gcstate->permanent_generation.head);
            gc = next;
            continue;
        }
        traverseproc traverse = Py_TYPE(op)->tp_traverse;
        (void) traverse(op,
                        visit_add_to_container,
                        &arg);
        gc = GC_NEXT(gc);
    }
    return arg.size;
}
1550
1551
/* Do bookkeeping for a completed GC cycle: reset the work budget, go back
 * to the mark phase, and make the "visited" space empty again, either by
 * flipping the space bit or by moving the visited list wholesale. */
static void
completed_scavenge(GCState *gcstate)
{
    /* We must observe two invariants:
    * 1. Members of the permanent generation must be marked visited.
    * 2. We cannot touch members of the permanent generation. */
    int visited;
    if (gc_list_is_empty(&gcstate->permanent_generation.head)) {
        /* Permanent generation is empty so we can flip spaces bit */
        int not_visited = gcstate->visited_space;
        visited = other_space(not_visited);
        gcstate->visited_space = visited;
        /* Make sure all objects have visited bit set correctly */
        gc_list_set_space(&gcstate->young.head, not_visited);
    }
    else {
        /* Cannot flip the bit (invariant 1 would break for the permanent
         * generation), so move the objects from visited to pending space. */
        visited = gcstate->visited_space;
        int not_visited = other_space(visited);
        assert(gc_list_is_empty(&gcstate->old[not_visited].head));
        gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head);
        gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
    }
    /* Either way the visited space must now be empty. */
    assert(gc_list_is_empty(&gcstate->old[visited].head));
    gcstate->work_to_do = 0;
    gcstate->phase = GC_PHASE_MARK;
}
1579
1580
static intptr_t
1581
move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
1582
3.80M
{
1583
3.80M
    if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
1584
1.96M
        PyGC_Head *gc = AS_GC(op);
1585
1.96M
        if (_PyObject_GC_IS_TRACKED(op) &&
1586
1.96M
            gc_old_space(gc) != visited_space) {
1587
814k
            gc_flip_old_space(gc);
1588
814k
            gc_list_move(gc, reachable);
1589
814k
            return 1;
1590
814k
        }
1591
1.96M
    }
1592
2.99M
    return 0;
1593
3.80M
}
1594
1595
/* Worklist-based transitive marking: repeatedly pop the head of `reachable`,
 * move it onto `visited`, and traverse it so that any unvisited objects it
 * references are appended to `reachable` (via visit_add_to_container).
 * Terminates when `reachable` drains.  Returns the number of objects that
 * the traversal added to the worklist. */
static intptr_t
mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space, struct gc_generation_stats *stats)
{
    // Transitively traverse all objects from reachable, until empty
    struct container_and_flag arg = {
        .container = reachable,
        .visited_space = visited_space,
        .size = 0,
        .stats = stats
    };
    while (!gc_list_is_empty(reachable)) {
        PyGC_Head *gc = _PyGCHead_NEXT(reachable);
        /* Everything on the worklist was already flipped to visited_space. */
        assert(gc_old_space(gc) == visited_space);
        gc_list_move(gc, visited);
        PyObject *op = FROM_GC(gc);
        traverseproc traverse = Py_TYPE(op)->tp_traverse;
        (void) traverse(op,
                        visit_add_to_container,
                        &arg);
    }
    gc_list_validate_space(visited, visited_space);
    return arg.size;
}
1618
1619
/* Mark every GC object reachable from the evaluation stacks of all threads
 * in `interp` as visited, moving them to `visited`.
 *
 * For each non-interpreter-owned frame: mark f_locals, the function object,
 * and every live stack slot, then transitively mark from those roots via
 * mark_all_reachable().  When `start` is false, the walk down a thread's
 * frame stack stops at the first frame already flagged `visited`, since
 * frames below it cannot have changed since the last increment.
 * Returns the number of objects newly marked. */
static intptr_t
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start, struct gc_generation_stats *stats)
{
    PyGC_Head reachable;
    gc_list_init(&reachable);
    Py_ssize_t objects_marked = 0;
    // Move all objects on stacks to reachable
    _PyRuntimeState *runtime = &_PyRuntime;
    HEAD_LOCK(runtime);
    PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
    HEAD_UNLOCK(runtime);
    while (ts) {
        _PyInterpreterFrame *frame = ts->current_frame;
        while (frame) {
            if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) {
                /* Not a Python-level frame; skip it. */
                frame = frame->previous;
                continue;
            }
            _PyStackRef *locals = frame->localsplus;
            _PyStackRef *sp = frame->stackpointer;
            objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
            PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
            objects_marked += move_to_reachable(func, &reachable, visited_space);
            /* Scan every live slot between localsplus and the stack pointer. */
            while (sp > locals) {
                sp--;
                if (PyStackRef_IsNullOrInt(*sp)) {
                    continue;
                }
                PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
                if (_Py_IsImmortal(op)) {
                    continue;
                }
                if (_PyObject_IS_GC(op)) {
                    PyGC_Head *gc = AS_GC(op);
                    if (_PyObject_GC_IS_TRACKED(op) &&
                        gc_old_space(gc) != visited_space) {
                        gc_flip_old_space(gc);
                        objects_marked++;
                        gc_list_move(gc, &reachable);
                    }
                }
            }
            if (!start && frame->visited) {
                // If this frame has already been visited, then the lower frames
                // will have already been visited and will not have changed
                break;
            }
            frame->visited = 1;
            frame = frame->previous;
        }
        HEAD_LOCK(runtime);
        ts = PyThreadState_Next(ts);
        HEAD_UNLOCK(runtime);
    }
    /* Transitively mark everything reachable from the stack roots. */
    objects_marked += mark_all_reachable(&reachable, visited, visited_space, stats);
    assert(gc_list_is_empty(&reachable));
    return objects_marked;
}
1677
1678
static intptr_t
1679
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, struct gc_generation_stats *stats)
1680
1.84k
{
1681
1.84k
    PyGC_Head reachable;
1682
1.84k
    gc_list_init(&reachable);
1683
1.84k
    Py_ssize_t objects_marked = 0;
1684
1.84k
    objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
1685
1.84k
    objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
1686
1.84k
    objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
1687
1.84k
    struct types_state *types = &interp->types;
1688
374k
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
1689
372k
        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
1690
372k
        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
1691
372k
    }
1692
20.2k
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
1693
18.4k
        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
1694
18.4k
        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
1695
18.4k
    }
1696
1.84k
    objects_marked += mark_all_reachable(&reachable, visited, visited_space, stats);
1697
1.84k
    assert(gc_list_is_empty(&reachable));
1698
1.84k
    return objects_marked;
1699
1.84k
}
1700
1701
static intptr_t
1702
mark_at_start(PyThreadState *tstate, struct gc_generation_stats *stats)
1703
1.84k
{
1704
    // TO DO -- Make this incremental
1705
1.84k
    GCState *gcstate = &tstate->interp->gc;
1706
1.84k
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1707
1.84k
    Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space, stats);
1708
1.84k
    objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true, stats);
1709
1.84k
    gcstate->work_to_do -= objects_marked;
1710
1.84k
    gcstate->phase = GC_PHASE_COLLECT;
1711
1.84k
    validate_spaces(gcstate);
1712
1.84k
    return objects_marked;
1713
1.84k
}
1714
1715
static intptr_t
1716
assess_work_to_do(GCState *gcstate)
1717
95.0k
{
1718
    /* The amount of work we want to do depends on three things.
1719
     * 1. The number of new objects created
1720
     * 2. The growth in heap size since the last collection
1721
     * 3. The heap size (up to the number of new objects, to avoid quadratic effects)
1722
     *
1723
     * For a steady state heap, the amount of work to do is three times the number
1724
     * of new objects added to the heap. This ensures that we stay ahead in the
1725
     * worst case of all new objects being garbage.
1726
     *
1727
     * This could be improved by tracking survival rates, but it is still a
1728
     * large improvement on the non-marking approach.
1729
     */
1730
95.0k
    intptr_t scale_factor = gcstate->old[0].threshold;
1731
95.0k
    if (scale_factor < 2) {
1732
0
        scale_factor = 2;
1733
0
    }
1734
95.0k
    intptr_t new_objects = gcstate->young.count;
1735
95.0k
    intptr_t max_heap_fraction = new_objects*2;
1736
95.0k
    intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
1737
95.0k
    if (heap_fraction > max_heap_fraction) {
1738
8.55k
        heap_fraction = max_heap_fraction;
1739
8.55k
    }
1740
95.0k
    gcstate->young.count = 0;
1741
95.0k
    return new_objects + heap_fraction;
1742
95.0k
}
1743
1744
static void
1745
gc_collect_increment(PyThreadState *tstate, struct gc_generation_stats *stats)
1746
95.0k
{
1747
95.0k
    GCState *gcstate = &tstate->interp->gc;
1748
95.0k
    gcstate->work_to_do += assess_work_to_do(gcstate);
1749
95.0k
    if (gcstate->work_to_do < 0) {
1750
72.1k
        return;
1751
72.1k
    }
1752
22.8k
    untrack_tuples(&gcstate->young.head);
1753
22.8k
    if (gcstate->phase == GC_PHASE_MARK) {
1754
1.84k
        Py_ssize_t objects_marked = mark_at_start(tstate, stats);
1755
1.84k
        stats->objects_transitively_reachable += objects_marked;
1756
1.84k
        stats->candidates += objects_marked;
1757
1.84k
        gcstate->work_to_do -= objects_marked;
1758
1.84k
        validate_spaces(gcstate);
1759
1.84k
        return;
1760
1.84k
    }
1761
21.0k
    PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
1762
21.0k
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1763
21.0k
    PyGC_Head increment;
1764
21.0k
    gc_list_init(&increment);
1765
21.0k
    int scale_factor = gcstate->old[0].threshold;
1766
21.0k
    if (scale_factor < 2) {
1767
0
        scale_factor = 2;
1768
0
    }
1769
21.0k
    intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false, stats);
1770
21.0k
    stats->objects_transitively_reachable += objects_marked;
1771
21.0k
    gcstate->work_to_do -= objects_marked;
1772
21.0k
    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
1773
21.0k
    gc_list_merge(&gcstate->young.head, &increment);
1774
21.0k
    gc_list_validate_space(&increment, gcstate->visited_space);
1775
21.0k
    Py_ssize_t increment_size = gc_list_size(&increment);
1776
512k
    while (increment_size < gcstate->work_to_do) {
1777
493k
        if (gc_list_is_empty(not_visited)) {
1778
1.77k
            break;
1779
1.77k
        }
1780
491k
        PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
1781
491k
        gc_list_move(gc, &increment);
1782
491k
        increment_size++;
1783
491k
        assert(!_Py_IsImmortal(FROM_GC(gc)));
1784
491k
        gc_set_old_space(gc, gcstate->visited_space);
1785
491k
        increment_size += expand_region_transitively_reachable(&increment, gc, gcstate, stats);
1786
491k
    }
1787
21.0k
    stats->objects_not_transitively_reachable += increment_size;
1788
21.0k
    validate_list(&increment, collecting_clear_unreachable_clear);
1789
21.0k
    gc_list_validate_space(&increment, gcstate->visited_space);
1790
21.0k
    PyGC_Head survivors;
1791
21.0k
    gc_list_init(&survivors);
1792
21.0k
    gc_collect_region(tstate, &increment, &survivors, stats);
1793
21.0k
    gc_list_merge(&survivors, visited);
1794
21.0k
    assert(gc_list_is_empty(&increment));
1795
21.0k
    gcstate->work_to_do -= increment_size;
1796
1797
21.0k
    if (gc_list_is_empty(not_visited)) {
1798
1.81k
        completed_scavenge(gcstate);
1799
1.81k
    }
1800
21.0k
    validate_spaces(gcstate);
1801
21.0k
}
1802
1803
/* Perform a full, non-incremental collection: merge every generation
 * (young, pending, visited) into the visited space, collect the whole
 * heap in one gc_collect_region() call, reset the generation counters,
 * complete the scavenge cycle, and clear the free lists. */
static void
gc_collect_full(PyThreadState *tstate,
                struct gc_generation_stats *stats)
{
    GCState *gcstate = &tstate->interp->gc;
    validate_spaces(gcstate);
    PyGC_Head *young = &gcstate->young.head;
    PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
    untrack_tuples(young);
    /* merge all generations into visited */
    gc_list_merge(young, pending);
    gc_list_validate_space(pending, 1-gcstate->visited_space);
    gc_list_set_space(pending, gcstate->visited_space);
    gcstate->young.count = 0;
    gc_list_merge(pending, visited);
    validate_spaces(gcstate);

    /* from == to here: collect the whole merged heap in place. */
    gc_collect_region(tstate, visited, visited,
                      stats);
    validate_spaces(gcstate);
    gcstate->young.count = 0;
    gcstate->old[0].count = 0;
    gcstate->old[1].count = 0;
    completed_scavenge(gcstate);
    _PyGC_ClearAllFreeLists(tstate->interp);
    validate_spaces(gcstate);
}
1831
1832
/* This is the main function. Read this to understand how the
 * collection process works.
 *
 * Collects the objects in `from`, moving survivors to `to`:
 * 1. deduce the unreachable set,
 * 2. set aside objects with (and reachable from) legacy finalizers,
 * 3. invoke weakref callbacks, run tp_finalize, handle resurrection,
 * 4. clear weakrefs and tp_clear the remaining garbage,
 * 5. report uncollectables and hand them to gcstate->garbage.
 * Statistics are accumulated into `stats`. */
static void
gc_collect_region(PyThreadState *tstate,
                  PyGC_Head *from,
                  PyGC_Head *to,
                  struct gc_generation_stats *stats)
{
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
    PyGC_Head *gc; /* initialize to prevent a compiler warning */
    GCState *gcstate = &tstate->interp->gc;

    assert(gcstate->garbage != NULL);
    assert(!_PyErr_Occurred(tstate));

    gc_list_init(&unreachable);
    stats->candidates = deduce_unreachable(from, &unreachable, stats);
    validate_consistent_old_space(from);
    untrack_tuples(from);

    /* Move reachable objects to next generation. */
    validate_consistent_old_space(to);
    if (from != to) {
        gc_list_merge(from, to);
    }
    validate_consistent_old_space(to);

    /* All objects in unreachable are trash, but objects reachable from
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
     */
    gc_list_init(&finalizers);
    // NEXT_MASK_UNREACHABLE is cleared here.
    // After move_legacy_finalizers(), unreachable is normal list.
    move_legacy_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a legacy finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_legacy_finalizer_reachable(&finalizers, stats);
    validate_list(&finalizers, collecting_clear_unreachable_clear);
    validate_list(&unreachable, collecting_set_unreachable_clear);
    /* Print debugging information. */
    if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Invoke weakref callbacks as necessary. */
    stats->collected += handle_weakref_callbacks(&unreachable, to);
    gc_list_validate_space(to, gcstate->visited_space);
    validate_list(to, collecting_clear_unreachable_clear);
    validate_list(&unreachable, collecting_set_unreachable_clear);

    /* Call tp_finalize on objects which have one. */
    finalize_garbage(tstate, &unreachable);
    /* Handle any objects that may have resurrected after the call
     * to 'finalize_garbage' and continue the collection with the
     * objects that are still unreachable */
    PyGC_Head final_unreachable;
    gc_list_init(&final_unreachable);
    handle_resurrected_objects(&unreachable, &final_unreachable, to, stats);

    /* Clear weakrefs to objects in the unreachable set.  See the comments
     * above handle_weakref_callbacks() for details.
     */
    clear_weakrefs(&final_unreachable);

    /* Call tp_clear on objects in the final_unreachable set.  This will cause
    * the reference cycles to be broken.  It may also cause some objects
    * in finalizers to be freed.
    */
    stats->collected += gc_list_size(&final_unreachable);
    delete_garbage(tstate, gcstate, &final_unreachable, to);

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    Py_ssize_t n = 0;
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
        n++;
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    stats->uncollectable = n;
    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
    gc_list_validate_space(to, gcstate->visited_space);
    validate_list(to, collecting_clear_unreachable_clear);
}
1925
1926
/* Invoke progress callbacks to notify clients that garbage collection
 * is starting or stopping.
 *
 * `phase` is "start" or "stop" (see callers).  An info dict built from
 * `stats` is passed to each callback only if at least one callback is
 * registered.  Callback exceptions are reported as unraisable and do not
 * propagate; on return no exception is set. */
static void
do_gc_callback(GCState *gcstate, const char *phase,
                   int generation, struct gc_generation_stats *stats)
{
    assert(!PyErr_Occurred());

    /* The local variable cannot be rebound, check it for sanity */
    assert(PyList_CheckExact(gcstate->callbacks));
    PyObject *info = NULL;
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
        /* Only build the info dict when somebody will receive it. */
        info = Py_BuildValue("{sisnsnsnsd}",
            "generation", generation,
            "collected", stats->collected,
            "uncollectable", stats->uncollectable,
            "candidates", stats->candidates,
            "duration", stats->duration);
        if (info == NULL) {
            PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
            return;
        }
    }

    PyObject *phase_obj = PyUnicode_FromString(phase);
    if (phase_obj == NULL) {
        Py_XDECREF(info);
        PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
        return;
    }

    PyObject *stack[] = {phase_obj, info};
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
        Py_INCREF(cb); /* make sure cb doesn't go away */
        r = PyObject_Vectorcall(cb, stack, 2, NULL);
        if (r == NULL) {
            PyErr_FormatUnraisable("Exception ignored while "
                                   "calling GC callback %R", cb);
        }
        else {
            Py_DECREF(r);
        }
        Py_DECREF(cb);
    }
    Py_DECREF(phase_obj);
    Py_XDECREF(info);
    assert(!PyErr_Occurred());
}
1976
1977
static void
1978
invoke_gc_callback(GCState *gcstate, const char *phase,
1979
                   int generation, struct gc_generation_stats *stats)
1980
190k
{
1981
190k
    if (gcstate->callbacks == NULL) {
1982
0
        return;
1983
0
    }
1984
190k
    do_gc_callback(gcstate, phase, generation, stats);
1985
190k
}
1986
1987
static int
1988
referrersvisit(PyObject* obj, void *arg)
1989
0
{
1990
0
    PyObject *objs = arg;
1991
0
    Py_ssize_t i;
1992
0
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
1993
0
        if (PyTuple_GET_ITEM(objs, i) == obj) {
1994
0
            return 1;
1995
0
        }
1996
0
    }
1997
0
    return 0;
1998
0
}
1999
2000
static int
2001
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
2002
0
{
2003
0
    PyGC_Head *gc;
2004
0
    PyObject *obj;
2005
0
    traverseproc traverse;
2006
0
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
2007
0
        obj = FROM_GC(gc);
2008
0
        traverse = Py_TYPE(obj)->tp_traverse;
2009
0
        if (obj == objs || obj == resultlist) {
2010
0
            continue;
2011
0
        }
2012
0
        if (traverse(obj, referrersvisit, objs)) {
2013
0
            if (PyList_Append(resultlist, obj) < 0) {
2014
0
                return 0; /* error */
2015
0
            }
2016
0
        }
2017
0
    }
2018
0
    return 1; /* no error */
2019
0
}
2020
2021
PyObject *
2022
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
2023
0
{
2024
0
    PyObject *result = PyList_New(0);
2025
0
    if (!result) {
2026
0
        return NULL;
2027
0
    }
2028
2029
0
    GCState *gcstate = &interp->gc;
2030
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
2031
0
        if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) {
2032
0
            Py_DECREF(result);
2033
0
            return NULL;
2034
0
        }
2035
0
    }
2036
0
    return result;
2037
0
}
2038
2039
PyObject *
2040
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
2041
0
{
2042
0
    assert(generation >= -1 && generation < NUM_GENERATIONS);
2043
0
    GCState *gcstate = &interp->gc;
2044
2045
0
    PyObject *result = PyList_New(0);
2046
    /* Generation:
2047
     * -1: Return all objects
2048
     * 0: All young objects
2049
     * 1: No objects
2050
     * 2: All old objects
2051
     */
2052
0
    if (result == NULL || generation == 1) {
2053
0
        return result;
2054
0
    }
2055
0
    if (generation <= 0) {
2056
0
        if (append_objects(result, &gcstate->young.head)) {
2057
0
            goto error;
2058
0
        }
2059
0
    }
2060
0
    if (generation != 0) {
2061
0
        if (append_objects(result, &gcstate->old[0].head)) {
2062
0
            goto error;
2063
0
        }
2064
0
        if (append_objects(result, &gcstate->old[1].head)) {
2065
0
            goto error;
2066
0
        }
2067
0
    }
2068
2069
0
    return result;
2070
0
error:
2071
0
    Py_DECREF(result);
2072
0
    return NULL;
2073
0
}
2074
2075
/* Move every tracked object (young and both old spaces) into the permanent
 * generation, exempting them from future collections until _PyGC_Unfreeze().
 * All moved objects must carry the visited space bit, so the list that
 * currently has the wrong bit is re-tagged before merging. */
void
_PyGC_Freeze(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;
    /* The permanent_generation must be visited */
    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
    gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
    gcstate->young.count = 0;
    PyGC_Head*old0 = &gcstate->old[0].head;
    PyGC_Head*old1 = &gcstate->old[1].head;
    /* Flip the space bit on whichever old list is currently not-visited. */
    if (gcstate->visited_space) {
        gc_list_set_space(old0, 1);
    }
    else {
        gc_list_set_space(old1, 0);
    }
    gc_list_merge(old0, &gcstate->permanent_generation.head);
    gcstate->old[0].count = 0;
    gc_list_merge(old1, &gcstate->permanent_generation.head);
    gcstate->old[1].count = 0;
    validate_spaces(gcstate);
}
2097
2098
void
2099
_PyGC_Unfreeze(PyInterpreterState *interp)
2100
0
{
2101
0
    GCState *gcstate = &interp->gc;
2102
0
    gc_list_merge(&gcstate->permanent_generation.head,
2103
0
                  &gcstate->old[gcstate->visited_space].head);
2104
0
    validate_spaces(gcstate);
2105
0
}
2106
2107
Py_ssize_t
2108
_PyGC_GetFreezeCount(PyInterpreterState *interp)
2109
0
{
2110
0
    GCState *gcstate = &interp->gc;
2111
0
    return gc_list_size(&gcstate->permanent_generation.head);
2112
0
}
2113
2114
/* C API for controlling the state of the garbage collector */
2115
int
2116
PyGC_Enable(void)
2117
0
{
2118
0
    GCState *gcstate = get_gc_state();
2119
0
    int old_state = gcstate->enabled;
2120
0
    gcstate->enabled = 1;
2121
0
    return old_state;
2122
0
}
2123
2124
int
2125
PyGC_Disable(void)
2126
0
{
2127
0
    GCState *gcstate = get_gc_state();
2128
0
    int old_state = gcstate->enabled;
2129
0
    gcstate->enabled = 0;
2130
0
    return old_state;
2131
0
}
2132
2133
int
2134
PyGC_IsEnabled(void)
2135
0
{
2136
0
    GCState *gcstate = get_gc_state();
2137
0
    return gcstate->enabled;
2138
0
}
2139
2140
// Show stats for objects in each generations
2141
static void
2142
show_stats_each_generations(GCState *gcstate)
2143
0
{
2144
0
    char buf[100];
2145
0
    size_t pos = 0;
2146
2147
0
    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
2148
0
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
2149
0
                             " %zd",
2150
0
                             gc_list_size(GEN_HEAD(gcstate, i)));
2151
0
    }
2152
0
    PySys_FormatStderr(
2153
0
        "gc: objects in each generation:%s\n"
2154
0
        "gc: objects in permanent generation: %zd\n",
2155
0
        buf, gc_list_size(&gcstate->permanent_generation.head));
2156
0
}
2157
2158
/* Run a collection of the given generation (0=young, 1=increment, 2=full)
 * and return the number of collected + uncollectable objects.
 *
 * Guards against re-entrant collection with gcstate->collecting, preserves
 * any pending exception across the collection, fires "start"/"stop"
 * callbacks (unless the reason is shutdown), emits DTrace probes and debug
 * stats, and records timing/statistics via add_stats(). */
Py_ssize_t
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
{
    GCState *gcstate = &tstate->interp->gc;
    assert(tstate->current_frame == NULL || tstate->current_frame->stackpointer != NULL);

    int expected = 0;
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
        // Don't start a garbage collection if one is already in progress.
        return 0;
    }
    gcstate->frame = tstate->current_frame;

    struct gc_generation_stats stats = { 0 };
    if (reason != _Py_GC_REASON_SHUTDOWN) {
        invoke_gc_callback(gcstate, "start", generation, &stats);
    }
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
        show_stats_each_generations(gcstate);
    }
    if (PyDTrace_GC_START_ENABLED()) {
        PyDTrace_GC_START(generation);
    }
    stats.heap_size = gcstate->heap_size;
    stats.work_to_do = gcstate->work_to_do;
    (void)PyTime_PerfCounterRaw(&stats.ts_start);
    /* Save and restore any pending exception around the collection. */
    PyObject *exc = _PyErr_GetRaisedException(tstate);
    switch(generation) {
        case 0:
            gc_collect_young(tstate, &stats);
            break;
        case 1:
            gc_collect_increment(tstate, &stats);
            break;
        case 2:
            gc_collect_full(tstate, &stats);
            break;
        default:
            Py_UNREACHABLE();
    }
    (void)PyTime_PerfCounterRaw(&stats.ts_stop);
    stats.duration = PyTime_AsSecondsDouble(stats.ts_stop - stats.ts_start);
    add_stats(gcstate, generation, &stats);
    if (PyDTrace_GC_DONE_ENABLED()) {
        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
    }
    if (reason != _Py_GC_REASON_SHUTDOWN) {
        invoke_gc_callback(gcstate, "stop", generation, &stats);
    }
    _PyErr_SetRaisedException(tstate, exc);
    GC_STAT_ADD(generation, objects_collected, stats.collected);
#ifdef Py_STATS
    PyStats *s = _PyStats_GET();
    if (s) {
        GC_STAT_ADD(generation, object_visits,
            s->object_stats.object_visits);
        s->object_stats.object_visits = 0;
    }
#endif
    validate_spaces(gcstate);
    gcstate->frame = NULL;
    _Py_atomic_store_int(&gcstate->collecting, 0);

    if (gcstate->debug & _PyGC_DEBUG_STATS) {
        PySys_WriteStderr(
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
            stats.collected + stats.uncollectable, stats.uncollectable, stats.duration
        );
    }

    return stats.uncollectable + stats.collected;
}
2231
2232
/* Public API to invoke gc.collect() from C.
 * Runs a full collection (generation 2) with the "manual" reason and
 * returns the number of collected + uncollectable objects. */
Py_ssize_t
PyGC_Collect(void)
{
    return _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL);
}
2238
2239
/* Run a full collection at interpreter shutdown (reason SHUTDOWN, so no
 * callbacks are invoked); the re-entrancy guard in _PyGC_Collect() makes
 * this a no-op if a collection is already running. */
void
_PyGC_CollectNoFail(PyThreadState *tstate)
{
    /* Ideally, this function is only called on interpreter shutdown,
       and therefore not recursively.  Unfortunately, when there are daemon
       threads, a daemon thread can start a cyclic garbage collection
       during interpreter shutdown (and then never finish it).
       See http://bugs.python.org/issue8713#msg195178 for an example.
       */
    _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN);
}
2250
2251
/* At interpreter shutdown, warn (ResourceWarning) about objects left in
 * gc.garbage, and, if DEBUG_UNCOLLECTABLE is set, dump their repr to
 * stderr.  Does nothing if DEBUG_SAVEALL is set or gc.garbage is empty. */
void
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;
    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
        const char *message;
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
            message = "gc: %zd uncollectable objects at shutdown";
        }
        else {
            message = "gc: %zd uncollectable objects at shutdown; " \
                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
        }
        /* PyErr_WarnFormat does too many things and we are at shutdown,
           the warnings module's dependencies (e.g. linecache) may be gone
           already. */
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL, message,
                                     PyList_GET_SIZE(gcstate->garbage)))
        {
            PyErr_FormatUnraisable("Exception ignored in GC shutdown");
        }
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
            PyObject *repr = NULL, *bytes = NULL;
            repr = PyObject_Repr(gcstate->garbage);
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
                PyErr_FormatUnraisable("Exception ignored in GC shutdown "
                                       "while formatting garbage");
            }
            else {
                PySys_WriteStderr(
                    "      %s\n",
                    PyBytes_AS_STRING(bytes)
                    );
            }
            Py_XDECREF(repr);
            Py_XDECREF(bytes);
        }
    }
}
2292
2293
static void
2294
0
finalize_unlink_gc_head(PyGC_Head *gc) {
2295
0
    PyGC_Head *prev = GC_PREV(gc);
2296
0
    PyGC_Head *next = GC_NEXT(gc);
2297
0
    _PyGCHead_SET_NEXT(prev, next);
2298
0
    _PyGCHead_SET_PREV(next, prev);
2299
0
}
2300
2301
void
2302
_PyGC_Fini(PyInterpreterState *interp)
2303
0
{
2304
0
    GCState *gcstate = &interp->gc;
2305
0
    Py_CLEAR(gcstate->garbage);
2306
0
    Py_CLEAR(gcstate->callbacks);
2307
0
    PyMem_RawFree(gcstate->generation_stats);
2308
0
    gcstate->generation_stats = NULL;
2309
2310
    /* Prevent a subtle bug that affects sub-interpreters that use basic
2311
     * single-phase init extensions (m_size == -1).  Those extensions cause objects
2312
     * to be shared between interpreters, via the PyDict_Update(mdict, m_copy) call
2313
     * in import_find_extension().
2314
     *
2315
     * If they are GC objects, their GC head next or prev links could refer to
2316
     * the interpreter _gc_runtime_state PyGC_Head nodes.  Those nodes go away
2317
     * when the interpreter structure is freed and so pointers to them become
2318
     * invalid.  If those objects are still used by another interpreter and
2319
     * UNTRACK is called on them, a crash will happen.  We untrack the nodes
2320
     * here to avoid that.
2321
     *
2322
     * This bug was originally fixed when reported as gh-90228.  The bug was
2323
     * re-introduced in gh-94673.
2324
     */
2325
0
    finalize_unlink_gc_head(&gcstate->young.head);
2326
0
    finalize_unlink_gc_head(&gcstate->old[0].head);
2327
0
    finalize_unlink_gc_head(&gcstate->old[1].head);
2328
0
    finalize_unlink_gc_head(&gcstate->permanent_generation.head);
2329
0
}
2330
2331
/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{
    /* Dump the object that owns GC header 'g' (FROM_GC recovers the
       PyObject* from its GC header). */
    PyObject_Dump(FROM_GC(g));
}
2337
2338
2339
#ifdef Py_DEBUG
2340
static int
2341
visit_validate(PyObject *op, void *parent_raw)
2342
{
2343
    PyObject *parent = _PyObject_CAST(parent_raw);
2344
    if (_PyObject_IsFreed(op)) {
2345
        _PyObject_ASSERT_FAILED_MSG(parent,
2346
                                    "PyObject_GC_Track() object is not valid");
2347
    }
2348
    return 0;
2349
}
2350
#endif
2351
2352
2353
/* extension modules might be compiled with GC support so these
2354
   functions must always be available */
2355
2356
void
2357
PyObject_GC_Track(void *op_raw)
2358
185M
{
2359
185M
    PyObject *op = _PyObject_CAST(op_raw);
2360
185M
    if (_PyObject_GC_IS_TRACKED(op)) {
2361
0
        _PyObject_ASSERT_FAILED_MSG(op,
2362
0
                                    "object already tracked "
2363
0
                                    "by the garbage collector");
2364
0
    }
2365
185M
    _PyObject_GC_TRACK(op);
2366
2367
#ifdef Py_DEBUG
2368
    /* Check that the object is valid: validate objects traversed
2369
       by tp_traverse() */
2370
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
2371
    (void)traverse(op, visit_validate, op);
2372
#endif
2373
185M
}
2374
2375
void
2376
PyObject_GC_UnTrack(void *op_raw)
2377
1.33G
{
2378
1.33G
    PyObject *op = _PyObject_CAST(op_raw);
2379
    /* The code for some objects, such as tuples, is a bit
2380
     * sloppy about when the object is tracked and untracked. */
2381
1.33G
    if (_PyObject_GC_IS_TRACKED(op)) {
2382
1.09G
        _PyObject_GC_UNTRACK(op);
2383
1.09G
    }
2384
1.33G
}
2385
2386
int
PyObject_IS_GC(PyObject *obj)
{
    /* Public function wrapper around the internal _PyObject_IS_GC()
       check; returns non-zero if the object participates in GC. */
    return _PyObject_IS_GC(obj);
}
2391
2392
void
2393
_Py_ScheduleGC(PyThreadState *tstate)
2394
24.1M
{
2395
24.1M
    if (!_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT))
2396
95.0k
    {
2397
95.0k
        _Py_set_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
2398
95.0k
    }
2399
24.1M
}
2400
2401
void
2402
_Py_TriggerGC(struct _gc_runtime_state *gcstate)
2403
24.2M
{
2404
24.2M
    PyThreadState *tstate = _PyThreadState_GET();
2405
24.2M
    if (gcstate->enabled &&
2406
24.2M
        gcstate->young.threshold != 0 &&
2407
24.2M
        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
2408
24.2M
        !_PyErr_Occurred(tstate))
2409
24.1M
    {
2410
24.1M
        _Py_ScheduleGC(tstate);
2411
24.1M
    }
2412
24.2M
}
2413
2414
void
2415
_PyObject_GC_Link(PyObject *op)
2416
633M
{
2417
633M
    PyGC_Head *gc = AS_GC(op);
2418
    // gc must be correctly aligned
2419
633M
    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
2420
633M
    gc->_gc_next = 0;
2421
633M
    gc->_gc_prev = 0;
2422
2423
633M
}
2424
2425
void
2426
_Py_RunGC(PyThreadState *tstate)
2427
95.0k
{
2428
95.0k
    if (tstate->interp->gc.enabled) {
2429
95.0k
        _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP);
2430
95.0k
    }
2431
95.0k
}
2432
2433
static PyObject *
2434
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
2435
509M
{
2436
509M
    PyThreadState *tstate = _PyThreadState_GET();
2437
509M
    if (basicsize > PY_SSIZE_T_MAX - presize) {
2438
0
        return _PyErr_NoMemory(tstate);
2439
0
    }
2440
509M
    size_t size = presize + basicsize;
2441
509M
    char *mem = _PyObject_MallocWithType(tp, size);
2442
509M
    if (mem == NULL) {
2443
0
        return _PyErr_NoMemory(tstate);
2444
0
    }
2445
509M
    ((PyObject **)mem)[0] = NULL;
2446
509M
    ((PyObject **)mem)[1] = NULL;
2447
509M
    PyObject *op = (PyObject *)(mem + presize);
2448
509M
    _PyObject_GC_Link(op);
2449
509M
    return op;
2450
509M
}
2451
2452
2453
PyObject *
2454
_PyObject_GC_New(PyTypeObject *tp)
2455
264M
{
2456
264M
    size_t presize = _PyType_PreHeaderSize(tp);
2457
264M
    size_t size = _PyObject_SIZE(tp);
2458
264M
    if (_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)) {
2459
216
        size += _PyInlineValuesSize(tp);
2460
216
    }
2461
264M
    PyObject *op = gc_alloc(tp, size, presize);
2462
264M
    if (op == NULL) {
2463
0
        return NULL;
2464
0
    }
2465
264M
    _PyObject_Init(op, tp);
2466
264M
    if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
2467
216
        _PyObject_InitInlineValues(op, tp);
2468
216
    }
2469
264M
    return op;
2470
264M
}
2471
2472
PyVarObject *
2473
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2474
244M
{
2475
244M
    PyVarObject *op;
2476
2477
244M
    if (nitems < 0) {
2478
0
        PyErr_BadInternalCall();
2479
0
        return NULL;
2480
0
    }
2481
244M
    size_t presize = _PyType_PreHeaderSize(tp);
2482
244M
    size_t size = _PyObject_VAR_SIZE(tp, nitems);
2483
244M
    op = (PyVarObject *)gc_alloc(tp, size, presize);
2484
244M
    if (op == NULL) {
2485
0
        return NULL;
2486
0
    }
2487
244M
    _PyObject_InitVar(op, tp, nitems);
2488
244M
    return op;
2489
244M
}
2490
2491
PyObject *
2492
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2493
0
{
2494
0
    size_t presize = _PyType_PreHeaderSize(tp);
2495
0
    size_t size = _PyObject_SIZE(tp) + extra_size;
2496
0
    PyObject *op = gc_alloc(tp, size, presize);
2497
0
    if (op == NULL) {
2498
0
        return NULL;
2499
0
    }
2500
0
    memset((char *)op + sizeof(PyObject), 0, size - sizeof(PyObject));
2501
0
    _PyObject_Init(op, tp);
2502
0
    return op;
2503
0
}
2504
2505
PyVarObject *
2506
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2507
40
{
2508
40
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2509
40
    const size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2510
40
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2511
40
    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2512
0
        return (PyVarObject *)PyErr_NoMemory();
2513
0
    }
2514
40
    char *mem = (char *)op - presize;
2515
40
    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
2516
40
    if (mem == NULL) {
2517
0
        return (PyVarObject *)PyErr_NoMemory();
2518
0
    }
2519
40
    op = (PyVarObject *) (mem + presize);
2520
40
    Py_SET_SIZE(op, nitems);
2521
40
    return op;
2522
40
}
2523
2524
void
PyObject_GC_Del(void *op)
{
    /* Free a GC-allocated object: releases the pre-header together with
       the object's own memory (the counterpart of gc_alloc()). */
    size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
    PyGC_Head *g = AS_GC(op);
    if (_PyObject_GC_IS_TRACKED(op)) {
        /* The object should have been untracked before deallocation;
           recover by unlinking it and fixing the GC bookkeeping here. */
        gc_list_remove(g);
        GCState *gcstate = get_gc_state();
        if (gcstate->young.count > 0) {
            gcstate->young.count--;
        }
        gcstate->heap_size--;
#ifdef Py_DEBUG
        /* In debug builds, emit a warning so the missing untrack gets
           noticed; save/restore any exception currently being raised. */
        PyObject *exc = PyErr_GetRaisedException();
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL,
                                     "Object of type %s is not untracked "
                                     "before destruction",
                                     Py_TYPE(op)->tp_name))
        {
            PyErr_FormatUnraisable("Exception ignored on object deallocation");
        }
        PyErr_SetRaisedException(exc);
#endif
    }
    PyObject_Free(((char *)op)-presize);
}
2551
2552
int
2553
PyObject_GC_IsTracked(PyObject* obj)
2554
9.23k
{
2555
9.23k
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2556
32
        return 1;
2557
32
    }
2558
9.20k
    return 0;
2559
9.23k
}
2560
2561
int
2562
PyObject_GC_IsFinalized(PyObject *obj)
2563
0
{
2564
0
    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
2565
0
         return 1;
2566
0
    }
2567
0
    return 0;
2568
0
}
2569
2570
static int
2571
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
2572
0
{
2573
0
    PyGC_Head *gc_list, *gc;
2574
0
    gc_list = &gen->head;
2575
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2576
0
        PyObject *op = FROM_GC(gc);
2577
0
        Py_INCREF(op);
2578
0
        int res = callback(op, arg);
2579
0
        Py_DECREF(op);
2580
0
        if (!res) {
2581
0
            return -1;
2582
0
        }
2583
0
    }
2584
0
    return 0;
2585
0
}
2586
2587
void
2588
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2589
0
{
2590
0
    GCState *gcstate = get_gc_state();
2591
0
    int original_state = gcstate->enabled;
2592
0
    gcstate->enabled = 0;
2593
0
    if (visit_generation(callback, arg, &gcstate->young) < 0) {
2594
0
        goto done;
2595
0
    }
2596
0
    if (visit_generation(callback, arg, &gcstate->old[0]) < 0) {
2597
0
        goto done;
2598
0
    }
2599
0
    if (visit_generation(callback, arg, &gcstate->old[1]) < 0) {
2600
0
        goto done;
2601
0
    }
2602
0
    visit_generation(callback, arg, &gcstate->permanent_generation);
2603
0
done:
2604
0
    gcstate->enabled = original_state;
2605
0
}
2606
2607
#endif  // Py_GIL_DISABLED