Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/gc.c
Line
Count
Source
1
//  This implements the reference cycle garbage collector.
2
//  The Python module interface to the collector is in gcmodule.c.
3
//  See InternalDocs/garbage_collector.md for more information.
4
5
#include "Python.h"
6
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
7
#include "pycore_dict.h"          // _PyInlineValuesSize()
8
#include "pycore_initconfig.h"    // _PyStatus_OK()
9
#include "pycore_interp.h"        // PyInterpreterState.gc
10
#include "pycore_interpframe.h"   // _PyFrame_GetLocalsArray()
11
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
12
#include "pycore_pystate.h"       // _PyThreadState_GET()
13
#include "pycore_tuple.h"         // _PyTuple_MaybeUntrack()
14
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
15
16
#include "pydtrace.h"
17
18
19
#ifndef Py_GIL_DISABLED
20
21
typedef struct _gc_runtime_state GCState;
22
23
#ifdef Py_DEBUG
24
#  define GC_DEBUG
25
#endif
26
27
// Define this when debugging the GC
28
// #define GC_EXTRA_DEBUG
29
30
31
935M
#define GC_NEXT _PyGCHead_NEXT
32
344M
#define GC_PREV _PyGCHead_PREV
33
34
// update_refs() sets this bit for all objects in the current generation.
35
// subtract_refs() and move_unreachable() use this to distinguish
36
// whether a visited object is being collected (in GC) or not.
37
//
38
// move_unreachable() removes this flag from reachable objects.
39
// Only unreachable objects have this flag.
40
//
41
// No objects in interpreter have this flag after GC ends.
42
424M
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
43
44
// Lowest bit of _gc_next is used for UNREACHABLE flag.
45
//
46
// This flag represents the object is in unreachable list in move_unreachable()
47
//
48
// Although this flag is used only in move_unreachable(), move_unreachable()
49
// doesn't clear this flag to skip unnecessary iteration.
50
// move_legacy_finalizers() removes this flag instead.
51
// Between them, unreachable list is not normal list and we can not use
52
// most gc_list_* functions for it.
53
63.1M
#define NEXT_MASK_UNREACHABLE  2
54
55
1.88G
#define AS_GC(op) _Py_AS_GC(op)
56
651M
#define FROM_GC(gc) _Py_FROM_GC(gc)
57
58
// Automatically choose the generation that needs collecting.
59
#define GENERATION_AUTO (-1)
60
61
static inline int
62
gc_is_collecting(PyGC_Head *g)
63
254M
{
64
254M
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
65
254M
}
66
67
static inline void
68
gc_clear_collecting(PyGC_Head *g)
69
82.2M
{
70
82.2M
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
71
82.2M
}
72
73
static inline Py_ssize_t
74
gc_get_refs(PyGC_Head *g)
75
210M
{
76
210M
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
77
210M
}
78
79
static inline void
80
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
81
32.6M
{
82
32.6M
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
83
32.6M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
84
32.6M
}
85
86
static inline void
87
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
88
87.6M
{
89
87.6M
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
90
87.6M
        | PREV_MASK_COLLECTING
91
87.6M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
92
87.6M
}
93
94
static inline void
95
gc_decref(PyGC_Head *g)
96
53.6M
{
97
53.6M
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
98
53.6M
                              gc_get_refs(g) > 0,
99
53.6M
                              "refcount is too small");
100
53.6M
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
101
53.6M
}
102
103
static inline int
104
gc_old_space(PyGC_Head *g)
105
272M
{
106
272M
    return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1;
107
272M
}
108
109
static inline int
110
other_space(int space)
111
1.76k
{
112
1.76k
    assert(space == 0 || space == 1);
113
1.76k
    return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1;
114
1.76k
}
115
116
static inline void
117
gc_flip_old_space(PyGC_Head *g)
118
146M
{
119
146M
    g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1;
120
146M
}
121
122
static inline void
123
gc_set_old_space(PyGC_Head *g, int space)
124
76.8M
{
125
76.8M
    assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1);
126
76.8M
    g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1;
127
76.8M
    g->_gc_next |= space;
128
76.8M
}
129
130
static PyGC_Head *
131
GEN_HEAD(GCState *gcstate, int n)
132
0
{
133
0
    assert((gcstate->visited_space & (~1)) == 0);
134
0
    switch(n) {
135
0
        case 0:
136
0
            return &gcstate->young.head;
137
0
        case 1:
138
0
            return &gcstate->old[gcstate->visited_space].head;
139
0
        case 2:
140
0
            return &gcstate->old[gcstate->visited_space^1].head;
141
0
        default:
142
0
            Py_UNREACHABLE();
143
0
    }
144
0
}
145
146
static GCState *
147
get_gc_state(void)
148
22.0k
{
149
22.0k
    PyInterpreterState *interp = _PyInterpreterState_GET();
150
22.0k
    return &interp->gc;
151
22.0k
}
152
153
154
void
155
_PyGC_InitState(GCState *gcstate)
156
36
{
157
36
#define INIT_HEAD(GEN) \
158
144
    do { \
159
144
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
160
144
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
161
144
    } while (0)
162
163
36
    assert(gcstate->young.count == 0);
164
36
    assert(gcstate->old[0].count == 0);
165
36
    assert(gcstate->old[1].count == 0);
166
36
    INIT_HEAD(gcstate->young);
167
36
    INIT_HEAD(gcstate->old[0]);
168
36
    INIT_HEAD(gcstate->old[1]);
169
36
    INIT_HEAD(gcstate->permanent_generation);
170
171
36
#undef INIT_HEAD
172
36
}
173
174
175
PyStatus
176
_PyGC_Init(PyInterpreterState *interp)
177
36
{
178
36
    GCState *gcstate = &interp->gc;
179
180
36
    gcstate->generation_stats = PyMem_RawCalloc(1, sizeof(struct gc_stats));
181
36
    if (gcstate->generation_stats == NULL) {
182
0
        return _PyStatus_NO_MEMORY();
183
0
    }
184
185
36
    gcstate->garbage = PyList_New(0);
186
36
    if (gcstate->garbage == NULL) {
187
0
        return _PyStatus_NO_MEMORY();
188
0
    }
189
190
36
    gcstate->callbacks = PyList_New(0);
191
36
    if (gcstate->callbacks == NULL) {
192
0
        return _PyStatus_NO_MEMORY();
193
0
    }
194
36
    gcstate->heap_size = 0;
195
196
36
    return _PyStatus_OK();
197
36
}
198
199
200
/*
201
_gc_prev values
202
---------------
203
204
Between collections, _gc_prev is used for doubly linked list.
205
206
Lowest two bits of _gc_prev are used for flags.
207
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
208
or _PyObject_GC_UNTRACK() is called.
209
210
During a collection, _gc_prev is temporary used for gc_refs, and the gc list
211
is singly linked until _gc_prev is restored.
212
213
gc_refs
214
    At the start of a collection, update_refs() copies the true refcount
215
    to gc_refs, for each object in the generation being collected.
216
    subtract_refs() then adjusts gc_refs so that it equals the number of
217
    times an object is referenced directly from outside the generation
218
    being collected.
219
220
PREV_MASK_COLLECTING
221
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
222
    update_refs().
223
224
225
_gc_next values
226
---------------
227
228
_gc_next takes these values:
229
230
0
231
    The object is not tracked
232
233
!= 0
234
    Pointer to the next object in the GC list.
235
    Additionally, lowest bit is used temporary for
236
    NEXT_MASK_UNREACHABLE flag described below.
237
238
NEXT_MASK_UNREACHABLE
239
    move_unreachable() then moves objects not reachable (whether directly or
240
    indirectly) from outside the generation into an "unreachable" set and
241
    set this flag.
242
243
    Objects that are found to be reachable have gc_refs set to 1.
244
    When this flag is set for the reachable object, the object must be in
245
    "unreachable" set.
246
    The flag is unset and the object is moved back to "reachable" set.
247
248
    move_legacy_finalizers() will remove this flag from "unreachable" set.
249
*/
250
251
/*** list functions ***/
252
253
static inline void
254
gc_list_init(PyGC_Head *list)
255
312k
{
256
    // List header must not have flags.
257
    // We can assign pointer by simple cast.
258
312k
    list->_gc_prev = (uintptr_t)list;
259
312k
    list->_gc_next = (uintptr_t)list;
260
312k
}
261
262
static inline int
263
gc_list_is_empty(PyGC_Head *list)
264
158M
{
265
158M
    return (list->_gc_next == (uintptr_t)list);
266
158M
}
267
268
/* Append `node` to `list`. */
269
static inline void
270
gc_list_append(PyGC_Head *node, PyGC_Head *list)
271
11.7M
{
272
11.7M
    assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0);
273
11.7M
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
274
275
    // last <-> node
276
11.7M
    _PyGCHead_SET_PREV(node, last);
277
11.7M
    _PyGCHead_SET_NEXT(last, node);
278
279
    // node <-> list
280
11.7M
    _PyGCHead_SET_NEXT(node, list);
281
11.7M
    list->_gc_prev = (uintptr_t)node;
282
11.7M
}
283
284
/* Remove `node` from the gc list it's currently in. */
285
static inline void
286
gc_list_remove(PyGC_Head *node)
287
0
{
288
0
    PyGC_Head *prev = GC_PREV(node);
289
0
    PyGC_Head *next = GC_NEXT(node);
290
291
0
    _PyGCHead_SET_NEXT(prev, next);
292
0
    _PyGCHead_SET_PREV(next, prev);
293
294
0
    node->_gc_next = 0; /* object is not currently tracked */
295
0
}
296
297
/* Move `node` from the gc list it's currently in (which is not explicitly
298
 * named here) to the end of `list`.  This is semantically the same as
299
 * gc_list_remove(node) followed by gc_list_append(node, list).
300
 */
301
static void
302
gc_list_move(PyGC_Head *node, PyGC_Head *list)
303
303M
{
304
    /* Unlink from current list. */
305
303M
    PyGC_Head *from_prev = GC_PREV(node);
306
303M
    PyGC_Head *from_next = GC_NEXT(node);
307
303M
    _PyGCHead_SET_NEXT(from_prev, from_next);
308
303M
    _PyGCHead_SET_PREV(from_next, from_prev);
309
310
    /* Relink at end of new list. */
311
    // list must not have flags.  So we can skip macros.
312
303M
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
313
303M
    _PyGCHead_SET_PREV(node, to_prev);
314
303M
    _PyGCHead_SET_NEXT(to_prev, node);
315
303M
    list->_gc_prev = (uintptr_t)node;
316
303M
    _PyGCHead_SET_NEXT(node, list);
317
303M
}
318
319
/* append list `from` onto list `to`; `from` becomes an empty list */
320
static void
321
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
322
132k
{
323
132k
    assert(from != to);
324
132k
    if (!gc_list_is_empty(from)) {
325
65.7k
        PyGC_Head *to_tail = GC_PREV(to);
326
65.7k
        PyGC_Head *from_head = GC_NEXT(from);
327
65.7k
        PyGC_Head *from_tail = GC_PREV(from);
328
65.7k
        assert(from_head != from);
329
65.7k
        assert(from_tail != from);
330
65.7k
        assert(gc_list_is_empty(to) ||
331
65.7k
            gc_old_space(to_tail) == gc_old_space(from_tail));
332
333
65.7k
        _PyGCHead_SET_NEXT(to_tail, from_head);
334
65.7k
        _PyGCHead_SET_PREV(from_head, to_tail);
335
336
65.7k
        _PyGCHead_SET_NEXT(from_tail, to);
337
65.7k
        _PyGCHead_SET_PREV(to, from_tail);
338
65.7k
    }
339
132k
    gc_list_init(from);
340
132k
}
341
342
static Py_ssize_t
343
gc_list_size(PyGC_Head *list)
344
44.0k
{
345
44.0k
    PyGC_Head *gc;
346
44.0k
    Py_ssize_t n = 0;
347
85.3M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
348
85.2M
        n++;
349
85.2M
    }
350
44.0k
    return n;
351
44.0k
}
352
353
/* Walk the list and mark all objects as non-collecting */
354
static inline void
355
gc_list_clear_collecting(PyGC_Head *collectable)
356
22.0k
{
357
22.0k
    PyGC_Head *gc;
358
8.94M
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
359
8.91M
        gc_clear_collecting(gc);
360
8.91M
    }
361
22.0k
}
362
363
/* Append objects in a GC list to a Python list.
364
 * Return 0 if all OK, < 0 if error (out of memory for list)
365
 */
366
static int
367
append_objects(PyObject *py_list, PyGC_Head *gc_list)
368
0
{
369
0
    PyGC_Head *gc;
370
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
371
0
        PyObject *op = FROM_GC(gc);
372
0
        if (op != py_list) {
373
0
            if (PyList_Append(py_list, op)) {
374
0
                return -1; /* exception */
375
0
            }
376
0
        }
377
0
    }
378
0
    return 0;
379
0
}
380
381
// Constants for validate_list's flags argument.
382
enum flagstates {collecting_clear_unreachable_clear,
383
                 collecting_clear_unreachable_set,
384
                 collecting_set_unreachable_clear,
385
                 collecting_set_unreachable_set};
386
387
#ifdef GC_DEBUG
388
// validate_list checks list consistency.  And it works as document
389
// describing when flags are expected to be set / unset.
390
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
391
// the prev and next pointers are "polluted" with flags.
392
// What's checked:
393
// - The `head` pointers are not polluted.
394
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
395
//   set or clear, as specified by the 'flags' argument.
396
// - The prev and next pointers are mutually consistent.
397
static void
398
validate_list(PyGC_Head *head, enum flagstates flags)
399
{
400
    assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
401
    assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
402
    uintptr_t prev_value = 0, next_value = 0;
403
    switch (flags) {
404
        case collecting_clear_unreachable_clear:
405
            break;
406
        case collecting_set_unreachable_clear:
407
            prev_value = PREV_MASK_COLLECTING;
408
            break;
409
        case collecting_clear_unreachable_set:
410
            next_value = NEXT_MASK_UNREACHABLE;
411
            break;
412
        case collecting_set_unreachable_set:
413
            prev_value = PREV_MASK_COLLECTING;
414
            next_value = NEXT_MASK_UNREACHABLE;
415
            break;
416
        default:
417
            assert(! "bad internal flags argument");
418
    }
419
    PyGC_Head *prev = head;
420
    PyGC_Head *gc = GC_NEXT(head);
421
    while (gc != head) {
422
        PyGC_Head *trueprev = GC_PREV(gc);
423
        PyGC_Head *truenext = GC_NEXT(gc);
424
        assert(truenext != NULL);
425
        assert(trueprev == prev);
426
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
427
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
428
        prev = gc;
429
        gc = truenext;
430
    }
431
    assert(prev == GC_PREV(head));
432
}
433
434
#else
435
286k
#define validate_list(x, y) do{}while(0)
436
#endif
437
438
#ifdef GC_EXTRA_DEBUG
439
440
441
static void
442
gc_list_validate_space(PyGC_Head *head, int space) {
443
    PyGC_Head *gc = GC_NEXT(head);
444
    while (gc != head) {
445
        assert(gc_old_space(gc) == space);
446
        gc = GC_NEXT(gc);
447
    }
448
}
449
450
static void
451
validate_spaces(GCState *gcstate)
452
{
453
    int visited = gcstate->visited_space;
454
    int not_visited = other_space(visited);
455
    gc_list_validate_space(&gcstate->young.head, not_visited);
456
    for (int space = 0; space < 2; space++) {
457
        gc_list_validate_space(&gcstate->old[space].head, space);
458
    }
459
    gc_list_validate_space(&gcstate->permanent_generation.head, visited);
460
}
461
462
static void
463
validate_consistent_old_space(PyGC_Head *head)
464
{
465
    PyGC_Head *gc = GC_NEXT(head);
466
    if (gc == head) {
467
        return;
468
    }
469
    int old_space = gc_old_space(gc);
470
    while (gc != head) {
471
        PyGC_Head *truenext = GC_NEXT(gc);
472
        assert(truenext != NULL);
473
        assert(gc_old_space(gc) == old_space);
474
        gc = truenext;
475
    }
476
}
477
478
479
#else
480
120k
#define validate_spaces(g) do{}while(0)
481
110k
#define validate_consistent_old_space(l) do{}while(0)
482
113k
#define gc_list_validate_space(l, s) do{}while(0)
483
#endif
484
485
/*** end of list stuff ***/
486
487
488
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
489
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
490
 */
491
static Py_ssize_t
492
update_refs(PyGC_Head *containers)
493
44.0k
{
494
44.0k
    PyGC_Head *next;
495
44.0k
    PyGC_Head *gc = GC_NEXT(containers);
496
44.0k
    Py_ssize_t candidates = 0;
497
498
87.7M
    while (gc != containers) {
499
87.6M
        next = GC_NEXT(gc);
500
87.6M
        PyObject *op = FROM_GC(gc);
501
87.6M
        if (_Py_IsImmortal(op)) {
502
0
            assert(!_Py_IsStaticImmortal(op));
503
0
            _PyObject_GC_UNTRACK(op);
504
0
           gc = next;
505
0
           continue;
506
0
        }
507
87.6M
        gc_reset_refs(gc, Py_REFCNT(op));
508
        /* Python's cyclic gc should never see an incoming refcount
509
         * of 0:  if something decref'ed to 0, it should have been
510
         * deallocated immediately at that time.
511
         * Possible cause (if the assert triggers):  a tp_dealloc
512
         * routine left a gc-aware object tracked during its teardown
513
         * phase, and did something-- or allowed something to happen --
514
         * that called back into Python.  gc can trigger then, and may
515
         * see the still-tracked dying object.  Before this assert
516
         * was added, such mistakes went on to allow gc to try to
517
         * delete the object again.  In a debug build, that caused
518
         * a mysterious segfault, when _Py_ForgetReference tried
519
         * to remove the object from the doubly-linked list of all
520
         * objects a second time.  In a release build, an actual
521
         * double deallocation occurred, which leads to corruption
522
         * of the allocator's internal bookkeeping pointers.  That's
523
         * so serious that maybe this should be a release-build
524
         * check instead of an assert?
525
         */
526
87.6M
        _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
527
87.6M
        gc = next;
528
87.6M
        candidates++;
529
87.6M
    }
530
44.0k
    return candidates;
531
44.0k
}
532
533
struct visit_decref_context {
534
    PyObject *parent;
535
    struct gc_generation_stats *stats;
536
};
537
538
/* A traversal callback for subtract_refs. */
539
static int
540
visit_decref(PyObject *op, void *arg)
541
267M
{
542
267M
    OBJECT_STAT_INC(object_visits);
543
267M
    struct visit_decref_context *ctx = (struct visit_decref_context *)arg;
544
267M
    ctx->stats->object_visits += 1;
545
267M
    _PyObject_ASSERT(ctx->parent, !_PyObject_IsFreed(op));
546
547
267M
    if (_PyObject_IS_GC(op)) {
548
143M
        PyGC_Head *gc = AS_GC(op);
549
        /* We're only interested in gc_refs for objects in the
550
         * generation being collected, which can be recognized
551
         * because only they have positive gc_refs.
552
         */
553
143M
        if (gc_is_collecting(gc)) {
554
53.6M
            gc_decref(gc);
555
53.6M
        }
556
143M
    }
557
267M
    return 0;
558
267M
}
559
560
int
561
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
562
2.57M
{
563
    // This is a bit tricky! We want to ignore stackrefs with embedded
564
    // refcounts when computing the incoming references, but otherwise treat
565
    // them like normal.
566
2.57M
    assert(!PyStackRef_IsTaggedInt(*ref));
567
2.57M
    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
568
445
        return 0;
569
445
    }
570
2.57M
    Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
571
2.57M
    return 0;
572
2.57M
}
573
574
int
575
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
576
480k
{
577
480k
    _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
578
    /* locals and stack */
579
2.75M
    for (; ref < frame->stackpointer; ref++) {
580
2.26M
        if (!PyStackRef_IsTaggedInt(*ref)) {
581
2.26M
            _Py_VISIT_STACKREF(*ref);
582
2.26M
        }
583
2.26M
    }
584
480k
    return 0;
585
480k
}
586
587
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
588
 * for all objects in containers. The ones with gc_refs > 0 are directly
589
 * reachable from outside containers, and so can't be collected.
590
 */
591
static void
592
subtract_refs(PyGC_Head *containers, struct gc_generation_stats *stats)
593
44.0k
{
594
44.0k
    traverseproc traverse;
595
44.0k
    PyGC_Head *gc = GC_NEXT(containers);
596
87.7M
    for (; gc != containers; gc = GC_NEXT(gc)) {
597
87.6M
        PyObject *op = FROM_GC(gc);
598
87.6M
        traverse = Py_TYPE(op)->tp_traverse;
599
87.6M
        struct visit_decref_context ctx = {
600
87.6M
            .parent = op,
601
87.6M
            .stats = stats
602
87.6M
        };
603
87.6M
        (void) traverse(op,
604
87.6M
                        visit_decref,
605
87.6M
                        &ctx);
606
87.6M
    }
607
44.0k
}
608
609
struct visit_reachable_context {
610
    PyGC_Head *head;
611
    struct gc_generation_stats *stats;
612
};
613
614
/* A traversal callback for move_unreachable. */
615
static int
616
visit_reachable(PyObject *op, void *arg)
617
205M
{
618
205M
    struct visit_reachable_context *ctx = (struct visit_reachable_context *)arg;
619
205M
    ctx->stats->object_visits += 1;
620
205M
    PyGC_Head *reachable = ctx->head;
621
205M
    OBJECT_STAT_INC(object_visits);
622
205M
    if (!_PyObject_IS_GC(op)) {
623
94.5M
        return 0;
624
94.5M
    }
625
626
111M
    PyGC_Head *gc = AS_GC(op);
627
111M
    const Py_ssize_t gc_refs = gc_get_refs(gc);
628
629
    // Ignore objects in other generation.
630
    // This also skips objects "to the left" of the current position in
631
    // move_unreachable's scan of the 'young' list - they've already been
632
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
633
111M
    if (! gc_is_collecting(gc)) {
634
77.5M
        return 0;
635
77.5M
    }
636
    // It would be a logic error elsewhere if the collecting flag were set on
637
    // an untracked object.
638
33.5M
    _PyObject_ASSERT(op, gc->_gc_next != 0);
639
640
33.5M
    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
641
        /* This had gc_refs = 0 when move_unreachable got
642
         * to it, but turns out it's reachable after all.
643
         * Move it back to move_unreachable's 'young' list,
644
         * and move_unreachable will eventually get to it
645
         * again.
646
         */
647
        // Manually unlink gc from unreachable list because the list functions
648
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
649
11.7M
        PyGC_Head *prev = GC_PREV(gc);
650
11.7M
        PyGC_Head *next = GC_NEXT(gc);
651
11.7M
        _PyObject_ASSERT(FROM_GC(prev),
652
11.7M
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
653
11.7M
        _PyObject_ASSERT(FROM_GC(next),
654
11.7M
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
655
11.7M
        prev->_gc_next = gc->_gc_next;  // copy flag bits
656
11.7M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
657
11.7M
        _PyGCHead_SET_PREV(next, prev);
658
659
11.7M
        gc_list_append(gc, reachable);
660
11.7M
        gc_set_refs(gc, 1);
661
11.7M
    }
662
21.8M
    else if (gc_refs == 0) {
663
        /* This is in move_unreachable's 'young' list, but
664
         * the traversal hasn't yet gotten to it.  All
665
         * we need to do is tell move_unreachable that it's
666
         * reachable.
667
         */
668
20.9M
        gc_set_refs(gc, 1);
669
20.9M
    }
670
    /* Else there's nothing to do.
671
     * If gc_refs > 0, it must be in move_unreachable's 'young'
672
     * list, and move_unreachable will eventually get to it.
673
     */
674
905k
    else {
675
905k
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
676
905k
    }
677
33.5M
    return 0;
678
111M
}
679
680
/* Move the unreachable objects from young to unreachable.  After this,
681
 * all objects in young don't have PREV_MASK_COLLECTING flag and
682
 * unreachable have the flag.
683
 * All objects in young after this are directly or indirectly reachable
684
 * from outside the original young; and all objects in unreachable are
685
 * not.
686
 *
687
 * This function restores _gc_prev pointer.  young and unreachable are
688
 * doubly linked list after this function.
689
 * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag.
690
 * So we can not use gc_list_* functions for unreachable until we remove the flag.
691
 */
692
static void
693
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, struct gc_generation_stats *stats)
694
44.0k
{
695
    // previous elem in the young list, used for restore gc_prev.
696
44.0k
    PyGC_Head *prev = young;
697
44.0k
    PyGC_Head *gc = GC_NEXT(young);
698
699
    /* Invariants:  all objects "to the left" of us in young are reachable
700
     * (directly or indirectly) from outside the young list as it was at entry.
701
     *
702
     * All other objects from the original young "to the left" of us are in
703
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
704
     * left of us in 'young' now have been scanned, and no objects here
705
     * or to the right have been scanned yet.
706
     */
707
708
44.0k
    struct visit_reachable_context ctx = {
709
44.0k
        .head = young,
710
44.0k
        .stats = stats
711
44.0k
    };
712
713
44.0k
    validate_consistent_old_space(young);
714
    /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */
715
44.0k
    uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1);
716
99.4M
    while (gc != young) {
717
99.3M
        if (gc_get_refs(gc)) {
718
            /* gc is definitely reachable from outside the
719
             * original 'young'.  Mark it as such, and traverse
720
             * its pointers to find any other objects that may
721
             * be directly reachable from it.  Note that the
722
             * call to tp_traverse may append objects to young,
723
             * so we have to wait until it returns to determine
724
             * the next object to visit.
725
             */
726
69.8M
            PyObject *op = FROM_GC(gc);
727
69.8M
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
728
69.8M
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
729
69.8M
                                      "refcount is too small");
730
            // NOTE: visit_reachable may change gc->_gc_next when
731
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
732
69.8M
            (void) traverse(op,
733
69.8M
                    visit_reachable,
734
69.8M
                    &ctx);
735
            // relink gc_prev to prev element.
736
69.8M
            _PyGCHead_SET_PREV(gc, prev);
737
            // gc is not COLLECTING state after here.
738
69.8M
            gc_clear_collecting(gc);
739
69.8M
            prev = gc;
740
69.8M
        }
741
29.5M
        else {
742
            /* This *may* be unreachable.  To make progress,
743
             * assume it is.  gc isn't directly reachable from
744
             * any object we've already traversed, but may be
745
             * reachable from an object we haven't gotten to yet.
746
             * visit_reachable will eventually move gc back into
747
             * young if that's so, and we'll see it again.
748
             */
749
            // Move gc to unreachable.
750
            // No need to gc->next->prev = prev because it is single linked.
751
29.5M
            prev->_gc_next = gc->_gc_next;
752
753
            // We can't use gc_list_append() here because we use
754
            // NEXT_MASK_UNREACHABLE here.
755
29.5M
            PyGC_Head *last = GC_PREV(unreachable);
756
            // NOTE: Since all objects in unreachable set has
757
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
758
            // But this may pollute the unreachable list head's 'next' pointer
759
            // too. That's semantically senseless but expedient here - the
760
            // damage is repaired when this function ends.
761
29.5M
            last->_gc_next = flags | (uintptr_t)gc;
762
29.5M
            _PyGCHead_SET_PREV(gc, last);
763
29.5M
            gc->_gc_next = flags | (uintptr_t)unreachable;
764
29.5M
            unreachable->_gc_prev = (uintptr_t)gc;
765
29.5M
        }
766
99.3M
        gc = _PyGCHead_NEXT(prev);
767
99.3M
    }
768
    // young->_gc_prev must be last element remained in the list.
769
44.0k
    young->_gc_prev = (uintptr_t)prev;
770
44.0k
    young->_gc_next &= _PyGC_PREV_MASK;
771
    // don't let the pollution of the list head's next pointer leak
772
44.0k
    unreachable->_gc_next &= _PyGC_PREV_MASK;
773
44.0k
}
774
775
/* In theory, all tuples should be younger than the
776
* objects they refer to, as tuples are immortal.
777
* Therefore, untracking tuples in oldest-first order in the
778
* young generation before promoting them should have tracked
779
* all the tuples that can be untracked.
780
*
781
* Unfortunately, the C API allows tuples to be created
782
* and then filled in. So this won't untrack all tuples
783
* that can be untracked. It should untrack most of them
784
* and is much faster than a more complex approach that
785
* would untrack all relevant tuples.
786
*/
787
static void
788
untrack_tuples(PyGC_Head *head)
789
45.8k
{
790
45.8k
    PyGC_Head *gc = GC_NEXT(head);
791
219M
    while (gc != head) {
792
219M
        PyObject *op = FROM_GC(gc);
793
219M
        PyGC_Head *next = GC_NEXT(gc);
794
219M
        if (PyTuple_CheckExact(op)) {
795
93.6M
            _PyTuple_MaybeUntrack(op);
796
93.6M
        }
797
219M
        gc = next;
798
219M
    }
799
45.8k
}
800
801
/* Return true if object has a pre-PEP 442 finalization method. */
802
static int
803
has_legacy_finalizer(PyObject *op)
804
8.91M
{
805
8.91M
    return Py_TYPE(op)->tp_del != NULL;
806
8.91M
}
807
808
/* Move the objects in unreachable with tp_del slots into `finalizers`.
809
 *
810
 * This function also removes NEXT_MASK_UNREACHABLE flag
811
 * from _gc_next in unreachable.
812
 */
813
static void
814
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
815
22.0k
{
816
22.0k
    PyGC_Head *gc, *next;
817
22.0k
    _PyObject_ASSERT(
818
22.0k
        FROM_GC(unreachable),
819
22.0k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
820
821
    /* March over unreachable.  Move objects with finalizers into
822
     * `finalizers`.
823
     */
824
8.94M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
825
8.91M
        PyObject *op = FROM_GC(gc);
826
827
8.91M
        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
828
8.91M
        next = GC_NEXT(gc);
829
8.91M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
830
831
8.91M
        if (has_legacy_finalizer(op)) {
832
0
            gc_clear_collecting(gc);
833
0
            gc_list_move(gc, finalizers);
834
0
        }
835
8.91M
    }
836
22.0k
}
837
838
static inline void
839
clear_unreachable_mask(PyGC_Head *unreachable)
840
22.0k
{
841
    /* Check that the list head does not have the unreachable bit set */
842
22.0k
    _PyObject_ASSERT(
843
22.0k
        FROM_GC(unreachable),
844
22.0k
        ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
845
22.0k
    _PyObject_ASSERT(
846
22.0k
        FROM_GC(unreachable),
847
22.0k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
848
849
22.0k
    PyGC_Head *gc, *next;
850
8.94M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
851
8.91M
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
852
8.91M
        next = GC_NEXT(gc);
853
8.91M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
854
8.91M
    }
855
22.0k
    validate_list(unreachable, collecting_set_unreachable_clear);
856
22.0k
}
857
858
/* A traversal callback for move_legacy_finalizer_reachable. */
859
static int
860
visit_move(PyObject *op, void *arg)
861
0
{
862
0
    struct visit_reachable_context *ctx = (struct visit_reachable_context *)arg;
863
0
    PyGC_Head *tolist = ctx->head;
864
0
    ctx->stats->object_visits += 1;
865
0
    OBJECT_STAT_INC(object_visits);
866
0
    if (_PyObject_IS_GC(op)) {
867
0
        PyGC_Head *gc = AS_GC(op);
868
0
        if (gc_is_collecting(gc)) {
869
0
            gc_list_move(gc, tolist);
870
0
            gc_clear_collecting(gc);
871
0
        }
872
0
    }
873
0
    return 0;
874
0
}
875
876
/* Move objects that are reachable from finalizers, from the unreachable set
877
 * into finalizers set.
878
 */
879
static void
880
move_legacy_finalizer_reachable(PyGC_Head *finalizers, struct gc_generation_stats *stats)
881
22.0k
{
882
22.0k
    struct visit_reachable_context ctx = {
883
22.0k
        .head = finalizers,
884
22.0k
        .stats = stats
885
22.0k
    };
886
22.0k
    traverseproc traverse;
887
22.0k
    PyGC_Head *gc = GC_NEXT(finalizers);
888
22.0k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
889
        /* Note that the finalizers list may grow during this. */
890
0
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
891
0
        (void) traverse(FROM_GC(gc),
892
0
                        visit_move,
893
0
                        &ctx);
894
0
    }
895
22.0k
}
896
897
/* Handle weakref callbacks.  Note that it's possible for such weakrefs to be
898
 * outside the unreachable set -- indeed, those are precisely the weakrefs
899
 * whose callbacks must be invoked.  See gc_weakref.txt for overview & some
900
 * details.
901
 *
902
 * The clearing of weakrefs is suble and must be done carefully, as there was
903
 * previous bugs related to this.  First, weakrefs to the unreachable set of
904
 * objects must be cleared before we start calling `tp_clear`.  If we don't,
905
 * those weakrefs can reveal unreachable objects to Python-level code and that
906
 * is not safe.  Many objects are not usable after `tp_clear` has been called
907
 * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
908
 * example bugs).
909
 *
910
 * Weakrefs with callbacks always need to be cleared before executing the
911
 * callback.  That's because sometimes a callback will call the ref object,
912
 * to check if the reference is actually dead (KeyedRef does this, for
913
 * example).  We want to indicate that it is dead, even though it is possible
914
 * a finalizer might resurrect it.  Clearing also prevents the callback from
915
 * executing more than once.
916
 *
917
 * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
918
 * calling finalizers.  However, since tp_subclasses started being necessary
919
 * to invalidate caches (e.g. by PyType_Modified), that clearing has created
920
 * a bug.  If the weakref to the subclass is cleared before a finalizer is
921
 * called, the cache may not be correctly invalidated.  That can lead to
922
 * segfaults since the caches can refer to deallocated objects (GH-135552
923
 * is an example).  Now, we delay the clear of weakrefs without callbacks
924
 * until *after* finalizers have been executed.  That means weakrefs without
925
 * callbacks are still usable while finalizers are being executed.
926
 *
927
 * The weakrefs that are inside the unreachable set must also be cleared.
928
 * The reason this is required is not immediately obvious.  If the weakref
929
 * refers to an object outside of the unreachable set, that object might go
930
 * away when we start clearing objects.  Normally, the object should also be
931
 * part of the unreachable set but that's not true in the case of incomplete
932
 * or missing `tp_traverse` methods.  When that object goes away, the callback
933
 * for weakref can be executed and that could reveal unreachable objects to
934
 * Python-level code.  See bpo-38006 as an example bug.
935
 */
936
static int
937
handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
938
22.0k
{
939
22.0k
    PyGC_Head *gc;
940
22.0k
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
941
22.0k
    PyGC_Head *next;
942
22.0k
    int num_freed = 0;
943
944
22.0k
    gc_list_init(&wrcb_to_call);
945
946
    /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the
947
     * callback needs to be invoked. We make another pass over wrcb_to_call,
948
     * invoking callbacks, after this pass completes.
949
     */
950
8.94M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
951
8.91M
        PyWeakReference **wrlist;
952
953
8.91M
        PyObject *op = FROM_GC(gc);
954
8.91M
        next = GC_NEXT(gc);
955
956
8.91M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
957
6.16M
            continue;
958
6.16M
        }
959
960
        /* It supports weakrefs.  Does it have any?
961
         *
962
         * This is never triggered for static types so we can avoid the
963
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
964
         */
965
2.74M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
966
967
        /* `op` may have some weakrefs.  March over the list and move the
968
         * weakrefs with callbacks that must be called into wrcb_to_call.
969
         */
970
2.74M
        PyWeakReference *next_wr;
971
3.01M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
972
            // Get the next list element to get iterator progress if we omit
973
            // clearing of the weakref (because _PyWeakref_ClearRef changes
974
            // next pointer in the wrlist).
975
267k
            next_wr = wr->wr_next;
976
977
267k
            if (wr->wr_callback == NULL) {
978
                /* no callback */
979
227k
                continue;
980
227k
            }
981
982
            // Clear the weakref.  See the comments above this function for
983
            // reasons why we need to clear weakrefs that have callbacks.
984
            // Note that _PyWeakref_ClearRef clears the weakref but leaves the
985
            // callback pointer intact.  Obscure: it also changes *wrlist.
986
40.3k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
987
40.3k
            _PyWeakref_ClearRef(wr);
988
40.3k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
989
990
            /* Headache time.  `op` is going away, and is weakly referenced by
991
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
992
             * is also trash, no:
993
             *
994
             * 1. There's no need to call it.  The object and the weakref are
995
             *    both going away, so it's legitimate to pretend the weakref is
996
             *    going away first.  The user has to ensure a weakref outlives its
997
             *    referent if they want a guarantee that the wr callback will get
998
             *    invoked.
999
             *
1000
             * 2. It may be catastrophic to call it.  If the callback is also in
1001
             *    cyclic trash (CT), then although the CT is unreachable from
1002
             *    outside the current generation, CT may be reachable from the
1003
             *    callback.  Then the callback could resurrect insane objects.
1004
             *
1005
             * Since the callback is never needed and may be unsafe in this
1006
             * case, wr is simply left in the unreachable set.  Note that
1007
             * clear_weakrefs() will ensure its callback will not trigger
1008
             * inside delete_garbage().
1009
             *
1010
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
1011
             * weakref outlived the trash.  Note that since wr isn't CT in this
1012
             * case, its callback can't be CT either -- wr acted as an external
1013
             * root to this generation, and therefore its callback did too.  So
1014
             * nothing in CT is reachable from the callback either, so it's hard
1015
             * to imagine how calling it later could create a problem for us.  wr
1016
             * is moved to wrcb_to_call in this case.
1017
             */
1018
40.3k
            if (gc_is_collecting(AS_GC((PyObject *)wr))) {
1019
0
                continue;
1020
0
            }
1021
1022
            /* Create a new reference so that wr can't go away
1023
             * before we can process it again.
1024
             */
1025
40.3k
            Py_INCREF(wr);
1026
1027
            /* Move wr to wrcb_to_call, for the next pass. */
1028
40.3k
            PyGC_Head *wrasgc = AS_GC((PyObject *)wr);
1029
            // wrasgc is reachable, but next isn't, so they can't be the same
1030
40.3k
            _PyObject_ASSERT((PyObject *)wr, wrasgc != next);
1031
40.3k
            gc_list_move(wrasgc, &wrcb_to_call);
1032
40.3k
        }
1033
2.74M
    }
1034
1035
    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
1036
     * because they can't reference unreachable objects.
1037
     */
1038
22.0k
    int visited_space = get_gc_state()->visited_space;
1039
62.4k
    while (! gc_list_is_empty(&wrcb_to_call)) {
1040
40.3k
        PyObject *temp;
1041
40.3k
        PyObject *callback;
1042
1043
40.3k
        gc = (PyGC_Head*)wrcb_to_call._gc_next;
1044
40.3k
        PyObject *op = FROM_GC(gc);
1045
40.3k
        _PyObject_ASSERT(op, PyWeakref_Check(op));
1046
40.3k
        PyWeakReference *wr = (PyWeakReference *)op;
1047
40.3k
        callback = wr->wr_callback;
1048
40.3k
        _PyObject_ASSERT(op, callback != NULL);
1049
1050
        /* copy-paste of weakrefobject.c's handle_callback() */
1051
40.3k
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
1052
40.3k
        if (temp == NULL) {
1053
0
            PyErr_FormatUnraisable("Exception ignored on "
1054
0
                                   "calling weakref callback %R", callback);
1055
0
        }
1056
40.3k
        else {
1057
40.3k
            Py_DECREF(temp);
1058
40.3k
        }
1059
1060
        /* Give up the reference we created in the first pass.  When
1061
         * op's refcount hits 0 (which it may or may not do right now),
1062
         * op's tp_dealloc will decref op->wr_callback too.  Note
1063
         * that the refcount probably will hit 0 now, and because this
1064
         * weakref was reachable to begin with, gc didn't already
1065
         * add it to its count of freed objects.  Example:  a reachable
1066
         * weak value dict maps some key to this reachable weakref.
1067
         * The callback removes this key->weakref mapping from the
1068
         * dict, leaving no other references to the weakref (excepting
1069
         * ours).
1070
         */
1071
40.3k
        Py_DECREF(op);
1072
40.3k
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
1073
            /* object is still alive -- move it */
1074
0
            gc_set_old_space(gc, visited_space);
1075
0
            gc_list_move(gc, old);
1076
0
        }
1077
40.3k
        else {
1078
40.3k
            ++num_freed;
1079
40.3k
        }
1080
40.3k
    }
1081
1082
22.0k
    return num_freed;
1083
22.0k
}
1084
1085
/* Clear all weakrefs to unreachable objects.  When this returns, no object in
1086
 * `unreachable` is weakly referenced anymore.  See the comments above
1087
 * handle_weakref_callbacks() for why these weakrefs need to be cleared.
1088
 */
1089
static void
1090
clear_weakrefs(PyGC_Head *unreachable)
1091
22.0k
{
1092
22.0k
    PyGC_Head *gc;
1093
22.0k
    PyGC_Head *next;
1094
1095
8.94M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
1096
8.91M
        PyWeakReference **wrlist;
1097
1098
8.91M
        PyObject *op = FROM_GC(gc);
1099
8.91M
        next = GC_NEXT(gc);
1100
1101
8.91M
        if (PyWeakref_Check(op)) {
1102
            /* A weakref inside the unreachable set is always cleared. See
1103
             * the comments above handle_weakref_callbacks() for why these
1104
             * must be cleared.
1105
             */
1106
8
            _PyWeakref_ClearRef((PyWeakReference *)op);
1107
8
        }
1108
1109
8.91M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
1110
6.16M
            continue;
1111
6.16M
        }
1112
1113
        /* It supports weakrefs.  Does it have any?
1114
         *
1115
         * This is never triggered for static types so we can avoid the
1116
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
1117
         */
1118
2.74M
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
1119
1120
        /* `op` may have some weakrefs.  March over the list, clear
1121
         * all the weakrefs.
1122
         */
1123
2.97M
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
1124
            /* _PyWeakref_ClearRef clears the weakref but leaves
1125
             * the callback pointer intact.  Obscure:  it also
1126
             * changes *wrlist.
1127
             */
1128
227k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
1129
227k
            _PyWeakref_ClearRef(wr);
1130
227k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1131
227k
        }
1132
2.74M
    }
1133
22.0k
}
1134
1135
static void
1136
debug_cycle(const char *msg, PyObject *op)
1137
0
{
1138
0
    PySys_FormatStderr("gc: %s <%s %p>\n",
1139
0
                       msg, Py_TYPE(op)->tp_name, op);
1140
0
}
1141
1142
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
1143
 * only from such cycles).
1144
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
1145
 * garbage list (a Python list), else only the objects in finalizers with
1146
 * __del__ methods are appended to garbage.  All objects in finalizers are
1147
 * merged into the old list regardless.
1148
 */
1149
static void
1150
handle_legacy_finalizers(PyThreadState *tstate,
1151
                         GCState *gcstate,
1152
                         PyGC_Head *finalizers, PyGC_Head *old)
1153
22.0k
{
1154
22.0k
    assert(!_PyErr_Occurred(tstate));
1155
22.0k
    assert(gcstate->garbage != NULL);
1156
1157
22.0k
    PyGC_Head *gc = GC_NEXT(finalizers);
1158
22.0k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
1159
0
        PyObject *op = FROM_GC(gc);
1160
1161
0
        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
1162
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1163
0
                _PyErr_Clear(tstate);
1164
0
                break;
1165
0
            }
1166
0
        }
1167
0
    }
1168
1169
22.0k
    gc_list_merge(finalizers, old);
1170
22.0k
}
1171
1172
/* Run first-time finalizers (if any) on all the objects in collectable.
1173
 * Note that this may remove some (or even all) of the objects from the
1174
 * list, due to refcounts falling to 0.
1175
 */
1176
static void
1177
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
1178
22.0k
{
1179
22.0k
    destructor finalize;
1180
22.0k
    PyGC_Head seen;
1181
1182
    /* While we're going through the loop, `finalize(op)` may cause op, or
1183
     * other objects, to be reclaimed via refcounts falling to zero.  So
1184
     * there's little we can rely on about the structure of the input
1185
     * `collectable` list across iterations.  For safety, we always take the
1186
     * first object in that list and move it to a temporary `seen` list.
1187
     * If objects vanish from the `collectable` and `seen` lists we don't
1188
     * care.
1189
     */
1190
22.0k
    gc_list_init(&seen);
1191
1192
8.94M
    while (!gc_list_is_empty(collectable)) {
1193
8.91M
        PyGC_Head *gc = GC_NEXT(collectable);
1194
8.91M
        PyObject *op = FROM_GC(gc);
1195
8.91M
        gc_list_move(gc, &seen);
1196
8.91M
        if (!_PyGC_FINALIZED(op) &&
1197
8.91M
            (finalize = Py_TYPE(op)->tp_finalize) != NULL)
1198
2
        {
1199
2
            _PyGC_SET_FINALIZED(op);
1200
2
            Py_INCREF(op);
1201
2
            finalize(op);
1202
2
            assert(!_PyErr_Occurred(tstate));
1203
2
            Py_DECREF(op);
1204
2
        }
1205
8.91M
    }
1206
22.0k
    gc_list_merge(&seen, collectable);
1207
22.0k
}
1208
1209
/* Break reference cycles by clearing the containers involved.  This is
1210
 * tricky business as the lists can be changing and we don't know which
1211
 * objects may be freed.  It is possible I screwed something up here.
1212
 */
1213
static void
1214
delete_garbage(PyThreadState *tstate, GCState *gcstate,
1215
               PyGC_Head *collectable, PyGC_Head *old)
1216
22.0k
{
1217
22.0k
    assert(!_PyErr_Occurred(tstate));
1218
1219
4.14M
    while (!gc_list_is_empty(collectable)) {
1220
4.12M
        PyGC_Head *gc = GC_NEXT(collectable);
1221
4.12M
        PyObject *op = FROM_GC(gc);
1222
1223
4.12M
        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1224
4.12M
                                  "refcount is too small");
1225
1226
4.12M
        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
1227
0
            assert(gcstate->garbage != NULL);
1228
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1229
0
                _PyErr_Clear(tstate);
1230
0
            }
1231
0
        }
1232
4.12M
        else {
1233
4.12M
            inquiry clear;
1234
4.12M
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1235
3.75M
                Py_INCREF(op);
1236
3.75M
                (void) clear(op);
1237
3.75M
                if (_PyErr_Occurred(tstate)) {
1238
0
                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
1239
0
                                           Py_TYPE(op)->tp_name);
1240
0
                }
1241
3.75M
                Py_DECREF(op);
1242
3.75M
            }
1243
4.12M
        }
1244
4.12M
        if (GC_NEXT(collectable) == gc) {
1245
            /* object is still alive, move it, it may die later */
1246
3.44M
            gc_clear_collecting(gc);
1247
3.44M
            gc_list_move(gc, old);
1248
3.44M
        }
1249
4.12M
    }
1250
22.0k
}
1251
1252
1253
/* Deduce which objects among "base" are unreachable from outside the list
1254
   and move them to 'unreachable'. The process consist in the following steps:
1255
1256
1. Copy all reference counts to a different field (gc_prev is used to hold
1257
   this copy to save memory).
1258
2. Traverse all objects in "base" and visit all referred objects using
1259
   "tp_traverse" and for every visited object, subtract 1 to the reference
1260
   count (the one that we copied in the previous step). After this step, all
1261
   objects that can be reached directly from outside must have strictly positive
1262
   reference count, while all unreachable objects must have a count of exactly 0.
1263
3. Identify all unreachable objects (the ones with 0 reference count) and move
1264
   them to the "unreachable" list. This step also needs to move back to "base" all
1265
   objects that were initially marked as unreachable but are referred transitively
1266
   by the reachable objects (the ones with strictly positive reference count).
1267
1268
Contracts:
1269
1270
    * The "base" has to be a valid list with no mask set.
1271
1272
    * The "unreachable" list must be uninitialized (this function calls
1273
      gc_list_init over 'unreachable').
1274
1275
IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1276
flag set but it does not clear it to skip unnecessary iteration. Before the
1277
flag is cleared (for example, by using 'clear_unreachable_mask' function or
1278
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1279
list and we can not use most gc_list_* functions for it. */
1280
static inline Py_ssize_t
1281
44.0k
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable, struct gc_generation_stats *stats) {
1282
44.0k
    validate_list(base, collecting_clear_unreachable_clear);
1283
    /* Using ob_refcnt and gc_refs, calculate which objects in the
1284
     * container set are reachable from outside the set (i.e., have a
1285
     * refcount greater than 0 when all the references within the
1286
     * set are taken into account).
1287
     */
1288
44.0k
    Py_ssize_t candidates = update_refs(base);  // gc_prev is used for gc_refs
1289
44.0k
    subtract_refs(base, stats);
1290
1291
    /* Leave everything reachable from outside base in base, and move
1292
     * everything else (in base) to unreachable.
1293
     *
1294
     * NOTE:  This used to move the reachable objects into a reachable
1295
     * set instead.  But most things usually turn out to be reachable,
1296
     * so it's more efficient to move the unreachable things.  It "sounds slick"
1297
     * to move the unreachable objects, until you think about it - the reason it
1298
     * pays isn't actually obvious.
1299
     *
1300
     * Suppose we create objects A, B, C in that order.  They appear in the young
1301
     * generation in the same order.  If B points to A, and C to B, and C is
1302
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1303
     * respectively.
1304
     *
1305
     * When move_unreachable finds A, A is moved to the unreachable list.  The
1306
     * same for B when it's first encountered.  Then C is traversed, B is moved
1307
     * _back_ to the reachable list.  B is eventually traversed, and then A is
1308
     * moved back to the reachable list.
1309
     *
1310
     * So instead of not moving at all, the reachable objects B and A are moved
1311
     * twice each.  Why is this a win?  A straightforward algorithm to move the
1312
     * reachable objects instead would move A, B, and C once each.
1313
     *
1314
     * The key is that this dance leaves the objects in order C, B, A - it's
1315
     * reversed from the original order.  On all _subsequent_ scans, none of
1316
     * them will move.  Since most objects aren't in cycles, this can save an
1317
     * unbounded number of moves across an unbounded number of later collections.
1318
     * It can cost more only the first time the chain is scanned.
1319
     *
1320
     * Drawback:  move_unreachable is also used to find out what's still trash
1321
     * after finalizers may resurrect objects.  In _that_ case most unreachable
1322
     * objects will remain unreachable, so it would be more efficient to move
1323
     * the reachable objects instead.  But this is a one-time cost, probably not
1324
     * worth complicating the code to speed just a little.
1325
     */
1326
44.0k
    move_unreachable(base, unreachable, stats);  // gc_prev is pointer again
1327
44.0k
    validate_list(base, collecting_clear_unreachable_clear);
1328
44.0k
    validate_list(unreachable, collecting_set_unreachable_set);
1329
44.0k
    return candidates;
1330
44.0k
}
1331
1332
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1333
   them to 'old_generation' and placing the rest on 'still_unreachable'.
1334
1335
   Contracts:
1336
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1337
         will contain the objects that did not resurrect.
1338
1339
       * The "still_unreachable" list must be uninitialized (this function calls
1340
         gc_list_init over 'still_unreachable').
1341
1342
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1343
PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so
1344
we can skip the expense of clearing the flag to avoid extra iteration. */
1345
static inline void
1346
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1347
                           PyGC_Head *old_generation,
1348
                           struct gc_generation_stats *stats)
1349
22.0k
{
1350
    // Remove the PREV_MASK_COLLECTING from unreachable
1351
    // to prepare it for a new call to 'deduce_unreachable'
1352
22.0k
    gc_list_clear_collecting(unreachable);
1353
1354
    // After the call to deduce_unreachable, the 'still_unreachable' set will
1355
    // have the PREV_MARK_COLLECTING set, but the objects are going to be
1356
    // removed so we can skip the expense of clearing the flag.
1357
22.0k
    PyGC_Head* resurrected = unreachable;
1358
22.0k
    deduce_unreachable(resurrected, still_unreachable, stats);
1359
22.0k
    clear_unreachable_mask(still_unreachable);
1360
1361
    // Move the resurrected objects to the old generation for future collection.
1362
22.0k
    gc_list_merge(resurrected, old_generation);
1363
22.0k
}
1364
1365
static void
1366
gc_collect_region(PyThreadState *tstate,
1367
                  PyGC_Head *from,
1368
                  PyGC_Head *to,
1369
                  struct gc_generation_stats *stats);
1370
1371
static inline Py_ssize_t
1372
gc_list_set_space(PyGC_Head *list, int space)
1373
23.7k
{
1374
23.7k
    Py_ssize_t size = 0;
1375
23.7k
    PyGC_Head *gc;
1376
76.3M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1377
76.3M
        gc_set_old_space(gc, space);
1378
76.3M
        size++;
1379
76.3M
    }
1380
23.7k
    return size;
1381
23.7k
}
1382
1383
/* Making progress in the incremental collector
 * In order to eventually collect all cycles
 * the incremental collector must progress through the old
 * space faster than objects are added to the old space.
 *
 * Each young or incremental collection adds a number of
 * objects, S (for survivors) to the old space, and
 * incremental collectors scan I objects from the old space.
 * I > S must be true. We also want I > S * N to be true where
 * N > 1. Higher values of N mean that the old space is
 * scanned more rapidly.
 * The default incremental threshold of 10 translates to
 * N == 1.4 (1 + 4/threshold)
 */
1397
1398
/* Divide by 10, so that the default incremental threshold of 10
1399
 * scans objects at 1% of the heap size */
1400
95.0k
#define SCAN_RATE_DIVISOR 10
1401
1402
static struct gc_generation_stats *
1403
gc_get_stats(GCState *gcstate, int gen)
1404
95.0k
{
1405
95.0k
    if (gen == 0) {
1406
0
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1407
0
        buffer->index = (buffer->index + 1) % GC_YOUNG_STATS_SIZE;
1408
0
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1409
0
        return stats;
1410
0
    }
1411
95.0k
    else {
1412
95.0k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1413
95.0k
        buffer->index = (buffer->index + 1) % GC_OLD_STATS_SIZE;
1414
95.0k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1415
95.0k
        return stats;
1416
95.0k
    }
1417
95.0k
}
1418
1419
static struct gc_generation_stats *
1420
gc_get_prev_stats(GCState *gcstate, int gen)
1421
95.0k
{
1422
95.0k
    if (gen == 0) {
1423
0
        struct gc_young_stats_buffer *buffer = &gcstate->generation_stats->young;
1424
0
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1425
0
        return stats;
1426
0
    }
1427
95.0k
    else {
1428
95.0k
        struct gc_old_stats_buffer *buffer = &gcstate->generation_stats->old[gen - 1];
1429
95.0k
        struct gc_generation_stats *stats = &buffer->items[buffer->index];
1430
95.0k
        return stats;
1431
95.0k
    }
1432
95.0k
}
1433
1434
static void
1435
add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
1436
95.0k
{
1437
95.0k
    struct gc_generation_stats *prev_stats = gc_get_prev_stats(gcstate, gen);
1438
95.0k
    struct gc_generation_stats *cur_stats = gc_get_stats(gcstate, gen);
1439
1440
95.0k
    memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
1441
1442
95.0k
    cur_stats->ts_start = stats->ts_start;
1443
95.0k
    cur_stats->ts_stop = stats->ts_stop;
1444
95.0k
    cur_stats->heap_size = stats->heap_size;
1445
95.0k
    cur_stats->work_to_do = stats->work_to_do;
1446
1447
95.0k
    cur_stats->collections += 1;
1448
95.0k
    cur_stats->object_visits += stats->object_visits;
1449
95.0k
    cur_stats->collected += stats->collected;
1450
95.0k
    cur_stats->uncollectable += stats->uncollectable;
1451
95.0k
    cur_stats->candidates += stats->candidates;
1452
1453
95.0k
    cur_stats->objects_transitively_reachable += stats->objects_transitively_reachable;
1454
95.0k
    cur_stats->objects_not_transitively_reachable += stats->objects_not_transitively_reachable;
1455
1456
95.0k
    cur_stats->duration += stats->duration;
1457
95.0k
}
1458
1459
static void
1460
gc_collect_young(PyThreadState *tstate,
1461
                 struct gc_generation_stats *stats)
1462
0
{
1463
0
    GCState *gcstate = &tstate->interp->gc;
1464
0
    validate_spaces(gcstate);
1465
0
    PyGC_Head *young = &gcstate->young.head;
1466
0
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1467
0
    untrack_tuples(young);
1468
1469
0
    PyGC_Head survivors;
1470
0
    gc_list_init(&survivors);
1471
0
    gc_list_set_space(young, gcstate->visited_space);
1472
0
    gc_collect_region(tstate, young, &survivors, stats);
1473
0
    gc_list_merge(&survivors, visited);
1474
0
    validate_spaces(gcstate);
1475
0
    gcstate->young.count = 0;
1476
0
    gcstate->old[gcstate->visited_space].count++;
1477
0
    validate_spaces(gcstate);
1478
0
}
1479
1480
#ifndef NDEBUG
1481
static inline int
1482
IS_IN_VISITED(PyGC_Head *gc, int visited_space)
1483
{
1484
    assert(visited_space == 0 || other_space(visited_space) == 0);
1485
    return gc_old_space(gc) == visited_space;
1486
}
1487
#endif
1488
1489
struct container_and_flag {
1490
    PyGC_Head *container;
1491
    int visited_space;
1492
    intptr_t size;
1493
    struct gc_generation_stats *stats;
1494
};
1495
1496
/* A traversal callback for adding to container) */
1497
static int
1498
visit_add_to_container(PyObject *op, void *arg)
1499
605M
{
1500
605M
    OBJECT_STAT_INC(object_visits);
1501
605M
    struct container_and_flag *cf = (struct container_and_flag *)arg;
1502
605M
    cf->stats->object_visits += 1;
1503
605M
    int visited = cf->visited_space;
1504
605M
    assert(visited == get_gc_state()->visited_space);
1505
605M
    if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
1506
342M
        PyGC_Head *gc = AS_GC(op);
1507
342M
        if (_PyObject_GC_IS_TRACKED(op) &&
1508
264M
            gc_old_space(gc) != visited) {
1509
142M
            gc_flip_old_space(gc);
1510
142M
            gc_list_move(gc, cf->container);
1511
142M
            cf->size++;
1512
142M
        }
1513
342M
    }
1514
605M
    return 0;
1515
605M
}
1516
1517
static intptr_t
1518
expand_region_transitively_reachable(PyGC_Head *container,
1519
                                     PyGC_Head *gc,
1520
                                     GCState *gcstate,
1521
                                     struct gc_generation_stats *stats)
1522
534k
{
1523
534k
    struct container_and_flag arg = {
1524
534k
        .container = container,
1525
534k
        .visited_space = gcstate->visited_space,
1526
534k
        .size = 0,
1527
534k
        .stats = stats
1528
534k
    };
1529
534k
    assert(GC_NEXT(gc) == container);
1530
2.95M
    while (gc != container) {
1531
        /* Survivors will be moved to visited space, so they should
1532
         * have been marked as visited */
1533
2.41M
        assert(IS_IN_VISITED(gc, gcstate->visited_space));
1534
2.41M
        PyObject *op = FROM_GC(gc);
1535
2.41M
        assert(_PyObject_GC_IS_TRACKED(op));
1536
2.41M
        if (_Py_IsImmortal(op)) {
1537
0
            PyGC_Head *next = GC_NEXT(gc);
1538
0
            gc_list_move(gc, &gcstate->permanent_generation.head);
1539
0
            gc = next;
1540
0
            continue;
1541
0
        }
1542
2.41M
        traverseproc traverse = Py_TYPE(op)->tp_traverse;
1543
2.41M
        (void) traverse(op,
1544
2.41M
                        visit_add_to_container,
1545
2.41M
                        &arg);
1546
2.41M
        gc = GC_NEXT(gc);
1547
2.41M
    }
1548
534k
    return arg.size;
1549
534k
}
1550
1551
/* Do bookkeeping for a completed GC cycle */
static void
completed_scavenge(GCState *gcstate)
{
    /* We must observe two invariants:
    * 1. Members of the permanent generation must be marked visited.
    * 2. We cannot touch members of the permanent generation. */
    int visited;
    if (gc_list_is_empty(&gcstate->permanent_generation.head)) {
        /* Permanent generation is empty so we can flip spaces bit */
        int not_visited = gcstate->visited_space;
        visited = other_space(not_visited);
        gcstate->visited_space = visited;
        /* Make sure all objects have visited bit set correctly */
        gc_list_set_space(&gcstate->young.head, not_visited);
    }
    else {
         /* We must move the objects from visited to pending space. */
        visited = gcstate->visited_space;
        int not_visited = other_space(visited);
        assert(gc_list_is_empty(&gcstate->old[not_visited].head));
        gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head);
        gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
    }
    /* Either way, the visited old space must now be empty. */
    assert(gc_list_is_empty(&gcstate->old[visited].head));
    /* Reset the budget and restart the cycle with the marking phase. */
    gcstate->work_to_do = 0;
    gcstate->phase = GC_PHASE_MARK;
}
1579
1580
static intptr_t
1581
move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
1582
4.04M
{
1583
4.04M
    if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
1584
2.08M
        PyGC_Head *gc = AS_GC(op);
1585
2.08M
        if (_PyObject_GC_IS_TRACKED(op) &&
1586
2.08M
            gc_old_space(gc) != visited_space) {
1587
821k
            gc_flip_old_space(gc);
1588
821k
            gc_list_move(gc, reachable);
1589
821k
            return 1;
1590
821k
        }
1591
2.08M
    }
1592
3.22M
    return 0;
1593
4.04M
}
1594
1595
/* Worklist marking: repeatedly pop the head of 'reachable', move it to
 * 'visited', and traverse it so that visit_add_to_container() appends any
 * unvisited neighbors back onto 'reachable'.  Terminates when the
 * worklist is empty.  Returns the number of objects the traversals
 * added to the worklist (arg.size); objects seeded into 'reachable' by
 * the caller are not counted here.
 */
static intptr_t
mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space, struct gc_generation_stats *stats)
{
    // Transitively traverse all objects from reachable, until empty
    struct container_and_flag arg = {
        .container = reachable,
        .visited_space = visited_space,
        .size = 0,
        .stats = stats
    };
    while (!gc_list_is_empty(reachable)) {
        PyGC_Head *gc = _PyGCHead_NEXT(reachable);
        /* Everything on the worklist already carries the visited bit. */
        assert(gc_old_space(gc) == visited_space);
        gc_list_move(gc, visited);
        PyObject *op = FROM_GC(gc);
        traverseproc traverse = Py_TYPE(op)->tp_traverse;
        (void) traverse(op,
                        visit_add_to_container,
                        &arg);
    }
    gc_list_validate_space(visited, visited_space);
    return arg.size;
}
1618
1619
/* Mark every object reachable from the evaluation stacks of all threads
 * in 'interp' as visited, moving them (transitively) into 'visited'.
 *
 * For each thread, walks the frame chain from current_frame downward,
 * scanning each frame's f_locals, function object, and live stack slots.
 * When 'start' is false, the walk stops at the first frame whose
 * 'visited' flag is already set: frames below it cannot have changed
 * since the previous increment.  When 'start' is true (beginning of a
 * cycle), all frames are scanned and re-flagged.
 *
 * Returns the number of objects newly marked as visited.
 */
static intptr_t
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start, struct gc_generation_stats *stats)
{
    PyGC_Head reachable;
    gc_list_init(&reachable);
    Py_ssize_t objects_marked = 0;
    // Move all objects on stacks to reachable
    _PyRuntimeState *runtime = &_PyRuntime;
    HEAD_LOCK(runtime);
    PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
    HEAD_UNLOCK(runtime);
    while (ts) {
        _PyInterpreterFrame *frame = ts->current_frame;
        while (frame) {
            if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) {
                /* Not a normal Python frame; skip its slots. */
                frame = frame->previous;
                continue;
            }
            _PyStackRef *locals = frame->localsplus;
            _PyStackRef *sp = frame->stackpointer;
            objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
            PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
            objects_marked += move_to_reachable(func, &reachable, visited_space);
            /* Scan all live slots (locals + operand stack) top-down. */
            while (sp > locals) {
                sp--;
                if (PyStackRef_IsNullOrInt(*sp)) {
                    continue;
                }
                PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
                if (_Py_IsImmortal(op)) {
                    continue;
                }
                if (_PyObject_IS_GC(op)) {
                    PyGC_Head *gc = AS_GC(op);
                    if (_PyObject_GC_IS_TRACKED(op) &&
                        gc_old_space(gc) != visited_space) {
                        gc_flip_old_space(gc);
                        objects_marked++;
                        gc_list_move(gc, &reachable);
                    }
                }
            }
            if (!start && frame->visited) {
                // If this frame has already been visited, then the lower frames
                // will have already been visited and will not have changed
                break;
            }
            frame->visited = 1;
            frame = frame->previous;
        }
        /* Thread-state list is shared runtime state; lock while advancing. */
        HEAD_LOCK(runtime);
        ts = PyThreadState_Next(ts);
        HEAD_UNLOCK(runtime);
    }
    /* Close the transitive reachability from the stack roots found above. */
    objects_marked += mark_all_reachable(&reachable, visited, visited_space, stats);
    assert(gc_list_is_empty(&reachable));
    return objects_marked;
}
1677
1678
/* Mark everything reachable from the interpreter's global roots:
 * sys.__dict__, builtins, the interpreter dict, and the tp_dict /
 * tp_subclasses of all managed static builtin and extension types.
 * The roots are seeded onto a local 'reachable' list and then closed
 * transitively via mark_all_reachable().
 *
 * Returns the number of objects newly marked as visited.
 */
static intptr_t
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, struct gc_generation_stats *stats)
{
    PyGC_Head reachable;
    gc_list_init(&reachable);
    Py_ssize_t objects_marked = 0;
    objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
    objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
    objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
    struct types_state *types = &interp->types;
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
    }
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
    }
    objects_marked += mark_all_reachable(&reachable, visited, visited_space, stats);
    assert(gc_list_is_empty(&reachable));
    return objects_marked;
}
1700
1701
/* Initial marking phase of an incremental GC cycle: mark everything
 * reachable from the global roots and from all thread stacks
 * (start=true forces a full stack walk), charge the marked count
 * against the work budget, and advance to GC_PHASE_COLLECT.
 * Returns the number of objects marked.
 */
static intptr_t
mark_at_start(PyThreadState *tstate, struct gc_generation_stats *stats)
{
    // TO DO -- Make this incremental
    GCState *gcstate = &tstate->interp->gc;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
    Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space, stats);
    objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true, stats);
    gcstate->work_to_do -= objects_marked;
    gcstate->phase = GC_PHASE_COLLECT;
    validate_spaces(gcstate);
    return objects_marked;
}
1714
1715
static intptr_t
1716
assess_work_to_do(GCState *gcstate)
1717
95.0k
{
1718
    /* The amount of work we want to do depends on three things.
1719
     * 1. The number of new objects created
1720
     * 2. The growth in heap size since the last collection
1721
     * 3. The heap size (up to the number of new objects, to avoid quadratic effects)
1722
     *
1723
     * For a steady state heap, the amount of work to do is three times the number
1724
     * of new objects added to the heap. This ensures that we stay ahead in the
1725
     * worst case of all new objects being garbage.
1726
     *
1727
     * This could be improved by tracking survival rates, but it is still a
1728
     * large improvement on the non-marking approach.
1729
     */
1730
95.0k
    intptr_t scale_factor = gcstate->old[0].threshold;
1731
95.0k
    if (scale_factor < 2) {
1732
0
        scale_factor = 2;
1733
0
    }
1734
95.0k
    intptr_t new_objects = gcstate->young.count;
1735
95.0k
    intptr_t max_heap_fraction = new_objects*2;
1736
95.0k
    intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
1737
95.0k
    if (heap_fraction > max_heap_fraction) {
1738
8.57k
        heap_fraction = max_heap_fraction;
1739
8.57k
    }
1740
95.0k
    gcstate->young.count = 0;
1741
95.0k
    return new_objects + heap_fraction;
1742
95.0k
}
1743
1744
static void
1745
gc_collect_increment(PyThreadState *tstate, struct gc_generation_stats *stats)
1746
95.0k
{
1747
95.0k
    GCState *gcstate = &tstate->interp->gc;
1748
95.0k
    gcstate->work_to_do += assess_work_to_do(gcstate);
1749
95.0k
    if (gcstate->work_to_do < 0) {
1750
71.1k
        return;
1751
71.1k
    }
1752
23.8k
    untrack_tuples(&gcstate->young.head);
1753
23.8k
    if (gcstate->phase == GC_PHASE_MARK) {
1754
1.79k
        Py_ssize_t objects_marked = mark_at_start(tstate, stats);
1755
1.79k
        stats->objects_transitively_reachable += objects_marked;
1756
1.79k
        stats->candidates += objects_marked;
1757
1.79k
        gcstate->work_to_do -= objects_marked;
1758
1.79k
        validate_spaces(gcstate);
1759
1.79k
        return;
1760
1.79k
    }
1761
22.0k
    PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
1762
22.0k
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1763
22.0k
    PyGC_Head increment;
1764
22.0k
    gc_list_init(&increment);
1765
22.0k
    int scale_factor = gcstate->old[0].threshold;
1766
22.0k
    if (scale_factor < 2) {
1767
0
        scale_factor = 2;
1768
0
    }
1769
22.0k
    intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false, stats);
1770
22.0k
    stats->objects_transitively_reachable += objects_marked;
1771
22.0k
    gcstate->work_to_do -= objects_marked;
1772
22.0k
    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
1773
22.0k
    gc_list_merge(&gcstate->young.head, &increment);
1774
22.0k
    gc_list_validate_space(&increment, gcstate->visited_space);
1775
22.0k
    Py_ssize_t increment_size = gc_list_size(&increment);
1776
556k
    while (increment_size < gcstate->work_to_do) {
1777
536k
        if (gc_list_is_empty(not_visited)) {
1778
1.73k
            break;
1779
1.73k
        }
1780
534k
        PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
1781
534k
        gc_list_move(gc, &increment);
1782
534k
        increment_size++;
1783
534k
        assert(!_Py_IsImmortal(FROM_GC(gc)));
1784
534k
        gc_set_old_space(gc, gcstate->visited_space);
1785
534k
        increment_size += expand_region_transitively_reachable(&increment, gc, gcstate, stats);
1786
534k
    }
1787
22.0k
    stats->objects_not_transitively_reachable += increment_size;
1788
22.0k
    validate_list(&increment, collecting_clear_unreachable_clear);
1789
22.0k
    gc_list_validate_space(&increment, gcstate->visited_space);
1790
22.0k
    PyGC_Head survivors;
1791
22.0k
    gc_list_init(&survivors);
1792
22.0k
    gc_collect_region(tstate, &increment, &survivors, stats);
1793
22.0k
    gc_list_merge(&survivors, visited);
1794
22.0k
    assert(gc_list_is_empty(&increment));
1795
22.0k
    gcstate->work_to_do -= increment_size;
1796
1797
22.0k
    if (gc_list_is_empty(not_visited)) {
1798
1.76k
        completed_scavenge(gcstate);
1799
1.76k
    }
1800
22.0k
    validate_spaces(gcstate);
1801
22.0k
}
1802
1803
/* Non-incremental full collection: merge the young generation and both
 * old spaces into the visited space, collect that single region in
 * place (from == to), then reset all counts, complete the scavenge
 * bookkeeping, and clear the interpreter's free lists.
 */
static void
gc_collect_full(PyThreadState *tstate,
                struct gc_generation_stats *stats)
{
    GCState *gcstate = &tstate->interp->gc;
    validate_spaces(gcstate);
    PyGC_Head *young = &gcstate->young.head;
    PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
    untrack_tuples(young);
    /* merge all generations into visited */
    gc_list_merge(young, pending);
    gc_list_validate_space(pending, 1-gcstate->visited_space);
    gc_list_set_space(pending, gcstate->visited_space);
    gcstate->young.count = 0;
    gc_list_merge(pending, visited);
    validate_spaces(gcstate);

    /* Collect the whole heap in place: survivors stay in 'visited'. */
    gc_collect_region(tstate, visited, visited,
                      stats);
    validate_spaces(gcstate);
    gcstate->young.count = 0;
    gcstate->old[0].count = 0;
    gcstate->old[1].count = 0;
    completed_scavenge(gcstate);
    _PyGC_ClearAllFreeLists(tstate->interp);
    validate_spaces(gcstate);
}
1831
1832
/* This is the main function. Read this to understand how the
 * collection process works.
 *
 * Collects the objects on the 'from' list: unreachable objects are
 * finalized and cleared; reachable objects (and resurrected ones) are
 * merged into 'to'.  Objects with legacy finalizers (tp_del), and
 * anything reachable from them, are uncollectable and end up on
 * gcstate->garbage via handle_legacy_finalizers().  'from' and 'to'
 * may be the same list (full collection).  Updates 'stats' counters
 * (candidates, collected, uncollectable).  Must be called with no
 * exception pending; leaves none pending.
 */
static void
gc_collect_region(PyThreadState *tstate,
                  PyGC_Head *from,
                  PyGC_Head *to,
                  struct gc_generation_stats *stats)
{
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
    PyGC_Head *gc; /* initialize to prevent a compiler warning */
    GCState *gcstate = &tstate->interp->gc;

    assert(gcstate->garbage != NULL);
    assert(!_PyErr_Occurred(tstate));

    gc_list_init(&unreachable);
    stats->candidates = deduce_unreachable(from, &unreachable, stats);
    validate_consistent_old_space(from);
    untrack_tuples(from);

  /* Move reachable objects to next generation. */
    validate_consistent_old_space(to);
    if (from != to) {
        gc_list_merge(from, to);
    }
    validate_consistent_old_space(to);

    /* All objects in unreachable are trash, but objects reachable from
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
     */
    gc_list_init(&finalizers);
    // NEXT_MASK_UNREACHABLE is cleared here.
    // After move_legacy_finalizers(), unreachable is normal list.
    move_legacy_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a legacy finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_legacy_finalizer_reachable(&finalizers, stats);
    validate_list(&finalizers, collecting_clear_unreachable_clear);
    validate_list(&unreachable, collecting_set_unreachable_clear);
    /* Print debugging information. */
    if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Invoke weakref callbacks as necessary. */
    stats->collected += handle_weakref_callbacks(&unreachable, to);
    gc_list_validate_space(to, gcstate->visited_space);
    validate_list(to, collecting_clear_unreachable_clear);
    validate_list(&unreachable, collecting_set_unreachable_clear);

    /* Call tp_finalize on objects which have one. */
    finalize_garbage(tstate, &unreachable);
    /* Handle any objects that may have resurrected after the call
     * to 'finalize_garbage' and continue the collection with the
     * objects that are still unreachable */
    PyGC_Head final_unreachable;
    gc_list_init(&final_unreachable);
    handle_resurrected_objects(&unreachable, &final_unreachable, to, stats);

    /* Clear weakrefs to objects in the unreachable set.  See the comments
     * above handle_weakref_callbacks() for details.
     */
    clear_weakrefs(&final_unreachable);

    /* Call tp_clear on objects in the final_unreachable set.  This will cause
    * the reference cycles to be broken.  It may also cause some objects
    * in finalizers to be freed.
    */
    stats->collected += gc_list_size(&final_unreachable);
    delete_garbage(tstate, gcstate, &final_unreachable, to);

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    Py_ssize_t n = 0;
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
        n++;
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    stats->uncollectable = n;
    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
    gc_list_validate_space(to, gcstate->visited_space);
    validate_list(to, collecting_clear_unreachable_clear);
}
1925
1926
/* Invoke progress callbacks to notify clients that garbage collection
 * is starting or stopping
 *
 * Each callback in gcstate->callbacks is called as cb(phase, info),
 * where 'phase' is "start" or "stop" and 'info' is a dict built from
 * 'stats' (generation, collected, uncollectable, candidates, duration).
 * The info dict is only built when at least one callback is registered.
 * Errors from Py_BuildValue, PyUnicode_FromString, or a callback are
 * reported via PyErr_FormatUnraisable and never propagate: this function
 * enters and leaves with no exception set (asserted).
 */
static void
do_gc_callback(GCState *gcstate, const char *phase,
                   int generation, struct gc_generation_stats *stats)
{
    assert(!PyErr_Occurred());

    /* The local variable cannot be rebound, check it for sanity */
    assert(PyList_CheckExact(gcstate->callbacks));
    PyObject *info = NULL;
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
        info = Py_BuildValue("{sisnsnsnsd}",
            "generation", generation,
            "collected", stats->collected,
            "uncollectable", stats->uncollectable,
            "candidates", stats->candidates,
            "duration", stats->duration);
        if (info == NULL) {
            PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
            return;
        }
    }

    PyObject *phase_obj = PyUnicode_FromString(phase);
    if (phase_obj == NULL) {
        Py_XDECREF(info);
        PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
        return;
    }

    /* Vectorcall argument array: (phase, info). */
    PyObject *stack[] = {phase_obj, info};
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
        Py_INCREF(cb); /* make sure cb doesn't go away */
        r = PyObject_Vectorcall(cb, stack, 2, NULL);
        if (r == NULL) {
            PyErr_FormatUnraisable("Exception ignored while "
                                   "calling GC callback %R", cb);
        }
        else {
            Py_DECREF(r);
        }
        Py_DECREF(cb);
    }
    Py_DECREF(phase_obj);
    Py_XDECREF(info);
    assert(!PyErr_Occurred());
}
1976
1977
static void
1978
invoke_gc_callback(GCState *gcstate, const char *phase,
1979
                   int generation, struct gc_generation_stats *stats)
1980
190k
{
1981
190k
    if (gcstate->callbacks == NULL) {
1982
0
        return;
1983
0
    }
1984
190k
    do_gc_callback(gcstate, phase, generation, stats);
1985
190k
}
1986
1987
static int
1988
referrersvisit(PyObject* obj, void *arg)
1989
0
{
1990
0
    PyObject *objs = arg;
1991
0
    Py_ssize_t i;
1992
0
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
1993
0
        if (PyTuple_GET_ITEM(objs, i) == obj) {
1994
0
            return 1;
1995
0
        }
1996
0
    }
1997
0
    return 0;
1998
0
}
1999
2000
static int
2001
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
2002
0
{
2003
0
    PyGC_Head *gc;
2004
0
    PyObject *obj;
2005
0
    traverseproc traverse;
2006
0
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
2007
0
        obj = FROM_GC(gc);
2008
0
        traverse = Py_TYPE(obj)->tp_traverse;
2009
0
        if (obj == objs || obj == resultlist) {
2010
0
            continue;
2011
0
        }
2012
0
        if (traverse(obj, referrersvisit, objs)) {
2013
0
            if (PyList_Append(resultlist, obj) < 0) {
2014
0
                return 0; /* error */
2015
0
            }
2016
0
        }
2017
0
    }
2018
0
    return 1; /* no error */
2019
0
}
2020
2021
PyObject *
2022
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
2023
0
{
2024
0
    PyObject *result = PyList_New(0);
2025
0
    if (!result) {
2026
0
        return NULL;
2027
0
    }
2028
2029
0
    GCState *gcstate = &interp->gc;
2030
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
2031
0
        if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) {
2032
0
            Py_DECREF(result);
2033
0
            return NULL;
2034
0
        }
2035
0
    }
2036
0
    return result;
2037
0
}
2038
2039
/* Return a new list of the objects tracked by the collector in the
 * given generation (see the mapping below); NULL on error.  Note that
 * generation 1 deliberately yields an empty list in the incremental
 * scheme, and the two 'old' spaces together form generation 2.
 */
PyObject *
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
{
    assert(generation >= -1 && generation < NUM_GENERATIONS);
    GCState *gcstate = &interp->gc;

    PyObject *result = PyList_New(0);
    /* Generation:
     * -1: Return all objects
     * 0: All young objects
     * 1: No objects
     * 2: All old objects
     */
    if (result == NULL || generation == 1) {
        /* NULL result propagates the error; generation 1 is empty. */
        return result;
    }
    if (generation <= 0) {
        if (append_objects(result, &gcstate->young.head)) {
            goto error;
        }
    }
    if (generation != 0) {
        if (append_objects(result, &gcstate->old[0].head)) {
            goto error;
        }
        if (append_objects(result, &gcstate->old[1].head)) {
            goto error;
        }
    }

    return result;
error:
    Py_DECREF(result);
    return NULL;
}
2074
2075
/* Move every currently tracked object (young and both old spaces) into
 * the permanent generation, where the collector will not touch it.
 * All moved objects must carry the visited space bit, so each list's
 * space is normalized to gcstate->visited_space before merging.
 */
void
_PyGC_Freeze(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;
    /* The permanent_generation must be visited */
    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
    gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
    gcstate->young.count = 0;
    PyGC_Head*old0 = &gcstate->old[0].head;
    PyGC_Head*old1 = &gcstate->old[1].head;
    /* Only the not-visited old list needs its space bit flipped;
     * the other already matches visited_space. */
    if (gcstate->visited_space) {
        gc_list_set_space(old0, 1);
    }
    else {
        gc_list_set_space(old1, 0);
    }
    gc_list_merge(old0, &gcstate->permanent_generation.head);
    gcstate->old[0].count = 0;
    gc_list_merge(old1, &gcstate->permanent_generation.head);
    gcstate->old[1].count = 0;
    validate_spaces(gcstate);
}
2097
2098
void
2099
_PyGC_Unfreeze(PyInterpreterState *interp)
2100
0
{
2101
0
    GCState *gcstate = &interp->gc;
2102
0
    gc_list_merge(&gcstate->permanent_generation.head,
2103
0
                  &gcstate->old[gcstate->visited_space].head);
2104
0
    validate_spaces(gcstate);
2105
0
}
2106
2107
Py_ssize_t
2108
_PyGC_GetFreezeCount(PyInterpreterState *interp)
2109
0
{
2110
0
    GCState *gcstate = &interp->gc;
2111
0
    return gc_list_size(&gcstate->permanent_generation.head);
2112
0
}
2113
2114
/* C API for controlling the state of the garbage collector */
2115
int
2116
PyGC_Enable(void)
2117
0
{
2118
0
    GCState *gcstate = get_gc_state();
2119
0
    int old_state = gcstate->enabled;
2120
0
    gcstate->enabled = 1;
2121
0
    return old_state;
2122
0
}
2123
2124
int
2125
PyGC_Disable(void)
2126
0
{
2127
0
    GCState *gcstate = get_gc_state();
2128
0
    int old_state = gcstate->enabled;
2129
0
    gcstate->enabled = 0;
2130
0
    return old_state;
2131
0
}
2132
2133
int
2134
PyGC_IsEnabled(void)
2135
0
{
2136
0
    GCState *gcstate = get_gc_state();
2137
0
    return gcstate->enabled;
2138
0
}
2139
2140
// Show stats for objects in each generations
// Writes one line with the per-generation list sizes and one line with
// the permanent generation size to stderr.  The loop also stops if the
// fixed-size buffer fills up (pos < sizeof(buf)).
static void
show_stats_each_generations(GCState *gcstate)
{
    char buf[100];
    size_t pos = 0;

    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
                             " %zd",
                             gc_list_size(GEN_HEAD(gcstate, i)));
    }
    PySys_FormatStderr(
        "gc: objects in each generation:%s\n"
        "gc: objects in permanent generation: %zd\n",
        buf, gc_list_size(&gcstate->permanent_generation.head));
}
2157
2158
/* Top-level entry point for a collection.
 *
 * 'generation' selects the pass: 0 = young, 1 = one increment,
 * 2 = full collection.  'reason' only affects reporting: callbacks are
 * suppressed for _Py_GC_REASON_SHUTDOWN.  Re-entrancy is prevented with
 * an atomic compare-exchange on gcstate->collecting; a nested call
 * returns 0 immediately.  Any exception already set on 'tstate' is
 * saved across the collection and restored afterwards.
 *
 * Returns the number of collected plus uncollectable objects.
 */
Py_ssize_t
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
{
    GCState *gcstate = &tstate->interp->gc;
    assert(tstate->current_frame == NULL || tstate->current_frame->stackpointer != NULL);

    int expected = 0;
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
        // Don't start a garbage collection if one is already in progress.
        return 0;
    }
    gcstate->frame = tstate->current_frame;

    struct gc_generation_stats stats = { 0 };
    if (reason != _Py_GC_REASON_SHUTDOWN) {
        invoke_gc_callback(gcstate, "start", generation, &stats);
    }
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
        show_stats_each_generations(gcstate);
    }
    if (PyDTrace_GC_START_ENABLED()) {
        PyDTrace_GC_START(generation);
    }
    stats.heap_size = gcstate->heap_size;
    stats.work_to_do = gcstate->work_to_do;
    (void)PyTime_PerfCounterRaw(&stats.ts_start);
    /* Shield any pending exception from the collection machinery. */
    PyObject *exc = _PyErr_GetRaisedException(tstate);
    switch(generation) {
        case 0:
            gc_collect_young(tstate, &stats);
            break;
        case 1:
            gc_collect_increment(tstate, &stats);
            break;
        case 2:
            gc_collect_full(tstate, &stats);
            break;
        default:
            Py_UNREACHABLE();
    }
    (void)PyTime_PerfCounterRaw(&stats.ts_stop);
    stats.duration = PyTime_AsSecondsDouble(stats.ts_stop - stats.ts_start);
    add_stats(gcstate, generation, &stats);
    if (PyDTrace_GC_DONE_ENABLED()) {
        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
    }
    if (reason != _Py_GC_REASON_SHUTDOWN) {
        invoke_gc_callback(gcstate, "stop", generation, &stats);
    }
    _PyErr_SetRaisedException(tstate, exc);
    GC_STAT_ADD(generation, objects_collected, stats.collected);
#ifdef Py_STATS
    PyStats *s = _PyStats_GET();
    if (s) {
        GC_STAT_ADD(generation, object_visits,
            s->object_stats.object_visits);
        s->object_stats.object_visits = 0;
    }
#endif
    validate_spaces(gcstate);
    gcstate->frame = NULL;
    /* Release the re-entrancy guard taken above. */
    _Py_atomic_store_int(&gcstate->collecting, 0);

    if (gcstate->debug & _PyGC_DEBUG_STATS) {
        PySys_WriteStderr(
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
            stats.collected + stats.uncollectable, stats.uncollectable, stats.duration
        );
    }

    return stats.uncollectable + stats.collected;
}
2231
2232
/* Public API to invoke gc.collect() from C */
2233
Py_ssize_t
2234
PyGC_Collect(void)
2235
0
{
2236
0
    return _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL);
2237
0
}
2238
2239
void
2240
_PyGC_CollectNoFail(PyThreadState *tstate)
2241
0
{
2242
    /* Ideally, this function is only called on interpreter shutdown,
2243
       and therefore not recursively.  Unfortunately, when there are daemon
2244
       threads, a daemon thread can start a cyclic garbage collection
2245
       during interpreter shutdown (and then never finish it).
2246
       See http://bugs.python.org/issue8713#msg195178 for an example.
2247
       */
2248
0
    _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN);
2249
0
}
2250
2251
/* At interpreter shutdown, emit a ResourceWarning if uncollectable
 * objects remain in gcstate->garbage (unless DEBUG_SAVEALL is set,
 * in which case gc.garbage holding objects is expected).  With
 * DEBUG_UNCOLLECTABLE also dump the repr of gc.garbage to stderr.
 * All errors are reported as unraisable; nothing propagates.
 */
void
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;
    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
        const char *message;
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
            message = "gc: %zd uncollectable objects at shutdown";
        }
        else {
            message = "gc: %zd uncollectable objects at shutdown; " \
                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
        }
        /* PyErr_WarnFormat does too many things and we are at shutdown,
           the warnings module's dependencies (e.g. linecache) may be gone
           already. */
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL, message,
                                     PyList_GET_SIZE(gcstate->garbage)))
        {
            PyErr_FormatUnraisable("Exception ignored in GC shutdown");
        }
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
            PyObject *repr = NULL, *bytes = NULL;
            repr = PyObject_Repr(gcstate->garbage);
            /* Encode with the filesystem encoding so the bytes can be
             * written straight to stderr. */
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
                PyErr_FormatUnraisable("Exception ignored in GC shutdown "
                                       "while formatting garbage");
            }
            else {
                PySys_WriteStderr(
                    "      %s\n",
                    PyBytes_AS_STRING(bytes)
                    );
            }
            Py_XDECREF(repr);
            Py_XDECREF(bytes);
        }
    }
}
2292
2293
static void
2294
0
finalize_unlink_gc_head(PyGC_Head *gc) {
2295
0
    PyGC_Head *prev = GC_PREV(gc);
2296
0
    PyGC_Head *next = GC_NEXT(gc);
2297
0
    _PyGCHead_SET_NEXT(prev, next);
2298
0
    _PyGCHead_SET_PREV(next, prev);
2299
0
}
2300
2301
void
2302
_PyGC_Fini(PyInterpreterState *interp)
2303
0
{
2304
0
    GCState *gcstate = &interp->gc;
2305
0
    Py_CLEAR(gcstate->garbage);
2306
0
    Py_CLEAR(gcstate->callbacks);
2307
0
    PyMem_RawFree(gcstate->generation_stats);
2308
0
    gcstate->generation_stats = NULL;
2309
2310
    /* Prevent a subtle bug that affects sub-interpreters that use basic
2311
     * single-phase init extensions (m_size == -1).  Those extensions cause objects
2312
     * to be shared between interpreters, via the PyDict_Update(mdict, m_copy) call
2313
     * in import_find_extension().
2314
     *
2315
     * If they are GC objects, their GC head next or prev links could refer to
2316
     * the interpreter _gc_runtime_state PyGC_Head nodes.  Those nodes go away
2317
     * when the interpreter structure is freed and so pointers to them become
2318
     * invalid.  If those objects are still used by another interpreter and
2319
     * UNTRACK is called on them, a crash will happen.  We untrack the nodes
2320
     * here to avoid that.
2321
     *
2322
     * This bug was originally fixed when reported as gh-90228.  The bug was
2323
     * re-introduced in gh-94673.
2324
     */
2325
0
    finalize_unlink_gc_head(&gcstate->young.head);
2326
0
    finalize_unlink_gc_head(&gcstate->old[0].head);
2327
0
    finalize_unlink_gc_head(&gcstate->old[1].head);
2328
0
    finalize_unlink_gc_head(&gcstate->permanent_generation.head);
2329
0
}
2330
2331
/* for debugging */
2332
void
2333
_PyGC_Dump(PyGC_Head *g)
2334
0
{
2335
0
    PyObject_Dump(FROM_GC(g));
2336
0
}
2337
2338
2339
#ifdef Py_DEBUG
/* tp_traverse visitor used by PyObject_GC_Track() in debug builds: assert
   that every object reachable from the freshly tracked parent has not
   already been freed. */
static int
visit_validate(PyObject *op, void *parent_raw)
{
    PyObject *parent = _PyObject_CAST(parent_raw);
    if (_PyObject_IsFreed(op)) {
        _PyObject_ASSERT_FAILED_MSG(parent,
                                    "PyObject_GC_Track() object is not valid");
    }
    return 0;
}
#endif
2351
2352
2353
/* extension modules might be compiled with GC support so these
2354
   functions must always be available */
2355
2356
void
2357
PyObject_GC_Track(void *op_raw)
2358
185M
{
2359
185M
    PyObject *op = _PyObject_CAST(op_raw);
2360
185M
    if (_PyObject_GC_IS_TRACKED(op)) {
2361
0
        _PyObject_ASSERT_FAILED_MSG(op,
2362
0
                                    "object already tracked "
2363
0
                                    "by the garbage collector");
2364
0
    }
2365
185M
    _PyObject_GC_TRACK(op);
2366
2367
#ifdef Py_DEBUG
2368
    /* Check that the object is valid: validate objects traversed
2369
       by tp_traverse() */
2370
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
2371
    (void)traverse(op, visit_validate, op);
2372
#endif
2373
185M
}
2374
2375
void
2376
PyObject_GC_UnTrack(void *op_raw)
2377
1.33G
{
2378
1.33G
    PyObject *op = _PyObject_CAST(op_raw);
2379
    /* The code for some objects, such as tuples, is a bit
2380
     * sloppy about when the object is tracked and untracked. */
2381
1.33G
    if (_PyObject_GC_IS_TRACKED(op)) {
2382
1.09G
        _PyObject_GC_UNTRACK(op);
2383
1.09G
    }
2384
1.33G
}
2385
2386
int
2387
PyObject_IS_GC(PyObject *obj)
2388
187M
{
2389
187M
    return _PyObject_IS_GC(obj);
2390
187M
}
2391
2392
void
2393
_Py_ScheduleGC(PyThreadState *tstate)
2394
25.9M
{
2395
25.9M
    if (!_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT))
2396
95.0k
    {
2397
95.0k
        _Py_set_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
2398
95.0k
    }
2399
25.9M
}
2400
2401
void
2402
_Py_TriggerGC(struct _gc_runtime_state *gcstate)
2403
26.0M
{
2404
26.0M
    PyThreadState *tstate = _PyThreadState_GET();
2405
26.0M
    if (gcstate->enabled &&
2406
26.0M
        gcstate->young.threshold != 0 &&
2407
26.0M
        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
2408
26.0M
        !_PyErr_Occurred(tstate))
2409
25.9M
    {
2410
25.9M
        _Py_ScheduleGC(tstate);
2411
25.9M
    }
2412
26.0M
}
2413
2414
void
2415
_PyObject_GC_Link(PyObject *op)
2416
640M
{
2417
640M
    PyGC_Head *gc = AS_GC(op);
2418
    // gc must be correctly aligned
2419
640M
    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
2420
640M
    gc->_gc_next = 0;
2421
640M
    gc->_gc_prev = 0;
2422
2423
640M
}
2424
2425
void
2426
_Py_RunGC(PyThreadState *tstate)
2427
95.0k
{
2428
95.0k
    if (tstate->interp->gc.enabled) {
2429
95.0k
        _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP);
2430
95.0k
    }
2431
95.0k
}
2432
2433
static PyObject *
2434
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
2435
514M
{
2436
514M
    PyThreadState *tstate = _PyThreadState_GET();
2437
514M
    if (basicsize > PY_SSIZE_T_MAX - presize) {
2438
0
        return _PyErr_NoMemory(tstate);
2439
0
    }
2440
514M
    size_t size = presize + basicsize;
2441
514M
    char *mem = _PyObject_MallocWithType(tp, size);
2442
514M
    if (mem == NULL) {
2443
0
        return _PyErr_NoMemory(tstate);
2444
0
    }
2445
514M
    ((PyObject **)mem)[0] = NULL;
2446
514M
    ((PyObject **)mem)[1] = NULL;
2447
514M
    PyObject *op = (PyObject *)(mem + presize);
2448
514M
    _PyObject_GC_Link(op);
2449
514M
    return op;
2450
514M
}
2451
2452
2453
PyObject *
2454
_PyObject_GC_New(PyTypeObject *tp)
2455
268M
{
2456
268M
    size_t presize = _PyType_PreHeaderSize(tp);
2457
268M
    size_t size = _PyObject_SIZE(tp);
2458
268M
    if (_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)) {
2459
216
        size += _PyInlineValuesSize(tp);
2460
216
    }
2461
268M
    PyObject *op = gc_alloc(tp, size, presize);
2462
268M
    if (op == NULL) {
2463
0
        return NULL;
2464
0
    }
2465
268M
    _PyObject_Init(op, tp);
2466
268M
    if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
2467
216
        _PyObject_InitInlineValues(op, tp);
2468
216
    }
2469
268M
    return op;
2470
268M
}
2471
2472
PyVarObject *
2473
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2474
246M
{
2475
246M
    PyVarObject *op;
2476
2477
246M
    if (nitems < 0) {
2478
0
        PyErr_BadInternalCall();
2479
0
        return NULL;
2480
0
    }
2481
246M
    size_t presize = _PyType_PreHeaderSize(tp);
2482
246M
    size_t size = _PyObject_VAR_SIZE(tp, nitems);
2483
246M
    op = (PyVarObject *)gc_alloc(tp, size, presize);
2484
246M
    if (op == NULL) {
2485
0
        return NULL;
2486
0
    }
2487
246M
    _PyObject_InitVar(op, tp, nitems);
2488
246M
    return op;
2489
246M
}
2490
2491
PyObject *
2492
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2493
0
{
2494
0
    size_t presize = _PyType_PreHeaderSize(tp);
2495
0
    size_t size = _PyObject_SIZE(tp) + extra_size;
2496
0
    PyObject *op = gc_alloc(tp, size, presize);
2497
0
    if (op == NULL) {
2498
0
        return NULL;
2499
0
    }
2500
0
    memset((char *)op + sizeof(PyObject), 0, size - sizeof(PyObject));
2501
0
    _PyObject_Init(op, tp);
2502
0
    return op;
2503
0
}
2504
2505
PyVarObject *
2506
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2507
40
{
2508
40
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2509
40
    const size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2510
40
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2511
40
    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2512
0
        return (PyVarObject *)PyErr_NoMemory();
2513
0
    }
2514
40
    char *mem = (char *)op - presize;
2515
40
    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
2516
40
    if (mem == NULL) {
2517
0
        return (PyVarObject *)PyErr_NoMemory();
2518
0
    }
2519
40
    op = (PyVarObject *) (mem + presize);
2520
40
    Py_SET_SIZE(op, nitems);
2521
40
    return op;
2522
40
}
2523
2524
void
2525
PyObject_GC_Del(void *op)
2526
639M
{
2527
639M
    size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2528
639M
    PyGC_Head *g = AS_GC(op);
2529
639M
    if (_PyObject_GC_IS_TRACKED(op)) {
2530
0
        gc_list_remove(g);
2531
0
        GCState *gcstate = get_gc_state();
2532
0
        if (gcstate->young.count > 0) {
2533
0
            gcstate->young.count--;
2534
0
        }
2535
0
        gcstate->heap_size--;
2536
#ifdef Py_DEBUG
2537
        PyObject *exc = PyErr_GetRaisedException();
2538
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2539
                                     "gc", NULL,
2540
                                     "Object of type %s is not untracked "
2541
                                     "before destruction",
2542
                                     Py_TYPE(op)->tp_name))
2543
        {
2544
            PyErr_FormatUnraisable("Exception ignored on object deallocation");
2545
        }
2546
        PyErr_SetRaisedException(exc);
2547
#endif
2548
0
    }
2549
639M
    PyObject_Free(((char *)op)-presize);
2550
639M
}
2551
2552
int
2553
PyObject_GC_IsTracked(PyObject* obj)
2554
9.23k
{
2555
9.23k
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2556
32
        return 1;
2557
32
    }
2558
9.20k
    return 0;
2559
9.23k
}
2560
2561
int
2562
PyObject_GC_IsFinalized(PyObject *obj)
2563
0
{
2564
0
    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
2565
0
         return 1;
2566
0
    }
2567
0
    return 0;
2568
0
}
2569
2570
static int
2571
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
2572
0
{
2573
0
    PyGC_Head *gc_list, *gc;
2574
0
    gc_list = &gen->head;
2575
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2576
0
        PyObject *op = FROM_GC(gc);
2577
0
        Py_INCREF(op);
2578
0
        int res = callback(op, arg);
2579
0
        Py_DECREF(op);
2580
0
        if (!res) {
2581
0
            return -1;
2582
0
        }
2583
0
    }
2584
0
    return 0;
2585
0
}
2586
2587
void
2588
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2589
0
{
2590
0
    GCState *gcstate = get_gc_state();
2591
0
    int original_state = gcstate->enabled;
2592
0
    gcstate->enabled = 0;
2593
0
    if (visit_generation(callback, arg, &gcstate->young) < 0) {
2594
0
        goto done;
2595
0
    }
2596
0
    if (visit_generation(callback, arg, &gcstate->old[0]) < 0) {
2597
0
        goto done;
2598
0
    }
2599
0
    if (visit_generation(callback, arg, &gcstate->old[1]) < 0) {
2600
0
        goto done;
2601
0
    }
2602
0
    visit_generation(callback, arg, &gcstate->permanent_generation);
2603
0
done:
2604
0
    gcstate->enabled = original_state;
2605
0
}
2606
2607
#endif  // Py_GIL_DISABLED