Coverage Report

Created: 2025-11-02 06:30

/src/cpython/Python/gc.c
Line
Count
Source
1
//  This implements the reference cycle garbage collector.
2
//  The Python module interface to the collector is in gcmodule.c.
3
//  See InternalDocs/garbage_collector.md for more information.
4
5
#include "Python.h"
6
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
7
#include "pycore_dict.h"          // _PyInlineValuesSize()
8
#include "pycore_initconfig.h"    // _PyStatus_OK()
9
#include "pycore_interp.h"        // PyInterpreterState.gc
10
#include "pycore_interpframe.h"   // _PyFrame_GetLocalsArray()
11
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
12
#include "pycore_pystate.h"       // _PyThreadState_GET()
13
#include "pycore_tuple.h"         // _PyTuple_MaybeUntrack()
14
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
15
16
#include "pydtrace.h"
17
18
19
#ifndef Py_GIL_DISABLED
20
21
typedef struct _gc_runtime_state GCState;
22
23
#ifdef Py_DEBUG
24
#  define GC_DEBUG
25
#endif
26
27
// Define this when debugging the GC
28
// #define GC_EXTRA_DEBUG
29
30
31
236M
#define GC_NEXT _PyGCHead_NEXT
32
82.3M
#define GC_PREV _PyGCHead_PREV
33
34
// update_refs() sets this bit for all objects in the current generation.
35
// subtract_refs() and move_unreachable() use this to distinguish whether a
36
// visited object is in the generation currently being collected.
37
//
38
// move_unreachable() removes this flag from reachable objects.
39
// Only unreachable objects have this flag.
40
//
41
// No object in the interpreter has this flag after GC ends.
42
108M
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
43
44
// The second-lowest bit of _gc_next is used for the UNREACHABLE flag.
45
//
46
// This flag indicates that the object is in the unreachable list built by move_unreachable().
47
//
48
// Although this flag is used only by move_unreachable(), that function
49
// doesn't clear the flag itself, to avoid an unnecessary extra pass over the list.
50
// move_legacy_finalizers() removes this flag instead.
51
// Between those two steps, the unreachable list is not a normal list and we cannot use
52
// most gc_list_* functions on it.
53
19.4M
#define NEXT_MASK_UNREACHABLE  2
54
55
877M
#define AS_GC(op) _Py_AS_GC(op)
56
170M
#define FROM_GC(gc) _Py_FROM_GC(gc)
57
58
// Automatically choose the generation that needs collecting.
59
#define GENERATION_AUTO (-1)
60
61
static inline int
62
gc_is_collecting(PyGC_Head *g)
63
63.9M
{
64
63.9M
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
65
63.9M
}
66
67
static inline void
68
gc_clear_collecting(PyGC_Head *g)
69
21.9M
{
70
21.9M
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
71
21.9M
}
72
73
static inline Py_ssize_t
74
gc_get_refs(PyGC_Head *g)
75
53.9M
{
76
53.9M
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
77
53.9M
}
78
79
static inline void
80
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
81
14.5M
{
82
14.5M
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
83
14.5M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
84
14.5M
}
85
86
static inline void
87
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
88
22.4M
{
89
22.4M
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
90
22.4M
        | PREV_MASK_COLLECTING
91
22.4M
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
92
22.4M
}
93
94
static inline void
95
gc_decref(PyGC_Head *g)
96
17.9M
{
97
17.9M
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
98
17.9M
                              gc_get_refs(g) > 0,
99
17.9M
                              "refcount is too small");
100
17.9M
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
101
17.9M
}
102
103
static inline int
104
gc_old_space(PyGC_Head *g)
105
65.4M
{
106
65.4M
    return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1;
107
65.4M
}
108
109
static inline int
110
other_space(int space)
111
849
{
112
849
    assert(space == 0 || space == 1);
113
849
    return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1;
114
849
}
115
116
static inline void
117
gc_flip_old_space(PyGC_Head *g)
118
37.2M
{
119
37.2M
    g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1;
120
37.2M
}
121
122
static inline void
123
gc_set_old_space(PyGC_Head *g, int space)
124
20.9M
{
125
20.9M
    assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1);
126
20.9M
    g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1;
127
20.9M
    g->_gc_next |= space;
128
20.9M
}
129
130
static PyGC_Head *
131
GEN_HEAD(GCState *gcstate, int n)
132
0
{
133
0
    assert((gcstate->visited_space & (~1)) == 0);
134
0
    switch(n) {
135
0
        case 0:
136
0
            return &gcstate->young.head;
137
0
        case 1:
138
0
            return &gcstate->old[gcstate->visited_space].head;
139
0
        case 2:
140
0
            return &gcstate->old[gcstate->visited_space^1].head;
141
0
        default:
142
0
            Py_UNREACHABLE();
143
0
    }
144
0
}
145
146
static GCState *
147
get_gc_state(void)
148
10.9k
{
149
10.9k
    PyInterpreterState *interp = _PyInterpreterState_GET();
150
10.9k
    return &interp->gc;
151
10.9k
}
152
153
154
void
155
_PyGC_InitState(GCState *gcstate)
156
16
{
157
16
#define INIT_HEAD(GEN) \
158
64
    do { \
159
64
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
160
64
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
161
64
    } while (0)
162
163
16
    assert(gcstate->young.count == 0);
164
16
    assert(gcstate->old[0].count == 0);
165
16
    assert(gcstate->old[1].count == 0);
166
16
    INIT_HEAD(gcstate->young);
167
16
    INIT_HEAD(gcstate->old[0]);
168
16
    INIT_HEAD(gcstate->old[1]);
169
16
    INIT_HEAD(gcstate->permanent_generation);
170
171
16
#undef INIT_HEAD
172
16
}
173
174
175
PyStatus
176
_PyGC_Init(PyInterpreterState *interp)
177
16
{
178
16
    GCState *gcstate = &interp->gc;
179
180
16
    gcstate->garbage = PyList_New(0);
181
16
    if (gcstate->garbage == NULL) {
182
0
        return _PyStatus_NO_MEMORY();
183
0
    }
184
185
16
    gcstate->callbacks = PyList_New(0);
186
16
    if (gcstate->callbacks == NULL) {
187
0
        return _PyStatus_NO_MEMORY();
188
0
    }
189
16
    gcstate->heap_size = 0;
190
191
16
    return _PyStatus_OK();
192
16
}
193
194
195
/*
196
_gc_prev values
197
---------------
198
199
Between collections, _gc_prev is used for the doubly linked list.
200
201
Lowest two bits of _gc_prev are used for flags.
202
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
203
or _PyObject_GC_UNTRACK() is called.
204
205
During a collection, _gc_prev is temporarily used for gc_refs, and the gc list
206
is singly linked until _gc_prev is restored.
207
208
gc_refs
209
    At the start of a collection, update_refs() copies the true refcount
210
    to gc_refs, for each object in the generation being collected.
211
    subtract_refs() then adjusts gc_refs so that it equals the number of
212
    times an object is referenced directly from outside the generation
213
    being collected.
214
215
PREV_MASK_COLLECTING
216
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
217
    update_refs().
218
219
220
_gc_next values
221
---------------
222
223
_gc_next takes these values:
224
225
0
226
    The object is not tracked
227
228
!= 0
229
    Pointer to the next object in the GC list.
230
    Additionally, the second-lowest bit is temporarily used for the
231
    NEXT_MASK_UNREACHABLE flag described below.
232
233
NEXT_MASK_UNREACHABLE
234
    move_unreachable() then moves objects not reachable (whether directly or
235
    indirectly) from outside the generation into an "unreachable" set and
236
    sets this flag.
237
238
    Objects that are found to be reachable have gc_refs set to 1.
239
    If this flag is set on an object that turns out to be reachable, the object must be in the
240
    "unreachable" set;
241
    the flag is then unset and the object is moved back to the "reachable" set.
242
243
    move_legacy_finalizers() will remove this flag from the "unreachable" set.
244
*/
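
To make the packing described above concrete, here is a small self-contained sketch. It is illustrative only: the DEMO_* shift and mask values are assumptions made up for the example, not the real _PyGC_PREV_SHIFT/_PyGC_PREV_MASK constants, but it shows the same idea as gc_reset_refs(), gc_get_refs() and gc_decref(): the upper bits of a single uintptr_t hold the copied refcount (gc_refs) while the low bits hold the FINALIZED and COLLECTING flags.

/* Illustrative sketch only -- not part of gc.c.  The DEMO_* values are
 * assumptions for this example, not the real _PyGC_* constants. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PREV_SHIFT        2                /* two low bits reserved for flags */
#define DEMO_MASK_FINALIZED    ((uintptr_t)1)   /* bit 0 */
#define DEMO_MASK_COLLECTING   ((uintptr_t)2)   /* bit 1 */

int main(void)
{
    uintptr_t gc_prev = DEMO_MASK_FINALIZED;    /* finalized, not collecting */

    /* Like gc_reset_refs(): keep only the finalized bit, set COLLECTING,
     * and stash a copy of the refcount in the upper bits. */
    intptr_t refs = 7;
    gc_prev = (gc_prev & DEMO_MASK_FINALIZED)
              | DEMO_MASK_COLLECTING
              | ((uintptr_t)refs << DEMO_PREV_SHIFT);

    /* Like gc_get_refs() and gc_decref(). */
    assert((intptr_t)(gc_prev >> DEMO_PREV_SHIFT) == 7);
    gc_prev -= (uintptr_t)1 << DEMO_PREV_SHIFT;

    printf("refs=%ld collecting=%d finalized=%d\n",
           (long)(gc_prev >> DEMO_PREV_SHIFT),
           (gc_prev & DEMO_MASK_COLLECTING) != 0,
           (gc_prev & DEMO_MASK_FINALIZED) != 0);
    return 0;
}
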
245
246
/*** list functions ***/
247
248
static inline void
249
gc_list_init(PyGC_Head *list)
250
155k
{
251
    // The list header must not have flags set,
252
    // so we can assign the pointer with a simple cast.
253
155k
    list->_gc_prev = (uintptr_t)list;
254
155k
    list->_gc_next = (uintptr_t)list;
255
155k
}
256
257
static inline int
258
gc_list_is_empty(PyGC_Head *list)
259
39.0M
{
260
39.0M
    return (list->_gc_next == (uintptr_t)list);
261
39.0M
}
262
263
/* Append `node` to `list`. */
264
static inline void
265
gc_list_append(PyGC_Head *node, PyGC_Head *list)
266
2.13M
{
267
2.13M
    assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0);
268
2.13M
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
269
270
    // last <-> node
271
2.13M
    _PyGCHead_SET_PREV(node, last);
272
2.13M
    _PyGCHead_SET_NEXT(last, node);
273
274
    // node <-> list
275
2.13M
    _PyGCHead_SET_NEXT(node, list);
276
2.13M
    list->_gc_prev = (uintptr_t)node;
277
2.13M
}
278
279
/* Remove `node` from the gc list it's currently in. */
280
static inline void
281
gc_list_remove(PyGC_Head *node)
282
0
{
283
0
    PyGC_Head *prev = GC_PREV(node);
284
0
    PyGC_Head *next = GC_NEXT(node);
285
286
0
    _PyGCHead_SET_NEXT(prev, next);
287
0
    _PyGCHead_SET_PREV(next, prev);
288
289
0
    node->_gc_next = 0; /* object is not currently tracked */
290
0
}
291
292
/* Move `node` from the gc list it's currently in (which is not explicitly
293
 * named here) to the end of `list`.  This is semantically the same as
294
 * gc_list_remove(node) followed by gc_list_append(node, list).
295
 */
296
static void
297
gc_list_move(PyGC_Head *node, PyGC_Head *list)
298
75.9M
{
299
    /* Unlink from current list. */
300
75.9M
    PyGC_Head *from_prev = GC_PREV(node);
301
75.9M
    PyGC_Head *from_next = GC_NEXT(node);
302
75.9M
    _PyGCHead_SET_NEXT(from_prev, from_next);
303
75.9M
    _PyGCHead_SET_PREV(from_next, from_prev);
304
305
    /* Relink at end of new list. */
306
    // list must not have flags.  So we can skip macros.
307
75.9M
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
308
75.9M
    _PyGCHead_SET_PREV(node, to_prev);
309
75.9M
    _PyGCHead_SET_NEXT(to_prev, node);
310
75.9M
    list->_gc_prev = (uintptr_t)node;
311
75.9M
    _PyGCHead_SET_NEXT(node, list);
312
75.9M
}
313
314
/* append list `from` onto list `to`; `from` becomes an empty list */
315
static void
316
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
317
65.7k
{
318
65.7k
    assert(from != to);
319
65.7k
    if (!gc_list_is_empty(from)) {
320
35.2k
        PyGC_Head *to_tail = GC_PREV(to);
321
35.2k
        PyGC_Head *from_head = GC_NEXT(from);
322
35.2k
        PyGC_Head *from_tail = GC_PREV(from);
323
35.2k
        assert(from_head != from);
324
35.2k
        assert(from_tail != from);
325
35.2k
        assert(gc_list_is_empty(to) ||
326
35.2k
            gc_old_space(to_tail) == gc_old_space(from_tail));
327
328
35.2k
        _PyGCHead_SET_NEXT(to_tail, from_head);
329
35.2k
        _PyGCHead_SET_PREV(from_head, to_tail);
330
331
35.2k
        _PyGCHead_SET_NEXT(from_tail, to);
332
35.2k
        _PyGCHead_SET_PREV(to, from_tail);
333
35.2k
    }
334
65.7k
    gc_list_init(from);
335
65.7k
}
336
337
static Py_ssize_t
338
gc_list_size(PyGC_Head *list)
339
21.9k
{
340
21.9k
    PyGC_Head *gc;
341
21.9k
    Py_ssize_t n = 0;
342
21.7M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
343
21.7M
        n++;
344
21.7M
    }
345
21.9k
    return n;
346
21.9k
}
347
348
/* Walk the list and mark all objects as non-collecting */
349
static inline void
350
gc_list_clear_collecting(PyGC_Head *collectable)
351
10.9k
{
352
10.9k
    PyGC_Head *gc;
353
1.07M
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
354
1.06M
        gc_clear_collecting(gc);
355
1.06M
    }
356
10.9k
}
357
358
/* Append objects in a GC list to a Python list.
359
 * Return 0 if all OK, < 0 if error (out of memory for list)
360
 */
361
static int
362
append_objects(PyObject *py_list, PyGC_Head *gc_list)
363
0
{
364
0
    PyGC_Head *gc;
365
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
366
0
        PyObject *op = FROM_GC(gc);
367
0
        if (op != py_list) {
368
0
            if (PyList_Append(py_list, op)) {
369
0
                return -1; /* exception */
370
0
            }
371
0
        }
372
0
    }
373
0
    return 0;
374
0
}
375
376
// Constants for validate_list's flags argument.
377
enum flagstates {collecting_clear_unreachable_clear,
378
                 collecting_clear_unreachable_set,
379
                 collecting_set_unreachable_clear,
380
                 collecting_set_unreachable_set};
381
382
#ifdef GC_DEBUG
383
// validate_list checks list consistency, and also serves as documentation
384
// describing when flags are expected to be set / unset.
385
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
386
// the prev and next pointers are "polluted" with flags.
387
// What's checked:
388
// - The `head` pointers are not polluted.
389
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
390
//   set or clear, as specified by the 'flags' argument.
391
// - The prev and next pointers are mutually consistent.
392
static void
393
validate_list(PyGC_Head *head, enum flagstates flags)
394
{
395
    assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
396
    assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
397
    uintptr_t prev_value = 0, next_value = 0;
398
    switch (flags) {
399
        case collecting_clear_unreachable_clear:
400
            break;
401
        case collecting_set_unreachable_clear:
402
            prev_value = PREV_MASK_COLLECTING;
403
            break;
404
        case collecting_clear_unreachable_set:
405
            next_value = NEXT_MASK_UNREACHABLE;
406
            break;
407
        case collecting_set_unreachable_set:
408
            prev_value = PREV_MASK_COLLECTING;
409
            next_value = NEXT_MASK_UNREACHABLE;
410
            break;
411
        default:
412
            assert(! "bad internal flags argument");
413
    }
414
    PyGC_Head *prev = head;
415
    PyGC_Head *gc = GC_NEXT(head);
416
    while (gc != head) {
417
        PyGC_Head *trueprev = GC_PREV(gc);
418
        PyGC_Head *truenext = GC_NEXT(gc);
419
        assert(truenext != NULL);
420
        assert(trueprev == prev);
421
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
422
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
423
        prev = gc;
424
        gc = truenext;
425
    }
426
    assert(prev == GC_PREV(head));
427
}
428
429
#else
430
142k
#define validate_list(x, y) do{}while(0)
431
#endif
432
433
#ifdef GC_EXTRA_DEBUG
434
435
436
static void
437
gc_list_validate_space(PyGC_Head *head, int space) {
438
    PyGC_Head *gc = GC_NEXT(head);
439
    while (gc != head) {
440
        assert(gc_old_space(gc) == space);
441
        gc = GC_NEXT(gc);
442
    }
443
}
444
445
static void
446
validate_spaces(GCState *gcstate)
447
{
448
    int visited = gcstate->visited_space;
449
    int not_visited = other_space(visited);
450
    gc_list_validate_space(&gcstate->young.head, not_visited);
451
    for (int space = 0; space < 2; space++) {
452
        gc_list_validate_space(&gcstate->old[space].head, space);
453
    }
454
    gc_list_validate_space(&gcstate->permanent_generation.head, visited);
455
}
456
457
static void
458
validate_consistent_old_space(PyGC_Head *head)
459
{
460
    PyGC_Head *gc = GC_NEXT(head);
461
    if (gc == head) {
462
        return;
463
    }
464
    int old_space = gc_old_space(gc);
465
    while (gc != head) {
466
        PyGC_Head *truenext = GC_NEXT(gc);
467
        assert(truenext != NULL);
468
        assert(gc_old_space(gc) == old_space);
469
        gc = truenext;
470
    }
471
}
472
473
474
#else
475
43.2k
#define validate_spaces(g) do{}while(0)
476
54.7k
#define validate_consistent_old_space(l) do{}while(0)
477
56.4k
#define gc_list_validate_space(l, s) do{}while(0)
478
#endif
479
480
/*** end of list stuff ***/
481
482
483
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
484
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
485
 */
486
static void
487
update_refs(PyGC_Head *containers)
488
21.9k
{
489
21.9k
    PyGC_Head *next;
490
21.9k
    PyGC_Head *gc = GC_NEXT(containers);
491
492
22.5M
    while (gc != containers) {
493
22.4M
        next = GC_NEXT(gc);
494
22.4M
        PyObject *op = FROM_GC(gc);
495
22.4M
        if (_Py_IsImmortal(op)) {
496
0
            assert(!_Py_IsStaticImmortal(op));
497
0
            _PyObject_GC_UNTRACK(op);
498
0
           gc = next;
499
0
           continue;
500
0
        }
501
22.4M
        gc_reset_refs(gc, Py_REFCNT(op));
502
        /* Python's cyclic gc should never see an incoming refcount
503
         * of 0:  if something decref'ed to 0, it should have been
504
         * deallocated immediately at that time.
505
         * Possible cause (if the assert triggers):  a tp_dealloc
506
         * routine left a gc-aware object tracked during its teardown
507
         * phase, and did something-- or allowed something to happen --
508
         * that called back into Python.  gc can trigger then, and may
509
         * see the still-tracked dying object.  Before this assert
510
         * was added, such mistakes went on to allow gc to try to
511
         * delete the object again.  In a debug build, that caused
512
         * a mysterious segfault, when _Py_ForgetReference tried
513
         * to remove the object from the doubly-linked list of all
514
         * objects a second time.  In a release build, an actual
515
         * double deallocation occurred, which leads to corruption
516
         * of the allocator's internal bookkeeping pointers.  That's
517
         * so serious that maybe this should be a release-build
518
         * check instead of an assert?
519
         */
520
22.4M
        _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
521
22.4M
        gc = next;
522
22.4M
    }
523
21.9k
}
524
525
/* A traversal callback for subtract_refs. */
526
static int
527
visit_decref(PyObject *op, void *parent)
528
71.8M
{
529
71.8M
    OBJECT_STAT_INC(object_visits);
530
71.8M
    _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
531
532
71.8M
    if (_PyObject_IS_GC(op)) {
533
34.6M
        PyGC_Head *gc = AS_GC(op);
534
        /* We're only interested in gc_refs for objects in the
535
         * generation being collected, which can be recognized
536
         * because only they have positive gc_refs.
537
         */
538
34.6M
        if (gc_is_collecting(gc)) {
539
17.9M
            gc_decref(gc);
540
17.9M
        }
541
34.6M
    }
542
71.8M
    return 0;
543
71.8M
}
544
545
int
546
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
547
2.02M
{
548
    // This is a bit tricky! We want to ignore stackrefs with embedded
549
    // refcounts when computing the incoming references, but otherwise treat
550
    // them like normal.
551
2.02M
    assert(!PyStackRef_IsTaggedInt(*ref));
552
2.02M
    if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) {
553
7.90k
        return 0;
554
7.90k
    }
555
2.01M
    Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
556
2.01M
    return 0;
557
2.01M
}
558
559
int
560
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
561
384k
{
562
384k
    _PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
563
    /* locals and stack */
564
2.32M
    for (; ref < frame->stackpointer; ref++) {
565
1.93M
        if (!PyStackRef_IsTaggedInt(*ref)) {
566
1.93M
            _Py_VISIT_STACKREF(*ref);
567
1.93M
        }
568
1.93M
    }
569
384k
    return 0;
570
384k
}
571
572
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
573
 * for all objects in containers. The ones with gc_refs > 0 are directly
574
 * reachable from outside containers, and so can't be collected.
575
 */
576
static void
577
subtract_refs(PyGC_Head *containers)
578
21.9k
{
579
21.9k
    traverseproc traverse;
580
21.9k
    PyGC_Head *gc = GC_NEXT(containers);
581
22.5M
    for (; gc != containers; gc = GC_NEXT(gc)) {
582
22.4M
        PyObject *op = FROM_GC(gc);
583
22.4M
        traverse = Py_TYPE(op)->tp_traverse;
584
22.4M
        (void) traverse(op,
585
22.4M
                        visit_decref,
586
22.4M
                        op);
587
22.4M
    }
588
21.9k
}
589
590
/* A traversal callback for move_unreachable. */
591
static int
592
visit_reachable(PyObject *op, void *arg)
593
64.4M
{
594
64.4M
    PyGC_Head *reachable = arg;
595
64.4M
    OBJECT_STAT_INC(object_visits);
596
64.4M
    if (!_PyObject_IS_GC(op)) {
597
35.1M
        return 0;
598
35.1M
    }
599
600
29.3M
    PyGC_Head *gc = AS_GC(op);
601
29.3M
    const Py_ssize_t gc_refs = gc_get_refs(gc);
602
603
    // Ignore objects in other generations.
604
    // This also skips objects "to the left" of the current position in
605
    // move_unreachable's scan of the 'young' list - they've already been
606
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
607
29.3M
    if (! gc_is_collecting(gc)) {
608
14.1M
        return 0;
609
14.1M
    }
610
    // It would be a logic error elsewhere if the collecting flag were set on
611
    // an untracked object.
612
15.1M
    _PyObject_ASSERT(op, gc->_gc_next != 0);
613
614
15.1M
    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
615
        /* This had gc_refs = 0 when move_unreachable got
616
         * to it, but turns out it's reachable after all.
617
         * Move it back to move_unreachable's 'young' list,
618
         * and move_unreachable will eventually get to it
619
         * again.
620
         */
621
        // Manually unlink gc from unreachable list because the list functions
622
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
623
2.13M
        PyGC_Head *prev = GC_PREV(gc);
624
2.13M
        PyGC_Head *next = GC_NEXT(gc);
625
2.13M
        _PyObject_ASSERT(FROM_GC(prev),
626
2.13M
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
627
2.13M
        _PyObject_ASSERT(FROM_GC(next),
628
2.13M
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
629
2.13M
        prev->_gc_next = gc->_gc_next;  // copy flag bits
630
2.13M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
631
2.13M
        _PyGCHead_SET_PREV(next, prev);
632
633
2.13M
        gc_list_append(gc, reachable);
634
2.13M
        gc_set_refs(gc, 1);
635
2.13M
    }
636
12.9M
    else if (gc_refs == 0) {
637
        /* This is in move_unreachable's 'young' list, but
638
         * the traversal hasn't yet gotten to it.  All
639
         * we need to do is tell move_unreachable that it's
640
         * reachable.
641
         */
642
12.4M
        gc_set_refs(gc, 1);
643
12.4M
    }
644
    /* Else there's nothing to do.
645
     * If gc_refs > 0, it must be in move_unreachable's 'young'
646
     * list, and move_unreachable will eventually get to it.
647
     */
648
583k
    else {
649
583k
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
650
583k
    }
651
15.1M
    return 0;
652
29.3M
}
653
654
/* Move the unreachable objects from young to unreachable.  After this,
655
 * no object in young has the PREV_MASK_COLLECTING flag, while every object
656
 * in unreachable has it.
657
 * All objects in young after this are directly or indirectly reachable
658
 * from outside the original young; and all objects in unreachable are
659
 * not.
660
 *
661
 * This function restores the _gc_prev pointers; young and unreachable are
662
 * doubly linked lists after this function returns.
663
 * But _gc_next in the unreachable list still has the NEXT_MASK_UNREACHABLE flag,
664
 * so we cannot use gc_list_* functions on unreachable until we remove the flag.
665
 */
666
static void
667
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
668
21.9k
{
669
    // previous element in the young list, used to restore gc_prev.
670
21.9k
    PyGC_Head *prev = young;
671
21.9k
    PyGC_Head *gc = GC_NEXT(young);
672
673
    /* Invariants:  all objects "to the left" of us in young are reachable
674
     * (directly or indirectly) from outside the young list as it was at entry.
675
     *
676
     * All other objects from the original young "to the left" of us are in
677
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
678
     * left of us in 'young' now have been scanned, and no objects here
679
     * or to the right have been scanned yet.
680
     */
681
682
21.9k
    validate_consistent_old_space(young);
683
    /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */
684
21.9k
    uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1);
685
24.6M
    while (gc != young) {
686
24.6M
        if (gc_get_refs(gc)) {
687
            /* gc is definitely reachable from outside the
688
             * original 'young'.  Mark it as such, and traverse
689
             * its pointers to find any other objects that may
690
             * be directly reachable from it.  Note that the
691
             * call to tp_traverse may append objects to young,
692
             * so we have to wait until it returns to determine
693
             * the next object to visit.
694
             */
695
20.3M
            PyObject *op = FROM_GC(gc);
696
20.3M
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
697
20.3M
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
698
20.3M
                                      "refcount is too small");
699
            // NOTE: visit_reachable may change gc->_gc_next when
700
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
701
20.3M
            (void) traverse(op,
702
20.3M
                    visit_reachable,
703
20.3M
                    (void *)young);
704
            // relink gc_prev to prev element.
705
20.3M
            _PyGCHead_SET_PREV(gc, prev);
706
            // gc is not COLLECTING state after here.
707
20.3M
            gc_clear_collecting(gc);
708
20.3M
            prev = gc;
709
20.3M
        }
710
4.25M
        else {
711
            /* This *may* be unreachable.  To make progress,
712
             * assume it is.  gc isn't directly reachable from
713
             * any object we've already traversed, but may be
714
             * reachable from an object we haven't gotten to yet.
715
             * visit_reachable will eventually move gc back into
716
             * young if that's so, and we'll see it again.
717
             */
718
            // Move gc to unreachable.
719
            // No need to set gc->next->prev = prev because the list is singly linked here.
720
4.25M
            prev->_gc_next = gc->_gc_next;
721
722
            // We can't use gc_list_append() here because we use
723
            // NEXT_MASK_UNREACHABLE here.
724
4.25M
            PyGC_Head *last = GC_PREV(unreachable);
725
            // NOTE: Since all objects in the unreachable set have the
726
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
727
            // But this may pollute the unreachable list head's 'next' pointer
728
            // too. That's semantically senseless but expedient here - the
729
            // damage is repaired when this function ends.
730
4.25M
            last->_gc_next = flags | (uintptr_t)gc;
731
4.25M
            _PyGCHead_SET_PREV(gc, last);
732
4.25M
            gc->_gc_next = flags | (uintptr_t)unreachable;
733
4.25M
            unreachable->_gc_prev = (uintptr_t)gc;
734
4.25M
        }
735
24.6M
        gc = _PyGCHead_NEXT(prev);
736
24.6M
    }
737
    // young->_gc_prev must be the last element remaining in the list.
738
21.9k
    young->_gc_prev = (uintptr_t)prev;
739
21.9k
    young->_gc_next &= _PyGC_PREV_MASK;
740
    // don't let the pollution of the list head's next pointer leak
741
21.9k
    unreachable->_gc_next &= _PyGC_PREV_MASK;
742
21.9k
}
743
744
/* In theory, all tuples should be younger than the
745
* objects they refer to, as tuples are immutable.
746
* Therefore, untracking tuples in oldest-first order in the
747
* young generation before promoting them should untrack
748
* all the tuples that can be untracked.
749
*
750
* Unfortunately, the C API allows tuples to be created
751
* and then filled in. So this won't untrack all tuples
752
* that can be untracked. It should untrack most of them
753
* and is much faster than a more complex approach that
754
* would untrack all relevant tuples.
755
*/
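
As a concrete illustration of the "created and then filled in" caveat, here is a minimal sketch using only public C API calls (PyTuple_New, PyTuple_SET_ITEM, Py_BuildValue). It assumes an initialized interpreter, and the helper name make_outer_then_inner is invented for the example.

/* Minimal sketch of the "created and then filled in" caveat; illustrative
 * only, with most error handling omitted. */
#include <Python.h>

static PyObject *
make_outer_then_inner(void)
{
    /* The outer tuple is created first, empty ... */
    PyObject *outer = PyTuple_New(1);
    if (outer == NULL) {
        return NULL;
    }
    /* ... and its item, a tuple of ints that _PyTuple_MaybeUntrack could
     * untrack, is created later, so the item is *younger* than the tuple
     * that holds it.  An oldest-first untracking pass reaches `outer`
     * before the inner tuple has been untracked, so `outer` would stay
     * tracked on that pass. */
    PyObject *inner = Py_BuildValue("(ii)", 1, 2);
    if (inner == NULL) {
        Py_DECREF(outer);
        return NULL;
    }
    PyTuple_SET_ITEM(outer, 0, inner);   /* steals the reference to inner */
    return outer;
}
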
756
static void
757
untrack_tuples(PyGC_Head *head)
758
22.7k
{
759
22.7k
    PyGC_Head *gc = GC_NEXT(head);
760
62.2M
    while (gc != head) {
761
62.1M
        PyObject *op = FROM_GC(gc);
762
62.1M
        PyGC_Head *next = GC_NEXT(gc);
763
62.1M
        if (PyTuple_CheckExact(op)) {
764
2.66M
            _PyTuple_MaybeUntrack(op);
765
2.66M
        }
766
62.1M
        gc = next;
767
62.1M
    }
768
22.7k
}
769
770
/* Return true if object has a pre-PEP 442 finalization method. */
771
static int
772
has_legacy_finalizer(PyObject *op)
773
1.06M
{
774
1.06M
    return Py_TYPE(op)->tp_del != NULL;
775
1.06M
}
776
777
/* Move the objects in unreachable with tp_del slots into `finalizers`.
778
 *
779
 * This function also removes NEXT_MASK_UNREACHABLE flag
780
 * from _gc_next in unreachable.
781
 */
782
static void
783
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
784
10.9k
{
785
10.9k
    PyGC_Head *gc, *next;
786
10.9k
    _PyObject_ASSERT(
787
10.9k
        FROM_GC(unreachable),
788
10.9k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
789
790
    /* March over unreachable.  Move objects with finalizers into
791
     * `finalizers`.
792
     */
793
1.07M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
794
1.06M
        PyObject *op = FROM_GC(gc);
795
796
1.06M
        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
797
1.06M
        next = GC_NEXT(gc);
798
1.06M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
799
800
1.06M
        if (has_legacy_finalizer(op)) {
801
0
            gc_clear_collecting(gc);
802
0
            gc_list_move(gc, finalizers);
803
0
        }
804
1.06M
    }
805
10.9k
}
806
807
static inline void
808
clear_unreachable_mask(PyGC_Head *unreachable)
809
10.9k
{
810
    /* Check that the list head does not have the unreachable bit set */
811
10.9k
    _PyObject_ASSERT(
812
10.9k
        FROM_GC(unreachable),
813
10.9k
        ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
814
10.9k
    _PyObject_ASSERT(
815
10.9k
        FROM_GC(unreachable),
816
10.9k
        (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
817
818
10.9k
    PyGC_Head *gc, *next;
819
1.07M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
820
1.06M
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
821
1.06M
        next = GC_NEXT(gc);
822
1.06M
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
823
1.06M
    }
824
10.9k
    validate_list(unreachable, collecting_set_unreachable_clear);
825
10.9k
}
826
827
/* A traversal callback for move_legacy_finalizer_reachable. */
828
static int
829
visit_move(PyObject *op, void *arg)
830
0
{
831
0
    PyGC_Head *tolist = arg;
832
0
    OBJECT_STAT_INC(object_visits);
833
0
    if (_PyObject_IS_GC(op)) {
834
0
        PyGC_Head *gc = AS_GC(op);
835
0
        if (gc_is_collecting(gc)) {
836
0
            gc_list_move(gc, tolist);
837
0
            gc_clear_collecting(gc);
838
0
        }
839
0
    }
840
0
    return 0;
841
0
}
842
843
/* Move objects that are reachable from finalizers, from the unreachable set
844
 * into finalizers set.
845
 */
846
static void
847
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
848
10.9k
{
849
10.9k
    traverseproc traverse;
850
10.9k
    PyGC_Head *gc = GC_NEXT(finalizers);
851
10.9k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
852
        /* Note that the finalizers list may grow during this. */
853
0
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
854
0
        (void) traverse(FROM_GC(gc),
855
0
                        visit_move,
856
0
                        (void *)finalizers);
857
0
    }
858
10.9k
}
859
860
/* Handle weakref callbacks.  Note that it's possible for such weakrefs to be
861
 * outside the unreachable set -- indeed, those are precisely the weakrefs
862
 * whose callbacks must be invoked.  See gc_weakref.txt for overview & some
863
 * details.
864
 *
865
 * The clearing of weakrefs is subtle and must be done carefully, as there have
866
 * been previous bugs related to this.  First, weakrefs to the unreachable set of
867
 * objects must be cleared before we start calling `tp_clear`.  If we don't,
868
 * those weakrefs can reveal unreachable objects to Python-level code and that
869
 * is not safe.  Many objects are not usable after `tp_clear` has been called
870
 * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
871
 * example bugs).
872
 *
873
 * Weakrefs with callbacks always need to be cleared before executing the
874
 * callback.  That's because sometimes a callback will call the ref object,
875
 * to check if the reference is actually dead (KeyedRef does this, for
876
 * example).  We want to indicate that it is dead, even though it is possible
877
 * a finalizer might resurrect it.  Clearing also prevents the callback from
878
 * executing more than once.
879
 *
880
 * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
881
 * calling finalizers.  However, since tp_subclasses started being necessary
882
 * to invalidate caches (e.g. by PyType_Modified), that clearing has created
883
 * a bug.  If the weakref to the subclass is cleared before a finalizer is
884
 * called, the cache may not be correctly invalidated.  That can lead to
885
 * segfaults since the caches can refer to deallocated objects (GH-135552
886
 * is an example).  Now, we delay the clear of weakrefs without callbacks
887
 * until *after* finalizers have been executed.  That means weakrefs without
888
 * callbacks are still usable while finalizers are being executed.
889
 *
890
 * The weakrefs that are inside the unreachable set must also be cleared.
891
 * The reason this is required is not immediately obvious.  If the weakref
892
 * refers to an object outside of the unreachable set, that object might go
893
 * away when we start clearing objects.  Normally, the object should also be
894
 * part of the unreachable set but that's not true in the case of incomplete
895
 * or missing `tp_traverse` methods.  When that object goes away, the callback
896
 * for weakref can be executed and that could reveal unreachable objects to
897
 * Python-level code.  See bpo-38006 as an example bug.
898
 */
899
static int
900
handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
901
10.9k
{
902
10.9k
    PyGC_Head *gc;
903
10.9k
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
904
10.9k
    PyGC_Head *next;
905
10.9k
    int num_freed = 0;
906
907
10.9k
    gc_list_init(&wrcb_to_call);
908
909
    /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the
910
     * callback needs to be invoked. We make another pass over wrcb_to_call,
911
     * invoking callbacks, after this pass completes.
912
     */
913
1.07M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
914
1.06M
        PyWeakReference **wrlist;
915
916
1.06M
        PyObject *op = FROM_GC(gc);
917
1.06M
        next = GC_NEXT(gc);
918
919
1.06M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
920
795k
            continue;
921
795k
        }
922
923
        /* It supports weakrefs.  Does it have any?
924
         *
925
         * This is never triggered for static types so we can avoid the
926
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
927
         */
928
265k
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
929
930
        /* `op` may have some weakrefs.  March over the list and move the
931
         * weakrefs with callbacks that must be called into wrcb_to_call.
932
         */
933
265k
        PyWeakReference *next_wr;
934
530k
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
935
            // Fetch the next list element up front so iteration can make progress
936
            // even if we skip clearing this weakref (because _PyWeakref_ClearRef
937
            // changes the next pointer in the wrlist).
938
265k
            next_wr = wr->wr_next;
939
940
265k
            if (wr->wr_callback == NULL) {
941
                /* no callback */
942
265k
                continue;
943
265k
            }
944
945
            // Clear the weakref.  See the comments above this function for
946
            // reasons why we need to clear weakrefs that have callbacks.
947
            // Note that _PyWeakref_ClearRef clears the weakref but leaves the
948
            // callback pointer intact.  Obscure: it also changes *wrlist.
949
0
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
950
0
            _PyWeakref_ClearRef(wr);
951
0
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
952
953
            /* Headache time.  `op` is going away, and is weakly referenced by
954
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
955
             * is also trash, no:
956
             *
957
             * 1. There's no need to call it.  The object and the weakref are
958
             *    both going away, so it's legitimate to pretend the weakref is
959
             *    going away first.  The user has to ensure a weakref outlives its
960
             *    referent if they want a guarantee that the wr callback will get
961
             *    invoked.
962
             *
963
             * 2. It may be catastrophic to call it.  If the callback is also in
964
             *    cyclic trash (CT), then although the CT is unreachable from
965
             *    outside the current generation, CT may be reachable from the
966
             *    callback.  Then the callback could resurrect insane objects.
967
             *
968
             * Since the callback is never needed and may be unsafe in this
969
             * case, wr is simply left in the unreachable set.  Note that
970
             * clear_weakrefs() will ensure its callback will not trigger
971
             * inside delete_garbage().
972
             *
973
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
974
             * weakref outlived the trash.  Note that since wr isn't CT in this
975
             * case, its callback can't be CT either -- wr acted as an external
976
             * root to this generation, and therefore its callback did too.  So
977
             * nothing in CT is reachable from the callback either, so it's hard
978
             * to imagine how calling it later could create a problem for us.  wr
979
             * is moved to wrcb_to_call in this case.
980
             */
981
0
            if (gc_is_collecting(AS_GC((PyObject *)wr))) {
982
0
                continue;
983
0
            }
984
985
            /* Create a new reference so that wr can't go away
986
             * before we can process it again.
987
             */
988
0
            Py_INCREF(wr);
989
990
            /* Move wr to wrcb_to_call, for the next pass. */
991
0
            PyGC_Head *wrasgc = AS_GC((PyObject *)wr);
992
            // wrasgc is reachable, but next isn't, so they can't be the same
993
0
            _PyObject_ASSERT((PyObject *)wr, wrasgc != next);
994
0
            gc_list_move(wrasgc, &wrcb_to_call);
995
0
        }
996
265k
    }
997
998
    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
999
     * because they can't reference unreachable objects.
1000
     */
1001
10.9k
    int visited_space = get_gc_state()->visited_space;
1002
10.9k
    while (! gc_list_is_empty(&wrcb_to_call)) {
1003
0
        PyObject *temp;
1004
0
        PyObject *callback;
1005
1006
0
        gc = (PyGC_Head*)wrcb_to_call._gc_next;
1007
0
        PyObject *op = FROM_GC(gc);
1008
0
        _PyObject_ASSERT(op, PyWeakref_Check(op));
1009
0
        PyWeakReference *wr = (PyWeakReference *)op;
1010
0
        callback = wr->wr_callback;
1011
0
        _PyObject_ASSERT(op, callback != NULL);
1012
1013
        /* copy-paste of weakrefobject.c's handle_callback() */
1014
0
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
1015
0
        if (temp == NULL) {
1016
0
            PyErr_FormatUnraisable("Exception ignored on "
1017
0
                                   "calling weakref callback %R", callback);
1018
0
        }
1019
0
        else {
1020
0
            Py_DECREF(temp);
1021
0
        }
1022
1023
        /* Give up the reference we created in the first pass.  When
1024
         * op's refcount hits 0 (which it may or may not do right now),
1025
         * op's tp_dealloc will decref op->wr_callback too.  Note
1026
         * that the refcount probably will hit 0 now, and because this
1027
         * weakref was reachable to begin with, gc didn't already
1028
         * add it to its count of freed objects.  Example:  a reachable
1029
         * weak value dict maps some key to this reachable weakref.
1030
         * The callback removes this key->weakref mapping from the
1031
         * dict, leaving no other references to the weakref (excepting
1032
         * ours).
1033
         */
1034
0
        Py_DECREF(op);
1035
0
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
1036
            /* object is still alive -- move it */
1037
0
            gc_set_old_space(gc, visited_space);
1038
0
            gc_list_move(gc, old);
1039
0
        }
1040
0
        else {
1041
0
            ++num_freed;
1042
0
        }
1043
0
    }
1044
1045
10.9k
    return num_freed;
1046
10.9k
}
1047
1048
/* Clear all weakrefs to unreachable objects.  When this returns, no object in
1049
 * `unreachable` is weakly referenced anymore.  See the comments above
1050
 * handle_weakref_callbacks() for why these weakrefs need to be cleared.
1051
 */
1052
static void
1053
clear_weakrefs(PyGC_Head *unreachable)
1054
10.9k
{
1055
10.9k
    PyGC_Head *gc;
1056
10.9k
    PyGC_Head *next;
1057
1058
1.07M
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
1059
1.06M
        PyWeakReference **wrlist;
1060
1061
1.06M
        PyObject *op = FROM_GC(gc);
1062
1.06M
        next = GC_NEXT(gc);
1063
1064
1.06M
        if (PyWeakref_Check(op)) {
1065
            /* A weakref inside the unreachable set is always cleared. See
1066
             * the comments above handle_weakref_callbacks() for why these
1067
             * must be cleared.
1068
             */
1069
0
            _PyWeakref_ClearRef((PyWeakReference *)op);
1070
0
        }
1071
1072
1.06M
        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
1073
795k
            continue;
1074
795k
        }
1075
1076
        /* It supports weakrefs.  Does it have any?
1077
         *
1078
         * This is never triggered for static types so we can avoid the
1079
         * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
1080
         */
1081
265k
        wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
1082
1083
        /* `op` may have some weakrefs.  March over the list, clear
1084
         * all the weakrefs.
1085
         */
1086
530k
        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
1087
            /* _PyWeakref_ClearRef clears the weakref but leaves
1088
             * the callback pointer intact.  Obscure:  it also
1089
             * changes *wrlist.
1090
             */
1091
265k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
1092
265k
            _PyWeakref_ClearRef(wr);
1093
265k
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1094
265k
        }
1095
265k
    }
1096
10.9k
}
1097
1098
static void
1099
debug_cycle(const char *msg, PyObject *op)
1100
0
{
1101
0
    PySys_FormatStderr("gc: %s <%s %p>\n",
1102
0
                       msg, Py_TYPE(op)->tp_name, op);
1103
0
}
1104
1105
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
1106
 * only from such cycles).
1107
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
1108
 * garbage list (a Python list), else only the objects in finalizers with
1109
 * __del__ methods are appended to garbage.  All objects in finalizers are
1110
 * merged into the old list regardless.
1111
 */
1112
static void
1113
handle_legacy_finalizers(PyThreadState *tstate,
1114
                         GCState *gcstate,
1115
                         PyGC_Head *finalizers, PyGC_Head *old)
1116
10.9k
{
1117
10.9k
    assert(!_PyErr_Occurred(tstate));
1118
10.9k
    assert(gcstate->garbage != NULL);
1119
1120
10.9k
    PyGC_Head *gc = GC_NEXT(finalizers);
1121
10.9k
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
1122
0
        PyObject *op = FROM_GC(gc);
1123
1124
0
        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
1125
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1126
0
                _PyErr_Clear(tstate);
1127
0
                break;
1128
0
            }
1129
0
        }
1130
0
    }
1131
1132
10.9k
    gc_list_merge(finalizers, old);
1133
10.9k
}
1134
1135
/* Run first-time finalizers (if any) on all the objects in collectable.
1136
 * Note that this may remove some (or even all) of the objects from the
1137
 * list, due to refcounts falling to 0.
1138
 */
1139
static void
1140
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
1141
10.9k
{
1142
10.9k
    destructor finalize;
1143
10.9k
    PyGC_Head seen;
1144
1145
    /* While we're going through the loop, `finalize(op)` may cause op, or
1146
     * other objects, to be reclaimed via refcounts falling to zero.  So
1147
     * there's little we can rely on about the structure of the input
1148
     * `collectable` list across iterations.  For safety, we always take the
1149
     * first object in that list and move it to a temporary `seen` list.
1150
     * If objects vanish from the `collectable` and `seen` lists we don't
1151
     * care.
1152
     */
1153
10.9k
    gc_list_init(&seen);
1154
1155
1.07M
    while (!gc_list_is_empty(collectable)) {
1156
1.06M
        PyGC_Head *gc = GC_NEXT(collectable);
1157
1.06M
        PyObject *op = FROM_GC(gc);
1158
1.06M
        gc_list_move(gc, &seen);
1159
1.06M
        if (!_PyGC_FINALIZED(op) &&
1160
1.06M
            (finalize = Py_TYPE(op)->tp_finalize) != NULL)
1161
0
        {
1162
0
            _PyGC_SET_FINALIZED(op);
1163
0
            Py_INCREF(op);
1164
0
            finalize(op);
1165
0
            assert(!_PyErr_Occurred(tstate));
1166
0
            Py_DECREF(op);
1167
0
        }
1168
1.06M
    }
1169
10.9k
    gc_list_merge(&seen, collectable);
1170
10.9k
}
1171
1172
/* Break reference cycles by clearing the containers involved.  This is
1173
 * tricky business as the lists can be changing and we don't know which
1174
 * objects may be freed.  It is possible I screwed something up here.
1175
 */
1176
static void
1177
delete_garbage(PyThreadState *tstate, GCState *gcstate,
1178
               PyGC_Head *collectable, PyGC_Head *old)
1179
10.9k
{
1180
10.9k
    assert(!_PyErr_Occurred(tstate));
1181
1182
794k
    while (!gc_list_is_empty(collectable)) {
1183
783k
        PyGC_Head *gc = GC_NEXT(collectable);
1184
783k
        PyObject *op = FROM_GC(gc);
1185
1186
783k
        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1187
783k
                                  "refcount is too small");
1188
1189
783k
        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
1190
0
            assert(gcstate->garbage != NULL);
1191
0
            if (PyList_Append(gcstate->garbage, op) < 0) {
1192
0
                _PyErr_Clear(tstate);
1193
0
            }
1194
0
        }
1195
783k
        else {
1196
783k
            inquiry clear;
1197
783k
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1198
524k
                Py_INCREF(op);
1199
524k
                (void) clear(op);
1200
524k
                if (_PyErr_Occurred(tstate)) {
1201
0
                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
1202
0
                                           Py_TYPE(op)->tp_name);
1203
0
                }
1204
524k
                Py_DECREF(op);
1205
524k
            }
1206
783k
        }
1207
783k
        if (GC_NEXT(collectable) == gc) {
1208
            /* object is still alive, move it, it may die later */
1209
518k
            gc_clear_collecting(gc);
1210
518k
            gc_list_move(gc, old);
1211
518k
        }
1212
783k
    }
1213
10.9k
}
1214
1215
1216
/* Deduce which objects among "base" are unreachable from outside the list
1217
   and move them to 'unreachable'. The process consists of the following steps:
1218
1219
1. Copy all reference counts to a different field (gc_prev is used to hold
1220
   this copy to save memory).
1221
2. Traverse all objects in "base" and visit all referred objects using
1222
   "tp_traverse", and for every visited object subtract 1 from the reference
1223
   count (the one that we copied in the previous step). After this step, all
1224
   objects that can be reached directly from outside must have strictly positive
1225
   reference count, while all unreachable objects must have a count of exactly 0.
1226
3. Identify all unreachable objects (the ones with 0 reference count) and move
1227
   them to the "unreachable" list. This step also needs to move back to "base" all
1228
   objects that were initially marked as unreachable but are referred transitively
1229
   by the reachable objects (the ones with strictly positive reference count).
1230
1231
Contracts:
1232
1233
    * The "base" has to be a valid list with no mask set.
1234
1235
    * The "unreachable" list must be uninitialized (this function calls
1236
      gc_list_init over 'unreachable').
1237
1238
IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1239
flag set but it does not clear it to skip unnecessary iteration. Before the
1240
flag is cleared (for example, by using 'clear_unreachable_mask' function or
1241
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1242
list and we cannot use most gc_list_* functions for it. */
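
The three steps above can be traced with a small self-contained toy model, sketched below. It is not CPython code: it uses plain arrays instead of PyGC_Head lists, and a five-object graph (B->A, C->B, plus an isolated D<->E cycle, with one external reference to C) chosen purely to show how the refcount copy, the subtraction pass, and the reachability propagation interact.

/* Self-contained toy model of the three steps above; not CPython code. */
#include <stdio.h>

#define NOBJ  5
#define NEDGE 4

int main(void)
{
    /* Objects A..E.  Internal edges: B->A, C->B, and a cycle D<->E.
     * C also has one reference from outside the set being collected. */
    const char *name[NOBJ] = {"A", "B", "C", "D", "E"};
    int refcnt[NOBJ]       = {1, 1, 1, 1, 1};   /* true reference counts */
    int edge_from[NEDGE]   = {1, 2, 3, 4};      /* B->A, C->B, D->E, E->D */
    int edge_to[NEDGE]     = {0, 1, 4, 3};
    int gc_refs[NOBJ];

    /* Step 1: copy the reference counts (update_refs). */
    for (int i = 0; i < NOBJ; i++) {
        gc_refs[i] = refcnt[i];
    }

    /* Step 2: subtract one for every reference that originates inside the
     * set (subtract_refs).  Whatever is left counts external references. */
    for (int e = 0; e < NEDGE; e++) {
        gc_refs[edge_to[e]] -= 1;
    }

    /* Step 3: objects with gc_refs > 0 are reachable from outside; anything
     * they reach, directly or transitively, is reachable too and is "moved
     * back".  The rest is unreachable (move_unreachable). */
    int reachable[NOBJ];
    for (int i = 0; i < NOBJ; i++) {
        reachable[i] = gc_refs[i] > 0;
    }
    for (int pass = 0; pass < NOBJ; pass++) {     /* crude fixed point */
        for (int e = 0; e < NEDGE; e++) {
            if (reachable[edge_from[e]]) {
                reachable[edge_to[e]] = 1;
            }
        }
    }

    for (int i = 0; i < NOBJ; i++) {
        printf("%s: gc_refs=%d -> %s\n", name[i], gc_refs[i],
               reachable[i] ? "reachable" : "unreachable");
    }
    return 0;
}

Running this prints gc_refs of 0, 0, 1, 0, 0 for A..E, with A, B, C ending up reachable and the D<->E cycle unreachable, mirroring the description above.
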
1243
static inline void
1244
21.9k
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1245
21.9k
    validate_list(base, collecting_clear_unreachable_clear);
1246
    /* Using ob_refcnt and gc_refs, calculate which objects in the
1247
     * container set are reachable from outside the set (i.e., have a
1248
     * refcount greater than 0 when all the references within the
1249
     * set are taken into account).
1250
     */
1251
21.9k
    update_refs(base);  // gc_prev is used for gc_refs
1252
21.9k
    subtract_refs(base);
1253
1254
    /* Leave everything reachable from outside base in base, and move
1255
     * everything else (in base) to unreachable.
1256
     *
1257
     * NOTE:  This used to move the reachable objects into a reachable
1258
     * set instead.  But most things usually turn out to be reachable,
1259
     * so it's more efficient to move the unreachable things.  It "sounds slick"
1260
     * to move the unreachable objects, until you think about it - the reason it
1261
     * pays isn't actually obvious.
1262
     *
1263
     * Suppose we create objects A, B, C in that order.  They appear in the young
1264
     * generation in the same order.  If B points to A, and C to B, and C is
1265
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1266
     * respectively.
1267
     *
1268
     * When move_unreachable finds A, A is moved to the unreachable list.  The
1269
     * same for B when it's first encountered.  Then C is traversed, B is moved
1270
     * _back_ to the reachable list.  B is eventually traversed, and then A is
1271
     * moved back to the reachable list.
1272
     *
1273
     * So instead of not moving at all, the reachable objects B and A are moved
1274
     * twice each.  Why is this a win?  A straightforward algorithm to move the
1275
     * reachable objects instead would move A, B, and C once each.
1276
     *
1277
     * The key is that this dance leaves the objects in order C, B, A - it's
1278
     * reversed from the original order.  On all _subsequent_ scans, none of
1279
     * them will move.  Since most objects aren't in cycles, this can save an
1280
     * unbounded number of moves across an unbounded number of later collections.
1281
     * It can cost more only the first time the chain is scanned.
1282
     *
1283
     * Drawback:  move_unreachable is also used to find out what's still trash
1284
     * after finalizers may resurrect objects.  In _that_ case most unreachable
1285
     * objects will remain unreachable, so it would be more efficient to move
1286
     * the reachable objects instead.  But this is a one-time cost, probably not
1287
     * worth complicating the code to speed just a little.
1288
     */
1289
21.9k
    move_unreachable(base, unreachable);  // gc_prev is pointer again
1290
21.9k
    validate_list(base, collecting_clear_unreachable_clear);
1291
21.9k
    validate_list(unreachable, collecting_set_unreachable_set);
1292
21.9k
}
1293
1294
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1295
   them to 'old_generation' and placing the rest on 'still_unreachable'.
1296
1297
   Contracts:
1298
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1299
         will contain the objects that did not resurrect.
1300
1301
       * The "still_unreachable" list must be uninitialized (this function calls
1302
         gc_list_init over 'still_unreachable').
1303
1304
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1305
PREV_MASK_COLLECTING set, but the objects in this set are going to be removed so
1306
we can skip the expense of clearing the flag to avoid extra iteration. */
1307
static inline void
1308
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1309
                           PyGC_Head *old_generation)
1310
10.9k
{
1311
    // Remove the PREV_MASK_COLLECTING from unreachable
1312
    // to prepare it for a new call to 'deduce_unreachable'
1313
10.9k
    gc_list_clear_collecting(unreachable);
1314
1315
    // After the call to deduce_unreachable, the 'still_unreachable' set will
1316
    // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
1317
    // removed so we can skip the expense of clearing the flag.
1318
10.9k
    PyGC_Head* resurrected = unreachable;
1319
10.9k
    deduce_unreachable(resurrected, still_unreachable);
1320
10.9k
    clear_unreachable_mask(still_unreachable);
1321
1322
    // Move the resurrected objects to the old generation for future collection.
1323
10.9k
    gc_list_merge(resurrected, old_generation);
1324
10.9k
}
1325
1326
static void
1327
gc_collect_region(PyThreadState *tstate,
1328
                  PyGC_Head *from,
1329
                  PyGC_Head *to,
1330
                  struct gc_collection_stats *stats);
1331
1332
static inline Py_ssize_t
1333
gc_list_set_space(PyGC_Head *list, int space)
1334
11.7k
{
1335
11.7k
    Py_ssize_t size = 0;
1336
11.7k
    PyGC_Head *gc;
1337
20.6M
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1338
20.6M
        gc_set_old_space(gc, space);
1339
20.6M
        size++;
1340
20.6M
    }
1341
11.7k
    return size;
1342
11.7k
}
1343
1344
/* Making progress in the incremental collector
1345
 * In order to eventually collect all cycles
1346
 * the incremental collector must progress through the old
1347
 * space faster than objects are added to the old space.
1348
 *
1349
 * Each young or incremental collection adds a number of
1350
 * objects, S (for survivors) to the old space, and
1351
 * incremental collectors scan I objects from the old space.
1352
 * I > S must be true. We also want I > S * N to be true, where
1353
 * N > 1. Higher values of N mean that the old space is
1354
 * scanned more rapidly.
1355
 * The default incremental threshold of 10 translates to
1356
 * N == 1.4 (1 + 4/threshold)
1357
 */
1358
1359
/* Divide by 10, so that the default incremental threshold of 10
1360
 * scans objects at 1% of the heap size */
1361
30.5k
#define SCAN_RATE_DIVISOR 10
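
A quick arithmetic check of the figures quoted in the two comments above, under the stated assumption that an increment scans roughly heap_size / (SCAN_RATE_DIVISOR * threshold) objects (the exact scan-increment formula used later in this file is outside this excerpt):

/* Illustrative arithmetic only; the scan-increment formula is an assumption
 * based on the comments above, not a quote of the implementation. */
#include <stdio.h>

int main(void)
{
    const double threshold = 10.0;           /* default incremental threshold */
    const double scan_rate_divisor = 10.0;   /* SCAN_RATE_DIVISOR */

    double n = 1.0 + 4.0 / threshold;        /* N == 1.4 as stated above */
    double heap_fraction = 1.0 / (scan_rate_divisor * threshold);  /* 1% */

    printf("N = %.1f, scanned per increment = %.0f%% of the heap\n",
           n, heap_fraction * 100.0);
    return 0;
}
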
1362
1363
static void
1364
add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
1365
10.9k
{
1366
10.9k
    gcstate->generation_stats[gen].collected += stats->collected;
1367
10.9k
    gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
1368
10.9k
    gcstate->generation_stats[gen].collections += 1;
1369
10.9k
}
1370
1371
static void
1372
gc_collect_young(PyThreadState *tstate,
1373
                 struct gc_collection_stats *stats)
1374
0
{
1375
0
    GCState *gcstate = &tstate->interp->gc;
1376
0
    validate_spaces(gcstate);
1377
0
    PyGC_Head *young = &gcstate->young.head;
1378
0
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1379
0
    untrack_tuples(young);
1380
0
    GC_STAT_ADD(0, collections, 1);
1381
1382
0
    PyGC_Head survivors;
1383
0
    gc_list_init(&survivors);
1384
0
    gc_list_set_space(young, gcstate->visited_space);
1385
0
    gc_collect_region(tstate, young, &survivors, stats);
1386
0
    gc_list_merge(&survivors, visited);
1387
0
    validate_spaces(gcstate);
1388
0
    gcstate->young.count = 0;
1389
0
    gcstate->old[gcstate->visited_space].count++;
1390
0
    add_stats(gcstate, 0, stats);
1391
0
    validate_spaces(gcstate);
1392
0
}
1393
1394
#ifndef NDEBUG
1395
static inline int
1396
IS_IN_VISITED(PyGC_Head *gc, int visited_space)
1397
{
1398
    assert(visited_space == 0 || other_space(visited_space) == 0);
1399
    return gc_old_space(gc) == visited_space;
1400
}
1401
#endif
1402
1403
struct container_and_flag {
1404
    PyGC_Head *container;
1405
    int visited_space;
1406
    intptr_t size;
1407
};
1408
1409
/* A traversal callback for adding to a container */
1410
static int
1411
visit_add_to_container(PyObject *op, void *arg)
1412
148M
{
1413
148M
    OBJECT_STAT_INC(object_visits);
1414
148M
    struct container_and_flag *cf = (struct container_and_flag *)arg;
1415
148M
    int visited = cf->visited_space;
1416
148M
    assert(visited == get_gc_state()->visited_space);
1417
148M
    if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
1418
66.2M
        PyGC_Head *gc = AS_GC(op);
1419
66.2M
        if (_PyObject_GC_IS_TRACKED(op) &&
1420
62.5M
            gc_old_space(gc) != visited) {
1421
36.0M
            gc_flip_old_space(gc);
1422
36.0M
            gc_list_move(gc, cf->container);
1423
36.0M
            cf->size++;
1424
36.0M
        }
1425
66.2M
    }
1426
148M
    return 0;
1427
148M
}
1428
1429
static intptr_t
1430
expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
1431
261k
{
1432
261k
    struct container_and_flag arg = {
1433
261k
        .container = container,
1434
261k
        .visited_space = gcstate->visited_space,
1435
261k
        .size = 0
1436
261k
    };
1437
261k
    assert(GC_NEXT(gc) == container);
1438
1.02M
    while (gc != container) {
1439
        /* Survivors will be moved to visited space, so they should
1440
         * have been marked as visited */
1441
762k
        assert(IS_IN_VISITED(gc, gcstate->visited_space));
1442
762k
        PyObject *op = FROM_GC(gc);
1443
762k
        assert(_PyObject_GC_IS_TRACKED(op));
1444
762k
        if (_Py_IsImmortal(op)) {
1445
0
            PyGC_Head *next = GC_NEXT(gc);
1446
0
            gc_list_move(gc, &gcstate->permanent_generation.head);
1447
0
            gc = next;
1448
0
            continue;
1449
0
        }
1450
762k
        traverseproc traverse = Py_TYPE(op)->tp_traverse;
1451
762k
        (void) traverse(op,
1452
762k
                        visit_add_to_container,
1453
762k
                        &arg);
1454
762k
        gc = GC_NEXT(gc);
1455
762k
    }
1456
261k
    return arg.size;
1457
261k
}
1458
1459
/* Do bookkeeping for a completed GC cycle */
1460
static void
1461
completed_scavenge(GCState *gcstate)
1462
849
{
1463
    /* We must observe two invariants:
1464
    * 1. Members of the permanent generation must be marked visited.
1465
    * 2. We cannot touch members of the permanent generation. */
1466
849
    int visited;
1467
849
    if (gc_list_is_empty(&gcstate->permanent_generation.head)) {
1468
        /* Permanent generation is empty so we can flip spaces bit */
1469
849
        int not_visited = gcstate->visited_space;
1470
849
        visited = other_space(not_visited);
1471
849
        gcstate->visited_space = visited;
1472
        /* Make sure all objects have visited bit set correctly */
1473
849
        gc_list_set_space(&gcstate->young.head, not_visited);
1474
849
    }
1475
0
    else {
1476
         /* We must move the objects from visited to pending space. */
1477
0
        visited = gcstate->visited_space;
1478
0
        int not_visited = other_space(visited);
1479
0
        assert(gc_list_is_empty(&gcstate->old[not_visited].head));
1480
0
        gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head);
1481
0
        gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
1482
0
    }
1483
849
    assert(gc_list_is_empty(&gcstate->old[visited].head));
1484
849
    gcstate->work_to_do = 0;
1485
849
    gcstate->phase = GC_PHASE_MARK;
1486
849
}
1487
1488
static intptr_t
1489
move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
1490
1.77M
{
1491
1.77M
    if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
1492
915k
        PyGC_Head *gc = AS_GC(op);
1493
915k
        if (_PyObject_GC_IS_TRACKED(op) &&
1494
915k
            gc_old_space(gc) != visited_space) {
1495
224k
            gc_flip_old_space(gc);
1496
224k
            gc_list_move(gc, reachable);
1497
224k
            return 1;
1498
224k
        }
1499
915k
    }
1500
1.55M
    return 0;
1501
1.77M
}
1502
1503
static intptr_t
1504
mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
1505
12.6k
{
1506
    // Transitively traverse all objects from reachable, until empty
1507
12.6k
    struct container_and_flag arg = {
1508
12.6k
        .container = reachable,
1509
12.6k
        .visited_space = visited_space,
1510
12.6k
        .size = 0
1511
12.6k
    };
1512
36.8M
    while (!gc_list_is_empty(reachable)) {
1513
36.7M
        PyGC_Head *gc = _PyGCHead_NEXT(reachable);
1514
36.7M
        assert(gc_old_space(gc) == visited_space);
1515
36.7M
        gc_list_move(gc, visited);
1516
36.7M
        PyObject *op = FROM_GC(gc);
1517
36.7M
        traverseproc traverse = Py_TYPE(op)->tp_traverse;
1518
36.7M
        (void) traverse(op,
1519
36.7M
                        visit_add_to_container,
1520
36.7M
                        &arg);
1521
36.7M
    }
1522
12.6k
    gc_list_validate_space(visited, visited_space);
1523
12.6k
    return arg.size;
1524
12.6k
}
1525
1526
static intptr_t
1527
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
1528
11.8k
{
1529
11.8k
    PyGC_Head reachable;
1530
11.8k
    gc_list_init(&reachable);
1531
11.8k
    Py_ssize_t objects_marked = 0;
1532
    // Move all objects on stacks to reachable
1533
11.8k
    _PyRuntimeState *runtime = &_PyRuntime;
1534
11.8k
    HEAD_LOCK(runtime);
1535
11.8k
    PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
1536
11.8k
    HEAD_UNLOCK(runtime);
1537
23.6k
    while (ts) {
1538
11.8k
        _PyInterpreterFrame *frame = ts->current_frame;
1539
865k
        while (frame) {
1540
863k
            if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) {
1541
157k
                frame = frame->previous;
1542
157k
                continue;
1543
157k
            }
1544
706k
            _PyStackRef *locals = frame->localsplus;
1545
706k
            _PyStackRef *sp = frame->stackpointer;
1546
706k
            objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
1547
706k
            PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
1548
706k
            objects_marked += move_to_reachable(func, &reachable, visited_space);
1549
4.48M
            while (sp > locals) {
1550
3.77M
                sp--;
1551
3.77M
                if (PyStackRef_IsNullOrInt(*sp)) {
1552
1.01M
                    continue;
1553
1.01M
                }
1554
2.75M
                PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
1555
2.75M
                if (_Py_IsImmortal(op)) {
1556
288k
                    continue;
1557
288k
                }
1558
2.47M
                if (_PyObject_IS_GC(op)) {
1559
2.01M
                    PyGC_Head *gc = AS_GC(op);
1560
2.01M
                    if (_PyObject_GC_IS_TRACKED(op) &&
1561
2.01M
                        gc_old_space(gc) != visited_space) {
1562
1.02M
                        gc_flip_old_space(gc);
1563
1.02M
                        objects_marked++;
1564
1.02M
                        gc_list_move(gc, &reachable);
1565
1.02M
                    }
1566
2.01M
                }
1567
2.47M
            }
1568
706k
            if (!start && frame->visited) {
1569
                // If this frame has already been visited, then the lower frames
1570
                // will have already been visited and will not have changed
1571
10.4k
                break;
1572
10.4k
            }
1573
696k
            frame->visited = 1;
1574
696k
            frame = frame->previous;
1575
696k
        }
1576
11.8k
        HEAD_LOCK(runtime);
1577
11.8k
        ts = PyThreadState_Next(ts);
1578
11.8k
        HEAD_UNLOCK(runtime);
1579
11.8k
    }
1580
11.8k
    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
1581
11.8k
    assert(gc_list_is_empty(&reachable));
1582
11.8k
    return objects_marked;
1583
11.8k
}
1584
1585
static intptr_t
1586
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
1587
865
{
1588
865
    PyGC_Head reachable;
1589
865
    gc_list_init(&reachable);
1590
865
    Py_ssize_t objects_marked = 0;
1591
865
    objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
1592
865
    objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
1593
865
    objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
1594
865
    struct types_state *types = &interp->types;
1595
173k
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
1596
173k
        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
1597
173k
        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
1598
173k
    }
1599
9.51k
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
1600
8.65k
        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
1601
8.65k
        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
1602
8.65k
    }
1603
865
    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
1604
865
    assert(gc_list_is_empty(&reachable));
1605
865
    return objects_marked;
1606
865
}
1607
1608
static intptr_t
1609
mark_at_start(PyThreadState *tstate)
1610
865
{
1611
    // TO DO -- Make this incremental
1612
865
    GCState *gcstate = &tstate->interp->gc;
1613
865
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1614
865
    Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
1615
865
    objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
1616
865
    gcstate->work_to_do -= objects_marked;
1617
865
    gcstate->phase = GC_PHASE_COLLECT;
1618
865
    validate_spaces(gcstate);
1619
865
    return objects_marked;
1620
865
}
1621
1622
static intptr_t
1623
assess_work_to_do(GCState *gcstate)
1624
30.5k
{
1625
    /* The amount of work we want to do depends on three things.
1626
     * 1. The number of new objects created
1627
     * 2. The growth in heap size since the last collection
1628
 * 3. The heap size (capped at twice the number of new objects, to avoid quadratic effects)
1629
     *
1630
     * For a steady state heap, the amount of work to do is three times the number
1631
     * of new objects added to the heap. This ensures that we stay ahead in the
1632
     * worst case of all new objects being garbage.
1633
     *
1634
     * This could be improved by tracking survival rates, but it is still a
1635
     * large improvement on the non-marking approach.
1636
     */
1637
30.5k
    intptr_t scale_factor = gcstate->old[0].threshold;
1638
30.5k
    if (scale_factor < 2) {
1639
0
        scale_factor = 2;
1640
0
    }
1641
30.5k
    intptr_t new_objects = gcstate->young.count;
1642
30.5k
    intptr_t max_heap_fraction = new_objects*2;
1643
30.5k
    intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
1644
30.5k
    if (heap_fraction > max_heap_fraction) {
1645
503
        heap_fraction = max_heap_fraction;
1646
503
    }
1647
30.5k
    gcstate->young.count = 0;
1648
30.5k
    return new_objects + heap_fraction;
1649
30.5k
}
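/* Illustrative sketch (not part of gc.c): assess_work_to_do() with example
 * numbers -- 2,000 new objects, a 1,000,000-object heap and the default
 * threshold of 10.  The cap of twice the new-object count binds, so the
 * budget is 2000 + 4000 = 6000, i.e. three times the number of new objects,
 * matching the steady-state claim in the comment above. */
static intptr_t
example_work_to_do(void)
{
    intptr_t new_objects = 2000;
    intptr_t heap_size = 1000000;
    intptr_t scale_factor = 10;                              /* old[0].threshold */
    intptr_t heap_fraction = heap_size / SCAN_RATE_DIVISOR / scale_factor;  /* 10000 */
    intptr_t max_heap_fraction = new_objects * 2;            /* 4000 */
    if (heap_fraction > max_heap_fraction) {
        heap_fraction = max_heap_fraction;                   /* capped at 4000 */
    }
    return new_objects + heap_fraction;                      /* 6000 */
}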
1650
1651
static void
1652
gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
1653
30.5k
{
1654
30.5k
    GC_STAT_ADD(1, collections, 1);
1655
30.5k
    GCState *gcstate = &tstate->interp->gc;
1656
30.5k
    gcstate->work_to_do += assess_work_to_do(gcstate);
1657
30.5k
    if (gcstate->work_to_do < 0) {
1658
18.7k
        return;
1659
18.7k
    }
1660
11.8k
    untrack_tuples(&gcstate->young.head);
1661
11.8k
    if (gcstate->phase == GC_PHASE_MARK) {
1662
865
        Py_ssize_t objects_marked = mark_at_start(tstate);
1663
865
        GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
1664
865
        gcstate->work_to_do -= objects_marked;
1665
865
        validate_spaces(gcstate);
1666
865
        return;
1667
865
    }
1668
10.9k
    PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
1669
10.9k
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1670
10.9k
    PyGC_Head increment;
1671
10.9k
    gc_list_init(&increment);
1672
10.9k
    int scale_factor = gcstate->old[0].threshold;
1673
10.9k
    if (scale_factor < 2) {
1674
0
        scale_factor = 2;
1675
0
    }
1676
10.9k
    intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
1677
10.9k
    GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
1678
10.9k
    gcstate->work_to_do -= objects_marked;
1679
10.9k
    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
1680
10.9k
    gc_list_merge(&gcstate->young.head, &increment);
1681
10.9k
    gc_list_validate_space(&increment, gcstate->visited_space);
1682
10.9k
    Py_ssize_t increment_size = gc_list_size(&increment);
1683
272k
    while (increment_size < gcstate->work_to_do) {
1684
262k
        if (gc_list_is_empty(not_visited)) {
1685
829
            break;
1686
829
        }
1687
261k
        PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
1688
261k
        gc_list_move(gc, &increment);
1689
261k
        increment_size++;
1690
261k
        assert(!_Py_IsImmortal(FROM_GC(gc)));
1691
261k
        gc_set_old_space(gc, gcstate->visited_space);
1692
261k
        increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
1693
261k
    }
1694
10.9k
    GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
1695
10.9k
    validate_list(&increment, collecting_clear_unreachable_clear);
1696
10.9k
    gc_list_validate_space(&increment, gcstate->visited_space);
1697
10.9k
    PyGC_Head survivors;
1698
10.9k
    gc_list_init(&survivors);
1699
10.9k
    gc_collect_region(tstate, &increment, &survivors, stats);
1700
10.9k
    gc_list_merge(&survivors, visited);
1701
10.9k
    assert(gc_list_is_empty(&increment));
1702
10.9k
    gcstate->work_to_do -= increment_size;
1703
1704
10.9k
    add_stats(gcstate, 1, stats);
1705
10.9k
    if (gc_list_is_empty(not_visited)) {
1706
849
        completed_scavenge(gcstate);
1707
849
    }
1708
10.9k
    validate_spaces(gcstate);
1709
10.9k
}
1710
1711
static void
1712
gc_collect_full(PyThreadState *tstate,
1713
                struct gc_collection_stats *stats)
1714
0
{
1715
0
    GC_STAT_ADD(2, collections, 1);
1716
0
    GCState *gcstate = &tstate->interp->gc;
1717
0
    validate_spaces(gcstate);
1718
0
    PyGC_Head *young = &gcstate->young.head;
1719
0
    PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
1720
0
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
1721
0
    untrack_tuples(young);
1722
    /* merge all generations into visited */
1723
0
    gc_list_merge(young, pending);
1724
0
    gc_list_validate_space(pending, 1-gcstate->visited_space);
1725
0
    gc_list_set_space(pending, gcstate->visited_space);
1726
0
    gcstate->young.count = 0;
1727
0
    gc_list_merge(pending, visited);
1728
0
    validate_spaces(gcstate);
1729
1730
0
    gc_collect_region(tstate, visited, visited,
1731
0
                      stats);
1732
0
    validate_spaces(gcstate);
1733
0
    gcstate->young.count = 0;
1734
0
    gcstate->old[0].count = 0;
1735
0
    gcstate->old[1].count = 0;
1736
0
    completed_scavenge(gcstate);
1737
0
    _PyGC_ClearAllFreeLists(tstate->interp);
1738
0
    validate_spaces(gcstate);
1739
0
    add_stats(gcstate, 2, stats);
1740
0
}
1741
1742
/* This is the main function. Read this to understand how the
1743
 * collection process works. */
1744
static void
1745
gc_collect_region(PyThreadState *tstate,
1746
                  PyGC_Head *from,
1747
                  PyGC_Head *to,
1748
                  struct gc_collection_stats *stats)
1749
10.9k
{
1750
10.9k
    PyGC_Head unreachable; /* non-problematic unreachable trash */
1751
10.9k
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1752
10.9k
    PyGC_Head *gc; /* initialize to prevent a compiler warning */
1753
10.9k
    GCState *gcstate = &tstate->interp->gc;
1754
1755
10.9k
    assert(gcstate->garbage != NULL);
1756
10.9k
    assert(!_PyErr_Occurred(tstate));
1757
1758
10.9k
    gc_list_init(&unreachable);
1759
10.9k
    deduce_unreachable(from, &unreachable);
1760
10.9k
    validate_consistent_old_space(from);
1761
10.9k
    untrack_tuples(from);
1762
1763
    /* Move reachable objects to the next generation. */
1764
10.9k
    validate_consistent_old_space(to);
1765
10.9k
    if (from != to) {
1766
10.9k
        gc_list_merge(from, to);
1767
10.9k
    }
1768
10.9k
    validate_consistent_old_space(to);
1769
1770
    /* All objects in unreachable are trash, but objects reachable from
1771
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
1772
     */
1773
10.9k
    gc_list_init(&finalizers);
1774
    // NEXT_MASK_UNREACHABLE is cleared here.
1775
    // After move_legacy_finalizers(), unreachable is a normal list.
1776
10.9k
    move_legacy_finalizers(&unreachable, &finalizers);
1777
    /* finalizers contains the unreachable objects with a legacy finalizer;
1778
     * unreachable objects reachable *from* those are also uncollectable,
1779
     * and we move those into the finalizers list too.
1780
     */
1781
10.9k
    move_legacy_finalizer_reachable(&finalizers);
1782
10.9k
    validate_list(&finalizers, collecting_clear_unreachable_clear);
1783
10.9k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1784
    /* Print debugging information. */
1785
10.9k
    if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
1786
0
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1787
0
            debug_cycle("collectable", FROM_GC(gc));
1788
0
        }
1789
0
    }
1790
1791
    /* Invoke weakref callbacks as necessary. */
1792
10.9k
    stats->collected += handle_weakref_callbacks(&unreachable, to);
1793
10.9k
    gc_list_validate_space(to, gcstate->visited_space);
1794
10.9k
    validate_list(to, collecting_clear_unreachable_clear);
1795
10.9k
    validate_list(&unreachable, collecting_set_unreachable_clear);
1796
1797
    /* Call tp_finalize on objects which have one. */
1798
10.9k
    finalize_garbage(tstate, &unreachable);
1799
    /* Handle any objects that may have resurrected after the call
1800
     * to 'finalize_garbage' and continue the collection with the
1801
     * objects that are still unreachable */
1802
10.9k
    PyGC_Head final_unreachable;
1803
10.9k
    gc_list_init(&final_unreachable);
1804
10.9k
    handle_resurrected_objects(&unreachable, &final_unreachable, to);
1805
1806
    /* Clear weakrefs to objects in the unreachable set.  See the comments
1807
     * above handle_weakref_callbacks() for details.
1808
     */
1809
10.9k
    clear_weakrefs(&final_unreachable);
1810
1811
    /* Call tp_clear on objects in the final_unreachable set.  This will cause
1812
    * the reference cycles to be broken.  It may also cause some objects
1813
    * in finalizers to be freed.
1814
    */
1815
10.9k
    stats->collected += gc_list_size(&final_unreachable);
1816
10.9k
    delete_garbage(tstate, gcstate, &final_unreachable, to);
1817
1818
    /* Collect statistics on uncollectable objects found and print
1819
     * debugging information. */
1820
10.9k
    Py_ssize_t n = 0;
1821
10.9k
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1822
0
        n++;
1823
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
1824
0
            debug_cycle("uncollectable", FROM_GC(gc));
1825
0
    }
1826
10.9k
    stats->uncollectable = n;
1827
    /* Append instances in the uncollectable set to a Python
1828
     * reachable list of garbage.  The programmer has to deal with
1829
     * this if they insist on creating this type of structure.
1830
     */
1831
10.9k
    handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
1832
10.9k
    gc_list_validate_space(to, gcstate->visited_space);
1833
10.9k
    validate_list(to, collecting_clear_unreachable_clear);
1834
10.9k
}
1835
1836
/* Invoke progress callbacks to notify clients that garbage collection
1837
 * is starting or stopping
1838
 */
1839
static void
1840
do_gc_callback(GCState *gcstate, const char *phase,
1841
                   int generation, struct gc_collection_stats *stats)
1842
61.0k
{
1843
61.0k
    assert(!PyErr_Occurred());
1844
1845
    /* The local variable cannot be rebound; check it for sanity */
1846
61.0k
    assert(PyList_CheckExact(gcstate->callbacks));
1847
61.0k
    PyObject *info = NULL;
1848
61.0k
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1849
0
        info = Py_BuildValue("{sisnsn}",
1850
0
            "generation", generation,
1851
0
            "collected", stats->collected,
1852
0
            "uncollectable", stats->uncollectable);
1853
0
        if (info == NULL) {
1854
0
            PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
1855
0
            return;
1856
0
        }
1857
0
    }
1858
1859
61.0k
    PyObject *phase_obj = PyUnicode_FromString(phase);
1860
61.0k
    if (phase_obj == NULL) {
1861
0
        Py_XDECREF(info);
1862
0
        PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
1863
0
        return;
1864
0
    }
1865
1866
61.0k
    PyObject *stack[] = {phase_obj, info};
1867
61.0k
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1868
0
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1869
0
        Py_INCREF(cb); /* make sure cb doesn't go away */
1870
0
        r = PyObject_Vectorcall(cb, stack, 2, NULL);
1871
0
        if (r == NULL) {
1872
0
            PyErr_FormatUnraisable("Exception ignored while "
1873
0
                                   "calling GC callback %R", cb);
1874
0
        }
1875
0
        else {
1876
0
            Py_DECREF(r);
1877
0
        }
1878
0
        Py_DECREF(cb);
1879
0
    }
1880
61.0k
    Py_DECREF(phase_obj);
1881
61.0k
    Py_XDECREF(info);
1882
61.0k
    assert(!PyErr_Occurred());
1883
61.0k
}
1884
1885
static void
1886
invoke_gc_callback(GCState *gcstate, const char *phase,
1887
                   int generation, struct gc_collection_stats *stats)
1888
61.0k
{
1889
61.0k
    if (gcstate->callbacks == NULL) {
1890
0
        return;
1891
0
    }
1892
61.0k
    do_gc_callback(gcstate, phase, generation, stats);
1893
61.0k
}
1894
1895
static int
1896
referrersvisit(PyObject* obj, void *arg)
1897
0
{
1898
0
    PyObject *objs = arg;
1899
0
    Py_ssize_t i;
1900
0
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
1901
0
        if (PyTuple_GET_ITEM(objs, i) == obj) {
1902
0
            return 1;
1903
0
        }
1904
0
    }
1905
0
    return 0;
1906
0
}
1907
1908
static int
1909
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1910
0
{
1911
0
    PyGC_Head *gc;
1912
0
    PyObject *obj;
1913
0
    traverseproc traverse;
1914
0
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1915
0
        obj = FROM_GC(gc);
1916
0
        traverse = Py_TYPE(obj)->tp_traverse;
1917
0
        if (obj == objs || obj == resultlist) {
1918
0
            continue;
1919
0
        }
1920
0
        if (traverse(obj, referrersvisit, objs)) {
1921
0
            if (PyList_Append(resultlist, obj) < 0) {
1922
0
                return 0; /* error */
1923
0
            }
1924
0
        }
1925
0
    }
1926
0
    return 1; /* no error */
1927
0
}
1928
1929
PyObject *
1930
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
1931
0
{
1932
0
    PyObject *result = PyList_New(0);
1933
0
    if (!result) {
1934
0
        return NULL;
1935
0
    }
1936
1937
0
    GCState *gcstate = &interp->gc;
1938
0
    for (int i = 0; i < NUM_GENERATIONS; i++) {
1939
0
        if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) {
1940
0
            Py_DECREF(result);
1941
0
            return NULL;
1942
0
        }
1943
0
    }
1944
0
    return result;
1945
0
}
1946
1947
PyObject *
1948
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
1949
0
{
1950
0
    assert(generation >= -1 && generation < NUM_GENERATIONS);
1951
0
    GCState *gcstate = &interp->gc;
1952
1953
0
    PyObject *result = PyList_New(0);
1954
    /* Generation:
1955
     * -1: Return all objects
1956
     * 0: All young objects
1957
     * 1: No objects
1958
     * 2: All old objects
1959
     */
1960
0
    if (result == NULL || generation == 1) {
1961
0
        return result;
1962
0
    }
1963
0
    if (generation <= 0) {
1964
0
        if (append_objects(result, &gcstate->young.head)) {
1965
0
            goto error;
1966
0
        }
1967
0
    }
1968
0
    if (generation != 0) {
1969
0
        if (append_objects(result, &gcstate->old[0].head)) {
1970
0
            goto error;
1971
0
        }
1972
0
        if (append_objects(result, &gcstate->old[1].head)) {
1973
0
            goto error;
1974
0
        }
1975
0
    }
1976
1977
0
    return result;
1978
0
error:
1979
0
    Py_DECREF(result);
1980
0
    return NULL;
1981
0
}
1982
1983
void
1984
_PyGC_Freeze(PyInterpreterState *interp)
1985
0
{
1986
0
    GCState *gcstate = &interp->gc;
1987
    /* The permanent_generation must be visited */
1988
0
    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
1989
0
    gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
1990
0
    gcstate->young.count = 0;
1991
0
    PyGC_Head*old0 = &gcstate->old[0].head;
1992
0
    PyGC_Head*old1 = &gcstate->old[1].head;
1993
0
    if (gcstate->visited_space) {
1994
0
        gc_list_set_space(old0, 1);
1995
0
    }
1996
0
    else {
1997
0
        gc_list_set_space(old1, 0);
1998
0
    }
1999
0
    gc_list_merge(old0, &gcstate->permanent_generation.head);
2000
0
    gcstate->old[0].count = 0;
2001
0
    gc_list_merge(old1, &gcstate->permanent_generation.head);
2002
0
    gcstate->old[1].count = 0;
2003
0
    validate_spaces(gcstate);
2004
0
}
2005
2006
void
2007
_PyGC_Unfreeze(PyInterpreterState *interp)
2008
0
{
2009
0
    GCState *gcstate = &interp->gc;
2010
0
    gc_list_merge(&gcstate->permanent_generation.head,
2011
0
                  &gcstate->old[gcstate->visited_space].head);
2012
0
    validate_spaces(gcstate);
2013
0
}
2014
2015
Py_ssize_t
2016
_PyGC_GetFreezeCount(PyInterpreterState *interp)
2017
0
{
2018
0
    GCState *gcstate = &interp->gc;
2019
0
    return gc_list_size(&gcstate->permanent_generation.head);
2020
0
}
2021
2022
/* C API for controlling the state of the garbage collector */
2023
int
2024
PyGC_Enable(void)
2025
0
{
2026
0
    GCState *gcstate = get_gc_state();
2027
0
    int old_state = gcstate->enabled;
2028
0
    gcstate->enabled = 1;
2029
0
    return old_state;
2030
0
}
2031
2032
int
2033
PyGC_Disable(void)
2034
0
{
2035
0
    GCState *gcstate = get_gc_state();
2036
0
    int old_state = gcstate->enabled;
2037
0
    gcstate->enabled = 0;
2038
0
    return old_state;
2039
0
}
2040
2041
int
2042
PyGC_IsEnabled(void)
2043
0
{
2044
0
    GCState *gcstate = get_gc_state();
2045
0
    return gcstate->enabled;
2046
0
}
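/* Illustrative usage (not part of gc.c): temporarily disabling the collector
 * around an allocation-heavy phase with the public C API above, restoring the
 * previous state and optionally forcing one full collection afterwards.
 * 'build_graph_without_gc' is a hypothetical helper. */
static void
build_graph_without_gc(void)
{
    int was_enabled = PyGC_Disable();   /* returns the previous enabled state */
    /* ... create many container objects that may form cycles ... */
    if (was_enabled) {
        PyGC_Enable();
    }
    (void)PyGC_Collect();               /* run one full collection afterwards */
}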
2047
2048
// Show stats for objects in each generation
2049
static void
2050
show_stats_each_generations(GCState *gcstate)
2051
0
{
2052
0
    char buf[100];
2053
0
    size_t pos = 0;
2054
2055
0
    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
2056
0
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
2057
0
                             " %zd",
2058
0
                             gc_list_size(GEN_HEAD(gcstate, i)));
2059
0
    }
2060
0
    PySys_FormatStderr(
2061
0
        "gc: objects in each generation:%s\n"
2062
0
        "gc: objects in permanent generation: %zd\n",
2063
0
        buf, gc_list_size(&gcstate->permanent_generation.head));
2064
0
}
2065
2066
Py_ssize_t
2067
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
2068
30.5k
{
2069
30.5k
    GCState *gcstate = &tstate->interp->gc;
2070
30.5k
    assert(tstate->current_frame == NULL || tstate->current_frame->stackpointer != NULL);
2071
2072
30.5k
    int expected = 0;
2073
30.5k
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
2074
        // Don't start a garbage collection if one is already in progress.
2075
0
        return 0;
2076
0
    }
2077
2078
30.5k
    struct gc_collection_stats stats = { 0 };
2079
30.5k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
2080
30.5k
        invoke_gc_callback(gcstate, "start", generation, &stats);
2081
30.5k
    }
2082
30.5k
    PyTime_t t1;
2083
30.5k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
2084
0
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
2085
0
        (void)PyTime_PerfCounterRaw(&t1);
2086
0
        show_stats_each_generations(gcstate);
2087
0
    }
2088
30.5k
    if (PyDTrace_GC_START_ENABLED()) {
2089
0
        PyDTrace_GC_START(generation);
2090
0
    }
2091
30.5k
    PyObject *exc = _PyErr_GetRaisedException(tstate);
2092
30.5k
    switch(generation) {
2093
0
        case 0:
2094
0
            gc_collect_young(tstate, &stats);
2095
0
            break;
2096
30.5k
        case 1:
2097
30.5k
            gc_collect_increment(tstate, &stats);
2098
30.5k
            break;
2099
0
        case 2:
2100
0
            gc_collect_full(tstate, &stats);
2101
0
            break;
2102
0
        default:
2103
0
            Py_UNREACHABLE();
2104
30.5k
    }
2105
30.5k
    if (PyDTrace_GC_DONE_ENABLED()) {
2106
0
        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
2107
0
    }
2108
30.5k
    if (reason != _Py_GC_REASON_SHUTDOWN) {
2109
30.5k
        invoke_gc_callback(gcstate, "stop", generation, &stats);
2110
30.5k
    }
2111
30.5k
    _PyErr_SetRaisedException(tstate, exc);
2112
30.5k
    GC_STAT_ADD(generation, objects_collected, stats.collected);
2113
#ifdef Py_STATS
2114
    if (_Py_stats) {
2115
        GC_STAT_ADD(generation, object_visits,
2116
            _Py_stats->object_stats.object_visits);
2117
        _Py_stats->object_stats.object_visits = 0;
2118
    }
2119
#endif
2120
30.5k
    validate_spaces(gcstate);
2121
30.5k
    _Py_atomic_store_int(&gcstate->collecting, 0);
2122
2123
30.5k
    if (gcstate->debug & _PyGC_DEBUG_STATS) {
2124
0
        PyTime_t t2;
2125
0
        (void)PyTime_PerfCounterRaw(&t2);
2126
0
        double d = PyTime_AsSecondsDouble(t2 - t1);
2127
0
        PySys_WriteStderr(
2128
0
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
2129
0
            stats.collected + stats.uncollectable, stats.uncollectable, d
2130
0
        );
2131
0
    }
2132
2133
30.5k
    return stats.uncollectable + stats.collected;
2134
30.5k
}
2135
2136
/* Public API to invoke gc.collect() from C */
2137
Py_ssize_t
2138
PyGC_Collect(void)
2139
0
{
2140
0
    return _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL);
2141
0
}
2142
2143
void
2144
_PyGC_CollectNoFail(PyThreadState *tstate)
2145
0
{
2146
    /* Ideally, this function is only called on interpreter shutdown,
2147
       and therefore not recursively.  Unfortunately, when there are daemon
2148
       threads, a daemon thread can start a cyclic garbage collection
2149
       during interpreter shutdown (and then never finish it).
2150
       See http://bugs.python.org/issue8713#msg195178 for an example.
2151
       */
2152
0
    _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN);
2153
0
}
2154
2155
void
2156
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
2157
0
{
2158
0
    GCState *gcstate = &interp->gc;
2159
0
    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
2160
0
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2161
0
        const char *message;
2162
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
2163
0
            message = "gc: %zd uncollectable objects at shutdown";
2164
0
        }
2165
0
        else {
2166
0
            message = "gc: %zd uncollectable objects at shutdown; " \
2167
0
                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2168
0
        }
2169
        /* PyErr_WarnFormat does too many things and we are at shutdown, so
2170
           the warnings module's dependencies (e.g. linecache) may be gone
2171
           already. */
2172
0
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2173
0
                                     "gc", NULL, message,
2174
0
                                     PyList_GET_SIZE(gcstate->garbage)))
2175
0
        {
2176
0
            PyErr_FormatUnraisable("Exception ignored in GC shutdown");
2177
0
        }
2178
0
        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
2179
0
            PyObject *repr = NULL, *bytes = NULL;
2180
0
            repr = PyObject_Repr(gcstate->garbage);
2181
0
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
2182
0
                PyErr_FormatUnraisable("Exception ignored in GC shutdown "
2183
0
                                       "while formatting garbage");
2184
0
            }
2185
0
            else {
2186
0
                PySys_WriteStderr(
2187
0
                    "      %s\n",
2188
0
                    PyBytes_AS_STRING(bytes)
2189
0
                    );
2190
0
            }
2191
0
            Py_XDECREF(repr);
2192
0
            Py_XDECREF(bytes);
2193
0
        }
2194
0
    }
2195
0
}
2196
2197
static void
2198
0
finalize_unlink_gc_head(PyGC_Head *gc) {
2199
0
    PyGC_Head *prev = GC_PREV(gc);
2200
0
    PyGC_Head *next = GC_NEXT(gc);
2201
0
    _PyGCHead_SET_NEXT(prev, next);
2202
0
    _PyGCHead_SET_PREV(next, prev);
2203
0
}
2204
2205
void
2206
_PyGC_Fini(PyInterpreterState *interp)
2207
0
{
2208
0
    GCState *gcstate = &interp->gc;
2209
0
    Py_CLEAR(gcstate->garbage);
2210
0
    Py_CLEAR(gcstate->callbacks);
2211
2212
    /* Prevent a subtle bug that affects sub-interpreters that use basic
2213
     * single-phase init extensions (m_size == -1).  Those extensions cause objects
2214
     * to be shared between interpreters, via the PyDict_Update(mdict, m_copy) call
2215
     * in import_find_extension().
2216
     *
2217
     * If they are GC objects, their GC head next or prev links could refer to
2218
     * the interpreter _gc_runtime_state PyGC_Head nodes.  Those nodes go away
2219
     * when the interpreter structure is freed and so pointers to them become
2220
     * invalid.  If those objects are still used by another interpreter and
2221
     * UNTRACK is called on them, a crash will happen.  We untrack the nodes
2222
     * here to avoid that.
2223
     *
2224
     * This bug was originally fixed when reported as gh-90228.  The bug was
2225
     * re-introduced in gh-94673.
2226
     */
2227
0
    finalize_unlink_gc_head(&gcstate->young.head);
2228
0
    finalize_unlink_gc_head(&gcstate->old[0].head);
2229
0
    finalize_unlink_gc_head(&gcstate->old[1].head);
2230
0
    finalize_unlink_gc_head(&gcstate->permanent_generation.head);
2231
0
}
2232
2233
/* for debugging */
2234
void
2235
_PyGC_Dump(PyGC_Head *g)
2236
0
{
2237
0
    _PyObject_Dump(FROM_GC(g));
2238
0
}
2239
2240
2241
#ifdef Py_DEBUG
2242
static int
2243
visit_validate(PyObject *op, void *parent_raw)
2244
{
2245
    PyObject *parent = _PyObject_CAST(parent_raw);
2246
    if (_PyObject_IsFreed(op)) {
2247
        _PyObject_ASSERT_FAILED_MSG(parent,
2248
                                    "PyObject_GC_Track() object is not valid");
2249
    }
2250
    return 0;
2251
}
2252
#endif
2253
2254
2255
/* extension modules might be compiled with GC support, so these
2256
   functions must always be available */
2257
2258
void
2259
PyObject_GC_Track(void *op_raw)
2260
123M
{
2261
123M
    PyObject *op = _PyObject_CAST(op_raw);
2262
123M
    if (_PyObject_GC_IS_TRACKED(op)) {
2263
0
        _PyObject_ASSERT_FAILED_MSG(op,
2264
0
                                    "object already tracked "
2265
0
                                    "by the garbage collector");
2266
0
    }
2267
123M
    _PyObject_GC_TRACK(op);
2268
2269
#ifdef Py_DEBUG
2270
    /* Check that the object is valid: validate objects traversed
2271
       by tp_traverse() */
2272
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
2273
    (void)traverse(op, visit_validate, op);
2274
#endif
2275
123M
}
2276
2277
void
2278
PyObject_GC_UnTrack(void *op_raw)
2279
1.45G
{
2280
1.45G
    PyObject *op = _PyObject_CAST(op_raw);
2281
    /* The code for some objects, such as tuples, is a bit
2282
     * sloppy about when the object is tracked and untracked. */
2283
1.45G
    if (_PyObject_GC_IS_TRACKED(op)) {
2284
1.26G
        _PyObject_GC_UNTRACK(op);
2285
1.26G
    }
2286
1.45G
}
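/* Illustrative sketch (not part of gc.c): the usual tracking protocol for a
 * GC-aware extension type.  'MyObject' and the my_* functions are
 * hypothetical; only the PyObject_GC_* calls and Py_VISIT/Py_CLEAR are real API. */
typedef struct {
    PyObject_HEAD
    PyObject *child;                    /* may participate in reference cycles */
} MyObject;

static int
my_traverse(PyObject *self, visitproc visit, void *arg)
{
    Py_VISIT(((MyObject *)self)->child);
    return 0;
}

static int
my_clear(PyObject *self)
{
    Py_CLEAR(((MyObject *)self)->child);
    return 0;
}

static void
my_dealloc(PyObject *self)
{
    PyObject_GC_UnTrack(self);          /* untrack before tearing down fields */
    (void)my_clear(self);
    PyObject_GC_Del(self);
}

static PyObject *
my_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    MyObject *op = PyObject_GC_New(MyObject, type);
    if (op == NULL) {
        return NULL;
    }
    op->child = NULL;
    PyObject_GC_Track((PyObject *)op);  /* start participating in collections */
    return (PyObject *)op;
}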
2287
2288
int
2289
PyObject_IS_GC(PyObject *obj)
2290
4.81M
{
2291
4.81M
    return _PyObject_IS_GC(obj);
2292
4.81M
}
2293
2294
void
2295
_Py_ScheduleGC(PyThreadState *tstate)
2296
5.78M
{
2297
5.78M
    if (!_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT))
2298
30.5k
    {
2299
30.5k
        _Py_set_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
2300
30.5k
    }
2301
5.78M
}
2302
2303
void
2304
_Py_TriggerGC(struct _gc_runtime_state *gcstate)
2305
5.82M
{
2306
5.82M
    PyThreadState *tstate = _PyThreadState_GET();
2307
5.82M
    if (gcstate->enabled &&
2308
5.82M
        gcstate->young.threshold != 0 &&
2309
5.82M
        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
2310
5.82M
        !_PyErr_Occurred(tstate))
2311
5.78M
    {
2312
5.78M
        _Py_ScheduleGC(tstate);
2313
5.78M
    }
2314
5.82M
}
2315
2316
void
2317
_PyObject_GC_Link(PyObject *op)
2318
372M
{
2319
372M
    PyGC_Head *gc = AS_GC(op);
2320
    // gc must be correctly aligned
2321
372M
    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
2322
372M
    gc->_gc_next = 0;
2323
372M
    gc->_gc_prev = 0;
2324
2325
372M
}
2326
2327
void
2328
_Py_RunGC(PyThreadState *tstate)
2329
30.5k
{
2330
30.5k
    if (tstate->interp->gc.enabled) {
2331
30.5k
        _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP);
2332
30.5k
    }
2333
30.5k
}
2334
2335
static PyObject *
2336
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
2337
284M
{
2338
284M
    PyThreadState *tstate = _PyThreadState_GET();
2339
284M
    if (basicsize > PY_SSIZE_T_MAX - presize) {
2340
0
        return _PyErr_NoMemory(tstate);
2341
0
    }
2342
284M
    size_t size = presize + basicsize;
2343
284M
    char *mem = _PyObject_MallocWithType(tp, size);
2344
284M
    if (mem == NULL) {
2345
0
        return _PyErr_NoMemory(tstate);
2346
0
    }
2347
284M
    ((PyObject **)mem)[0] = NULL;
2348
284M
    ((PyObject **)mem)[1] = NULL;
2349
284M
    PyObject *op = (PyObject *)(mem + presize);
2350
284M
    _PyObject_GC_Link(op);
2351
284M
    return op;
2352
284M
}
2353
2354
2355
PyObject *
2356
_PyObject_GC_New(PyTypeObject *tp)
2357
152M
{
2358
152M
    size_t presize = _PyType_PreHeaderSize(tp);
2359
152M
    size_t size = _PyObject_SIZE(tp);
2360
152M
    if (_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)) {
2361
0
        size += _PyInlineValuesSize(tp);
2362
0
    }
2363
152M
    PyObject *op = gc_alloc(tp, size, presize);
2364
152M
    if (op == NULL) {
2365
0
        return NULL;
2366
0
    }
2367
152M
    _PyObject_Init(op, tp);
2368
152M
    if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
2369
0
        _PyObject_InitInlineValues(op, tp);
2370
0
    }
2371
152M
    return op;
2372
152M
}
2373
2374
PyVarObject *
2375
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2376
132M
{
2377
132M
    PyVarObject *op;
2378
2379
132M
    if (nitems < 0) {
2380
0
        PyErr_BadInternalCall();
2381
0
        return NULL;
2382
0
    }
2383
132M
    size_t presize = _PyType_PreHeaderSize(tp);
2384
132M
    size_t size = _PyObject_VAR_SIZE(tp, nitems);
2385
132M
    op = (PyVarObject *)gc_alloc(tp, size, presize);
2386
132M
    if (op == NULL) {
2387
0
        return NULL;
2388
0
    }
2389
132M
    _PyObject_InitVar(op, tp, nitems);
2390
132M
    return op;
2391
132M
}
2392
2393
PyObject *
2394
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2395
0
{
2396
0
    size_t presize = _PyType_PreHeaderSize(tp);
2397
0
    size_t size = _PyObject_SIZE(tp) + extra_size;
2398
0
    PyObject *op = gc_alloc(tp, size, presize);
2399
0
    if (op == NULL) {
2400
0
        return NULL;
2401
0
    }
2402
0
    memset((char *)op + sizeof(PyObject), 0, size - sizeof(PyObject));
2403
0
    _PyObject_Init(op, tp);
2404
0
    return op;
2405
0
}
2406
2407
PyVarObject *
2408
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2409
16
{
2410
16
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2411
16
    const size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2412
16
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2413
16
    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2414
0
        return (PyVarObject *)PyErr_NoMemory();
2415
0
    }
2416
16
    char *mem = (char *)op - presize;
2417
16
    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
2418
16
    if (mem == NULL) {
2419
0
        return (PyVarObject *)PyErr_NoMemory();
2420
0
    }
2421
16
    op = (PyVarObject *) (mem + presize);
2422
16
    Py_SET_SIZE(op, nitems);
2423
16
    return op;
2424
16
}
2425
2426
void
2427
PyObject_GC_Del(void *op)
2428
372M
{
2429
372M
    size_t presize = _PyType_PreHeaderSize(Py_TYPE(op));
2430
372M
    PyGC_Head *g = AS_GC(op);
2431
372M
    if (_PyObject_GC_IS_TRACKED(op)) {
2432
0
        gc_list_remove(g);
2433
0
        GCState *gcstate = get_gc_state();
2434
0
        if (gcstate->young.count > 0) {
2435
0
            gcstate->young.count--;
2436
0
        }
2437
0
        gcstate->heap_size--;
2438
#ifdef Py_DEBUG
2439
        PyObject *exc = PyErr_GetRaisedException();
2440
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2441
                                     "gc", NULL,
2442
                                     "Object of type %s is not untracked "
2443
                                     "before destruction",
2444
                                     Py_TYPE(op)->tp_name))
2445
        {
2446
            PyErr_FormatUnraisable("Exception ignored on object deallocation");
2447
        }
2448
        PyErr_SetRaisedException(exc);
2449
#endif
2450
0
    }
2451
372M
    PyObject_Free(((char *)op)-presize);
2452
372M
}
2453
2454
int
2455
PyObject_GC_IsTracked(PyObject* obj)
2456
0
{
2457
0
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2458
0
        return 1;
2459
0
    }
2460
0
    return 0;
2461
0
}
2462
2463
int
2464
PyObject_GC_IsFinalized(PyObject *obj)
2465
0
{
2466
0
    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
2467
0
         return 1;
2468
0
    }
2469
0
    return 0;
2470
0
}
2471
2472
static int
2473
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
2474
0
{
2475
0
    PyGC_Head *gc_list, *gc;
2476
0
    gc_list = &gen->head;
2477
0
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2478
0
        PyObject *op = FROM_GC(gc);
2479
0
        Py_INCREF(op);
2480
0
        int res = callback(op, arg);
2481
0
        Py_DECREF(op);
2482
0
        if (!res) {
2483
0
            return -1;
2484
0
        }
2485
0
    }
2486
0
    return 0;
2487
0
}
2488
2489
void
2490
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2491
0
{
2492
0
    GCState *gcstate = get_gc_state();
2493
0
    int original_state = gcstate->enabled;
2494
0
    gcstate->enabled = 0;
2495
0
    if (visit_generation(callback, arg, &gcstate->young) < 0) {
2496
0
        goto done;
2497
0
    }
2498
0
    if (visit_generation(callback, arg, &gcstate->old[0]) < 0) {
2499
0
        goto done;
2500
0
    }
2501
0
    if (visit_generation(callback, arg, &gcstate->old[1]) < 0) {
2502
0
        goto done;
2503
0
    }
2504
0
    visit_generation(callback, arg, &gcstate->permanent_generation);
2505
0
done:
2506
0
    gcstate->enabled = original_state;
2507
0
}
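/* Illustrative usage (not part of gc.c): counting the currently tracked
 * objects with PyUnstable_GC_VisitObjects().  The callback must return
 * non-zero to continue visiting; returning 0 stops the walk early. */
static int
count_visited(PyObject *op, void *arg)
{
    (void)op;
    (*(Py_ssize_t *)arg)++;
    return 1;                           /* keep visiting */
}

static Py_ssize_t
count_tracked_objects(void)
{
    Py_ssize_t n = 0;
    PyUnstable_GC_VisitObjects(count_visited, &n);
    return n;
}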
2508
2509
#endif  // Py_GIL_DISABLED