Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/join.h
Line
Count
Source
1
/* stringlib: bytes joining implementation */
2
3
#if STRINGLIB_IS_UNICODE
4
#error join.h only compatible with byte-wise strings
5
#endif
6
7
Py_LOCAL_INLINE(PyObject *)
8
STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9
326k
{
10
326k
    const char *sepstr = STRINGLIB_STR(sep);
11
326k
    Py_ssize_t seplen = STRINGLIB_LEN(sep);
12
326k
    PyObject *res = NULL;
13
326k
    char *p;
14
326k
    Py_ssize_t seqlen = 0;
15
326k
    Py_ssize_t sz = 0;
16
326k
    Py_ssize_t i, nbufs;
17
326k
    PyObject *seq, *item;
18
326k
    Py_buffer *buffers = NULL;
19
326k
#define NB_STATIC_BUFFERS 10
20
326k
    Py_buffer static_buffers[NB_STATIC_BUFFERS];
21
326k
#define GIL_THRESHOLD 1048576
22
326k
    int drop_gil = 1;
23
326k
    PyThreadState *save = NULL;
24
25
326k
    seq = PySequence_Fast(iterable, "can only join an iterable");
26
326k
    if (seq == NULL) {
27
0
        return NULL;
28
0
    }
29
30
326k
    seqlen = PySequence_Fast_GET_SIZE(seq);
31
326k
    if (seqlen == 0) {
32
28
        Py_DECREF(seq);
33
28
        return STRINGLIB_NEW(NULL, 0);
34
28
    }
35
#if !STRINGLIB_MUTABLE
36
326k
    if (seqlen == 1) {
37
87.2k
        item = PySequence_Fast_GET_ITEM(seq, 0);
38
87.2k
        if (STRINGLIB_CHECK_EXACT(item)) {
39
87.2k
            Py_INCREF(item);
40
87.2k
            Py_DECREF(seq);
41
87.2k
            return item;
42
87.2k
        }
43
87.2k
    }
44
239k
#endif
45
239k
    if (seqlen > NB_STATIC_BUFFERS) {
46
16.3k
        buffers = PyMem_NEW(Py_buffer, seqlen);
47
16.3k
        if (buffers == NULL) {
48
0
            Py_DECREF(seq);
49
0
            PyErr_NoMemory();
50
0
            return NULL;
51
0
        }
52
16.3k
    }
53
222k
    else {
54
222k
        buffers = static_buffers;
55
222k
    }
56
57
    /* Here is the general case.  Do a pre-pass to figure out the total
58
     * amount of space we'll need (sz), and see whether all arguments are
59
     * bytes-like.
60
     */
61
1.00M
    for (i = 0, nbufs = 0; i < seqlen; i++) {
62
763k
        Py_ssize_t itemlen;
63
763k
        item = PySequence_Fast_GET_ITEM(seq, i);
64
763k
        if (PyBytes_CheckExact(item)) {
65
            /* Fast path. */
66
761k
            buffers[i].obj = Py_NewRef(item);
67
761k
            buffers[i].buf = PyBytes_AS_STRING(item);
68
761k
            buffers[i].len = PyBytes_GET_SIZE(item);
69
761k
        }
70
2.04k
        else {
71
            /* item is only borrowed; its __buffer__() may run Python that
72
               drops the sequence's last reference to it. */
73
2.04k
            Py_INCREF(item);
74
2.04k
            if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
75
0
                PyErr_Format(PyExc_TypeError,
76
0
                             "sequence item %zd: expected a bytes-like object, "
77
0
                             "%.80s found",
78
0
                             i, Py_TYPE(item)->tp_name);
79
0
                Py_DECREF(item);
80
0
                goto error;
81
0
            }
82
2.04k
            Py_DECREF(item);
83
            /* If the backing objects are mutable, then dropping the GIL
84
             * opens up race conditions where another thread tries to modify
85
             * the object on which we hold a buffer. Such code has data
86
             * races anyway, but this is a conservative approach that avoids
87
             * changing the behaviour of that data race.
88
             */
89
2.04k
            drop_gil = 0;
90
2.04k
        }
91
763k
        nbufs = i + 1;  /* for error cleanup */
92
763k
        itemlen = buffers[i].len;
93
763k
        if (itemlen > PY_SSIZE_T_MAX - sz) {
94
0
            PyErr_SetString(PyExc_OverflowError,
95
0
                            "join() result is too long");
96
0
            goto error;
97
0
        }
98
763k
        sz += itemlen;
99
763k
        if (i != 0) {
100
524k
            if (seplen > PY_SSIZE_T_MAX - sz) {
101
0
                PyErr_SetString(PyExc_OverflowError,
102
0
                                "join() result is too long");
103
0
                goto error;
104
0
            }
105
524k
            sz += seplen;
106
524k
        }
107
763k
        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
108
0
            PyErr_SetString(PyExc_RuntimeError,
109
0
                            "sequence changed size during iteration");
110
0
            goto error;
111
0
        }
112
763k
    }
113
114
    /* Allocate result space. */
115
239k
    res = STRINGLIB_NEW(NULL, sz);
116
239k
    if (res == NULL)
117
0
        goto error;
118
119
    /* Catenate everything. */
120
239k
    p = STRINGLIB_STR(res);
121
239k
    if (sz < GIL_THRESHOLD) {
122
239k
        drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
123
239k
    }
124
239k
    if (drop_gil) {
125
0
        save = PyEval_SaveThread();
126
0
    }
127
239k
    if (!seplen) {
128
        /* fast path */
129
1.00M
        for (i = 0; i < nbufs; i++) {
130
763k
            Py_ssize_t n = buffers[i].len;
131
763k
            char *q = buffers[i].buf;
132
763k
            memcpy(p, q, n);
133
763k
            p += n;
134
763k
        }
135
239k
    }
136
0
    else {
137
0
        for (i = 0; i < nbufs; i++) {
138
0
            Py_ssize_t n;
139
0
            char *q;
140
0
            if (i) {
141
0
                memcpy(p, sepstr, seplen);
142
0
                p += seplen;
143
0
            }
144
0
            n = buffers[i].len;
145
0
            q = buffers[i].buf;
146
0
            memcpy(p, q, n);
147
0
            p += n;
148
0
        }
149
0
    }
150
239k
    if (drop_gil) {
151
0
        PyEval_RestoreThread(save);
152
0
    }
153
239k
    goto done;
154
155
0
error:
156
0
    res = NULL;
157
239k
done:
158
239k
    Py_DECREF(seq);
159
1.00M
    for (i = 0; i < nbufs; i++)
160
763k
        PyBuffer_Release(&buffers[i]);
161
239k
    if (buffers != static_buffers)
162
16.3k
        PyMem_Free(buffers);
163
239k
    return res;
164
0
}
bytesobject.c:stringlib_bytes_join
Line
Count
Source
9
326k
{
10
326k
    const char *sepstr = STRINGLIB_STR(sep);
11
326k
    Py_ssize_t seplen = STRINGLIB_LEN(sep);
12
326k
    PyObject *res = NULL;
13
326k
    char *p;
14
326k
    Py_ssize_t seqlen = 0;
15
326k
    Py_ssize_t sz = 0;
16
326k
    Py_ssize_t i, nbufs;
17
326k
    PyObject *seq, *item;
18
326k
    Py_buffer *buffers = NULL;
19
326k
#define NB_STATIC_BUFFERS 10
20
326k
    Py_buffer static_buffers[NB_STATIC_BUFFERS];
21
326k
#define GIL_THRESHOLD 1048576
22
326k
    int drop_gil = 1;
23
326k
    PyThreadState *save = NULL;
24
25
326k
    seq = PySequence_Fast(iterable, "can only join an iterable");
26
326k
    if (seq == NULL) {
27
0
        return NULL;
28
0
    }
29
30
326k
    seqlen = PySequence_Fast_GET_SIZE(seq);
31
326k
    if (seqlen == 0) {
32
28
        Py_DECREF(seq);
33
28
        return STRINGLIB_NEW(NULL, 0);
34
28
    }
35
326k
#if !STRINGLIB_MUTABLE
36
326k
    if (seqlen == 1) {
37
87.2k
        item = PySequence_Fast_GET_ITEM(seq, 0);
38
87.2k
        if (STRINGLIB_CHECK_EXACT(item)) {
39
87.2k
            Py_INCREF(item);
40
87.2k
            Py_DECREF(seq);
41
87.2k
            return item;
42
87.2k
        }
43
87.2k
    }
44
239k
#endif
45
239k
    if (seqlen > NB_STATIC_BUFFERS) {
46
16.3k
        buffers = PyMem_NEW(Py_buffer, seqlen);
47
16.3k
        if (buffers == NULL) {
48
0
            Py_DECREF(seq);
49
0
            PyErr_NoMemory();
50
0
            return NULL;
51
0
        }
52
16.3k
    }
53
222k
    else {
54
222k
        buffers = static_buffers;
55
222k
    }
56
57
    /* Here is the general case.  Do a pre-pass to figure out the total
58
     * amount of space we'll need (sz), and see whether all arguments are
59
     * bytes-like.
60
     */
61
1.00M
    for (i = 0, nbufs = 0; i < seqlen; i++) {
62
763k
        Py_ssize_t itemlen;
63
763k
        item = PySequence_Fast_GET_ITEM(seq, i);
64
763k
        if (PyBytes_CheckExact(item)) {
65
            /* Fast path. */
66
761k
            buffers[i].obj = Py_NewRef(item);
67
761k
            buffers[i].buf = PyBytes_AS_STRING(item);
68
761k
            buffers[i].len = PyBytes_GET_SIZE(item);
69
761k
        }
70
2.04k
        else {
71
            /* item is only borrowed; its __buffer__() may run Python that
72
               drops the sequence's last reference to it. */
73
2.04k
            Py_INCREF(item);
74
2.04k
            if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
75
0
                PyErr_Format(PyExc_TypeError,
76
0
                             "sequence item %zd: expected a bytes-like object, "
77
0
                             "%.80s found",
78
0
                             i, Py_TYPE(item)->tp_name);
79
0
                Py_DECREF(item);
80
0
                goto error;
81
0
            }
82
2.04k
            Py_DECREF(item);
83
            /* If the backing objects are mutable, then dropping the GIL
84
             * opens up race conditions where another thread tries to modify
85
             * the object on which we hold a buffer. Such code has data
86
             * races anyway, but this is a conservative approach that avoids
87
             * changing the behaviour of that data race.
88
             */
89
2.04k
            drop_gil = 0;
90
2.04k
        }
91
763k
        nbufs = i + 1;  /* for error cleanup */
92
763k
        itemlen = buffers[i].len;
93
763k
        if (itemlen > PY_SSIZE_T_MAX - sz) {
94
0
            PyErr_SetString(PyExc_OverflowError,
95
0
                            "join() result is too long");
96
0
            goto error;
97
0
        }
98
763k
        sz += itemlen;
99
763k
        if (i != 0) {
100
524k
            if (seplen > PY_SSIZE_T_MAX - sz) {
101
0
                PyErr_SetString(PyExc_OverflowError,
102
0
                                "join() result is too long");
103
0
                goto error;
104
0
            }
105
524k
            sz += seplen;
106
524k
        }
107
763k
        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
108
0
            PyErr_SetString(PyExc_RuntimeError,
109
0
                            "sequence changed size during iteration");
110
0
            goto error;
111
0
        }
112
763k
    }
113
114
    /* Allocate result space. */
115
239k
    res = STRINGLIB_NEW(NULL, sz);
116
239k
    if (res == NULL)
117
0
        goto error;
118
119
    /* Catenate everything. */
120
239k
    p = STRINGLIB_STR(res);
121
239k
    if (sz < GIL_THRESHOLD) {
122
239k
        drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
123
239k
    }
124
239k
    if (drop_gil) {
125
0
        save = PyEval_SaveThread();
126
0
    }
127
239k
    if (!seplen) {
128
        /* fast path */
129
1.00M
        for (i = 0; i < nbufs; i++) {
130
763k
            Py_ssize_t n = buffers[i].len;
131
763k
            char *q = buffers[i].buf;
132
763k
            memcpy(p, q, n);
133
763k
            p += n;
134
763k
        }
135
239k
    }
136
0
    else {
137
0
        for (i = 0; i < nbufs; i++) {
138
0
            Py_ssize_t n;
139
0
            char *q;
140
0
            if (i) {
141
0
                memcpy(p, sepstr, seplen);
142
0
                p += seplen;
143
0
            }
144
0
            n = buffers[i].len;
145
0
            q = buffers[i].buf;
146
0
            memcpy(p, q, n);
147
0
            p += n;
148
0
        }
149
0
    }
150
239k
    if (drop_gil) {
151
0
        PyEval_RestoreThread(save);
152
0
    }
153
239k
    goto done;
154
155
0
error:
156
0
    res = NULL;
157
239k
done:
158
239k
    Py_DECREF(seq);
159
1.00M
    for (i = 0; i < nbufs; i++)
160
763k
        PyBuffer_Release(&buffers[i]);
161
239k
    if (buffers != static_buffers)
162
16.3k
        PyMem_Free(buffers);
163
239k
    return res;
164
0
}
Unexecuted instantiation: bytearrayobject.c:stringlib_bytes_join
165
166
#undef NB_STATIC_BUFFERS
167
#undef GIL_THRESHOLD