/src/cpython/Objects/stringlib/join.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* stringlib: bytes joining implementation */ |
2 | | |
3 | | #if STRINGLIB_IS_UNICODE |
4 | | #error join.h only compatible with byte-wise strings |
5 | | #endif |
6 | | |
7 | | Py_LOCAL_INLINE(PyObject *) |
8 | | STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) |
9 | 36.7k | { |
10 | 36.7k | const char *sepstr = STRINGLIB_STR(sep); |
11 | 36.7k | Py_ssize_t seplen = STRINGLIB_LEN(sep); |
12 | 36.7k | PyObject *res = NULL; |
13 | 36.7k | char *p; |
14 | 36.7k | Py_ssize_t seqlen = 0; |
15 | 36.7k | Py_ssize_t sz = 0; |
16 | 36.7k | Py_ssize_t i, nbufs; |
17 | 36.7k | PyObject *seq, *item; |
18 | 36.7k | Py_buffer *buffers = NULL; |
19 | 36.7k | #define NB_STATIC_BUFFERS 10 |
20 | 36.7k | Py_buffer static_buffers[NB_STATIC_BUFFERS]; |
21 | 36.7k | #define GIL_THRESHOLD 1048576 |
22 | 36.7k | int drop_gil = 1; |
23 | 36.7k | PyThreadState *save = NULL; |
24 | | |
25 | 36.7k | seq = PySequence_Fast(iterable, "can only join an iterable"); |
26 | 36.7k | if (seq == NULL) { |
27 | 0 | return NULL; |
28 | 0 | } |
29 | | |
30 | 36.7k | seqlen = PySequence_Fast_GET_SIZE(seq); |
31 | 36.7k | if (seqlen == 0) { |
32 | 0 | Py_DECREF(seq); |
33 | 0 | return STRINGLIB_NEW(NULL, 0); |
34 | 0 | } |
35 | | #if !STRINGLIB_MUTABLE |
36 | 36.7k | if (seqlen == 1) { |
37 | 8.20k | item = PySequence_Fast_GET_ITEM(seq, 0); |
38 | 8.20k | if (STRINGLIB_CHECK_EXACT(item)) { |
39 | 8.20k | Py_INCREF(item); |
40 | 8.20k | Py_DECREF(seq); |
41 | 8.20k | return item; |
42 | 8.20k | } |
43 | 8.20k | } |
44 | 28.5k | #endif |
45 | 28.5k | if (seqlen > NB_STATIC_BUFFERS) { |
46 | 2.05k | buffers = PyMem_NEW(Py_buffer, seqlen); |
47 | 2.05k | if (buffers == NULL) { |
48 | 0 | Py_DECREF(seq); |
49 | 0 | PyErr_NoMemory(); |
50 | 0 | return NULL; |
51 | 0 | } |
52 | 2.05k | } |
53 | 26.5k | else { |
54 | 26.5k | buffers = static_buffers; |
55 | 26.5k | } |
56 | | |
57 | | /* Here is the general case. Do a pre-pass to figure out the total |
58 | | * amount of space we'll need (sz), and see whether all arguments are |
59 | | * bytes-like. |
60 | | */ |
61 | 181k | for (i = 0, nbufs = 0; i < seqlen; i++) { |
62 | 153k | Py_ssize_t itemlen; |
63 | 153k | item = PySequence_Fast_GET_ITEM(seq, i); |
64 | 153k | if (PyBytes_CheckExact(item)) { |
65 | | /* Fast path. */ |
66 | 153k | buffers[i].obj = Py_NewRef(item); |
67 | 153k | buffers[i].buf = PyBytes_AS_STRING(item); |
68 | 153k | buffers[i].len = PyBytes_GET_SIZE(item); |
69 | 153k | } |
70 | 0 | else { |
71 | 0 | if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { |
72 | 0 | PyErr_Format(PyExc_TypeError, |
73 | 0 | "sequence item %zd: expected a bytes-like object, " |
74 | 0 | "%.80s found", |
75 | 0 | i, Py_TYPE(item)->tp_name); |
76 | 0 | goto error; |
77 | 0 | } |
78 | | /* If the backing objects are mutable, then dropping the GIL |
79 | | * opens up race conditions where another thread tries to modify |
80 | | * the object on which we hold a buffer. Such code has data |
81 | | * races anyway, but this is a conservative approach that avoids |
82 | | * changing the behaviour of that data race. |
83 | | */ |
84 | 0 | drop_gil = 0; |
85 | 0 | } |
86 | 153k | nbufs = i + 1; /* for error cleanup */ |
87 | 153k | itemlen = buffers[i].len; |
88 | 153k | if (itemlen > PY_SSIZE_T_MAX - sz) { |
89 | 0 | PyErr_SetString(PyExc_OverflowError, |
90 | 0 | "join() result is too long"); |
91 | 0 | goto error; |
92 | 0 | } |
93 | 153k | sz += itemlen; |
94 | 153k | if (i != 0) { |
95 | 124k | if (seplen > PY_SSIZE_T_MAX - sz) { |
96 | 0 | PyErr_SetString(PyExc_OverflowError, |
97 | 0 | "join() result is too long"); |
98 | 0 | goto error; |
99 | 0 | } |
100 | 124k | sz += seplen; |
101 | 124k | } |
102 | 153k | if (seqlen != PySequence_Fast_GET_SIZE(seq)) { |
103 | 0 | PyErr_SetString(PyExc_RuntimeError, |
104 | 0 | "sequence changed size during iteration"); |
105 | 0 | goto error; |
106 | 0 | } |
107 | 153k | } |
108 | | |
109 | | /* Allocate result space. */ |
110 | 28.5k | res = STRINGLIB_NEW(NULL, sz); |
111 | 28.5k | if (res == NULL) |
112 | 0 | goto error; |
113 | | |
114 | | /* Catenate everything. */ |
115 | 28.5k | p = STRINGLIB_STR(res); |
116 | 28.5k | if (sz < GIL_THRESHOLD) { |
117 | 28.5k | drop_gil = 0; /* Benefits are likely outweighed by the overheads */ |
118 | 28.5k | } |
119 | 28.5k | if (drop_gil) { |
120 | 0 | save = PyEval_SaveThread(); |
121 | 0 | } |
122 | 28.5k | if (!seplen) { |
123 | | /* fast path */ |
124 | 181k | for (i = 0; i < nbufs; i++) { |
125 | 153k | Py_ssize_t n = buffers[i].len; |
126 | 153k | char *q = buffers[i].buf; |
127 | 153k | memcpy(p, q, n); |
128 | 153k | p += n; |
129 | 153k | } |
130 | 28.5k | } |
131 | 0 | else { |
132 | 0 | for (i = 0; i < nbufs; i++) { |
133 | 0 | Py_ssize_t n; |
134 | 0 | char *q; |
135 | 0 | if (i) { |
136 | 0 | memcpy(p, sepstr, seplen); |
137 | 0 | p += seplen; |
138 | 0 | } |
139 | 0 | n = buffers[i].len; |
140 | 0 | q = buffers[i].buf; |
141 | 0 | memcpy(p, q, n); |
142 | 0 | p += n; |
143 | 0 | } |
144 | 0 | } |
145 | 28.5k | if (drop_gil) { |
146 | 0 | PyEval_RestoreThread(save); |
147 | 0 | } |
148 | 28.5k | goto done; |
149 | | |
150 | 0 | error: |
151 | 0 | res = NULL; |
152 | 28.5k | done: |
153 | 28.5k | Py_DECREF(seq); |
154 | 181k | for (i = 0; i < nbufs; i++) |
155 | 153k | PyBuffer_Release(&buffers[i]); |
156 | 28.5k | if (buffers != static_buffers) |
157 | 2.05k | PyMem_Free(buffers); |
158 | 28.5k | return res; |
159 | 0 | } |
bytesobject.c:stringlib_bytes_join
Line | Count | Source |
9 | 36.7k | { |
10 | 36.7k | const char *sepstr = STRINGLIB_STR(sep); |
11 | 36.7k | Py_ssize_t seplen = STRINGLIB_LEN(sep); |
12 | 36.7k | PyObject *res = NULL; |
13 | 36.7k | char *p; |
14 | 36.7k | Py_ssize_t seqlen = 0; |
15 | 36.7k | Py_ssize_t sz = 0; |
16 | 36.7k | Py_ssize_t i, nbufs; |
17 | 36.7k | PyObject *seq, *item; |
18 | 36.7k | Py_buffer *buffers = NULL; |
19 | 36.7k | #define NB_STATIC_BUFFERS 10 |
20 | 36.7k | Py_buffer static_buffers[NB_STATIC_BUFFERS]; |
21 | 36.7k | #define GIL_THRESHOLD 1048576 |
22 | 36.7k | int drop_gil = 1; |
23 | 36.7k | PyThreadState *save = NULL; |
24 | | |
25 | 36.7k | seq = PySequence_Fast(iterable, "can only join an iterable"); |
26 | 36.7k | if (seq == NULL) { |
27 | 0 | return NULL; |
28 | 0 | } |
29 | | |
30 | 36.7k | seqlen = PySequence_Fast_GET_SIZE(seq); |
31 | 36.7k | if (seqlen == 0) { |
32 | 0 | Py_DECREF(seq); |
33 | 0 | return STRINGLIB_NEW(NULL, 0); |
34 | 0 | } |
35 | 36.7k | #if !STRINGLIB_MUTABLE |
36 | 36.7k | if (seqlen == 1) { |
37 | 8.20k | item = PySequence_Fast_GET_ITEM(seq, 0); |
38 | 8.20k | if (STRINGLIB_CHECK_EXACT(item)) { |
39 | 8.20k | Py_INCREF(item); |
40 | 8.20k | Py_DECREF(seq); |
41 | 8.20k | return item; |
42 | 8.20k | } |
43 | 8.20k | } |
44 | 28.5k | #endif |
45 | 28.5k | if (seqlen > NB_STATIC_BUFFERS) { |
46 | 2.05k | buffers = PyMem_NEW(Py_buffer, seqlen); |
47 | 2.05k | if (buffers == NULL) { |
48 | 0 | Py_DECREF(seq); |
49 | 0 | PyErr_NoMemory(); |
50 | 0 | return NULL; |
51 | 0 | } |
52 | 2.05k | } |
53 | 26.5k | else { |
54 | 26.5k | buffers = static_buffers; |
55 | 26.5k | } |
56 | | |
57 | | /* Here is the general case. Do a pre-pass to figure out the total |
58 | | * amount of space we'll need (sz), and see whether all arguments are |
59 | | * bytes-like. |
60 | | */ |
61 | 181k | for (i = 0, nbufs = 0; i < seqlen; i++) { |
62 | 153k | Py_ssize_t itemlen; |
63 | 153k | item = PySequence_Fast_GET_ITEM(seq, i); |
64 | 153k | if (PyBytes_CheckExact(item)) { |
65 | | /* Fast path. */ |
66 | 153k | buffers[i].obj = Py_NewRef(item); |
67 | 153k | buffers[i].buf = PyBytes_AS_STRING(item); |
68 | 153k | buffers[i].len = PyBytes_GET_SIZE(item); |
69 | 153k | } |
70 | 0 | else { |
71 | 0 | if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { |
72 | 0 | PyErr_Format(PyExc_TypeError, |
73 | 0 | "sequence item %zd: expected a bytes-like object, " |
74 | 0 | "%.80s found", |
75 | 0 | i, Py_TYPE(item)->tp_name); |
76 | 0 | goto error; |
77 | 0 | } |
78 | | /* If the backing objects are mutable, then dropping the GIL |
79 | | * opens up race conditions where another thread tries to modify |
80 | | * the object on which we hold a buffer. Such code has data |
81 | | * races anyway, but this is a conservative approach that avoids |
82 | | * changing the behaviour of that data race. |
83 | | */ |
84 | 0 | drop_gil = 0; |
85 | 0 | } |
86 | 153k | nbufs = i + 1; /* for error cleanup */ |
87 | 153k | itemlen = buffers[i].len; |
88 | 153k | if (itemlen > PY_SSIZE_T_MAX - sz) { |
89 | 0 | PyErr_SetString(PyExc_OverflowError, |
90 | 0 | "join() result is too long"); |
91 | 0 | goto error; |
92 | 0 | } |
93 | 153k | sz += itemlen; |
94 | 153k | if (i != 0) { |
95 | 124k | if (seplen > PY_SSIZE_T_MAX - sz) { |
96 | 0 | PyErr_SetString(PyExc_OverflowError, |
97 | 0 | "join() result is too long"); |
98 | 0 | goto error; |
99 | 0 | } |
100 | 124k | sz += seplen; |
101 | 124k | } |
102 | 153k | if (seqlen != PySequence_Fast_GET_SIZE(seq)) { |
103 | 0 | PyErr_SetString(PyExc_RuntimeError, |
104 | 0 | "sequence changed size during iteration"); |
105 | 0 | goto error; |
106 | 0 | } |
107 | 153k | } |
108 | | |
109 | | /* Allocate result space. */ |
110 | 28.5k | res = STRINGLIB_NEW(NULL, sz); |
111 | 28.5k | if (res == NULL) |
112 | 0 | goto error; |
113 | | |
114 | | /* Catenate everything. */ |
115 | 28.5k | p = STRINGLIB_STR(res); |
116 | 28.5k | if (sz < GIL_THRESHOLD) { |
117 | 28.5k | drop_gil = 0; /* Benefits are likely outweighed by the overheads */ |
118 | 28.5k | } |
119 | 28.5k | if (drop_gil) { |
120 | 0 | save = PyEval_SaveThread(); |
121 | 0 | } |
122 | 28.5k | if (!seplen) { |
123 | | /* fast path */ |
124 | 181k | for (i = 0; i < nbufs; i++) { |
125 | 153k | Py_ssize_t n = buffers[i].len; |
126 | 153k | char *q = buffers[i].buf; |
127 | 153k | memcpy(p, q, n); |
128 | 153k | p += n; |
129 | 153k | } |
130 | 28.5k | } |
131 | 0 | else { |
132 | 0 | for (i = 0; i < nbufs; i++) { |
133 | 0 | Py_ssize_t n; |
134 | 0 | char *q; |
135 | 0 | if (i) { |
136 | 0 | memcpy(p, sepstr, seplen); |
137 | 0 | p += seplen; |
138 | 0 | } |
139 | 0 | n = buffers[i].len; |
140 | 0 | q = buffers[i].buf; |
141 | 0 | memcpy(p, q, n); |
142 | 0 | p += n; |
143 | 0 | } |
144 | 0 | } |
145 | 28.5k | if (drop_gil) { |
146 | 0 | PyEval_RestoreThread(save); |
147 | 0 | } |
148 | 28.5k | goto done; |
149 | | |
150 | 0 | error: |
151 | 0 | res = NULL; |
152 | 28.5k | done: |
153 | 28.5k | Py_DECREF(seq); |
154 | 181k | for (i = 0; i < nbufs; i++) |
155 | 153k | PyBuffer_Release(&buffers[i]); |
156 | 28.5k | if (buffers != static_buffers) |
157 | 2.05k | PyMem_Free(buffers); |
158 | 28.5k | return res; |
159 | 0 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_bytes_join |
160 | | |
161 | | #undef NB_STATIC_BUFFERS |
162 | | #undef GIL_THRESHOLD |