/src/cpython/Objects/stringlib/transmogrify.h
Line | Count | Source (jump to first uncovered line) |
1 | | #if STRINGLIB_IS_UNICODE |
2 | | # error "transmogrify.h only compatible with byte-wise strings" |
3 | | #endif |
4 | | |
5 | | /* the more complicated methods. parts of these should be pulled out into the |
6 | | shared code in bytes_methods.c to cut down on duplicate code bloat. */ |
7 | | |
8 | | /*[clinic input] |
9 | | class B "PyObject *" "&PyType_Type" |
10 | | [clinic start generated code]*/ |
11 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/ |
12 | | |
13 | | #include "clinic/transmogrify.h.h" |
14 | | |
15 | | static inline PyObject * |
16 | | return_self(PyObject *self) |
17 | 26.9k | { |
18 | | #if !STRINGLIB_MUTABLE |
19 | 26.9k | if (STRINGLIB_CHECK_EXACT(self)) { |
20 | 26.9k | return Py_NewRef(self); |
21 | 26.9k | } |
22 | 0 | #endif |
23 | 0 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
24 | 26.9k | } bytesobject.c:return_self Line | Count | Source | 17 | 26.9k | { | 18 | 26.9k | #if !STRINGLIB_MUTABLE | 19 | 26.9k | if (STRINGLIB_CHECK_EXACT(self)) { | 20 | 26.9k | return Py_NewRef(self); | 21 | 26.9k | } | 22 | 0 | #endif | 23 | 0 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); | 24 | 26.9k | } |
Unexecuted instantiation: bytearrayobject.c:return_self |
25 | | |
26 | | /*[clinic input] |
27 | | B.expandtabs as stringlib_expandtabs |
28 | | |
29 | | tabsize: int = 8 |
30 | | |
31 | | Return a copy where all tab characters are expanded using spaces. |
32 | | |
33 | | If tabsize is not given, a tab size of 8 characters is assumed. |
34 | | [clinic start generated code]*/ |
35 | | |
36 | | static PyObject * |
37 | | stringlib_expandtabs_impl(PyObject *self, int tabsize) |
38 | | /*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/ |
39 | 0 | { |
40 | 0 | const char *e, *p; |
41 | 0 | char *q; |
42 | 0 | Py_ssize_t i, j; |
43 | 0 | PyObject *u; |
44 | | |
45 | | /* First pass: determine size of output string */ |
46 | 0 | i = j = 0; |
47 | 0 | e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); |
48 | 0 | for (p = STRINGLIB_STR(self); p < e; p++) { |
49 | 0 | if (*p == '\t') { |
50 | 0 | if (tabsize > 0) { |
51 | 0 | Py_ssize_t incr = tabsize - (j % tabsize); |
52 | 0 | if (j > PY_SSIZE_T_MAX - incr) |
53 | 0 | goto overflow; |
54 | 0 | j += incr; |
55 | 0 | } |
56 | 0 | } |
57 | 0 | else { |
58 | 0 | if (j > PY_SSIZE_T_MAX - 1) |
59 | 0 | goto overflow; |
60 | 0 | j++; |
61 | 0 | if (*p == '\n' || *p == '\r') { |
62 | 0 | if (i > PY_SSIZE_T_MAX - j) |
63 | 0 | goto overflow; |
64 | 0 | i += j; |
65 | 0 | j = 0; |
66 | 0 | } |
67 | 0 | } |
68 | 0 | } |
69 | | |
70 | 0 | if (i > PY_SSIZE_T_MAX - j) |
71 | 0 | goto overflow; |
72 | | |
73 | | /* Second pass: create output string and fill it */ |
74 | 0 | u = STRINGLIB_NEW(NULL, i + j); |
75 | 0 | if (!u) |
76 | 0 | return NULL; |
77 | | |
78 | 0 | j = 0; |
79 | 0 | q = STRINGLIB_STR(u); |
80 | |
|
81 | 0 | for (p = STRINGLIB_STR(self); p < e; p++) { |
82 | 0 | if (*p == '\t') { |
83 | 0 | if (tabsize > 0) { |
84 | 0 | i = tabsize - (j % tabsize); |
85 | 0 | j += i; |
86 | 0 | while (i--) |
87 | 0 | *q++ = ' '; |
88 | 0 | } |
89 | 0 | } |
90 | 0 | else { |
91 | 0 | j++; |
92 | 0 | *q++ = *p; |
93 | 0 | if (*p == '\n' || *p == '\r') |
94 | 0 | j = 0; |
95 | 0 | } |
96 | 0 | } |
97 | |
|
98 | 0 | return u; |
99 | 0 | overflow: |
100 | 0 | PyErr_SetString(PyExc_OverflowError, "result too long"); |
101 | 0 | return NULL; |
102 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_expandtabs_impl Unexecuted instantiation: bytearrayobject.c:stringlib_expandtabs_impl |
103 | | |
104 | | static inline PyObject * |
105 | | pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) |
106 | 0 | { |
107 | 0 | PyObject *u; |
108 | |
|
109 | 0 | if (left < 0) |
110 | 0 | left = 0; |
111 | 0 | if (right < 0) |
112 | 0 | right = 0; |
113 | |
|
114 | 0 | if (left == 0 && right == 0) { |
115 | 0 | return return_self(self); |
116 | 0 | } |
117 | | |
118 | 0 | u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right); |
119 | 0 | if (u) { |
120 | 0 | if (left) |
121 | 0 | memset(STRINGLIB_STR(u), fill, left); |
122 | 0 | memcpy(STRINGLIB_STR(u) + left, |
123 | 0 | STRINGLIB_STR(self), |
124 | 0 | STRINGLIB_LEN(self)); |
125 | 0 | if (right) |
126 | 0 | memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), |
127 | 0 | fill, right); |
128 | 0 | } |
129 | |
|
130 | 0 | return u; |
131 | 0 | } Unexecuted instantiation: bytesobject.c:pad Unexecuted instantiation: bytearrayobject.c:pad |
132 | | |
133 | | /*[clinic input] |
134 | | B.ljust as stringlib_ljust |
135 | | |
136 | | width: Py_ssize_t |
137 | | fillchar: char = b' ' |
138 | | / |
139 | | |
140 | | Return a left-justified string of length width. |
141 | | |
142 | | Padding is done using the specified fill character. |
143 | | [clinic start generated code]*/ |
144 | | |
145 | | static PyObject * |
146 | | stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar) |
147 | | /*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/ |
148 | 0 | { |
149 | 0 | if (STRINGLIB_LEN(self) >= width) { |
150 | 0 | return return_self(self); |
151 | 0 | } |
152 | | |
153 | 0 | return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); |
154 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_ljust_impl Unexecuted instantiation: bytearrayobject.c:stringlib_ljust_impl |
155 | | |
156 | | |
157 | | /*[clinic input] |
158 | | B.rjust as stringlib_rjust |
159 | | |
160 | | width: Py_ssize_t |
161 | | fillchar: char = b' ' |
162 | | / |
163 | | |
164 | | Return a right-justified string of length width. |
165 | | |
166 | | Padding is done using the specified fill character. |
167 | | [clinic start generated code]*/ |
168 | | |
169 | | static PyObject * |
170 | | stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar) |
171 | | /*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/ |
172 | 0 | { |
173 | 0 | if (STRINGLIB_LEN(self) >= width) { |
174 | 0 | return return_self(self); |
175 | 0 | } |
176 | | |
177 | 0 | return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); |
178 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_rjust_impl Unexecuted instantiation: bytearrayobject.c:stringlib_rjust_impl |
179 | | |
180 | | |
181 | | /*[clinic input] |
182 | | B.center as stringlib_center |
183 | | |
184 | | width: Py_ssize_t |
185 | | fillchar: char = b' ' |
186 | | / |
187 | | |
188 | | Return a centered string of length width. |
189 | | |
190 | | Padding is done using the specified fill character. |
191 | | [clinic start generated code]*/ |
192 | | |
193 | | static PyObject * |
194 | | stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar) |
195 | | /*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/ |
196 | 0 | { |
197 | 0 | Py_ssize_t marg, left; |
198 | |
|
199 | 0 | if (STRINGLIB_LEN(self) >= width) { |
200 | 0 | return return_self(self); |
201 | 0 | } |
202 | | |
203 | 0 | marg = width - STRINGLIB_LEN(self); |
204 | 0 | left = marg / 2 + (marg & width & 1); |
205 | |
|
206 | 0 | return pad(self, left, marg - left, fillchar); |
207 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_center_impl Unexecuted instantiation: bytearrayobject.c:stringlib_center_impl |
208 | | |
209 | | /*[clinic input] |
210 | | B.zfill as stringlib_zfill |
211 | | |
212 | | width: Py_ssize_t |
213 | | / |
214 | | |
215 | | Pad a numeric string with zeros on the left, to fill a field of the given width. |
216 | | |
217 | | The original string is never truncated. |
218 | | [clinic start generated code]*/ |
219 | | |
220 | | static PyObject * |
221 | | stringlib_zfill_impl(PyObject *self, Py_ssize_t width) |
222 | | /*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/ |
223 | 0 | { |
224 | 0 | Py_ssize_t fill; |
225 | 0 | PyObject *s; |
226 | 0 | char *p; |
227 | |
|
228 | 0 | if (STRINGLIB_LEN(self) >= width) { |
229 | 0 | return return_self(self); |
230 | 0 | } |
231 | | |
232 | 0 | fill = width - STRINGLIB_LEN(self); |
233 | |
|
234 | 0 | s = pad(self, fill, 0, '0'); |
235 | |
|
236 | 0 | if (s == NULL) |
237 | 0 | return NULL; |
238 | | |
239 | 0 | p = STRINGLIB_STR(s); |
240 | 0 | if (p[fill] == '+' || p[fill] == '-') { |
241 | | /* move sign to beginning of string */ |
242 | 0 | p[0] = p[fill]; |
243 | 0 | p[fill] = '0'; |
244 | 0 | } |
245 | |
|
246 | 0 | return s; |
247 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_zfill_impl Unexecuted instantiation: bytearrayobject.c:stringlib_zfill_impl |
248 | | |
249 | | |
250 | | /* find and count characters and substrings */ |
251 | | |
/* findchar(target, target_len, c): memchr() wrapper that yields a `char *`
   to the first byte equal to c among the first target_len bytes of target,
   or NULL when there is no such byte. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
254 | | |
255 | | |
256 | | static Py_ssize_t |
257 | | countchar(const char *target, Py_ssize_t target_len, char c, |
258 | | Py_ssize_t maxcount) |
259 | 0 | { |
260 | 0 | Py_ssize_t count = 0; |
261 | 0 | const char *start = target; |
262 | 0 | const char *end = target + target_len; |
263 | |
|
264 | 0 | while ((start = findchar(start, end - start, c)) != NULL) { |
265 | 0 | count++; |
266 | 0 | if (count >= maxcount) |
267 | 0 | break; |
268 | 0 | start += 1; |
269 | 0 | } |
270 | 0 | return count; |
271 | 0 | } Unexecuted instantiation: bytesobject.c:countchar Unexecuted instantiation: bytearrayobject.c:countchar |
272 | | |
273 | | |
274 | | /* Algorithms for different cases of string replacement */ |
275 | | |
276 | | /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ |
277 | | static PyObject * |
278 | | stringlib_replace_interleave(PyObject *self, |
279 | | const char *to_s, Py_ssize_t to_len, |
280 | | Py_ssize_t maxcount) |
281 | 0 | { |
282 | 0 | const char *self_s; |
283 | 0 | char *result_s; |
284 | 0 | Py_ssize_t self_len, result_len; |
285 | 0 | Py_ssize_t count, i; |
286 | 0 | PyObject *result; |
287 | |
|
288 | 0 | self_len = STRINGLIB_LEN(self); |
289 | | |
290 | | /* 1 at the end plus 1 after every character; |
291 | | count = min(maxcount, self_len + 1) */ |
292 | 0 | if (maxcount <= self_len) { |
293 | 0 | count = maxcount; |
294 | 0 | } |
295 | 0 | else { |
296 | | /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ |
297 | 0 | count = self_len + 1; |
298 | 0 | } |
299 | | |
300 | | /* Check for overflow */ |
301 | | /* result_len = count * to_len + self_len; */ |
302 | 0 | assert(count > 0); |
303 | 0 | if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { |
304 | 0 | PyErr_SetString(PyExc_OverflowError, |
305 | 0 | "replace bytes is too long"); |
306 | 0 | return NULL; |
307 | 0 | } |
308 | 0 | result_len = count * to_len + self_len; |
309 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
310 | 0 | if (result == NULL) { |
311 | 0 | return NULL; |
312 | 0 | } |
313 | | |
314 | 0 | self_s = STRINGLIB_STR(self); |
315 | 0 | result_s = STRINGLIB_STR(result); |
316 | |
|
317 | 0 | if (to_len > 1) { |
318 | | /* Lay the first one down (guaranteed this will occur) */ |
319 | 0 | memcpy(result_s, to_s, to_len); |
320 | 0 | result_s += to_len; |
321 | 0 | count -= 1; |
322 | |
|
323 | 0 | for (i = 0; i < count; i++) { |
324 | 0 | *result_s++ = *self_s++; |
325 | 0 | memcpy(result_s, to_s, to_len); |
326 | 0 | result_s += to_len; |
327 | 0 | } |
328 | 0 | } |
329 | 0 | else { |
330 | 0 | result_s[0] = to_s[0]; |
331 | 0 | result_s += to_len; |
332 | 0 | count -= 1; |
333 | 0 | for (i = 0; i < count; i++) { |
334 | 0 | *result_s++ = *self_s++; |
335 | 0 | result_s[0] = to_s[0]; |
336 | 0 | result_s += to_len; |
337 | 0 | } |
338 | 0 | } |
339 | | |
340 | | /* Copy the rest of the original string */ |
341 | 0 | memcpy(result_s, self_s, self_len - i); |
342 | |
|
343 | 0 | return result; |
344 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_replace_interleave Unexecuted instantiation: bytearrayobject.c:stringlib_replace_interleave |
345 | | |
346 | | /* Special case for deleting a single character */ |
347 | | /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ |
348 | | static PyObject * |
349 | | stringlib_replace_delete_single_character(PyObject *self, |
350 | | char from_c, Py_ssize_t maxcount) |
351 | 0 | { |
352 | 0 | const char *self_s, *start, *next, *end; |
353 | 0 | char *result_s; |
354 | 0 | Py_ssize_t self_len, result_len; |
355 | 0 | Py_ssize_t count; |
356 | 0 | PyObject *result; |
357 | |
|
358 | 0 | self_len = STRINGLIB_LEN(self); |
359 | 0 | self_s = STRINGLIB_STR(self); |
360 | |
|
361 | 0 | count = countchar(self_s, self_len, from_c, maxcount); |
362 | 0 | if (count == 0) { |
363 | 0 | return return_self(self); |
364 | 0 | } |
365 | | |
366 | 0 | result_len = self_len - count; /* from_len == 1 */ |
367 | 0 | assert(result_len>=0); |
368 | |
|
369 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
370 | 0 | if (result == NULL) { |
371 | 0 | return NULL; |
372 | 0 | } |
373 | 0 | result_s = STRINGLIB_STR(result); |
374 | |
|
375 | 0 | start = self_s; |
376 | 0 | end = self_s + self_len; |
377 | 0 | while (count-- > 0) { |
378 | 0 | next = findchar(start, end - start, from_c); |
379 | 0 | if (next == NULL) |
380 | 0 | break; |
381 | 0 | memcpy(result_s, start, next - start); |
382 | 0 | result_s += (next - start); |
383 | 0 | start = next + 1; |
384 | 0 | } |
385 | 0 | memcpy(result_s, start, end - start); |
386 | |
|
387 | 0 | return result; |
388 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_replace_delete_single_character Unexecuted instantiation: bytearrayobject.c:stringlib_replace_delete_single_character |
389 | | |
390 | | /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ |
391 | | |
392 | | static PyObject * |
393 | | stringlib_replace_delete_substring(PyObject *self, |
394 | | const char *from_s, Py_ssize_t from_len, |
395 | | Py_ssize_t maxcount) |
396 | 0 | { |
397 | 0 | const char *self_s, *start, *next, *end; |
398 | 0 | char *result_s; |
399 | 0 | Py_ssize_t self_len, result_len; |
400 | 0 | Py_ssize_t count, offset; |
401 | 0 | PyObject *result; |
402 | |
|
403 | 0 | self_len = STRINGLIB_LEN(self); |
404 | 0 | self_s = STRINGLIB_STR(self); |
405 | |
|
406 | 0 | count = stringlib_count(self_s, self_len, |
407 | 0 | from_s, from_len, |
408 | 0 | maxcount); |
409 | |
|
410 | 0 | if (count == 0) { |
411 | | /* no matches */ |
412 | 0 | return return_self(self); |
413 | 0 | } |
414 | | |
415 | 0 | result_len = self_len - (count * from_len); |
416 | 0 | assert (result_len>=0); |
417 | |
|
418 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
419 | 0 | if (result == NULL) { |
420 | 0 | return NULL; |
421 | 0 | } |
422 | 0 | result_s = STRINGLIB_STR(result); |
423 | |
|
424 | 0 | start = self_s; |
425 | 0 | end = self_s + self_len; |
426 | 0 | while (count-- > 0) { |
427 | 0 | offset = stringlib_find(start, end - start, |
428 | 0 | from_s, from_len, |
429 | 0 | 0); |
430 | 0 | if (offset == -1) |
431 | 0 | break; |
432 | 0 | next = start + offset; |
433 | |
|
434 | 0 | memcpy(result_s, start, next - start); |
435 | |
|
436 | 0 | result_s += (next - start); |
437 | 0 | start = next + from_len; |
438 | 0 | } |
439 | 0 | memcpy(result_s, start, end - start); |
440 | 0 | return result; |
441 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_replace_delete_substring Unexecuted instantiation: bytearrayobject.c:stringlib_replace_delete_substring |
442 | | |
443 | | /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ |
444 | | static PyObject * |
445 | | stringlib_replace_single_character_in_place(PyObject *self, |
446 | | char from_c, char to_c, |
447 | | Py_ssize_t maxcount) |
448 | 27.9k | { |
449 | 27.9k | const char *self_s, *end; |
450 | 27.9k | char *result_s, *start, *next; |
451 | 27.9k | Py_ssize_t self_len; |
452 | 27.9k | PyObject *result; |
453 | | |
454 | | /* The result string will be the same size */ |
455 | 27.9k | self_s = STRINGLIB_STR(self); |
456 | 27.9k | self_len = STRINGLIB_LEN(self); |
457 | | |
458 | 27.9k | next = findchar(self_s, self_len, from_c); |
459 | | |
460 | 27.9k | if (next == NULL) { |
461 | | /* No matches; return the original bytes */ |
462 | 26.6k | return return_self(self); |
463 | 26.6k | } |
464 | | |
465 | | /* Need to make a new bytes */ |
466 | 1.25k | result = STRINGLIB_NEW(NULL, self_len); |
467 | 1.25k | if (result == NULL) { |
468 | 0 | return NULL; |
469 | 0 | } |
470 | 1.25k | result_s = STRINGLIB_STR(result); |
471 | 1.25k | memcpy(result_s, self_s, self_len); |
472 | | |
473 | | /* change everything in-place, starting with this one */ |
474 | 1.25k | start = result_s + (next - self_s); |
475 | 1.25k | *start = to_c; |
476 | 1.25k | start++; |
477 | 1.25k | end = result_s + self_len; |
478 | | |
479 | 2.01k | while (--maxcount > 0) { |
480 | 2.01k | next = findchar(start, end - start, from_c); |
481 | 2.01k | if (next == NULL) |
482 | 1.25k | break; |
483 | 760 | *next = to_c; |
484 | 760 | start = next + 1; |
485 | 760 | } |
486 | | |
487 | 1.25k | return result; |
488 | 1.25k | } bytesobject.c:stringlib_replace_single_character_in_place Line | Count | Source | 448 | 27.9k | { | 449 | 27.9k | const char *self_s, *end; | 450 | 27.9k | char *result_s, *start, *next; | 451 | 27.9k | Py_ssize_t self_len; | 452 | 27.9k | PyObject *result; | 453 | | | 454 | | /* The result string will be the same size */ | 455 | 27.9k | self_s = STRINGLIB_STR(self); | 456 | 27.9k | self_len = STRINGLIB_LEN(self); | 457 | | | 458 | 27.9k | next = findchar(self_s, self_len, from_c); | 459 | | | 460 | 27.9k | if (next == NULL) { | 461 | | /* No matches; return the original bytes */ | 462 | 26.6k | return return_self(self); | 463 | 26.6k | } | 464 | | | 465 | | /* Need to make a new bytes */ | 466 | 1.25k | result = STRINGLIB_NEW(NULL, self_len); | 467 | 1.25k | if (result == NULL) { | 468 | 0 | return NULL; | 469 | 0 | } | 470 | 1.25k | result_s = STRINGLIB_STR(result); | 471 | 1.25k | memcpy(result_s, self_s, self_len); | 472 | | | 473 | | /* change everything in-place, starting with this one */ | 474 | 1.25k | start = result_s + (next - self_s); | 475 | 1.25k | *start = to_c; | 476 | 1.25k | start++; | 477 | 1.25k | end = result_s + self_len; | 478 | | | 479 | 2.01k | while (--maxcount > 0) { | 480 | 2.01k | next = findchar(start, end - start, from_c); | 481 | 2.01k | if (next == NULL) | 482 | 1.25k | break; | 483 | 760 | *next = to_c; | 484 | 760 | start = next + 1; | 485 | 760 | } | 486 | | | 487 | 1.25k | return result; | 488 | 1.25k | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_replace_single_character_in_place |
489 | | |
490 | | /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ |
491 | | static PyObject * |
492 | | stringlib_replace_substring_in_place(PyObject *self, |
493 | | const char *from_s, Py_ssize_t from_len, |
494 | | const char *to_s, Py_ssize_t to_len, |
495 | | Py_ssize_t maxcount) |
496 | 0 | { |
497 | 0 | const char *self_s, *end; |
498 | 0 | char *result_s, *start; |
499 | 0 | Py_ssize_t self_len, offset; |
500 | 0 | PyObject *result; |
501 | | |
502 | | /* The result bytes will be the same size */ |
503 | |
|
504 | 0 | self_s = STRINGLIB_STR(self); |
505 | 0 | self_len = STRINGLIB_LEN(self); |
506 | |
|
507 | 0 | offset = stringlib_find(self_s, self_len, |
508 | 0 | from_s, from_len, |
509 | 0 | 0); |
510 | 0 | if (offset == -1) { |
511 | | /* No matches; return the original bytes */ |
512 | 0 | return return_self(self); |
513 | 0 | } |
514 | | |
515 | | /* Need to make a new bytes */ |
516 | 0 | result = STRINGLIB_NEW(NULL, self_len); |
517 | 0 | if (result == NULL) { |
518 | 0 | return NULL; |
519 | 0 | } |
520 | 0 | result_s = STRINGLIB_STR(result); |
521 | 0 | memcpy(result_s, self_s, self_len); |
522 | | |
523 | | /* change everything in-place, starting with this one */ |
524 | 0 | start = result_s + offset; |
525 | 0 | memcpy(start, to_s, from_len); |
526 | 0 | start += from_len; |
527 | 0 | end = result_s + self_len; |
528 | |
|
529 | 0 | while ( --maxcount > 0) { |
530 | 0 | offset = stringlib_find(start, end - start, |
531 | 0 | from_s, from_len, |
532 | 0 | 0); |
533 | 0 | if (offset == -1) |
534 | 0 | break; |
535 | 0 | memcpy(start + offset, to_s, from_len); |
536 | 0 | start += offset + from_len; |
537 | 0 | } |
538 | |
|
539 | 0 | return result; |
540 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_replace_substring_in_place Unexecuted instantiation: bytearrayobject.c:stringlib_replace_substring_in_place |
541 | | |
542 | | /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ |
543 | | static PyObject * |
544 | | stringlib_replace_single_character(PyObject *self, |
545 | | char from_c, |
546 | | const char *to_s, Py_ssize_t to_len, |
547 | | Py_ssize_t maxcount) |
548 | 0 | { |
549 | 0 | const char *self_s, *start, *next, *end; |
550 | 0 | char *result_s; |
551 | 0 | Py_ssize_t self_len, result_len; |
552 | 0 | Py_ssize_t count; |
553 | 0 | PyObject *result; |
554 | |
|
555 | 0 | self_s = STRINGLIB_STR(self); |
556 | 0 | self_len = STRINGLIB_LEN(self); |
557 | |
|
558 | 0 | count = countchar(self_s, self_len, from_c, maxcount); |
559 | 0 | if (count == 0) { |
560 | | /* no matches, return unchanged */ |
561 | 0 | return return_self(self); |
562 | 0 | } |
563 | | |
564 | | /* use the difference between current and new, hence the "-1" */ |
565 | | /* result_len = self_len + count * (to_len-1) */ |
566 | 0 | assert(count > 0); |
567 | 0 | if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { |
568 | 0 | PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
569 | 0 | return NULL; |
570 | 0 | } |
571 | 0 | result_len = self_len + count * (to_len - 1); |
572 | |
|
573 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
574 | 0 | if (result == NULL) { |
575 | 0 | return NULL; |
576 | 0 | } |
577 | 0 | result_s = STRINGLIB_STR(result); |
578 | |
|
579 | 0 | start = self_s; |
580 | 0 | end = self_s + self_len; |
581 | 0 | while (count-- > 0) { |
582 | 0 | next = findchar(start, end - start, from_c); |
583 | 0 | if (next == NULL) |
584 | 0 | break; |
585 | | |
586 | 0 | if (next == start) { |
587 | | /* replace with the 'to' */ |
588 | 0 | memcpy(result_s, to_s, to_len); |
589 | 0 | result_s += to_len; |
590 | 0 | start += 1; |
591 | 0 | } else { |
592 | | /* copy the unchanged old then the 'to' */ |
593 | 0 | memcpy(result_s, start, next - start); |
594 | 0 | result_s += (next - start); |
595 | 0 | memcpy(result_s, to_s, to_len); |
596 | 0 | result_s += to_len; |
597 | 0 | start = next + 1; |
598 | 0 | } |
599 | 0 | } |
600 | | /* Copy the remainder of the remaining bytes */ |
601 | 0 | memcpy(result_s, start, end - start); |
602 | |
|
603 | 0 | return result; |
604 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_replace_single_character Unexecuted instantiation: bytearrayobject.c:stringlib_replace_single_character |
605 | | |
606 | | /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ |
607 | | static PyObject * |
608 | | stringlib_replace_substring(PyObject *self, |
609 | | const char *from_s, Py_ssize_t from_len, |
610 | | const char *to_s, Py_ssize_t to_len, |
611 | | Py_ssize_t maxcount) |
612 | 0 | { |
613 | 0 | const char *self_s, *start, *next, *end; |
614 | 0 | char *result_s; |
615 | 0 | Py_ssize_t self_len, result_len; |
616 | 0 | Py_ssize_t count, offset; |
617 | 0 | PyObject *result; |
618 | |
|
619 | 0 | self_s = STRINGLIB_STR(self); |
620 | 0 | self_len = STRINGLIB_LEN(self); |
621 | |
|
622 | 0 | count = stringlib_count(self_s, self_len, |
623 | 0 | from_s, from_len, |
624 | 0 | maxcount); |
625 | |
|
626 | 0 | if (count == 0) { |
627 | | /* no matches, return unchanged */ |
628 | 0 | return return_self(self); |
629 | 0 | } |
630 | | |
631 | | /* Check for overflow */ |
632 | | /* result_len = self_len + count * (to_len-from_len) */ |
633 | 0 | assert(count > 0); |
634 | 0 | if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { |
635 | 0 | PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
636 | 0 | return NULL; |
637 | 0 | } |
638 | 0 | result_len = self_len + count * (to_len - from_len); |
639 | |
|
640 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
641 | 0 | if (result == NULL) { |
642 | 0 | return NULL; |
643 | 0 | } |
644 | 0 | result_s = STRINGLIB_STR(result); |
645 | |
|
646 | 0 | start = self_s; |
647 | 0 | end = self_s + self_len; |
648 | 0 | while (count-- > 0) { |
649 | 0 | offset = stringlib_find(start, end - start, |
650 | 0 | from_s, from_len, |
651 | 0 | 0); |
652 | 0 | if (offset == -1) |
653 | 0 | break; |
654 | 0 | next = start + offset; |
655 | 0 | if (next == start) { |
656 | | /* replace with the 'to' */ |
657 | 0 | memcpy(result_s, to_s, to_len); |
658 | 0 | result_s += to_len; |
659 | 0 | start += from_len; |
660 | 0 | } else { |
661 | | /* copy the unchanged old then the 'to' */ |
662 | 0 | memcpy(result_s, start, next - start); |
663 | 0 | result_s += (next - start); |
664 | 0 | memcpy(result_s, to_s, to_len); |
665 | 0 | result_s += to_len; |
666 | 0 | start = next + from_len; |
667 | 0 | } |
668 | 0 | } |
669 | | /* Copy the remainder of the remaining bytes */ |
670 | 0 | memcpy(result_s, start, end - start); |
671 | |
|
672 | 0 | return result; |
673 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_replace_substring Unexecuted instantiation: bytearrayobject.c:stringlib_replace_substring |
674 | | |
675 | | |
676 | | static PyObject * |
677 | | stringlib_replace(PyObject *self, |
678 | | const char *from_s, Py_ssize_t from_len, |
679 | | const char *to_s, Py_ssize_t to_len, |
680 | | Py_ssize_t maxcount) |
681 | 28.1k | { |
682 | 28.1k | if (STRINGLIB_LEN(self) < from_len) { |
683 | | /* nothing to do; return the original bytes */ |
684 | 271 | return return_self(self); |
685 | 271 | } |
686 | 27.9k | if (maxcount < 0) { |
687 | 27.9k | maxcount = PY_SSIZE_T_MAX; |
688 | 27.9k | } else if (maxcount == 0) { |
689 | | /* nothing to do; return the original bytes */ |
690 | 0 | return return_self(self); |
691 | 0 | } |
692 | | |
693 | | /* Handle zero-length special cases */ |
694 | 27.9k | if (from_len == 0) { |
695 | 0 | if (to_len == 0) { |
696 | | /* nothing to do; return the original bytes */ |
697 | 0 | return return_self(self); |
698 | 0 | } |
699 | | /* insert the 'to' bytes everywhere. */ |
700 | | /* >>> b"Python".replace(b"", b".") */ |
701 | | /* b'.P.y.t.h.o.n.' */ |
702 | 0 | return stringlib_replace_interleave(self, to_s, to_len, maxcount); |
703 | 0 | } |
704 | | |
705 | 27.9k | if (to_len == 0) { |
706 | | /* delete all occurrences of 'from' bytes */ |
707 | 0 | if (from_len == 1) { |
708 | 0 | return stringlib_replace_delete_single_character( |
709 | 0 | self, from_s[0], maxcount); |
710 | 0 | } else { |
711 | 0 | return stringlib_replace_delete_substring( |
712 | 0 | self, from_s, from_len, maxcount); |
713 | 0 | } |
714 | 0 | } |
715 | | |
716 | | /* Handle special case where both bytes have the same length */ |
717 | | |
718 | 27.9k | if (from_len == to_len) { |
719 | 27.9k | if (from_len == 1) { |
720 | 27.9k | return stringlib_replace_single_character_in_place( |
721 | 27.9k | self, from_s[0], to_s[0], maxcount); |
722 | 27.9k | } else { |
723 | 0 | return stringlib_replace_substring_in_place( |
724 | 0 | self, from_s, from_len, to_s, to_len, maxcount); |
725 | 0 | } |
726 | 27.9k | } |
727 | | |
728 | | /* Otherwise use the more generic algorithms */ |
729 | 0 | if (from_len == 1) { |
730 | 0 | return stringlib_replace_single_character( |
731 | 0 | self, from_s[0], to_s, to_len, maxcount); |
732 | 0 | } else { |
733 | | /* len('from')>=2, len('to')>=1 */ |
734 | 0 | return stringlib_replace_substring( |
735 | 0 | self, from_s, from_len, to_s, to_len, maxcount); |
736 | 0 | } |
737 | 0 | } bytesobject.c:stringlib_replace Line | Count | Source | 681 | 28.1k | { | 682 | 28.1k | if (STRINGLIB_LEN(self) < from_len) { | 683 | | /* nothing to do; return the original bytes */ | 684 | 271 | return return_self(self); | 685 | 271 | } | 686 | 27.9k | if (maxcount < 0) { | 687 | 27.9k | maxcount = PY_SSIZE_T_MAX; | 688 | 27.9k | } else if (maxcount == 0) { | 689 | | /* nothing to do; return the original bytes */ | 690 | 0 | return return_self(self); | 691 | 0 | } | 692 | | | 693 | | /* Handle zero-length special cases */ | 694 | 27.9k | if (from_len == 0) { | 695 | 0 | if (to_len == 0) { | 696 | | /* nothing to do; return the original bytes */ | 697 | 0 | return return_self(self); | 698 | 0 | } | 699 | | /* insert the 'to' bytes everywhere. */ | 700 | | /* >>> b"Python".replace(b"", b".") */ | 701 | | /* b'.P.y.t.h.o.n.' */ | 702 | 0 | return stringlib_replace_interleave(self, to_s, to_len, maxcount); | 703 | 0 | } | 704 | | | 705 | 27.9k | if (to_len == 0) { | 706 | | /* delete all occurrences of 'from' bytes */ | 707 | 0 | if (from_len == 1) { | 708 | 0 | return stringlib_replace_delete_single_character( | 709 | 0 | self, from_s[0], maxcount); | 710 | 0 | } else { | 711 | 0 | return stringlib_replace_delete_substring( | 712 | 0 | self, from_s, from_len, maxcount); | 713 | 0 | } | 714 | 0 | } | 715 | | | 716 | | /* Handle special case where both bytes have the same length */ | 717 | | | 718 | 27.9k | if (from_len == to_len) { | 719 | 27.9k | if (from_len == 1) { | 720 | 27.9k | return stringlib_replace_single_character_in_place( | 721 | 27.9k | self, from_s[0], to_s[0], maxcount); | 722 | 27.9k | } else { | 723 | 0 | return stringlib_replace_substring_in_place( | 724 | 0 | self, from_s, from_len, to_s, to_len, maxcount); | 725 | 0 | } | 726 | 27.9k | } | 727 | | | 728 | | /* Otherwise use the more generic algorithms */ | 729 | 0 | if (from_len == 1) { | 730 | 0 | return stringlib_replace_single_character( | 731 | 0 | self, from_s[0], to_s, to_len, 
maxcount); | 732 | 0 | } else { | 733 | | /* len('from')>=2, len('to')>=1 */ | 734 | 0 | return stringlib_replace_substring( | 735 | 0 | self, from_s, from_len, to_s, to_len, maxcount); | 736 | 0 | } | 737 | 0 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_replace |
738 | | |
739 | | #undef findchar |