/src/Python-3.8.3/Objects/stringlib/transmogrify.h
Line | Count | Source (jump to first uncovered line) |
1 | | #if STRINGLIB_IS_UNICODE |
2 | | # error "transmogrify.h only compatible with byte-wise strings" |
3 | | #endif |
4 | | |
5 | | /* the more complicated methods. parts of these should be pulled out into the |
6 | | shared code in bytes_methods.c to cut down on duplicate code bloat. */ |
7 | | |
8 | | /*[clinic input] |
9 | | class B "PyObject *" "&PyType_Type" |
10 | | [clinic start generated code]*/ |
11 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/ |
12 | | |
13 | | #include "clinic/transmogrify.h.h" |
14 | | |
15 | | static inline PyObject * |
16 | | return_self(PyObject *self) |
17 | 0 | { |
18 | | #if !STRINGLIB_MUTABLE |
19 | 0 | if (STRINGLIB_CHECK_EXACT(self)) { |
20 | 0 | Py_INCREF(self); |
21 | 0 | return self; |
22 | 0 | } |
23 | 0 | #endif |
24 | 0 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
25 | 0 | } Unexecuted instantiation: bytearrayobject.c:return_self Unexecuted instantiation: bytesobject.c:return_self |
26 | | |
27 | | /*[clinic input] |
28 | | B.expandtabs as stringlib_expandtabs |
29 | | |
30 | | tabsize: int = 8 |
31 | | |
32 | | Return a copy where all tab characters are expanded using spaces. |
33 | | |
34 | | If tabsize is not given, a tab size of 8 characters is assumed. |
35 | | [clinic start generated code]*/ |
36 | | |
37 | | static PyObject * |
38 | | stringlib_expandtabs_impl(PyObject *self, int tabsize) |
39 | | /*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/ |
40 | 0 | { |
41 | 0 | const char *e, *p; |
42 | 0 | char *q; |
43 | 0 | Py_ssize_t i, j; |
44 | 0 | PyObject *u; |
45 | | |
46 | | /* First pass: determine size of output string */ |
47 | 0 | i = j = 0; |
48 | 0 | e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); |
49 | 0 | for (p = STRINGLIB_STR(self); p < e; p++) { |
50 | 0 | if (*p == '\t') { |
51 | 0 | if (tabsize > 0) { |
52 | 0 | Py_ssize_t incr = tabsize - (j % tabsize); |
53 | 0 | if (j > PY_SSIZE_T_MAX - incr) |
54 | 0 | goto overflow; |
55 | 0 | j += incr; |
56 | 0 | } |
57 | 0 | } |
58 | 0 | else { |
59 | 0 | if (j > PY_SSIZE_T_MAX - 1) |
60 | 0 | goto overflow; |
61 | 0 | j++; |
62 | 0 | if (*p == '\n' || *p == '\r') { |
63 | 0 | if (i > PY_SSIZE_T_MAX - j) |
64 | 0 | goto overflow; |
65 | 0 | i += j; |
66 | 0 | j = 0; |
67 | 0 | } |
68 | 0 | } |
69 | 0 | } |
70 | | |
71 | 0 | if (i > PY_SSIZE_T_MAX - j) |
72 | 0 | goto overflow; |
73 | | |
74 | | /* Second pass: create output string and fill it */ |
75 | 0 | u = STRINGLIB_NEW(NULL, i + j); |
76 | 0 | if (!u) |
77 | 0 | return NULL; |
78 | | |
79 | 0 | j = 0; |
80 | 0 | q = STRINGLIB_STR(u); |
81 | |
|
82 | 0 | for (p = STRINGLIB_STR(self); p < e; p++) { |
83 | 0 | if (*p == '\t') { |
84 | 0 | if (tabsize > 0) { |
85 | 0 | i = tabsize - (j % tabsize); |
86 | 0 | j += i; |
87 | 0 | while (i--) |
88 | 0 | *q++ = ' '; |
89 | 0 | } |
90 | 0 | } |
91 | 0 | else { |
92 | 0 | j++; |
93 | 0 | *q++ = *p; |
94 | 0 | if (*p == '\n' || *p == '\r') |
95 | 0 | j = 0; |
96 | 0 | } |
97 | 0 | } |
98 | |
|
99 | 0 | return u; |
100 | 0 | overflow: |
101 | 0 | PyErr_SetString(PyExc_OverflowError, "result too long"); |
102 | 0 | return NULL; |
103 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_expandtabs_impl Unexecuted instantiation: bytesobject.c:stringlib_expandtabs_impl |
104 | | |
105 | | static inline PyObject * |
106 | | pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) |
107 | 0 | { |
108 | 0 | PyObject *u; |
109 | |
|
110 | 0 | if (left < 0) |
111 | 0 | left = 0; |
112 | 0 | if (right < 0) |
113 | 0 | right = 0; |
114 | |
|
115 | 0 | if (left == 0 && right == 0) { |
116 | 0 | return return_self(self); |
117 | 0 | } |
118 | | |
119 | 0 | u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right); |
120 | 0 | if (u) { |
121 | 0 | if (left) |
122 | 0 | memset(STRINGLIB_STR(u), fill, left); |
123 | 0 | memcpy(STRINGLIB_STR(u) + left, |
124 | 0 | STRINGLIB_STR(self), |
125 | 0 | STRINGLIB_LEN(self)); |
126 | 0 | if (right) |
127 | 0 | memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), |
128 | 0 | fill, right); |
129 | 0 | } |
130 | |
|
131 | 0 | return u; |
132 | 0 | } Unexecuted instantiation: bytearrayobject.c:pad Unexecuted instantiation: bytesobject.c:pad |
133 | | |
134 | | /*[clinic input] |
135 | | B.ljust as stringlib_ljust |
136 | | |
137 | | width: Py_ssize_t |
138 | | fillchar: char = b' ' |
139 | | / |
140 | | |
141 | | Return a left-justified string of length width. |
142 | | |
143 | | Padding is done using the specified fill character. |
144 | | [clinic start generated code]*/ |
145 | | |
146 | | static PyObject * |
147 | | stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar) |
148 | | /*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/ |
149 | 0 | { |
150 | 0 | if (STRINGLIB_LEN(self) >= width) { |
151 | 0 | return return_self(self); |
152 | 0 | } |
153 | | |
154 | 0 | return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); |
155 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_ljust_impl Unexecuted instantiation: bytesobject.c:stringlib_ljust_impl |
156 | | |
157 | | |
158 | | /*[clinic input] |
159 | | B.rjust as stringlib_rjust |
160 | | |
161 | | width: Py_ssize_t |
162 | | fillchar: char = b' ' |
163 | | / |
164 | | |
165 | | Return a right-justified string of length width. |
166 | | |
167 | | Padding is done using the specified fill character. |
168 | | [clinic start generated code]*/ |
169 | | |
170 | | static PyObject * |
171 | | stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar) |
172 | | /*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/ |
173 | 0 | { |
174 | 0 | if (STRINGLIB_LEN(self) >= width) { |
175 | 0 | return return_self(self); |
176 | 0 | } |
177 | | |
178 | 0 | return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); |
179 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_rjust_impl Unexecuted instantiation: bytesobject.c:stringlib_rjust_impl |
180 | | |
181 | | |
182 | | /*[clinic input] |
183 | | B.center as stringlib_center |
184 | | |
185 | | width: Py_ssize_t |
186 | | fillchar: char = b' ' |
187 | | / |
188 | | |
189 | | Return a centered string of length width. |
190 | | |
191 | | Padding is done using the specified fill character. |
192 | | [clinic start generated code]*/ |
193 | | |
194 | | static PyObject * |
195 | | stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar) |
196 | | /*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/ |
197 | 0 | { |
198 | 0 | Py_ssize_t marg, left; |
199 | |
|
200 | 0 | if (STRINGLIB_LEN(self) >= width) { |
201 | 0 | return return_self(self); |
202 | 0 | } |
203 | | |
204 | 0 | marg = width - STRINGLIB_LEN(self); |
205 | 0 | left = marg / 2 + (marg & width & 1); |
206 | |
|
207 | 0 | return pad(self, left, marg - left, fillchar); |
208 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_center_impl Unexecuted instantiation: bytesobject.c:stringlib_center_impl |
209 | | |
210 | | /*[clinic input] |
211 | | B.zfill as stringlib_zfill |
212 | | |
213 | | width: Py_ssize_t |
214 | | / |
215 | | |
216 | | Pad a numeric string with zeros on the left, to fill a field of the given width. |
217 | | |
218 | | The original string is never truncated. |
219 | | [clinic start generated code]*/ |
220 | | |
221 | | static PyObject * |
222 | | stringlib_zfill_impl(PyObject *self, Py_ssize_t width) |
223 | | /*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/ |
224 | 0 | { |
225 | 0 | Py_ssize_t fill; |
226 | 0 | PyObject *s; |
227 | 0 | char *p; |
228 | |
|
229 | 0 | if (STRINGLIB_LEN(self) >= width) { |
230 | 0 | return return_self(self); |
231 | 0 | } |
232 | | |
233 | 0 | fill = width - STRINGLIB_LEN(self); |
234 | |
|
235 | 0 | s = pad(self, fill, 0, '0'); |
236 | |
|
237 | 0 | if (s == NULL) |
238 | 0 | return NULL; |
239 | | |
240 | 0 | p = STRINGLIB_STR(s); |
241 | 0 | if (p[fill] == '+' || p[fill] == '-') { |
242 | | /* move sign to beginning of string */ |
243 | 0 | p[0] = p[fill]; |
244 | 0 | p[fill] = '0'; |
245 | 0 | } |
246 | |
|
247 | 0 | return s; |
248 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_zfill_impl Unexecuted instantiation: bytesobject.c:stringlib_zfill_impl |
249 | | |
250 | | |
251 | | /* find and count characters and substrings */ |
252 | | |
/* Locate the first byte equal to `c` among the `target_len` bytes starting
   at `target`.  Expands to a memchr() call whose result is cast to char *,
   so it yields NULL when the byte is absent and a non-const pointer even
   when `target` is a pointer to const. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
255 | | |
256 | | |
257 | | static Py_ssize_t |
258 | | countchar(const char *target, Py_ssize_t target_len, char c, |
259 | | Py_ssize_t maxcount) |
260 | 0 | { |
261 | 0 | Py_ssize_t count = 0; |
262 | 0 | const char *start = target; |
263 | 0 | const char *end = target + target_len; |
264 | |
|
265 | 0 | while ((start = findchar(start, end - start, c)) != NULL) { |
266 | 0 | count++; |
267 | 0 | if (count >= maxcount) |
268 | 0 | break; |
269 | 0 | start += 1; |
270 | 0 | } |
271 | 0 | return count; |
272 | 0 | } Unexecuted instantiation: bytearrayobject.c:countchar Unexecuted instantiation: bytesobject.c:countchar |
273 | | |
274 | | |
275 | | /* Algorithms for different cases of string replacement */ |
276 | | |
277 | | /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ |
278 | | static PyObject * |
279 | | stringlib_replace_interleave(PyObject *self, |
280 | | const char *to_s, Py_ssize_t to_len, |
281 | | Py_ssize_t maxcount) |
282 | 0 | { |
283 | 0 | const char *self_s; |
284 | 0 | char *result_s; |
285 | 0 | Py_ssize_t self_len, result_len; |
286 | 0 | Py_ssize_t count, i; |
287 | 0 | PyObject *result; |
288 | |
|
289 | 0 | self_len = STRINGLIB_LEN(self); |
290 | | |
291 | | /* 1 at the end plus 1 after every character; |
292 | | count = min(maxcount, self_len + 1) */ |
293 | 0 | if (maxcount <= self_len) { |
294 | 0 | count = maxcount; |
295 | 0 | } |
296 | 0 | else { |
297 | | /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ |
298 | 0 | count = self_len + 1; |
299 | 0 | } |
300 | | |
301 | | /* Check for overflow */ |
302 | | /* result_len = count * to_len + self_len; */ |
303 | 0 | assert(count > 0); |
304 | 0 | if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { |
305 | 0 | PyErr_SetString(PyExc_OverflowError, |
306 | 0 | "replace bytes is too long"); |
307 | 0 | return NULL; |
308 | 0 | } |
309 | 0 | result_len = count * to_len + self_len; |
310 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
311 | 0 | if (result == NULL) { |
312 | 0 | return NULL; |
313 | 0 | } |
314 | | |
315 | 0 | self_s = STRINGLIB_STR(self); |
316 | 0 | result_s = STRINGLIB_STR(result); |
317 | |
|
318 | 0 | if (to_len > 1) { |
319 | | /* Lay the first one down (guaranteed this will occur) */ |
320 | 0 | memcpy(result_s, to_s, to_len); |
321 | 0 | result_s += to_len; |
322 | 0 | count -= 1; |
323 | |
|
324 | 0 | for (i = 0; i < count; i++) { |
325 | 0 | *result_s++ = *self_s++; |
326 | 0 | memcpy(result_s, to_s, to_len); |
327 | 0 | result_s += to_len; |
328 | 0 | } |
329 | 0 | } |
330 | 0 | else { |
331 | 0 | result_s[0] = to_s[0]; |
332 | 0 | result_s += to_len; |
333 | 0 | count -= 1; |
334 | 0 | for (i = 0; i < count; i++) { |
335 | 0 | *result_s++ = *self_s++; |
336 | 0 | result_s[0] = to_s[0]; |
337 | 0 | result_s += to_len; |
338 | 0 | } |
339 | 0 | } |
340 | | |
341 | | /* Copy the rest of the original string */ |
342 | 0 | memcpy(result_s, self_s, self_len - i); |
343 | |
|
344 | 0 | return result; |
345 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_interleave Unexecuted instantiation: bytesobject.c:stringlib_replace_interleave |
346 | | |
347 | | /* Special case for deleting a single character */ |
348 | | /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ |
349 | | static PyObject * |
350 | | stringlib_replace_delete_single_character(PyObject *self, |
351 | | char from_c, Py_ssize_t maxcount) |
352 | 0 | { |
353 | 0 | const char *self_s, *start, *next, *end; |
354 | 0 | char *result_s; |
355 | 0 | Py_ssize_t self_len, result_len; |
356 | 0 | Py_ssize_t count; |
357 | 0 | PyObject *result; |
358 | |
|
359 | 0 | self_len = STRINGLIB_LEN(self); |
360 | 0 | self_s = STRINGLIB_STR(self); |
361 | |
|
362 | 0 | count = countchar(self_s, self_len, from_c, maxcount); |
363 | 0 | if (count == 0) { |
364 | 0 | return return_self(self); |
365 | 0 | } |
366 | | |
367 | 0 | result_len = self_len - count; /* from_len == 1 */ |
368 | 0 | assert(result_len>=0); |
369 | |
|
370 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
371 | 0 | if (result == NULL) { |
372 | 0 | return NULL; |
373 | 0 | } |
374 | 0 | result_s = STRINGLIB_STR(result); |
375 | |
|
376 | 0 | start = self_s; |
377 | 0 | end = self_s + self_len; |
378 | 0 | while (count-- > 0) { |
379 | 0 | next = findchar(start, end - start, from_c); |
380 | 0 | if (next == NULL) |
381 | 0 | break; |
382 | 0 | memcpy(result_s, start, next - start); |
383 | 0 | result_s += (next - start); |
384 | 0 | start = next + 1; |
385 | 0 | } |
386 | 0 | memcpy(result_s, start, end - start); |
387 | |
|
388 | 0 | return result; |
389 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_delete_single_character Unexecuted instantiation: bytesobject.c:stringlib_replace_delete_single_character |
390 | | |
391 | | /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ |
392 | | |
393 | | static PyObject * |
394 | | stringlib_replace_delete_substring(PyObject *self, |
395 | | const char *from_s, Py_ssize_t from_len, |
396 | | Py_ssize_t maxcount) |
397 | 0 | { |
398 | 0 | const char *self_s, *start, *next, *end; |
399 | 0 | char *result_s; |
400 | 0 | Py_ssize_t self_len, result_len; |
401 | 0 | Py_ssize_t count, offset; |
402 | 0 | PyObject *result; |
403 | |
|
404 | 0 | self_len = STRINGLIB_LEN(self); |
405 | 0 | self_s = STRINGLIB_STR(self); |
406 | |
|
407 | 0 | count = stringlib_count(self_s, self_len, |
408 | 0 | from_s, from_len, |
409 | 0 | maxcount); |
410 | |
|
411 | 0 | if (count == 0) { |
412 | | /* no matches */ |
413 | 0 | return return_self(self); |
414 | 0 | } |
415 | | |
416 | 0 | result_len = self_len - (count * from_len); |
417 | 0 | assert (result_len>=0); |
418 | |
|
419 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
420 | 0 | if (result == NULL) { |
421 | 0 | return NULL; |
422 | 0 | } |
423 | 0 | result_s = STRINGLIB_STR(result); |
424 | |
|
425 | 0 | start = self_s; |
426 | 0 | end = self_s + self_len; |
427 | 0 | while (count-- > 0) { |
428 | 0 | offset = stringlib_find(start, end - start, |
429 | 0 | from_s, from_len, |
430 | 0 | 0); |
431 | 0 | if (offset == -1) |
432 | 0 | break; |
433 | 0 | next = start + offset; |
434 | |
|
435 | 0 | memcpy(result_s, start, next - start); |
436 | |
|
437 | 0 | result_s += (next - start); |
438 | 0 | start = next + from_len; |
439 | 0 | } |
440 | 0 | memcpy(result_s, start, end - start); |
441 | 0 | return result; |
442 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_delete_substring Unexecuted instantiation: bytesobject.c:stringlib_replace_delete_substring |
443 | | |
444 | | /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ |
445 | | static PyObject * |
446 | | stringlib_replace_single_character_in_place(PyObject *self, |
447 | | char from_c, char to_c, |
448 | | Py_ssize_t maxcount) |
449 | 0 | { |
450 | 0 | const char *self_s, *end; |
451 | 0 | char *result_s, *start, *next; |
452 | 0 | Py_ssize_t self_len; |
453 | 0 | PyObject *result; |
454 | | |
455 | | /* The result string will be the same size */ |
456 | 0 | self_s = STRINGLIB_STR(self); |
457 | 0 | self_len = STRINGLIB_LEN(self); |
458 | |
|
459 | 0 | next = findchar(self_s, self_len, from_c); |
460 | |
|
461 | 0 | if (next == NULL) { |
462 | | /* No matches; return the original bytes */ |
463 | 0 | return return_self(self); |
464 | 0 | } |
465 | | |
466 | | /* Need to make a new bytes */ |
467 | 0 | result = STRINGLIB_NEW(NULL, self_len); |
468 | 0 | if (result == NULL) { |
469 | 0 | return NULL; |
470 | 0 | } |
471 | 0 | result_s = STRINGLIB_STR(result); |
472 | 0 | memcpy(result_s, self_s, self_len); |
473 | | |
474 | | /* change everything in-place, starting with this one */ |
475 | 0 | start = result_s + (next - self_s); |
476 | 0 | *start = to_c; |
477 | 0 | start++; |
478 | 0 | end = result_s + self_len; |
479 | |
|
480 | 0 | while (--maxcount > 0) { |
481 | 0 | next = findchar(start, end - start, from_c); |
482 | 0 | if (next == NULL) |
483 | 0 | break; |
484 | 0 | *next = to_c; |
485 | 0 | start = next + 1; |
486 | 0 | } |
487 | |
|
488 | 0 | return result; |
489 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_single_character_in_place Unexecuted instantiation: bytesobject.c:stringlib_replace_single_character_in_place |
490 | | |
491 | | /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ |
492 | | static PyObject * |
493 | | stringlib_replace_substring_in_place(PyObject *self, |
494 | | const char *from_s, Py_ssize_t from_len, |
495 | | const char *to_s, Py_ssize_t to_len, |
496 | | Py_ssize_t maxcount) |
497 | 0 | { |
498 | 0 | const char *self_s, *end; |
499 | 0 | char *result_s, *start; |
500 | 0 | Py_ssize_t self_len, offset; |
501 | 0 | PyObject *result; |
502 | | |
503 | | /* The result bytes will be the same size */ |
504 | |
|
505 | 0 | self_s = STRINGLIB_STR(self); |
506 | 0 | self_len = STRINGLIB_LEN(self); |
507 | |
|
508 | 0 | offset = stringlib_find(self_s, self_len, |
509 | 0 | from_s, from_len, |
510 | 0 | 0); |
511 | 0 | if (offset == -1) { |
512 | | /* No matches; return the original bytes */ |
513 | 0 | return return_self(self); |
514 | 0 | } |
515 | | |
516 | | /* Need to make a new bytes */ |
517 | 0 | result = STRINGLIB_NEW(NULL, self_len); |
518 | 0 | if (result == NULL) { |
519 | 0 | return NULL; |
520 | 0 | } |
521 | 0 | result_s = STRINGLIB_STR(result); |
522 | 0 | memcpy(result_s, self_s, self_len); |
523 | | |
524 | | /* change everything in-place, starting with this one */ |
525 | 0 | start = result_s + offset; |
526 | 0 | memcpy(start, to_s, from_len); |
527 | 0 | start += from_len; |
528 | 0 | end = result_s + self_len; |
529 | |
|
530 | 0 | while ( --maxcount > 0) { |
531 | 0 | offset = stringlib_find(start, end - start, |
532 | 0 | from_s, from_len, |
533 | 0 | 0); |
534 | 0 | if (offset == -1) |
535 | 0 | break; |
536 | 0 | memcpy(start + offset, to_s, from_len); |
537 | 0 | start += offset + from_len; |
538 | 0 | } |
539 | |
|
540 | 0 | return result; |
541 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_substring_in_place Unexecuted instantiation: bytesobject.c:stringlib_replace_substring_in_place |
542 | | |
543 | | /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ |
544 | | static PyObject * |
545 | | stringlib_replace_single_character(PyObject *self, |
546 | | char from_c, |
547 | | const char *to_s, Py_ssize_t to_len, |
548 | | Py_ssize_t maxcount) |
549 | 0 | { |
550 | 0 | const char *self_s, *start, *next, *end; |
551 | 0 | char *result_s; |
552 | 0 | Py_ssize_t self_len, result_len; |
553 | 0 | Py_ssize_t count; |
554 | 0 | PyObject *result; |
555 | |
|
556 | 0 | self_s = STRINGLIB_STR(self); |
557 | 0 | self_len = STRINGLIB_LEN(self); |
558 | |
|
559 | 0 | count = countchar(self_s, self_len, from_c, maxcount); |
560 | 0 | if (count == 0) { |
561 | | /* no matches, return unchanged */ |
562 | 0 | return return_self(self); |
563 | 0 | } |
564 | | |
565 | | /* use the difference between current and new, hence the "-1" */ |
566 | | /* result_len = self_len + count * (to_len-1) */ |
567 | 0 | assert(count > 0); |
568 | 0 | if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { |
569 | 0 | PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
570 | 0 | return NULL; |
571 | 0 | } |
572 | 0 | result_len = self_len + count * (to_len - 1); |
573 | |
|
574 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
575 | 0 | if (result == NULL) { |
576 | 0 | return NULL; |
577 | 0 | } |
578 | 0 | result_s = STRINGLIB_STR(result); |
579 | |
|
580 | 0 | start = self_s; |
581 | 0 | end = self_s + self_len; |
582 | 0 | while (count-- > 0) { |
583 | 0 | next = findchar(start, end - start, from_c); |
584 | 0 | if (next == NULL) |
585 | 0 | break; |
586 | | |
587 | 0 | if (next == start) { |
588 | | /* replace with the 'to' */ |
589 | 0 | memcpy(result_s, to_s, to_len); |
590 | 0 | result_s += to_len; |
591 | 0 | start += 1; |
592 | 0 | } else { |
593 | | /* copy the unchanged old then the 'to' */ |
594 | 0 | memcpy(result_s, start, next - start); |
595 | 0 | result_s += (next - start); |
596 | 0 | memcpy(result_s, to_s, to_len); |
597 | 0 | result_s += to_len; |
598 | 0 | start = next + 1; |
599 | 0 | } |
600 | 0 | } |
601 | | /* Copy the remainder of the remaining bytes */ |
602 | 0 | memcpy(result_s, start, end - start); |
603 | |
|
604 | 0 | return result; |
605 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_single_character Unexecuted instantiation: bytesobject.c:stringlib_replace_single_character |
606 | | |
607 | | /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ |
608 | | static PyObject * |
609 | | stringlib_replace_substring(PyObject *self, |
610 | | const char *from_s, Py_ssize_t from_len, |
611 | | const char *to_s, Py_ssize_t to_len, |
612 | | Py_ssize_t maxcount) |
613 | 0 | { |
614 | 0 | const char *self_s, *start, *next, *end; |
615 | 0 | char *result_s; |
616 | 0 | Py_ssize_t self_len, result_len; |
617 | 0 | Py_ssize_t count, offset; |
618 | 0 | PyObject *result; |
619 | |
|
620 | 0 | self_s = STRINGLIB_STR(self); |
621 | 0 | self_len = STRINGLIB_LEN(self); |
622 | |
|
623 | 0 | count = stringlib_count(self_s, self_len, |
624 | 0 | from_s, from_len, |
625 | 0 | maxcount); |
626 | |
|
627 | 0 | if (count == 0) { |
628 | | /* no matches, return unchanged */ |
629 | 0 | return return_self(self); |
630 | 0 | } |
631 | | |
632 | | /* Check for overflow */ |
633 | | /* result_len = self_len + count * (to_len-from_len) */ |
634 | 0 | assert(count > 0); |
635 | 0 | if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { |
636 | 0 | PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
637 | 0 | return NULL; |
638 | 0 | } |
639 | 0 | result_len = self_len + count * (to_len - from_len); |
640 | |
|
641 | 0 | result = STRINGLIB_NEW(NULL, result_len); |
642 | 0 | if (result == NULL) { |
643 | 0 | return NULL; |
644 | 0 | } |
645 | 0 | result_s = STRINGLIB_STR(result); |
646 | |
|
647 | 0 | start = self_s; |
648 | 0 | end = self_s + self_len; |
649 | 0 | while (count-- > 0) { |
650 | 0 | offset = stringlib_find(start, end - start, |
651 | 0 | from_s, from_len, |
652 | 0 | 0); |
653 | 0 | if (offset == -1) |
654 | 0 | break; |
655 | 0 | next = start + offset; |
656 | 0 | if (next == start) { |
657 | | /* replace with the 'to' */ |
658 | 0 | memcpy(result_s, to_s, to_len); |
659 | 0 | result_s += to_len; |
660 | 0 | start += from_len; |
661 | 0 | } else { |
662 | | /* copy the unchanged old then the 'to' */ |
663 | 0 | memcpy(result_s, start, next - start); |
664 | 0 | result_s += (next - start); |
665 | 0 | memcpy(result_s, to_s, to_len); |
666 | 0 | result_s += to_len; |
667 | 0 | start = next + from_len; |
668 | 0 | } |
669 | 0 | } |
670 | | /* Copy the remainder of the remaining bytes */ |
671 | 0 | memcpy(result_s, start, end - start); |
672 | |
|
673 | 0 | return result; |
674 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace_substring Unexecuted instantiation: bytesobject.c:stringlib_replace_substring |
675 | | |
676 | | |
677 | | static PyObject * |
678 | | stringlib_replace(PyObject *self, |
679 | | const char *from_s, Py_ssize_t from_len, |
680 | | const char *to_s, Py_ssize_t to_len, |
681 | | Py_ssize_t maxcount) |
682 | 0 | { |
683 | 0 | if (maxcount < 0) { |
684 | 0 | maxcount = PY_SSIZE_T_MAX; |
685 | 0 | } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) { |
686 | | /* nothing to do; return the original bytes */ |
687 | 0 | return return_self(self); |
688 | 0 | } |
689 | | |
690 | | /* Handle zero-length special cases */ |
691 | 0 | if (from_len == 0) { |
692 | 0 | if (to_len == 0) { |
693 | | /* nothing to do; return the original bytes */ |
694 | 0 | return return_self(self); |
695 | 0 | } |
696 | | /* insert the 'to' bytes everywhere. */ |
697 | | /* >>> b"Python".replace(b"", b".") */ |
698 | | /* b'.P.y.t.h.o.n.' */ |
699 | 0 | return stringlib_replace_interleave(self, to_s, to_len, maxcount); |
700 | 0 | } |
701 | | |
702 | | /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */ |
703 | | /* point for an empty self bytes to generate a non-empty bytes */ |
704 | | /* Special case so the remaining code always gets a non-empty bytes */ |
705 | 0 | if (STRINGLIB_LEN(self) == 0) { |
706 | 0 | return return_self(self); |
707 | 0 | } |
708 | | |
709 | 0 | if (to_len == 0) { |
710 | | /* delete all occurrences of 'from' bytes */ |
711 | 0 | if (from_len == 1) { |
712 | 0 | return stringlib_replace_delete_single_character( |
713 | 0 | self, from_s[0], maxcount); |
714 | 0 | } else { |
715 | 0 | return stringlib_replace_delete_substring( |
716 | 0 | self, from_s, from_len, maxcount); |
717 | 0 | } |
718 | 0 | } |
719 | | |
720 | | /* Handle special case where both bytes have the same length */ |
721 | | |
722 | 0 | if (from_len == to_len) { |
723 | 0 | if (from_len == 1) { |
724 | 0 | return stringlib_replace_single_character_in_place( |
725 | 0 | self, from_s[0], to_s[0], maxcount); |
726 | 0 | } else { |
727 | 0 | return stringlib_replace_substring_in_place( |
728 | 0 | self, from_s, from_len, to_s, to_len, maxcount); |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | | /* Otherwise use the more generic algorithms */ |
733 | 0 | if (from_len == 1) { |
734 | 0 | return stringlib_replace_single_character( |
735 | 0 | self, from_s[0], to_s, to_len, maxcount); |
736 | 0 | } else { |
737 | | /* len('from')>=2, len('to')>=1 */ |
738 | 0 | return stringlib_replace_substring( |
739 | 0 | self, from_s, from_len, to_s, to_len, maxcount); |
740 | 0 | } |
741 | 0 | } Unexecuted instantiation: bytearrayobject.c:stringlib_replace Unexecuted instantiation: bytesobject.c:stringlib_replace |
742 | | |
743 | | #undef findchar |