/src/cpython/Objects/bytes_methods.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "Python.h" |
2 | | #include "pycore_abstract.h" // _PyIndex_Check() |
3 | | #include "pycore_bytes_methods.h" |
4 | | |
5 | | PyDoc_STRVAR_shared(_Py_isspace__doc__, |
6 | | "B.isspace() -> bool\n\ |
7 | | \n\ |
8 | | Return True if all characters in B are whitespace\n\ |
9 | | and there is at least one character in B, False otherwise."); |
10 | | |
11 | | PyObject* |
12 | | _Py_bytes_isspace(const char *cptr, Py_ssize_t len) |
13 | 0 | { |
14 | 0 | const unsigned char *p |
15 | 0 | = (const unsigned char *) cptr; |
16 | 0 | const unsigned char *e; |
17 | | |
18 | | /* Shortcut for single character strings */ |
19 | 0 | if (len == 1 && Py_ISSPACE(*p)) |
20 | 0 | Py_RETURN_TRUE; |
21 | | |
22 | | /* Special case for empty strings */ |
23 | 0 | if (len == 0) |
24 | 0 | Py_RETURN_FALSE; |
25 | | |
26 | 0 | e = p + len; |
27 | 0 | for (; p < e; p++) { |
28 | 0 | if (!Py_ISSPACE(*p)) |
29 | 0 | Py_RETURN_FALSE; |
30 | 0 | } |
31 | 0 | Py_RETURN_TRUE; |
32 | 0 | } |
33 | | |
34 | | |
35 | | PyDoc_STRVAR_shared(_Py_isalpha__doc__, |
36 | | "B.isalpha() -> bool\n\ |
37 | | \n\ |
38 | | Return True if all characters in B are alphabetic\n\ |
39 | | and there is at least one character in B, False otherwise."); |
40 | | |
41 | | PyObject* |
42 | | _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) |
43 | 0 | { |
44 | 0 | const unsigned char *p |
45 | 0 | = (const unsigned char *) cptr; |
46 | 0 | const unsigned char *e; |
47 | | |
48 | | /* Shortcut for single character strings */ |
49 | 0 | if (len == 1 && Py_ISALPHA(*p)) |
50 | 0 | Py_RETURN_TRUE; |
51 | | |
52 | | /* Special case for empty strings */ |
53 | 0 | if (len == 0) |
54 | 0 | Py_RETURN_FALSE; |
55 | | |
56 | 0 | e = p + len; |
57 | 0 | for (; p < e; p++) { |
58 | 0 | if (!Py_ISALPHA(*p)) |
59 | 0 | Py_RETURN_FALSE; |
60 | 0 | } |
61 | 0 | Py_RETURN_TRUE; |
62 | 0 | } |
63 | | |
64 | | |
65 | | PyDoc_STRVAR_shared(_Py_isalnum__doc__, |
66 | | "B.isalnum() -> bool\n\ |
67 | | \n\ |
68 | | Return True if all characters in B are alphanumeric\n\ |
69 | | and there is at least one character in B, False otherwise."); |
70 | | |
71 | | PyObject* |
72 | | _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) |
73 | 0 | { |
74 | 0 | const unsigned char *p |
75 | 0 | = (const unsigned char *) cptr; |
76 | 0 | const unsigned char *e; |
77 | | |
78 | | /* Shortcut for single character strings */ |
79 | 0 | if (len == 1 && Py_ISALNUM(*p)) |
80 | 0 | Py_RETURN_TRUE; |
81 | | |
82 | | /* Special case for empty strings */ |
83 | 0 | if (len == 0) |
84 | 0 | Py_RETURN_FALSE; |
85 | | |
86 | 0 | e = p + len; |
87 | 0 | for (; p < e; p++) { |
88 | 0 | if (!Py_ISALNUM(*p)) |
89 | 0 | Py_RETURN_FALSE; |
90 | 0 | } |
91 | 0 | Py_RETURN_TRUE; |
92 | 0 | } |
93 | | |
94 | | |
95 | | PyDoc_STRVAR_shared(_Py_isdigit__doc__, |
96 | | "B.isdigit() -> bool\n\ |
97 | | \n\ |
98 | | Return True if all characters in B are digits\n\ |
99 | | and there is at least one character in B, False otherwise."); |
100 | | |
101 | | PyObject* |
102 | | _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) |
103 | 0 | { |
104 | 0 | const unsigned char *p |
105 | 0 | = (const unsigned char *) cptr; |
106 | 0 | const unsigned char *e; |
107 | | |
108 | | /* Shortcut for single character strings */ |
109 | 0 | if (len == 1 && Py_ISDIGIT(*p)) |
110 | 0 | Py_RETURN_TRUE; |
111 | | |
112 | | /* Special case for empty strings */ |
113 | 0 | if (len == 0) |
114 | 0 | Py_RETURN_FALSE; |
115 | | |
116 | 0 | e = p + len; |
117 | 0 | for (; p < e; p++) { |
118 | 0 | if (!Py_ISDIGIT(*p)) |
119 | 0 | Py_RETURN_FALSE; |
120 | 0 | } |
121 | 0 | Py_RETURN_TRUE; |
122 | 0 | } |
123 | | |
124 | | |
125 | | PyDoc_STRVAR_shared(_Py_islower__doc__, |
126 | | "B.islower() -> bool\n\ |
127 | | \n\ |
128 | | Return True if all cased characters in B are lowercase and there is\n\ |
129 | | at least one cased character in B, False otherwise."); |
130 | | |
131 | | PyObject* |
132 | | _Py_bytes_islower(const char *cptr, Py_ssize_t len) |
133 | 0 | { |
134 | 0 | const unsigned char *p |
135 | 0 | = (const unsigned char *) cptr; |
136 | 0 | const unsigned char *e; |
137 | 0 | int cased; |
138 | | |
139 | | /* Shortcut for single character strings */ |
140 | 0 | if (len == 1) |
141 | 0 | return PyBool_FromLong(Py_ISLOWER(*p)); |
142 | | |
143 | | /* Special case for empty strings */ |
144 | 0 | if (len == 0) |
145 | 0 | Py_RETURN_FALSE; |
146 | | |
147 | 0 | e = p + len; |
148 | 0 | cased = 0; |
149 | 0 | for (; p < e; p++) { |
150 | 0 | if (Py_ISUPPER(*p)) |
151 | 0 | Py_RETURN_FALSE; |
152 | 0 | else if (!cased && Py_ISLOWER(*p)) |
153 | 0 | cased = 1; |
154 | 0 | } |
155 | 0 | return PyBool_FromLong(cased); |
156 | 0 | } |
157 | | |
158 | | |
159 | | PyDoc_STRVAR_shared(_Py_isupper__doc__, |
160 | | "B.isupper() -> bool\n\ |
161 | | \n\ |
162 | | Return True if all cased characters in B are uppercase and there is\n\ |
163 | | at least one cased character in B, False otherwise."); |
164 | | |
165 | | PyObject* |
166 | | _Py_bytes_isupper(const char *cptr, Py_ssize_t len) |
167 | 0 | { |
168 | 0 | const unsigned char *p |
169 | 0 | = (const unsigned char *) cptr; |
170 | 0 | const unsigned char *e; |
171 | 0 | int cased; |
172 | | |
173 | | /* Shortcut for single character strings */ |
174 | 0 | if (len == 1) |
175 | 0 | return PyBool_FromLong(Py_ISUPPER(*p)); |
176 | | |
177 | | /* Special case for empty strings */ |
178 | 0 | if (len == 0) |
179 | 0 | Py_RETURN_FALSE; |
180 | | |
181 | 0 | e = p + len; |
182 | 0 | cased = 0; |
183 | 0 | for (; p < e; p++) { |
184 | 0 | if (Py_ISLOWER(*p)) |
185 | 0 | Py_RETURN_FALSE; |
186 | 0 | else if (!cased && Py_ISUPPER(*p)) |
187 | 0 | cased = 1; |
188 | 0 | } |
189 | 0 | return PyBool_FromLong(cased); |
190 | 0 | } |
191 | | |
192 | | |
193 | | PyDoc_STRVAR_shared(_Py_istitle__doc__, |
194 | | "B.istitle() -> bool\n\ |
195 | | \n\ |
196 | | Return True if B is a titlecased string and there is at least one\n\ |
197 | | character in B, i.e. uppercase characters may only follow uncased\n\ |
198 | | characters and lowercase characters only cased ones. Return False\n\ |
199 | | otherwise."); |
200 | | |
201 | | PyObject* |
202 | | _Py_bytes_istitle(const char *cptr, Py_ssize_t len) |
203 | 0 | { |
204 | 0 | const unsigned char *p |
205 | 0 | = (const unsigned char *) cptr; |
206 | 0 | const unsigned char *e; |
207 | 0 | int cased, previous_is_cased; |
208 | |
|
209 | 0 | if (len == 1) { |
210 | 0 | if (Py_ISUPPER(*p)) { |
211 | 0 | Py_RETURN_TRUE; |
212 | 0 | } |
213 | 0 | Py_RETURN_FALSE; |
214 | 0 | } |
215 | | |
216 | | /* Special case for empty strings */ |
217 | 0 | if (len == 0) |
218 | 0 | Py_RETURN_FALSE; |
219 | | |
220 | 0 | e = p + len; |
221 | 0 | cased = 0; |
222 | 0 | previous_is_cased = 0; |
223 | 0 | for (; p < e; p++) { |
224 | 0 | const unsigned char ch = *p; |
225 | |
|
226 | 0 | if (Py_ISUPPER(ch)) { |
227 | 0 | if (previous_is_cased) |
228 | 0 | Py_RETURN_FALSE; |
229 | 0 | previous_is_cased = 1; |
230 | 0 | cased = 1; |
231 | 0 | } |
232 | 0 | else if (Py_ISLOWER(ch)) { |
233 | 0 | if (!previous_is_cased) |
234 | 0 | Py_RETURN_FALSE; |
235 | 0 | previous_is_cased = 1; |
236 | 0 | cased = 1; |
237 | 0 | } |
238 | 0 | else |
239 | 0 | previous_is_cased = 0; |
240 | 0 | } |
241 | 0 | return PyBool_FromLong(cased); |
242 | 0 | } |
243 | | |
244 | | |
245 | | PyDoc_STRVAR_shared(_Py_lower__doc__, |
246 | | "B.lower() -> copy of B\n\ |
247 | | \n\ |
248 | | Return a copy of B with all ASCII characters converted to lowercase."); |
249 | | |
250 | | void |
251 | | _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) |
252 | 95.8M | { |
253 | 95.8M | Py_ssize_t i; |
254 | | |
255 | 856M | for (i = 0; i < len; i++) { |
256 | 761M | result[i] = Py_TOLOWER((unsigned char) cptr[i]); |
257 | 761M | } |
258 | 95.8M | } |
259 | | |
260 | | |
261 | | PyDoc_STRVAR_shared(_Py_upper__doc__, |
262 | | "B.upper() -> copy of B\n\ |
263 | | \n\ |
264 | | Return a copy of B with all ASCII characters converted to uppercase."); |
265 | | |
266 | | void |
267 | | _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) |
268 | 18.3k | { |
269 | 18.3k | Py_ssize_t i; |
270 | | |
271 | 460k | for (i = 0; i < len; i++) { |
272 | 442k | result[i] = Py_TOUPPER((unsigned char) cptr[i]); |
273 | 442k | } |
274 | 18.3k | } |
275 | | |
276 | | |
277 | | PyDoc_STRVAR_shared(_Py_title__doc__, |
278 | | "B.title() -> copy of B\n\ |
279 | | \n\ |
280 | | Return a titlecased version of B, i.e. ASCII words start with uppercase\n\ |
281 | | characters, all remaining cased characters have lowercase."); |
282 | | |
283 | | void |
284 | | _Py_bytes_title(char *result, const char *s, Py_ssize_t len) |
285 | 0 | { |
286 | 0 | Py_ssize_t i; |
287 | 0 | int previous_is_cased = 0; |
288 | |
|
289 | 0 | for (i = 0; i < len; i++) { |
290 | 0 | int c = Py_CHARMASK(*s++); |
291 | 0 | if (Py_ISLOWER(c)) { |
292 | 0 | if (!previous_is_cased) |
293 | 0 | c = Py_TOUPPER(c); |
294 | 0 | previous_is_cased = 1; |
295 | 0 | } else if (Py_ISUPPER(c)) { |
296 | 0 | if (previous_is_cased) |
297 | 0 | c = Py_TOLOWER(c); |
298 | 0 | previous_is_cased = 1; |
299 | 0 | } else |
300 | 0 | previous_is_cased = 0; |
301 | 0 | *result++ = c; |
302 | 0 | } |
303 | 0 | } |
304 | | |
305 | | |
306 | | PyDoc_STRVAR_shared(_Py_capitalize__doc__, |
307 | | "B.capitalize() -> copy of B\n\ |
308 | | \n\ |
309 | | Return a copy of B with only its first character capitalized (ASCII)\n\ |
310 | | and the rest lower-cased."); |
311 | | |
312 | | void |
313 | | _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len) |
314 | 0 | { |
315 | 0 | if (len > 0) { |
316 | 0 | *result = Py_TOUPPER(*s); |
317 | 0 | _Py_bytes_lower(result + 1, s + 1, len - 1); |
318 | 0 | } |
319 | 0 | } |
320 | | |
321 | | |
322 | | PyDoc_STRVAR_shared(_Py_swapcase__doc__, |
323 | | "B.swapcase() -> copy of B\n\ |
324 | | \n\ |
325 | | Return a copy of B with uppercase ASCII characters converted\n\ |
326 | | to lowercase ASCII and vice versa."); |
327 | | |
328 | | void |
329 | | _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len) |
330 | 0 | { |
331 | 0 | Py_ssize_t i; |
332 | |
|
333 | 0 | for (i = 0; i < len; i++) { |
334 | 0 | int c = Py_CHARMASK(*s++); |
335 | 0 | if (Py_ISLOWER(c)) { |
336 | 0 | *result = Py_TOUPPER(c); |
337 | 0 | } |
338 | 0 | else if (Py_ISUPPER(c)) { |
339 | 0 | *result = Py_TOLOWER(c); |
340 | 0 | } |
341 | 0 | else |
342 | 0 | *result = c; |
343 | 0 | result++; |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | | |
348 | | PyDoc_STRVAR_shared(_Py_maketrans__doc__, |
349 | | "B.maketrans(frm, to) -> translation table\n\ |
350 | | \n\ |
351 | | Return a translation table (a bytes object of length 256) suitable\n\ |
352 | | for use in the bytes or bytearray translate method where each byte\n\ |
353 | | in frm is mapped to the byte at the same position in to.\n\ |
354 | | The bytes objects frm and to must be of the same length."); |
355 | | |
356 | | PyObject * |
357 | | _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to) |
358 | 28 | { |
359 | 28 | PyObject *res = NULL; |
360 | 28 | Py_ssize_t i; |
361 | 28 | char *p; |
362 | | |
363 | 28 | if (frm->len != to->len) { |
364 | 0 | PyErr_Format(PyExc_ValueError, |
365 | 0 | "maketrans arguments must have same length"); |
366 | 0 | return NULL; |
367 | 0 | } |
368 | 28 | res = PyBytes_FromStringAndSize(NULL, 256); |
369 | 28 | if (!res) |
370 | 0 | return NULL; |
371 | 28 | p = PyBytes_AS_STRING(res); |
372 | 7.19k | for (i = 0; i < 256; i++) |
373 | 7.16k | p[i] = (char) i; |
374 | 1.28k | for (i = 0; i < frm->len; i++) { |
375 | 1.25k | p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i]; |
376 | 1.25k | } |
377 | | |
378 | 28 | return res; |
379 | 28 | } |
380 | | |
381 | 2.94k | #define FASTSEARCH fastsearch |
382 | 2.94k | #define STRINGLIB(F) stringlib_##F |
383 | 0 | #define STRINGLIB_CHAR char |
384 | 0 | #define STRINGLIB_SIZEOF_CHAR 1 |
385 | 1.59k | #define STRINGLIB_FAST_MEMCHR memchr |
386 | | |
387 | | #include "stringlib/fastsearch.h" |
388 | | #include "stringlib/count.h" |
389 | | #include "stringlib/find.h" |
390 | | #include "stringlib/find_max_char.h" |
391 | | |
392 | | /* |
393 | | Wraps stringlib_parse_args_finds() and additionally checks the first |
394 | | argument type. |
395 | | |
396 | | In case the first argument is a bytes-like object, sets it to subobj, |
397 | | and doesn't touch the byte parameter. |
398 | | In case it is an integer in range(0, 256), writes the integer value |
399 | | to byte, and sets subobj to NULL. |
400 | | |
401 | | The other parameters are similar to those of |
402 | | stringlib_parse_args_finds(). |
403 | | */ |
404 | | |
405 | | Py_LOCAL_INLINE(int) |
406 | | parse_args_finds_byte(const char *function_name, PyObject **subobj, char *byte) |
407 | 19.9k | { |
408 | 19.9k | if (PyObject_CheckBuffer(*subobj)) { |
409 | 18.3k | return 1; |
410 | 18.3k | } |
411 | | |
412 | 1.59k | if (!_PyIndex_Check(*subobj)) { |
413 | 0 | PyErr_Format(PyExc_TypeError, |
414 | 0 | "argument should be integer or bytes-like object, " |
415 | 0 | "not '%.200s'", |
416 | 0 | Py_TYPE(*subobj)->tp_name); |
417 | 0 | return 0; |
418 | 0 | } |
419 | | |
420 | 1.59k | Py_ssize_t ival = PyNumber_AsSsize_t(*subobj, NULL); |
421 | 1.59k | if (ival == -1 && PyErr_Occurred()) { |
422 | 0 | return 0; |
423 | 0 | } |
424 | 1.59k | if (ival < 0 || ival > 255) { |
425 | 0 | PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); |
426 | 0 | return 0; |
427 | 0 | } |
428 | | |
429 | 1.59k | *subobj = NULL; |
430 | 1.59k | *byte = (char)ival; |
431 | 1.59k | return 1; |
432 | 1.59k | } |
433 | | |
/* Normalise slice bounds in place: end is capped at len, a negative start
   or end is offset by len, and anything still negative becomes 0.
   start and end must be modifiable lvalues; start is not capped at len. */
#define ADJUST_INDICES(start, end, len)                                 \
    do {                                                                \
        if ((end) > (len)) {                                            \
            (end) = (len);                                              \
        }                                                               \
        else if ((end) < 0) {                                           \
            (end) = ((end) + (len) < 0) ? 0 : (end) + (len);            \
        }                                                               \
        if ((start) < 0) {                                              \
            (start) = ((start) + (len) < 0) ? 0 : (start) + (len);      \
        }                                                               \
    } while (0)
453 | | |
454 | | Py_LOCAL_INLINE(Py_ssize_t) |
455 | | find_internal(const char *str, Py_ssize_t len, |
456 | | const char *function_name, PyObject *subobj, |
457 | | Py_ssize_t start, Py_ssize_t end, |
458 | | int dir) |
459 | 19.9k | { |
460 | 19.9k | char byte; |
461 | 19.9k | Py_buffer subbuf; |
462 | 19.9k | const char *sub; |
463 | 19.9k | Py_ssize_t sub_len; |
464 | 19.9k | Py_ssize_t res; |
465 | | |
466 | 19.9k | if (!parse_args_finds_byte(function_name, &subobj, &byte)) { |
467 | 0 | return -2; |
468 | 0 | } |
469 | | |
470 | 19.9k | if (subobj) { |
471 | 18.3k | if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) |
472 | 0 | return -2; |
473 | | |
474 | 18.3k | sub = subbuf.buf; |
475 | 18.3k | sub_len = subbuf.len; |
476 | 18.3k | } |
477 | 1.59k | else { |
478 | 1.59k | sub = &byte; |
479 | 1.59k | sub_len = 1; |
480 | 1.59k | } |
481 | | |
482 | 19.9k | ADJUST_INDICES(start, end, len); |
483 | 19.9k | if (end - start < sub_len) |
484 | 0 | res = -1; |
485 | 19.9k | else if (sub_len == 1) { |
486 | 19.9k | if (dir > 0) |
487 | 1.59k | res = stringlib_find_char( |
488 | 1.59k | str + start, end - start, |
489 | 1.59k | *sub); |
490 | 18.3k | else |
491 | 18.3k | res = stringlib_rfind_char( |
492 | 18.3k | str + start, end - start, |
493 | 18.3k | *sub); |
494 | 19.9k | if (res >= 0) |
495 | 6.95k | res += start; |
496 | 19.9k | } |
497 | 4 | else { |
498 | 4 | if (dir > 0) |
499 | 0 | res = stringlib_find_slice( |
500 | 0 | str, len, |
501 | 0 | sub, sub_len, start, end); |
502 | 4 | else |
503 | 4 | res = stringlib_rfind_slice( |
504 | 4 | str, len, |
505 | 4 | sub, sub_len, start, end); |
506 | 4 | } |
507 | | |
508 | 19.9k | if (subobj) |
509 | 18.3k | PyBuffer_Release(&subbuf); |
510 | | |
511 | 19.9k | return res; |
512 | 19.9k | } |
513 | | |
514 | | PyObject * |
515 | | _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub, |
516 | | Py_ssize_t start, Py_ssize_t end) |
517 | 1.59k | { |
518 | 1.59k | Py_ssize_t result = find_internal(str, len, "find", sub, start, end, +1); |
519 | 1.59k | if (result == -2) |
520 | 0 | return NULL; |
521 | 1.59k | return PyLong_FromSsize_t(result); |
522 | 1.59k | } |
523 | | |
524 | | PyObject * |
525 | | _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub, |
526 | | Py_ssize_t start, Py_ssize_t end) |
527 | 0 | { |
528 | 0 | Py_ssize_t result = find_internal(str, len, "index", sub, start, end, +1); |
529 | 0 | if (result == -2) |
530 | 0 | return NULL; |
531 | 0 | if (result == -1) { |
532 | 0 | PyErr_SetString(PyExc_ValueError, |
533 | 0 | "subsection not found"); |
534 | 0 | return NULL; |
535 | 0 | } |
536 | 0 | return PyLong_FromSsize_t(result); |
537 | 0 | } |
538 | | |
539 | | PyObject * |
540 | | _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub, |
541 | | Py_ssize_t start, Py_ssize_t end) |
542 | 18.3k | { |
543 | 18.3k | Py_ssize_t result = find_internal(str, len, "rfind", sub, start, end, -1); |
544 | 18.3k | if (result == -2) |
545 | 0 | return NULL; |
546 | 18.3k | return PyLong_FromSsize_t(result); |
547 | 18.3k | } |
548 | | |
549 | | PyObject * |
550 | | _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub, |
551 | | Py_ssize_t start, Py_ssize_t end) |
552 | 0 | { |
553 | 0 | Py_ssize_t result = find_internal(str, len, "rindex", sub, start, end, -1); |
554 | 0 | if (result == -2) |
555 | 0 | return NULL; |
556 | 0 | if (result == -1) { |
557 | 0 | PyErr_SetString(PyExc_ValueError, |
558 | 0 | "subsection not found"); |
559 | 0 | return NULL; |
560 | 0 | } |
561 | 0 | return PyLong_FromSsize_t(result); |
562 | 0 | } |
563 | | |
564 | | PyObject * |
565 | | _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub_obj, |
566 | | Py_ssize_t start, Py_ssize_t end) |
567 | 0 | { |
568 | 0 | const char *sub; |
569 | 0 | Py_ssize_t sub_len; |
570 | 0 | char byte; |
571 | |
|
572 | 0 | Py_buffer vsub; |
573 | 0 | PyObject *count_obj; |
574 | |
|
575 | 0 | if (!parse_args_finds_byte("count", &sub_obj, &byte)) { |
576 | 0 | return NULL; |
577 | 0 | } |
578 | | |
579 | 0 | if (sub_obj) { |
580 | 0 | if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) |
581 | 0 | return NULL; |
582 | | |
583 | 0 | sub = vsub.buf; |
584 | 0 | sub_len = vsub.len; |
585 | 0 | } |
586 | 0 | else { |
587 | 0 | sub = &byte; |
588 | 0 | sub_len = 1; |
589 | 0 | } |
590 | | |
591 | 0 | ADJUST_INDICES(start, end, len); |
592 | |
|
593 | 0 | count_obj = PyLong_FromSsize_t( |
594 | 0 | stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) |
595 | 0 | ); |
596 | |
|
597 | 0 | if (sub_obj) |
598 | 0 | PyBuffer_Release(&vsub); |
599 | |
|
600 | 0 | return count_obj; |
601 | 0 | } |
602 | | |
603 | | int |
604 | | _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg) |
605 | 2.94k | { |
606 | 2.94k | Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL); |
607 | 2.94k | if (ival == -1 && PyErr_Occurred()) { |
608 | 2.94k | Py_buffer varg; |
609 | 2.94k | Py_ssize_t pos; |
610 | 2.94k | PyErr_Clear(); |
611 | 2.94k | if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) |
612 | 0 | return -1; |
613 | 2.94k | pos = stringlib_find(str, len, |
614 | 2.94k | varg.buf, varg.len, 0); |
615 | 2.94k | PyBuffer_Release(&varg); |
616 | 2.94k | return pos >= 0; |
617 | 2.94k | } |
618 | 0 | if (ival < 0 || ival >= 256) { |
619 | 0 | PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); |
620 | 0 | return -1; |
621 | 0 | } |
622 | | |
623 | 0 | return memchr(str, (int) ival, len) != NULL; |
624 | 0 | } |
625 | | |
626 | | |
627 | | /* Matches the end (direction >= 0) or start (direction < 0) of the buffer |
628 | | * against substr, using the start and end arguments. Returns |
629 | | * -1 on error, 0 if not found and 1 if found. |
630 | | */ |
631 | | static int |
632 | | tailmatch(const char *str, Py_ssize_t len, PyObject *substr, |
633 | | Py_ssize_t start, Py_ssize_t end, int direction) |
634 | 576k | { |
635 | 576k | Py_buffer sub_view = {NULL, NULL}; |
636 | 576k | const char *sub; |
637 | 576k | Py_ssize_t slen; |
638 | | |
639 | 576k | if (PyBytes_Check(substr)) { |
640 | 576k | sub = PyBytes_AS_STRING(substr); |
641 | 576k | slen = PyBytes_GET_SIZE(substr); |
642 | 576k | } |
643 | 0 | else { |
644 | 0 | if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0) |
645 | 0 | return -1; |
646 | 0 | sub = sub_view.buf; |
647 | 0 | slen = sub_view.len; |
648 | 0 | } |
649 | | |
650 | 576k | ADJUST_INDICES(start, end, len); |
651 | | |
652 | 576k | if (direction < 0) { |
653 | | /* startswith */ |
654 | 576k | if (start > len - slen) |
655 | 552k | goto notfound; |
656 | 576k | } else { |
657 | | /* endswith */ |
658 | 0 | if (end - start < slen || start > len) |
659 | 0 | goto notfound; |
660 | | |
661 | 0 | if (end - slen > start) |
662 | 0 | start = end - slen; |
663 | 0 | } |
664 | 24.2k | if (end - start < slen) |
665 | 0 | goto notfound; |
666 | 24.2k | if (memcmp(str + start, sub, slen) != 0) |
667 | 6.17k | goto notfound; |
668 | | |
669 | 18.0k | PyBuffer_Release(&sub_view); |
670 | 18.0k | return 1; |
671 | | |
672 | 558k | notfound: |
673 | 558k | PyBuffer_Release(&sub_view); |
674 | 558k | return 0; |
675 | 24.2k | } |
676 | | |
677 | | static PyObject * |
678 | | _Py_bytes_tailmatch(const char *str, Py_ssize_t len, |
679 | | const char *function_name, PyObject *subobj, |
680 | | Py_ssize_t start, Py_ssize_t end, |
681 | | int direction) |
682 | 576k | { |
683 | 576k | if (PyTuple_Check(subobj)) { |
684 | 0 | Py_ssize_t i; |
685 | 0 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
686 | 0 | PyObject *item = PyTuple_GET_ITEM(subobj, i); |
687 | 0 | int result = tailmatch(str, len, item, start, end, direction); |
688 | 0 | if (result < 0) { |
689 | 0 | return NULL; |
690 | 0 | } |
691 | 0 | else if (result) { |
692 | 0 | Py_RETURN_TRUE; |
693 | 0 | } |
694 | 0 | } |
695 | 0 | Py_RETURN_FALSE; |
696 | 0 | } |
697 | 576k | int result = tailmatch(str, len, subobj, start, end, direction); |
698 | 576k | if (result == -1) { |
699 | 0 | if (PyErr_ExceptionMatches(PyExc_TypeError)) { |
700 | 0 | PyErr_Format(PyExc_TypeError, |
701 | 0 | "%s first arg must be bytes or a tuple of bytes, " |
702 | 0 | "not %s", |
703 | 0 | function_name, Py_TYPE(subobj)->tp_name); |
704 | 0 | } |
705 | 0 | return NULL; |
706 | 0 | } |
707 | 576k | return PyBool_FromLong(result); |
708 | 576k | } |
709 | | |
710 | | PyObject * |
711 | | _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *subobj, |
712 | | Py_ssize_t start, Py_ssize_t end) |
713 | 576k | { |
714 | 576k | return _Py_bytes_tailmatch(str, len, "startswith", subobj, start, end, -1); |
715 | 576k | } |
716 | | |
717 | | PyObject * |
718 | | _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *subobj, |
719 | | Py_ssize_t start, Py_ssize_t end) |
720 | 0 | { |
721 | 0 | return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1); |
722 | 0 | } |
723 | | |
724 | | PyDoc_STRVAR_shared(_Py_isascii__doc__, |
725 | | "B.isascii() -> bool\n\ |
726 | | \n\ |
727 | | Return True if B is empty or all characters in B are ASCII,\n\ |
728 | | False otherwise."); |
729 | | |
730 | | PyObject* |
731 | | _Py_bytes_isascii(const char *cptr, Py_ssize_t len) |
732 | 0 | { |
733 | 0 | const char *p = cptr; |
734 | 0 | const char *end = p + len; |
735 | 0 | Py_ssize_t max_char = stringlib_find_max_char(cptr, end); |
736 | 0 | if (max_char > 127) { |
737 | 0 | Py_RETURN_FALSE; |
738 | 0 | } |
739 | 0 | Py_RETURN_TRUE; |
740 | 0 | } |