/src/Python-3.8.3/Objects/bytes_methods.c
Line | Count | Source (jump to first uncovered line) |
1 | | #define PY_SSIZE_T_CLEAN |
2 | | #include "Python.h" |
3 | | #include "bytes_methods.h" |
4 | | |
5 | | PyDoc_STRVAR_shared(_Py_isspace__doc__, |
6 | | "B.isspace() -> bool\n\ |
7 | | \n\ |
8 | | Return True if all characters in B are whitespace\n\ |
9 | | and there is at least one character in B, False otherwise."); |
10 | | |
11 | | PyObject* |
12 | | _Py_bytes_isspace(const char *cptr, Py_ssize_t len) |
13 | 0 | { |
14 | 0 | const unsigned char *p |
15 | 0 | = (const unsigned char *) cptr; |
16 | 0 | const unsigned char *e; |
17 | | |
18 | | /* Shortcut for single character strings */ |
19 | 0 | if (len == 1 && Py_ISSPACE(*p)) |
20 | 0 | Py_RETURN_TRUE; |
21 | | |
22 | | /* Special case for empty strings */ |
23 | 0 | if (len == 0) |
24 | 0 | Py_RETURN_FALSE; |
25 | | |
26 | 0 | e = p + len; |
27 | 0 | for (; p < e; p++) { |
28 | 0 | if (!Py_ISSPACE(*p)) |
29 | 0 | Py_RETURN_FALSE; |
30 | 0 | } |
31 | 0 | Py_RETURN_TRUE; |
32 | 0 | } |
33 | | |
34 | | |
35 | | PyDoc_STRVAR_shared(_Py_isalpha__doc__, |
36 | | "B.isalpha() -> bool\n\ |
37 | | \n\ |
38 | | Return True if all characters in B are alphabetic\n\ |
39 | | and there is at least one character in B, False otherwise."); |
40 | | |
41 | | PyObject* |
42 | | _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) |
43 | 0 | { |
44 | 0 | const unsigned char *p |
45 | 0 | = (const unsigned char *) cptr; |
46 | 0 | const unsigned char *e; |
47 | | |
48 | | /* Shortcut for single character strings */ |
49 | 0 | if (len == 1 && Py_ISALPHA(*p)) |
50 | 0 | Py_RETURN_TRUE; |
51 | | |
52 | | /* Special case for empty strings */ |
53 | 0 | if (len == 0) |
54 | 0 | Py_RETURN_FALSE; |
55 | | |
56 | 0 | e = p + len; |
57 | 0 | for (; p < e; p++) { |
58 | 0 | if (!Py_ISALPHA(*p)) |
59 | 0 | Py_RETURN_FALSE; |
60 | 0 | } |
61 | 0 | Py_RETURN_TRUE; |
62 | 0 | } |
63 | | |
64 | | |
65 | | PyDoc_STRVAR_shared(_Py_isalnum__doc__, |
66 | | "B.isalnum() -> bool\n\ |
67 | | \n\ |
68 | | Return True if all characters in B are alphanumeric\n\ |
69 | | and there is at least one character in B, False otherwise."); |
70 | | |
71 | | PyObject* |
72 | | _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) |
73 | 0 | { |
74 | 0 | const unsigned char *p |
75 | 0 | = (const unsigned char *) cptr; |
76 | 0 | const unsigned char *e; |
77 | | |
78 | | /* Shortcut for single character strings */ |
79 | 0 | if (len == 1 && Py_ISALNUM(*p)) |
80 | 0 | Py_RETURN_TRUE; |
81 | | |
82 | | /* Special case for empty strings */ |
83 | 0 | if (len == 0) |
84 | 0 | Py_RETURN_FALSE; |
85 | | |
86 | 0 | e = p + len; |
87 | 0 | for (; p < e; p++) { |
88 | 0 | if (!Py_ISALNUM(*p)) |
89 | 0 | Py_RETURN_FALSE; |
90 | 0 | } |
91 | 0 | Py_RETURN_TRUE; |
92 | 0 | } |
93 | | |
94 | | |
95 | | PyDoc_STRVAR_shared(_Py_isascii__doc__, |
96 | | "B.isascii() -> bool\n\ |
97 | | \n\ |
98 | | Return True if B is empty or all characters in B are ASCII,\n\ |
99 | | False otherwise."); |
100 | | |
101 | | // Optimization is copied from ascii_decode in unicodeobject.c |
102 | | /* Mask to quickly check whether a C 'long' contains a |
103 | | non-ASCII, UTF8-encoded char. */ |
104 | | #if (SIZEOF_LONG == 8) |
105 | 0 | # define ASCII_CHAR_MASK 0x8080808080808080UL |
106 | | #elif (SIZEOF_LONG == 4) |
107 | | # define ASCII_CHAR_MASK 0x80808080UL |
108 | | #else |
109 | | # error C 'long' size should be either 4 or 8! |
110 | | #endif |
111 | | |
112 | | PyObject* |
113 | | _Py_bytes_isascii(const char *cptr, Py_ssize_t len) |
114 | 0 | { |
115 | 0 | const char *p = cptr; |
116 | 0 | const char *end = p + len; |
117 | 0 | const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); |
118 | |
|
119 | 0 | while (p < end) { |
120 | | /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h |
121 | | for an explanation. */ |
122 | 0 | if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { |
123 | | /* Help allocation */ |
124 | 0 | const char *_p = p; |
125 | 0 | while (_p < aligned_end) { |
126 | 0 | unsigned long value = *(const unsigned long *) _p; |
127 | 0 | if (value & ASCII_CHAR_MASK) { |
128 | 0 | Py_RETURN_FALSE; |
129 | 0 | } |
130 | 0 | _p += SIZEOF_LONG; |
131 | 0 | } |
132 | 0 | p = _p; |
133 | 0 | if (_p == end) |
134 | 0 | break; |
135 | 0 | } |
136 | 0 | if ((unsigned char)*p & 0x80) { |
137 | 0 | Py_RETURN_FALSE; |
138 | 0 | } |
139 | 0 | p++; |
140 | 0 | } |
141 | 0 | Py_RETURN_TRUE; |
142 | 0 | } |
143 | | |
144 | | #undef ASCII_CHAR_MASK |
145 | | |
146 | | |
147 | | PyDoc_STRVAR_shared(_Py_isdigit__doc__, |
148 | | "B.isdigit() -> bool\n\ |
149 | | \n\ |
150 | | Return True if all characters in B are digits\n\ |
151 | | and there is at least one character in B, False otherwise."); |
152 | | |
153 | | PyObject* |
154 | | _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) |
155 | 0 | { |
156 | 0 | const unsigned char *p |
157 | 0 | = (const unsigned char *) cptr; |
158 | 0 | const unsigned char *e; |
159 | | |
160 | | /* Shortcut for single character strings */ |
161 | 0 | if (len == 1 && Py_ISDIGIT(*p)) |
162 | 0 | Py_RETURN_TRUE; |
163 | | |
164 | | /* Special case for empty strings */ |
165 | 0 | if (len == 0) |
166 | 0 | Py_RETURN_FALSE; |
167 | | |
168 | 0 | e = p + len; |
169 | 0 | for (; p < e; p++) { |
170 | 0 | if (!Py_ISDIGIT(*p)) |
171 | 0 | Py_RETURN_FALSE; |
172 | 0 | } |
173 | 0 | Py_RETURN_TRUE; |
174 | 0 | } |
175 | | |
176 | | |
177 | | PyDoc_STRVAR_shared(_Py_islower__doc__, |
178 | | "B.islower() -> bool\n\ |
179 | | \n\ |
180 | | Return True if all cased characters in B are lowercase and there is\n\ |
181 | | at least one cased character in B, False otherwise."); |
182 | | |
183 | | PyObject* |
184 | | _Py_bytes_islower(const char *cptr, Py_ssize_t len) |
185 | 0 | { |
186 | 0 | const unsigned char *p |
187 | 0 | = (const unsigned char *) cptr; |
188 | 0 | const unsigned char *e; |
189 | 0 | int cased; |
190 | | |
191 | | /* Shortcut for single character strings */ |
192 | 0 | if (len == 1) |
193 | 0 | return PyBool_FromLong(Py_ISLOWER(*p)); |
194 | | |
195 | | /* Special case for empty strings */ |
196 | 0 | if (len == 0) |
197 | 0 | Py_RETURN_FALSE; |
198 | | |
199 | 0 | e = p + len; |
200 | 0 | cased = 0; |
201 | 0 | for (; p < e; p++) { |
202 | 0 | if (Py_ISUPPER(*p)) |
203 | 0 | Py_RETURN_FALSE; |
204 | 0 | else if (!cased && Py_ISLOWER(*p)) |
205 | 0 | cased = 1; |
206 | 0 | } |
207 | 0 | return PyBool_FromLong(cased); |
208 | 0 | } |
209 | | |
210 | | |
211 | | PyDoc_STRVAR_shared(_Py_isupper__doc__, |
212 | | "B.isupper() -> bool\n\ |
213 | | \n\ |
214 | | Return True if all cased characters in B are uppercase and there is\n\ |
215 | | at least one cased character in B, False otherwise."); |
216 | | |
217 | | PyObject* |
218 | | _Py_bytes_isupper(const char *cptr, Py_ssize_t len) |
219 | 0 | { |
220 | 0 | const unsigned char *p |
221 | 0 | = (const unsigned char *) cptr; |
222 | 0 | const unsigned char *e; |
223 | 0 | int cased; |
224 | | |
225 | | /* Shortcut for single character strings */ |
226 | 0 | if (len == 1) |
227 | 0 | return PyBool_FromLong(Py_ISUPPER(*p)); |
228 | | |
229 | | /* Special case for empty strings */ |
230 | 0 | if (len == 0) |
231 | 0 | Py_RETURN_FALSE; |
232 | | |
233 | 0 | e = p + len; |
234 | 0 | cased = 0; |
235 | 0 | for (; p < e; p++) { |
236 | 0 | if (Py_ISLOWER(*p)) |
237 | 0 | Py_RETURN_FALSE; |
238 | 0 | else if (!cased && Py_ISUPPER(*p)) |
239 | 0 | cased = 1; |
240 | 0 | } |
241 | 0 | return PyBool_FromLong(cased); |
242 | 0 | } |
243 | | |
244 | | |
245 | | PyDoc_STRVAR_shared(_Py_istitle__doc__, |
246 | | "B.istitle() -> bool\n\ |
247 | | \n\ |
248 | | Return True if B is a titlecased string and there is at least one\n\ |
249 | | character in B, i.e. uppercase characters may only follow uncased\n\ |
250 | | characters and lowercase characters only cased ones. Return False\n\ |
251 | | otherwise."); |
252 | | |
253 | | PyObject* |
254 | | _Py_bytes_istitle(const char *cptr, Py_ssize_t len) |
255 | 0 | { |
256 | 0 | const unsigned char *p |
257 | 0 | = (const unsigned char *) cptr; |
258 | 0 | const unsigned char *e; |
259 | 0 | int cased, previous_is_cased; |
260 | | |
261 | | /* Shortcut for single character strings */ |
262 | 0 | if (len == 1) |
263 | 0 | return PyBool_FromLong(Py_ISUPPER(*p)); |
264 | | |
265 | | /* Special case for empty strings */ |
266 | 0 | if (len == 0) |
267 | 0 | Py_RETURN_FALSE; |
268 | | |
269 | 0 | e = p + len; |
270 | 0 | cased = 0; |
271 | 0 | previous_is_cased = 0; |
272 | 0 | for (; p < e; p++) { |
273 | 0 | const unsigned char ch = *p; |
274 | |
|
275 | 0 | if (Py_ISUPPER(ch)) { |
276 | 0 | if (previous_is_cased) |
277 | 0 | Py_RETURN_FALSE; |
278 | 0 | previous_is_cased = 1; |
279 | 0 | cased = 1; |
280 | 0 | } |
281 | 0 | else if (Py_ISLOWER(ch)) { |
282 | 0 | if (!previous_is_cased) |
283 | 0 | Py_RETURN_FALSE; |
284 | 0 | previous_is_cased = 1; |
285 | 0 | cased = 1; |
286 | 0 | } |
287 | 0 | else |
288 | 0 | previous_is_cased = 0; |
289 | 0 | } |
290 | 0 | return PyBool_FromLong(cased); |
291 | 0 | } |
292 | | |
293 | | |
294 | | PyDoc_STRVAR_shared(_Py_lower__doc__, |
295 | | "B.lower() -> copy of B\n\ |
296 | | \n\ |
297 | | Return a copy of B with all ASCII characters converted to lowercase."); |
298 | | |
299 | | void |
300 | | _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) |
301 | 0 | { |
302 | 0 | Py_ssize_t i; |
303 | |
|
304 | 0 | for (i = 0; i < len; i++) { |
305 | 0 | result[i] = Py_TOLOWER((unsigned char) cptr[i]); |
306 | 0 | } |
307 | 0 | } |
308 | | |
309 | | |
310 | | PyDoc_STRVAR_shared(_Py_upper__doc__, |
311 | | "B.upper() -> copy of B\n\ |
312 | | \n\ |
313 | | Return a copy of B with all ASCII characters converted to uppercase."); |
314 | | |
315 | | void |
316 | | _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) |
317 | 36 | { |
318 | 36 | Py_ssize_t i; |
319 | | |
320 | 72 | for (i = 0; i < len; i++) { |
321 | 36 | result[i] = Py_TOUPPER((unsigned char) cptr[i]); |
322 | 36 | } |
323 | 36 | } |
324 | | |
325 | | |
326 | | PyDoc_STRVAR_shared(_Py_title__doc__, |
327 | | "B.title() -> copy of B\n\ |
328 | | \n\ |
329 | | Return a titlecased version of B, i.e. ASCII words start with uppercase\n\ |
330 | | characters, all remaining cased characters have lowercase."); |
331 | | |
332 | | void |
333 | | _Py_bytes_title(char *result, const char *s, Py_ssize_t len) |
334 | 0 | { |
335 | 0 | Py_ssize_t i; |
336 | 0 | int previous_is_cased = 0; |
337 | |
|
338 | 0 | for (i = 0; i < len; i++) { |
339 | 0 | int c = Py_CHARMASK(*s++); |
340 | 0 | if (Py_ISLOWER(c)) { |
341 | 0 | if (!previous_is_cased) |
342 | 0 | c = Py_TOUPPER(c); |
343 | 0 | previous_is_cased = 1; |
344 | 0 | } else if (Py_ISUPPER(c)) { |
345 | 0 | if (previous_is_cased) |
346 | 0 | c = Py_TOLOWER(c); |
347 | 0 | previous_is_cased = 1; |
348 | 0 | } else |
349 | 0 | previous_is_cased = 0; |
350 | 0 | *result++ = c; |
351 | 0 | } |
352 | 0 | } |
353 | | |
354 | | |
355 | | PyDoc_STRVAR_shared(_Py_capitalize__doc__, |
356 | | "B.capitalize() -> copy of B\n\ |
357 | | \n\ |
358 | | Return a copy of B with only its first character capitalized (ASCII)\n\ |
359 | | and the rest lower-cased."); |
360 | | |
361 | | void |
362 | | _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len) |
363 | 0 | { |
364 | 0 | if (len > 0) { |
365 | 0 | *result = Py_TOUPPER(*s); |
366 | 0 | _Py_bytes_lower(result + 1, s + 1, len - 1); |
367 | 0 | } |
368 | 0 | } |
369 | | |
370 | | |
371 | | PyDoc_STRVAR_shared(_Py_swapcase__doc__, |
372 | | "B.swapcase() -> copy of B\n\ |
373 | | \n\ |
374 | | Return a copy of B with uppercase ASCII characters converted\n\ |
375 | | to lowercase ASCII and vice versa."); |
376 | | |
377 | | void |
378 | | _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len) |
379 | 0 | { |
380 | 0 | Py_ssize_t i; |
381 | |
|
382 | 0 | for (i = 0; i < len; i++) { |
383 | 0 | int c = Py_CHARMASK(*s++); |
384 | 0 | if (Py_ISLOWER(c)) { |
385 | 0 | *result = Py_TOUPPER(c); |
386 | 0 | } |
387 | 0 | else if (Py_ISUPPER(c)) { |
388 | 0 | *result = Py_TOLOWER(c); |
389 | 0 | } |
390 | 0 | else |
391 | 0 | *result = c; |
392 | 0 | result++; |
393 | 0 | } |
394 | 0 | } |
395 | | |
396 | | |
397 | | PyDoc_STRVAR_shared(_Py_maketrans__doc__, |
398 | | "B.maketrans(frm, to) -> translation table\n\ |
399 | | \n\ |
400 | | Return a translation table (a bytes object of length 256) suitable\n\ |
401 | | for use in the bytes or bytearray translate method where each byte\n\ |
402 | | in frm is mapped to the byte at the same position in to.\n\ |
403 | | The bytes objects frm and to must be of the same length."); |
404 | | |
405 | | PyObject * |
406 | | _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to) |
407 | 0 | { |
408 | 0 | PyObject *res = NULL; |
409 | 0 | Py_ssize_t i; |
410 | 0 | char *p; |
411 | |
|
412 | 0 | if (frm->len != to->len) { |
413 | 0 | PyErr_Format(PyExc_ValueError, |
414 | 0 | "maketrans arguments must have same length"); |
415 | 0 | return NULL; |
416 | 0 | } |
417 | 0 | res = PyBytes_FromStringAndSize(NULL, 256); |
418 | 0 | if (!res) |
419 | 0 | return NULL; |
420 | 0 | p = PyBytes_AS_STRING(res); |
421 | 0 | for (i = 0; i < 256; i++) |
422 | 0 | p[i] = (char) i; |
423 | 0 | for (i = 0; i < frm->len; i++) { |
424 | 0 | p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i]; |
425 | 0 | } |
426 | |
|
427 | 0 | return res; |
428 | 0 | } |
429 | | |
430 | 0 | #define FASTSEARCH fastsearch |
431 | 0 | #define STRINGLIB(F) stringlib_##F |
432 | | #define STRINGLIB_CHAR char |
433 | | #define STRINGLIB_SIZEOF_CHAR 1 |
434 | | |
435 | | #include "stringlib/fastsearch.h" |
436 | | #include "stringlib/count.h" |
437 | | #include "stringlib/find.h" |
438 | | |
439 | | /* |
440 | | Wraps stringlib_parse_args_finds() and additionally checks the first |
441 | | argument type. |
442 | | |
443 | | In case the first argument is a bytes-like object, sets it to subobj, |
444 | | and doesn't touch the byte parameter. |
445 | | In case it is an integer in range(0, 256), writes the integer value |
446 | | to byte, and sets subobj to NULL. |
447 | | |
448 | | The other parameters are similar to those of |
449 | | stringlib_parse_args_finds(). |
450 | | */ |
451 | | |
452 | | Py_LOCAL_INLINE(int) |
453 | | parse_args_finds_byte(const char *function_name, PyObject *args, |
454 | | PyObject **subobj, char *byte, |
455 | | Py_ssize_t *start, Py_ssize_t *end) |
456 | 88 | { |
457 | 88 | PyObject *tmp_subobj; |
458 | 88 | Py_ssize_t ival; |
459 | | |
460 | 88 | if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj, |
461 | 88 | start, end)) |
462 | 0 | return 0; |
463 | | |
464 | 88 | if (PyObject_CheckBuffer(tmp_subobj)) { |
465 | 0 | *subobj = tmp_subobj; |
466 | 0 | return 1; |
467 | 0 | } |
468 | | |
469 | 88 | if (!PyIndex_Check(tmp_subobj)) { |
470 | 0 | PyErr_Format(PyExc_TypeError, |
471 | 0 | "argument should be integer or bytes-like object, " |
472 | 0 | "not '%.200s'", |
473 | 0 | Py_TYPE(tmp_subobj)->tp_name); |
474 | 0 | return 0; |
475 | 0 | } |
476 | | |
477 | 88 | ival = PyNumber_AsSsize_t(tmp_subobj, NULL); |
478 | 88 | if (ival == -1 && PyErr_Occurred()) { |
479 | 0 | return 0; |
480 | 0 | } |
481 | 88 | if (ival < 0 || ival > 255) { |
482 | 0 | PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); |
483 | 0 | return 0; |
484 | 0 | } |
485 | | |
486 | 88 | *subobj = NULL; |
487 | 88 | *byte = (char)ival; |
488 | 88 | return 1; |
489 | 88 | } |
490 | | |
/* Helper macro to fix up start/end slice values in place.
 *
 * end:   capped at len; a negative value is offset by len and then
 *        floored at 0.
 * start: a negative value is offset by len and then floored at 0
 *        (start is not capped at len here).
 *
 * start and end must be modifiable lvalues.  The body is wrapped in
 * do { ... } while (0) so that the macro behaves as one statement: it can
 * be used under an unbraced if/else without the second adjustment
 * escaping the condition.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
505 | | |
506 | | Py_LOCAL_INLINE(Py_ssize_t) |
507 | | find_internal(const char *str, Py_ssize_t len, |
508 | | const char *function_name, PyObject *args, int dir) |
509 | 88 | { |
510 | 88 | PyObject *subobj; |
511 | 88 | char byte; |
512 | 88 | Py_buffer subbuf; |
513 | 88 | const char *sub; |
514 | 88 | Py_ssize_t sub_len; |
515 | 88 | Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
516 | 88 | Py_ssize_t res; |
517 | | |
518 | 88 | if (!parse_args_finds_byte(function_name, args, |
519 | 88 | &subobj, &byte, &start, &end)) |
520 | 0 | return -2; |
521 | | |
522 | 88 | if (subobj) { |
523 | 0 | if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) |
524 | 0 | return -2; |
525 | | |
526 | 0 | sub = subbuf.buf; |
527 | 0 | sub_len = subbuf.len; |
528 | 0 | } |
529 | 88 | else { |
530 | 88 | sub = &byte; |
531 | 88 | sub_len = 1; |
532 | 88 | } |
533 | | |
534 | 88 | ADJUST_INDICES(start, end, len); |
535 | 88 | if (end - start < sub_len) |
536 | 0 | res = -1; |
537 | 88 | else if (sub_len == 1) { |
538 | 88 | if (dir > 0) |
539 | 88 | res = stringlib_find_char( |
540 | 88 | str + start, end - start, |
541 | 88 | *sub); |
542 | 0 | else |
543 | 0 | res = stringlib_rfind_char( |
544 | 0 | str + start, end - start, |
545 | 0 | *sub); |
546 | 88 | if (res >= 0) |
547 | 71 | res += start; |
548 | 88 | } |
549 | 0 | else { |
550 | 0 | if (dir > 0) |
551 | 0 | res = stringlib_find_slice( |
552 | 0 | str, len, |
553 | 0 | sub, sub_len, start, end); |
554 | 0 | else |
555 | 0 | res = stringlib_rfind_slice( |
556 | 0 | str, len, |
557 | 0 | sub, sub_len, start, end); |
558 | 0 | } |
559 | | |
560 | 88 | if (subobj) |
561 | 0 | PyBuffer_Release(&subbuf); |
562 | | |
563 | 88 | return res; |
564 | 88 | } |
565 | | |
566 | | PyDoc_STRVAR_shared(_Py_find__doc__, |
567 | | "B.find(sub[, start[, end]]) -> int\n\ |
568 | | \n\ |
569 | | Return the lowest index in B where subsection sub is found,\n\ |
570 | | such that sub is contained within B[start,end]. Optional\n\ |
571 | | arguments start and end are interpreted as in slice notation.\n\ |
572 | | \n\ |
573 | | Return -1 on failure."); |
574 | | |
575 | | PyObject * |
576 | | _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args) |
577 | 88 | { |
578 | 88 | Py_ssize_t result = find_internal(str, len, "find", args, +1); |
579 | 88 | if (result == -2) |
580 | 0 | return NULL; |
581 | 88 | return PyLong_FromSsize_t(result); |
582 | 88 | } |
583 | | |
584 | | PyDoc_STRVAR_shared(_Py_index__doc__, |
585 | | "B.index(sub[, start[, end]]) -> int\n\ |
586 | | \n\ |
587 | | Return the lowest index in B where subsection sub is found,\n\ |
588 | | such that sub is contained within B[start,end]. Optional\n\ |
589 | | arguments start and end are interpreted as in slice notation.\n\ |
590 | | \n\ |
591 | | Raises ValueError when the subsection is not found."); |
592 | | |
593 | | PyObject * |
594 | | _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args) |
595 | 0 | { |
596 | 0 | Py_ssize_t result = find_internal(str, len, "index", args, +1); |
597 | 0 | if (result == -2) |
598 | 0 | return NULL; |
599 | 0 | if (result == -1) { |
600 | 0 | PyErr_SetString(PyExc_ValueError, |
601 | 0 | "subsection not found"); |
602 | 0 | return NULL; |
603 | 0 | } |
604 | 0 | return PyLong_FromSsize_t(result); |
605 | 0 | } |
606 | | |
607 | | PyDoc_STRVAR_shared(_Py_rfind__doc__, |
608 | | "B.rfind(sub[, start[, end]]) -> int\n\ |
609 | | \n\ |
610 | | Return the highest index in B where subsection sub is found,\n\ |
611 | | such that sub is contained within B[start,end]. Optional\n\ |
612 | | arguments start and end are interpreted as in slice notation.\n\ |
613 | | \n\ |
614 | | Return -1 on failure."); |
615 | | |
616 | | PyObject * |
617 | | _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args) |
618 | 0 | { |
619 | 0 | Py_ssize_t result = find_internal(str, len, "rfind", args, -1); |
620 | 0 | if (result == -2) |
621 | 0 | return NULL; |
622 | 0 | return PyLong_FromSsize_t(result); |
623 | 0 | } |
624 | | |
625 | | PyDoc_STRVAR_shared(_Py_rindex__doc__, |
626 | | "B.rindex(sub[, start[, end]]) -> int\n\ |
627 | | \n\ |
628 | | Return the highest index in B where subsection sub is found,\n\ |
629 | | such that sub is contained within B[start,end]. Optional\n\ |
630 | | arguments start and end are interpreted as in slice notation.\n\ |
631 | | \n\ |
632 | | Raise ValueError when the subsection is not found."); |
633 | | |
634 | | PyObject * |
635 | | _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args) |
636 | 0 | { |
637 | 0 | Py_ssize_t result = find_internal(str, len, "rindex", args, -1); |
638 | 0 | if (result == -2) |
639 | 0 | return NULL; |
640 | 0 | if (result == -1) { |
641 | 0 | PyErr_SetString(PyExc_ValueError, |
642 | 0 | "subsection not found"); |
643 | 0 | return NULL; |
644 | 0 | } |
645 | 0 | return PyLong_FromSsize_t(result); |
646 | 0 | } |
647 | | |
648 | | PyDoc_STRVAR_shared(_Py_count__doc__, |
649 | | "B.count(sub[, start[, end]]) -> int\n\ |
650 | | \n\ |
651 | | Return the number of non-overlapping occurrences of subsection sub in\n\ |
652 | | bytes B[start:end]. Optional arguments start and end are interpreted\n\ |
653 | | as in slice notation."); |
654 | | |
655 | | PyObject * |
656 | | _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args) |
657 | 0 | { |
658 | 0 | PyObject *sub_obj; |
659 | 0 | const char *sub; |
660 | 0 | Py_ssize_t sub_len; |
661 | 0 | char byte; |
662 | 0 | Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
663 | |
|
664 | 0 | Py_buffer vsub; |
665 | 0 | PyObject *count_obj; |
666 | |
|
667 | 0 | if (!parse_args_finds_byte("count", args, |
668 | 0 | &sub_obj, &byte, &start, &end)) |
669 | 0 | return NULL; |
670 | | |
671 | 0 | if (sub_obj) { |
672 | 0 | if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) |
673 | 0 | return NULL; |
674 | | |
675 | 0 | sub = vsub.buf; |
676 | 0 | sub_len = vsub.len; |
677 | 0 | } |
678 | 0 | else { |
679 | 0 | sub = &byte; |
680 | 0 | sub_len = 1; |
681 | 0 | } |
682 | | |
683 | 0 | ADJUST_INDICES(start, end, len); |
684 | |
|
685 | 0 | count_obj = PyLong_FromSsize_t( |
686 | 0 | stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) |
687 | 0 | ); |
688 | |
|
689 | 0 | if (sub_obj) |
690 | 0 | PyBuffer_Release(&vsub); |
691 | |
|
692 | 0 | return count_obj; |
693 | 0 | } |
694 | | |
695 | | int |
696 | | _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg) |
697 | 0 | { |
698 | 0 | Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL); |
699 | 0 | if (ival == -1 && PyErr_Occurred()) { |
700 | 0 | Py_buffer varg; |
701 | 0 | Py_ssize_t pos; |
702 | 0 | PyErr_Clear(); |
703 | 0 | if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) |
704 | 0 | return -1; |
705 | 0 | pos = stringlib_find(str, len, |
706 | 0 | varg.buf, varg.len, 0); |
707 | 0 | PyBuffer_Release(&varg); |
708 | 0 | return pos >= 0; |
709 | 0 | } |
710 | 0 | if (ival < 0 || ival >= 256) { |
711 | 0 | PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); |
712 | 0 | return -1; |
713 | 0 | } |
714 | | |
715 | 0 | return memchr(str, (int) ival, len) != NULL; |
716 | 0 | } |
717 | | |
718 | | |
719 | | /* Matches the end (direction >= 0) or start (direction < 0) of the buffer |
720 | | * against substr, using the start and end arguments. Returns |
721 | | * -1 on error, 0 if not found and 1 if found. |
722 | | */ |
723 | | static int |
724 | | tailmatch(const char *str, Py_ssize_t len, PyObject *substr, |
725 | | Py_ssize_t start, Py_ssize_t end, int direction) |
726 | 1 | { |
727 | 1 | Py_buffer sub_view = {NULL, NULL}; |
728 | 1 | const char *sub; |
729 | 1 | Py_ssize_t slen; |
730 | | |
731 | 1 | if (PyBytes_Check(substr)) { |
732 | 1 | sub = PyBytes_AS_STRING(substr); |
733 | 1 | slen = PyBytes_GET_SIZE(substr); |
734 | 1 | } |
735 | 0 | else { |
736 | 0 | if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0) |
737 | 0 | return -1; |
738 | 0 | sub = sub_view.buf; |
739 | 0 | slen = sub_view.len; |
740 | 0 | } |
741 | | |
742 | 1 | ADJUST_INDICES(start, end, len); |
743 | | |
744 | 1 | if (direction < 0) { |
745 | | /* startswith */ |
746 | 1 | if (start > len - slen) |
747 | 0 | goto notfound; |
748 | 1 | } else { |
749 | | /* endswith */ |
750 | 0 | if (end - start < slen || start > len) |
751 | 0 | goto notfound; |
752 | | |
753 | 0 | if (end - slen > start) |
754 | 0 | start = end - slen; |
755 | 0 | } |
756 | 1 | if (end - start < slen) |
757 | 0 | goto notfound; |
758 | 1 | if (memcmp(str + start, sub, slen) != 0) |
759 | 1 | goto notfound; |
760 | | |
761 | 0 | PyBuffer_Release(&sub_view); |
762 | 0 | return 1; |
763 | | |
764 | 1 | notfound: |
765 | 1 | PyBuffer_Release(&sub_view); |
766 | 1 | return 0; |
767 | 1 | } |
768 | | |
769 | | static PyObject * |
770 | | _Py_bytes_tailmatch(const char *str, Py_ssize_t len, |
771 | | const char *function_name, PyObject *args, |
772 | | int direction) |
773 | 1 | { |
774 | 1 | Py_ssize_t start = 0; |
775 | 1 | Py_ssize_t end = PY_SSIZE_T_MAX; |
776 | 1 | PyObject *subobj; |
777 | 1 | int result; |
778 | | |
779 | 1 | if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end)) |
780 | 0 | return NULL; |
781 | 1 | if (PyTuple_Check(subobj)) { |
782 | 0 | Py_ssize_t i; |
783 | 0 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
784 | 0 | result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i), |
785 | 0 | start, end, direction); |
786 | 0 | if (result == -1) |
787 | 0 | return NULL; |
788 | 0 | else if (result) { |
789 | 0 | Py_RETURN_TRUE; |
790 | 0 | } |
791 | 0 | } |
792 | 0 | Py_RETURN_FALSE; |
793 | 0 | } |
794 | 1 | result = tailmatch(str, len, subobj, start, end, direction); |
795 | 1 | if (result == -1) { |
796 | 0 | if (PyErr_ExceptionMatches(PyExc_TypeError)) |
797 | 0 | PyErr_Format(PyExc_TypeError, |
798 | 0 | "%s first arg must be bytes or a tuple of bytes, " |
799 | 0 | "not %s", |
800 | 0 | function_name, Py_TYPE(subobj)->tp_name); |
801 | 0 | return NULL; |
802 | 0 | } |
803 | 1 | else |
804 | 1 | return PyBool_FromLong(result); |
805 | 1 | } |
806 | | |
807 | | PyDoc_STRVAR_shared(_Py_startswith__doc__, |
808 | | "B.startswith(prefix[, start[, end]]) -> bool\n\ |
809 | | \n\ |
810 | | Return True if B starts with the specified prefix, False otherwise.\n\ |
811 | | With optional start, test B beginning at that position.\n\ |
812 | | With optional end, stop comparing B at that position.\n\ |
813 | | prefix can also be a tuple of bytes to try."); |
814 | | |
815 | | PyObject * |
816 | | _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args) |
817 | 1 | { |
818 | 1 | return _Py_bytes_tailmatch(str, len, "startswith", args, -1); |
819 | 1 | } |
820 | | |
821 | | PyDoc_STRVAR_shared(_Py_endswith__doc__, |
822 | | "B.endswith(suffix[, start[, end]]) -> bool\n\ |
823 | | \n\ |
824 | | Return True if B ends with the specified suffix, False otherwise.\n\ |
825 | | With optional start, test B beginning at that position.\n\ |
826 | | With optional end, stop comparing B at that position.\n\ |
827 | | suffix can also be a tuple of bytes to try."); |
828 | | |
829 | | PyObject * |
830 | | _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args) |
831 | 0 | { |
832 | 0 | return _Py_bytes_tailmatch(str, len, "endswith", args, +1); |
833 | 0 | } |