Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_RepeatBuffer()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_list.h"          // _PyList_GetItemRef
15
#include "pycore_object.h"        // _PyObject_GC_TRACK
16
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
17
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
18
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
19
20
#include <stddef.h>
21
22
/*[clinic input]
23
class bytes "PyBytesObject *" "&PyBytes_Type"
24
[clinic start generated code]*/
25
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
26
27
#include "clinic/bytesobject.c.h"
28
29
284M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
30
31
/* Forward declaration */
32
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
33
                                                   Py_ssize_t size, void *data);
34
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
35
36
37
40.4M
/* Table of the single-byte bytes singletons, indexed by byte value
   (callers below index it with '*str & 255'). */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the singleton for byte value 'ch'.  This expands to a plain
   parenthesized expression with no trailing semicolon, so it can be used
   anywhere an expression is allowed (initializers, comparisons, return
   statements), not only as a complete statement. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
/* Address of the empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
41
42
43
// Return a reference to the immortal empty bytes string singleton.
44
static inline PyObject* bytes_get_empty(void)
45
7.91M
{
46
7.91M
    PyObject *empty = &EMPTY->ob_base.ob_base;
47
7.91M
    assert(_Py_IsImmortal(empty));
48
7.91M
    return empty;
49
7.91M
}
50
51
52
static inline void
53
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
54
187M
{
55
187M
_Py_COMP_DIAG_PUSH
56
187M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
57
#ifdef Py_GIL_DISABLED
58
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
59
#else
60
187M
    a->ob_shash = hash;
61
187M
#endif
62
187M
_Py_COMP_DIAG_POP
63
187M
}
64
65
static inline Py_hash_t
66
get_ob_shash(PyBytesObject *a)
67
75.0M
{
68
75.0M
_Py_COMP_DIAG_PUSH
69
75.0M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
70
#ifdef Py_GIL_DISABLED
71
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
72
#else
73
75.0M
    return a->ob_shash;
74
75.0M
#endif
75
75.0M
_Py_COMP_DIAG_POP
76
75.0M
}
77
78
79
/*
80
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
81
   string containing exactly 'size' bytes.
82
83
   For PyBytes_FromStringAndSize(), the parameter 'str' is
84
   either NULL or else points to a string containing at least 'size' bytes.
85
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
86
   not have to be null-terminated.  (Therefore it is safe to construct a
87
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
88
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
89
   bytes (setting the last byte to the null terminating character) and you can
90
   fill in the data yourself.  If 'str' is non-NULL then the resulting
91
   PyBytes object must be treated as immutable and you must not fill in nor
92
   alter the data yourself, since the strings may be shared.
93
94
   The PyObject member 'op->ob_size', which denotes the number of "extra
95
   items" in a variable-size object, will contain the number of bytes
96
   allocated for string data, not counting the null terminating character.
97
   It is therefore equal to the 'size' parameter (for
98
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
99
   parameter (for PyBytes_FromString()).
100
*/
101
static PyObject *
102
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
103
140M
{
104
140M
    PyBytesObject *op;
105
140M
    assert(size >= 0);
106
107
140M
    if (size == 0) {
108
0
        return bytes_get_empty();
109
0
    }
110
111
140M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
112
0
        PyErr_SetString(PyExc_OverflowError,
113
0
                        "byte string is too large");
114
0
        return NULL;
115
0
    }
116
117
    /* Inline PyObject_NewVar */
118
140M
    if (use_calloc)
119
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
120
140M
    else
121
140M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
122
140M
    if (op == NULL) {
123
0
        return PyErr_NoMemory();
124
0
    }
125
140M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
126
140M
    set_ob_shash(op, -1);
127
140M
    if (!use_calloc) {
128
140M
        op->ob_sval[size] = '\0';
129
140M
    }
130
140M
    return (PyObject *) op;
131
140M
}
132
133
PyObject *
134
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
135
169M
{
136
169M
    PyBytesObject *op;
137
169M
    if (size < 0) {
138
0
        PyErr_SetString(PyExc_SystemError,
139
0
            "Negative size passed to PyBytes_FromStringAndSize");
140
0
        return NULL;
141
0
    }
142
169M
    if (size == 1 && str != NULL) {
143
40.4M
        op = CHARACTER(*str & 255);
144
40.4M
        assert(_Py_IsImmortal(op));
145
40.4M
        return (PyObject *)op;
146
40.4M
    }
147
129M
    if (size == 0) {
148
7.86M
        return bytes_get_empty();
149
7.86M
    }
150
151
121M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
152
121M
    if (op == NULL)
153
0
        return NULL;
154
121M
    if (str == NULL)
155
9.69M
        return (PyObject *) op;
156
157
111M
    memcpy(op->ob_sval, str, size);
158
111M
    return (PyObject *) op;
159
121M
}
160
161
PyObject *
162
PyBytes_FromString(const char *str)
163
90
{
164
90
    size_t size;
165
90
    PyBytesObject *op;
166
167
90
    assert(str != NULL);
168
90
    size = strlen(str);
169
90
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
170
0
        PyErr_SetString(PyExc_OverflowError,
171
0
            "byte string is too long");
172
0
        return NULL;
173
0
    }
174
175
90
    if (size == 0) {
176
0
        return bytes_get_empty();
177
0
    }
178
90
    else if (size == 1) {
179
0
        op = CHARACTER(*str & 255);
180
0
        assert(_Py_IsImmortal(op));
181
0
        return (PyObject *)op;
182
0
    }
183
184
    /* Inline PyObject_NewVar */
185
90
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
186
90
    if (op == NULL) {
187
0
        return PyErr_NoMemory();
188
0
    }
189
90
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
190
90
    set_ob_shash(op, -1);
191
90
    memcpy(op->ob_sval, str, size+1);
192
90
    return (PyObject *) op;
193
90
}
194
195
196
static char*
197
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
198
                 const char *format, va_list vargs)
199
0
{
200
0
    const char *f;
201
0
    const char *p;
202
0
    Py_ssize_t prec;
203
0
    int longflag;
204
0
    int size_tflag;
205
    /* Longest 64-bit formatted numbers:
206
       - "18446744073709551615\0" (21 bytes)
207
       - "-9223372036854775808\0" (21 bytes)
208
       Decimal takes the most space (it isn't enough for octal.)
209
210
       Longest 64-bit pointer representation:
211
       "0xffffffffffffffff\0" (19 bytes). */
212
0
    char buffer[21];
213
214
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
215
216
0
#define WRITE_BYTES_LEN(str, len_expr) \
217
0
    do { \
218
0
        size_t len = (len_expr); \
219
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
220
0
        if (s == NULL) { \
221
0
            goto error; \
222
0
        } \
223
0
        memcpy(s, (str), len); \
224
0
        s += len; \
225
0
    } while (0)
226
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
227
228
0
    for (f = format; *f; f++) {
229
0
        if (*f != '%') {
230
0
            *s++ = *f;
231
0
            continue;
232
0
        }
233
234
0
        p = f++;
235
236
        /* ignore the width (ex: 10 in "%10s") */
237
0
        while (Py_ISDIGIT(*f))
238
0
            f++;
239
240
        /* parse the precision (ex: 10 in "%.10s") */
241
0
        prec = 0;
242
0
        if (*f == '.') {
243
0
            f++;
244
0
            for (; Py_ISDIGIT(*f); f++) {
245
0
                prec = (prec * 10) + (*f - '0');
246
0
            }
247
0
        }
248
249
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
250
0
            f++;
251
252
        /* handle the long flag ('l'), but only for %ld and %lu.
253
           others can be added when necessary. */
254
0
        longflag = 0;
255
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
256
0
            longflag = 1;
257
0
            ++f;
258
0
        }
259
260
        /* handle the size_t flag ('z'). */
261
0
        size_tflag = 0;
262
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
263
0
            size_tflag = 1;
264
0
            ++f;
265
0
        }
266
267
0
        switch (*f) {
268
0
        case 'c':
269
0
        {
270
0
            int c = va_arg(vargs, int);
271
0
            if (c < 0 || c > 255) {
272
0
                PyErr_SetString(PyExc_OverflowError,
273
0
                                "PyBytes_FromFormatV(): %c format "
274
0
                                "expects an integer in range [0; 255]");
275
0
                goto error;
276
0
            }
277
0
            *s++ = (unsigned char)c;
278
0
            break;
279
0
        }
280
281
0
        case 'd':
282
0
            if (longflag) {
283
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
284
0
            }
285
0
            else if (size_tflag) {
286
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
287
0
            }
288
0
            else {
289
0
                sprintf(buffer, "%d", va_arg(vargs, int));
290
0
            }
291
0
            assert(strlen(buffer) < sizeof(buffer));
292
0
            WRITE_BYTES(buffer);
293
0
            break;
294
295
0
        case 'u':
296
0
            if (longflag) {
297
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
298
0
            }
299
0
            else if (size_tflag) {
300
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
301
0
            }
302
0
            else {
303
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
304
0
            }
305
0
            assert(strlen(buffer) < sizeof(buffer));
306
0
            WRITE_BYTES(buffer);
307
0
            break;
308
309
0
        case 'i':
310
0
            sprintf(buffer, "%i", va_arg(vargs, int));
311
0
            assert(strlen(buffer) < sizeof(buffer));
312
0
            WRITE_BYTES(buffer);
313
0
            break;
314
315
0
        case 'x':
316
0
            sprintf(buffer, "%x", va_arg(vargs, int));
317
0
            assert(strlen(buffer) < sizeof(buffer));
318
0
            WRITE_BYTES(buffer);
319
0
            break;
320
321
0
        case 's':
322
0
        {
323
0
            Py_ssize_t i;
324
325
0
            p = va_arg(vargs, const char*);
326
0
            if (prec <= 0) {
327
0
                i = strlen(p);
328
0
            }
329
0
            else {
330
0
                i = 0;
331
0
                while (i < prec && p[i]) {
332
0
                    i++;
333
0
                }
334
0
            }
335
0
            WRITE_BYTES_LEN(p, i);
336
0
            break;
337
0
        }
338
339
0
        case 'p':
340
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
341
0
            assert(strlen(buffer) < sizeof(buffer));
342
            /* %p is ill-defined:  ensure leading 0x. */
343
0
            if (buffer[1] == 'X')
344
0
                buffer[1] = 'x';
345
0
            else if (buffer[1] != 'x') {
346
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
347
0
                buffer[0] = '0';
348
0
                buffer[1] = 'x';
349
0
            }
350
0
            WRITE_BYTES(buffer);
351
0
            break;
352
353
0
        case '%':
354
0
            *s++ = '%';
355
0
            break;
356
357
0
        default:
358
            /* invalid format string: copy unformatted string and exit */
359
0
            WRITE_BYTES(p);
360
0
            return s;
361
0
        }
362
0
    }
363
364
0
#undef WRITE_BYTES
365
0
#undef WRITE_BYTES_LEN
366
367
0
    return s;
368
369
0
 error:
370
0
    return NULL;
371
0
}
372
373
374
PyObject *
375
PyBytes_FromFormatV(const char *format, va_list vargs)
376
0
{
377
0
    Py_ssize_t alloc = strlen(format);
378
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
379
0
    if (writer == NULL) {
380
0
        return NULL;
381
0
    }
382
383
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
384
0
    if (s == NULL) {
385
0
        PyBytesWriter_Discard(writer);
386
0
        return NULL;
387
0
    }
388
389
0
    return PyBytesWriter_FinishWithPointer(writer, s);
390
0
}
391
392
393
/* Variadic front end for PyBytes_FromFormatV(): packs the trailing
   arguments into a va_list, forwards them, and returns that result. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
404
405
406
/* Helpers for formatstring */
407
408
0
/* Raise EXC with a message that identifies the offending format argument.

   Expects 'key' (PyObject * or NULL) and 'argidx' (Py_ssize_t) to be in
   scope at the expansion site:
     - key != NULL  -> "format argument <repr of key>: ..."
     - argidx >= 0  -> "format argument <argidx>: ..."
     - otherwise    -> "format argument: ..."

   FMT must be a string literal (it is pasted onto the prefix), and at least
   one variadic argument must be supplied for it. */
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (key == NULL) {                                                      \
        if (argidx < 0) {                                                   \
            PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);      \
        }                                                                   \
        else {                                                              \
            PyErr_Format((EXC), "format argument %zd: " FMT,                \
                         argidx, __VA_ARGS__);                              \
        }                                                                   \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     key, __VA_ARGS__);                                     \
    }                                                                       \
} while (0)
421
422
/* Fetch the next format argument and advance *p_argidx.

   arglen >= 0: 'args' is indexed with PyTuple_GetItem() at the current
   index.  arglen < 0: 'args' itself is the single argument; it is handed
   out only when 'allowone' is true and the index is still below arglen.

   Returns the item as PyTuple_GetItem() returns it (or 'args' itself), or
   NULL with TypeError set when no argument is available.  Note that the
   index is advanced even when the single-argument case is refused. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone)
{
    const Py_ssize_t argidx = *p_argidx;

    if (argidx < arglen) {
        *p_argidx = argidx + 1;
        if (arglen >= 0) {
            return PyTuple_GetItem(args, argidx);
        }
        if (allowone) {
            return args;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "not enough arguments for format string (got %zd)",
                 arglen < 0 ? 1 : arglen);
    return NULL;
}
440
441
/* Returns a new reference to a PyBytes object, or NULL on failure. */
442
443
static char*
444
formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key,
445
            int flags, int prec, int type,
446
            PyObject **p_result, PyBytesWriter *writer, char *str)
447
0
{
448
0
    char *p;
449
0
    PyObject *result;
450
0
    double x;
451
0
    size_t len;
452
0
    int dtoa_flags = 0;
453
454
0
    x = PyFloat_AsDouble(v);
455
0
    if (x == -1.0 && PyErr_Occurred()) {
456
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
457
0
            FORMAT_ERROR(PyExc_TypeError,
458
0
                         "%%%c requires a real number, not %T",
459
0
                         type, v);
460
0
        }
461
0
        return NULL;
462
0
    }
463
464
0
    if (prec < 0)
465
0
        prec = 6;
466
467
0
    if (flags & F_ALT) {
468
0
        dtoa_flags |= Py_DTSF_ALT;
469
0
    }
470
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
471
472
0
    if (p == NULL)
473
0
        return NULL;
474
475
0
    len = strlen(p);
476
0
    if (writer != NULL) {
477
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
478
0
        if (str == NULL) {
479
0
            PyMem_Free(p);
480
0
            return NULL;
481
0
        }
482
0
        memcpy(str, p, len);
483
0
        PyMem_Free(p);
484
0
        str += len;
485
0
        return str;
486
0
    }
487
488
0
    result = PyBytes_FromStringAndSize(p, len);
489
0
    PyMem_Free(p);
490
0
    *p_result = result;
491
0
    return result != NULL ? str : NULL;
492
0
}
493
494
static PyObject *
495
formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key,
496
           int flags, int prec, int type)
497
0
{
498
0
    PyObject *result, *iobj;
499
0
    if (PyLong_Check(v))
500
0
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
501
0
    if (PyNumber_Check(v)) {
502
        /* make sure number is a type of integer for o, x, and X */
503
0
        if (type == 'o' || type == 'x' || type == 'X')
504
0
            iobj = _PyNumber_Index(v);
505
0
        else
506
0
            iobj = PyNumber_Long(v);
507
0
        if (iobj != NULL) {
508
0
            assert(PyLong_Check(iobj));
509
0
            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
510
0
            Py_DECREF(iobj);
511
0
            return result;
512
0
        }
513
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
514
0
            return NULL;
515
0
    }
516
0
    FORMAT_ERROR(PyExc_TypeError,
517
0
                 "%%%c requires %s, not %T",
518
0
                 type,
519
0
                 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
520
0
                                                             : "a real number",
521
0
                 v);
522
0
    return NULL;
523
0
}
524
525
static int
526
byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p)
527
0
{
528
0
    if (PyBytes_Check(arg)) {
529
0
        if (PyBytes_GET_SIZE(arg) != 1) {
530
0
            FORMAT_ERROR(PyExc_TypeError,
531
0
                         "%%c requires an integer in range(256) or "
532
0
                         "a single byte, not a bytes object of length %zd",
533
0
                         PyBytes_GET_SIZE(arg));
534
0
            return 0;
535
0
        }
536
0
        *p = PyBytes_AS_STRING(arg)[0];
537
0
        return 1;
538
0
    }
539
0
    else if (PyByteArray_Check(arg)) {
540
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
541
0
            FORMAT_ERROR(PyExc_TypeError,
542
0
                         "%%c requires an integer in range(256) or "
543
0
                         "a single byte, not a bytearray object of length %zd",
544
0
                         PyByteArray_GET_SIZE(arg));
545
0
            return 0;
546
0
        }
547
0
        *p = PyByteArray_AS_STRING(arg)[0];
548
0
        return 1;
549
0
    }
550
0
    else if (PyIndex_Check(arg)) {
551
0
        int overflow;
552
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
553
0
        if (ival == -1 && PyErr_Occurred()) {
554
0
            return 0;
555
0
        }
556
0
        if (!(0 <= ival && ival <= 255)) {
557
            /* this includes an overflow in converting to C long */
558
0
            FORMAT_ERROR(PyExc_OverflowError,
559
0
                         "%%c argument not in range(256)%s", "");
560
0
            return 0;
561
0
        }
562
0
        *p = (char)ival;
563
0
        return 1;
564
0
    }
565
0
    FORMAT_ERROR(PyExc_TypeError,
566
0
                 "%%c requires an integer in range(256) or "
567
0
                 "a single byte, not %T",
568
0
                 arg);
569
0
    return 0;
570
0
}
571
572
static PyObject *_PyBytes_FromBuffer(PyObject *x);
573
574
static PyObject *
575
format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key,
576
           const char **pbuf, Py_ssize_t *plen)
577
0
{
578
0
    PyObject *func, *result;
579
    /* is it a bytes object? */
580
0
    if (PyBytes_Check(v)) {
581
0
        *pbuf = PyBytes_AS_STRING(v);
582
0
        *plen = PyBytes_GET_SIZE(v);
583
0
        return Py_NewRef(v);
584
0
    }
585
0
    if (PyByteArray_Check(v)) {
586
0
        *pbuf = PyByteArray_AS_STRING(v);
587
0
        *plen = PyByteArray_GET_SIZE(v);
588
0
        return Py_NewRef(v);
589
0
    }
590
    /* does it support __bytes__? */
591
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
592
0
    if (func != NULL) {
593
0
        result = _PyObject_CallNoArgs(func);
594
0
        Py_DECREF(func);
595
0
        if (result == NULL)
596
0
            return NULL;
597
0
        if (!PyBytes_Check(result)) {
598
0
            PyErr_Format(PyExc_TypeError,
599
0
                         "%T.__bytes__() must return a bytes, not %T",
600
0
                         v, result);
601
0
            Py_DECREF(result);
602
0
            return NULL;
603
0
        }
604
0
        *pbuf = PyBytes_AS_STRING(result);
605
0
        *plen = PyBytes_GET_SIZE(result);
606
0
        return result;
607
0
    }
608
    /* does it support buffer protocol? */
609
0
    if (PyObject_CheckBuffer(v)) {
610
        /* maybe we can avoid making a copy of the buffer object here? */
611
0
        result = _PyBytes_FromBuffer(v);
612
0
        if (result == NULL)
613
0
            return NULL;
614
0
        *pbuf = PyBytes_AS_STRING(result);
615
0
        *plen = PyBytes_GET_SIZE(result);
616
0
        return result;
617
0
    }
618
0
    FORMAT_ERROR(PyExc_TypeError,
619
0
                 "%%b requires a bytes-like object, "
620
0
                 "or an object that implements __bytes__, not %T",
621
0
                 v);
622
0
    return NULL;
623
0
}
624
625
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
626
627
PyObject *
628
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
629
                  PyObject *args, int use_bytearray)
630
0
{
631
0
    const char *fmt;
632
0
    Py_ssize_t arglen, argidx;
633
0
    Py_ssize_t fmtcnt;
634
0
    int args_owned = 0;
635
0
    PyObject *dict = NULL;
636
0
    PyObject *key = NULL;
637
638
0
    if (args == NULL) {
639
0
        PyErr_BadInternalCall();
640
0
        return NULL;
641
0
    }
642
0
    fmt = format;
643
0
    fmtcnt = format_len;
644
645
0
    PyBytesWriter *writer;
646
0
    if (use_bytearray) {
647
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
648
0
    }
649
0
    else {
650
0
        writer = PyBytesWriter_Create(fmtcnt);
651
0
    }
652
0
    if (writer == NULL) {
653
0
        return NULL;
654
0
    }
655
0
    char *res = PyBytesWriter_GetData(writer);
656
657
0
    if (PyTuple_Check(args)) {
658
0
        arglen = PyTuple_GET_SIZE(args);
659
0
        argidx = 0;
660
0
    }
661
0
    else {
662
0
        arglen = -1;
663
0
        argidx = -2;
664
0
    }
665
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
666
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
667
0
        !PyByteArray_Check(args)) {
668
0
            dict = args;
669
0
    }
670
671
0
    while (--fmtcnt >= 0) {
672
0
        if (*fmt != '%') {
673
0
            Py_ssize_t len;
674
0
            char *pos;
675
676
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
677
0
            if (pos != NULL)
678
0
                len = pos - fmt;
679
0
            else
680
0
                len = fmtcnt + 1;
681
0
            assert(len != 0);
682
683
0
            memcpy(res, fmt, len);
684
0
            res += len;
685
0
            fmt += len;
686
0
            fmtcnt -= (len - 1);
687
0
        }
688
0
        else {
689
            /* Got a format specifier */
690
0
            int flags = 0;
691
0
            Py_ssize_t width = -1;
692
0
            int prec = -1;
693
0
            int c = '\0';
694
0
            int fill;
695
0
            PyObject *v = NULL;
696
0
            PyObject *temp = NULL;
697
0
            const char *pbuf = NULL;
698
0
            int sign;
699
0
            Py_ssize_t len = 0;
700
0
            char onechar; /* For byte_converter() */
701
0
            Py_ssize_t alloc;
702
703
0
            fmt++;
704
0
            if (*fmt == '%') {
705
0
                *res++ = '%';
706
0
                fmt++;
707
0
                fmtcnt--;
708
0
                continue;
709
0
            }
710
0
            Py_CLEAR(key);
711
0
            const char *fmtstart = fmt;
712
0
            if (*fmt == '(') {
713
0
                const char *keystart;
714
0
                Py_ssize_t keylen;
715
0
                int pcount = 1;
716
717
0
                if (dict == NULL) {
718
0
                    PyErr_Format(PyExc_TypeError,
719
0
                                 "format requires a mapping, not %T",
720
0
                                 args);
721
0
                    goto error;
722
0
                }
723
0
                ++fmt;
724
0
                --fmtcnt;
725
0
                keystart = fmt;
726
                /* Skip over balanced parentheses */
727
0
                while (pcount > 0 && --fmtcnt >= 0) {
728
0
                    if (*fmt == ')')
729
0
                        --pcount;
730
0
                    else if (*fmt == '(')
731
0
                        ++pcount;
732
0
                    fmt++;
733
0
                }
734
0
                keylen = fmt - keystart - 1;
735
0
                if (fmtcnt < 0 || pcount > 0) {
736
0
                    PyErr_Format(PyExc_ValueError,
737
0
                                 "stray %% or incomplete format key "
738
0
                                 "at position %zd",
739
0
                                 (Py_ssize_t)(fmtstart - format - 1));
740
0
                    goto error;
741
0
                }
742
0
                key = PyBytes_FromStringAndSize(keystart,
743
0
                                                 keylen);
744
0
                if (key == NULL)
745
0
                    goto error;
746
0
                if (args_owned) {
747
0
                    Py_DECREF(args);
748
0
                    args_owned = 0;
749
0
                }
750
0
                args = PyObject_GetItem(dict, key);
751
0
                if (args == NULL) {
752
0
                    goto error;
753
0
                }
754
0
                args_owned = 1;
755
0
                arglen = -3;
756
0
                argidx = -4;
757
0
            }
758
0
            else {
759
0
                if (arglen < -1) {
760
0
                    PyErr_Format(PyExc_ValueError,
761
0
                                 "format requires a parenthesised mapping key "
762
0
                                 "at position %zd",
763
0
                                 (Py_ssize_t)(fmtstart - format - 1));
764
0
                    goto error;
765
0
                }
766
0
            }
767
768
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
769
0
            while (--fmtcnt >= 0) {
770
0
                switch (c = *fmt++) {
771
0
                case '-': flags |= F_LJUST; continue;
772
0
                case '+': flags |= F_SIGN; continue;
773
0
                case ' ': flags |= F_BLANK; continue;
774
0
                case '#': flags |= F_ALT; continue;
775
0
                case '0': flags |= F_ZERO; continue;
776
0
                }
777
0
                break;
778
0
            }
779
780
            /* Parse width. Example: "%10s" => width=10 */
781
0
            if (c == '*') {
782
0
                if (arglen < -1) {
783
0
                    PyErr_Format(PyExc_ValueError,
784
0
                            "* cannot be used with a parenthesised mapping key "
785
0
                            "at position %zd",
786
0
                            (Py_ssize_t)(fmtstart - format - 1));
787
0
                    goto error;
788
0
                }
789
0
                v = getnextarg(args, arglen, &argidx, 0);
790
0
                if (v == NULL)
791
0
                    goto error;
792
0
                if (!PyLong_Check(v)) {
793
0
                    FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
794
0
                    goto error;
795
0
                }
796
0
                width = PyLong_AsSsize_t(v);
797
0
                if (width == -1 && PyErr_Occurred()) {
798
0
                    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
799
0
                        FORMAT_ERROR(PyExc_OverflowError,
800
0
                                     "too big for width%s", "");
801
0
                    }
802
0
                    goto error;
803
0
                }
804
0
                if (width < 0) {
805
0
                    flags |= F_LJUST;
806
0
                    width = -width;
807
0
                }
808
0
                if (--fmtcnt >= 0)
809
0
                    c = *fmt++;
810
0
            }
811
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
812
0
                width = c - '0';
813
0
                while (--fmtcnt >= 0) {
814
0
                    c = Py_CHARMASK(*fmt++);
815
0
                    if (!Py_ISDIGIT(c))
816
0
                        break;
817
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
818
0
                        PyErr_Format(PyExc_ValueError,
819
0
                                     "width too big at position %zd",
820
0
                                     (Py_ssize_t)(fmtstart - format - 1));
821
0
                        goto error;
822
0
                    }
823
0
                    width = width*10 + (c - '0');
824
0
                }
825
0
            }
826
827
            /* Parse precision. Example: "%.3f" => prec=3 */
828
0
            if (c == '.') {
829
0
                prec = 0;
830
0
                if (--fmtcnt >= 0)
831
0
                    c = *fmt++;
832
0
                if (c == '*') {
833
0
                    if (arglen < -1) {
834
0
                        PyErr_Format(PyExc_ValueError,
835
0
                                "* cannot be used with a parenthesised mapping key "
836
0
                                "at position %zd",
837
0
                                (Py_ssize_t)(fmtstart - format - 1));
838
0
                        goto error;
839
0
                    }
840
0
                    v = getnextarg(args, arglen, &argidx, 0);
841
0
                    if (v == NULL)
842
0
                        goto error;
843
0
                    if (!PyLong_Check(v)) {
844
0
                        FORMAT_ERROR(PyExc_TypeError,
845
0
                                     "* requires int, not %T", v);
846
0
                        goto error;
847
0
                    }
848
0
                    prec = PyLong_AsInt(v);
849
0
                    if (prec == -1 && PyErr_Occurred()) {
850
0
                        if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
851
0
                            FORMAT_ERROR(PyExc_OverflowError,
852
0
                                         "too big for precision%s", "");
853
0
                        }
854
0
                        goto error;
855
0
                    }
856
0
                    if (prec < 0)
857
0
                        prec = 0;
858
0
                    if (--fmtcnt >= 0)
859
0
                        c = *fmt++;
860
0
                }
861
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
862
0
                    prec = c - '0';
863
0
                    while (--fmtcnt >= 0) {
864
0
                        c = Py_CHARMASK(*fmt++);
865
0
                        if (!Py_ISDIGIT(c))
866
0
                            break;
867
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
868
0
                            PyErr_Format(PyExc_ValueError,
869
0
                                "precision too big at position %zd",
870
0
                                (Py_ssize_t)(fmtstart - format - 1));
871
0
                            goto error;
872
0
                        }
873
0
                        prec = prec*10 + (c - '0');
874
0
                    }
875
0
                }
876
0
            } /* prec */
877
0
            if (fmtcnt >= 0) {
878
0
                if (c == 'h' || c == 'l' || c == 'L') {
879
0
                    if (--fmtcnt >= 0)
880
0
                        c = *fmt++;
881
0
                }
882
0
            }
883
0
            if (fmtcnt < 0) {
884
0
                PyErr_Format(PyExc_ValueError,
885
0
                             "stray %% at position %zd",
886
0
                             (Py_ssize_t)(fmtstart - format - 1));
887
0
                goto error;
888
0
            }
889
0
            v = getnextarg(args, arglen, &argidx, 1);
890
0
            if (v == NULL)
891
0
                goto error;
892
893
0
            if (fmtcnt == 0) {
894
                /* last write: disable writer overallocation */
895
0
                writer->overallocate = 0;
896
0
            }
897
898
0
            sign = 0;
899
0
            fill = ' ';
900
0
            switch (c) {
901
0
            case 'r':
902
                // %r is only for 2/3 code; 3 only code should use %a
903
0
            case 'a':
904
0
                temp = PyObject_ASCII(v);
905
0
                if (temp == NULL)
906
0
                    goto error;
907
0
                assert(PyUnicode_IS_ASCII(temp));
908
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
909
0
                len = PyUnicode_GET_LENGTH(temp);
910
0
                if (prec >= 0 && len > prec)
911
0
                    len = prec;
912
0
                break;
913
914
0
            case 's':
915
                // %s is only for 2/3 code; 3 only code should use %b
916
0
            case 'b':
917
0
                temp = format_obj(v, argidx, key, &pbuf, &len);
918
0
                if (temp == NULL)
919
0
                    goto error;
920
0
                if (prec >= 0 && len > prec)
921
0
                    len = prec;
922
0
                break;
923
924
0
            case 'i':
925
0
            case 'd':
926
0
            case 'u':
927
0
            case 'o':
928
0
            case 'x':
929
0
            case 'X':
930
0
                if (PyLong_CheckExact(v)
931
0
                    && width == -1 && prec == -1
932
0
                    && !(flags & (F_SIGN | F_BLANK))
933
0
                    && c != 'X')
934
0
                {
935
                    /* Fast path */
936
0
                    int alternate = flags & F_ALT;
937
0
                    int base;
938
939
0
                    switch(c)
940
0
                    {
941
0
                        default:
942
0
                            Py_UNREACHABLE();
943
0
                        case 'd':
944
0
                        case 'i':
945
0
                        case 'u':
946
0
                            base = 10;
947
0
                            break;
948
0
                        case 'o':
949
0
                            base = 8;
950
0
                            break;
951
0
                        case 'x':
952
0
                        case 'X':
953
0
                            base = 16;
954
0
                            break;
955
0
                    }
956
957
                    /* Fast path */
958
0
                    res = _PyLong_FormatBytesWriter(writer, res,
959
0
                                                    v, base, alternate);
960
0
                    if (res == NULL)
961
0
                        goto error;
962
0
                    continue;
963
0
                }
964
965
0
                temp = formatlong(v, argidx, key, flags, prec, c);
966
0
                if (!temp)
967
0
                    goto error;
968
0
                assert(PyUnicode_IS_ASCII(temp));
969
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
970
0
                len = PyUnicode_GET_LENGTH(temp);
971
0
                sign = 1;
972
0
                if (flags & F_ZERO)
973
0
                    fill = '0';
974
0
                break;
975
976
0
            case 'e':
977
0
            case 'E':
978
0
            case 'f':
979
0
            case 'F':
980
0
            case 'g':
981
0
            case 'G':
982
0
                if (width == -1 && prec == -1
983
0
                    && !(flags & (F_SIGN | F_BLANK)))
984
0
                {
985
                    /* Fast path */
986
0
                    res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res);
987
0
                    if (res == NULL)
988
0
                        goto error;
989
0
                    continue;
990
0
                }
991
992
0
                if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res))
993
0
                    goto error;
994
0
                pbuf = PyBytes_AS_STRING(temp);
995
0
                len = PyBytes_GET_SIZE(temp);
996
0
                sign = 1;
997
0
                if (flags & F_ZERO)
998
0
                    fill = '0';
999
0
                break;
1000
1001
0
            case 'c':
1002
0
                pbuf = &onechar;
1003
0
                len = byte_converter(v, argidx, key, &onechar);
1004
0
                if (!len)
1005
0
                    goto error;
1006
0
                if (width == -1) {
1007
                    /* Fast path */
1008
0
                    *res++ = onechar;
1009
0
                    continue;
1010
0
                }
1011
0
                break;
1012
1013
0
            default:
1014
0
                if (Py_ISALPHA(c)) {
1015
0
                    PyErr_Format(PyExc_ValueError,
1016
0
                                 "unsupported format %%%c at position %zd",
1017
0
                                 c, (Py_ssize_t)(fmtstart - format - 1));
1018
0
                }
1019
0
                else if (c == '\'') {
1020
0
                    PyErr_Format(PyExc_ValueError,
1021
0
                                 "stray %% at position %zd or unexpected "
1022
0
                                 "format character \"'\" "
1023
0
                                 "at position %zd",
1024
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1025
0
                                 (Py_ssize_t)(fmt - format - 1));
1026
0
                }
1027
0
                else if (c >= 32 && c < 127 && c != '\'') {
1028
0
                    PyErr_Format(PyExc_ValueError,
1029
0
                                 "stray %% at position %zd or unexpected "
1030
0
                                 "format character '%c' "
1031
0
                                 "at position %zd",
1032
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1033
0
                                 c, (Py_ssize_t)(fmt - format - 1));
1034
0
                }
1035
0
                else {
1036
0
                    PyErr_Format(PyExc_ValueError,
1037
0
                                 "stray %% at position %zd or unexpected "
1038
0
                                 "format character with code 0x%02x "
1039
0
                                 "at position %zd",
1040
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1041
0
                                 Py_CHARMASK(c),
1042
0
                                 (Py_ssize_t)(fmt - format - 1));
1043
0
                }
1044
0
                goto error;
1045
0
            }
1046
1047
0
            if (sign) {
1048
0
                if (*pbuf == '-' || *pbuf == '+') {
1049
0
                    sign = *pbuf++;
1050
0
                    len--;
1051
0
                }
1052
0
                else if (flags & F_SIGN)
1053
0
                    sign = '+';
1054
0
                else if (flags & F_BLANK)
1055
0
                    sign = ' ';
1056
0
                else
1057
0
                    sign = 0;
1058
0
            }
1059
0
            if (width < len)
1060
0
                width = len;
1061
1062
0
            alloc = width;
1063
0
            if (sign != 0 && len == width)
1064
0
                alloc++;
1065
            /* 2: size preallocated for %s */
1066
0
            if (alloc > 2) {
1067
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
1068
0
                if (res == NULL) {
1069
0
                    Py_XDECREF(temp);
1070
0
                    goto error;
1071
0
                }
1072
0
            }
1073
#ifndef NDEBUG
1074
            char *before = res;
1075
#endif
1076
1077
            /* Write the sign if needed */
1078
0
            if (sign) {
1079
0
                if (fill != ' ')
1080
0
                    *res++ = sign;
1081
0
                if (width > len)
1082
0
                    width--;
1083
0
            }
1084
1085
            /* Write the numeric prefix for "x", "X" and "o" formats
1086
               if the alternate form is used.
1087
               For example, write "0x" for the "%#x" format. */
1088
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1089
0
                assert(pbuf[0] == '0');
1090
0
                assert(pbuf[1] == c);
1091
0
                if (fill != ' ') {
1092
0
                    *res++ = *pbuf++;
1093
0
                    *res++ = *pbuf++;
1094
0
                }
1095
0
                width -= 2;
1096
0
                if (width < 0)
1097
0
                    width = 0;
1098
0
                len -= 2;
1099
0
            }
1100
1101
            /* Pad left with the fill character if needed */
1102
0
            if (width > len && !(flags & F_LJUST)) {
1103
0
                memset(res, fill, width - len);
1104
0
                res += (width - len);
1105
0
                width = len;
1106
0
            }
1107
1108
            /* If padding with spaces: write sign if needed and/or numeric
1109
               prefix if the alternate form is used */
1110
0
            if (fill == ' ') {
1111
0
                if (sign)
1112
0
                    *res++ = sign;
1113
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1114
0
                    assert(pbuf[0] == '0');
1115
0
                    assert(pbuf[1] == c);
1116
0
                    *res++ = *pbuf++;
1117
0
                    *res++ = *pbuf++;
1118
0
                }
1119
0
            }
1120
1121
            /* Copy bytes */
1122
0
            memcpy(res, pbuf, len);
1123
0
            res += len;
1124
1125
            /* Pad right with the fill character if needed */
1126
0
            if (width > len) {
1127
0
                memset(res, ' ', width - len);
1128
0
                res += (width - len);
1129
0
            }
1130
1131
0
            if (dict && (argidx < arglen)) {
1132
                // XXX: Never happens?
1133
0
                PyErr_SetString(PyExc_TypeError,
1134
0
                           "not all arguments converted during bytes formatting");
1135
0
                Py_XDECREF(temp);
1136
0
                goto error;
1137
0
            }
1138
0
            Py_XDECREF(temp);
1139
1140
#ifndef NDEBUG
1141
            /* check that we computed the exact size for this write */
1142
            assert((res - before) == alloc);
1143
#endif
1144
0
        } /* '%' */
1145
1146
        /* If overallocation was disabled, ensure that it was the last
1147
           write. Otherwise, we missed an optimization */
1148
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1149
0
    } /* until end */
1150
1151
0
    if (argidx < arglen && !dict) {
1152
0
        PyErr_Format(PyExc_TypeError,
1153
0
                     "not all arguments converted during bytes formatting "
1154
0
                     "(required %zd, got %zd)",
1155
0
                     arglen < 0 ? 0 : argidx,
1156
0
                     arglen < 0 ? 1 : arglen);
1157
0
        goto error;
1158
0
    }
1159
1160
0
    Py_XDECREF(key);
1161
0
    if (args_owned) {
1162
0
        Py_DECREF(args);
1163
0
    }
1164
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1165
1166
0
 error:
1167
0
    Py_XDECREF(key);
1168
0
    PyBytesWriter_Discard(writer);
1169
0
    if (args_owned) {
1170
0
        Py_DECREF(args);
1171
0
    }
1172
0
    return NULL;
1173
0
}
1174
1175
/* Unescape a backslash-escaped string. */
1176
PyObject *_PyBytes_DecodeEscape2(const char *s,
1177
                                Py_ssize_t len,
1178
                                const char *errors,
1179
                                int *first_invalid_escape_char,
1180
                                const char **first_invalid_escape_ptr)
1181
2.62k
{
1182
2.62k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1183
2.62k
    if (writer == NULL) {
1184
0
        return NULL;
1185
0
    }
1186
2.62k
    char *p = PyBytesWriter_GetData(writer);
1187
1188
2.62k
    *first_invalid_escape_char = -1;
1189
2.62k
    *first_invalid_escape_ptr = NULL;
1190
1191
2.62k
    const char *end = s + len;
1192
67.5k
    while (s < end) {
1193
64.9k
        if (*s != '\\') {
1194
54.6k
            *p++ = *s++;
1195
54.6k
            continue;
1196
54.6k
        }
1197
1198
10.2k
        s++;
1199
10.2k
        if (s == end) {
1200
0
            PyErr_SetString(PyExc_ValueError,
1201
0
                            "Trailing \\ in string");
1202
0
            goto failed;
1203
0
        }
1204
1205
10.2k
        switch (*s++) {
1206
        /* XXX This assumes ASCII! */
1207
727
        case '\n': break;
1208
793
        case '\\': *p++ = '\\'; break;
1209
345
        case '\'': *p++ = '\''; break;
1210
268
        case '\"': *p++ = '\"'; break;
1211
202
        case 'b': *p++ = '\b'; break;
1212
189
        case 'f': *p++ = '\014'; break; /* FF */
1213
295
        case 't': *p++ = '\t'; break;
1214
355
        case 'n': *p++ = '\n'; break;
1215
333
        case 'r': *p++ = '\r'; break;
1216
531
        case 'v': *p++ = '\013'; break; /* VT */
1217
222
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1218
2.46k
        case '0': case '1': case '2': case '3':
1219
4.39k
        case '4': case '5': case '6': case '7':
1220
4.39k
        {
1221
4.39k
            int c = s[-1] - '0';
1222
4.39k
            if (s < end && '0' <= *s && *s <= '7') {
1223
1.96k
                c = (c<<3) + *s++ - '0';
1224
1.96k
                if (s < end && '0' <= *s && *s <= '7')
1225
714
                    c = (c<<3) + *s++ - '0';
1226
1.96k
            }
1227
4.39k
            if (c > 0377) {
1228
653
                if (*first_invalid_escape_char == -1) {
1229
268
                    *first_invalid_escape_char = c;
1230
                    /* Back up 3 chars, since we've already incremented s. */
1231
268
                    *first_invalid_escape_ptr = s - 3;
1232
268
                }
1233
653
            }
1234
4.39k
            *p++ = c;
1235
4.39k
            break;
1236
3.97k
        }
1237
321
        case 'x':
1238
321
            if (s+1 < end) {
1239
320
                int digit1, digit2;
1240
320
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1241
320
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1242
320
                if (digit1 < 16 && digit2 < 16) {
1243
316
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1244
316
                    s += 2;
1245
316
                    break;
1246
316
                }
1247
320
            }
1248
            /* invalid hexadecimal digits */
1249
1250
5
            if (!errors || strcmp(errors, "strict") == 0) {
1251
5
                PyErr_Format(PyExc_ValueError,
1252
5
                             "invalid \\x escape at position %zd",
1253
5
                             s - 2 - (end - len));
1254
5
                goto failed;
1255
5
            }
1256
0
            if (strcmp(errors, "replace") == 0) {
1257
0
                *p++ = '?';
1258
0
            } else if (strcmp(errors, "ignore") == 0)
1259
0
                /* do nothing */;
1260
0
            else {
1261
0
                PyErr_Format(PyExc_ValueError,
1262
0
                             "decoding error; unknown "
1263
0
                             "error handling code: %.400s",
1264
0
                             errors);
1265
0
                goto failed;
1266
0
            }
1267
            /* skip \x */
1268
0
            if (s < end && Py_ISXDIGIT(s[0]))
1269
0
                s++; /* and a hexdigit */
1270
0
            break;
1271
1272
1.30k
        default:
1273
1.30k
            if (*first_invalid_escape_char == -1) {
1274
684
                *first_invalid_escape_char = (unsigned char)s[-1];
1275
                /* Back up one char, since we've already incremented s. */
1276
684
                *first_invalid_escape_ptr = s - 1;
1277
684
            }
1278
1.30k
            *p++ = '\\';
1279
1.30k
            s--;
1280
10.2k
        }
1281
10.2k
    }
1282
1283
2.62k
    return PyBytesWriter_FinishWithPointer(writer, p);
1284
1285
5
  failed:
1286
5
    PyBytesWriter_Discard(writer);
1287
5
    return NULL;
1288
2.62k
}
1289
1290
PyObject *PyBytes_DecodeEscape(const char *s,
1291
                                Py_ssize_t len,
1292
                                const char *errors,
1293
                                Py_ssize_t Py_UNUSED(unicode),
1294
                                const char *Py_UNUSED(recode_encoding))
1295
0
{
1296
0
    int first_invalid_escape_char;
1297
0
    const char *first_invalid_escape_ptr;
1298
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1299
0
                                             &first_invalid_escape_char,
1300
0
                                             &first_invalid_escape_ptr);
1301
0
    if (result == NULL)
1302
0
        return NULL;
1303
0
    if (first_invalid_escape_char != -1) {
1304
0
        if (first_invalid_escape_char > 0xff) {
1305
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1306
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1307
0
                                 "Such sequences will not work in the future. ",
1308
0
                                 first_invalid_escape_char) < 0)
1309
0
            {
1310
0
                Py_DECREF(result);
1311
0
                return NULL;
1312
0
            }
1313
0
        }
1314
0
        else {
1315
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1316
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1317
0
                                 "Such sequences will not work in the future. ",
1318
0
                                 first_invalid_escape_char) < 0)
1319
0
            {
1320
0
                Py_DECREF(result);
1321
0
                return NULL;
1322
0
            }
1323
0
        }
1324
0
    }
1325
0
    return result;
1326
0
}
1327
/* -------------------------------------------------------------------- */
1328
/* object api */
1329
1330
Py_ssize_t
1331
PyBytes_Size(PyObject *op)
1332
5.19k
{
1333
5.19k
    if (!PyBytes_Check(op)) {
1334
0
        PyErr_Format(PyExc_TypeError,
1335
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1336
0
        return -1;
1337
0
    }
1338
5.19k
    return Py_SIZE(op);
1339
5.19k
}
1340
1341
char *
1342
PyBytes_AsString(PyObject *op)
1343
12.4M
{
1344
12.4M
    if (!PyBytes_Check(op)) {
1345
0
        PyErr_Format(PyExc_TypeError,
1346
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1347
0
        return NULL;
1348
0
    }
1349
12.4M
    return ((PyBytesObject *)op)->ob_sval;
1350
12.4M
}
1351
1352
int
1353
PyBytes_AsStringAndSize(PyObject *obj,
1354
                         char **s,
1355
                         Py_ssize_t *len)
1356
70.6k
{
1357
70.6k
    if (s == NULL) {
1358
0
        PyErr_BadInternalCall();
1359
0
        return -1;
1360
0
    }
1361
1362
70.6k
    if (!PyBytes_Check(obj)) {
1363
0
        PyErr_Format(PyExc_TypeError,
1364
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1365
0
        return -1;
1366
0
    }
1367
1368
70.6k
    *s = PyBytes_AS_STRING(obj);
1369
70.6k
    if (len != NULL)
1370
70.6k
        *len = PyBytes_GET_SIZE(obj);
1371
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1372
0
        PyErr_SetString(PyExc_ValueError,
1373
0
                        "embedded null byte");
1374
0
        return -1;
1375
0
    }
1376
70.6k
    return 0;
1377
70.6k
}
1378
1379
/* -------------------------------------------------------------------- */
1380
/* Methods */
1381
1382
1.87k
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1383
1384
#include "stringlib/stringdefs.h"
1385
#define STRINGLIB_MUTABLE 0
1386
1387
#include "stringlib/fastsearch.h"
1388
#include "stringlib/count.h"
1389
#include "stringlib/find.h"
1390
#include "stringlib/join.h"
1391
#include "stringlib/partition.h"
1392
#include "stringlib/split.h"
1393
#include "stringlib/ctype.h"
1394
1395
#include "stringlib/transmogrify.h"
1396
1397
#undef STRINGLIB_GET_EMPTY
1398
1399
Py_ssize_t
1400
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1401
              const char *needle, Py_ssize_t len_needle,
1402
              Py_ssize_t offset)
1403
0
{
1404
0
    assert(len_haystack >= 0);
1405
0
    assert(len_needle >= 0);
1406
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1407
0
    if (len_needle == 0) {
1408
0
        return offset;
1409
0
    }
1410
0
    if (len_needle > len_haystack) {
1411
0
        return -1;
1412
0
    }
1413
0
    assert(len_haystack >= 1);
1414
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1415
0
                                    needle, len_needle, offset);
1416
0
    if (res == -1) {
1417
0
        Py_ssize_t last_align = len_haystack - len_needle;
1418
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1419
0
            return offset + last_align;
1420
0
        }
1421
0
    }
1422
0
    return res;
1423
0
}
1424
1425
Py_ssize_t
1426
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1427
                     const char *needle, Py_ssize_t len_needle,
1428
                     Py_ssize_t offset)
1429
0
{
1430
0
    return stringlib_rfind(haystack, len_haystack,
1431
0
                           needle, len_needle, offset);
1432
0
}
1433
1434
/* Build the repr of the bytes object `obj` by delegating to
   _Py_bytes_repr() with the class name "bytes". */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    const Py_ssize_t size = PyBytes_GET_SIZE(obj);

    return _Py_bytes_repr(data, size, smartquotes, "bytes");
}
1440
1441
PyObject *
1442
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1443
               const char *classname)
1444
3.20k
{
1445
3.20k
    Py_ssize_t i;
1446
3.20k
    Py_ssize_t newsize, squotes, dquotes;
1447
3.20k
    PyObject *v;
1448
3.20k
    unsigned char quote;
1449
3.20k
    Py_UCS1 *p;
1450
1451
    /* Compute size of output string */
1452
3.20k
    squotes = dquotes = 0;
1453
3.20k
    newsize = 3; /* b'' */
1454
3.55M
    for (i = 0; i < length; i++) {
1455
3.55M
        unsigned char c = data[i];
1456
3.55M
        Py_ssize_t incr = 1;
1457
3.55M
        switch(c) {
1458
4.66k
        case '\'': squotes++; break;
1459
9.67k
        case '"':  dquotes++; break;
1460
37.0k
        case '\\': case '\t': case '\n': case '\r':
1461
37.0k
            incr = 2; break; /* \C */
1462
3.50M
        default:
1463
3.50M
            if (c < ' ' || c >= 0x7f)
1464
2.62M
                incr = 4; /* \xHH */
1465
3.55M
        }
1466
3.55M
        if (newsize > PY_SSIZE_T_MAX - incr)
1467
0
            goto overflow;
1468
3.55M
        newsize += incr;
1469
3.55M
    }
1470
3.20k
    quote = '\'';
1471
3.20k
    if (smartquotes && squotes && !dquotes)
1472
115
        quote = '"';
1473
3.20k
    if (squotes && quote == '\'') {
1474
204
        if (newsize > PY_SSIZE_T_MAX - squotes)
1475
0
            goto overflow;
1476
204
        newsize += squotes;
1477
204
    }
1478
1479
3.20k
    v = PyUnicode_New(newsize, 127);
1480
3.20k
    if (v == NULL) {
1481
0
        return NULL;
1482
0
    }
1483
3.20k
    p = PyUnicode_1BYTE_DATA(v);
1484
1485
3.20k
    *p++ = 'b', *p++ = quote;
1486
3.55M
    for (i = 0; i < length; i++) {
1487
3.55M
        unsigned char c = data[i];
1488
3.55M
        if (c == quote || c == '\\')
1489
5.23k
            *p++ = '\\', *p++ = c;
1490
3.54M
        else if (c == '\t')
1491
20.4k
            *p++ = '\\', *p++ = 't';
1492
3.52M
        else if (c == '\n')
1493
6.62k
            *p++ = '\\', *p++ = 'n';
1494
3.52M
        else if (c == '\r')
1495
7.48k
            *p++ = '\\', *p++ = 'r';
1496
3.51M
        else if (c < ' ' || c >= 0x7f) {
1497
2.62M
            *p++ = '\\';
1498
2.62M
            *p++ = 'x';
1499
2.62M
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1500
2.62M
            *p++ = Py_hexdigits[c & 0xf];
1501
2.62M
        }
1502
884k
        else
1503
884k
            *p++ = c;
1504
3.55M
    }
1505
3.20k
    *p++ = quote;
1506
3.20k
    assert(_PyUnicode_CheckConsistency(v, 1));
1507
3.20k
    return v;
1508
1509
0
  overflow:
1510
0
    PyErr_Format(PyExc_OverflowError,
1511
0
                 "%s object is too large to make repr", classname);
1512
0
    return NULL;
1513
3.20k
}
1514
1515
/* repr() of a bytes object: PyBytes_Repr() with smart quoting enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1520
1521
/* str() of a bytes object: same as its repr.  When the `bytes_warning`
   config field is non-zero a BytesWarning is issued first; a non-zero result
   from PyErr_WarnEx() makes this function return NULL. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1532
1533
static Py_ssize_t
1534
bytes_length(PyObject *self)
1535
42.6M
{
1536
42.6M
    PyBytesObject *a = _PyBytes_CAST(self);
1537
42.6M
    return Py_SIZE(a);
1538
42.6M
}
1539
1540
/* This is also used by PyBytes_Concat() and the specializing interpreter. */
1541
/* Concatenate the buffers exported by `a` and `b` into a new bytes object.

   If one buffer is empty and the other operand is an exact bytes object, a
   new reference to that operand is returned instead of a copy.  Returns NULL
   with an exception set when a buffer cannot be obtained (TypeError), the
   combined length would exceed PY_SSIZE_T_MAX (PyErr_NoMemory), or
   PyBytes_FromStringAndSize() fails. */
PyObject *
_PyBytes_Concat(PyObject *a, PyObject *b)
{
    Py_buffer left, right;
    PyObject *joined = NULL;

    /* A length of -1 marks a buffer that has not been acquired. */
    left.len = -1;
    right.len = -1;

    if (PyObject_GetBuffer(a, &left, PyBUF_SIMPLE) != 0 ||
        PyObject_GetBuffer(b, &right, PyBUF_SIMPLE) != 0) {
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
    }
    /* Optimize end cases */
    else if (left.len == 0 && PyBytes_CheckExact(b)) {
        joined = Py_NewRef(b);
    }
    else if (right.len == 0 && PyBytes_CheckExact(a)) {
        joined = Py_NewRef(a);
    }
    else if (left.len > PY_SSIZE_T_MAX - right.len) {
        PyErr_NoMemory();
    }
    else {
        joined = PyBytes_FromStringAndSize(NULL, left.len + right.len);
        if (joined != NULL) {
            char *dest = PyBytes_AS_STRING(joined);
            memcpy(dest, left.buf, left.len);
            memcpy(dest + left.len, right.buf, right.len);
        }
    }

    /* Release whichever buffers no longer carry the -1 marker. */
    if (left.len != -1) {
        PyBuffer_Release(&left);
    }
    if (right.len != -1) {
        PyBuffer_Release(&right);
    }
    return joined;
}
1584
1585
PyObject *
1586
_PyBytes_Repeat(PyObject *self, Py_ssize_t n)
1587
186k
{
1588
186k
    PyBytesObject *a = _PyBytes_CAST(self);
1589
186k
    if (n < 0)
1590
0
        n = 0;
1591
    /* watch out for overflows:  the size can overflow int,
1592
     * and the # of bytes needed can overflow size_t
1593
     */
1594
186k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1595
0
        PyErr_SetString(PyExc_OverflowError,
1596
0
            "repeated bytes are too long");
1597
0
        return NULL;
1598
0
    }
1599
186k
    Py_ssize_t size = Py_SIZE(a) * n;
1600
186k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1601
7
        return Py_NewRef(a);
1602
7
    }
1603
186k
    size_t nbytes = (size_t)size;
1604
186k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1605
0
        PyErr_SetString(PyExc_OverflowError,
1606
0
            "repeated bytes are too long");
1607
0
        return NULL;
1608
0
    }
1609
186k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1610
186k
    if (op == NULL) {
1611
0
        return PyErr_NoMemory();
1612
0
    }
1613
186k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1614
186k
    set_ob_shash(op, -1);
1615
186k
    op->ob_sval[size] = '\0';
1616
1617
186k
    _PyBytes_RepeatBuffer(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1618
1619
186k
    return (PyObject *) op;
1620
186k
}
1621
1622
static int
1623
bytes_contains(PyObject *self, PyObject *arg)
1624
2.93k
{
1625
2.93k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1626
2.93k
}
1627
1628
static PyObject *
1629
bytes_item(PyObject *self, Py_ssize_t i)
1630
0
{
1631
0
    PyBytesObject *a = _PyBytes_CAST(self);
1632
0
    if (i < 0 || i >= Py_SIZE(a)) {
1633
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1634
0
        return NULL;
1635
0
    }
1636
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1637
0
}
1638
1639
static int
1640
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1641
78.5M
{
1642
78.5M
    int cmp;
1643
78.5M
    Py_ssize_t len;
1644
1645
78.5M
    len = Py_SIZE(a);
1646
78.5M
    if (Py_SIZE(b) != len)
1647
795k
        return 0;
1648
1649
77.7M
    if (a->ob_sval[0] != b->ob_sval[0])
1650
10.3M
        return 0;
1651
1652
67.3M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1653
67.3M
    return (cmp == 0);
1654
77.7M
}
1655
1656
/* Rich comparison for bytes objects.

   If either operand fails PyBytes_Check(), NotImplemented is returned; when
   the `bytes_warning` config field is non-zero and the operator is == or !=,
   a BytesWarning is issued first for comparisons involving str or int.

   Identical objects are resolved without looking at the data, == and != use
   bytes_compare_eq(), and the ordering operators compare the common prefix
   and then the lengths. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            /* a byte string is equal to itself */
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        const int equal = bytes_compare_eq(a, b);
        return PyBool_FromLong(op == Py_NE ? !equal : equal);
    }

    /* Ordering: compare the common prefix, then fall back to the lengths. */
    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t min_len = Py_MIN(len_a, len_b);
    int order = 0;

    if (min_len > 0) {
        /* Check the first byte before calling memcmp(). */
        order = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (order == 0) {
            order = memcmp(a->ob_sval, b->ob_sval, min_len);
        }
    }
    if (order != 0) {
        Py_RETURN_RICHCOMPARE(order, 0, op);
    }
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1718
1719
static Py_hash_t
1720
bytes_hash(PyObject *self)
1721
75.0M
{
1722
75.0M
    PyBytesObject *a = _PyBytes_CAST(self);
1723
75.0M
    Py_hash_t hash = get_ob_shash(a);
1724
75.0M
    if (hash == -1) {
1725
        /* Can't fail */
1726
42.9M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1727
42.9M
        set_ob_shash(a, hash);
1728
42.9M
    }
1729
75.0M
    return hash;
1730
75.0M
}
1731
1732
/* Implement subscription of a bytes object by an index or by a slice.
 *
 * op:   the bytes object being subscripted.
 * item: an object accepted by _PyIndex_Check() or a slice object.
 *
 * For an index, negative values are offset by the length, and the byte at
 * that position is returned via _PyLong_FromUnsignedChar().  For a slice,
 * a bytes object holding the selected bytes is returned.
 *
 * Returns NULL with an exception set on failure: IndexError when the index
 * is out of range, TypeError when item is neither an index nor a slice.
 */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t idx = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (idx == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (idx < 0) {
            idx += PyBytes_GET_SIZE(self);
        }
        if (idx < 0 || idx >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[idx]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    Py_ssize_t slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self),
                                                   &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* A slice covering all of an exact bytes object is the object
           itself; any other contiguous slice is copied in one call. */
        if (start == 0 &&
            slicelength == PyBytes_GET_SIZE(self) &&
            PyBytes_CheckExact(self)) {
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self) + start,
                                         slicelength);
    }

    /* General stride: copy the selected bytes one at a time. */
    const char *source_buf = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }
    char *result_buf = PyBytes_AS_STRING(result);

    size_t cur = start;
    for (Py_ssize_t n = 0; n < slicelength; n++, cur += step) {
        result_buf[n] = source_buf[cur];
    }
    return result;
}
1797
1798
static int
1799
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1800
83.2M
{
1801
83.2M
    PyBytesObject *self = _PyBytes_CAST(op);
1802
83.2M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1803
83.2M
                             1, flags);
1804
83.2M
}
1805
1806
static PySequenceMethods bytes_as_sequence = {
1807
    bytes_length,       /*sq_length*/
1808
    _PyBytes_Concat,       /*sq_concat*/
1809
    _PyBytes_Repeat,    /*sq_repeat*/
1810
    bytes_item,         /*sq_item*/
1811
    0,                  /*sq_slice*/
1812
    0,                  /*sq_ass_item*/
1813
    0,                  /*sq_ass_slice*/
1814
    bytes_contains      /*sq_contains*/
1815
};
1816
1817
static PyMappingMethods bytes_as_mapping = {
1818
    bytes_length,
1819
    bytes_subscript,
1820
    0,
1821
};
1822
1823
static PyBufferProcs bytes_as_buffer = {
1824
    bytes_buffer_getbuffer,
1825
    NULL,
1826
};
1827
1828
1829
/*[clinic input]
1830
bytes.__bytes__
1831
Convert this value to exact type bytes.
1832
[clinic start generated code]*/
1833
1834
static PyObject *
1835
bytes___bytes___impl(PyBytesObject *self)
1836
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1837
53.1k
{
1838
53.1k
    if (PyBytes_CheckExact(self)) {
1839
53.1k
        return Py_NewRef(self);
1840
53.1k
    }
1841
0
    else {
1842
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1843
0
    }
1844
53.1k
}
1845
1846
1847
294
/* Strip-mode selectors passed as `striptype` to do_strip(), do_xstrip()
   and do_argstrip(): those helpers skip the leading scan when the mode is
   RIGHTSTRIP and skip the trailing scan when the mode is LEFTSTRIP. */
#define LEFTSTRIP 0
1848
588
#define RIGHTSTRIP 1
1849
0
#define BOTHSTRIP 2
1850
1851
/*[clinic input]
1852
@permit_long_summary
1853
bytes.split
1854
1855
    sep: object = None
1856
        The delimiter according to which to split the bytes.
1857
        None (the default value) means split on ASCII whitespace
1858
        characters (space, tab, return, newline, formfeed, vertical tab).
1859
    maxsplit: Py_ssize_t = -1
1860
        Maximum number of splits to do.
1861
        -1 (the default value) means no limit.
1862
1863
Return a list of the sections in the bytes, using sep as the delimiter.
1864
[clinic start generated code]*/
1865
1866
static PyObject *
1867
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1868
/*[clinic end generated code: output=52126b5844c1d8ef input=330ff95d92544b05]*/
1869
3.01M
{
1870
3.01M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1871
3.01M
    const char *s = PyBytes_AS_STRING(self), *sub;
1872
3.01M
    Py_buffer vsub;
1873
3.01M
    PyObject *list;
1874
1875
3.01M
    if (maxsplit < 0)
1876
3.01M
        maxsplit = PY_SSIZE_T_MAX;
1877
3.01M
    if (sep == Py_None)
1878
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1879
3.01M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1880
0
        return NULL;
1881
3.01M
    sub = vsub.buf;
1882
3.01M
    n = vsub.len;
1883
1884
3.01M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1885
3.01M
    PyBuffer_Release(&vsub);
1886
3.01M
    return list;
1887
3.01M
}
1888
1889
/*[clinic input]
1890
bytes.partition
1891
1892
    sep: Py_buffer
1893
    /
1894
1895
Partition the bytes into three parts using the given separator.
1896
1897
This will search for the separator sep in the bytes.  If the
1898
separator is found, returns a 3-tuple containing the part before the
1899
separator, the separator itself, and the part after it.
1900
1901
If the separator is not found, returns a 3-tuple containing the
1902
original bytes object and two empty bytes objects.
1903
[clinic start generated code]*/
1904
1905
static PyObject *
1906
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1907
/*[clinic end generated code: output=f532b392a17ff695 input=2e6e551ea4f8b95a]*/
1908
380k
{
1909
380k
    return stringlib_partition(
1910
380k
        (PyObject*) self,
1911
380k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1912
380k
        sep->obj, (const char *)sep->buf, sep->len
1913
380k
        );
1914
380k
}
1915
1916
/*[clinic input]
1917
bytes.rpartition
1918
1919
    sep: Py_buffer
1920
    /
1921
1922
Partition the bytes into three parts using the given separator.
1923
1924
This will search for the separator sep in the bytes, starting at the
1925
end.  If the separator is found, returns a 3-tuple containing the
1926
part before the separator, the separator itself, and the part after
1927
it.
1928
1929
If the separator is not found, returns a 3-tuple containing two
1930
empty bytes objects and the original bytes object.
1931
[clinic start generated code]*/
1932
1933
static PyObject *
1934
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1935
/*[clinic end generated code: output=191b114cbb028e50 input=f7d24f722a5470a4]*/
1936
0
{
1937
0
    return stringlib_rpartition(
1938
0
        (PyObject*) self,
1939
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1940
0
        sep->obj, (const char *)sep->buf, sep->len
1941
0
        );
1942
0
}
1943
1944
/*[clinic input]
1945
@permit_long_summary
1946
bytes.rsplit = bytes.split
1947
1948
Return a list of the sections in the bytes, using sep as the delimiter.
1949
1950
Splitting is done starting at the end of the bytes and working to
1951
the front.
1952
[clinic start generated code]*/
1953
1954
static PyObject *
1955
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1956
/*[clinic end generated code: output=ba698d9ea01e1c8f input=ba9bee56285f43e4]*/
1957
0
{
1958
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1959
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1960
0
    Py_buffer vsub;
1961
0
    PyObject *list;
1962
1963
0
    if (maxsplit < 0)
1964
0
        maxsplit = PY_SSIZE_T_MAX;
1965
0
    if (sep == Py_None)
1966
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1967
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1968
0
        return NULL;
1969
0
    sub = vsub.buf;
1970
0
    n = vsub.len;
1971
1972
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1973
0
    PyBuffer_Release(&vsub);
1974
0
    return list;
1975
0
}
1976
1977
1978
/*[clinic input]
1979
bytes.join
1980
1981
    iterable_of_bytes: object
1982
    /
1983
1984
Concatenate any number of bytes objects.
1985
1986
The bytes whose method is called is inserted in between each pair.
1987
1988
The result is returned as a new bytes object.
1989
1990
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1991
[clinic start generated code]*/
1992
1993
static PyObject *
1994
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1995
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1996
260k
{
1997
260k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1998
260k
}
1999
2000
/* Join the items of `iterable` using the bytes object `sep`.
 *
 * Returns the result of stringlib_bytes_join().  Returns NULL with an
 * exception set when sep is NULL (bad internal call) or when sep fails
 * PyBytes_Check() (TypeError).
 */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    PyErr_Format(PyExc_TypeError,
                 "sep: expected bytes, got %T", sep);
    return NULL;
}
2015
2016
/*[clinic input]
2017
@permit_long_summary
2018
@text_signature "($self, sub[, start[, end]], /)"
2019
bytes.find
2020
2021
    sub: object
2022
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2023
         Optional start position. Default: start of the bytes.
2024
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2025
         Optional stop position. Default: end of the bytes.
2026
    /
2027
2028
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2029
2030
Return -1 on failure.
2031
[clinic start generated code]*/
2032
2033
static PyObject *
2034
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2035
                Py_ssize_t end)
2036
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
2037
13.4M
{
2038
13.4M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2039
13.4M
                          sub, start, end);
2040
13.4M
}
2041
2042
/*[clinic input]
2043
@permit_long_summary
2044
bytes.index = bytes.find
2045
2046
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2047
2048
Raise ValueError if the subsection is not found.
2049
[clinic start generated code]*/
2050
2051
static PyObject *
2052
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2053
                 Py_ssize_t end)
2054
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
2055
0
{
2056
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2057
0
                           sub, start, end);
2058
0
}
2059
2060
/*[clinic input]
2061
@permit_long_summary
2062
bytes.rfind = bytes.find
2063
2064
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2065
2066
Return -1 on failure.
2067
[clinic start generated code]*/
2068
2069
static PyObject *
2070
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2071
                 Py_ssize_t end)
2072
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
2073
278k
{
2074
278k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2075
278k
                           sub, start, end);
2076
278k
}
2077
2078
/*[clinic input]
2079
@permit_long_summary
2080
bytes.rindex = bytes.find
2081
2082
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2083
2084
Raise ValueError if the subsection is not found.
2085
[clinic start generated code]*/
2086
2087
static PyObject *
2088
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2089
                  Py_ssize_t end)
2090
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2091
0
{
2092
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2093
0
                            sub, start, end);
2094
0
}
2095
2096
2097
/* Strip from `self` every leading and/or trailing byte that occurs in the
 * buffer exposed by `sepobj`.
 *
 * striptype selects the ends to process: the leading scan runs unless it
 * is RIGHTSTRIP, the trailing scan runs unless it is LEFTSTRIP.
 *
 * Returns a new reference to self when nothing was removed and self is an
 * exact bytes object, otherwise a new bytes object with the remaining
 * range.  Returns NULL if sepobj does not provide a buffer.
 */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    Py_buffer sep_view;
    if (PyObject_GetBuffer(sepobj, &sep_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    const char *strip_set = sep_view.buf;
    const Py_ssize_t strip_set_len = sep_view.len;

    /* Advance past leading bytes that are members of the strip set. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (memchr(strip_set, Py_CHARMASK(data[left]),
                       strip_set_len) == NULL) {
                break;
            }
        }
    }

    /* Retreat over trailing members, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left &&
               memchr(strip_set, Py_CHARMASK(data[right - 1]),
                      strip_set_len) != NULL) {
            right--;
        }
    }

    PyBuffer_Release(&sep_view);

    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2135
2136
2137
/* Strip bytes matching Py_ISSPACE() from the start and/or end of `self`.
 *
 * striptype selects the ends to process: the leading scan runs unless it
 * is RIGHTSTRIP, the trailing scan runs unless it is LEFTSTRIP.
 *
 * Returns a new reference to self when nothing was removed and self is an
 * exact bytes object, otherwise a new bytes object with the remaining
 * range.
 */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* Advance past leading whitespace. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    /* Retreat over trailing whitespace, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2164
2165
2166
/* Dispatch a strip request: when `bytes` is None use do_strip(), otherwise
   use do_xstrip() with `bytes` as the set of bytes to remove. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2174
2175
/*[clinic input]
2176
bytes.strip
2177
2178
    bytes: object = None
2179
    /
2180
2181
Strip leading and trailing bytes contained in the argument.
2182
2183
If the argument is omitted or None, strip leading and trailing ASCII
2184
whitespace.
2185
[clinic start generated code]*/
2186
2187
static PyObject *
2188
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2189
/*[clinic end generated code: output=c7c228d3bd104a1b input=9ffea5f752032bd0]*/
2190
0
{
2191
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2192
0
}
2193
2194
/*[clinic input]
2195
bytes.lstrip
2196
2197
    bytes: object = None
2198
    /
2199
2200
Strip leading bytes contained in the argument.
2201
2202
If the argument is omitted or None, strip leading ASCII whitespace.
2203
[clinic start generated code]*/
2204
2205
static PyObject *
2206
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2207
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2208
0
{
2209
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2210
0
}
2211
2212
/*[clinic input]
2213
bytes.rstrip
2214
2215
    bytes: object = None
2216
    /
2217
2218
Strip trailing bytes contained in the argument.
2219
2220
If the argument is omitted or None, strip trailing ASCII whitespace.
2221
[clinic start generated code]*/
2222
2223
static PyObject *
2224
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2225
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2226
294
{
2227
294
    return do_argstrip(self, RIGHTSTRIP, bytes);
2228
294
}
2229
2230
2231
/*[clinic input]
2232
@permit_long_summary
2233
bytes.count = bytes.find
2234
2235
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2236
[clinic start generated code]*/
2237
2238
static PyObject *
2239
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2240
                 Py_ssize_t end)
2241
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2242
5.70M
{
2243
5.70M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2244
5.70M
                           sub, start, end);
2245
5.70M
}
2246
2247
2248
/*[clinic input]
2249
@permit_long_summary
2250
bytes.translate
2251
2252
    table: object
2253
        Translation table, which must be a bytes object of length 256.
2254
    /
2255
    delete as deletechars: object(c_default="NULL") = b''
2256
2257
Return a copy with each character mapped by the given translation table.
2258
2259
All characters occurring in the optional argument delete are
2260
removed.  The remaining characters are mapped through the given
2261
translation table.
2262
[clinic start generated code]*/
2263
2264
static PyObject *
2265
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2266
                     PyObject *deletechars)
2267
/*[clinic end generated code: output=43be3437f1956211 input=bddcdef0a87895d2]*/
2268
0
{
2269
0
    const char *input;
2270
0
    char *output;
2271
0
    Py_buffer table_view = {NULL, NULL};
2272
0
    Py_buffer del_table_view = {NULL, NULL};
2273
0
    const char *table_chars;
2274
0
    Py_ssize_t i, c, changed = 0;
2275
0
    PyObject *input_obj = (PyObject*)self;
2276
0
    const char *output_start, *del_table_chars=NULL;
2277
0
    Py_ssize_t inlen, tablen, dellen = 0;
2278
0
    PyObject *result;
2279
0
    int trans_table[256];
2280
2281
0
    if (PyBytes_Check(table)) {
2282
0
        table_chars = PyBytes_AS_STRING(table);
2283
0
        tablen = PyBytes_GET_SIZE(table);
2284
0
    }
2285
0
    else if (table == Py_None) {
2286
0
        table_chars = NULL;
2287
0
        tablen = 256;
2288
0
    }
2289
0
    else {
2290
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2291
0
            return NULL;
2292
0
        table_chars = table_view.buf;
2293
0
        tablen = table_view.len;
2294
0
    }
2295
2296
0
    if (tablen != 256) {
2297
0
        PyErr_SetString(PyExc_ValueError,
2298
0
          "translation table must be 256 characters long");
2299
0
        PyBuffer_Release(&table_view);
2300
0
        return NULL;
2301
0
    }
2302
2303
0
    if (deletechars != NULL) {
2304
0
        if (PyBytes_Check(deletechars)) {
2305
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2306
0
            dellen = PyBytes_GET_SIZE(deletechars);
2307
0
        }
2308
0
        else {
2309
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2310
0
                PyBuffer_Release(&table_view);
2311
0
                return NULL;
2312
0
            }
2313
0
            del_table_chars = del_table_view.buf;
2314
0
            dellen = del_table_view.len;
2315
0
        }
2316
0
    }
2317
0
    else {
2318
0
        del_table_chars = NULL;
2319
0
        dellen = 0;
2320
0
    }
2321
2322
0
    inlen = PyBytes_GET_SIZE(input_obj);
2323
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2324
0
    if (result == NULL) {
2325
0
        PyBuffer_Release(&del_table_view);
2326
0
        PyBuffer_Release(&table_view);
2327
0
        return NULL;
2328
0
    }
2329
0
    output_start = output = PyBytes_AS_STRING(result);
2330
0
    input = PyBytes_AS_STRING(input_obj);
2331
2332
0
    if (dellen == 0 && table_chars != NULL) {
2333
        /* If no deletions are required, use faster code */
2334
0
        for (i = inlen; --i >= 0; ) {
2335
0
            c = Py_CHARMASK(*input++);
2336
0
            *output++ = table_chars[c];
2337
0
        }
2338
        /* Check if anything changed (for returning original object) */
2339
        /* We save this check until the end so that the compiler will */
2340
        /* unroll the loop above leading to MUCH faster code. */
2341
0
        if (PyBytes_CheckExact(input_obj)) {
2342
0
            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
2343
0
                Py_SETREF(result, Py_NewRef(input_obj));
2344
0
            }
2345
0
        }
2346
0
        PyBuffer_Release(&del_table_view);
2347
0
        PyBuffer_Release(&table_view);
2348
0
        return result;
2349
0
    }
2350
2351
0
    if (table_chars == NULL) {
2352
0
        for (i = 0; i < 256; i++)
2353
0
            trans_table[i] = Py_CHARMASK(i);
2354
0
    } else {
2355
0
        for (i = 0; i < 256; i++)
2356
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2357
0
    }
2358
0
    PyBuffer_Release(&table_view);
2359
2360
0
    for (i = 0; i < dellen; i++)
2361
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2362
0
    PyBuffer_Release(&del_table_view);
2363
2364
0
    for (i = inlen; --i >= 0; ) {
2365
0
        c = Py_CHARMASK(*input++);
2366
0
        if (trans_table[c] != -1)
2367
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2368
0
                continue;
2369
0
        changed = 1;
2370
0
    }
2371
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2372
0
        Py_DECREF(result);
2373
0
        return Py_NewRef(input_obj);
2374
0
    }
2375
    /* Fix the size of the resulting byte string */
2376
0
    if (inlen > 0)
2377
0
        _PyBytes_Resize(&result, output - output_start);
2378
0
    return result;
2379
0
}
2380
2381
2382
/*[clinic input]
2383
2384
@permit_long_summary
2385
@staticmethod
2386
bytes.maketrans
2387
2388
    frm: Py_buffer
2389
    to: Py_buffer
2390
    /
2391
2392
Return a translation table usable for the bytes or bytearray translate method.
2393
2394
The returned table will be one where each byte in frm is mapped to
2395
the byte at the same position in to.
2396
2397
The bytes objects frm and to must be of the same length.
2398
[clinic start generated code]*/
2399
2400
static PyObject *
2401
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2402
/*[clinic end generated code: output=a36f6399d4b77f6f input=3a577e5badfea8f7]*/
2403
9
{
2404
9
    return _Py_bytes_maketrans(frm, to);
2405
9
}
2406
2407
2408
/*[clinic input]
2409
bytes.replace
2410
2411
    old: Py_buffer
2412
    new: Py_buffer
2413
    /
2414
    count: Py_ssize_t = -1
2415
        Maximum number of occurrences to replace.
2416
        -1 (the default value) means replace all occurrences.
2417
2418
Return a copy with all occurrences of substring old replaced by new.
2419
2420
If count is given, only the first count occurrences are replaced.
2421
If count is not specified or -1, then all occurrences are replaced.
2422
[clinic start generated code]*/
2423
2424
static PyObject *
2425
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2426
                   Py_ssize_t count)
2427
/*[clinic end generated code: output=994fa588b6b9c104 input=cdf3cf8639297745]*/
2428
35.7k
{
2429
35.7k
    return stringlib_replace((PyObject *)self,
2430
35.7k
                             (const char *)old->buf, old->len,
2431
35.7k
                             (const char *)new->buf, new->len, count);
2432
35.7k
}
2433
2434
/** End DALKE **/
2435
2436
/*[clinic input]
2437
@permit_long_summary
2438
bytes.removeprefix as bytes_removeprefix
2439
2440
    prefix: Py_buffer
2441
    /
2442
2443
Return a bytes object with the given prefix string removed if present.
2444
2445
If the bytes starts with the prefix string, return
2446
bytes[len(prefix):].  Otherwise, return a copy of the original
2447
bytes.
2448
[clinic start generated code]*/
2449
2450
static PyObject *
2451
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2452
/*[clinic end generated code: output=f006865331a06ab6 input=3a2672bcee61d7a7]*/
2453
0
{
2454
0
    const char *self_start = PyBytes_AS_STRING(self);
2455
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2456
0
    const char *prefix_start = prefix->buf;
2457
0
    Py_ssize_t prefix_len = prefix->len;
2458
2459
0
    if (self_len >= prefix_len
2460
0
        && prefix_len > 0
2461
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2462
0
    {
2463
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2464
0
                                         self_len - prefix_len);
2465
0
    }
2466
2467
0
    if (PyBytes_CheckExact(self)) {
2468
0
        return Py_NewRef(self);
2469
0
    }
2470
2471
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2472
0
}
2473
2474
/*[clinic input]
2475
@permit_long_summary
2476
bytes.removesuffix as bytes_removesuffix
2477
2478
    suffix: Py_buffer
2479
    /
2480
2481
Return a bytes object with the given suffix string removed if present.
2482
2483
If the bytes ends with the suffix string and that suffix is not
2484
empty, return bytes[:-len(suffix)].  Otherwise, return a copy of the
2485
original bytes.
2486
[clinic start generated code]*/
2487
2488
static PyObject *
2489
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2490
/*[clinic end generated code: output=d887d308e3242eeb input=04df5f18a36f69d7]*/
2491
0
{
2492
0
    const char *self_start = PyBytes_AS_STRING(self);
2493
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2494
0
    const char *suffix_start = suffix->buf;
2495
0
    Py_ssize_t suffix_len = suffix->len;
2496
2497
0
    if (self_len >= suffix_len
2498
0
        && suffix_len > 0
2499
0
        && memcmp(self_start + self_len - suffix_len,
2500
0
                  suffix_start, suffix_len) == 0)
2501
0
    {
2502
0
        return PyBytes_FromStringAndSize(self_start,
2503
0
                                         self_len - suffix_len);
2504
0
    }
2505
2506
0
    if (PyBytes_CheckExact(self)) {
2507
0
        return Py_NewRef(self);
2508
0
    }
2509
2510
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2511
0
}
2512
2513
/*[clinic input]
2514
@permit_long_summary
2515
@text_signature "($self, prefix[, start[, end]], /)"
2516
bytes.startswith
2517
2518
    prefix as subobj: object
2519
        A bytes or a tuple of bytes to try.
2520
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2521
        Optional start position. Default: start of the bytes.
2522
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2523
        Optional stop position. Default: end of the bytes.
2524
    /
2525
2526
Return True if the bytes starts with the specified prefix, False otherwise.
2527
[clinic start generated code]*/
2528
2529
static PyObject *
2530
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2531
                      Py_ssize_t start, Py_ssize_t end)
2532
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2533
1.93M
{
2534
1.93M
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2535
1.93M
                                subobj, start, end);
2536
1.93M
}
2537
2538
/*[clinic input]
2539
@permit_long_summary
2540
@text_signature "($self, suffix[, start[, end]], /)"
2541
bytes.endswith
2542
2543
    suffix as subobj: object
2544
        A bytes or a tuple of bytes to try.
2545
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2546
         Optional start position. Default: start of the bytes.
2547
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2548
         Optional stop position. Default: end of the bytes.
2549
    /
2550
2551
Return True if the bytes ends with the specified suffix, False otherwise.
2552
[clinic start generated code]*/
2553
2554
static PyObject *
2555
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2556
                    Py_ssize_t end)
2557
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2558
315
{
2559
315
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2560
315
                              subobj, start, end);
2561
315
}
2562
2563
2564
/*[clinic input]
2565
bytes.decode
2566
2567
    encoding: str(c_default="NULL") = 'utf-8'
2568
        The encoding with which to decode the bytes.
2569
    errors: str(c_default="NULL") = 'strict'
2570
        The error handling scheme to use for the handling of decoding
2571
        errors.  The default is 'strict' meaning that decoding errors
2572
        raise a UnicodeDecodeError.  Other possible values are 'ignore'
2573
        and 'replace' as well as any other name registered with
2574
        codecs.register_error that can handle UnicodeDecodeErrors.
2575
2576
Decode the bytes using the codec registered for encoding.
2577
[clinic start generated code]*/
2578
2579
static PyObject *
2580
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2581
                  const char *errors)
2582
/*[clinic end generated code: output=5649a53dde27b314 input=94e9b8524f1d7f37]*/
2583
18.9M
{
2584
18.9M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2585
18.9M
}
2586
2587
2588
/*[clinic input]
2589
@permit_long_summary
2590
bytes.splitlines
2591
2592
    keepends: bool = False
2593
2594
Return a list of the lines in the bytes, breaking at line boundaries.
2595
2596
Line breaks are not included in the resulting list unless keepends
2597
is given and true.
2598
[clinic start generated code]*/
2599
2600
static PyObject *
2601
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2602
/*[clinic end generated code: output=3484149a5d880ffb input=8734672f34430514]*/
2603
0
{
2604
0
    return stringlib_splitlines(
2605
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2606
0
        PyBytes_GET_SIZE(self), keepends
2607
0
        );
2608
0
}
2609
2610
/*[clinic input]
2611
@classmethod
2612
bytes.fromhex
2613
2614
    string: object
2615
    /
2616
2617
Create a bytes object from a string of hexadecimal numbers.
2618
2619
Spaces between two numbers are accepted.
2620
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2621
[clinic start generated code]*/
2622
2623
static PyObject *
2624
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2625
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2626
34.9k
{
2627
34.9k
    PyObject *result = _PyBytes_FromHex(string, 0);
2628
34.9k
    if (type != &PyBytes_Type && result != NULL) {
2629
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2630
0
    }
2631
34.9k
    return result;
2632
34.9k
}
2633
2634
/* Decode a sequence of two-digit hexadecimal numbers into a bytes-like
   result built with a PyBytesWriter.

   string must be a str or an object supporting the buffer protocol.
   ASCII whitespace in front of a digit pair is skipped.  When
   use_bytearray is non-zero the writer is created with
   _PyBytesWriter_CreateByteArray(), otherwise with PyBytesWriter_Create().

   Returns a new reference, or NULL with an exception set:
     - TypeError if string is neither str nor bytes-like,
     - ValueError for a non-hexadecimal character (its position is
       reported) or for an odd number of hexadecimal digits.

   Every read of *str is bounds-checked against end.  A buffer exported
   through the buffer protocol is not guaranteed to be NUL-terminated, so
   the byte located at end must never be inspected. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;   /* lets the cleanup paths tell whether a buffer is held */

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking at or past end */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* A lone trailing digit: report the odd length without reading
           the byte that lies beyond the input. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }
        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 encodes "odd number of digits"; any other value
       is the offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2746
2747
/*[clinic input]
2748
bytes.hex
2749
2750
    sep: object = NULL
2751
        An optional single character or byte to separate hex bytes.
2752
    bytes_per_sep: Py_ssize_t = 1
2753
        How many bytes between separators.  Positive values count from
2754
        the right, negative values count from the left.
2755
2756
Create a string of hexadecimal numbers from a bytes object.
2757
2758
Example:
2759
>>> value = b'\xb9\x01\xef'
2760
>>> value.hex()
2761
'b901ef'
2762
>>> value.hex(':')
2763
'b9:01:ef'
2764
>>> value.hex(':', 2)
2765
'b9:01ef'
2766
>>> value.hex(':', -2)
2767
'b901:ef'
2768
[clinic start generated code]*/
2769
2770
static PyObject *
2771
bytes_hex_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t bytes_per_sep)
2772
/*[clinic end generated code: output=588821f02cb9d8f5 input=b8d40cf203d172dc]*/
2773
0
{
2774
0
    const char *argbuf = PyBytes_AS_STRING(self);
2775
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2776
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2777
0
}
2778
2779
/* __getnewargs__: build a 1-tuple holding a bytes copy of op's contents. */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *payload = self->ob_sval;
    Py_ssize_t length = Py_SIZE(self);

    return Py_BuildValue("(y#)", payload, length);
}
2785
2786
2787
static PyMethodDef
2788
bytes_methods[] = {
2789
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2790
    BYTES___BYTES___METHODDEF
2791
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2792
     _Py_capitalize__doc__},
2793
    STRINGLIB_CENTER_METHODDEF
2794
    BYTES_COUNT_METHODDEF
2795
    BYTES_DECODE_METHODDEF
2796
    BYTES_ENDSWITH_METHODDEF
2797
    STRINGLIB_EXPANDTABS_METHODDEF
2798
    BYTES_FIND_METHODDEF
2799
    BYTES_FROMHEX_METHODDEF
2800
    BYTES_HEX_METHODDEF
2801
    BYTES_INDEX_METHODDEF
2802
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2803
     _Py_isalnum__doc__},
2804
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2805
     _Py_isalpha__doc__},
2806
    {"isascii", stringlib_isascii, METH_NOARGS,
2807
     _Py_isascii__doc__},
2808
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2809
     _Py_isdigit__doc__},
2810
    {"islower", stringlib_islower, METH_NOARGS,
2811
     _Py_islower__doc__},
2812
    {"isspace", stringlib_isspace, METH_NOARGS,
2813
     _Py_isspace__doc__},
2814
    {"istitle", stringlib_istitle, METH_NOARGS,
2815
     _Py_istitle__doc__},
2816
    {"isupper", stringlib_isupper, METH_NOARGS,
2817
     _Py_isupper__doc__},
2818
    BYTES_JOIN_METHODDEF
2819
    STRINGLIB_LJUST_METHODDEF
2820
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2821
    BYTES_LSTRIP_METHODDEF
2822
    BYTES_MAKETRANS_METHODDEF
2823
    BYTES_PARTITION_METHODDEF
2824
    BYTES_REPLACE_METHODDEF
2825
    BYTES_REMOVEPREFIX_METHODDEF
2826
    BYTES_REMOVESUFFIX_METHODDEF
2827
    BYTES_RFIND_METHODDEF
2828
    BYTES_RINDEX_METHODDEF
2829
    STRINGLIB_RJUST_METHODDEF
2830
    BYTES_RPARTITION_METHODDEF
2831
    BYTES_RSPLIT_METHODDEF
2832
    BYTES_RSTRIP_METHODDEF
2833
    BYTES_SPLIT_METHODDEF
2834
    BYTES_SPLITLINES_METHODDEF
2835
    BYTES_STARTSWITH_METHODDEF
2836
    BYTES_STRIP_METHODDEF
2837
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2838
     _Py_swapcase__doc__},
2839
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2840
    BYTES_TRANSLATE_METHODDEF
2841
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2842
    STRINGLIB_ZFILL_METHODDEF
2843
    {NULL,     NULL}                         /* sentinel */
2844
};
2845
2846
/* nb_remainder slot: %-format with self as the format string.  Returns
   NotImplemented when self is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2855
2856
static PyNumberMethods bytes_as_number = {
2857
    0,              /*nb_add*/
2858
    0,              /*nb_subtract*/
2859
    0,              /*nb_multiply*/
2860
    bytes_mod,      /*nb_remainder*/
2861
};
2862
2863
static PyObject *
2864
bytes_subtype_new(PyTypeObject *, PyObject *);
2865
2866
/*[clinic input]
2867
@classmethod
2868
bytes.__new__ as bytes_new
2869
2870
    source as x: object = NULL
2871
    encoding: str = NULL
2872
    errors: str = NULL
2873
2874
[clinic start generated code]*/
2875
2876
static PyObject *
2877
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2878
               const char *errors)
2879
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2880
17.3M
{
2881
17.3M
    PyObject *bytes;
2882
17.3M
    PyObject *func;
2883
17.3M
    Py_ssize_t size;
2884
2885
17.3M
    if (x == NULL) {
2886
0
        if (encoding != NULL || errors != NULL) {
2887
0
            PyErr_SetString(PyExc_TypeError,
2888
0
                            encoding != NULL ?
2889
0
                            "encoding without a string argument" :
2890
0
                            "errors without a string argument");
2891
0
            return NULL;
2892
0
        }
2893
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2894
0
    }
2895
17.3M
    else if (encoding != NULL) {
2896
        /* Encode via the codec registry */
2897
330k
        if (!PyUnicode_Check(x)) {
2898
0
            PyErr_SetString(PyExc_TypeError,
2899
0
                            "encoding without a string argument");
2900
0
            return NULL;
2901
0
        }
2902
330k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2903
330k
    }
2904
17.0M
    else if (errors != NULL) {
2905
0
        PyErr_SetString(PyExc_TypeError,
2906
0
                        PyUnicode_Check(x) ?
2907
0
                        "string argument without an encoding" :
2908
0
                        "errors without a string argument");
2909
0
        return NULL;
2910
0
    }
2911
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2912
       integer argument before deferring to PyBytes_FromObject, something
2913
       PyObject_Bytes doesn't do. */
2914
17.0M
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2915
53.1k
        bytes = _PyObject_CallNoArgs(func);
2916
53.1k
        Py_DECREF(func);
2917
53.1k
        if (bytes == NULL)
2918
0
            return NULL;
2919
53.1k
        if (!PyBytes_Check(bytes)) {
2920
0
            PyErr_Format(PyExc_TypeError,
2921
0
                         "%T.__bytes__() must return a bytes, not %T",
2922
0
                         x, bytes);
2923
0
            Py_DECREF(bytes);
2924
0
            return NULL;
2925
0
        }
2926
53.1k
    }
2927
16.9M
    else if (PyErr_Occurred())
2928
0
        return NULL;
2929
16.9M
    else if (PyUnicode_Check(x)) {
2930
0
        PyErr_SetString(PyExc_TypeError,
2931
0
                        "string argument without an encoding");
2932
0
        return NULL;
2933
0
    }
2934
    /* Is it an integer? */
2935
16.9M
    else if (_PyIndex_Check(x)) {
2936
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2937
0
        if (size == -1 && PyErr_Occurred()) {
2938
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2939
0
                return NULL;
2940
0
            PyErr_Clear();  /* fall through */
2941
0
            bytes = PyBytes_FromObject(x);
2942
0
        }
2943
0
        else {
2944
0
            if (size < 0) {
2945
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2946
0
                return NULL;
2947
0
            }
2948
0
            bytes = _PyBytes_FromSize(size, 1);
2949
0
        }
2950
0
    }
2951
16.9M
    else {
2952
16.9M
        bytes = PyBytes_FromObject(x);
2953
16.9M
    }
2954
2955
17.3M
    if (bytes != NULL && type != &PyBytes_Type) {
2956
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2957
0
    }
2958
2959
17.3M
    return bytes;
2960
17.3M
}
2961
2962
/* Create a bytes object holding a C-contiguous copy of the buffer exported
   by x.  Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer src;
    if (PyObject_GetBuffer(x, &src, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(src.len);
    int copied = (writer != NULL
                  && PyBuffer_ToContiguous(PyBytesWriter_GetData(writer),
                                           &src, src.len, 'C') >= 0);
    if (!copied) {
        /* writer may be NULL here when its creation failed. */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&src);
        return NULL;
    }

    PyBuffer_Release(&src);
    return PyBytesWriter_Finish(writer);
}
2987
2988
/* Build a bytes object from the list x, whose items must convert to
   integers in range(0, 256).  The list length is re-read on every
   iteration and the writer is grown when the position reaches its
   allocated size. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        PyObject *elem = _PyList_GetItemRef((PyListObject *)x, pos);
        if (elem == NULL) {
            goto error;
        }
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (pos >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        pos++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3030
3031
/* Build a bytes object from the tuple x, whose items must convert to
   integers in range(0, 256).  The writer is created with exactly the
   tuple's size, so it is finished without a resize. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    for (Py_ssize_t pos = 0; pos < count; pos++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, pos), NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }
        out[pos] = (char) byte;
    }
    return PyBytesWriter_Finish(writer);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3063
3064
/* Build a bytes object by draining the iterator it.  x is the object the
   iterator came from and is only used to obtain a length hint for the
   initial allocation. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    /* The hint (64 when unknown) only sizes the first allocation. */
    Py_ssize_t capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(capacity);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    capacity = _PyBytesWriter_GetAllocated(writer);

    /* Run the iterator to exhaustion */
    Py_ssize_t count = 0;
    PyObject *item;
    while ((item = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, growing the writer when it is full */
        if (count >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        count++;
    }

    /* PyIter_Next() returned NULL: either exhaustion or an error. */
    if (PyErr_Occurred()) {
        goto error;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3123
3124
/* Convert x to a bytes object.

   An exact bytes object is returned as a new reference to itself.
   Otherwise the conversions are tried in this order: buffer protocol,
   exact list, exact tuple, and finally any non-str iterable.  Returns NULL
   with an exception set on failure; objects that fit none of these raise
   TypeError. */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iter = PyObject_GetIter(x);
        if (iter != NULL) {
            PyObject *converted = _PyBytes_FromIterator(iter, x);
            Py_DECREF(iter);
            return converted;
        }
        /* Only a TypeError from PyObject_GetIter() is replaced by the
           message below; anything else propagates unchanged. */
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3165
3166
/* This allocator is needed for subclasses that don't want to use __new__.
3167
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3168
 *
3169
 * This allocator will be removed when ob_shash is removed.
3170
 */
3171
static PyObject *
3172
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3173
0
{
3174
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3175
0
    if (obj == NULL) {
3176
0
        return NULL;
3177
0
    }
3178
0
    set_ob_shash(obj, -1);
3179
0
    return (PyObject*)obj;
3180
0
}
3181
3182
static PyObject *
3183
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3184
0
{
3185
0
    PyObject *pnew;
3186
0
    Py_ssize_t n;
3187
3188
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3189
0
    assert(PyBytes_Check(tmp));
3190
0
    n = PyBytes_GET_SIZE(tmp);
3191
0
    pnew = type->tp_alloc(type, n);
3192
0
    if (pnew != NULL) {
3193
0
        memcpy(PyBytes_AS_STRING(pnew),
3194
0
                  PyBytes_AS_STRING(tmp), n+1);
3195
0
        set_ob_shash((PyBytesObject *)pnew,
3196
0
            get_ob_shash((PyBytesObject *)tmp));
3197
0
    }
3198
0
    return pnew;
3199
0
}
3200
3201
/* Docstring of the bytes type; referenced as tp_doc by PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
3202
"bytes(iterable_of_ints) -> bytes\n\
3203
bytes(string, encoding[, errors]) -> bytes\n\
3204
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3205
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3206
bytes() -> empty bytes object\n\
3207
\n\
3208
Construct an immutable array of bytes from:\n\
3209
  - an iterable yielding integers in range(256)\n\
3210
  - a text string encoded using the specified encoding\n\
3211
  - any object implementing the buffer API.\n\
3212
  - an integer");
3213
3214
static PyObject *bytes_iter(PyObject *seq);
3215
3216
3217
static _PyObjectIndexPair
3218
bytes_iteritem(PyObject *obj, Py_ssize_t index)
3219
2.03k
{
3220
2.03k
    PyBytesObject *a = _PyBytes_CAST(obj);
3221
2.03k
    if (index >= Py_SIZE(a)) {
3222
52
        return (_PyObjectIndexPair) { .object = NULL, .index = index };
3223
52
    }
3224
1.98k
    PyObject *l = _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[index]);
3225
1.98k
    return (_PyObjectIndexPair) { .object = l, .index = index + 1 };
3226
2.03k
}
3227
3228
PyTypeObject PyBytes_Type = {
3229
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3230
    "bytes",
3231
    PyBytesObject_SIZE,
3232
    sizeof(char),
3233
    0,                                          /* tp_dealloc */
3234
    0,                                          /* tp_vectorcall_offset */
3235
    0,                                          /* tp_getattr */
3236
    0,                                          /* tp_setattr */
3237
    0,                                          /* tp_as_async */
3238
    bytes_repr,                                 /* tp_repr */
3239
    &bytes_as_number,                           /* tp_as_number */
3240
    &bytes_as_sequence,                         /* tp_as_sequence */
3241
    &bytes_as_mapping,                          /* tp_as_mapping */
3242
    bytes_hash,                                 /* tp_hash */
3243
    0,                                          /* tp_call */
3244
    bytes_str,                                  /* tp_str */
3245
    PyObject_GenericGetAttr,                    /* tp_getattro */
3246
    0,                                          /* tp_setattro */
3247
    &bytes_as_buffer,                           /* tp_as_buffer */
3248
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3249
        Py_TPFLAGS_BYTES_SUBCLASS |
3250
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3251
    bytes_doc,                                  /* tp_doc */
3252
    0,                                          /* tp_traverse */
3253
    0,                                          /* tp_clear */
3254
    bytes_richcompare,                          /* tp_richcompare */
3255
    0,                                          /* tp_weaklistoffset */
3256
    bytes_iter,                                 /* tp_iter */
3257
    0,                                          /* tp_iternext */
3258
    bytes_methods,                              /* tp_methods */
3259
    0,                                          /* tp_members */
3260
    0,                                          /* tp_getset */
3261
    0,                                          /* tp_base */
3262
    0,                                          /* tp_dict */
3263
    0,                                          /* tp_descr_get */
3264
    0,                                          /* tp_descr_set */
3265
    0,                                          /* tp_dictoffset */
3266
    0,                                          /* tp_init */
3267
    bytes_alloc,                                /* tp_alloc */
3268
    bytes_new,                                  /* tp_new */
3269
    PyObject_Free,                              /* tp_free */
3270
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3271
    ._tp_iteritem = bytes_iteritem,
3272
};
3273
3274
void
3275
PyBytes_Concat(PyObject **pv, PyObject *w)
3276
0
{
3277
0
    assert(pv != NULL);
3278
0
    if (*pv == NULL)
3279
0
        return;
3280
0
    if (w == NULL) {
3281
0
        Py_CLEAR(*pv);
3282
0
        return;
3283
0
    }
3284
3285
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3286
        /* Only one reference, so we can resize in place */
3287
0
        Py_ssize_t oldsize;
3288
0
        Py_buffer wb;
3289
3290
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3291
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3292
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3293
0
            Py_CLEAR(*pv);
3294
0
            return;
3295
0
        }
3296
3297
0
        oldsize = PyBytes_GET_SIZE(*pv);
3298
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3299
0
            PyErr_NoMemory();
3300
0
            goto error;
3301
0
        }
3302
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3303
0
            goto error;
3304
3305
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3306
0
        PyBuffer_Release(&wb);
3307
0
        return;
3308
3309
0
      error:
3310
0
        PyBuffer_Release(&wb);
3311
0
        Py_CLEAR(*pv);
3312
0
        return;
3313
0
    }
3314
3315
0
    else {
3316
        /* Multiple references, need to create new object */
3317
0
        PyObject *v;
3318
0
        v = _PyBytes_Concat(*pv, w);
3319
0
        Py_SETREF(*pv, v);
3320
0
    }
3321
0
}
3322
3323
void
3324
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3325
0
{
3326
0
    PyBytes_Concat(pv, w);
3327
0
    Py_XDECREF(w);
3328
0
}
3329
3330
3331
/* The following function breaks the notion that bytes are immutable:
3332
   it changes the size of a bytes object.  You can think of it
3333
   as creating a new bytes object and destroying the old one, only
3334
   more efficiently.
3335
   Note that if there's not enough memory to resize the bytes object, the
3336
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3337
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3338
   returned, and the value in *pv may or may not be the same as on input.
3339
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3340
   does *not* include that), and a trailing \0 byte is stored.
3341
*/
3342
3343
int
3344
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3345
24.7M
{
3346
24.7M
    PyObject *v;
3347
24.7M
    PyBytesObject *sv;
3348
24.7M
    v = *pv;
3349
24.7M
    if (!PyBytes_Check(v) || newsize < 0) {
3350
0
        *pv = 0;
3351
0
        Py_DECREF(v);
3352
0
        PyErr_BadInternalCall();
3353
0
        return -1;
3354
0
    }
3355
24.7M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3356
24.7M
    if (oldsize == newsize) {
3357
        /* return early if newsize equals to v->ob_size */
3358
1.63M
        return 0;
3359
1.63M
    }
3360
23.0M
    if (oldsize == 0) {
3361
19.1M
        *pv = _PyBytes_FromSize(newsize, 0);
3362
19.1M
        Py_DECREF(v);
3363
19.1M
        return (*pv == NULL) ? -1 : 0;
3364
19.1M
    }
3365
3.97M
    if (newsize == 0) {
3366
6.93k
        *pv = bytes_get_empty();
3367
6.93k
        Py_DECREF(v);
3368
6.93k
        return 0;
3369
6.93k
    }
3370
3.96M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3371
0
        if (oldsize < newsize) {
3372
0
            *pv = _PyBytes_FromSize(newsize, 0);
3373
0
            if (*pv) {
3374
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3375
0
            }
3376
0
        }
3377
0
        else {
3378
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3379
0
        }
3380
0
        Py_DECREF(v);
3381
0
        return (*pv == NULL) ? -1 : 0;
3382
0
    }
3383
3384
#ifdef Py_TRACE_REFS
3385
    _Py_ForgetReference(v);
3386
#endif
3387
3.96M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3388
3.96M
    *pv = (PyObject *)
3389
3.96M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3390
3.96M
    if (*pv == NULL) {
3391
#ifdef Py_REF_DEBUG
3392
        _Py_DecRefTotal(_PyThreadState_GET());
3393
#endif
3394
0
        PyObject_Free(v);
3395
0
        PyErr_NoMemory();
3396
0
        return -1;
3397
0
    }
3398
3.96M
    _Py_NewReferenceNoTotal(*pv);
3399
3.96M
    sv = (PyBytesObject *) *pv;
3400
3.96M
    Py_SET_SIZE(sv, newsize);
3401
3.96M
    sv->ob_sval[newsize] = '\0';
3402
3.96M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3403
3.96M
    return 0;
3404
3.96M
}
3405
3406
3407
/*********************** Bytes Iterator ****************************/
3408
3409
/* Iterator state used by the striter_* functions below: the bytes object
   being walked and the position of the next byte to produce. */
typedef struct {
3410
    PyObject_HEAD
3411
    Py_ssize_t it_index;   /* index into it_seq of the next byte to return */
3412
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3413
} striterobject;
3414
3415
581
#define _striterobject_CAST(op)  ((striterobject *)(op))
3416
3417
static void
3418
striter_dealloc(PyObject *op)
3419
45
{
3420
45
    striterobject *it = _striterobject_CAST(op);
3421
45
    _PyObject_GC_UNTRACK(it);
3422
45
    Py_XDECREF(it->it_seq);
3423
45
    PyObject_GC_Del(it);
3424
45
}
3425
3426
static int
3427
striter_traverse(PyObject *op, visitproc visit, void *arg)
3428
0
{
3429
0
    striterobject *it = _striterobject_CAST(op);
3430
0
    Py_VISIT(it->it_seq);
3431
0
    return 0;
3432
0
}
3433
3434
/* Return the next byte as an int object, or NULL when the iterator is
   exhausted.  On exhaustion the reference to the sequence is dropped and
   it_seq is set to NULL. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *self = _striterobject_CAST(op);
    assert(self != NULL);

    PyBytesObject *seq = self->it_seq;
    if (seq == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(seq));

    Py_ssize_t pos = self->it_index;
    if (pos >= PyBytes_GET_SIZE(seq)) {
        self->it_seq = NULL;
        Py_DECREF(seq);
        return NULL;
    }

    self->it_index = pos + 1;
    return _PyLong_FromUnsignedChar((unsigned char)seq->ob_sval[pos]);
}
3455
3456
/* __length_hint__: number of bytes left to iterate, 0 once exhausted. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *self = _striterobject_CAST(op);
    Py_ssize_t remaining = (self->it_seq != NULL)
        ? PyBytes_GET_SIZE(self->it_seq) - self->it_index
        : 0;

    return PyLong_FromSsize_t(remaining);
}
3465
3466
PyDoc_STRVAR(length_hint_doc,
3467
             "Private method returning an estimate of len(list(it)).");
3468
3469
/* __reduce__: (iter, (seq,), index) for a live iterator, or (iter, ((),))
   for an exhausted one. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    /* _PyEval_GetBuiltin can invoke arbitrary code, so it must be called
     * before any iterator field is read.  See issue #101765. */
    PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return Py_BuildValue("N(())", iter);
    }
    return Py_BuildValue("N(O)n", iter, self->it_seq, self->it_index);
}
3484
3485
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3486
3487
/* __setstate__: set the iteration position from the integer state, clamped
   to [0, len(seq)].  Has no effect on an exhausted iterator. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq != NULL) {
        Py_ssize_t limit = PyBytes_GET_SIZE(self->it_seq);
        if (index < 0) {
            index = 0;
        }
        else if (index > limit) {
            index = limit; /* iterator exhausted */
        }
        self->it_index = index;
    }
    Py_RETURN_NONE;
}
3503
3504
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3505
3506
static PyMethodDef striter_methods[] = {
3507
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3508
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3509
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3510
    {NULL,              NULL}           /* sentinel */
3511
};
3512
3513
PyTypeObject PyBytesIter_Type = {
3514
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3515
    "bytes_iterator",                           /* tp_name */
3516
    sizeof(striterobject),                      /* tp_basicsize */
3517
    0,                                          /* tp_itemsize */
3518
    /* methods */
3519
    striter_dealloc,                            /* tp_dealloc */
3520
    0,                                          /* tp_vectorcall_offset */
3521
    0,                                          /* tp_getattr */
3522
    0,                                          /* tp_setattr */
3523
    0,                                          /* tp_as_async */
3524
    0,                                          /* tp_repr */
3525
    0,                                          /* tp_as_number */
3526
    0,                                          /* tp_as_sequence */
3527
    0,                                          /* tp_as_mapping */
3528
    0,                                          /* tp_hash */
3529
    0,                                          /* tp_call */
3530
    0,                                          /* tp_str */
3531
    PyObject_GenericGetAttr,                    /* tp_getattro */
3532
    0,                                          /* tp_setattro */
3533
    0,                                          /* tp_as_buffer */
3534
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3535
    0,                                          /* tp_doc */
3536
    striter_traverse,                           /* tp_traverse */
3537
    0,                                          /* tp_clear */
3538
    0,                                          /* tp_richcompare */
3539
    0,                                          /* tp_weaklistoffset */
3540
    PyObject_SelfIter,                          /* tp_iter */
3541
    striter_next,                               /* tp_iternext */
3542
    striter_methods,                            /* tp_methods */
3543
    0,
3544
};
3545
3546
/* Create a bytes_iterator positioned at index 0 of `seq`.

   `seq` must pass PyBytes_Check(); otherwise PyErr_BadInternalCall() is
   raised and NULL is returned.  NULL is also returned if the iterator
   object cannot be allocated.  The iterator keeps its own reference to
   `seq` and is GC-tracked before being returned. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject,
                                              &PyBytesIter_Type);
    if (iterator == NULL) {
        return NULL;
    }

    iterator->it_index = 0;
    iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
3563
3564
3565
void
3566
_PyBytes_RepeatBuffer(char* dest, Py_ssize_t len_dest,
3567
    const char* src, Py_ssize_t len_src)
3568
188k
{
3569
188k
    if (len_dest == 0) {
3570
779
        return;
3571
779
    }
3572
187k
    if (len_src == 1) {
3573
185k
        memset(dest, src[0], len_dest);
3574
185k
    }
3575
2.33k
    else {
3576
2.33k
        if (src != dest) {
3577
2.33k
            memcpy(dest, src, len_src);
3578
2.33k
        }
3579
2.33k
        Py_ssize_t copied = len_src;
3580
5.62k
        while (copied < len_dest) {
3581
3.28k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3582
3.28k
            memcpy(dest + copied, dest, bytes_to_copy);
3583
3.28k
            copied += bytes_to_copy;
3584
3.28k
        }
3585
2.33k
    }
3586
187k
}
3587
3588
3589
// --- PyBytesWriter API -----------------------------------------------------
3590
3591
static inline char*
3592
byteswriter_data(PyBytesWriter *writer)
3593
36.9M
{
3594
36.9M
    return _PyBytesWriter_GetData(writer);
3595
36.9M
}
3596
3597
3598
static inline Py_ssize_t
3599
byteswriter_allocated(PyBytesWriter *writer)
3600
36.7M
{
3601
36.7M
    if (writer->obj == NULL) {
3602
36.1M
        return sizeof(writer->small_buffer);
3603
36.1M
    }
3604
629k
    else if (writer->use_bytearray) {
3605
0
        return PyByteArray_GET_SIZE(writer->obj);
3606
0
    }
3607
629k
    else {
3608
629k
        return PyBytes_GET_SIZE(writer->obj);
3609
629k
    }
3610
36.7M
}
3611
3612
3613
/* Divisor used by byteswriter_resize() when over-allocating: the requested
   size is increased by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3620
3621
static inline int
3622
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3623
27.5M
{
3624
27.5M
    assert(size >= 0);
3625
3626
27.5M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3627
27.5M
    if (size <= old_allocated) {
3628
26.7M
        return 0;
3629
26.7M
    }
3630
3631
826k
    if (resize & writer->overallocate) {
3632
20.8k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3633
20.8k
            size += size / OVERALLOCATE_FACTOR;
3634
20.8k
        }
3635
20.8k
    }
3636
3637
826k
    if (writer->obj != NULL) {
3638
20.8k
        if (writer->use_bytearray) {
3639
0
            if (PyByteArray_Resize(writer->obj, size)) {
3640
0
                return -1;
3641
0
            }
3642
0
        }
3643
20.8k
        else {
3644
20.8k
            if (_PyBytes_Resize(&writer->obj, size)) {
3645
0
                return -1;
3646
0
            }
3647
20.8k
        }
3648
20.8k
        assert(writer->obj != NULL);
3649
20.8k
    }
3650
806k
    else if (writer->use_bytearray) {
3651
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3652
0
        if (writer->obj == NULL) {
3653
0
            return -1;
3654
0
        }
3655
0
        if (resize) {
3656
0
            assert((size_t)size > sizeof(writer->small_buffer));
3657
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3658
0
                   writer->small_buffer,
3659
0
                   sizeof(writer->small_buffer));
3660
0
        }
3661
0
    }
3662
806k
    else {
3663
806k
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3664
806k
        if (writer->obj == NULL) {
3665
0
            return -1;
3666
0
        }
3667
806k
        if (resize) {
3668
0
            assert((size_t)size > sizeof(writer->small_buffer));
3669
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3670
0
                   writer->small_buffer,
3671
0
                   sizeof(writer->small_buffer));
3672
0
        }
3673
806k
    }
3674
3675
#ifdef Py_DEBUG
3676
    Py_ssize_t allocated = byteswriter_allocated(writer);
3677
    if (resize && allocated > old_allocated) {
3678
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3679
               allocated - old_allocated);
3680
    }
3681
#endif
3682
3683
826k
    return 0;
3684
826k
}
3685
3686
3687
static PyBytesWriter*
3688
byteswriter_create(Py_ssize_t size, int use_bytearray)
3689
27.5M
{
3690
27.5M
    if (size < 0) {
3691
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3692
0
        return NULL;
3693
0
    }
3694
3695
27.5M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3696
27.5M
    if (writer == NULL) {
3697
12.5k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3698
12.5k
        if (writer == NULL) {
3699
0
            PyErr_NoMemory();
3700
0
            return NULL;
3701
0
        }
3702
12.5k
    }
3703
27.5M
    writer->obj = NULL;
3704
27.5M
    writer->size = 0;
3705
27.5M
    writer->use_bytearray = use_bytearray;
3706
27.5M
    writer->overallocate = !use_bytearray;
3707
3708
27.5M
    if (size >= 1) {
3709
27.5M
        if (byteswriter_resize(writer, size, 0) < 0) {
3710
0
            PyBytesWriter_Discard(writer);
3711
0
            return NULL;
3712
0
        }
3713
27.5M
        writer->size = size;
3714
27.5M
    }
3715
#ifdef Py_DEBUG
3716
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3717
#endif
3718
27.5M
    return writer;
3719
27.5M
}
3720
3721
PyBytesWriter*
3722
PyBytesWriter_Create(Py_ssize_t size)
3723
27.5M
{
3724
27.5M
    return byteswriter_create(size, 0);
3725
27.5M
}
3726
3727
PyBytesWriter*
3728
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3729
0
{
3730
0
    return byteswriter_create(size, 1);
3731
0
}
3732
3733
3734
void
3735
PyBytesWriter_Discard(PyBytesWriter *writer)
3736
27.7M
{
3737
27.7M
    if (writer == NULL) {
3738
170k
        return;
3739
170k
    }
3740
3741
27.5M
    Py_XDECREF(writer->obj);
3742
27.5M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3743
27.5M
}
3744
3745
3746
PyObject*
3747
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3748
26.5M
{
3749
26.5M
    PyObject *result;
3750
26.5M
    if (size == 0) {
3751
47.4k
        result = bytes_get_empty();
3752
47.4k
    }
3753
26.5M
    else if (writer->obj != NULL) {
3754
693k
        if (writer->use_bytearray) {
3755
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3756
0
                if (PyByteArray_Resize(writer->obj, size)) {
3757
0
                    goto error;
3758
0
                }
3759
0
            }
3760
0
        }
3761
693k
        else {
3762
693k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3763
651k
                if (_PyBytes_Resize(&writer->obj, size)) {
3764
0
                    goto error;
3765
0
                }
3766
651k
            }
3767
693k
        }
3768
693k
        result = writer->obj;
3769
693k
        writer->obj = NULL;
3770
693k
    }
3771
25.8M
    else if (writer->use_bytearray) {
3772
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3773
0
    }
3774
25.8M
    else {
3775
25.8M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3776
25.8M
    }
3777
26.5M
    PyBytesWriter_Discard(writer);
3778
26.5M
    return result;
3779
3780
0
error:
3781
0
    PyBytesWriter_Discard(writer);
3782
0
    return NULL;
3783
26.5M
}
3784
3785
PyObject*
3786
PyBytesWriter_Finish(PyBytesWriter *writer)
3787
17.2M
{
3788
17.2M
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3789
17.2M
}
3790
3791
3792
PyObject*
3793
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3794
9.15M
{
3795
9.15M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3796
9.15M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3797
0
        PyBytesWriter_Discard(writer);
3798
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3799
0
        return NULL;
3800
0
    }
3801
3802
9.15M
    return PyBytesWriter_FinishWithSize(writer, size);
3803
9.15M
}
3804
3805
3806
void*
3807
PyBytesWriter_GetData(PyBytesWriter *writer)
3808
27.8M
{
3809
27.8M
    return byteswriter_data(writer);
3810
27.8M
}
3811
3812
3813
Py_ssize_t
3814
PyBytesWriter_GetSize(PyBytesWriter *writer)
3815
0
{
3816
0
    return _PyBytesWriter_GetSize(writer);
3817
0
}
3818
3819
3820
static Py_ssize_t
3821
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3822
13.3k
{
3823
13.3k
    return byteswriter_allocated(writer);
3824
13.3k
}
3825
3826
3827
int
3828
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3829
0
{
3830
0
    if (size < 0) {
3831
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3832
0
        return -1;
3833
0
    }
3834
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3835
0
        return -1;
3836
0
    }
3837
0
    writer->size = size;
3838
0
    return 0;
3839
0
}
3840
3841
3842
static void*
3843
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3844
                                      void *data)
3845
0
{
3846
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3847
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3848
0
        return NULL;
3849
0
    }
3850
0
    return byteswriter_data(writer) + pos;
3851
0
}
3852
3853
3854
int
3855
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3856
20.8k
{
3857
20.8k
    if (size < 0 && writer->size + size < 0) {
3858
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3859
0
        return -1;
3860
0
    }
3861
20.8k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3862
0
        PyErr_NoMemory();
3863
0
        return -1;
3864
0
    }
3865
20.8k
    size = writer->size + size;
3866
3867
20.8k
    if (byteswriter_resize(writer, size, 1) < 0) {
3868
0
        return -1;
3869
0
    }
3870
20.8k
    writer->size = size;
3871
20.8k
    return 0;
3872
20.8k
}
3873
3874
3875
void*
3876
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3877
                                   void *buf)
3878
0
{
3879
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3880
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3881
0
        return NULL;
3882
0
    }
3883
0
    return byteswriter_data(writer) + pos;
3884
0
}
3885
3886
3887
int
3888
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3889
                         const void *bytes, Py_ssize_t size)
3890
0
{
3891
0
    if (size < 0) {
3892
0
        size_t len = strlen(bytes);
3893
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3894
0
            PyErr_NoMemory();
3895
0
            return -1;
3896
0
        }
3897
0
        size = (Py_ssize_t)len;
3898
0
    }
3899
3900
0
    Py_ssize_t pos = writer->size;
3901
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3902
0
        return -1;
3903
0
    }
3904
0
    char *buf = byteswriter_data(writer);
3905
0
    memcpy(buf + pos, bytes, size);
3906
0
    return 0;
3907
0
}
3908
3909
3910
int
3911
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3912
0
{
3913
0
    Py_ssize_t pos = writer->size;
3914
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3915
0
        return -1;
3916
0
    }
3917
3918
0
    va_list vargs;
3919
0
    va_start(vargs, format);
3920
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3921
0
    va_end(vargs);
3922
3923
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3924
0
    return PyBytesWriter_Resize(writer, size);
3925
0
}