Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_RepeatBuffer()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
289M
/* Short local alias for _PyBytesObject_SIZE.  The allocation code in this
   file adds it to the payload length when sizing a new bytes object. */
#define PyBytesObject_SIZE _PyBytesObject_SIZE
29
30
/* Forward declaration */
31
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
32
                                                   Py_ssize_t size, void *data);
33
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
34
35
36
33.1M
/* Table of singleton one-byte bytes objects; the users in this file index
   it with a byte value masked by 255. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the singleton one-byte bytes object for byte value 'ch'.
   Expands to a plain expression (no trailing semicolon), so it is safe in
   an unbraced if/else or as part of a larger expression. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
/* Pointer to the singleton empty bytes object. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
40
41
42
// Return a reference to the immortal empty bytes string singleton.
43
static inline PyObject* bytes_get_empty(void)
44
7.65M
{
45
7.65M
    PyObject *empty = &EMPTY->ob_base.ob_base;
46
7.65M
    assert(_Py_IsImmortal(empty));
47
7.65M
    return empty;
48
7.65M
}
49
50
51
static inline void
52
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
53
192M
{
54
192M
_Py_COMP_DIAG_PUSH
55
192M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
56
#ifdef Py_GIL_DISABLED
57
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
58
#else
59
192M
    a->ob_shash = hash;
60
192M
#endif
61
192M
_Py_COMP_DIAG_POP
62
192M
}
63
64
static inline Py_hash_t
65
get_ob_shash(PyBytesObject *a)
66
77.5M
{
67
77.5M
_Py_COMP_DIAG_PUSH
68
77.5M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
69
#ifdef Py_GIL_DISABLED
70
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
71
#else
72
77.5M
    return a->ob_shash;
73
77.5M
#endif
74
77.5M
_Py_COMP_DIAG_POP
75
77.5M
}
76
77
78
/*
79
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
80
   string containing exactly 'size' bytes.
81
82
   For PyBytes_FromStringAndSize(), the parameter 'str' is
83
   either NULL or else points to a string containing at least 'size' bytes.
84
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
85
   not have to be null-terminated.  (Therefore it is safe to construct a
86
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
87
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
88
   bytes (setting the last byte to the null terminating character) and you can
89
   fill in the data yourself.  If 'str' is non-NULL then the resulting
90
   PyBytes object must be treated as immutable and you must not fill in nor
91
   alter the data yourself, since the strings may be shared.
92
93
   The PyObject member 'op->ob_size', which denotes the number of "extra
94
   items" in a variable-size object, will contain the number of bytes
95
   allocated for string data, not counting the null terminating character.
96
   It is therefore equal to the 'size' parameter (for
97
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
98
   parameter (for PyBytes_FromString()).
99
*/
100
static PyObject *
101
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
102
142M
{
103
142M
    PyBytesObject *op;
104
142M
    assert(size >= 0);
105
106
142M
    if (size == 0) {
107
0
        return bytes_get_empty();
108
0
    }
109
110
142M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
111
0
        PyErr_SetString(PyExc_OverflowError,
112
0
                        "byte string is too large");
113
0
        return NULL;
114
0
    }
115
116
    /* Inline PyObject_NewVar */
117
142M
    if (use_calloc)
118
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
119
142M
    else
120
142M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
121
142M
    if (op == NULL) {
122
0
        return PyErr_NoMemory();
123
0
    }
124
142M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
125
142M
    set_ob_shash(op, -1);
126
142M
    if (!use_calloc) {
127
142M
        op->ob_sval[size] = '\0';
128
142M
    }
129
142M
    return (PyObject *) op;
130
142M
}
131
132
PyObject *
133
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
134
165M
{
135
165M
    PyBytesObject *op;
136
165M
    if (size < 0) {
137
0
        PyErr_SetString(PyExc_SystemError,
138
0
            "Negative size passed to PyBytes_FromStringAndSize");
139
0
        return NULL;
140
0
    }
141
165M
    if (size == 1 && str != NULL) {
142
33.1M
        op = CHARACTER(*str & 255);
143
33.1M
        assert(_Py_IsImmortal(op));
144
33.1M
        return (PyObject *)op;
145
33.1M
    }
146
131M
    if (size == 0) {
147
7.59M
        return bytes_get_empty();
148
7.59M
    }
149
150
124M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
151
124M
    if (op == NULL)
152
0
        return NULL;
153
124M
    if (str == NULL)
154
9.98M
        return (PyObject *) op;
155
156
114M
    memcpy(op->ob_sval, str, size);
157
114M
    return (PyObject *) op;
158
124M
}
159
160
PyObject *
161
PyBytes_FromString(const char *str)
162
90
{
163
90
    size_t size;
164
90
    PyBytesObject *op;
165
166
90
    assert(str != NULL);
167
90
    size = strlen(str);
168
90
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
169
0
        PyErr_SetString(PyExc_OverflowError,
170
0
            "byte string is too long");
171
0
        return NULL;
172
0
    }
173
174
90
    if (size == 0) {
175
0
        return bytes_get_empty();
176
0
    }
177
90
    else if (size == 1) {
178
0
        op = CHARACTER(*str & 255);
179
0
        assert(_Py_IsImmortal(op));
180
0
        return (PyObject *)op;
181
0
    }
182
183
    /* Inline PyObject_NewVar */
184
90
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
185
90
    if (op == NULL) {
186
0
        return PyErr_NoMemory();
187
0
    }
188
90
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
189
90
    set_ob_shash(op, -1);
190
90
    memcpy(op->ob_sval, str, size+1);
191
90
    return (PyObject *) op;
192
90
}
193
194
195
static char*
196
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
197
                 const char *format, va_list vargs)
198
0
{
199
0
    const char *f;
200
0
    const char *p;
201
0
    Py_ssize_t prec;
202
0
    int longflag;
203
0
    int size_tflag;
204
    /* Longest 64-bit formatted numbers:
205
       - "18446744073709551615\0" (21 bytes)
206
       - "-9223372036854775808\0" (21 bytes)
207
       Decimal takes the most space (it isn't enough for octal.)
208
209
       Longest 64-bit pointer representation:
210
       "0xffffffffffffffff\0" (19 bytes). */
211
0
    char buffer[21];
212
213
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
214
215
0
#define WRITE_BYTES_LEN(str, len_expr) \
216
0
    do { \
217
0
        size_t len = (len_expr); \
218
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
219
0
        if (s == NULL) { \
220
0
            goto error; \
221
0
        } \
222
0
        memcpy(s, (str), len); \
223
0
        s += len; \
224
0
    } while (0)
225
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
226
227
0
    for (f = format; *f; f++) {
228
0
        if (*f != '%') {
229
0
            *s++ = *f;
230
0
            continue;
231
0
        }
232
233
0
        p = f++;
234
235
        /* ignore the width (ex: 10 in "%10s") */
236
0
        while (Py_ISDIGIT(*f))
237
0
            f++;
238
239
        /* parse the precision (ex: 10 in "%.10s") */
240
0
        prec = 0;
241
0
        if (*f == '.') {
242
0
            f++;
243
0
            for (; Py_ISDIGIT(*f); f++) {
244
0
                prec = (prec * 10) + (*f - '0');
245
0
            }
246
0
        }
247
248
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
249
0
            f++;
250
251
        /* handle the long flag ('l'), but only for %ld and %lu.
252
           others can be added when necessary. */
253
0
        longflag = 0;
254
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
255
0
            longflag = 1;
256
0
            ++f;
257
0
        }
258
259
        /* handle the size_t flag ('z'). */
260
0
        size_tflag = 0;
261
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
262
0
            size_tflag = 1;
263
0
            ++f;
264
0
        }
265
266
0
        switch (*f) {
267
0
        case 'c':
268
0
        {
269
0
            int c = va_arg(vargs, int);
270
0
            if (c < 0 || c > 255) {
271
0
                PyErr_SetString(PyExc_OverflowError,
272
0
                                "PyBytes_FromFormatV(): %c format "
273
0
                                "expects an integer in range [0; 255]");
274
0
                goto error;
275
0
            }
276
0
            *s++ = (unsigned char)c;
277
0
            break;
278
0
        }
279
280
0
        case 'd':
281
0
            if (longflag) {
282
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
283
0
            }
284
0
            else if (size_tflag) {
285
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
286
0
            }
287
0
            else {
288
0
                sprintf(buffer, "%d", va_arg(vargs, int));
289
0
            }
290
0
            assert(strlen(buffer) < sizeof(buffer));
291
0
            WRITE_BYTES(buffer);
292
0
            break;
293
294
0
        case 'u':
295
0
            if (longflag) {
296
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
297
0
            }
298
0
            else if (size_tflag) {
299
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
300
0
            }
301
0
            else {
302
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
303
0
            }
304
0
            assert(strlen(buffer) < sizeof(buffer));
305
0
            WRITE_BYTES(buffer);
306
0
            break;
307
308
0
        case 'i':
309
0
            sprintf(buffer, "%i", va_arg(vargs, int));
310
0
            assert(strlen(buffer) < sizeof(buffer));
311
0
            WRITE_BYTES(buffer);
312
0
            break;
313
314
0
        case 'x':
315
0
            sprintf(buffer, "%x", va_arg(vargs, int));
316
0
            assert(strlen(buffer) < sizeof(buffer));
317
0
            WRITE_BYTES(buffer);
318
0
            break;
319
320
0
        case 's':
321
0
        {
322
0
            Py_ssize_t i;
323
324
0
            p = va_arg(vargs, const char*);
325
0
            if (prec <= 0) {
326
0
                i = strlen(p);
327
0
            }
328
0
            else {
329
0
                i = 0;
330
0
                while (i < prec && p[i]) {
331
0
                    i++;
332
0
                }
333
0
            }
334
0
            WRITE_BYTES_LEN(p, i);
335
0
            break;
336
0
        }
337
338
0
        case 'p':
339
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
340
0
            assert(strlen(buffer) < sizeof(buffer));
341
            /* %p is ill-defined:  ensure leading 0x. */
342
0
            if (buffer[1] == 'X')
343
0
                buffer[1] = 'x';
344
0
            else if (buffer[1] != 'x') {
345
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
346
0
                buffer[0] = '0';
347
0
                buffer[1] = 'x';
348
0
            }
349
0
            WRITE_BYTES(buffer);
350
0
            break;
351
352
0
        case '%':
353
0
            *s++ = '%';
354
0
            break;
355
356
0
        default:
357
            /* invalid format string: copy unformatted string and exit */
358
0
            WRITE_BYTES(p);
359
0
            return s;
360
0
        }
361
0
    }
362
363
0
#undef WRITE_BYTES
364
0
#undef WRITE_BYTES_LEN
365
366
0
    return s;
367
368
0
 error:
369
0
    return NULL;
370
0
}
371
372
373
PyObject *
374
PyBytes_FromFormatV(const char *format, va_list vargs)
375
0
{
376
0
    Py_ssize_t alloc = strlen(format);
377
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
378
0
    if (writer == NULL) {
379
0
        return NULL;
380
0
    }
381
382
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
383
0
    if (s == NULL) {
384
0
        PyBytesWriter_Discard(writer);
385
0
        return NULL;
386
0
    }
387
388
0
    return PyBytesWriter_FinishWithPointer(writer, s);
389
0
}
390
391
392
/* Variadic front end: collects the arguments into a va_list and forwards
   them to PyBytes_FromFormatV(), returning whatever that call returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
403
404
405
/* Helpers for formatstring */
406
407
0
/* Raise EXC with a message that names the offending format argument.
   Expects 'key' (PyObject *) and 'argidx' (Py_ssize_t) to be in scope at
   the expansion site:
     - key != NULL  -> "format argument <repr of key>: ..."
     - argidx >= 0  -> "format argument <index>: ..."
     - otherwise    -> "format argument: ..."
   At least one variadic argument must be supplied after FMT. */
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (key == NULL && argidx < 0) {                                        \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
    else if (key == NULL) {                                                 \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     argidx, __VA_ARGS__);                                  \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     key, __VA_ARGS__);                                     \
    }                                                                       \
} while (0)
420
421
/* Fetch the next argument for a format specifier.

   args      the argument tuple, or the single non-tuple argument.
   arglen    tuple length, or a negative value when 'args' is not a tuple.
   p_argidx  in/out cursor; it is advanced whenever it is below 'arglen',
             including on the failing non-tuple path.
   allowone  non-zero if a non-tuple 'args' may be consumed as the value.

   Returns the result of PyTuple_GetItem() for a tuple, or 'args' itself
   for an allowed single value.  Otherwise sets TypeError and returns
   NULL. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone)
{
    const Py_ssize_t idx = *p_argidx;

    if (idx < arglen) {
        *p_argidx = idx + 1;
        if (arglen >= 0) {
            return PyTuple_GetItem(args, idx);
        }
        if (allowone) {
            return args;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "not enough arguments for format string (got %zd)",
                 arglen < 0 ? 1 : arglen);
    return NULL;
}
439
440
/* Returns a new reference to a PyBytes object, or NULL on failure. */
441
442
static char*
443
formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key,
444
            int flags, int prec, int type,
445
            PyObject **p_result, PyBytesWriter *writer, char *str)
446
0
{
447
0
    char *p;
448
0
    PyObject *result;
449
0
    double x;
450
0
    size_t len;
451
0
    int dtoa_flags = 0;
452
453
0
    x = PyFloat_AsDouble(v);
454
0
    if (x == -1.0 && PyErr_Occurred()) {
455
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
456
0
            FORMAT_ERROR(PyExc_TypeError,
457
0
                         "%%%c requires a real number, not %T",
458
0
                         type, v);
459
0
        }
460
0
        return NULL;
461
0
    }
462
463
0
    if (prec < 0)
464
0
        prec = 6;
465
466
0
    if (flags & F_ALT) {
467
0
        dtoa_flags |= Py_DTSF_ALT;
468
0
    }
469
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
470
471
0
    if (p == NULL)
472
0
        return NULL;
473
474
0
    len = strlen(p);
475
0
    if (writer != NULL) {
476
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
477
0
        if (str == NULL) {
478
0
            PyMem_Free(p);
479
0
            return NULL;
480
0
        }
481
0
        memcpy(str, p, len);
482
0
        PyMem_Free(p);
483
0
        str += len;
484
0
        return str;
485
0
    }
486
487
0
    result = PyBytes_FromStringAndSize(p, len);
488
0
    PyMem_Free(p);
489
0
    *p_result = result;
490
0
    return result != NULL ? str : NULL;
491
0
}
492
493
static PyObject *
494
formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key,
495
           int flags, int prec, int type)
496
0
{
497
0
    PyObject *result, *iobj;
498
0
    if (PyLong_Check(v))
499
0
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
500
0
    if (PyNumber_Check(v)) {
501
        /* make sure number is a type of integer for o, x, and X */
502
0
        if (type == 'o' || type == 'x' || type == 'X')
503
0
            iobj = _PyNumber_Index(v);
504
0
        else
505
0
            iobj = PyNumber_Long(v);
506
0
        if (iobj != NULL) {
507
0
            assert(PyLong_Check(iobj));
508
0
            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
509
0
            Py_DECREF(iobj);
510
0
            return result;
511
0
        }
512
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
513
0
            return NULL;
514
0
    }
515
0
    FORMAT_ERROR(PyExc_TypeError,
516
0
                 "%%%c requires %s, not %T",
517
0
                 type,
518
0
                 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
519
0
                                                             : "a real number",
520
0
                 v);
521
0
    return NULL;
522
0
}
523
524
static int
525
byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p)
526
0
{
527
0
    if (PyBytes_Check(arg)) {
528
0
        if (PyBytes_GET_SIZE(arg) != 1) {
529
0
            FORMAT_ERROR(PyExc_TypeError,
530
0
                         "%%c requires an integer in range(256) or "
531
0
                         "a single byte, not a bytes object of length %zd",
532
0
                         PyBytes_GET_SIZE(arg));
533
0
            return 0;
534
0
        }
535
0
        *p = PyBytes_AS_STRING(arg)[0];
536
0
        return 1;
537
0
    }
538
0
    else if (PyByteArray_Check(arg)) {
539
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
540
0
            FORMAT_ERROR(PyExc_TypeError,
541
0
                         "%%c requires an integer in range(256) or "
542
0
                         "a single byte, not a bytearray object of length %zd",
543
0
                         PyByteArray_GET_SIZE(arg));
544
0
            return 0;
545
0
        }
546
0
        *p = PyByteArray_AS_STRING(arg)[0];
547
0
        return 1;
548
0
    }
549
0
    else if (PyIndex_Check(arg)) {
550
0
        int overflow;
551
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
552
0
        if (ival == -1 && PyErr_Occurred()) {
553
0
            return 0;
554
0
        }
555
0
        if (!(0 <= ival && ival <= 255)) {
556
            /* this includes an overflow in converting to C long */
557
0
            FORMAT_ERROR(PyExc_OverflowError,
558
0
                         "%%c argument not in range(256)%s", "");
559
0
            return 0;
560
0
        }
561
0
        *p = (char)ival;
562
0
        return 1;
563
0
    }
564
0
    FORMAT_ERROR(PyExc_TypeError,
565
0
                 "%%c requires an integer in range(256) or "
566
0
                 "a single byte, not %T",
567
0
                 arg);
568
0
    return 0;
569
0
}
570
571
static PyObject *_PyBytes_FromBuffer(PyObject *x);
572
573
static PyObject *
574
format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key,
575
           const char **pbuf, Py_ssize_t *plen)
576
0
{
577
0
    PyObject *func, *result;
578
    /* is it a bytes object? */
579
0
    if (PyBytes_Check(v)) {
580
0
        *pbuf = PyBytes_AS_STRING(v);
581
0
        *plen = PyBytes_GET_SIZE(v);
582
0
        return Py_NewRef(v);
583
0
    }
584
0
    if (PyByteArray_Check(v)) {
585
0
        *pbuf = PyByteArray_AS_STRING(v);
586
0
        *plen = PyByteArray_GET_SIZE(v);
587
0
        return Py_NewRef(v);
588
0
    }
589
    /* does it support __bytes__? */
590
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
591
0
    if (func != NULL) {
592
0
        result = _PyObject_CallNoArgs(func);
593
0
        Py_DECREF(func);
594
0
        if (result == NULL)
595
0
            return NULL;
596
0
        if (!PyBytes_Check(result)) {
597
0
            PyErr_Format(PyExc_TypeError,
598
0
                         "%T.__bytes__() must return a bytes, not %T",
599
0
                         v, result);
600
0
            Py_DECREF(result);
601
0
            return NULL;
602
0
        }
603
0
        *pbuf = PyBytes_AS_STRING(result);
604
0
        *plen = PyBytes_GET_SIZE(result);
605
0
        return result;
606
0
    }
607
    /* does it support buffer protocol? */
608
0
    if (PyObject_CheckBuffer(v)) {
609
        /* maybe we can avoid making a copy of the buffer object here? */
610
0
        result = _PyBytes_FromBuffer(v);
611
0
        if (result == NULL)
612
0
            return NULL;
613
0
        *pbuf = PyBytes_AS_STRING(result);
614
0
        *plen = PyBytes_GET_SIZE(result);
615
0
        return result;
616
0
    }
617
0
    FORMAT_ERROR(PyExc_TypeError,
618
0
                 "%%b requires a bytes-like object, "
619
0
                 "or an object that implements __bytes__, not %T",
620
0
                 v);
621
0
    return NULL;
622
0
}
623
624
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
625
626
PyObject *
627
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
628
                  PyObject *args, int use_bytearray)
629
0
{
630
0
    const char *fmt;
631
0
    Py_ssize_t arglen, argidx;
632
0
    Py_ssize_t fmtcnt;
633
0
    int args_owned = 0;
634
0
    PyObject *dict = NULL;
635
0
    PyObject *key = NULL;
636
637
0
    if (args == NULL) {
638
0
        PyErr_BadInternalCall();
639
0
        return NULL;
640
0
    }
641
0
    fmt = format;
642
0
    fmtcnt = format_len;
643
644
0
    PyBytesWriter *writer;
645
0
    if (use_bytearray) {
646
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
647
0
    }
648
0
    else {
649
0
        writer = PyBytesWriter_Create(fmtcnt);
650
0
    }
651
0
    if (writer == NULL) {
652
0
        return NULL;
653
0
    }
654
0
    char *res = PyBytesWriter_GetData(writer);
655
656
0
    if (PyTuple_Check(args)) {
657
0
        arglen = PyTuple_GET_SIZE(args);
658
0
        argidx = 0;
659
0
    }
660
0
    else {
661
0
        arglen = -1;
662
0
        argidx = -2;
663
0
    }
664
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
665
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
666
0
        !PyByteArray_Check(args)) {
667
0
            dict = args;
668
0
    }
669
670
0
    while (--fmtcnt >= 0) {
671
0
        if (*fmt != '%') {
672
0
            Py_ssize_t len;
673
0
            char *pos;
674
675
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
676
0
            if (pos != NULL)
677
0
                len = pos - fmt;
678
0
            else
679
0
                len = fmtcnt + 1;
680
0
            assert(len != 0);
681
682
0
            memcpy(res, fmt, len);
683
0
            res += len;
684
0
            fmt += len;
685
0
            fmtcnt -= (len - 1);
686
0
        }
687
0
        else {
688
            /* Got a format specifier */
689
0
            int flags = 0;
690
0
            Py_ssize_t width = -1;
691
0
            int prec = -1;
692
0
            int c = '\0';
693
0
            int fill;
694
0
            PyObject *v = NULL;
695
0
            PyObject *temp = NULL;
696
0
            const char *pbuf = NULL;
697
0
            int sign;
698
0
            Py_ssize_t len = 0;
699
0
            char onechar; /* For byte_converter() */
700
0
            Py_ssize_t alloc;
701
702
0
            fmt++;
703
0
            if (*fmt == '%') {
704
0
                *res++ = '%';
705
0
                fmt++;
706
0
                fmtcnt--;
707
0
                continue;
708
0
            }
709
0
            Py_CLEAR(key);
710
0
            const char *fmtstart = fmt;
711
0
            if (*fmt == '(') {
712
0
                const char *keystart;
713
0
                Py_ssize_t keylen;
714
0
                int pcount = 1;
715
716
0
                if (dict == NULL) {
717
0
                    PyErr_Format(PyExc_TypeError,
718
0
                                 "format requires a mapping, not %T",
719
0
                                 args);
720
0
                    goto error;
721
0
                }
722
0
                ++fmt;
723
0
                --fmtcnt;
724
0
                keystart = fmt;
725
                /* Skip over balanced parentheses */
726
0
                while (pcount > 0 && --fmtcnt >= 0) {
727
0
                    if (*fmt == ')')
728
0
                        --pcount;
729
0
                    else if (*fmt == '(')
730
0
                        ++pcount;
731
0
                    fmt++;
732
0
                }
733
0
                keylen = fmt - keystart - 1;
734
0
                if (fmtcnt < 0 || pcount > 0) {
735
0
                    PyErr_Format(PyExc_ValueError,
736
0
                                 "stray %% or incomplete format key "
737
0
                                 "at position %zd",
738
0
                                 (Py_ssize_t)(fmtstart - format - 1));
739
0
                    goto error;
740
0
                }
741
0
                key = PyBytes_FromStringAndSize(keystart,
742
0
                                                 keylen);
743
0
                if (key == NULL)
744
0
                    goto error;
745
0
                if (args_owned) {
746
0
                    Py_DECREF(args);
747
0
                    args_owned = 0;
748
0
                }
749
0
                args = PyObject_GetItem(dict, key);
750
0
                if (args == NULL) {
751
0
                    goto error;
752
0
                }
753
0
                args_owned = 1;
754
0
                arglen = -3;
755
0
                argidx = -4;
756
0
            }
757
0
            else {
758
0
                if (arglen < -1) {
759
0
                    PyErr_Format(PyExc_ValueError,
760
0
                                 "format requires a parenthesised mapping key "
761
0
                                 "at position %zd",
762
0
                                 (Py_ssize_t)(fmtstart - format - 1));
763
0
                    goto error;
764
0
                }
765
0
            }
766
767
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
768
0
            while (--fmtcnt >= 0) {
769
0
                switch (c = *fmt++) {
770
0
                case '-': flags |= F_LJUST; continue;
771
0
                case '+': flags |= F_SIGN; continue;
772
0
                case ' ': flags |= F_BLANK; continue;
773
0
                case '#': flags |= F_ALT; continue;
774
0
                case '0': flags |= F_ZERO; continue;
775
0
                }
776
0
                break;
777
0
            }
778
779
            /* Parse width. Example: "%10s" => width=10 */
780
0
            if (c == '*') {
781
0
                if (arglen < -1) {
782
0
                    PyErr_Format(PyExc_ValueError,
783
0
                            "* cannot be used with a parenthesised mapping key "
784
0
                            "at position %zd",
785
0
                            (Py_ssize_t)(fmtstart - format - 1));
786
0
                    goto error;
787
0
                }
788
0
                v = getnextarg(args, arglen, &argidx, 0);
789
0
                if (v == NULL)
790
0
                    goto error;
791
0
                if (!PyLong_Check(v)) {
792
0
                    FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
793
0
                    goto error;
794
0
                }
795
0
                width = PyLong_AsSsize_t(v);
796
0
                if (width == -1 && PyErr_Occurred()) {
797
0
                    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
798
0
                        FORMAT_ERROR(PyExc_OverflowError,
799
0
                                     "too big for width%s", "");
800
0
                    }
801
0
                    goto error;
802
0
                }
803
0
                if (width < 0) {
804
0
                    flags |= F_LJUST;
805
0
                    width = -width;
806
0
                }
807
0
                if (--fmtcnt >= 0)
808
0
                    c = *fmt++;
809
0
            }
810
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
811
0
                width = c - '0';
812
0
                while (--fmtcnt >= 0) {
813
0
                    c = Py_CHARMASK(*fmt++);
814
0
                    if (!Py_ISDIGIT(c))
815
0
                        break;
816
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
817
0
                        PyErr_Format(PyExc_ValueError,
818
0
                                     "width too big at position %zd",
819
0
                                     (Py_ssize_t)(fmtstart - format - 1));
820
0
                        goto error;
821
0
                    }
822
0
                    width = width*10 + (c - '0');
823
0
                }
824
0
            }
825
826
            /* Parse precision. Example: "%.3f" => prec=3 */
827
0
            if (c == '.') {
828
0
                prec = 0;
829
0
                if (--fmtcnt >= 0)
830
0
                    c = *fmt++;
831
0
                if (c == '*') {
832
0
                    if (arglen < -1) {
833
0
                        PyErr_Format(PyExc_ValueError,
834
0
                                "* cannot be used with a parenthesised mapping key "
835
0
                                "at position %zd",
836
0
                                (Py_ssize_t)(fmtstart - format - 1));
837
0
                        goto error;
838
0
                    }
839
0
                    v = getnextarg(args, arglen, &argidx, 0);
840
0
                    if (v == NULL)
841
0
                        goto error;
842
0
                    if (!PyLong_Check(v)) {
843
0
                        FORMAT_ERROR(PyExc_TypeError,
844
0
                                     "* requires int, not %T", v);
845
0
                        goto error;
846
0
                    }
847
0
                    prec = PyLong_AsInt(v);
848
0
                    if (prec == -1 && PyErr_Occurred()) {
849
0
                        if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
850
0
                            FORMAT_ERROR(PyExc_OverflowError,
851
0
                                         "too big for precision%s", "");
852
0
                        }
853
0
                        goto error;
854
0
                    }
855
0
                    if (prec < 0)
856
0
                        prec = 0;
857
0
                    if (--fmtcnt >= 0)
858
0
                        c = *fmt++;
859
0
                }
860
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
861
0
                    prec = c - '0';
862
0
                    while (--fmtcnt >= 0) {
863
0
                        c = Py_CHARMASK(*fmt++);
864
0
                        if (!Py_ISDIGIT(c))
865
0
                            break;
866
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
867
0
                            PyErr_Format(PyExc_ValueError,
868
0
                                "precision too big at position %zd",
869
0
                                (Py_ssize_t)(fmtstart - format - 1));
870
0
                            goto error;
871
0
                        }
872
0
                        prec = prec*10 + (c - '0');
873
0
                    }
874
0
                }
875
0
            } /* prec */
876
0
            if (fmtcnt >= 0) {
877
0
                if (c == 'h' || c == 'l' || c == 'L') {
878
0
                    if (--fmtcnt >= 0)
879
0
                        c = *fmt++;
880
0
                }
881
0
            }
882
0
            if (fmtcnt < 0) {
883
0
                PyErr_Format(PyExc_ValueError,
884
0
                             "stray %% at position %zd",
885
0
                             (Py_ssize_t)(fmtstart - format - 1));
886
0
                goto error;
887
0
            }
888
0
            v = getnextarg(args, arglen, &argidx, 1);
889
0
            if (v == NULL)
890
0
                goto error;
891
892
0
            if (fmtcnt == 0) {
893
                /* last write: disable writer overallocation */
894
0
                writer->overallocate = 0;
895
0
            }
896
897
0
            sign = 0;
898
0
            fill = ' ';
899
0
            switch (c) {
900
0
            case 'r':
901
                // %r is only for 2/3 code; 3 only code should use %a
902
0
            case 'a':
903
0
                temp = PyObject_ASCII(v);
904
0
                if (temp == NULL)
905
0
                    goto error;
906
0
                assert(PyUnicode_IS_ASCII(temp));
907
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
908
0
                len = PyUnicode_GET_LENGTH(temp);
909
0
                if (prec >= 0 && len > prec)
910
0
                    len = prec;
911
0
                break;
912
913
0
            case 's':
914
                // %s is only for 2/3 code; 3 only code should use %b
915
0
            case 'b':
916
0
                temp = format_obj(v, argidx, key, &pbuf, &len);
917
0
                if (temp == NULL)
918
0
                    goto error;
919
0
                if (prec >= 0 && len > prec)
920
0
                    len = prec;
921
0
                break;
922
923
0
            case 'i':
924
0
            case 'd':
925
0
            case 'u':
926
0
            case 'o':
927
0
            case 'x':
928
0
            case 'X':
929
0
                if (PyLong_CheckExact(v)
930
0
                    && width == -1 && prec == -1
931
0
                    && !(flags & (F_SIGN | F_BLANK))
932
0
                    && c != 'X')
933
0
                {
934
                    /* Fast path */
935
0
                    int alternate = flags & F_ALT;
936
0
                    int base;
937
938
0
                    switch(c)
939
0
                    {
940
0
                        default:
941
0
                            Py_UNREACHABLE();
942
0
                        case 'd':
943
0
                        case 'i':
944
0
                        case 'u':
945
0
                            base = 10;
946
0
                            break;
947
0
                        case 'o':
948
0
                            base = 8;
949
0
                            break;
950
0
                        case 'x':
951
0
                        case 'X':
952
0
                            base = 16;
953
0
                            break;
954
0
                    }
955
956
                    /* Fast path */
957
0
                    res = _PyLong_FormatBytesWriter(writer, res,
958
0
                                                    v, base, alternate);
959
0
                    if (res == NULL)
960
0
                        goto error;
961
0
                    continue;
962
0
                }
963
964
0
                temp = formatlong(v, argidx, key, flags, prec, c);
965
0
                if (!temp)
966
0
                    goto error;
967
0
                assert(PyUnicode_IS_ASCII(temp));
968
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
969
0
                len = PyUnicode_GET_LENGTH(temp);
970
0
                sign = 1;
971
0
                if (flags & F_ZERO)
972
0
                    fill = '0';
973
0
                break;
974
975
0
            case 'e':
976
0
            case 'E':
977
0
            case 'f':
978
0
            case 'F':
979
0
            case 'g':
980
0
            case 'G':
981
0
                if (width == -1 && prec == -1
982
0
                    && !(flags & (F_SIGN | F_BLANK)))
983
0
                {
984
                    /* Fast path */
985
0
                    res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res);
986
0
                    if (res == NULL)
987
0
                        goto error;
988
0
                    continue;
989
0
                }
990
991
0
                if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res))
992
0
                    goto error;
993
0
                pbuf = PyBytes_AS_STRING(temp);
994
0
                len = PyBytes_GET_SIZE(temp);
995
0
                sign = 1;
996
0
                if (flags & F_ZERO)
997
0
                    fill = '0';
998
0
                break;
999
1000
0
            case 'c':
1001
0
                pbuf = &onechar;
1002
0
                len = byte_converter(v, argidx, key, &onechar);
1003
0
                if (!len)
1004
0
                    goto error;
1005
0
                if (width == -1) {
1006
                    /* Fast path */
1007
0
                    *res++ = onechar;
1008
0
                    continue;
1009
0
                }
1010
0
                break;
1011
1012
0
            default:
1013
0
                if (Py_ISALPHA(c)) {
1014
0
                    PyErr_Format(PyExc_ValueError,
1015
0
                                 "unsupported format %%%c at position %zd",
1016
0
                                 c, (Py_ssize_t)(fmtstart - format - 1));
1017
0
                }
1018
0
                else if (c == '\'') {
1019
0
                    PyErr_Format(PyExc_ValueError,
1020
0
                                 "stray %% at position %zd or unexpected "
1021
0
                                 "format character \"'\" "
1022
0
                                 "at position %zd",
1023
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1024
0
                                 (Py_ssize_t)(fmt - format - 1));
1025
0
                }
1026
0
                else if (c >= 32 && c < 127 && c != '\'') {
1027
0
                    PyErr_Format(PyExc_ValueError,
1028
0
                                 "stray %% at position %zd or unexpected "
1029
0
                                 "format character '%c' "
1030
0
                                 "at position %zd",
1031
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1032
0
                                 c, (Py_ssize_t)(fmt - format - 1));
1033
0
                }
1034
0
                else {
1035
0
                    PyErr_Format(PyExc_ValueError,
1036
0
                                 "stray %% at position %zd or unexpected "
1037
0
                                 "format character with code 0x%02x "
1038
0
                                 "at position %zd",
1039
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1040
0
                                 Py_CHARMASK(c),
1041
0
                                 (Py_ssize_t)(fmt - format - 1));
1042
0
                }
1043
0
                goto error;
1044
0
            }
1045
1046
0
            if (sign) {
1047
0
                if (*pbuf == '-' || *pbuf == '+') {
1048
0
                    sign = *pbuf++;
1049
0
                    len--;
1050
0
                }
1051
0
                else if (flags & F_SIGN)
1052
0
                    sign = '+';
1053
0
                else if (flags & F_BLANK)
1054
0
                    sign = ' ';
1055
0
                else
1056
0
                    sign = 0;
1057
0
            }
1058
0
            if (width < len)
1059
0
                width = len;
1060
1061
0
            alloc = width;
1062
0
            if (sign != 0 && len == width)
1063
0
                alloc++;
1064
            /* 2: size preallocated for %s */
1065
0
            if (alloc > 2) {
1066
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
1067
0
                if (res == NULL) {
1068
0
                    Py_XDECREF(temp);
1069
0
                    goto error;
1070
0
                }
1071
0
            }
1072
#ifndef NDEBUG
1073
            char *before = res;
1074
#endif
1075
1076
            /* Write the sign if needed */
1077
0
            if (sign) {
1078
0
                if (fill != ' ')
1079
0
                    *res++ = sign;
1080
0
                if (width > len)
1081
0
                    width--;
1082
0
            }
1083
1084
            /* Write the numeric prefix for "x", "X" and "o" formats
1085
               if the alternate form is used.
1086
               For example, write "0x" for the "%#x" format. */
1087
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1088
0
                assert(pbuf[0] == '0');
1089
0
                assert(pbuf[1] == c);
1090
0
                if (fill != ' ') {
1091
0
                    *res++ = *pbuf++;
1092
0
                    *res++ = *pbuf++;
1093
0
                }
1094
0
                width -= 2;
1095
0
                if (width < 0)
1096
0
                    width = 0;
1097
0
                len -= 2;
1098
0
            }
1099
1100
            /* Pad left with the fill character if needed */
1101
0
            if (width > len && !(flags & F_LJUST)) {
1102
0
                memset(res, fill, width - len);
1103
0
                res += (width - len);
1104
0
                width = len;
1105
0
            }
1106
1107
            /* If padding with spaces: write sign if needed and/or numeric
1108
               prefix if the alternate form is used */
1109
0
            if (fill == ' ') {
1110
0
                if (sign)
1111
0
                    *res++ = sign;
1112
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1113
0
                    assert(pbuf[0] == '0');
1114
0
                    assert(pbuf[1] == c);
1115
0
                    *res++ = *pbuf++;
1116
0
                    *res++ = *pbuf++;
1117
0
                }
1118
0
            }
1119
1120
            /* Copy bytes */
1121
0
            memcpy(res, pbuf, len);
1122
0
            res += len;
1123
1124
            /* Pad right with the fill character if needed */
1125
0
            if (width > len) {
1126
0
                memset(res, ' ', width - len);
1127
0
                res += (width - len);
1128
0
            }
1129
1130
0
            if (dict && (argidx < arglen)) {
1131
                // XXX: Never happens?
1132
0
                PyErr_SetString(PyExc_TypeError,
1133
0
                           "not all arguments converted during bytes formatting");
1134
0
                Py_XDECREF(temp);
1135
0
                goto error;
1136
0
            }
1137
0
            Py_XDECREF(temp);
1138
1139
#ifndef NDEBUG
1140
            /* check that we computed the exact size for this write */
1141
            assert((res - before) == alloc);
1142
#endif
1143
0
        } /* '%' */
1144
1145
        /* If overallocation was disabled, ensure that it was the last
1146
           write. Otherwise, we missed an optimization */
1147
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1148
0
    } /* until end */
1149
1150
0
    if (argidx < arglen && !dict) {
1151
0
        PyErr_Format(PyExc_TypeError,
1152
0
                     "not all arguments converted during bytes formatting "
1153
0
                     "(required %zd, got %zd)",
1154
0
                     arglen < 0 ? 0 : argidx,
1155
0
                     arglen < 0 ? 1 : arglen);
1156
0
        goto error;
1157
0
    }
1158
1159
0
    Py_XDECREF(key);
1160
0
    if (args_owned) {
1161
0
        Py_DECREF(args);
1162
0
    }
1163
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1164
1165
0
 error:
1166
0
    Py_XDECREF(key);
1167
0
    PyBytesWriter_Discard(writer);
1168
0
    if (args_owned) {
1169
0
        Py_DECREF(args);
1170
0
    }
1171
0
    return NULL;
1172
0
}
1173
1174
/* Unescape a backslash-escaped string. */
1175
PyObject *_PyBytes_DecodeEscape2(const char *s,
1176
                                Py_ssize_t len,
1177
                                const char *errors,
1178
                                int *first_invalid_escape_char,
1179
                                const char **first_invalid_escape_ptr)
1180
2.06k
{
1181
2.06k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1182
2.06k
    if (writer == NULL) {
1183
0
        return NULL;
1184
0
    }
1185
2.06k
    char *p = PyBytesWriter_GetData(writer);
1186
1187
2.06k
    *first_invalid_escape_char = -1;
1188
2.06k
    *first_invalid_escape_ptr = NULL;
1189
1190
2.06k
    const char *end = s + len;
1191
61.2k
    while (s < end) {
1192
59.2k
        if (*s != '\\') {
1193
50.3k
            *p++ = *s++;
1194
50.3k
            continue;
1195
50.3k
        }
1196
1197
8.90k
        s++;
1198
8.90k
        if (s == end) {
1199
0
            PyErr_SetString(PyExc_ValueError,
1200
0
                            "Trailing \\ in string");
1201
0
            goto failed;
1202
0
        }
1203
1204
8.90k
        switch (*s++) {
1205
        /* XXX This assumes ASCII! */
1206
601
        case '\n': break;
1207
804
        case '\\': *p++ = '\\'; break;
1208
210
        case '\'': *p++ = '\''; break;
1209
138
        case '\"': *p++ = '\"'; break;
1210
215
        case 'b': *p++ = '\b'; break;
1211
218
        case 'f': *p++ = '\014'; break; /* FF */
1212
186
        case 't': *p++ = '\t'; break;
1213
174
        case 'n': *p++ = '\n'; break;
1214
345
        case 'r': *p++ = '\r'; break;
1215
528
        case 'v': *p++ = '\013'; break; /* VT */
1216
168
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1217
2.37k
        case '0': case '1': case '2': case '3':
1218
3.85k
        case '4': case '5': case '6': case '7':
1219
3.85k
        {
1220
3.85k
            int c = s[-1] - '0';
1221
3.85k
            if (s < end && '0' <= *s && *s <= '7') {
1222
1.50k
                c = (c<<3) + *s++ - '0';
1223
1.50k
                if (s < end && '0' <= *s && *s <= '7')
1224
467
                    c = (c<<3) + *s++ - '0';
1225
1.50k
            }
1226
3.85k
            if (c > 0377) {
1227
405
                if (*first_invalid_escape_char == -1) {
1228
145
                    *first_invalid_escape_char = c;
1229
                    /* Back up 3 chars, since we've already incremented s. */
1230
145
                    *first_invalid_escape_ptr = s - 3;
1231
145
                }
1232
405
            }
1233
3.85k
            *p++ = c;
1234
3.85k
            break;
1235
3.62k
        }
1236
302
        case 'x':
1237
302
            if (s+1 < end) {
1238
301
                int digit1, digit2;
1239
301
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1240
301
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1241
301
                if (digit1 < 16 && digit2 < 16) {
1242
298
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1243
298
                    s += 2;
1244
298
                    break;
1245
298
                }
1246
301
            }
1247
            /* invalid hexadecimal digits */
1248
1249
4
            if (!errors || strcmp(errors, "strict") == 0) {
1250
4
                PyErr_Format(PyExc_ValueError,
1251
4
                             "invalid \\x escape at position %zd",
1252
4
                             s - 2 - (end - len));
1253
4
                goto failed;
1254
4
            }
1255
0
            if (strcmp(errors, "replace") == 0) {
1256
0
                *p++ = '?';
1257
0
            } else if (strcmp(errors, "ignore") == 0)
1258
0
                /* do nothing */;
1259
0
            else {
1260
0
                PyErr_Format(PyExc_ValueError,
1261
0
                             "decoding error; unknown "
1262
0
                             "error handling code: %.400s",
1263
0
                             errors);
1264
0
                goto failed;
1265
0
            }
1266
            /* skip \x */
1267
0
            if (s < end && Py_ISXDIGIT(s[0]))
1268
0
                s++; /* and a hexdigit */
1269
0
            break;
1270
1271
1.15k
        default:
1272
1.15k
            if (*first_invalid_escape_char == -1) {
1273
566
                *first_invalid_escape_char = (unsigned char)s[-1];
1274
                /* Back up one char, since we've already incremented s. */
1275
566
                *first_invalid_escape_ptr = s - 1;
1276
566
            }
1277
1.15k
            *p++ = '\\';
1278
1.15k
            s--;
1279
8.90k
        }
1280
8.90k
    }
1281
1282
2.06k
    return PyBytesWriter_FinishWithPointer(writer, p);
1283
1284
4
  failed:
1285
4
    PyBytesWriter_Discard(writer);
1286
4
    return NULL;
1287
2.06k
}
1288
1289
PyObject *PyBytes_DecodeEscape(const char *s,
1290
                                Py_ssize_t len,
1291
                                const char *errors,
1292
                                Py_ssize_t Py_UNUSED(unicode),
1293
                                const char *Py_UNUSED(recode_encoding))
1294
0
{
1295
0
    int first_invalid_escape_char;
1296
0
    const char *first_invalid_escape_ptr;
1297
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1298
0
                                             &first_invalid_escape_char,
1299
0
                                             &first_invalid_escape_ptr);
1300
0
    if (result == NULL)
1301
0
        return NULL;
1302
0
    if (first_invalid_escape_char != -1) {
1303
0
        if (first_invalid_escape_char > 0xff) {
1304
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1305
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1306
0
                                 "Such sequences will not work in the future. ",
1307
0
                                 first_invalid_escape_char) < 0)
1308
0
            {
1309
0
                Py_DECREF(result);
1310
0
                return NULL;
1311
0
            }
1312
0
        }
1313
0
        else {
1314
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1315
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1316
0
                                 "Such sequences will not work in the future. ",
1317
0
                                 first_invalid_escape_char) < 0)
1318
0
            {
1319
0
                Py_DECREF(result);
1320
0
                return NULL;
1321
0
            }
1322
0
        }
1323
0
    }
1324
0
    return result;
1325
0
}
1326
/* -------------------------------------------------------------------- */
1327
/* object api */
1328
1329
Py_ssize_t
1330
PyBytes_Size(PyObject *op)
1331
5.09k
{
1332
5.09k
    if (!PyBytes_Check(op)) {
1333
0
        PyErr_Format(PyExc_TypeError,
1334
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1335
0
        return -1;
1336
0
    }
1337
5.09k
    return Py_SIZE(op);
1338
5.09k
}
1339
1340
char *
1341
PyBytes_AsString(PyObject *op)
1342
12.7M
{
1343
12.7M
    if (!PyBytes_Check(op)) {
1344
0
        PyErr_Format(PyExc_TypeError,
1345
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1346
0
        return NULL;
1347
0
    }
1348
12.7M
    return ((PyBytesObject *)op)->ob_sval;
1349
12.7M
}
1350
1351
int
1352
PyBytes_AsStringAndSize(PyObject *obj,
1353
                         char **s,
1354
                         Py_ssize_t *len)
1355
69.0k
{
1356
69.0k
    if (s == NULL) {
1357
0
        PyErr_BadInternalCall();
1358
0
        return -1;
1359
0
    }
1360
1361
69.0k
    if (!PyBytes_Check(obj)) {
1362
0
        PyErr_Format(PyExc_TypeError,
1363
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1364
0
        return -1;
1365
0
    }
1366
1367
69.0k
    *s = PyBytes_AS_STRING(obj);
1368
69.0k
    if (len != NULL)
1369
69.0k
        *len = PyBytes_GET_SIZE(obj);
1370
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1371
0
        PyErr_SetString(PyExc_ValueError,
1372
0
                        "embedded null byte");
1373
0
        return -1;
1374
0
    }
1375
69.0k
    return 0;
1376
69.0k
}
1377
1378
/* -------------------------------------------------------------------- */
1379
/* Methods */
1380
1381
5.89k
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1382
1383
#include "stringlib/stringdefs.h"
1384
#define STRINGLIB_MUTABLE 0
1385
1386
#include "stringlib/fastsearch.h"
1387
#include "stringlib/count.h"
1388
#include "stringlib/find.h"
1389
#include "stringlib/join.h"
1390
#include "stringlib/partition.h"
1391
#include "stringlib/split.h"
1392
#include "stringlib/ctype.h"
1393
1394
#include "stringlib/transmogrify.h"
1395
1396
#undef STRINGLIB_GET_EMPTY
1397
1398
Py_ssize_t
1399
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1400
              const char *needle, Py_ssize_t len_needle,
1401
              Py_ssize_t offset)
1402
0
{
1403
0
    assert(len_haystack >= 0);
1404
0
    assert(len_needle >= 0);
1405
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1406
0
    if (len_needle == 0) {
1407
0
        return offset;
1408
0
    }
1409
0
    if (len_needle > len_haystack) {
1410
0
        return -1;
1411
0
    }
1412
0
    assert(len_haystack >= 1);
1413
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1414
0
                                    needle, len_needle, offset);
1415
0
    if (res == -1) {
1416
0
        Py_ssize_t last_align = len_haystack - len_needle;
1417
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1418
0
            return offset + last_align;
1419
0
        }
1420
0
    }
1421
0
    return res;
1422
0
}
1423
1424
Py_ssize_t
1425
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1426
                     const char *needle, Py_ssize_t len_needle,
1427
                     Py_ssize_t offset)
1428
0
{
1429
0
    return stringlib_rfind(haystack, len_haystack,
1430
0
                           needle, len_needle, offset);
1431
0
}
1432
1433
/* Build the repr of a bytes object by handing its data and size to
   _Py_bytes_repr() with the class name "bytes". */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    Py_ssize_t size = PyBytes_GET_SIZE(obj);
    return _Py_bytes_repr(data, size, smartquotes, "bytes");
}
1439
1440
PyObject *
1441
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1442
               const char *classname)
1443
3.17k
{
1444
3.17k
    Py_ssize_t i;
1445
3.17k
    Py_ssize_t newsize, squotes, dquotes;
1446
3.17k
    PyObject *v;
1447
3.17k
    unsigned char quote;
1448
3.17k
    Py_UCS1 *p;
1449
1450
    /* Compute size of output string */
1451
3.17k
    squotes = dquotes = 0;
1452
3.17k
    newsize = 3; /* b'' */
1453
3.32M
    for (i = 0; i < length; i++) {
1454
3.32M
        unsigned char c = data[i];
1455
3.32M
        Py_ssize_t incr = 1;
1456
3.32M
        switch(c) {
1457
4.12k
        case '\'': squotes++; break;
1458
9.01k
        case '"':  dquotes++; break;
1459
33.0k
        case '\\': case '\t': case '\n': case '\r':
1460
33.0k
            incr = 2; break; /* \C */
1461
3.28M
        default:
1462
3.28M
            if (c < ' ' || c >= 0x7f)
1463
2.49M
                incr = 4; /* \xHH */
1464
3.32M
        }
1465
3.32M
        if (newsize > PY_SSIZE_T_MAX - incr)
1466
0
            goto overflow;
1467
3.32M
        newsize += incr;
1468
3.32M
    }
1469
3.17k
    quote = '\'';
1470
3.17k
    if (smartquotes && squotes && !dquotes)
1471
121
        quote = '"';
1472
3.17k
    if (squotes && quote == '\'') {
1473
195
        if (newsize > PY_SSIZE_T_MAX - squotes)
1474
0
            goto overflow;
1475
195
        newsize += squotes;
1476
195
    }
1477
1478
3.17k
    v = PyUnicode_New(newsize, 127);
1479
3.17k
    if (v == NULL) {
1480
0
        return NULL;
1481
0
    }
1482
3.17k
    p = PyUnicode_1BYTE_DATA(v);
1483
1484
3.17k
    *p++ = 'b', *p++ = quote;
1485
3.32M
    for (i = 0; i < length; i++) {
1486
3.32M
        unsigned char c = data[i];
1487
3.32M
        if (c == quote || c == '\\')
1488
4.11k
            *p++ = '\\', *p++ = c;
1489
3.32M
        else if (c == '\t')
1490
19.2k
            *p++ = '\\', *p++ = 't';
1491
3.30M
        else if (c == '\n')
1492
5.02k
            *p++ = '\\', *p++ = 'n';
1493
3.29M
        else if (c == '\r')
1494
7.03k
            *p++ = '\\', *p++ = 'r';
1495
3.29M
        else if (c < ' ' || c >= 0x7f) {
1496
2.49M
            *p++ = '\\';
1497
2.49M
            *p++ = 'x';
1498
2.49M
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1499
2.49M
            *p++ = Py_hexdigits[c & 0xf];
1500
2.49M
        }
1501
793k
        else
1502
793k
            *p++ = c;
1503
3.32M
    }
1504
3.17k
    *p++ = quote;
1505
3.17k
    assert(_PyUnicode_CheckConsistency(v, 1));
1506
3.17k
    return v;
1507
1508
0
  overflow:
1509
0
    PyErr_Format(PyExc_OverflowError,
1510
0
                 "%s object is too large to make repr", classname);
1511
0
    return NULL;
1512
3.17k
}
1513
1514
/* repr() implementation: PyBytes_Repr() with smart quotes enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1519
1520
/* str() implementation: same text as repr().  When the bytes_warning
   config flag is set a BytesWarning is issued first; if issuing it
   fails, NULL is returned. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1531
1532
static Py_ssize_t
1533
bytes_length(PyObject *self)
1534
38.4M
{
1535
38.4M
    PyBytesObject *a = _PyBytes_CAST(self);
1536
38.4M
    return Py_SIZE(a);
1537
38.4M
}
1538
1539
/* This is also used by PyBytes_Concat() and the specializing interpreter. */
1540
PyObject *
1541
_PyBytes_Concat(PyObject *a, PyObject *b)
1542
914k
{
1543
914k
    Py_buffer va, vb;
1544
914k
    PyObject *result = NULL;
1545
1546
914k
    va.len = -1;
1547
914k
    vb.len = -1;
1548
914k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1549
914k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1550
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1551
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1552
0
        goto done;
1553
0
    }
1554
1555
    /* Optimize end cases */
1556
914k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1557
136k
        result = Py_NewRef(b);
1558
136k
        goto done;
1559
136k
    }
1560
778k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1561
60.5k
        result = Py_NewRef(a);
1562
60.5k
        goto done;
1563
60.5k
    }
1564
1565
717k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1566
0
        PyErr_NoMemory();
1567
0
        goto done;
1568
0
    }
1569
1570
717k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1571
717k
    if (result != NULL) {
1572
717k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1573
717k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1574
717k
    }
1575
1576
914k
  done:
1577
914k
    if (va.len != -1)
1578
914k
        PyBuffer_Release(&va);
1579
914k
    if (vb.len != -1)
1580
914k
        PyBuffer_Release(&vb);
1581
914k
    return result;
1582
717k
}
1583
1584
PyObject *
1585
_PyBytes_Repeat(PyObject *self, Py_ssize_t n)
1586
190k
{
1587
190k
    PyBytesObject *a = _PyBytes_CAST(self);
1588
190k
    if (n < 0)
1589
0
        n = 0;
1590
    /* watch out for overflows:  the size can overflow int,
1591
     * and the # of bytes needed can overflow size_t
1592
     */
1593
190k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1594
0
        PyErr_SetString(PyExc_OverflowError,
1595
0
            "repeated bytes are too long");
1596
0
        return NULL;
1597
0
    }
1598
190k
    Py_ssize_t size = Py_SIZE(a) * n;
1599
190k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1600
5
        return Py_NewRef(a);
1601
5
    }
1602
190k
    size_t nbytes = (size_t)size;
1603
190k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1604
0
        PyErr_SetString(PyExc_OverflowError,
1605
0
            "repeated bytes are too long");
1606
0
        return NULL;
1607
0
    }
1608
190k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1609
190k
    if (op == NULL) {
1610
0
        return PyErr_NoMemory();
1611
0
    }
1612
190k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1613
190k
    set_ob_shash(op, -1);
1614
190k
    op->ob_sval[size] = '\0';
1615
1616
190k
    _PyBytes_RepeatBuffer(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1617
1618
190k
    return (PyObject *) op;
1619
190k
}
1620
1621
static int
1622
bytes_contains(PyObject *self, PyObject *arg)
1623
2.97k
{
1624
2.97k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1625
2.97k
}
1626
1627
static PyObject *
1628
bytes_item(PyObject *self, Py_ssize_t i)
1629
0
{
1630
0
    PyBytesObject *a = _PyBytes_CAST(self);
1631
0
    if (i < 0 || i >= Py_SIZE(a)) {
1632
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1633
0
        return NULL;
1634
0
    }
1635
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1636
0
}
1637
1638
static int
1639
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1640
77.9M
{
1641
77.9M
    int cmp;
1642
77.9M
    Py_ssize_t len;
1643
1644
77.9M
    len = Py_SIZE(a);
1645
77.9M
    if (Py_SIZE(b) != len)
1646
770k
        return 0;
1647
1648
77.2M
    if (a->ob_sval[0] != b->ob_sval[0])
1649
6.83M
        return 0;
1650
1651
70.3M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1652
70.3M
    return (cmp == 0);
1653
77.2M
}
1654
1655
/* Rich comparison for bytes.

   If either operand is not a bytes object the result is NotImplemented;
   with the bytes_warning config flag set, an ==/!= comparison involving a
   str or an int additionally issues a BytesWarning first.  For two bytes
   objects, identity and ==/!= are handled on fast paths; the ordering
   operators compare the common prefix and then the lengths. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        /* a byte string is equal to itself */
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        int equal = bytes_compare_eq(a, b);
        /* Flip the answer for != */
        equal ^= (op == Py_NE);
        return PyBool_FromLong(equal);
    }

    /* Ordering comparison */
    Py_ssize_t len_a = Py_SIZE(a);
    Py_ssize_t len_b = Py_SIZE(b);
    Py_ssize_t common = Py_MIN(len_a, len_b);
    int order = 0;
    if (common > 0) {
        /* Compare the first bytes before calling memcmp(). */
        order = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (order == 0) {
            order = memcmp(a->ob_sval, b->ob_sval, common);
        }
    }
    if (order != 0) {
        Py_RETURN_RICHCOMPARE(order, 0, op);
    }
    /* Equal over the common prefix: the lengths decide. */
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1717
1718
static Py_hash_t
1719
bytes_hash(PyObject *self)
1720
77.5M
{
1721
77.5M
    PyBytesObject *a = _PyBytes_CAST(self);
1722
77.5M
    Py_hash_t hash = get_ob_shash(a);
1723
77.5M
    if (hash == -1) {
1724
        /* Can't fail */
1725
46.0M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1726
46.0M
        set_ob_shash(a, hash);
1727
46.0M
    }
1728
77.5M
    return hash;
1729
77.5M
}
1730
1731
/* mp_subscript slot for bytes: implements b[index] and b[slice].
 *
 * - Index objects yield the byte at that position as an int; negative
 *   indices are offset by the length, and anything still out of range
 *   raises IndexError.
 * - Slices yield a bytes object: the empty-bytes constant for an empty
 *   slice, the object itself for a full unit-step slice of an exact bytes
 *   instance, and a freshly built object otherwise.
 * - Any other item type raises TypeError.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }

        Py_ssize_t size = PyBytes_GET_SIZE(self);
        if (index < 0) {
            index += size;
        }
        if (index < 0 || index >= size) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }

    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t slicelength = PySlice_AdjustIndices(size, &start, &stop, step);
    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* Contiguous slice: either the whole (exact) object or a plain copy
           of the selected range. */
        if (start == 0 && slicelength == size && PyBytes_CheckExact(self)) {
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self) + start,
                                         slicelength);
    }

    /* Extended slice: gather the selected bytes one at a time. */
    const char *src = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }

    char *dst = PyBytes_AS_STRING(result);
    size_t cur = start;
    for (Py_ssize_t k = 0; k < slicelength; k++) {
        dst[k] = src[cur];
        cur += step;
    }
    return result;
}
1796
1797
static int
1798
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1799
76.6M
{
1800
76.6M
    PyBytesObject *self = _PyBytes_CAST(op);
1801
76.6M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1802
76.6M
                             1, flags);
1803
76.6M
}
1804
1805
static PySequenceMethods bytes_as_sequence = {
1806
    bytes_length,       /*sq_length*/
1807
    _PyBytes_Concat,       /*sq_concat*/
1808
    _PyBytes_Repeat,    /*sq_repeat*/
1809
    bytes_item,         /*sq_item*/
1810
    0,                  /*sq_slice*/
1811
    0,                  /*sq_ass_item*/
1812
    0,                  /*sq_ass_slice*/
1813
    bytes_contains      /*sq_contains*/
1814
};
1815
1816
static PyMappingMethods bytes_as_mapping = {
1817
    bytes_length,
1818
    bytes_subscript,
1819
    0,
1820
};
1821
1822
static PyBufferProcs bytes_as_buffer = {
1823
    bytes_buffer_getbuffer,
1824
    NULL,
1825
};
1826
1827
1828
/*[clinic input]
1829
bytes.__bytes__
1830
Convert this value to exact type bytes.
1831
[clinic start generated code]*/
1832
1833
static PyObject *
1834
bytes___bytes___impl(PyBytesObject *self)
1835
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1836
42.8k
{
1837
42.8k
    if (PyBytes_CheckExact(self)) {
1838
42.8k
        return Py_NewRef(self);
1839
42.8k
    }
1840
0
    else {
1841
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1842
0
    }
1843
42.8k
}
1844
1845
1846
294
/* Strip-mode selectors passed as `striptype` to do_strip(), do_xstrip() and
   do_argstrip(). The helpers skip the leading pass when the mode is
   RIGHTSTRIP and skip the trailing pass when it is LEFTSTRIP, so BOTHSTRIP
   trims both ends. */
#define LEFTSTRIP 0
1847
588
#define RIGHTSTRIP 1
1848
0
#define BOTHSTRIP 2
1849
1850
/*[clinic input]
1851
bytes.split
1852
1853
    sep: object = None
1854
        The delimiter according to which to split the bytes.
1855
        None (the default value) means split on ASCII whitespace characters
1856
        (space, tab, return, newline, formfeed, vertical tab).
1857
    maxsplit: Py_ssize_t = -1
1858
        Maximum number of splits to do.
1859
        -1 (the default value) means no limit.
1860
1861
Return a list of the sections in the bytes, using sep as the delimiter.
1862
[clinic start generated code]*/
1863
1864
static PyObject *
1865
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1867
2.93M
{
1868
2.93M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1869
2.93M
    const char *s = PyBytes_AS_STRING(self), *sub;
1870
2.93M
    Py_buffer vsub;
1871
2.93M
    PyObject *list;
1872
1873
2.93M
    if (maxsplit < 0)
1874
2.93M
        maxsplit = PY_SSIZE_T_MAX;
1875
2.93M
    if (sep == Py_None)
1876
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1877
2.93M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1878
0
        return NULL;
1879
2.93M
    sub = vsub.buf;
1880
2.93M
    n = vsub.len;
1881
1882
2.93M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1883
2.93M
    PyBuffer_Release(&vsub);
1884
2.93M
    return list;
1885
2.93M
}
1886
1887
/*[clinic input]
1888
@permit_long_docstring_body
1889
bytes.partition
1890
1891
    sep: Py_buffer
1892
    /
1893
1894
Partition the bytes into three parts using the given separator.
1895
1896
This will search for the separator sep in the bytes. If the separator is found,
1897
returns a 3-tuple containing the part before the separator, the separator
1898
itself, and the part after it.
1899
1900
If the separator is not found, returns a 3-tuple containing the original bytes
1901
object and two empty bytes objects.
1902
[clinic start generated code]*/
1903
1904
static PyObject *
1905
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1906
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1907
361k
{
1908
361k
    return stringlib_partition(
1909
361k
        (PyObject*) self,
1910
361k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1911
361k
        sep->obj, (const char *)sep->buf, sep->len
1912
361k
        );
1913
361k
}
1914
1915
/*[clinic input]
1916
@permit_long_docstring_body
1917
bytes.rpartition
1918
1919
    sep: Py_buffer
1920
    /
1921
1922
Partition the bytes into three parts using the given separator.
1923
1924
This will search for the separator sep in the bytes, starting at the end. If
1925
the separator is found, returns a 3-tuple containing the part before the
1926
separator, the separator itself, and the part after it.
1927
1928
If the separator is not found, returns a 3-tuple containing two empty bytes
1929
objects and the original bytes object.
1930
[clinic start generated code]*/
1931
1932
static PyObject *
1933
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1934
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1935
0
{
1936
0
    return stringlib_rpartition(
1937
0
        (PyObject*) self,
1938
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1939
0
        sep->obj, (const char *)sep->buf, sep->len
1940
0
        );
1941
0
}
1942
1943
/*[clinic input]
1944
@permit_long_docstring_body
1945
bytes.rsplit = bytes.split
1946
1947
Return a list of the sections in the bytes, using sep as the delimiter.
1948
1949
Splitting is done starting at the end of the bytes and working to the front.
1950
[clinic start generated code]*/
1951
1952
static PyObject *
1953
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1954
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1955
0
{
1956
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1957
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1958
0
    Py_buffer vsub;
1959
0
    PyObject *list;
1960
1961
0
    if (maxsplit < 0)
1962
0
        maxsplit = PY_SSIZE_T_MAX;
1963
0
    if (sep == Py_None)
1964
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1965
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1966
0
        return NULL;
1967
0
    sub = vsub.buf;
1968
0
    n = vsub.len;
1969
1970
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1971
0
    PyBuffer_Release(&vsub);
1972
0
    return list;
1973
0
}
1974
1975
1976
/*[clinic input]
1977
bytes.join
1978
1979
    iterable_of_bytes: object
1980
    /
1981
1982
Concatenate any number of bytes objects.
1983
1984
The bytes whose method is called is inserted in between each pair.
1985
1986
The result is returned as a new bytes object.
1987
1988
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1989
[clinic start generated code]*/
1990
1991
static PyObject *
1992
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1993
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1994
269k
{
1995
269k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1996
269k
}
1997
1998
/* Join the items of `iterable` using the bytes object `sep` as separator.
 *
 * A NULL `sep` is reported through PyErr_BadInternalCall(); a `sep` that is
 * not a bytes object raises TypeError. Otherwise the work is delegated to
 * stringlib_bytes_join(). Returns NULL when either argument check fails.
 */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
2013
2014
/*[clinic input]
2015
@permit_long_summary
2016
@text_signature "($self, sub[, start[, end]], /)"
2017
bytes.find
2018
2019
    sub: object
2020
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2021
         Optional start position. Default: start of the bytes.
2022
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2023
         Optional stop position. Default: end of the bytes.
2024
    /
2025
2026
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2027
2028
Return -1 on failure.
2029
[clinic start generated code]*/
2030
2031
static PyObject *
2032
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2033
                Py_ssize_t end)
2034
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
2035
12.9M
{
2036
12.9M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2037
12.9M
                          sub, start, end);
2038
12.9M
}
2039
2040
/*[clinic input]
2041
@permit_long_summary
2042
bytes.index = bytes.find
2043
2044
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2045
2046
Raise ValueError if the subsection is not found.
2047
[clinic start generated code]*/
2048
2049
static PyObject *
2050
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2051
                 Py_ssize_t end)
2052
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
2053
0
{
2054
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2055
0
                           sub, start, end);
2056
0
}
2057
2058
/*[clinic input]
2059
@permit_long_summary
2060
bytes.rfind = bytes.find
2061
2062
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2063
2064
Return -1 on failure.
2065
[clinic start generated code]*/
2066
2067
static PyObject *
2068
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2069
                 Py_ssize_t end)
2070
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
2071
279k
{
2072
279k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2073
279k
                           sub, start, end);
2074
279k
}
2075
2076
/*[clinic input]
2077
@permit_long_summary
2078
bytes.rindex = bytes.find
2079
2080
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2081
2082
Raise ValueError if the subsection is not found.
2083
[clinic start generated code]*/
2084
2085
static PyObject *
2086
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2087
                  Py_ssize_t end)
2088
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2089
0
{
2090
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2091
0
                            sub, start, end);
2092
0
}
2093
2094
2095
/* Strip bytes that occur in `sepobj` from one or both ends of `self`.
 *
 * `sepobj` must export a simple buffer; its contents are used as the set of
 * bytes to remove. `striptype` is LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.
 * Returns `self` itself (new reference) when nothing was removed and `self`
 * is an exact bytes object, otherwise a new bytes object; NULL if the
 * buffer could not be obtained.
 */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);

    Py_buffer sepview;
    if (PyObject_GetBuffer(sepobj, &sepview, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    char *chars = sepview.buf;
    Py_ssize_t nchars = sepview.len;

    /* Advance `left` past every leading byte that belongs to the set. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!memchr(chars, Py_CHARMASK(data[left]), nchars)) {
                break;
            }
        }
    }

    /* Pull `right` (one past the last kept byte) back over every trailing
       byte that belongs to the set, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(chars, Py_CHARMASK(data[right - 1]), nchars)) {
            right--;
        }
    }

    PyBuffer_Release(&sepview);

    int untouched = (left == 0 && right == size);
    if (untouched && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2133
2134
2135
/* Strip bytes for which Py_ISSPACE() is true from one or both ends of
 * `self`, as selected by `striptype` (LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP).
 *
 * Returns `self` itself (new reference) when nothing was removed and `self`
 * is an exact bytes object, otherwise a new bytes object.
 */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* Advance `left` past leading whitespace. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    /* Pull `right` (one past the last kept byte) back over trailing
       whitespace, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    int untouched = (left == 0 && right == size);
    if (untouched && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2162
2163
2164
/* Dispatch for the strip family: None selects whitespace stripping via
 * do_strip(); any other argument is used as the byte set for do_xstrip().
 */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2172
2173
/*[clinic input]
2174
@permit_long_docstring_body
2175
bytes.strip
2176
2177
    bytes: object = None
2178
    /
2179
2180
Strip leading and trailing bytes contained in the argument.
2181
2182
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2183
[clinic start generated code]*/
2184
2185
static PyObject *
2186
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2187
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2188
0
{
2189
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2190
0
}
2191
2192
/*[clinic input]
2193
bytes.lstrip
2194
2195
    bytes: object = None
2196
    /
2197
2198
Strip leading bytes contained in the argument.
2199
2200
If the argument is omitted or None, strip leading ASCII whitespace.
2201
[clinic start generated code]*/
2202
2203
static PyObject *
2204
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2205
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2206
0
{
2207
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2208
0
}
2209
2210
/*[clinic input]
2211
bytes.rstrip
2212
2213
    bytes: object = None
2214
    /
2215
2216
Strip trailing bytes contained in the argument.
2217
2218
If the argument is omitted or None, strip trailing ASCII whitespace.
2219
[clinic start generated code]*/
2220
2221
static PyObject *
2222
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2223
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2224
294
{
2225
294
    return do_argstrip(self, RIGHTSTRIP, bytes);
2226
294
}
2227
2228
2229
/*[clinic input]
2230
@permit_long_summary
2231
bytes.count = bytes.find
2232
2233
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2234
[clinic start generated code]*/
2235
2236
static PyObject *
2237
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2238
                 Py_ssize_t end)
2239
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2240
5.69M
{
2241
5.69M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2242
5.69M
                           sub, start, end);
2243
5.69M
}
2244
2245
2246
/*[clinic input]
2247
bytes.translate
2248
2249
    table: object
2250
        Translation table, which must be a bytes object of length 256.
2251
    /
2252
    delete as deletechars: object(c_default="NULL") = b''
2253
2254
Return a copy with each character mapped by the given translation table.
2255
2256
All characters occurring in the optional argument delete are removed.
2257
The remaining characters are mapped through the given translation table.
2258
[clinic start generated code]*/
2259
2260
static PyObject *
2261
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2262
                     PyObject *deletechars)
2263
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2264
0
{
2265
0
    const char *input;
2266
0
    char *output;
2267
0
    Py_buffer table_view = {NULL, NULL};
2268
0
    Py_buffer del_table_view = {NULL, NULL};
2269
0
    const char *table_chars;
2270
0
    Py_ssize_t i, c, changed = 0;
2271
0
    PyObject *input_obj = (PyObject*)self;
2272
0
    const char *output_start, *del_table_chars=NULL;
2273
0
    Py_ssize_t inlen, tablen, dellen = 0;
2274
0
    PyObject *result;
2275
0
    int trans_table[256];
2276
2277
0
    if (PyBytes_Check(table)) {
2278
0
        table_chars = PyBytes_AS_STRING(table);
2279
0
        tablen = PyBytes_GET_SIZE(table);
2280
0
    }
2281
0
    else if (table == Py_None) {
2282
0
        table_chars = NULL;
2283
0
        tablen = 256;
2284
0
    }
2285
0
    else {
2286
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2287
0
            return NULL;
2288
0
        table_chars = table_view.buf;
2289
0
        tablen = table_view.len;
2290
0
    }
2291
2292
0
    if (tablen != 256) {
2293
0
        PyErr_SetString(PyExc_ValueError,
2294
0
          "translation table must be 256 characters long");
2295
0
        PyBuffer_Release(&table_view);
2296
0
        return NULL;
2297
0
    }
2298
2299
0
    if (deletechars != NULL) {
2300
0
        if (PyBytes_Check(deletechars)) {
2301
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2302
0
            dellen = PyBytes_GET_SIZE(deletechars);
2303
0
        }
2304
0
        else {
2305
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2306
0
                PyBuffer_Release(&table_view);
2307
0
                return NULL;
2308
0
            }
2309
0
            del_table_chars = del_table_view.buf;
2310
0
            dellen = del_table_view.len;
2311
0
        }
2312
0
    }
2313
0
    else {
2314
0
        del_table_chars = NULL;
2315
0
        dellen = 0;
2316
0
    }
2317
2318
0
    inlen = PyBytes_GET_SIZE(input_obj);
2319
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2320
0
    if (result == NULL) {
2321
0
        PyBuffer_Release(&del_table_view);
2322
0
        PyBuffer_Release(&table_view);
2323
0
        return NULL;
2324
0
    }
2325
0
    output_start = output = PyBytes_AS_STRING(result);
2326
0
    input = PyBytes_AS_STRING(input_obj);
2327
2328
0
    if (dellen == 0 && table_chars != NULL) {
2329
        /* If no deletions are required, use faster code */
2330
0
        for (i = inlen; --i >= 0; ) {
2331
0
            c = Py_CHARMASK(*input++);
2332
0
            *output++ = table_chars[c];
2333
0
        }
2334
        /* Check if anything changed (for returning original object) */
2335
        /* We save this check until the end so that the compiler will */
2336
        /* unroll the loop above leading to MUCH faster code. */
2337
0
        if (PyBytes_CheckExact(input_obj)) {
2338
0
            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
2339
0
                Py_SETREF(result, Py_NewRef(input_obj));
2340
0
            }
2341
0
        }
2342
0
        PyBuffer_Release(&del_table_view);
2343
0
        PyBuffer_Release(&table_view);
2344
0
        return result;
2345
0
    }
2346
2347
0
    if (table_chars == NULL) {
2348
0
        for (i = 0; i < 256; i++)
2349
0
            trans_table[i] = Py_CHARMASK(i);
2350
0
    } else {
2351
0
        for (i = 0; i < 256; i++)
2352
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2353
0
    }
2354
0
    PyBuffer_Release(&table_view);
2355
2356
0
    for (i = 0; i < dellen; i++)
2357
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2358
0
    PyBuffer_Release(&del_table_view);
2359
2360
0
    for (i = inlen; --i >= 0; ) {
2361
0
        c = Py_CHARMASK(*input++);
2362
0
        if (trans_table[c] != -1)
2363
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2364
0
                continue;
2365
0
        changed = 1;
2366
0
    }
2367
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2368
0
        Py_DECREF(result);
2369
0
        return Py_NewRef(input_obj);
2370
0
    }
2371
    /* Fix the size of the resulting byte string */
2372
0
    if (inlen > 0)
2373
0
        _PyBytes_Resize(&result, output - output_start);
2374
0
    return result;
2375
0
}
2376
2377
2378
/*[clinic input]
2379
2380
@permit_long_summary
2381
@permit_long_docstring_body
2382
@staticmethod
2383
bytes.maketrans
2384
2385
    frm: Py_buffer
2386
    to: Py_buffer
2387
    /
2388
2389
Return a translation table usable for the bytes or bytearray translate method.
2390
2391
The returned table will be one where each byte in frm is mapped to the byte at
2392
the same position in to.
2393
2394
The bytes objects frm and to must be of the same length.
2395
[clinic start generated code]*/
2396
2397
static PyObject *
2398
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2399
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2400
9
{
2401
9
    return _Py_bytes_maketrans(frm, to);
2402
9
}
2403
2404
2405
/*[clinic input]
2406
bytes.replace
2407
2408
    old: Py_buffer
2409
    new: Py_buffer
2410
    /
2411
    count: Py_ssize_t = -1
2412
        Maximum number of occurrences to replace.
2413
        -1 (the default value) means replace all occurrences.
2414
2415
Return a copy with all occurrences of substring old replaced by new.
2416
2417
If count is given, only the first count occurrences are replaced.
2418
If count is not specified or -1, then all occurrences are replaced.
2419
[clinic start generated code]*/
2420
2421
static PyObject *
2422
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2423
                   Py_ssize_t count)
2424
/*[clinic end generated code: output=994fa588b6b9c104 input=cdf3cf8639297745]*/
2425
34.0k
{
2426
34.0k
    return stringlib_replace((PyObject *)self,
2427
34.0k
                             (const char *)old->buf, old->len,
2428
34.0k
                             (const char *)new->buf, new->len, count);
2429
34.0k
}
2430
2431
/** End DALKE **/
2432
2433
/*[clinic input]
2434
bytes.removeprefix as bytes_removeprefix
2435
2436
    prefix: Py_buffer
2437
    /
2438
2439
Return a bytes object with the given prefix string removed if present.
2440
2441
If the bytes starts with the prefix string, return bytes[len(prefix):].
2442
Otherwise, return a copy of the original bytes.
2443
[clinic start generated code]*/
2444
2445
static PyObject *
2446
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2447
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2448
0
{
2449
0
    const char *self_start = PyBytes_AS_STRING(self);
2450
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2451
0
    const char *prefix_start = prefix->buf;
2452
0
    Py_ssize_t prefix_len = prefix->len;
2453
2454
0
    if (self_len >= prefix_len
2455
0
        && prefix_len > 0
2456
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2457
0
    {
2458
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2459
0
                                         self_len - prefix_len);
2460
0
    }
2461
2462
0
    if (PyBytes_CheckExact(self)) {
2463
0
        return Py_NewRef(self);
2464
0
    }
2465
2466
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2467
0
}
2468
2469
/*[clinic input]
2470
bytes.removesuffix as bytes_removesuffix
2471
2472
    suffix: Py_buffer
2473
    /
2474
2475
Return a bytes object with the given suffix string removed if present.
2476
2477
If the bytes ends with the suffix string and that suffix is not empty,
2478
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2479
bytes.
2480
[clinic start generated code]*/
2481
2482
static PyObject *
2483
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2484
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2485
0
{
2486
0
    const char *self_start = PyBytes_AS_STRING(self);
2487
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2488
0
    const char *suffix_start = suffix->buf;
2489
0
    Py_ssize_t suffix_len = suffix->len;
2490
2491
0
    if (self_len >= suffix_len
2492
0
        && suffix_len > 0
2493
0
        && memcmp(self_start + self_len - suffix_len,
2494
0
                  suffix_start, suffix_len) == 0)
2495
0
    {
2496
0
        return PyBytes_FromStringAndSize(self_start,
2497
0
                                         self_len - suffix_len);
2498
0
    }
2499
2500
0
    if (PyBytes_CheckExact(self)) {
2501
0
        return Py_NewRef(self);
2502
0
    }
2503
2504
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2505
0
}
2506
2507
/*[clinic input]
2508
@permit_long_summary
2509
@text_signature "($self, prefix[, start[, end]], /)"
2510
bytes.startswith
2511
2512
    prefix as subobj: object
2513
        A bytes or a tuple of bytes to try.
2514
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2515
        Optional start position. Default: start of the bytes.
2516
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2517
        Optional stop position. Default: end of the bytes.
2518
    /
2519
2520
Return True if the bytes starts with the specified prefix, False otherwise.
2521
[clinic start generated code]*/
2522
2523
static PyObject *
2524
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2525
                      Py_ssize_t start, Py_ssize_t end)
2526
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2527
1.93M
{
2528
1.93M
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2529
1.93M
                                subobj, start, end);
2530
1.93M
}
2531
2532
/*[clinic input]
2533
@permit_long_summary
2534
@text_signature "($self, suffix[, start[, end]], /)"
2535
bytes.endswith
2536
2537
    suffix as subobj: object
2538
        A bytes or a tuple of bytes to try.
2539
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2540
         Optional start position. Default: start of the bytes.
2541
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2542
         Optional stop position. Default: end of the bytes.
2543
    /
2544
2545
Return True if the bytes ends with the specified suffix, False otherwise.
2546
[clinic start generated code]*/
2547
2548
static PyObject *
2549
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2550
                    Py_ssize_t end)
2551
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2552
315
{
2553
315
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2554
315
                              subobj, start, end);
2555
315
}
2556
2557
2558
/*[clinic input]
2559
bytes.decode
2560
2561
    encoding: str(c_default="NULL") = 'utf-8'
2562
        The encoding with which to decode the bytes.
2563
    errors: str(c_default="NULL") = 'strict'
2564
        The error handling scheme to use for the handling of decoding errors.
2565
        The default is 'strict' meaning that decoding errors raise a
2566
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2567
        as well as any other name registered with codecs.register_error that
2568
        can handle UnicodeDecodeErrors.
2569
2570
Decode the bytes using the codec registered for encoding.
2571
[clinic start generated code]*/
2572
2573
static PyObject *
2574
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2575
                  const char *errors)
2576
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2577
18.4M
{
2578
18.4M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2579
18.4M
}
2580
2581
2582
/*[clinic input]
2583
@permit_long_docstring_body
2584
bytes.splitlines
2585
2586
    keepends: bool = False
2587
2588
Return a list of the lines in the bytes, breaking at line boundaries.
2589
2590
Line breaks are not included in the resulting list unless keepends is given and
2591
true.
2592
[clinic start generated code]*/
2593
2594
static PyObject *
2595
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2596
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2597
0
{
2598
0
    return stringlib_splitlines(
2599
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2600
0
        PyBytes_GET_SIZE(self), keepends
2601
0
        );
2602
0
}
2603
2604
/*[clinic input]
2605
@classmethod
2606
bytes.fromhex
2607
2608
    string: object
2609
    /
2610
2611
Create a bytes object from a string of hexadecimal numbers.
2612
2613
Spaces between two numbers are accepted.
2614
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2615
[clinic start generated code]*/
2616
2617
static PyObject *
2618
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2619
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2620
33.2k
{
2621
33.2k
    PyObject *result = _PyBytes_FromHex(string, 0);
2622
33.2k
    if (type != &PyBytes_Type && result != NULL) {
2623
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2624
0
    }
2625
33.2k
    return result;
2626
33.2k
}
2627
2628
/* Decode a sequence of hexadecimal digit pairs.
 *
 * `string` must be a str or an object supporting the buffer protocol.
 * Runs of whitespace (as classified by Py_ISSPACE) are skipped before each
 * digit pair.  When `use_bytearray` is non-zero the output writer is created
 * with _PyBytesWriter_CreateByteArray(), otherwise with
 * PyBytesWriter_Create().
 *
 * Returns the object produced by PyBytesWriter_FinishWithPointer() on
 * success.  On failure returns NULL with an exception set:
 *   - TypeError if `string` is neither str nor bytes-like;
 *   - ValueError if a non-hexadecimal character is found or the input ends
 *     in the middle of a digit pair.
 */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;   /* lets the exit paths tell whether a buffer is held */

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input */
        if (Py_ISSPACE(*str)) {
            /* Memory exported through the buffer protocol need not be
               NUL-terminated, so never look at *str once str reaches end. */
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The input ended after the first digit of a pair.  Check this
           before dereferencing: for buffer input the byte at `end` is
           outside the view, and treating it as a digit would both read out
           of bounds and emit one byte more than the writer was sized for. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 marks "odd number of digits"; any other value is
       the offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    /* writer may still be NULL here (non-ASCII str path). */
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2740
2741
/*[clinic input]
2742
bytes.hex
2743
2744
    sep: object = NULL
2745
        An optional single character or byte to separate hex bytes.
2746
    bytes_per_sep: Py_ssize_t = 1
2747
        How many bytes between separators.  Positive values count from the
2748
        right, negative values count from the left.
2749
2750
Create a string of hexadecimal numbers from a bytes object.
2751
2752
Example:
2753
>>> value = b'\xb9\x01\xef'
2754
>>> value.hex()
2755
'b901ef'
2756
>>> value.hex(':')
2757
'b9:01:ef'
2758
>>> value.hex(':', 2)
2759
'b9:01ef'
2760
>>> value.hex(':', -2)
2761
'b901:ef'
2762
[clinic start generated code]*/
2763
2764
static PyObject *
2765
bytes_hex_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t bytes_per_sep)
2766
/*[clinic end generated code: output=588821f02cb9d8f5 input=bd8eceb755d8230f]*/
2767
0
{
2768
0
    const char *argbuf = PyBytes_AS_STRING(self);
2769
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2770
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2771
0
}
2772
2773
/* __getnewargs__: build a 1-tuple holding a bytes copy of this object's
   contents (format "(y#)"). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *contents = self->ob_sval;
    Py_ssize_t length = Py_SIZE(self);

    return Py_BuildValue("(y#)", contents, length);
}
2779
2780
2781
static PyMethodDef
2782
bytes_methods[] = {
2783
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2784
    BYTES___BYTES___METHODDEF
2785
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2786
     _Py_capitalize__doc__},
2787
    STRINGLIB_CENTER_METHODDEF
2788
    BYTES_COUNT_METHODDEF
2789
    BYTES_DECODE_METHODDEF
2790
    BYTES_ENDSWITH_METHODDEF
2791
    STRINGLIB_EXPANDTABS_METHODDEF
2792
    BYTES_FIND_METHODDEF
2793
    BYTES_FROMHEX_METHODDEF
2794
    BYTES_HEX_METHODDEF
2795
    BYTES_INDEX_METHODDEF
2796
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2797
     _Py_isalnum__doc__},
2798
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2799
     _Py_isalpha__doc__},
2800
    {"isascii", stringlib_isascii, METH_NOARGS,
2801
     _Py_isascii__doc__},
2802
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2803
     _Py_isdigit__doc__},
2804
    {"islower", stringlib_islower, METH_NOARGS,
2805
     _Py_islower__doc__},
2806
    {"isspace", stringlib_isspace, METH_NOARGS,
2807
     _Py_isspace__doc__},
2808
    {"istitle", stringlib_istitle, METH_NOARGS,
2809
     _Py_istitle__doc__},
2810
    {"isupper", stringlib_isupper, METH_NOARGS,
2811
     _Py_isupper__doc__},
2812
    BYTES_JOIN_METHODDEF
2813
    STRINGLIB_LJUST_METHODDEF
2814
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2815
    BYTES_LSTRIP_METHODDEF
2816
    BYTES_MAKETRANS_METHODDEF
2817
    BYTES_PARTITION_METHODDEF
2818
    BYTES_REPLACE_METHODDEF
2819
    BYTES_REMOVEPREFIX_METHODDEF
2820
    BYTES_REMOVESUFFIX_METHODDEF
2821
    BYTES_RFIND_METHODDEF
2822
    BYTES_RINDEX_METHODDEF
2823
    STRINGLIB_RJUST_METHODDEF
2824
    BYTES_RPARTITION_METHODDEF
2825
    BYTES_RSPLIT_METHODDEF
2826
    BYTES_RSTRIP_METHODDEF
2827
    BYTES_SPLIT_METHODDEF
2828
    BYTES_SPLITLINES_METHODDEF
2829
    BYTES_STARTSWITH_METHODDEF
2830
    BYTES_STRIP_METHODDEF
2831
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2832
     _Py_swapcase__doc__},
2833
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2834
    BYTES_TRANSLATE_METHODDEF
2835
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2836
    STRINGLIB_ZFILL_METHODDEF
2837
    {NULL,     NULL}                         /* sentinel */
2838
};
2839
2840
/* nb_remainder slot: `self % arg` printf-style formatting.  Returns
   NotImplemented when the left operand is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2849
2850
static PyNumberMethods bytes_as_number = {
2851
    0,              /*nb_add*/
2852
    0,              /*nb_subtract*/
2853
    0,              /*nb_multiply*/
2854
    bytes_mod,      /*nb_remainder*/
2855
};
2856
2857
static PyObject *
2858
bytes_subtype_new(PyTypeObject *, PyObject *);
2859
2860
/*[clinic input]
2861
@classmethod
2862
bytes.__new__ as bytes_new
2863
2864
    source as x: object = NULL
2865
    encoding: str = NULL
2866
    errors: str = NULL
2867
2868
[clinic start generated code]*/
2869
2870
static PyObject *
2871
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2872
               const char *errors)
2873
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2874
16.3M
{
2875
16.3M
    PyObject *bytes;
2876
16.3M
    PyObject *func;
2877
16.3M
    Py_ssize_t size;
2878
2879
16.3M
    if (x == NULL) {
2880
0
        if (encoding != NULL || errors != NULL) {
2881
0
            PyErr_SetString(PyExc_TypeError,
2882
0
                            encoding != NULL ?
2883
0
                            "encoding without a string argument" :
2884
0
                            "errors without a string argument");
2885
0
            return NULL;
2886
0
        }
2887
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2888
0
    }
2889
16.3M
    else if (encoding != NULL) {
2890
        /* Encode via the codec registry */
2891
323k
        if (!PyUnicode_Check(x)) {
2892
0
            PyErr_SetString(PyExc_TypeError,
2893
0
                            "encoding without a string argument");
2894
0
            return NULL;
2895
0
        }
2896
323k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2897
323k
    }
2898
16.0M
    else if (errors != NULL) {
2899
0
        PyErr_SetString(PyExc_TypeError,
2900
0
                        PyUnicode_Check(x) ?
2901
0
                        "string argument without an encoding" :
2902
0
                        "errors without a string argument");
2903
0
        return NULL;
2904
0
    }
2905
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2906
       integer argument before deferring to PyBytes_FromObject, something
2907
       PyObject_Bytes doesn't do. */
2908
16.0M
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2909
42.8k
        bytes = _PyObject_CallNoArgs(func);
2910
42.8k
        Py_DECREF(func);
2911
42.8k
        if (bytes == NULL)
2912
0
            return NULL;
2913
42.8k
        if (!PyBytes_Check(bytes)) {
2914
0
            PyErr_Format(PyExc_TypeError,
2915
0
                         "%T.__bytes__() must return a bytes, not %T",
2916
0
                         x, bytes);
2917
0
            Py_DECREF(bytes);
2918
0
            return NULL;
2919
0
        }
2920
42.8k
    }
2921
15.9M
    else if (PyErr_Occurred())
2922
0
        return NULL;
2923
15.9M
    else if (PyUnicode_Check(x)) {
2924
0
        PyErr_SetString(PyExc_TypeError,
2925
0
                        "string argument without an encoding");
2926
0
        return NULL;
2927
0
    }
2928
    /* Is it an integer? */
2929
15.9M
    else if (_PyIndex_Check(x)) {
2930
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2931
0
        if (size == -1 && PyErr_Occurred()) {
2932
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2933
0
                return NULL;
2934
0
            PyErr_Clear();  /* fall through */
2935
0
            bytes = PyBytes_FromObject(x);
2936
0
        }
2937
0
        else {
2938
0
            if (size < 0) {
2939
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2940
0
                return NULL;
2941
0
            }
2942
0
            bytes = _PyBytes_FromSize(size, 1);
2943
0
        }
2944
0
    }
2945
15.9M
    else {
2946
15.9M
        bytes = PyBytes_FromObject(x);
2947
15.9M
    }
2948
2949
16.3M
    if (bytes != NULL && type != &PyBytes_Type) {
2950
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2951
0
    }
2952
2953
16.3M
    return bytes;
2954
16.3M
}
2955
2956
/* Build a bytes object from an object exporting the buffer protocol by
   copying the exported data in C-contiguous order. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
    int copied = (writer != NULL
                  && PyBuffer_ToContiguous(PyBytesWriter_GetData(writer),
                                           &view, view.len, 'C') >= 0);
    if (!copied) {
        /* writer may be NULL here if creation failed. */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&view);
        return NULL;
    }

    PyBuffer_Release(&view);
    return PyBytesWriter_Finish(writer);
}
2981
2982
/* Build a bytes object from an exact list of integers in range(0, 256).
   The list length is re-read on every iteration and the writer is grown on
   demand, so the loop copes with the list changing size while items are
   being converted. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        /* Hold a reference to the element across the conversion call. */
        PyObject *element = Py_NewRef(PyList_GET_ITEM(x, pos));
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        Py_DECREF(element);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte > 255) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (pos >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                        capacity + 1, out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char)byte;
        pos++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3022
3023
/* Build a bytes object from an exact tuple of integers in range(0, 256). */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    for (Py_ssize_t pos = 0; pos < count; pos++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, pos), NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte > 255) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }
        out[pos] = (char)byte;
    }
    return PyBytesWriter_Finish(writer);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3055
3056
/* Build a bytes object by exhausting iterator `it`.  `x` is only used to
   obtain a length hint for the initial allocation (default 64); the writer
   is grown whenever the iterator yields more items than currently fit. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(capacity);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t written = 0;
    PyObject *item;
    while ((item = PyIter_Next(it)) != NULL) {
        /* Interpret the item as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (byte < 0 || byte > 255) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, growing the writer first if it is full */
        if (written >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                        capacity + 1, out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char)byte;
        written++;
    }

    /* PyIter_Next() returned NULL: either exhaustion or an error. */
    if (PyErr_Occurred()) {
        goto error;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3115
3116
/* Convert `x` to a bytes object.  Tried in order: exact bytes (returned
   with a new reference), buffer protocol, exact list, exact tuple, then
   any non-str iterable.  Anything else raises TypeError. */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iterator = PyObject_GetIter(x);
        if (iterator == NULL) {
            /* Only a TypeError from PyObject_GetIter() is replaced by the
               generic message below; any other exception propagates. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iterator, x);
            Py_DECREF(iterator);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3157
3158
/* This allocator is needed for subclasses that don't want to use __new__.
3159
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3160
 *
3161
 * This allocator will be removed when ob_shash is removed.
3162
 */
3163
static PyObject *
3164
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3165
0
{
3166
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3167
0
    if (obj == NULL) {
3168
0
        return NULL;
3169
0
    }
3170
0
    set_ob_shash(obj, -1);
3171
0
    return (PyObject*)obj;
3172
0
}
3173
3174
static PyObject *
3175
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3176
0
{
3177
0
    PyObject *pnew;
3178
0
    Py_ssize_t n;
3179
3180
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3181
0
    assert(PyBytes_Check(tmp));
3182
0
    n = PyBytes_GET_SIZE(tmp);
3183
0
    pnew = type->tp_alloc(type, n);
3184
0
    if (pnew != NULL) {
3185
0
        memcpy(PyBytes_AS_STRING(pnew),
3186
0
                  PyBytes_AS_STRING(tmp), n+1);
3187
0
        set_ob_shash((PyBytesObject *)pnew,
3188
0
            get_ob_shash((PyBytesObject *)tmp));
3189
0
    }
3190
0
    return pnew;
3191
0
}
3192
3193
PyDoc_STRVAR(bytes_doc,
3194
"bytes(iterable_of_ints) -> bytes\n\
3195
bytes(string, encoding[, errors]) -> bytes\n\
3196
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3197
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3198
bytes() -> empty bytes object\n\
3199
\n\
3200
Construct an immutable array of bytes from:\n\
3201
  - an iterable yielding integers in range(256)\n\
3202
  - a text string encoded using the specified encoding\n\
3203
  - any object implementing the buffer API.\n\
3204
  - an integer");
3205
3206
static PyObject *bytes_iter(PyObject *seq);
3207
3208
3209
static _PyObjectIndexPair
3210
bytes_iteritem(PyObject *obj, Py_ssize_t index)
3211
2.03k
{
3212
2.03k
    PyBytesObject *a = _PyBytes_CAST(obj);
3213
2.03k
    if (index >= Py_SIZE(a)) {
3214
52
        return (_PyObjectIndexPair) { .object = NULL, .index = index };
3215
52
    }
3216
1.98k
    PyObject *l = _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[index]);
3217
1.98k
    return (_PyObjectIndexPair) { .object = l, .index = index + 1 };
3218
2.03k
}
3219
3220
PyTypeObject PyBytes_Type = {
3221
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3222
    "bytes",
3223
    PyBytesObject_SIZE,
3224
    sizeof(char),
3225
    0,                                          /* tp_dealloc */
3226
    0,                                          /* tp_vectorcall_offset */
3227
    0,                                          /* tp_getattr */
3228
    0,                                          /* tp_setattr */
3229
    0,                                          /* tp_as_async */
3230
    bytes_repr,                                 /* tp_repr */
3231
    &bytes_as_number,                           /* tp_as_number */
3232
    &bytes_as_sequence,                         /* tp_as_sequence */
3233
    &bytes_as_mapping,                          /* tp_as_mapping */
3234
    bytes_hash,                                 /* tp_hash */
3235
    0,                                          /* tp_call */
3236
    bytes_str,                                  /* tp_str */
3237
    PyObject_GenericGetAttr,                    /* tp_getattro */
3238
    0,                                          /* tp_setattro */
3239
    &bytes_as_buffer,                           /* tp_as_buffer */
3240
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3241
        Py_TPFLAGS_BYTES_SUBCLASS |
3242
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3243
    bytes_doc,                                  /* tp_doc */
3244
    0,                                          /* tp_traverse */
3245
    0,                                          /* tp_clear */
3246
    bytes_richcompare,                          /* tp_richcompare */
3247
    0,                                          /* tp_weaklistoffset */
3248
    bytes_iter,                                 /* tp_iter */
3249
    0,                                          /* tp_iternext */
3250
    bytes_methods,                              /* tp_methods */
3251
    0,                                          /* tp_members */
3252
    0,                                          /* tp_getset */
3253
    0,                                          /* tp_base */
3254
    0,                                          /* tp_dict */
3255
    0,                                          /* tp_descr_get */
3256
    0,                                          /* tp_descr_set */
3257
    0,                                          /* tp_dictoffset */
3258
    0,                                          /* tp_init */
3259
    bytes_alloc,                                /* tp_alloc */
3260
    bytes_new,                                  /* tp_new */
3261
    PyObject_Free,                              /* tp_free */
3262
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3263
    ._tp_iteritem = bytes_iteritem,
3264
};
3265
3266
void
3267
PyBytes_Concat(PyObject **pv, PyObject *w)
3268
0
{
3269
0
    assert(pv != NULL);
3270
0
    if (*pv == NULL)
3271
0
        return;
3272
0
    if (w == NULL) {
3273
0
        Py_CLEAR(*pv);
3274
0
        return;
3275
0
    }
3276
3277
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3278
        /* Only one reference, so we can resize in place */
3279
0
        Py_ssize_t oldsize;
3280
0
        Py_buffer wb;
3281
3282
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3283
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3284
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3285
0
            Py_CLEAR(*pv);
3286
0
            return;
3287
0
        }
3288
3289
0
        oldsize = PyBytes_GET_SIZE(*pv);
3290
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3291
0
            PyErr_NoMemory();
3292
0
            goto error;
3293
0
        }
3294
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3295
0
            goto error;
3296
3297
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3298
0
        PyBuffer_Release(&wb);
3299
0
        return;
3300
3301
0
      error:
3302
0
        PyBuffer_Release(&wb);
3303
0
        Py_CLEAR(*pv);
3304
0
        return;
3305
0
    }
3306
3307
0
    else {
3308
        /* Multiple references, need to create new object */
3309
0
        PyObject *v;
3310
0
        v = _PyBytes_Concat(*pv, w);
3311
0
        Py_SETREF(*pv, v);
3312
0
    }
3313
0
}
3314
3315
void
3316
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3317
0
{
3318
0
    PyBytes_Concat(pv, w);
3319
0
    Py_XDECREF(w);
3320
0
}
3321
3322
3323
/* The following function breaks the notion that bytes are immutable:
3324
   it changes the size of a bytes object.  You can think of it
3325
   as creating a new bytes object and destroying the old one, only
3326
   more efficiently.
3327
   Note that if there's not enough memory to resize the bytes object, the
3328
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3329
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3330
   returned, and the value in *pv may or may not be the same as on input.
3331
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3332
   does *not* include that), and a trailing \0 byte is stored.
3333
*/
3334
3335
int
3336
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3337
23.8M
{
3338
23.8M
    PyObject *v;
3339
23.8M
    PyBytesObject *sv;
3340
23.8M
    v = *pv;
3341
23.8M
    if (!PyBytes_Check(v) || newsize < 0) {
3342
0
        *pv = 0;
3343
0
        Py_DECREF(v);
3344
0
        PyErr_BadInternalCall();
3345
0
        return -1;
3346
0
    }
3347
23.8M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3348
23.8M
    if (oldsize == newsize) {
3349
        /* return early if newsize equals to v->ob_size */
3350
1.68M
        return 0;
3351
1.68M
    }
3352
22.1M
    if (oldsize == 0) {
3353
18.1M
        *pv = _PyBytes_FromSize(newsize, 0);
3354
18.1M
        Py_DECREF(v);
3355
18.1M
        return (*pv == NULL) ? -1 : 0;
3356
18.1M
    }
3357
4.02M
    if (newsize == 0) {
3358
6.60k
        *pv = bytes_get_empty();
3359
6.60k
        Py_DECREF(v);
3360
6.60k
        return 0;
3361
6.60k
    }
3362
4.02M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3363
0
        if (oldsize < newsize) {
3364
0
            *pv = _PyBytes_FromSize(newsize, 0);
3365
0
            if (*pv) {
3366
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3367
0
            }
3368
0
        }
3369
0
        else {
3370
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3371
0
        }
3372
0
        Py_DECREF(v);
3373
0
        return (*pv == NULL) ? -1 : 0;
3374
0
    }
3375
3376
#ifdef Py_TRACE_REFS
3377
    _Py_ForgetReference(v);
3378
#endif
3379
4.02M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3380
4.02M
    *pv = (PyObject *)
3381
4.02M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3382
4.02M
    if (*pv == NULL) {
3383
#ifdef Py_REF_DEBUG
3384
        _Py_DecRefTotal(_PyThreadState_GET());
3385
#endif
3386
0
        PyObject_Free(v);
3387
0
        PyErr_NoMemory();
3388
0
        return -1;
3389
0
    }
3390
4.02M
    _Py_NewReferenceNoTotal(*pv);
3391
4.02M
    sv = (PyBytesObject *) *pv;
3392
4.02M
    Py_SET_SIZE(sv, newsize);
3393
4.02M
    sv->ob_sval[newsize] = '\0';
3394
4.02M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3395
4.02M
    return 0;
3396
4.02M
}
3397
3398
3399
/*********************** Bytes Iterator ****************************/
3400
3401
typedef struct {
3402
    PyObject_HEAD
3403
    Py_ssize_t it_index;
3404
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3405
} striterobject;
3406
3407
581
#define _striterobject_CAST(op)  ((striterobject *)(op))
3408
3409
static void
3410
striter_dealloc(PyObject *op)
3411
45
{
3412
45
    striterobject *it = _striterobject_CAST(op);
3413
45
    _PyObject_GC_UNTRACK(it);
3414
45
    Py_XDECREF(it->it_seq);
3415
45
    PyObject_GC_Del(it);
3416
45
}
3417
3418
static int
3419
striter_traverse(PyObject *op, visitproc visit, void *arg)
3420
0
{
3421
0
    striterobject *it = _striterobject_CAST(op);
3422
0
    Py_VISIT(it->it_seq);
3423
0
    return 0;
3424
0
}
3425
3426
/* tp_iternext: return the next byte as an int.  Returns NULL without
   setting an exception when exhausted; on first exhaustion the reference
   to the sequence is dropped and it_seq is set to NULL. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *it = _striterobject_CAST(op);
    assert(it != NULL);

    PyBytesObject *seq = it->it_seq;
    if (seq == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(seq));

    if (it->it_index >= PyBytes_GET_SIZE(seq)) {
        it->it_seq = NULL;
        Py_DECREF(seq);
        return NULL;
    }

    unsigned char byte = (unsigned char)seq->ob_sval[it->it_index];
    it->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3447
3448
/* __length_hint__: number of bytes left to yield, or 0 once it_seq has
   been cleared. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *it = _striterobject_CAST(op);
    if (it->it_seq == NULL) {
        return PyLong_FromSsize_t(0);
    }
    return PyLong_FromSsize_t(PyBytes_GET_SIZE(it->it_seq) - it->it_index);
}
3457
3458
PyDoc_STRVAR(length_hint_doc,
3459
             "Private method returning an estimate of len(list(it)).");
3460
3461
/* __reduce__: produce (iter, (seq,), index) for a live iterator, or
   (iter, ((),)) once it_seq has been cleared. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    /* _PyEval_GetBuiltin can invoke arbitrary code, so it must be called
     * before the iterator's fields are read.
     * see issue #101765 */
    PyObject *iter_builtin = _PyEval_GetBuiltin(&_Py_ID(iter));

    striterobject *it = _striterobject_CAST(op);
    if (it->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_builtin);
    }
    return Py_BuildValue("N(O)n", iter_builtin, it->it_seq, it->it_index);
}
3476
3477
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3478
3479
/* __setstate__: restore the iteration position from an int, clamped to
   [0, len(seq)].  Has no effect on the position once it_seq has been
   cleared.  Returns None, or NULL if `state` cannot be converted. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *it = _striterobject_CAST(op);
    PyBytesObject *seq = it->it_seq;
    if (seq != NULL) {
        Py_ssize_t limit = PyBytes_GET_SIZE(seq);
        if (index < 0) {
            index = 0;
        }
        else if (index > limit) {
            index = limit; /* iterator exhausted */
        }
        it->it_index = index;
    }
    Py_RETURN_NONE;
}
3495
3496
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3497
3498
static PyMethodDef striter_methods[] = {
3499
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3500
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3501
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3502
    {NULL,              NULL}           /* sentinel */
3503
};
3504
3505
PyTypeObject PyBytesIter_Type = {
3506
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3507
    "bytes_iterator",                           /* tp_name */
3508
    sizeof(striterobject),                      /* tp_basicsize */
3509
    0,                                          /* tp_itemsize */
3510
    /* methods */
3511
    striter_dealloc,                            /* tp_dealloc */
3512
    0,                                          /* tp_vectorcall_offset */
3513
    0,                                          /* tp_getattr */
3514
    0,                                          /* tp_setattr */
3515
    0,                                          /* tp_as_async */
3516
    0,                                          /* tp_repr */
3517
    0,                                          /* tp_as_number */
3518
    0,                                          /* tp_as_sequence */
3519
    0,                                          /* tp_as_mapping */
3520
    0,                                          /* tp_hash */
3521
    0,                                          /* tp_call */
3522
    0,                                          /* tp_str */
3523
    PyObject_GenericGetAttr,                    /* tp_getattro */
3524
    0,                                          /* tp_setattro */
3525
    0,                                          /* tp_as_buffer */
3526
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3527
    0,                                          /* tp_doc */
3528
    striter_traverse,                           /* tp_traverse */
3529
    0,                                          /* tp_clear */
3530
    0,                                          /* tp_richcompare */
3531
    0,                                          /* tp_weaklistoffset */
3532
    PyObject_SelfIter,                          /* tp_iter */
3533
    striter_next,                               /* tp_iternext */
3534
    striter_methods,                            /* tp_methods */
3535
    0,
3536
};
3537
3538
/* Create a new bytes iterator over `seq`.

   `seq` must pass PyBytes_Check(); otherwise PyErr_BadInternalCall() is
   raised and NULL is returned.  NULL is also returned when the iterator
   object cannot be allocated.  On success the iterator holds a new
   reference to `seq`, starts at index 0 and is tracked by the GC. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject,
                                              &PyBytesIter_Type);
    if (iterator == NULL) {
        return NULL;
    }

    iterator->it_index = 0;
    iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
3555
3556
3557
void
3558
_PyBytes_RepeatBuffer(char* dest, Py_ssize_t len_dest,
3559
    const char* src, Py_ssize_t len_src)
3560
192k
{
3561
192k
    if (len_dest == 0) {
3562
749
        return;
3563
749
    }
3564
191k
    if (len_src == 1) {
3565
189k
        memset(dest, src[0], len_dest);
3566
189k
    }
3567
2.28k
    else {
3568
2.28k
        if (src != dest) {
3569
2.28k
            memcpy(dest, src, len_src);
3570
2.28k
        }
3571
2.28k
        Py_ssize_t copied = len_src;
3572
5.49k
        while (copied < len_dest) {
3573
3.20k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3574
3.20k
            memcpy(dest + copied, dest, bytes_to_copy);
3575
3.20k
            copied += bytes_to_copy;
3576
3.20k
        }
3577
2.28k
    }
3578
191k
}
3579
3580
3581
// --- PyBytesWriter API -----------------------------------------------------
3582
3583
static inline char*
3584
byteswriter_data(PyBytesWriter *writer)
3585
35.5M
{
3586
35.5M
    return _PyBytesWriter_GetData(writer);
3587
35.5M
}
3588
3589
3590
static inline Py_ssize_t
3591
byteswriter_allocated(PyBytesWriter *writer)
3592
35.3M
{
3593
35.3M
    if (writer->obj == NULL) {
3594
34.6M
        return sizeof(writer->small_buffer);
3595
34.6M
    }
3596
700k
    else if (writer->use_bytearray) {
3597
0
        return PyByteArray_GET_SIZE(writer->obj);
3598
0
    }
3599
700k
    else {
3600
700k
        return PyBytes_GET_SIZE(writer->obj);
3601
700k
    }
3602
35.3M
}
3603
3604
3605
/* When a writer overallocates, the requested size is increased by
   size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3612
3613
static inline int
3614
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3615
26.3M
{
3616
26.3M
    assert(size >= 0);
3617
3618
26.3M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3619
26.3M
    if (size <= old_allocated) {
3620
25.4M
        return 0;
3621
25.4M
    }
3622
3623
891k
    if (resize & writer->overallocate) {
3624
20.2k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3625
20.2k
            size += size / OVERALLOCATE_FACTOR;
3626
20.2k
        }
3627
20.2k
    }
3628
3629
891k
    if (writer->obj != NULL) {
3630
20.2k
        if (writer->use_bytearray) {
3631
0
            if (PyByteArray_Resize(writer->obj, size)) {
3632
0
                return -1;
3633
0
            }
3634
0
        }
3635
20.2k
        else {
3636
20.2k
            if (_PyBytes_Resize(&writer->obj, size)) {
3637
0
                return -1;
3638
0
            }
3639
20.2k
        }
3640
20.2k
        assert(writer->obj != NULL);
3641
20.2k
    }
3642
871k
    else if (writer->use_bytearray) {
3643
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3644
0
        if (writer->obj == NULL) {
3645
0
            return -1;
3646
0
        }
3647
0
        if (resize) {
3648
0
            assert((size_t)size > sizeof(writer->small_buffer));
3649
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3650
0
                   writer->small_buffer,
3651
0
                   sizeof(writer->small_buffer));
3652
0
        }
3653
0
    }
3654
871k
    else {
3655
871k
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3656
871k
        if (writer->obj == NULL) {
3657
0
            return -1;
3658
0
        }
3659
871k
        if (resize) {
3660
0
            assert((size_t)size > sizeof(writer->small_buffer));
3661
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3662
0
                   writer->small_buffer,
3663
0
                   sizeof(writer->small_buffer));
3664
0
        }
3665
871k
    }
3666
3667
#ifdef Py_DEBUG
3668
    Py_ssize_t allocated = byteswriter_allocated(writer);
3669
    if (resize && allocated > old_allocated) {
3670
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3671
               allocated - old_allocated);
3672
    }
3673
#endif
3674
3675
891k
    return 0;
3676
891k
}
3677
3678
3679
static PyBytesWriter*
3680
byteswriter_create(Py_ssize_t size, int use_bytearray)
3681
26.3M
{
3682
26.3M
    if (size < 0) {
3683
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3684
0
        return NULL;
3685
0
    }
3686
3687
26.3M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3688
26.3M
    if (writer == NULL) {
3689
11.9k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3690
11.9k
        if (writer == NULL) {
3691
0
            PyErr_NoMemory();
3692
0
            return NULL;
3693
0
        }
3694
11.9k
    }
3695
26.3M
    writer->obj = NULL;
3696
26.3M
    writer->size = 0;
3697
26.3M
    writer->use_bytearray = use_bytearray;
3698
26.3M
    writer->overallocate = !use_bytearray;
3699
3700
26.3M
    if (size >= 1) {
3701
26.3M
        if (byteswriter_resize(writer, size, 0) < 0) {
3702
0
            PyBytesWriter_Discard(writer);
3703
0
            return NULL;
3704
0
        }
3705
26.3M
        writer->size = size;
3706
26.3M
    }
3707
#ifdef Py_DEBUG
3708
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3709
#endif
3710
26.3M
    return writer;
3711
26.3M
}
3712
3713
PyBytesWriter*
3714
PyBytesWriter_Create(Py_ssize_t size)
3715
26.3M
{
3716
26.3M
    return byteswriter_create(size, 0);
3717
26.3M
}
3718
3719
PyBytesWriter*
3720
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3721
0
{
3722
0
    return byteswriter_create(size, 1);
3723
0
}
3724
3725
3726
void
3727
PyBytesWriter_Discard(PyBytesWriter *writer)
3728
26.5M
{
3729
26.5M
    if (writer == NULL) {
3730
148k
        return;
3731
148k
    }
3732
3733
26.3M
    Py_XDECREF(writer->obj);
3734
26.3M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3735
26.3M
}
3736
3737
3738
PyObject*
3739
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3740
25.4M
{
3741
25.4M
    PyObject *result;
3742
25.4M
    if (size == 0) {
3743
45.1k
        result = bytes_get_empty();
3744
45.1k
    }
3745
25.3M
    else if (writer->obj != NULL) {
3746
766k
        if (writer->use_bytearray) {
3747
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3748
0
                if (PyByteArray_Resize(writer->obj, size)) {
3749
0
                    goto error;
3750
0
                }
3751
0
            }
3752
0
        }
3753
766k
        else {
3754
766k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3755
725k
                if (_PyBytes_Resize(&writer->obj, size)) {
3756
0
                    goto error;
3757
0
                }
3758
725k
            }
3759
766k
        }
3760
766k
        result = writer->obj;
3761
766k
        writer->obj = NULL;
3762
766k
    }
3763
24.6M
    else if (writer->use_bytearray) {
3764
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3765
0
    }
3766
24.6M
    else {
3767
24.6M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3768
24.6M
    }
3769
25.4M
    PyBytesWriter_Discard(writer);
3770
25.4M
    return result;
3771
3772
0
error:
3773
0
    PyBytesWriter_Discard(writer);
3774
0
    return NULL;
3775
25.4M
}
3776
3777
PyObject*
3778
PyBytesWriter_Finish(PyBytesWriter *writer)
3779
16.3M
{
3780
16.3M
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3781
16.3M
}
3782
3783
3784
PyObject*
3785
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3786
8.98M
{
3787
8.98M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3788
8.98M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3789
0
        PyBytesWriter_Discard(writer);
3790
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3791
0
        return NULL;
3792
0
    }
3793
3794
8.98M
    return PyBytesWriter_FinishWithSize(writer, size);
3795
8.98M
}
3796
3797
3798
void*
3799
PyBytesWriter_GetData(PyBytesWriter *writer)
3800
26.5M
{
3801
26.5M
    return byteswriter_data(writer);
3802
26.5M
}
3803
3804
3805
Py_ssize_t
3806
PyBytesWriter_GetSize(PyBytesWriter *writer)
3807
0
{
3808
0
    return _PyBytesWriter_GetSize(writer);
3809
0
}
3810
3811
3812
static Py_ssize_t
3813
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3814
13.4k
{
3815
13.4k
    return byteswriter_allocated(writer);
3816
13.4k
}
3817
3818
3819
int
3820
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3821
0
{
3822
0
    if (size < 0) {
3823
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3824
0
        return -1;
3825
0
    }
3826
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3827
0
        return -1;
3828
0
    }
3829
0
    writer->size = size;
3830
0
    return 0;
3831
0
}
3832
3833
3834
static void*
3835
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3836
                                      void *data)
3837
0
{
3838
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3839
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3840
0
        return NULL;
3841
0
    }
3842
0
    return byteswriter_data(writer) + pos;
3843
0
}
3844
3845
3846
int
3847
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3848
20.2k
{
3849
20.2k
    if (size < 0 && writer->size + size < 0) {
3850
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3851
0
        return -1;
3852
0
    }
3853
20.2k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3854
0
        PyErr_NoMemory();
3855
0
        return -1;
3856
0
    }
3857
20.2k
    size = writer->size + size;
3858
3859
20.2k
    if (byteswriter_resize(writer, size, 1) < 0) {
3860
0
        return -1;
3861
0
    }
3862
20.2k
    writer->size = size;
3863
20.2k
    return 0;
3864
20.2k
}
3865
3866
3867
void*
3868
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3869
                                   void *buf)
3870
0
{
3871
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3872
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3873
0
        return NULL;
3874
0
    }
3875
0
    return byteswriter_data(writer) + pos;
3876
0
}
3877
3878
3879
int
3880
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3881
                         const void *bytes, Py_ssize_t size)
3882
0
{
3883
0
    if (size < 0) {
3884
0
        size_t len = strlen(bytes);
3885
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3886
0
            PyErr_NoMemory();
3887
0
            return -1;
3888
0
        }
3889
0
        size = (Py_ssize_t)len;
3890
0
    }
3891
3892
0
    Py_ssize_t pos = writer->size;
3893
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3894
0
        return -1;
3895
0
    }
3896
0
    char *buf = byteswriter_data(writer);
3897
0
    memcpy(buf + pos, bytes, size);
3898
0
    return 0;
3899
0
}
3900
3901
3902
int
3903
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3904
0
{
3905
0
    Py_ssize_t pos = writer->size;
3906
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3907
0
        return -1;
3908
0
    }
3909
3910
0
    va_list vargs;
3911
0
    va_start(vargs, format);
3912
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3913
0
    va_end(vargs);
3914
3915
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3916
0
    return PyBytesWriter_Resize(writer, size);
3917
0
}