Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Objects/bytesobject.c
Line
Count
Source (jump to first uncovered line)
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
11
#include "pycore_initconfig.h"    // _PyStatus_OK()
12
#include "pycore_long.h"          // _PyLong_DigitValue
13
#include "pycore_object.h"        // _PyObject_GC_TRACK
14
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
15
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
16
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
17
18
#include <stddef.h>
19
20
/*[clinic input]
21
class bytes "PyBytesObject *" "&PyBytes_Type"
22
[clinic start generated code]*/
23
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24
25
#include "clinic/bytesobject.c.h"
26
27
/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28
   for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29
30
   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31
   3 or 7 bytes per bytes object allocation on a typical system.
32
*/
33
34.9M
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
35
/* Forward declaration */
36
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37
                                                   char *str);
38
39
40
3.39M
/* Table of the preallocated single-byte bytes singletons. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Singleton bytes object for the byte value 'ch'.
   The expansion is a plain expression (no trailing semicolon) so that the
   macro can be used anywhere an expression is allowed. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[(ch)]))
/* Address of the empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44
45
46
// Return a reference to the immortal empty bytes string singleton.
47
static inline PyObject* bytes_get_empty(void)
48
6.75M
{
49
6.75M
    PyObject *empty = &EMPTY->ob_base.ob_base;
50
6.75M
    assert(_Py_IsImmortal(empty));
51
6.75M
    return empty;
52
6.75M
}
53
54
55
static inline void
56
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
57
17.7M
{
58
17.7M
_Py_COMP_DIAG_PUSH
59
17.7M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
60
#ifdef Py_GIL_DISABLED
61
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
62
#else
63
17.7M
    a->ob_shash = hash;
64
17.7M
#endif
65
17.7M
_Py_COMP_DIAG_POP
66
17.7M
}
67
68
static inline Py_hash_t
69
get_ob_shash(PyBytesObject *a)
70
4.21M
{
71
4.21M
_Py_COMP_DIAG_PUSH
72
4.21M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
73
#ifdef Py_GIL_DISABLED
74
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
75
#else
76
4.21M
    return a->ob_shash;
77
4.21M
#endif
78
4.21M
_Py_COMP_DIAG_POP
79
4.21M
}
80
81
82
/*
83
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
84
   string containing exactly 'size' bytes.
85
86
   For PyBytes_FromStringAndSize(), the parameter 'str' is
87
   either NULL or else points to a string containing at least 'size' bytes.
88
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
89
   not have to be null-terminated.  (Therefore it is safe to construct a
90
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
91
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
92
   bytes (setting the last byte to the null terminating character) and you can
93
   fill in the data yourself.  If 'str' is non-NULL then the resulting
94
   PyBytes object must be treated as immutable and you must not fill in nor
95
   alter the data yourself, since the strings may be shared.
96
97
   The PyObject member 'op->ob_size', which denotes the number of "extra
98
   items" in a variable-size object, will contain the number of bytes
99
   allocated for string data, not counting the null terminating character.
100
   It is therefore equal to the 'size' parameter (for
101
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
102
   parameter (for PyBytes_FromString()).
103
*/
104
static PyObject *
105
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
106
17.3M
{
107
17.3M
    PyBytesObject *op;
108
17.3M
    assert(size >= 0);
109
110
17.3M
    if (size == 0) {
111
0
        return bytes_get_empty();
112
0
    }
113
114
17.3M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
115
0
        PyErr_SetString(PyExc_OverflowError,
116
0
                        "byte string is too large");
117
0
        return NULL;
118
0
    }
119
120
    /* Inline PyObject_NewVar */
121
17.3M
    if (use_calloc)
122
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
123
17.3M
    else
124
17.3M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
125
17.3M
    if (op == NULL) {
126
0
        return PyErr_NoMemory();
127
0
    }
128
17.3M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
129
17.3M
    set_ob_shash(op, -1);
130
17.3M
    if (!use_calloc) {
131
17.3M
        op->ob_sval[size] = '\0';
132
17.3M
    }
133
17.3M
    return (PyObject *) op;
134
17.3M
}
135
136
PyObject *
137
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
138
27.4M
{
139
27.4M
    PyBytesObject *op;
140
27.4M
    if (size < 0) {
141
0
        PyErr_SetString(PyExc_SystemError,
142
0
            "Negative size passed to PyBytes_FromStringAndSize");
143
0
        return NULL;
144
0
    }
145
27.4M
    if (size == 1 && str != NULL) {
146
3.39M
        op = CHARACTER(*str & 255);
147
3.39M
        assert(_Py_IsImmortal(op));
148
3.39M
        return (PyObject *)op;
149
3.39M
    }
150
24.0M
    if (size == 0) {
151
6.75M
        return bytes_get_empty();
152
6.75M
    }
153
154
17.3M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
155
17.3M
    if (op == NULL)
156
0
        return NULL;
157
17.3M
    if (str == NULL)
158
3.48M
        return (PyObject *) op;
159
160
13.8M
    memcpy(op->ob_sval, str, size);
161
13.8M
    return (PyObject *) op;
162
17.3M
}
163
164
PyObject *
165
PyBytes_FromString(const char *str)
166
408
{
167
408
    size_t size;
168
408
    PyBytesObject *op;
169
170
408
    assert(str != NULL);
171
408
    size = strlen(str);
172
408
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
173
0
        PyErr_SetString(PyExc_OverflowError,
174
0
            "byte string is too long");
175
0
        return NULL;
176
0
    }
177
178
408
    if (size == 0) {
179
0
        return bytes_get_empty();
180
0
    }
181
408
    else if (size == 1) {
182
0
        op = CHARACTER(*str & 255);
183
0
        assert(_Py_IsImmortal(op));
184
0
        return (PyObject *)op;
185
0
    }
186
187
    /* Inline PyObject_NewVar */
188
408
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
189
408
    if (op == NULL) {
190
0
        return PyErr_NoMemory();
191
0
    }
192
408
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
193
408
    set_ob_shash(op, -1);
194
408
    memcpy(op->ob_sval, str, size+1);
195
408
    return (PyObject *) op;
196
408
}
197
198
PyObject *
199
PyBytes_FromFormatV(const char *format, va_list vargs)
200
0
{
201
0
    char *s;
202
0
    const char *f;
203
0
    const char *p;
204
0
    Py_ssize_t prec;
205
0
    int longflag;
206
0
    int size_tflag;
207
    /* Longest 64-bit formatted numbers:
208
       - "18446744073709551615\0" (21 bytes)
209
       - "-9223372036854775808\0" (21 bytes)
210
       Decimal takes the most space (it isn't enough for octal.)
211
212
       Longest 64-bit pointer representation:
213
       "0xffffffffffffffff\0" (19 bytes). */
214
0
    char buffer[21];
215
0
    _PyBytesWriter writer;
216
217
0
    _PyBytesWriter_Init(&writer);
218
219
0
    s = _PyBytesWriter_Alloc(&writer, strlen(format));
220
0
    if (s == NULL)
221
0
        return NULL;
222
0
    writer.overallocate = 1;
223
224
0
#define WRITE_BYTES(str) \
225
0
    do { \
226
0
        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
227
0
        if (s == NULL) \
228
0
            goto error; \
229
0
    } while (0)
230
231
0
    for (f = format; *f; f++) {
232
0
        if (*f != '%') {
233
0
            *s++ = *f;
234
0
            continue;
235
0
        }
236
237
0
        p = f++;
238
239
        /* ignore the width (ex: 10 in "%10s") */
240
0
        while (Py_ISDIGIT(*f))
241
0
            f++;
242
243
        /* parse the precision (ex: 10 in "%.10s") */
244
0
        prec = 0;
245
0
        if (*f == '.') {
246
0
            f++;
247
0
            for (; Py_ISDIGIT(*f); f++) {
248
0
                prec = (prec * 10) + (*f - '0');
249
0
            }
250
0
        }
251
252
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
253
0
            f++;
254
255
        /* handle the long flag ('l'), but only for %ld and %lu.
256
           others can be added when necessary. */
257
0
        longflag = 0;
258
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259
0
            longflag = 1;
260
0
            ++f;
261
0
        }
262
263
        /* handle the size_t flag ('z'). */
264
0
        size_tflag = 0;
265
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
266
0
            size_tflag = 1;
267
0
            ++f;
268
0
        }
269
270
        /* subtract bytes preallocated for the format string
271
           (ex: 2 for "%s") */
272
0
        writer.min_size -= (f - p + 1);
273
274
0
        switch (*f) {
275
0
        case 'c':
276
0
        {
277
0
            int c = va_arg(vargs, int);
278
0
            if (c < 0 || c > 255) {
279
0
                PyErr_SetString(PyExc_OverflowError,
280
0
                                "PyBytes_FromFormatV(): %c format "
281
0
                                "expects an integer in range [0; 255]");
282
0
                goto error;
283
0
            }
284
0
            writer.min_size++;
285
0
            *s++ = (unsigned char)c;
286
0
            break;
287
0
        }
288
289
0
        case 'd':
290
0
            if (longflag) {
291
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
292
0
            }
293
0
            else if (size_tflag) {
294
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
295
0
            }
296
0
            else {
297
0
                sprintf(buffer, "%d", va_arg(vargs, int));
298
0
            }
299
0
            assert(strlen(buffer) < sizeof(buffer));
300
0
            WRITE_BYTES(buffer);
301
0
            break;
302
303
0
        case 'u':
304
0
            if (longflag) {
305
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
306
0
            }
307
0
            else if (size_tflag) {
308
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
309
0
            }
310
0
            else {
311
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
312
0
            }
313
0
            assert(strlen(buffer) < sizeof(buffer));
314
0
            WRITE_BYTES(buffer);
315
0
            break;
316
317
0
        case 'i':
318
0
            sprintf(buffer, "%i", va_arg(vargs, int));
319
0
            assert(strlen(buffer) < sizeof(buffer));
320
0
            WRITE_BYTES(buffer);
321
0
            break;
322
323
0
        case 'x':
324
0
            sprintf(buffer, "%x", va_arg(vargs, int));
325
0
            assert(strlen(buffer) < sizeof(buffer));
326
0
            WRITE_BYTES(buffer);
327
0
            break;
328
329
0
        case 's':
330
0
        {
331
0
            Py_ssize_t i;
332
333
0
            p = va_arg(vargs, const char*);
334
0
            if (prec <= 0) {
335
0
                i = strlen(p);
336
0
            }
337
0
            else {
338
0
                i = 0;
339
0
                while (i < prec && p[i]) {
340
0
                    i++;
341
0
                }
342
0
            }
343
0
            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
344
0
            if (s == NULL)
345
0
                goto error;
346
0
            break;
347
0
        }
348
349
0
        case 'p':
350
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
351
0
            assert(strlen(buffer) < sizeof(buffer));
352
            /* %p is ill-defined:  ensure leading 0x. */
353
0
            if (buffer[1] == 'X')
354
0
                buffer[1] = 'x';
355
0
            else if (buffer[1] != 'x') {
356
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
357
0
                buffer[0] = '0';
358
0
                buffer[1] = 'x';
359
0
            }
360
0
            WRITE_BYTES(buffer);
361
0
            break;
362
363
0
        case '%':
364
0
            writer.min_size++;
365
0
            *s++ = '%';
366
0
            break;
367
368
0
        default:
369
0
            if (*f == 0) {
370
                /* fix min_size if we reached the end of the format string */
371
0
                writer.min_size++;
372
0
            }
373
374
            /* invalid format string: copy unformatted string and exit */
375
0
            WRITE_BYTES(p);
376
0
            return _PyBytesWriter_Finish(&writer, s);
377
0
        }
378
0
    }
379
380
0
#undef WRITE_BYTES
381
382
0
    return _PyBytesWriter_Finish(&writer, s);
383
384
0
 error:
385
0
    _PyBytesWriter_Dealloc(&writer);
386
0
    return NULL;
387
0
}
388
389
/* Variadic front end for PyBytes_FromFormatV(): packages the trailing
   arguments into a va_list and returns whatever that function returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list ap;
    PyObject *formatted;

    va_start(ap, format);
    formatted = PyBytes_FromFormatV(format, ap);
    va_end(ap);

    return formatted;
}
400
401
/* Helpers for formatstring */
402
403
/* Fetch the next argument for a format specifier and advance *p_argidx.

   When 'arglen' is negative, 'args' itself is the (single) argument and is
   returned as is; otherwise item *p_argidx of the tuple 'args' is returned
   via PyTuple_GetItem().  Once *p_argidx has reached 'arglen', TypeError is
   set and NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}
418
419
/* Returns a new reference to a PyBytes object, or NULL on failure. */
420
421
static char*
422
formatfloat(PyObject *v, int flags, int prec, int type,
423
            PyObject **p_result, _PyBytesWriter *writer, char *str)
424
0
{
425
0
    char *p;
426
0
    PyObject *result;
427
0
    double x;
428
0
    size_t len;
429
0
    int dtoa_flags = 0;
430
431
0
    x = PyFloat_AsDouble(v);
432
0
    if (x == -1.0 && PyErr_Occurred()) {
433
0
        PyErr_Format(PyExc_TypeError, "float argument required, "
434
0
                     "not %.200s", Py_TYPE(v)->tp_name);
435
0
        return NULL;
436
0
    }
437
438
0
    if (prec < 0)
439
0
        prec = 6;
440
441
0
    if (flags & F_ALT) {
442
0
        dtoa_flags |= Py_DTSF_ALT;
443
0
    }
444
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
445
446
0
    if (p == NULL)
447
0
        return NULL;
448
449
0
    len = strlen(p);
450
0
    if (writer != NULL) {
451
0
        str = _PyBytesWriter_Prepare(writer, str, len);
452
0
        if (str == NULL) {
453
0
            PyMem_Free(p);
454
0
            return NULL;
455
0
        }
456
0
        memcpy(str, p, len);
457
0
        PyMem_Free(p);
458
0
        str += len;
459
0
        return str;
460
0
    }
461
462
0
    result = PyBytes_FromStringAndSize(p, len);
463
0
    PyMem_Free(p);
464
0
    *p_result = result;
465
0
    return result != NULL ? str : NULL;
466
0
}
467
468
/* Format 'v' as an integer for conversion 'type' via
   _PyUnicode_FormatLong().

   Ints are formatted directly.  Other numbers are first converted with
   _PyNumber_Index() for 'o', 'x' and 'X', or with PyNumber_Long() for the
   remaining conversions.  A TypeError raised by that conversion, or a
   non-number argument, is reported as a "%<type> format: ... is required"
   TypeError; any other conversion error propagates unchanged.

   Returns the result of _PyUnicode_FormatLong(), or NULL on failure. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    if (PyLong_Check(v)) {
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    }

    /* make sure number is a type of integer for o, x, and X */
    const int needs_index = (type == 'o' || type == 'x' || type == 'X');

    if (PyNumber_Check(v)) {
        PyObject *iobj = needs_index ? _PyNumber_Index(v)
                                     : PyNumber_Long(v);
        if (iobj != NULL) {
            assert(PyLong_Check(iobj));
            PyObject *result = _PyUnicode_FormatLong(iobj, flags & F_ALT,
                                                     prec, type);
            Py_DECREF(iobj);
            return result;
        }
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        needs_index ? "an integer" : "a real number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
496
497
static int
498
byte_converter(PyObject *arg, char *p)
499
0
{
500
0
    if (PyBytes_Check(arg)) {
501
0
        if (PyBytes_GET_SIZE(arg) != 1) {
502
0
            PyErr_Format(PyExc_TypeError,
503
0
                         "%%c requires an integer in range(256) or "
504
0
                         "a single byte, not a bytes object of length %zd",
505
0
                         PyBytes_GET_SIZE(arg));
506
0
            return 0;
507
0
        }
508
0
        *p = PyBytes_AS_STRING(arg)[0];
509
0
        return 1;
510
0
    }
511
0
    else if (PyByteArray_Check(arg)) {
512
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
513
0
            PyErr_Format(PyExc_TypeError,
514
0
                         "%%c requires an integer in range(256) or "
515
0
                         "a single byte, not a bytearray object of length %zd",
516
0
                         PyByteArray_GET_SIZE(arg));
517
0
            return 0;
518
0
        }
519
0
        *p = PyByteArray_AS_STRING(arg)[0];
520
0
        return 1;
521
0
    }
522
0
    else if (PyIndex_Check(arg)) {
523
0
        int overflow;
524
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
525
0
        if (ival == -1 && PyErr_Occurred()) {
526
0
            return 0;
527
0
        }
528
0
        if (!(0 <= ival && ival <= 255)) {
529
            /* this includes an overflow in converting to C long */
530
0
            PyErr_SetString(PyExc_OverflowError,
531
0
                            "%c arg not in range(256)");
532
0
            return 0;
533
0
        }
534
0
        *p = (char)ival;
535
0
        return 1;
536
0
    }
537
0
    PyErr_Format(PyExc_TypeError,
538
0
        "%%c requires an integer in range(256) or a single byte, not %T",
539
0
        arg);
540
0
    return 0;
541
0
}
542
543
static PyObject *_PyBytes_FromBuffer(PyObject *x);
544
545
static PyObject *
546
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
547
0
{
548
0
    PyObject *func, *result;
549
    /* is it a bytes object? */
550
0
    if (PyBytes_Check(v)) {
551
0
        *pbuf = PyBytes_AS_STRING(v);
552
0
        *plen = PyBytes_GET_SIZE(v);
553
0
        return Py_NewRef(v);
554
0
    }
555
0
    if (PyByteArray_Check(v)) {
556
0
        *pbuf = PyByteArray_AS_STRING(v);
557
0
        *plen = PyByteArray_GET_SIZE(v);
558
0
        return Py_NewRef(v);
559
0
    }
560
    /* does it support __bytes__? */
561
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
562
0
    if (func != NULL) {
563
0
        result = _PyObject_CallNoArgs(func);
564
0
        Py_DECREF(func);
565
0
        if (result == NULL)
566
0
            return NULL;
567
0
        if (!PyBytes_Check(result)) {
568
0
            PyErr_Format(PyExc_TypeError,
569
0
                         "__bytes__ returned non-bytes (type %.200s)",
570
0
                         Py_TYPE(result)->tp_name);
571
0
            Py_DECREF(result);
572
0
            return NULL;
573
0
        }
574
0
        *pbuf = PyBytes_AS_STRING(result);
575
0
        *plen = PyBytes_GET_SIZE(result);
576
0
        return result;
577
0
    }
578
    /* does it support buffer protocol? */
579
0
    if (PyObject_CheckBuffer(v)) {
580
        /* maybe we can avoid making a copy of the buffer object here? */
581
0
        result = _PyBytes_FromBuffer(v);
582
0
        if (result == NULL)
583
0
            return NULL;
584
0
        *pbuf = PyBytes_AS_STRING(result);
585
0
        *plen = PyBytes_GET_SIZE(result);
586
0
        return result;
587
0
    }
588
0
    PyErr_Format(PyExc_TypeError,
589
0
                 "%%b requires a bytes-like object, "
590
0
                 "or an object that implements __bytes__, not '%.100s'",
591
0
                 Py_TYPE(v)->tp_name);
592
0
    return NULL;
593
0
}
594
595
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
596
597
PyObject *
598
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599
                  PyObject *args, int use_bytearray)
600
0
{
601
0
    const char *fmt;
602
0
    char *res;
603
0
    Py_ssize_t arglen, argidx;
604
0
    Py_ssize_t fmtcnt;
605
0
    int args_owned = 0;
606
0
    PyObject *dict = NULL;
607
0
    _PyBytesWriter writer;
608
609
0
    if (args == NULL) {
610
0
        PyErr_BadInternalCall();
611
0
        return NULL;
612
0
    }
613
0
    fmt = format;
614
0
    fmtcnt = format_len;
615
616
0
    _PyBytesWriter_Init(&writer);
617
0
    writer.use_bytearray = use_bytearray;
618
619
0
    res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620
0
    if (res == NULL)
621
0
        return NULL;
622
0
    if (!use_bytearray)
623
0
        writer.overallocate = 1;
624
625
0
    if (PyTuple_Check(args)) {
626
0
        arglen = PyTuple_GET_SIZE(args);
627
0
        argidx = 0;
628
0
    }
629
0
    else {
630
0
        arglen = -1;
631
0
        argidx = -2;
632
0
    }
633
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635
0
        !PyByteArray_Check(args)) {
636
0
            dict = args;
637
0
    }
638
639
0
    while (--fmtcnt >= 0) {
640
0
        if (*fmt != '%') {
641
0
            Py_ssize_t len;
642
0
            char *pos;
643
644
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
645
0
            if (pos != NULL)
646
0
                len = pos - fmt;
647
0
            else
648
0
                len = fmtcnt + 1;
649
0
            assert(len != 0);
650
651
0
            memcpy(res, fmt, len);
652
0
            res += len;
653
0
            fmt += len;
654
0
            fmtcnt -= (len - 1);
655
0
        }
656
0
        else {
657
            /* Got a format specifier */
658
0
            int flags = 0;
659
0
            Py_ssize_t width = -1;
660
0
            int prec = -1;
661
0
            int c = '\0';
662
0
            int fill;
663
0
            PyObject *v = NULL;
664
0
            PyObject *temp = NULL;
665
0
            const char *pbuf = NULL;
666
0
            int sign;
667
0
            Py_ssize_t len = 0;
668
0
            char onechar; /* For byte_converter() */
669
0
            Py_ssize_t alloc;
670
671
0
            fmt++;
672
0
            if (*fmt == '%') {
673
0
                *res++ = '%';
674
0
                fmt++;
675
0
                fmtcnt--;
676
0
                continue;
677
0
            }
678
0
            if (*fmt == '(') {
679
0
                const char *keystart;
680
0
                Py_ssize_t keylen;
681
0
                PyObject *key;
682
0
                int pcount = 1;
683
684
0
                if (dict == NULL) {
685
0
                    PyErr_SetString(PyExc_TypeError,
686
0
                             "format requires a mapping");
687
0
                    goto error;
688
0
                }
689
0
                ++fmt;
690
0
                --fmtcnt;
691
0
                keystart = fmt;
692
                /* Skip over balanced parentheses */
693
0
                while (pcount > 0 && --fmtcnt >= 0) {
694
0
                    if (*fmt == ')')
695
0
                        --pcount;
696
0
                    else if (*fmt == '(')
697
0
                        ++pcount;
698
0
                    fmt++;
699
0
                }
700
0
                keylen = fmt - keystart - 1;
701
0
                if (fmtcnt < 0 || pcount > 0) {
702
0
                    PyErr_SetString(PyExc_ValueError,
703
0
                               "incomplete format key");
704
0
                    goto error;
705
0
                }
706
0
                key = PyBytes_FromStringAndSize(keystart,
707
0
                                                 keylen);
708
0
                if (key == NULL)
709
0
                    goto error;
710
0
                if (args_owned) {
711
0
                    Py_DECREF(args);
712
0
                    args_owned = 0;
713
0
                }
714
0
                args = PyObject_GetItem(dict, key);
715
0
                Py_DECREF(key);
716
0
                if (args == NULL) {
717
0
                    goto error;
718
0
                }
719
0
                args_owned = 1;
720
0
                arglen = -1;
721
0
                argidx = -2;
722
0
            }
723
724
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
725
0
            while (--fmtcnt >= 0) {
726
0
                switch (c = *fmt++) {
727
0
                case '-': flags |= F_LJUST; continue;
728
0
                case '+': flags |= F_SIGN; continue;
729
0
                case ' ': flags |= F_BLANK; continue;
730
0
                case '#': flags |= F_ALT; continue;
731
0
                case '0': flags |= F_ZERO; continue;
732
0
                }
733
0
                break;
734
0
            }
735
736
            /* Parse width. Example: "%10s" => width=10 */
737
0
            if (c == '*') {
738
0
                v = getnextarg(args, arglen, &argidx);
739
0
                if (v == NULL)
740
0
                    goto error;
741
0
                if (!PyLong_Check(v)) {
742
0
                    PyErr_SetString(PyExc_TypeError,
743
0
                                    "* wants int");
744
0
                    goto error;
745
0
                }
746
0
                width = PyLong_AsSsize_t(v);
747
0
                if (width == -1 && PyErr_Occurred())
748
0
                    goto error;
749
0
                if (width < 0) {
750
0
                    flags |= F_LJUST;
751
0
                    width = -width;
752
0
                }
753
0
                if (--fmtcnt >= 0)
754
0
                    c = *fmt++;
755
0
            }
756
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
757
0
                width = c - '0';
758
0
                while (--fmtcnt >= 0) {
759
0
                    c = Py_CHARMASK(*fmt++);
760
0
                    if (!Py_ISDIGIT(c))
761
0
                        break;
762
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763
0
                        PyErr_SetString(
764
0
                            PyExc_ValueError,
765
0
                            "width too big");
766
0
                        goto error;
767
0
                    }
768
0
                    width = width*10 + (c - '0');
769
0
                }
770
0
            }
771
772
            /* Parse precision. Example: "%.3f" => prec=3 */
773
0
            if (c == '.') {
774
0
                prec = 0;
775
0
                if (--fmtcnt >= 0)
776
0
                    c = *fmt++;
777
0
                if (c == '*') {
778
0
                    v = getnextarg(args, arglen, &argidx);
779
0
                    if (v == NULL)
780
0
                        goto error;
781
0
                    if (!PyLong_Check(v)) {
782
0
                        PyErr_SetString(
783
0
                            PyExc_TypeError,
784
0
                            "* wants int");
785
0
                        goto error;
786
0
                    }
787
0
                    prec = PyLong_AsInt(v);
788
0
                    if (prec == -1 && PyErr_Occurred())
789
0
                        goto error;
790
0
                    if (prec < 0)
791
0
                        prec = 0;
792
0
                    if (--fmtcnt >= 0)
793
0
                        c = *fmt++;
794
0
                }
795
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
796
0
                    prec = c - '0';
797
0
                    while (--fmtcnt >= 0) {
798
0
                        c = Py_CHARMASK(*fmt++);
799
0
                        if (!Py_ISDIGIT(c))
800
0
                            break;
801
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802
0
                            PyErr_SetString(
803
0
                                PyExc_ValueError,
804
0
                                "prec too big");
805
0
                            goto error;
806
0
                        }
807
0
                        prec = prec*10 + (c - '0');
808
0
                    }
809
0
                }
810
0
            } /* prec */
811
0
            if (fmtcnt >= 0) {
812
0
                if (c == 'h' || c == 'l' || c == 'L') {
813
0
                    if (--fmtcnt >= 0)
814
0
                        c = *fmt++;
815
0
                }
816
0
            }
817
0
            if (fmtcnt < 0) {
818
0
                PyErr_SetString(PyExc_ValueError,
819
0
                                "incomplete format");
820
0
                goto error;
821
0
            }
822
0
            v = getnextarg(args, arglen, &argidx);
823
0
            if (v == NULL)
824
0
                goto error;
825
826
0
            if (fmtcnt == 0) {
827
                /* last write: disable writer overallocation */
828
0
                writer.overallocate = 0;
829
0
            }
830
831
0
            sign = 0;
832
0
            fill = ' ';
833
0
            switch (c) {
834
0
            case 'r':
835
                // %r is only for 2/3 code; 3 only code should use %a
836
0
            case 'a':
837
0
                temp = PyObject_ASCII(v);
838
0
                if (temp == NULL)
839
0
                    goto error;
840
0
                assert(PyUnicode_IS_ASCII(temp));
841
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842
0
                len = PyUnicode_GET_LENGTH(temp);
843
0
                if (prec >= 0 && len > prec)
844
0
                    len = prec;
845
0
                break;
846
847
0
            case 's':
848
                // %s is only for 2/3 code; 3 only code should use %b
849
0
            case 'b':
850
0
                temp = format_obj(v, &pbuf, &len);
851
0
                if (temp == NULL)
852
0
                    goto error;
853
0
                if (prec >= 0 && len > prec)
854
0
                    len = prec;
855
0
                break;
856
857
0
            case 'i':
858
0
            case 'd':
859
0
            case 'u':
860
0
            case 'o':
861
0
            case 'x':
862
0
            case 'X':
863
0
                if (PyLong_CheckExact(v)
864
0
                    && width == -1 && prec == -1
865
0
                    && !(flags & (F_SIGN | F_BLANK))
866
0
                    && c != 'X')
867
0
                {
868
                    /* Fast path */
869
0
                    int alternate = flags & F_ALT;
870
0
                    int base;
871
872
0
                    switch(c)
873
0
                    {
874
0
                        default:
875
0
                            Py_UNREACHABLE();
876
0
                        case 'd':
877
0
                        case 'i':
878
0
                        case 'u':
879
0
                            base = 10;
880
0
                            break;
881
0
                        case 'o':
882
0
                            base = 8;
883
0
                            break;
884
0
                        case 'x':
885
0
                        case 'X':
886
0
                            base = 16;
887
0
                            break;
888
0
                    }
889
890
                    /* Fast path */
891
0
                    writer.min_size -= 2; /* size preallocated for "%d" */
892
0
                    res = _PyLong_FormatBytesWriter(&writer, res,
893
0
                                                    v, base, alternate);
894
0
                    if (res == NULL)
895
0
                        goto error;
896
0
                    continue;
897
0
                }
898
899
0
                temp = formatlong(v, flags, prec, c);
900
0
                if (!temp)
901
0
                    goto error;
902
0
                assert(PyUnicode_IS_ASCII(temp));
903
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904
0
                len = PyUnicode_GET_LENGTH(temp);
905
0
                sign = 1;
906
0
                if (flags & F_ZERO)
907
0
                    fill = '0';
908
0
                break;
909
910
0
            case 'e':
911
0
            case 'E':
912
0
            case 'f':
913
0
            case 'F':
914
0
            case 'g':
915
0
            case 'G':
916
0
                if (width == -1 && prec == -1
917
0
                    && !(flags & (F_SIGN | F_BLANK)))
918
0
                {
919
                    /* Fast path */
920
0
                    writer.min_size -= 2; /* size preallocated for "%f" */
921
0
                    res = formatfloat(v, flags, prec, c, NULL, &writer, res);
922
0
                    if (res == NULL)
923
0
                        goto error;
924
0
                    continue;
925
0
                }
926
927
0
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
928
0
                    goto error;
929
0
                pbuf = PyBytes_AS_STRING(temp);
930
0
                len = PyBytes_GET_SIZE(temp);
931
0
                sign = 1;
932
0
                if (flags & F_ZERO)
933
0
                    fill = '0';
934
0
                break;
935
936
0
            case 'c':
937
0
                pbuf = &onechar;
938
0
                len = byte_converter(v, &onechar);
939
0
                if (!len)
940
0
                    goto error;
941
0
                if (width == -1) {
942
                    /* Fast path */
943
0
                    *res++ = onechar;
944
0
                    continue;
945
0
                }
946
0
                break;
947
948
0
            default:
949
0
                PyErr_Format(PyExc_ValueError,
950
0
                  "unsupported format character '%c' (0x%x) "
951
0
                  "at index %zd",
952
0
                  c, c,
953
0
                  (Py_ssize_t)(fmt - 1 - format));
954
0
                goto error;
955
0
            }
956
957
0
            if (sign) {
958
0
                if (*pbuf == '-' || *pbuf == '+') {
959
0
                    sign = *pbuf++;
960
0
                    len--;
961
0
                }
962
0
                else if (flags & F_SIGN)
963
0
                    sign = '+';
964
0
                else if (flags & F_BLANK)
965
0
                    sign = ' ';
966
0
                else
967
0
                    sign = 0;
968
0
            }
969
0
            if (width < len)
970
0
                width = len;
971
972
0
            alloc = width;
973
0
            if (sign != 0 && len == width)
974
0
                alloc++;
975
            /* 2: size preallocated for %s */
976
0
            if (alloc > 2) {
977
0
                res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
978
0
                if (res == NULL)
979
0
                    goto error;
980
0
            }
981
#ifndef NDEBUG
982
            char *before = res;
983
#endif
984
985
            /* Write the sign if needed */
986
0
            if (sign) {
987
0
                if (fill != ' ')
988
0
                    *res++ = sign;
989
0
                if (width > len)
990
0
                    width--;
991
0
            }
992
993
            /* Write the numeric prefix for "x", "X" and "o" formats
994
               if the alternate form is used.
995
               For example, write "0x" for the "%#x" format. */
996
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
997
0
                assert(pbuf[0] == '0');
998
0
                assert(pbuf[1] == c);
999
0
                if (fill != ' ') {
1000
0
                    *res++ = *pbuf++;
1001
0
                    *res++ = *pbuf++;
1002
0
                }
1003
0
                width -= 2;
1004
0
                if (width < 0)
1005
0
                    width = 0;
1006
0
                len -= 2;
1007
0
            }
1008
1009
            /* Pad left with the fill character if needed */
1010
0
            if (width > len && !(flags & F_LJUST)) {
1011
0
                memset(res, fill, width - len);
1012
0
                res += (width - len);
1013
0
                width = len;
1014
0
            }
1015
1016
            /* If padding with spaces: write sign if needed and/or numeric
1017
               prefix if the alternate form is used */
1018
0
            if (fill == ' ') {
1019
0
                if (sign)
1020
0
                    *res++ = sign;
1021
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1022
0
                    assert(pbuf[0] == '0');
1023
0
                    assert(pbuf[1] == c);
1024
0
                    *res++ = *pbuf++;
1025
0
                    *res++ = *pbuf++;
1026
0
                }
1027
0
            }
1028
1029
            /* Copy bytes */
1030
0
            memcpy(res, pbuf, len);
1031
0
            res += len;
1032
1033
            /* Pad right with the fill character if needed */
1034
0
            if (width > len) {
1035
0
                memset(res, ' ', width - len);
1036
0
                res += (width - len);
1037
0
            }
1038
1039
0
            if (dict && (argidx < arglen)) {
1040
0
                PyErr_SetString(PyExc_TypeError,
1041
0
                           "not all arguments converted during bytes formatting");
1042
0
                Py_XDECREF(temp);
1043
0
                goto error;
1044
0
            }
1045
0
            Py_XDECREF(temp);
1046
1047
#ifndef NDEBUG
1048
            /* check that we computed the exact size for this write */
1049
            assert((res - before) == alloc);
1050
#endif
1051
0
        } /* '%' */
1052
1053
        /* If overallocation was disabled, ensure that it was the last
1054
           write. Otherwise, we missed an optimization */
1055
0
        assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1056
0
    } /* until end */
1057
1058
0
    if (argidx < arglen && !dict) {
1059
0
        PyErr_SetString(PyExc_TypeError,
1060
0
                        "not all arguments converted during bytes formatting");
1061
0
        goto error;
1062
0
    }
1063
1064
0
    if (args_owned) {
1065
0
        Py_DECREF(args);
1066
0
    }
1067
0
    return _PyBytesWriter_Finish(&writer, res);
1068
1069
0
 error:
1070
0
    _PyBytesWriter_Dealloc(&writer);
1071
0
    if (args_owned) {
1072
0
        Py_DECREF(args);
1073
0
    }
1074
0
    return NULL;
1075
0
}
1076
1077
/* Unescape a backslash-escaped string. */
1078
PyObject *_PyBytes_DecodeEscape2(const char *s,
1079
                                Py_ssize_t len,
1080
                                const char *errors,
1081
                                int *first_invalid_escape_char,
1082
                                const char **first_invalid_escape_ptr)
1083
2.88k
{
1084
2.88k
    int c;
1085
2.88k
    char *p;
1086
2.88k
    const char *end;
1087
2.88k
    _PyBytesWriter writer;
1088
1089
2.88k
    _PyBytesWriter_Init(&writer);
1090
1091
2.88k
    p = _PyBytesWriter_Alloc(&writer, len);
1092
2.88k
    if (p == NULL)
1093
0
        return NULL;
1094
2.88k
    writer.overallocate = 1;
1095
1096
2.88k
    *first_invalid_escape_char = -1;
1097
2.88k
    *first_invalid_escape_ptr = NULL;
1098
1099
2.88k
    end = s + len;
1100
49.5k
    while (s < end) {
1101
46.6k
        if (*s != '\\') {
1102
35.7k
            *p++ = *s++;
1103
35.7k
            continue;
1104
35.7k
        }
1105
1106
10.9k
        s++;
1107
10.9k
        if (s == end) {
1108
0
            PyErr_SetString(PyExc_ValueError,
1109
0
                            "Trailing \\ in string");
1110
0
            goto failed;
1111
0
        }
1112
1113
10.9k
        switch (*s++) {
1114
        /* XXX This assumes ASCII! */
1115
1.30k
        case '\n': break;
1116
631
        case '\\': *p++ = '\\'; break;
1117
235
        case '\'': *p++ = '\''; break;
1118
961
        case '\"': *p++ = '\"'; break;
1119
215
        case 'b': *p++ = '\b'; break;
1120
230
        case 'f': *p++ = '\014'; break; /* FF */
1121
233
        case 't': *p++ = '\t'; break;
1122
267
        case 'n': *p++ = '\n'; break;
1123
470
        case 'r': *p++ = '\r'; break;
1124
230
        case 'v': *p++ = '\013'; break; /* VT */
1125
212
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1126
2.52k
        case '0': case '1': case '2': case '3':
1127
4.36k
        case '4': case '5': case '6': case '7':
1128
4.36k
            c = s[-1] - '0';
1129
4.36k
            if (s < end && '0' <= *s && *s <= '7') {
1130
2.32k
                c = (c<<3) + *s++ - '0';
1131
2.32k
                if (s < end && '0' <= *s && *s <= '7')
1132
1.27k
                    c = (c<<3) + *s++ - '0';
1133
2.32k
            }
1134
4.36k
            if (c > 0377) {
1135
630
                if (*first_invalid_escape_char == -1) {
1136
106
                    *first_invalid_escape_char = c;
1137
                    /* Back up 3 chars, since we've already incremented s. */
1138
106
                    *first_invalid_escape_ptr = s - 3;
1139
106
                }
1140
630
            }
1141
4.36k
            *p++ = c;
1142
4.36k
            break;
1143
235
        case 'x':
1144
235
            if (s+1 < end) {
1145
234
                int digit1, digit2;
1146
234
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1147
234
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1148
234
                if (digit1 < 16 && digit2 < 16) {
1149
230
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1150
230
                    s += 2;
1151
230
                    break;
1152
230
                }
1153
234
            }
1154
            /* invalid hexadecimal digits */
1155
1156
5
            if (!errors || strcmp(errors, "strict") == 0) {
1157
5
                PyErr_Format(PyExc_ValueError,
1158
5
                             "invalid \\x escape at position %zd",
1159
5
                             s - 2 - (end - len));
1160
5
                goto failed;
1161
5
            }
1162
0
            if (strcmp(errors, "replace") == 0) {
1163
0
                *p++ = '?';
1164
0
            } else if (strcmp(errors, "ignore") == 0)
1165
0
                /* do nothing */;
1166
0
            else {
1167
0
                PyErr_Format(PyExc_ValueError,
1168
0
                             "decoding error; unknown "
1169
0
                             "error handling code: %.400s",
1170
0
                             errors);
1171
0
                goto failed;
1172
0
            }
1173
            /* skip \x */
1174
0
            if (s < end && Py_ISXDIGIT(s[0]))
1175
0
                s++; /* and a hexdigit */
1176
0
            break;
1177
1178
1.34k
        default:
1179
1.34k
            if (*first_invalid_escape_char == -1) {
1180
578
                *first_invalid_escape_char = (unsigned char)s[-1];
1181
                /* Back up one char, since we've already incremented s. */
1182
578
                *first_invalid_escape_ptr = s - 1;
1183
578
            }
1184
1.34k
            *p++ = '\\';
1185
1.34k
            s--;
1186
10.9k
        }
1187
10.9k
    }
1188
1189
2.88k
    return _PyBytesWriter_Finish(&writer, p);
1190
1191
5
  failed:
1192
5
    _PyBytesWriter_Dealloc(&writer);
1193
5
    return NULL;
1194
2.88k
}
1195
1196
PyObject *PyBytes_DecodeEscape(const char *s,
1197
                                Py_ssize_t len,
1198
                                const char *errors,
1199
                                Py_ssize_t Py_UNUSED(unicode),
1200
                                const char *Py_UNUSED(recode_encoding))
1201
0
{
1202
0
    int first_invalid_escape_char;
1203
0
    const char *first_invalid_escape_ptr;
1204
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1205
0
                                             &first_invalid_escape_char,
1206
0
                                             &first_invalid_escape_ptr);
1207
0
    if (result == NULL)
1208
0
        return NULL;
1209
0
    if (first_invalid_escape_char != -1) {
1210
0
        if (first_invalid_escape_char > 0xff) {
1211
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1212
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1213
0
                                 "Such sequences will not work in the future. ",
1214
0
                                 first_invalid_escape_char) < 0)
1215
0
            {
1216
0
                Py_DECREF(result);
1217
0
                return NULL;
1218
0
            }
1219
0
        }
1220
0
        else {
1221
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1222
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1223
0
                                 "Such sequences will not work in the future. ",
1224
0
                                 first_invalid_escape_char) < 0)
1225
0
            {
1226
0
                Py_DECREF(result);
1227
0
                return NULL;
1228
0
            }
1229
0
        }
1230
0
    }
1231
0
    return result;
1232
0
}
1233
/* -------------------------------------------------------------------- */
1234
/* object api */
1235
1236
Py_ssize_t
1237
PyBytes_Size(PyObject *op)
1238
5.34k
{
1239
5.34k
    if (!PyBytes_Check(op)) {
1240
0
        PyErr_Format(PyExc_TypeError,
1241
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1242
0
        return -1;
1243
0
    }
1244
5.34k
    return Py_SIZE(op);
1245
5.34k
}
1246
1247
char *
1248
PyBytes_AsString(PyObject *op)
1249
3.15M
{
1250
3.15M
    if (!PyBytes_Check(op)) {
1251
0
        PyErr_Format(PyExc_TypeError,
1252
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1253
0
        return NULL;
1254
0
    }
1255
3.15M
    return ((PyBytesObject *)op)->ob_sval;
1256
3.15M
}
1257
1258
int
1259
PyBytes_AsStringAndSize(PyObject *obj,
1260
                         char **s,
1261
                         Py_ssize_t *len)
1262
86.0k
{
1263
86.0k
    if (s == NULL) {
1264
0
        PyErr_BadInternalCall();
1265
0
        return -1;
1266
0
    }
1267
1268
86.0k
    if (!PyBytes_Check(obj)) {
1269
0
        PyErr_Format(PyExc_TypeError,
1270
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1271
0
        return -1;
1272
0
    }
1273
1274
86.0k
    *s = PyBytes_AS_STRING(obj);
1275
86.0k
    if (len != NULL)
1276
86.0k
        *len = PyBytes_GET_SIZE(obj);
1277
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1278
0
        PyErr_SetString(PyExc_ValueError,
1279
0
                        "embedded null byte");
1280
0
        return -1;
1281
0
    }
1282
86.0k
    return 0;
1283
86.0k
}
1284
1285
/* -------------------------------------------------------------------- */
1286
/* Methods */
1287
1288
0
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1289
1290
#include "stringlib/stringdefs.h"
1291
#define STRINGLIB_MUTABLE 0
1292
1293
#include "stringlib/fastsearch.h"
1294
#include "stringlib/count.h"
1295
#include "stringlib/find.h"
1296
#include "stringlib/join.h"
1297
#include "stringlib/partition.h"
1298
#include "stringlib/split.h"
1299
#include "stringlib/ctype.h"
1300
1301
#include "stringlib/transmogrify.h"
1302
1303
#undef STRINGLIB_GET_EMPTY
1304
1305
Py_ssize_t
1306
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1307
              const char *needle, Py_ssize_t len_needle,
1308
              Py_ssize_t offset)
1309
0
{
1310
0
    assert(len_haystack >= 0);
1311
0
    assert(len_needle >= 0);
1312
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1313
0
    if (len_needle == 0) {
1314
0
        return offset;
1315
0
    }
1316
0
    if (len_needle > len_haystack) {
1317
0
        return -1;
1318
0
    }
1319
0
    assert(len_haystack >= 1);
1320
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1321
0
                                    needle, len_needle, offset);
1322
0
    if (res == -1) {
1323
0
        Py_ssize_t last_align = len_haystack - len_needle;
1324
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1325
0
            return offset + last_align;
1326
0
        }
1327
0
    }
1328
0
    return res;
1329
0
}
1330
1331
Py_ssize_t
1332
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1333
                     const char *needle, Py_ssize_t len_needle,
1334
                     Py_ssize_t offset)
1335
0
{
1336
0
    return stringlib_rfind(haystack, len_haystack,
1337
0
                           needle, len_needle, offset);
1338
0
}
1339
1340
/* Build the str representation b'...' of a bytes object.

   Works in two passes: the first computes the exact output length (with
   overflow checks), the second fills a freshly created ASCII str.

   When `smartquotes` is nonzero and the data contains single quotes but no
   double quotes, double quotes are used as the delimiter so that nothing
   needs escaping.

   Returns the new str, NULL if PyUnicode_New() fails, or NULL with
   OverflowError set if the length does not fit in Py_ssize_t. */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    PyBytesObject *op = (PyBytesObject *)obj;
    const Py_ssize_t length = Py_SIZE(op);
    const unsigned char *src = (const unsigned char *)op->ob_sval;
    Py_ssize_t squotes = 0;
    Py_ssize_t dquotes = 0;
    Py_ssize_t newsize = 3;  /* b'' */
    unsigned char quote;
    PyObject *v;
    Py_UCS1 *p;

    /* Compute size of output string */
    for (Py_ssize_t i = 0; i < length; i++) {
        const unsigned char ch = src[i];
        Py_ssize_t incr;

        if (ch == '\'') {
            squotes++;
            incr = 1;
        }
        else if (ch == '"') {
            dquotes++;
            incr = 1;
        }
        else if (ch == '\\' || ch == '\t' || ch == '\n' || ch == '\r') {
            incr = 2;  /* \C */
        }
        else if (ch < ' ' || ch >= 0x7f) {
            incr = 4;  /* \xHH */
        }
        else {
            incr = 1;
        }

        if (newsize > PY_SSIZE_T_MAX - incr) {
            goto overflow;
        }
        newsize += incr;
    }

    quote = '\'';
    if (smartquotes && squotes && !dquotes) {
        quote = '"';
    }
    if (squotes && quote == '\'') {
        /* Each single quote will need a backslash in front of it. */
        if (newsize > PY_SSIZE_T_MAX - squotes) {
            goto overflow;
        }
        newsize += squotes;
    }

    v = PyUnicode_New(newsize, 127);
    if (v == NULL) {
        return NULL;
    }
    p = PyUnicode_1BYTE_DATA(v);

    *p++ = 'b';
    *p++ = quote;
    for (Py_ssize_t i = 0; i < length; i++) {
        const unsigned char c = src[i];

        if (c == quote || c == '\\') {
            *p++ = '\\';
            *p++ = c;
        }
        else if (c == '\t') {
            *p++ = '\\';
            *p++ = 't';
        }
        else if (c == '\n') {
            *p++ = '\\';
            *p++ = 'n';
        }
        else if (c == '\r') {
            *p++ = '\\';
            *p++ = 'r';
        }
        else if (c < ' ' || c >= 0x7f) {
            *p++ = '\\';
            *p++ = 'x';
            *p++ = Py_hexdigits[c >> 4];
            *p++ = Py_hexdigits[c & 0xf];
        }
        else {
            *p++ = c;
        }
    }
    *p = quote;
    assert(_PyUnicode_CheckConsistency(v, 1));
    return v;

  overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "bytes object is too large to make repr");
    return NULL;
}
1414
1415
/* repr() for bytes: delegates to PyBytes_Repr() with smartquotes enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1420
1421
/* str() for bytes: same output as bytes_repr().  If the bytes_warning
   config field is set, a BytesWarning is issued first; a nonzero result
   from that warning call aborts with NULL. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1) != 0)
    {
        return NULL;
    }
    return bytes_repr(op);
}
1432
1433
static Py_ssize_t
1434
bytes_length(PyObject *self)
1435
1.60M
{
1436
1.60M
    PyBytesObject *a = _PyBytes_CAST(self);
1437
1.60M
    return Py_SIZE(a);
1438
1.60M
}
1439
1440
/* This is also used by PyBytes_Concat() */
1441
static PyObject *
1442
bytes_concat(PyObject *a, PyObject *b)
1443
64.1k
{
1444
64.1k
    Py_buffer va, vb;
1445
64.1k
    PyObject *result = NULL;
1446
1447
64.1k
    va.len = -1;
1448
64.1k
    vb.len = -1;
1449
64.1k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1450
64.1k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1451
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1452
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1453
0
        goto done;
1454
0
    }
1455
1456
    /* Optimize end cases */
1457
64.1k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1458
1.51k
        result = Py_NewRef(b);
1459
1.51k
        goto done;
1460
1.51k
    }
1461
62.6k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1462
12.5k
        result = Py_NewRef(a);
1463
12.5k
        goto done;
1464
12.5k
    }
1465
1466
50.0k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1467
0
        PyErr_NoMemory();
1468
0
        goto done;
1469
0
    }
1470
1471
50.0k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1472
50.0k
    if (result != NULL) {
1473
50.0k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1474
50.0k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1475
50.0k
    }
1476
1477
64.1k
  done:
1478
64.1k
    if (va.len != -1)
1479
64.1k
        PyBuffer_Release(&va);
1480
64.1k
    if (vb.len != -1)
1481
64.1k
        PyBuffer_Release(&vb);
1482
64.1k
    return result;
1483
50.0k
}
1484
1485
static PyObject *
1486
bytes_repeat(PyObject *self, Py_ssize_t n)
1487
16
{
1488
16
    PyBytesObject *a = _PyBytes_CAST(self);
1489
16
    if (n < 0)
1490
0
        n = 0;
1491
    /* watch out for overflows:  the size can overflow int,
1492
     * and the # of bytes needed can overflow size_t
1493
     */
1494
16
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1495
0
        PyErr_SetString(PyExc_OverflowError,
1496
0
            "repeated bytes are too long");
1497
0
        return NULL;
1498
0
    }
1499
16
    Py_ssize_t size = Py_SIZE(a) * n;
1500
16
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1501
0
        return Py_NewRef(a);
1502
0
    }
1503
16
    size_t nbytes = (size_t)size;
1504
16
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1505
0
        PyErr_SetString(PyExc_OverflowError,
1506
0
            "repeated bytes are too long");
1507
0
        return NULL;
1508
0
    }
1509
16
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1510
16
    if (op == NULL) {
1511
0
        return PyErr_NoMemory();
1512
0
    }
1513
16
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1514
16
    set_ob_shash(op, -1);
1515
16
    op->ob_sval[size] = '\0';
1516
1517
16
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1518
1519
16
    return (PyObject *) op;
1520
16
}
1521
1522
static int
1523
bytes_contains(PyObject *self, PyObject *arg)
1524
2.94k
{
1525
2.94k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1526
2.94k
}
1527
1528
static PyObject *
1529
bytes_item(PyObject *self, Py_ssize_t i)
1530
0
{
1531
0
    PyBytesObject *a = _PyBytes_CAST(self);
1532
0
    if (i < 0 || i >= Py_SIZE(a)) {
1533
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1534
0
        return NULL;
1535
0
    }
1536
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1537
0
}
1538
1539
static int
1540
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1541
430k
{
1542
430k
    int cmp;
1543
430k
    Py_ssize_t len;
1544
1545
430k
    len = Py_SIZE(a);
1546
430k
    if (Py_SIZE(b) != len)
1547
339k
        return 0;
1548
1549
90.6k
    if (a->ob_sval[0] != b->ob_sval[0])
1550
10.1k
        return 0;
1551
1552
80.5k
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1553
80.5k
    return (cmp == 0);
1554
90.6k
}
1555
1556
/* Rich comparison slot for bytes.

   If either operand is not a bytes object, NotImplemented is returned;
   before that, when the bytes_warning config field is set and the
   operation is == or !=, a BytesWarning is issued for comparisons that
   involve a str or an int.

   Identical objects are answered without looking at the data.  == and !=
   go through bytes_compare_eq(); the ordering operators compare the common
   prefix and fall back to comparing the lengths. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            /* a byte string is equal to itself */
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        int eq = bytes_compare_eq(a, b);
        /* Flip the answer for != */
        eq ^= (op == Py_NE);
        return PyBool_FromLong(eq);
    }

    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t min_len = Py_MIN(len_a, len_b);
    int c = 0;
    if (min_len > 0) {
        /* Compare the first byte directly before resorting to memcmp(). */
        c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (c == 0) {
            c = memcmp(a->ob_sval, b->ob_sval, min_len);
        }
    }
    if (c != 0) {
        Py_RETURN_RICHCOMPARE(c, 0, op);
    }
    /* Common prefix is equal: the lengths decide. */
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1618
1619
static Py_hash_t
1620
bytes_hash(PyObject *self)
1621
4.21M
{
1622
4.21M
    PyBytesObject *a = _PyBytes_CAST(self);
1623
4.21M
    Py_hash_t hash = get_ob_shash(a);
1624
4.21M
    if (hash == -1) {
1625
        /* Can't fail */
1626
137k
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1627
137k
        set_ob_shash(a, hash);
1628
137k
    }
1629
4.21M
    return hash;
1630
4.21M
}
1631
1632
/* Subscript slot: implements bytes[item] for index and slice objects.

   Index: negative values are offset by the size; an out-of-range index
   sets IndexError.  The byte is returned as an int object.

   Slice: an empty slice returns the empty-bytes constant; a step-1 slice
   covering an exact bytes object entirely returns that object with a new
   reference; any other step-1 slice is copied in one call; other steps are
   copied byte by byte.

   Any other `item` type sets TypeError.  Returns NULL on every error. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t idx = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (idx == -1 && PyErr_Occurred()) {
            return NULL;
        }
        const Py_ssize_t size = PyBytes_GET_SIZE(self);
        if (idx < 0) {
            idx += size;
        }
        if (idx < 0 || idx >= size) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[idx]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    const Py_ssize_t slicelength =
        PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        if (start == 0 &&
            slicelength == PyBytes_GET_SIZE(self) &&
            PyBytes_CheckExact(self))
        {
            /* Whole-object slice of an exact bytes: share it. */
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self) + start,
            slicelength);
    }

    /* Extended slice: gather every step-th byte. */
    const char *source_buf = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }

    char *result_buf = PyBytes_AS_STRING(result);
    size_t cur = start;
    for (Py_ssize_t i = 0; i < slicelength; i++) {
        result_buf[i] = source_buf[cur];
        cur += step;
    }
    return result;
}
1697
1698
static int
1699
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1700
11.9M
{
1701
11.9M
    PyBytesObject *self = _PyBytes_CAST(op);
1702
11.9M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1703
11.9M
                             1, flags);
1704
11.9M
}
1705
1706
static PySequenceMethods bytes_as_sequence = {
1707
    bytes_length,       /*sq_length*/
1708
    bytes_concat,       /*sq_concat*/
1709
    bytes_repeat,       /*sq_repeat*/
1710
    bytes_item,         /*sq_item*/
1711
    0,                  /*sq_slice*/
1712
    0,                  /*sq_ass_item*/
1713
    0,                  /*sq_ass_slice*/
1714
    bytes_contains      /*sq_contains*/
1715
};
1716
1717
static PyMappingMethods bytes_as_mapping = {
1718
    bytes_length,
1719
    bytes_subscript,
1720
    0,
1721
};
1722
1723
static PyBufferProcs bytes_as_buffer = {
1724
    bytes_buffer_getbuffer,
1725
    NULL,
1726
};
1727
1728
1729
/*[clinic input]
1730
bytes.__bytes__
1731
Convert this value to exact type bytes.
1732
[clinic start generated code]*/
1733
1734
static PyObject *
1735
bytes___bytes___impl(PyBytesObject *self)
1736
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1737
69.9k
{
1738
69.9k
    if (PyBytes_CheckExact(self)) {
1739
69.9k
        return Py_NewRef(self);
1740
69.9k
    }
1741
0
    else {
1742
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1743
0
    }
1744
69.9k
}
1745
1746
1747
0
#define LEFTSTRIP 0
1748
0
#define RIGHTSTRIP 1
1749
0
#define BOTHSTRIP 2
1750
1751
/*[clinic input]
1752
bytes.split
1753
1754
    sep: object = None
1755
        The delimiter according which to split the bytes.
1756
        None (the default value) means split on ASCII whitespace characters
1757
        (space, tab, return, newline, formfeed, vertical tab).
1758
    maxsplit: Py_ssize_t = -1
1759
        Maximum number of splits to do.
1760
        -1 (the default value) means no limit.
1761
1762
Return a list of the sections in the bytes, using sep as the delimiter.
1763
[clinic start generated code]*/
1764
1765
static PyObject *
1766
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1767
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1768
2.50M
{
1769
2.50M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1770
2.50M
    const char *s = PyBytes_AS_STRING(self), *sub;
1771
2.50M
    Py_buffer vsub;
1772
2.50M
    PyObject *list;
1773
1774
2.50M
    if (maxsplit < 0)
1775
2.50M
        maxsplit = PY_SSIZE_T_MAX;
1776
2.50M
    if (sep == Py_None)
1777
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1778
2.50M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1779
0
        return NULL;
1780
2.50M
    sub = vsub.buf;
1781
2.50M
    n = vsub.len;
1782
1783
2.50M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1784
2.50M
    PyBuffer_Release(&vsub);
1785
2.50M
    return list;
1786
2.50M
}
1787
1788
/*[clinic input]
1789
bytes.partition
1790
1791
    sep: Py_buffer
1792
    /
1793
1794
Partition the bytes into three parts using the given separator.
1795
1796
This will search for the separator sep in the bytes. If the separator is found,
1797
returns a 3-tuple containing the part before the separator, the separator
1798
itself, and the part after it.
1799
1800
If the separator is not found, returns a 3-tuple containing the original bytes
1801
object and two empty bytes objects.
1802
[clinic start generated code]*/
1803
1804
static PyObject *
1805
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1806
/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1807
0
{
1808
0
    return stringlib_partition(
1809
0
        (PyObject*) self,
1810
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1811
0
        sep->obj, (const char *)sep->buf, sep->len
1812
0
        );
1813
0
}
1814
1815
/*[clinic input]
1816
bytes.rpartition
1817
1818
    sep: Py_buffer
1819
    /
1820
1821
Partition the bytes into three parts using the given separator.
1822
1823
This will search for the separator sep in the bytes, starting at the end. If
1824
the separator is found, returns a 3-tuple containing the part before the
1825
separator, the separator itself, and the part after it.
1826
1827
If the separator is not found, returns a 3-tuple containing two empty bytes
1828
objects and the original bytes object.
1829
[clinic start generated code]*/
1830
1831
static PyObject *
1832
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1833
/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1834
0
{
1835
0
    return stringlib_rpartition(
1836
0
        (PyObject*) self,
1837
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1838
0
        sep->obj, (const char *)sep->buf, sep->len
1839
0
        );
1840
0
}
1841
1842
/*[clinic input]
1843
bytes.rsplit = bytes.split
1844
1845
Return a list of the sections in the bytes, using sep as the delimiter.
1846
1847
Splitting is done starting at the end of the bytes and working to the front.
1848
[clinic start generated code]*/
1849
1850
static PyObject *
1851
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1852
/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1853
0
{
1854
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1855
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1856
0
    Py_buffer vsub;
1857
0
    PyObject *list;
1858
1859
0
    if (maxsplit < 0)
1860
0
        maxsplit = PY_SSIZE_T_MAX;
1861
0
    if (sep == Py_None)
1862
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1863
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1864
0
        return NULL;
1865
0
    sub = vsub.buf;
1866
0
    n = vsub.len;
1867
1868
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1869
0
    PyBuffer_Release(&vsub);
1870
0
    return list;
1871
0
}
1872
1873
1874
/*[clinic input]
1875
bytes.join
1876
1877
    iterable_of_bytes: object
1878
    /
1879
1880
Concatenate any number of bytes objects.
1881
1882
The bytes whose method is called is inserted in between each pair.
1883
1884
The result is returned as a new bytes object.
1885
1886
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1887
[clinic start generated code]*/
1888
1889
static PyObject *
1890
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1891
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1892
8.93k
{
1893
8.93k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1894
8.93k
}
1895
1896
/* Join the items of `iterable` using the bytes object `sep` as separator.

   Returns whatever stringlib_bytes_join() returns.  A NULL `sep` raises the
   "bad internal call" error; a `sep` that is not a bytes object raises
   TypeError.  In both error cases NULL is returned. */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
1911
1912
/*[clinic input]
1913
@text_signature "($self, sub[, start[, end]], /)"
1914
bytes.find
1915
1916
    sub: object
1917
    start: slice_index(accept={int, NoneType}, c_default='0') = None
1918
         Optional start position. Default: start of the bytes.
1919
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
1920
         Optional stop position. Default: end of the bytes.
1921
    /
1922
1923
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1924
1925
Return -1 on failure.
1926
[clinic start generated code]*/
1927
1928
static PyObject *
1929
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1930
                Py_ssize_t end)
1931
/*[clinic end generated code: output=d5961a1c77b472a1 input=3171e62a8ae7f240]*/
1932
0
{
1933
0
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1934
0
                          sub, start, end);
1935
0
}
1936
1937
/*[clinic input]
1938
bytes.index = bytes.find
1939
1940
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1941
1942
Raise ValueError if the subsection is not found.
1943
[clinic start generated code]*/
1944
1945
static PyObject *
1946
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1947
                 Py_ssize_t end)
1948
/*[clinic end generated code: output=0da25cc74683ba42 input=aa34ad71ba0bafe3]*/
1949
0
{
1950
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1951
0
                           sub, start, end);
1952
0
}
1953
1954
/*[clinic input]
1955
bytes.rfind = bytes.find
1956
1957
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1958
1959
Return -1 on failure.
1960
[clinic start generated code]*/
1961
1962
static PyObject *
1963
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1964
                 Py_ssize_t end)
1965
/*[clinic end generated code: output=51b60fa4ad011c09 input=864c3e7f3010b33c]*/
1966
18.3k
{
1967
18.3k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1968
18.3k
                           sub, start, end);
1969
18.3k
}
1970
1971
/*[clinic input]
1972
bytes.rindex = bytes.find
1973
1974
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1975
1976
Raise ValueError if the subsection is not found.
1977
[clinic start generated code]*/
1978
1979
static PyObject *
1980
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1981
                  Py_ssize_t end)
1982
/*[clinic end generated code: output=42bf674e0a0aabf6 input=21051fc5cfeacf2c]*/
1983
0
{
1984
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1985
0
                            sub, start, end);
1986
0
}
1987
1988
1989
/* Strip from `self` every leading and/or trailing byte that occurs in the
   buffer exported by `sepobj`.

   `striptype` selects the side(s): the left side is skipped only for
   RIGHTSTRIP, the right side only for LEFTSTRIP.  Returns a new reference:
   `self` itself when nothing was stripped and it is an exact bytes object,
   otherwise a new bytes object.  Returns NULL if `sepobj` cannot export a
   simple buffer. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    Py_buffer sep_view;
    if (PyObject_GetBuffer(sepobj, &sep_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }

    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);
    const char *chars = sep_view.buf;
    const Py_ssize_t nchars = sep_view.len;

    /* Advance `left` past every leading byte found in the strip set. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (memchr(chars, Py_CHARMASK(data[left]), nchars) == NULL) {
                break;
            }
        }
    }

    /* Pull `right` (one past the last kept byte) back over trailing bytes
       found in the strip set, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(chars, Py_CHARMASK(data[right - 1]), nchars) != NULL)
        {
            right--;
        }
    }

    PyBuffer_Release(&sep_view);

    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2027
2028
2029
/* Strip leading and/or trailing bytes for which Py_ISSPACE() is true.

   `striptype` selects the side(s): the left side is skipped only for
   RIGHTSTRIP, the right side only for LEFTSTRIP.  Returns a new reference:
   `self` itself when nothing was stripped and it is an exact bytes object,
   otherwise a new bytes object. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* Advance `left` past leading whitespace. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    /* Pull `right` (one past the last kept byte) back over trailing
       whitespace, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2056
2057
2058
/* Dispatch a strip request: None selects whitespace stripping (do_strip),
   anything else is treated as the set of bytes to strip (do_xstrip). */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    return (bytes == Py_None)
        ? do_strip(self, striptype)
        : do_xstrip(self, striptype, bytes);
}
2066
2067
/*[clinic input]
2068
bytes.strip
2069
2070
    bytes: object = None
2071
    /
2072
2073
Strip leading and trailing bytes contained in the argument.
2074
2075
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2076
[clinic start generated code]*/
2077
2078
static PyObject *
2079
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2080
/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2081
0
{
2082
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2083
0
}
2084
2085
/*[clinic input]
2086
bytes.lstrip
2087
2088
    bytes: object = None
2089
    /
2090
2091
Strip leading bytes contained in the argument.
2092
2093
If the argument is omitted or None, strip leading ASCII whitespace.
2094
[clinic start generated code]*/
2095
2096
static PyObject *
2097
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2098
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2099
0
{
2100
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2101
0
}
2102
2103
/*[clinic input]
2104
bytes.rstrip
2105
2106
    bytes: object = None
2107
    /
2108
2109
Strip trailing bytes contained in the argument.
2110
2111
If the argument is omitted or None, strip trailing ASCII whitespace.
2112
[clinic start generated code]*/
2113
2114
static PyObject *
2115
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2116
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2117
0
{
2118
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2119
0
}
2120
2121
2122
/*[clinic input]
2123
bytes.count = bytes.find
2124
2125
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2126
[clinic start generated code]*/
2127
2128
static PyObject *
2129
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2130
                 Py_ssize_t end)
2131
/*[clinic end generated code: output=9848140b9be17d0f input=b6e4a5ed515e1e59]*/
2132
0
{
2133
0
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2134
0
                           sub, start, end);
2135
0
}
2136
2137
2138
/*[clinic input]
2139
bytes.translate
2140
2141
    table: object
2142
        Translation table, which must be a bytes object of length 256.
2143
    /
2144
    delete as deletechars: object(c_default="NULL") = b''
2145
2146
Return a copy with each character mapped by the given translation table.
2147
2148
All characters occurring in the optional argument delete are removed.
2149
The remaining characters are mapped through the given translation table.
2150
[clinic start generated code]*/
2151
2152
static PyObject *
2153
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2154
                     PyObject *deletechars)
2155
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2156
0
{
2157
0
    const char *input;
2158
0
    char *output;
2159
0
    Py_buffer table_view = {NULL, NULL};
2160
0
    Py_buffer del_table_view = {NULL, NULL};
2161
0
    const char *table_chars;
2162
0
    Py_ssize_t i, c, changed = 0;
2163
0
    PyObject *input_obj = (PyObject*)self;
2164
0
    const char *output_start, *del_table_chars=NULL;
2165
0
    Py_ssize_t inlen, tablen, dellen = 0;
2166
0
    PyObject *result;
2167
0
    int trans_table[256];
2168
2169
0
    if (PyBytes_Check(table)) {
2170
0
        table_chars = PyBytes_AS_STRING(table);
2171
0
        tablen = PyBytes_GET_SIZE(table);
2172
0
    }
2173
0
    else if (table == Py_None) {
2174
0
        table_chars = NULL;
2175
0
        tablen = 256;
2176
0
    }
2177
0
    else {
2178
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2179
0
            return NULL;
2180
0
        table_chars = table_view.buf;
2181
0
        tablen = table_view.len;
2182
0
    }
2183
2184
0
    if (tablen != 256) {
2185
0
        PyErr_SetString(PyExc_ValueError,
2186
0
          "translation table must be 256 characters long");
2187
0
        PyBuffer_Release(&table_view);
2188
0
        return NULL;
2189
0
    }
2190
2191
0
    if (deletechars != NULL) {
2192
0
        if (PyBytes_Check(deletechars)) {
2193
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2194
0
            dellen = PyBytes_GET_SIZE(deletechars);
2195
0
        }
2196
0
        else {
2197
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2198
0
                PyBuffer_Release(&table_view);
2199
0
                return NULL;
2200
0
            }
2201
0
            del_table_chars = del_table_view.buf;
2202
0
            dellen = del_table_view.len;
2203
0
        }
2204
0
    }
2205
0
    else {
2206
0
        del_table_chars = NULL;
2207
0
        dellen = 0;
2208
0
    }
2209
2210
0
    inlen = PyBytes_GET_SIZE(input_obj);
2211
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2212
0
    if (result == NULL) {
2213
0
        PyBuffer_Release(&del_table_view);
2214
0
        PyBuffer_Release(&table_view);
2215
0
        return NULL;
2216
0
    }
2217
0
    output_start = output = PyBytes_AS_STRING(result);
2218
0
    input = PyBytes_AS_STRING(input_obj);
2219
2220
0
    if (dellen == 0 && table_chars != NULL) {
2221
        /* If no deletions are required, use faster code */
2222
0
        for (i = inlen; --i >= 0; ) {
2223
0
            c = Py_CHARMASK(*input++);
2224
0
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2225
0
                changed = 1;
2226
0
        }
2227
0
        if (!changed && PyBytes_CheckExact(input_obj)) {
2228
0
            Py_SETREF(result, Py_NewRef(input_obj));
2229
0
        }
2230
0
        PyBuffer_Release(&del_table_view);
2231
0
        PyBuffer_Release(&table_view);
2232
0
        return result;
2233
0
    }
2234
2235
0
    if (table_chars == NULL) {
2236
0
        for (i = 0; i < 256; i++)
2237
0
            trans_table[i] = Py_CHARMASK(i);
2238
0
    } else {
2239
0
        for (i = 0; i < 256; i++)
2240
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2241
0
    }
2242
0
    PyBuffer_Release(&table_view);
2243
2244
0
    for (i = 0; i < dellen; i++)
2245
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2246
0
    PyBuffer_Release(&del_table_view);
2247
2248
0
    for (i = inlen; --i >= 0; ) {
2249
0
        c = Py_CHARMASK(*input++);
2250
0
        if (trans_table[c] != -1)
2251
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2252
0
                continue;
2253
0
        changed = 1;
2254
0
    }
2255
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2256
0
        Py_DECREF(result);
2257
0
        return Py_NewRef(input_obj);
2258
0
    }
2259
    /* Fix the size of the resulting byte string */
2260
0
    if (inlen > 0)
2261
0
        _PyBytes_Resize(&result, output - output_start);
2262
0
    return result;
2263
0
}
2264
2265
2266
/*[clinic input]
2267
2268
@staticmethod
2269
bytes.maketrans
2270
2271
    frm: Py_buffer
2272
    to: Py_buffer
2273
    /
2274
2275
Return a translation table usable for the bytes or bytearray translate method.
2276
2277
The returned table will be one where each byte in frm is mapped to the byte at
2278
the same position in to.
2279
2280
The bytes objects frm and to must be of the same length.
2281
[clinic start generated code]*/
2282
2283
static PyObject *
2284
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2285
/*[clinic end generated code: output=a36f6399d4b77f6f input=a3bd00d430a0979f]*/
2286
28
{
2287
28
    return _Py_bytes_maketrans(frm, to);
2288
28
}
2289
2290
2291
/*[clinic input]
2292
bytes.replace
2293
2294
    old: Py_buffer
2295
    new: Py_buffer
2296
    count: Py_ssize_t = -1
2297
        Maximum number of occurrences to replace.
2298
        -1 (the default value) means replace all occurrences.
2299
    /
2300
2301
Return a copy with all occurrences of substring old replaced by new.
2302
2303
If the optional argument count is given, only the first count occurrences are
2304
replaced.
2305
[clinic start generated code]*/
2306
2307
static PyObject *
2308
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2309
                   Py_ssize_t count)
2310
/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2311
28.1k
{
2312
28.1k
    return stringlib_replace((PyObject *)self,
2313
28.1k
                             (const char *)old->buf, old->len,
2314
28.1k
                             (const char *)new->buf, new->len, count);
2315
28.1k
}
2316
2317
/** End DALKE **/
2318
2319
/*[clinic input]
2320
bytes.removeprefix as bytes_removeprefix
2321
2322
    prefix: Py_buffer
2323
    /
2324
2325
Return a bytes object with the given prefix string removed if present.
2326
2327
If the bytes starts with the prefix string, return bytes[len(prefix):].
2328
Otherwise, return a copy of the original bytes.
2329
[clinic start generated code]*/
2330
2331
static PyObject *
2332
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2333
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2334
0
{
2335
0
    const char *self_start = PyBytes_AS_STRING(self);
2336
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2337
0
    const char *prefix_start = prefix->buf;
2338
0
    Py_ssize_t prefix_len = prefix->len;
2339
2340
0
    if (self_len >= prefix_len
2341
0
        && prefix_len > 0
2342
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2343
0
    {
2344
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2345
0
                                         self_len - prefix_len);
2346
0
    }
2347
2348
0
    if (PyBytes_CheckExact(self)) {
2349
0
        return Py_NewRef(self);
2350
0
    }
2351
2352
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2353
0
}
2354
2355
/*[clinic input]
2356
bytes.removesuffix as bytes_removesuffix
2357
2358
    suffix: Py_buffer
2359
    /
2360
2361
Return a bytes object with the given suffix string removed if present.
2362
2363
If the bytes ends with the suffix string and that suffix is not empty,
2364
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2365
bytes.
2366
[clinic start generated code]*/
2367
2368
static PyObject *
2369
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2370
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2371
0
{
2372
0
    const char *self_start = PyBytes_AS_STRING(self);
2373
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2374
0
    const char *suffix_start = suffix->buf;
2375
0
    Py_ssize_t suffix_len = suffix->len;
2376
2377
0
    if (self_len >= suffix_len
2378
0
        && suffix_len > 0
2379
0
        && memcmp(self_start + self_len - suffix_len,
2380
0
                  suffix_start, suffix_len) == 0)
2381
0
    {
2382
0
        return PyBytes_FromStringAndSize(self_start,
2383
0
                                         self_len - suffix_len);
2384
0
    }
2385
2386
0
    if (PyBytes_CheckExact(self)) {
2387
0
        return Py_NewRef(self);
2388
0
    }
2389
2390
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2391
0
}
2392
2393
/*[clinic input]
2394
@text_signature "($self, prefix[, start[, end]], /)"
2395
bytes.startswith
2396
2397
    prefix as subobj: object
2398
        A bytes or a tuple of bytes to try.
2399
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2400
        Optional start position. Default: start of the bytes.
2401
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2402
        Optional stop position. Default: end of the bytes.
2403
    /
2404
2405
Return True if the bytes starts with the specified prefix, False otherwise.
2406
[clinic start generated code]*/
2407
2408
static PyObject *
2409
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2410
                      Py_ssize_t start, Py_ssize_t end)
2411
/*[clinic end generated code: output=b1e8da1cbd528e8c input=8a4165df8adfa6c9]*/
2412
576k
{
2413
576k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2414
576k
                                subobj, start, end);
2415
576k
}
2416
2417
/*[clinic input]
2418
@text_signature "($self, suffix[, start[, end]], /)"
2419
bytes.endswith
2420
2421
    suffix as subobj: object
2422
        A bytes or a tuple of bytes to try.
2423
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2424
         Optional start position. Default: start of the bytes.
2425
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2426
         Optional stop position. Default: end of the bytes.
2427
    /
2428
2429
Return True if the bytes ends with the specified suffix, False otherwise.
2430
[clinic start generated code]*/
2431
2432
static PyObject *
2433
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2434
                    Py_ssize_t end)
2435
/*[clinic end generated code: output=038b633111f3629d input=b5c3407a2a5c9aac]*/
2436
0
{
2437
0
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2438
0
                              subobj, start, end);
2439
0
}
2440
2441
2442
/*[clinic input]
2443
bytes.decode
2444
2445
    encoding: str(c_default="NULL") = 'utf-8'
2446
        The encoding with which to decode the bytes.
2447
    errors: str(c_default="NULL") = 'strict'
2448
        The error handling scheme to use for the handling of decoding errors.
2449
        The default is 'strict' meaning that decoding errors raise a
2450
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2451
        as well as any other name registered with codecs.register_error that
2452
        can handle UnicodeDecodeErrors.
2453
2454
Decode the bytes using the codec registered for encoding.
2455
[clinic start generated code]*/
2456
2457
static PyObject *
2458
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2459
                  const char *errors)
2460
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2461
3.47M
{
2462
3.47M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2463
3.47M
}
2464
2465
2466
/*[clinic input]
2467
bytes.splitlines
2468
2469
    keepends: bool = False
2470
2471
Return a list of the lines in the bytes, breaking at line boundaries.
2472
2473
Line breaks are not included in the resulting list unless keepends is given and
2474
true.
2475
[clinic start generated code]*/
2476
2477
static PyObject *
2478
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2479
/*[clinic end generated code: output=3484149a5d880ffb input=5d7b898af2fe55c0]*/
2480
0
{
2481
0
    return stringlib_splitlines(
2482
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2483
0
        PyBytes_GET_SIZE(self), keepends
2484
0
        );
2485
0
}
2486
2487
/*[clinic input]
2488
@classmethod
2489
bytes.fromhex
2490
2491
    string: object
2492
    /
2493
2494
Create a bytes object from a string of hexadecimal numbers.
2495
2496
Spaces between two numbers are accepted.
2497
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2498
[clinic start generated code]*/
2499
2500
static PyObject *
2501
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2502
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2503
27.5k
{
2504
27.5k
    PyObject *result = _PyBytes_FromHex(string, 0);
2505
27.5k
    if (type != &PyBytes_Type && result != NULL) {
2506
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2507
0
    }
2508
27.5k
    return result;
2509
27.5k
}
2510
2511
/* Decode a string of hexadecimal digit pairs.

   `string` must be a str or an object supporting the buffer protocol; any
   other type raises TypeError.  Whitespace (Py_ISSPACE) is skipped before
   each pair of digits.  `use_bytearray` is forwarded to the _PyBytesWriter
   (presumably selecting a bytearray result instead of bytes -- confirm
   against _PyBytesWriter_Finish).

   Returns the object produced by _PyBytesWriter_Finish(), or NULL with an
   exception set: ValueError for a non-hexadecimal character (including any
   non-ASCII character in a str) or for an odd number of digits.

   Every read of *str is bounds-checked against `end`: a str's data is
   NUL-terminated, but memory exported through the buffer protocol is not
   guaranteed to be, so the terminator must not be relied upon. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    char *buf;
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    _PyBytesWriter writer;
    Py_buffer view;
    view.obj = NULL;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
    if (buf == NULL) {
        goto release_buffer;
    }

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking at or past `end` */
        while (str < end && Py_ISSPACE(*str)) {
            str++;
        }
        if (str >= end) {
            break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* Check that a second digit exists before reading it */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }
        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return _PyBytesWriter_Finish(&writer, buf);

  error:
    /* invalid_char == -1 flags an odd digit count; otherwise it is the
       offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    _PyBytesWriter_Dealloc(&writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2621
2622
/*[clinic input]
2623
bytes.hex
2624
2625
    sep: object = NULL
2626
        An optional single character or byte to separate hex bytes.
2627
    bytes_per_sep: int = 1
2628
        How many bytes between separators.  Positive values count from the
2629
        right, negative values count from the left.
2630
2631
Create a string of hexadecimal numbers from a bytes object.
2632
2633
Example:
2634
>>> value = b'\xb9\x01\xef'
2635
>>> value.hex()
2636
'b901ef'
2637
>>> value.hex(':')
2638
'b9:01:ef'
2639
>>> value.hex(':', 2)
2640
'b9:01ef'
2641
>>> value.hex(':', -2)
2642
'b901:ef'
2643
[clinic start generated code]*/
2644
2645
static PyObject *
2646
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2647
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2648
0
{
2649
0
    const char *argbuf = PyBytes_AS_STRING(self);
2650
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2651
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2652
0
}
2653
2654
/* __getnewargs__: build a 1-tuple holding a bytes copy of this object's
   contents ("(y#)" format: data pointer plus explicit length). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *data = self->ob_sval;
    Py_ssize_t size = Py_SIZE(self);

    return Py_BuildValue("(y#)", data, size);
}
2660
2661
2662
/* Method definitions for bytes objects (presumably installed as the
   tp_methods of PyBytes_Type; that hookup is not visible here).  Entries
   are either explicit {name, function, flags, doc} initializers or
   *_METHODDEF macros, and the array ends with a {NULL, NULL} sentinel. */
static PyMethodDef
2663
bytes_methods[] = {
2664
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2665
    BYTES___BYTES___METHODDEF
2666
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2667
     _Py_capitalize__doc__},
2668
    STRINGLIB_CENTER_METHODDEF
2669
    BYTES_COUNT_METHODDEF
2670
    BYTES_DECODE_METHODDEF
2671
    BYTES_ENDSWITH_METHODDEF
2672
    STRINGLIB_EXPANDTABS_METHODDEF
2673
    BYTES_FIND_METHODDEF
2674
    BYTES_FROMHEX_METHODDEF
2675
    BYTES_HEX_METHODDEF
2676
    BYTES_INDEX_METHODDEF
2677
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2678
     _Py_isalnum__doc__},
2679
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2680
     _Py_isalpha__doc__},
2681
    {"isascii", stringlib_isascii, METH_NOARGS,
2682
     _Py_isascii__doc__},
2683
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2684
     _Py_isdigit__doc__},
2685
    {"islower", stringlib_islower, METH_NOARGS,
2686
     _Py_islower__doc__},
2687
    {"isspace", stringlib_isspace, METH_NOARGS,
2688
     _Py_isspace__doc__},
2689
    {"istitle", stringlib_istitle, METH_NOARGS,
2690
     _Py_istitle__doc__},
2691
    {"isupper", stringlib_isupper, METH_NOARGS,
2692
     _Py_isupper__doc__},
2693
    BYTES_JOIN_METHODDEF
2694
    STRINGLIB_LJUST_METHODDEF
2695
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2696
    BYTES_LSTRIP_METHODDEF
2697
    BYTES_MAKETRANS_METHODDEF
2698
    BYTES_PARTITION_METHODDEF
2699
    BYTES_REPLACE_METHODDEF
2700
    BYTES_REMOVEPREFIX_METHODDEF
2701
    BYTES_REMOVESUFFIX_METHODDEF
2702
    BYTES_RFIND_METHODDEF
2703
    BYTES_RINDEX_METHODDEF
2704
    STRINGLIB_RJUST_METHODDEF
2705
    BYTES_RPARTITION_METHODDEF
2706
    BYTES_RSPLIT_METHODDEF
2707
    BYTES_RSTRIP_METHODDEF
2708
    BYTES_SPLIT_METHODDEF
2709
    BYTES_SPLITLINES_METHODDEF
2710
    BYTES_STARTSWITH_METHODDEF
2711
    BYTES_STRIP_METHODDEF
2712
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2713
     _Py_swapcase__doc__},
2714
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2715
    BYTES_TRANSLATE_METHODDEF
2716
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2717
    STRINGLIB_ZFILL_METHODDEF
2718
    {NULL,     NULL}                         /* sentinel */
2719
};
2720
2721
/* nb_remainder slot: format `self` (used as the format string) with `arg`
   via _PyBytes_FormatEx().  Returns NotImplemented when the left operand is
   not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2730
2731
static PyNumberMethods bytes_as_number = {
2732
    0,              /*nb_add*/
2733
    0,              /*nb_subtract*/
2734
    0,              /*nb_multiply*/
2735
    bytes_mod,      /*nb_remainder*/
2736
};
2737
2738
/* Forward declaration; the definition lies outside this excerpt. */
static PyObject *
2739
bytes_subtype_new(PyTypeObject *, PyObject *);
2740
2741
/*[clinic input]
2742
@classmethod
2743
bytes.__new__ as bytes_new
2744
2745
    source as x: object = NULL
2746
    encoding: str = NULL
2747
    errors: str = NULL
2748
2749
[clinic start generated code]*/
2750
2751
static PyObject *
2752
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2753
               const char *errors)
2754
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2755
742k
{
2756
742k
    PyObject *bytes;
2757
742k
    PyObject *func;
2758
742k
    Py_ssize_t size;
2759
2760
742k
    if (x == NULL) {
2761
0
        if (encoding != NULL || errors != NULL) {
2762
0
            PyErr_SetString(PyExc_TypeError,
2763
0
                            encoding != NULL ?
2764
0
                            "encoding without a string argument" :
2765
0
                            "errors without a string argument");
2766
0
            return NULL;
2767
0
        }
2768
0
        bytes = PyBytes_FromStringAndSize(NULL, 0);
2769
0
    }
2770
742k
    else if (encoding != NULL) {
2771
        /* Encode via the codec registry */
2772
200k
        if (!PyUnicode_Check(x)) {
2773
0
            PyErr_SetString(PyExc_TypeError,
2774
0
                            "encoding without a string argument");
2775
0
            return NULL;
2776
0
        }
2777
200k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2778
200k
    }
2779
542k
    else if (errors != NULL) {
2780
0
        PyErr_SetString(PyExc_TypeError,
2781
0
                        PyUnicode_Check(x) ?
2782
0
                        "string argument without an encoding" :
2783
0
                        "errors without a string argument");
2784
0
        return NULL;
2785
0
    }
2786
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2787
       integer argument before deferring to PyBytes_FromObject, something
2788
       PyObject_Bytes doesn't do. */
2789
542k
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2790
69.9k
        bytes = _PyObject_CallNoArgs(func);
2791
69.9k
        Py_DECREF(func);
2792
69.9k
        if (bytes == NULL)
2793
0
            return NULL;
2794
69.9k
        if (!PyBytes_Check(bytes)) {
2795
0
            PyErr_Format(PyExc_TypeError,
2796
0
                        "__bytes__ returned non-bytes (type %.200s)",
2797
0
                        Py_TYPE(bytes)->tp_name);
2798
0
            Py_DECREF(bytes);
2799
0
            return NULL;
2800
0
        }
2801
69.9k
    }
2802
472k
    else if (PyErr_Occurred())
2803
0
        return NULL;
2804
472k
    else if (PyUnicode_Check(x)) {
2805
0
        PyErr_SetString(PyExc_TypeError,
2806
0
                        "string argument without an encoding");
2807
0
        return NULL;
2808
0
    }
2809
    /* Is it an integer? */
2810
472k
    else if (_PyIndex_Check(x)) {
2811
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2812
0
        if (size == -1 && PyErr_Occurred()) {
2813
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2814
0
                return NULL;
2815
0
            PyErr_Clear();  /* fall through */
2816
0
            bytes = PyBytes_FromObject(x);
2817
0
        }
2818
0
        else {
2819
0
            if (size < 0) {
2820
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2821
0
                return NULL;
2822
0
            }
2823
0
            bytes = _PyBytes_FromSize(size, 1);
2824
0
        }
2825
0
    }
2826
472k
    else {
2827
472k
        bytes = PyBytes_FromObject(x);
2828
472k
    }
2829
2830
742k
    if (bytes != NULL && type != &PyBytes_Type) {
2831
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2832
0
    }
2833
2834
742k
    return bytes;
2835
742k
}
2836
2837
/* Copy the contents of a buffer-exporting object into a new bytes object.
 *
 * A PyBUF_FULL_RO view of `x` is requested and its data is copied in
 * C-contiguous order.  The view is released before returning.  Returns
 * a new reference, or NULL with an exception set.
 */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;

    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyObject *copy = PyBytes_FromStringAndSize(NULL, view.len);
    if (copy != NULL) {
        char *dest = ((PyBytesObject *)copy)->ob_sval;
        if (PyBuffer_ToContiguous(dest, &view, view.len, 'C') < 0) {
            /* Drop the partially filled object; NULL is returned below. */
            Py_CLEAR(copy);
        }
    }

    PyBuffer_Release(&view);
    return copy;
}
2860
2861
/* Build a bytes object from the items of a list.
 *
 * Each item is converted with PyNumber_AsSsize_t() and must lie in
 * range(0, 256), otherwise ValueError is raised.  The list length is
 * re-read on every iteration and the output buffer is grown on demand,
 * so the loop does not rely on the length observed at entry.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    _PyBytesWriter writer;
    Py_ssize_t capacity;
    char *out;

    _PyBytesWriter_Init(&writer);
    out = _PyBytesWriter_Alloc(&writer, PyList_GET_SIZE(x));
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    for (Py_ssize_t idx = 0; idx < PyList_GET_SIZE(x); idx++) {
        PyObject *elem = PyList_GET_ITEM(x, idx);
        /* Hold a reference to the item for the duration of the
           conversion call. */
        Py_INCREF(elem);
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (idx >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                /* _PyBytesWriter_Resize() has already released the
                   writer on failure. */
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) byte;
    }
    return _PyBytesWriter_Finish(&writer, out);

  error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2905
2906
/* Build a bytes object from the items of a tuple.
 *
 * The result is allocated up front with the tuple's length.  Each item
 * is converted with PyNumber_AsSsize_t() and must lie in range(0, 256),
 * otherwise ValueError is raised.  Returns a new reference, or NULL
 * with an exception set.
 */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyObject *result = PyBytes_FromStringAndSize(NULL, count);
    if (result == NULL) {
        return NULL;
    }
    char *out = ((PyBytesObject *)result)->ob_sval;

    for (Py_ssize_t idx = 0; idx < count; idx++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, idx), NULL);
        if (byte == -1 && PyErr_Occurred()) {
            Py_DECREF(result);
            return NULL;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            Py_DECREF(result);
            return NULL;
        }
        out[idx] = (char) byte;
    }
    return result;
}
2939
2940
/* Build a bytes object by exhausting the iterator `it`.
 *
 * `x` is only used to obtain a length hint (default 64) for the initial
 * buffer size; the buffer is grown as needed.  Each yielded item is
 * converted with PyNumber_AsSsize_t() and must lie in range(0, 256),
 * otherwise ValueError is raised.  Returns a new reference, or NULL
 * with an exception set.
 */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    _PyBytesWriter writer;
    _PyBytesWriter_Init(&writer);
    char *out = _PyBytesWriter_Alloc(&writer, capacity);
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    Py_ssize_t written = 0;
    PyObject *elem;
    while ((elem = PyIter_Next(it)) != NULL) {
        /* Interpret the item as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Make room for one more byte if the buffer is full */
        if (written >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                /* _PyBytesWriter_Resize() has already released the
                   writer on failure. */
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) byte;
        written++;
    }

    /* PyIter_Next() returns NULL both on exhaustion and on error. */
    if (PyErr_Occurred()) {
        goto error;
    }
    return _PyBytesWriter_Finish(&writer, out);

  error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
3001
3002
/* Convert an arbitrary object to bytes.
 *
 * Conversion strategies, in order:
 *   - exact bytes: return a new reference to `x` itself
 *   - buffer exporter: copy the buffer
 *   - exact list / exact tuple: dedicated fast paths
 *   - any other non-str iterable: iterate over it
 * A NULL argument raises via PyErr_BadInternalCall().  str objects and
 * objects for which PyObject_GetIter() raises TypeError produce
 * "cannot convert ... to bytes"; other iteration errors propagate.
 */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iterator = PyObject_GetIter(x);
        if (iterator == NULL) {
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
            /* A TypeError is replaced by the message below. */
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iterator, x);
            Py_DECREF(iterator);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3043
3044
/* This allocator is needed for subclasses that don't want to use __new__.
3045
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3046
 *
3047
 * This allocator will be removed when ob_shash is removed.
3048
 */
3049
static PyObject *
3050
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3051
0
{
3052
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3053
0
    if (obj == NULL) {
3054
0
        return NULL;
3055
0
    }
3056
0
    set_ob_shash(obj, -1);
3057
0
    return (PyObject*)obj;
3058
0
}
3059
3060
static PyObject *
3061
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3062
0
{
3063
0
    PyObject *pnew;
3064
0
    Py_ssize_t n;
3065
3066
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3067
0
    assert(PyBytes_Check(tmp));
3068
0
    n = PyBytes_GET_SIZE(tmp);
3069
0
    pnew = type->tp_alloc(type, n);
3070
0
    if (pnew != NULL) {
3071
0
        memcpy(PyBytes_AS_STRING(pnew),
3072
0
                  PyBytes_AS_STRING(tmp), n+1);
3073
0
        set_ob_shash((PyBytesObject *)pnew,
3074
0
            get_ob_shash((PyBytesObject *)tmp));
3075
0
    }
3076
0
    return pnew;
3077
0
}
3078
3079
PyDoc_STRVAR(bytes_doc,
3080
"bytes(iterable_of_ints) -> bytes\n\
3081
bytes(string, encoding[, errors]) -> bytes\n\
3082
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3083
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3084
bytes() -> empty bytes object\n\
3085
\n\
3086
Construct an immutable array of bytes from:\n\
3087
  - an iterable yielding integers in range(256)\n\
3088
  - a text string encoded using the specified encoding\n\
3089
  - any object implementing the buffer API.\n\
3090
  - an integer");
3091
3092
static PyObject *bytes_iter(PyObject *seq);
3093
3094
PyTypeObject PyBytes_Type = {
3095
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3096
    "bytes",
3097
    PyBytesObject_SIZE,
3098
    sizeof(char),
3099
    0,                                          /* tp_dealloc */
3100
    0,                                          /* tp_vectorcall_offset */
3101
    0,                                          /* tp_getattr */
3102
    0,                                          /* tp_setattr */
3103
    0,                                          /* tp_as_async */
3104
    bytes_repr,                                 /* tp_repr */
3105
    &bytes_as_number,                           /* tp_as_number */
3106
    &bytes_as_sequence,                         /* tp_as_sequence */
3107
    &bytes_as_mapping,                          /* tp_as_mapping */
3108
    bytes_hash,                                 /* tp_hash */
3109
    0,                                          /* tp_call */
3110
    bytes_str,                                  /* tp_str */
3111
    PyObject_GenericGetAttr,                    /* tp_getattro */
3112
    0,                                          /* tp_setattro */
3113
    &bytes_as_buffer,                           /* tp_as_buffer */
3114
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3115
        Py_TPFLAGS_BYTES_SUBCLASS |
3116
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3117
    bytes_doc,                                  /* tp_doc */
3118
    0,                                          /* tp_traverse */
3119
    0,                                          /* tp_clear */
3120
    bytes_richcompare,                          /* tp_richcompare */
3121
    0,                                          /* tp_weaklistoffset */
3122
    bytes_iter,                                 /* tp_iter */
3123
    0,                                          /* tp_iternext */
3124
    bytes_methods,                              /* tp_methods */
3125
    0,                                          /* tp_members */
3126
    0,                                          /* tp_getset */
3127
    0,                                          /* tp_base */
3128
    0,                                          /* tp_dict */
3129
    0,                                          /* tp_descr_get */
3130
    0,                                          /* tp_descr_set */
3131
    0,                                          /* tp_dictoffset */
3132
    0,                                          /* tp_init */
3133
    bytes_alloc,                                /* tp_alloc */
3134
    bytes_new,                                  /* tp_new */
3135
    PyObject_Free,                              /* tp_free */
3136
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3137
};
3138
3139
void
3140
PyBytes_Concat(PyObject **pv, PyObject *w)
3141
2.96k
{
3142
2.96k
    assert(pv != NULL);
3143
2.96k
    if (*pv == NULL)
3144
0
        return;
3145
2.96k
    if (w == NULL) {
3146
0
        Py_CLEAR(*pv);
3147
0
        return;
3148
0
    }
3149
3150
2.96k
    if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3151
        /* Only one reference, so we can resize in place */
3152
877
        Py_ssize_t oldsize;
3153
877
        Py_buffer wb;
3154
3155
877
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3156
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3157
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3158
0
            Py_CLEAR(*pv);
3159
0
            return;
3160
0
        }
3161
3162
877
        oldsize = PyBytes_GET_SIZE(*pv);
3163
877
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3164
0
            PyErr_NoMemory();
3165
0
            goto error;
3166
0
        }
3167
877
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3168
0
            goto error;
3169
3170
877
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3171
877
        PyBuffer_Release(&wb);
3172
877
        return;
3173
3174
0
      error:
3175
0
        PyBuffer_Release(&wb);
3176
0
        Py_CLEAR(*pv);
3177
0
        return;
3178
877
    }
3179
3180
2.09k
    else {
3181
        /* Multiple references, need to create new object */
3182
2.09k
        PyObject *v;
3183
2.09k
        v = bytes_concat(*pv, w);
3184
2.09k
        Py_SETREF(*pv, v);
3185
2.09k
    }
3186
2.96k
}
3187
3188
void
3189
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3190
0
{
3191
0
    PyBytes_Concat(pv, w);
3192
0
    Py_XDECREF(w);
3193
0
}
3194
3195
3196
/* The following function breaks the notion that bytes are immutable:
3197
   it changes the size of a bytes object.  You can think of it
3198
   as creating a new bytes object and destroying the old one, only
3199
   more efficiently.
3200
   Note that if there's not enough memory to resize the bytes object, the
3201
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3202
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3203
   returned, and the value in *pv may or may not be the same as on input.
3204
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3205
   does *not* include that), and a trailing \0 byte is stored.
3206
*/
3207
3208
int
3209
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3210
678k
{
3211
678k
    PyObject *v;
3212
678k
    PyBytesObject *sv;
3213
678k
    v = *pv;
3214
678k
    if (!PyBytes_Check(v) || newsize < 0) {
3215
0
        *pv = 0;
3216
0
        Py_DECREF(v);
3217
0
        PyErr_BadInternalCall();
3218
0
        return -1;
3219
0
    }
3220
678k
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3221
678k
    if (oldsize == newsize) {
3222
        /* return early if newsize equals to v->ob_size */
3223
413k
        return 0;
3224
413k
    }
3225
265k
    if (oldsize == 0) {
3226
0
        *pv = _PyBytes_FromSize(newsize, 0);
3227
0
        Py_DECREF(v);
3228
0
        return (*pv == NULL) ? -1 : 0;
3229
0
    }
3230
265k
    if (newsize == 0) {
3231
4.84k
        *pv = bytes_get_empty();
3232
4.84k
        Py_DECREF(v);
3233
4.84k
        return 0;
3234
4.84k
    }
3235
260k
    if (Py_REFCNT(v) != 1) {
3236
0
        if (oldsize < newsize) {
3237
0
            *pv = _PyBytes_FromSize(newsize, 0);
3238
0
            if (*pv) {
3239
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3240
0
            }
3241
0
        }
3242
0
        else {
3243
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3244
0
        }
3245
0
        Py_DECREF(v);
3246
0
        return (*pv == NULL) ? -1 : 0;
3247
0
    }
3248
3249
#ifdef Py_TRACE_REFS
3250
    _Py_ForgetReference(v);
3251
#endif
3252
260k
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3253
260k
    *pv = (PyObject *)
3254
260k
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3255
260k
    if (*pv == NULL) {
3256
#ifdef Py_REF_DEBUG
3257
        _Py_DecRefTotal(_PyThreadState_GET());
3258
#endif
3259
0
        PyObject_Free(v);
3260
0
        PyErr_NoMemory();
3261
0
        return -1;
3262
0
    }
3263
260k
    _Py_NewReferenceNoTotal(*pv);
3264
260k
    sv = (PyBytesObject *) *pv;
3265
260k
    Py_SET_SIZE(sv, newsize);
3266
260k
    sv->ob_sval[newsize] = '\0';
3267
260k
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3268
260k
    return 0;
3269
260k
}
3270
3271
3272
/*********************** Bytes Iterator ****************************/
3273
3274
/* State of a bytes iterator (see PyBytesIter_Type). */
typedef struct {
3275
    PyObject_HEAD
3276
    Py_ssize_t it_index;   /* Index into it_seq of the next byte to yield */
3277
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3278
} striterobject;
3279
3280
1.73k
#define _striterobject_CAST(op)  ((striterobject *)(op))
3281
3282
static void
3283
striter_dealloc(PyObject *op)
3284
51
{
3285
51
    striterobject *it = _striterobject_CAST(op);
3286
51
    _PyObject_GC_UNTRACK(it);
3287
51
    Py_XDECREF(it->it_seq);
3288
51
    PyObject_GC_Del(it);
3289
51
}
3290
3291
static int
3292
striter_traverse(PyObject *op, visitproc visit, void *arg)
3293
0
{
3294
0
    striterobject *it = _striterobject_CAST(op);
3295
0
    Py_VISIT(it->it_seq);
3296
0
    return 0;
3297
0
}
3298
3299
/* tp_iternext for bytes iterators.
 *
 * Returns the next byte as an int object and advances the index.  When
 * the index reaches the end, the reference to the bytes object is
 * released, it_seq is set to NULL and NULL is returned (no exception is
 * set here).  An already exhausted iterator returns NULL immediately.
 */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *self = _striterobject_CAST(op);
    assert(self != NULL);

    PyBytesObject *bytes = self->it_seq;
    if (bytes == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(bytes));

    if (self->it_index >= PyBytes_GET_SIZE(bytes)) {
        /* Exhausted: drop the sequence so later calls return at once. */
        self->it_seq = NULL;
        Py_DECREF(bytes);
        return NULL;
    }

    unsigned char byte = (unsigned char)bytes->ob_sval[self->it_index];
    self->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3320
3321
/* __length_hint__: number of bytes left to yield, or 0 once the
   iterator is exhausted. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *self = _striterobject_CAST(op);
    Py_ssize_t remaining;

    if (self->it_seq != NULL) {
        remaining = PyBytes_GET_SIZE(self->it_seq) - self->it_index;
    }
    else {
        remaining = 0;
    }
    return PyLong_FromSsize_t(remaining);
}
3330
3331
PyDoc_STRVAR(length_hint_doc,
3332
             "Private method returning an estimate of len(list(it)).");
3333
3334
/* __reduce__: return (iter, (seq,), index) for a live iterator, or
   (iter, ((),)) for an exhausted one, where `iter` is the builtin
   obtained from _PyEval_GetBuiltin(). */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    /* _PyEval_GetBuiltin can invoke arbitrary code, so the call must
     * happen before the iterator's fields are read.
     * see issue #101765 */
    PyObject *iter_builtin = _PyEval_GetBuiltin(&_Py_ID(iter));

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_builtin);
    }
    return Py_BuildValue("N(O)n", iter_builtin, self->it_seq,
                         self->it_index);
}
3349
3350
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3351
3352
/* __setstate__: set the iterator position from an int.
 *
 * The value is clamped to [0, len(seq)].  Nothing is changed when the
 * iterator is already exhausted.  Returns None, or NULL if `state`
 * cannot be converted by PyLong_AsSsize_t().
 */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t position = PyLong_AsSsize_t(state);
    if (position == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq != NULL) {
        const Py_ssize_t limit = PyBytes_GET_SIZE(self->it_seq);
        if (position < 0) {
            position = 0;
        }
        else if (position > limit) {
            position = limit; /* iterator exhausted */
        }
        self->it_index = position;
    }
    Py_RETURN_NONE;
}
3368
3369
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3370
3371
/* Method table for bytes iterators (installed as tp_methods of
   PyBytesIter_Type): length hint plus the pickling helpers. */
static PyMethodDef striter_methods[] = {
3372
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3373
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3374
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3375
    {NULL,              NULL}           /* sentinel */
3376
};
3377
3378
PyTypeObject PyBytesIter_Type = {
3379
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3380
    "bytes_iterator",                           /* tp_name */
3381
    sizeof(striterobject),                      /* tp_basicsize */
3382
    0,                                          /* tp_itemsize */
3383
    /* methods */
3384
    striter_dealloc,                            /* tp_dealloc */
3385
    0,                                          /* tp_vectorcall_offset */
3386
    0,                                          /* tp_getattr */
3387
    0,                                          /* tp_setattr */
3388
    0,                                          /* tp_as_async */
3389
    0,                                          /* tp_repr */
3390
    0,                                          /* tp_as_number */
3391
    0,                                          /* tp_as_sequence */
3392
    0,                                          /* tp_as_mapping */
3393
    0,                                          /* tp_hash */
3394
    0,                                          /* tp_call */
3395
    0,                                          /* tp_str */
3396
    PyObject_GenericGetAttr,                    /* tp_getattro */
3397
    0,                                          /* tp_setattro */
3398
    0,                                          /* tp_as_buffer */
3399
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3400
    0,                                          /* tp_doc */
3401
    striter_traverse,                           /* tp_traverse */
3402
    0,                                          /* tp_clear */
3403
    0,                                          /* tp_richcompare */
3404
    0,                                          /* tp_weaklistoffset */
3405
    PyObject_SelfIter,                          /* tp_iter */
3406
    striter_next,                               /* tp_iternext */
3407
    striter_methods,                            /* tp_methods */
3408
    0,
3409
};
3410
3411
/* tp_iter for bytes: create a GC-tracked iterator positioned at index 0
 * that holds a new reference to `seq`.
 *
 * Raises via PyErr_BadInternalCall() if `seq` is not a bytes object;
 * returns NULL if the iterator cannot be allocated.
 */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject,
                                              &PyBytesIter_Type);
    if (iterator == NULL) {
        return NULL;
    }
    iterator->it_index = 0;
    iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
3428
3429
3430
/* _PyBytesWriter API */
3431
3432
#ifdef MS_WINDOWS
3433
   /* On Windows, overallocating by 50% is the best factor */
3434
#  define OVERALLOCATE_FACTOR 2
3435
#else
3436
   /* On Linux, overallocating by 25% is the best factor */
3437
0
#  define OVERALLOCATE_FACTOR 4
3438
#endif
3439
3440
void
3441
_PyBytesWriter_Init(_PyBytesWriter *writer)
3442
6.84M
{
3443
    /* Set all attributes before small_buffer to 0 */
3444
6.84M
    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3445
#ifndef NDEBUG
3446
    memset(writer->small_buffer, PYMEM_CLEANBYTE,
3447
           sizeof(writer->small_buffer));
3448
#endif
3449
6.84M
}
3450
3451
void
3452
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3453
341k
{
3454
341k
    Py_CLEAR(writer->buffer);
3455
341k
}
3456
3457
Py_LOCAL_INLINE(char*)
3458
_PyBytesWriter_AsString(_PyBytesWriter *writer)
3459
6.91M
{
3460
6.91M
    if (writer->use_small_buffer) {
3461
6.51M
        assert(writer->buffer == NULL);
3462
6.51M
        return writer->small_buffer;
3463
6.51M
    }
3464
397k
    else if (writer->use_bytearray) {
3465
0
        assert(writer->buffer != NULL);
3466
0
        return PyByteArray_AS_STRING(writer->buffer);
3467
0
    }
3468
397k
    else {
3469
397k
        assert(writer->buffer != NULL);
3470
397k
        return PyBytes_AS_STRING(writer->buffer);
3471
397k
    }
3472
6.91M
}
3473
3474
Py_LOCAL_INLINE(Py_ssize_t)
3475
_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3476
6.70M
{
3477
6.70M
    const char *start = _PyBytesWriter_AsString(writer);
3478
6.70M
    assert(str != NULL);
3479
6.70M
    assert(str >= start);
3480
6.70M
    assert(str - start <= writer->allocated);
3481
6.70M
    return str - start;
3482
6.70M
}
3483
3484
#ifndef NDEBUG
3485
Py_LOCAL_INLINE(int)
3486
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3487
{
3488
    const char *start, *end;
3489
3490
    if (writer->use_small_buffer) {
3491
        assert(writer->buffer == NULL);
3492
    }
3493
    else {
3494
        assert(writer->buffer != NULL);
3495
        if (writer->use_bytearray)
3496
            assert(PyByteArray_CheckExact(writer->buffer));
3497
        else
3498
            assert(PyBytes_CheckExact(writer->buffer));
3499
        assert(Py_REFCNT(writer->buffer) == 1);
3500
    }
3501
3502
    if (writer->use_bytearray) {
3503
        /* bytearray has its own overallocation algorithm,
3504
           writer overallocation must be disabled */
3505
        assert(!writer->overallocate);
3506
    }
3507
3508
    assert(0 <= writer->allocated);
3509
    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3510
    /* the last byte must always be null */
3511
    start = _PyBytesWriter_AsString(writer);
3512
    assert(start[writer->allocated] == 0);
3513
3514
    end = start + writer->allocated;
3515
    assert(str != NULL);
3516
    assert(start <= str && str <= end);
3517
    return 1;
3518
}
3519
#endif
3520
3521
void*
3522
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3523
207k
{
3524
207k
    Py_ssize_t allocated, pos;
3525
3526
207k
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3527
207k
    assert(writer->allocated < size);
3528
3529
207k
    allocated = size;
3530
207k
    if (writer->overallocate
3531
207k
        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3532
        /* overallocate to limit the number of realloc() */
3533
0
        allocated += allocated / OVERALLOCATE_FACTOR;
3534
0
    }
3535
3536
207k
    pos = _PyBytesWriter_GetSize(writer, str);
3537
207k
    if (!writer->use_small_buffer) {
3538
0
        if (writer->use_bytearray) {
3539
0
            if (PyByteArray_Resize(writer->buffer, allocated))
3540
0
                goto error;
3541
            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3542
               but we cannot use ob_alloc because bytes may need to be moved
3543
               to use the whole buffer. bytearray uses an internal optimization
3544
               to avoid moving or copying bytes when bytes are removed at the
3545
               beginning (ex: del bytearray[:1]). */
3546
0
        }
3547
0
        else {
3548
0
            if (_PyBytes_Resize(&writer->buffer, allocated))
3549
0
                goto error;
3550
0
        }
3551
0
    }
3552
207k
    else {
3553
        /* convert from stack buffer to bytes object buffer */
3554
207k
        assert(writer->buffer == NULL);
3555
3556
207k
        if (writer->use_bytearray)
3557
0
            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3558
207k
        else
3559
207k
            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3560
207k
        if (writer->buffer == NULL)
3561
0
            goto error;
3562
3563
207k
        if (pos != 0) {
3564
0
            char *dest;
3565
0
            if (writer->use_bytearray)
3566
0
                dest = PyByteArray_AS_STRING(writer->buffer);
3567
0
            else
3568
0
                dest = PyBytes_AS_STRING(writer->buffer);
3569
0
            memcpy(dest,
3570
0
                      writer->small_buffer,
3571
0
                      pos);
3572
0
        }
3573
3574
207k
        writer->use_small_buffer = 0;
3575
#ifndef NDEBUG
3576
        memset(writer->small_buffer, PYMEM_CLEANBYTE,
3577
               sizeof(writer->small_buffer));
3578
#endif
3579
207k
    }
3580
207k
    writer->allocated = allocated;
3581
3582
207k
    str = _PyBytesWriter_AsString(writer) + pos;
3583
207k
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3584
207k
    return str;
3585
3586
0
error:
3587
0
    _PyBytesWriter_Dealloc(writer);
3588
0
    return NULL;
3589
207k
}
3590
3591
void*
3592
_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3593
6.84M
{
3594
6.84M
    Py_ssize_t new_min_size;
3595
3596
6.84M
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3597
6.84M
    assert(size >= 0);
3598
3599
6.84M
    if (size == 0) {
3600
        /* nothing to do */
3601
293
        return str;
3602
293
    }
3603
3604
6.84M
    if (writer->min_size > PY_SSIZE_T_MAX - size) {
3605
0
        PyErr_NoMemory();
3606
0
        _PyBytesWriter_Dealloc(writer);
3607
0
        return NULL;
3608
0
    }
3609
6.84M
    new_min_size = writer->min_size + size;
3610
3611
6.84M
    if (new_min_size > writer->allocated)
3612
207k
        str = _PyBytesWriter_Resize(writer, str, new_min_size);
3613
3614
6.84M
    writer->min_size = new_min_size;
3615
6.84M
    return str;
3616
6.84M
}
3617
3618
/* Allocate the buffer to write size bytes.
3619
   Return the pointer to the beginning of buffer data.
3620
   Raise an exception and return NULL on error. */
3621
void*
3622
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3623
6.84M
{
3624
    /* ensure that _PyBytesWriter_Alloc() is only called once */
3625
6.84M
    assert(writer->min_size == 0 && writer->buffer == NULL);
3626
6.84M
    assert(size >= 0);
3627
3628
6.84M
    writer->use_small_buffer = 1;
3629
#ifndef NDEBUG
3630
    writer->allocated = sizeof(writer->small_buffer) - 1;
3631
    /* In debug mode, don't use the full small buffer because it is less
3632
       efficient than bytes and bytearray objects to detect buffer underflow
3633
       and buffer overflow. Use 10 bytes of the small buffer to test also
3634
       code using the smaller buffer in debug mode.
3635
3636
       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3637
       in debug mode to also be able to detect stack overflow when running
3638
       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3639
       if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3640
       stack overflow. */
3641
    writer->allocated = Py_MIN(writer->allocated, 10);
3642
    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3643
       to detect buffer overflow */
3644
    writer->small_buffer[writer->allocated] = 0;
3645
#else
3646
6.84M
    writer->allocated = sizeof(writer->small_buffer);
3647
6.84M
#endif
3648
6.84M
    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3649
6.84M
}
3650
3651
PyObject *
3652
_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3653
6.50M
{
3654
6.50M
    Py_ssize_t size;
3655
6.50M
    PyObject *result;
3656
3657
6.50M
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3658
3659
6.50M
    size = _PyBytesWriter_GetSize(writer, str);
3660
6.50M
    if (size == 0 && !writer->use_bytearray) {
3661
1.43k
        Py_CLEAR(writer->buffer);
3662
        /* Get the empty byte string singleton */
3663
1.43k
        result = PyBytes_FromStringAndSize(NULL, 0);
3664
1.43k
    }
3665
6.50M
    else if (writer->use_small_buffer) {
3666
6.31M
        if (writer->use_bytearray) {
3667
0
            result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3668
0
        }
3669
6.31M
        else {
3670
6.31M
            result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3671
6.31M
        }
3672
6.31M
    }
3673
190k
    else {
3674
190k
        result = writer->buffer;
3675
190k
        writer->buffer = NULL;
3676
3677
190k
        if (size != writer->allocated) {
3678
190k
            if (writer->use_bytearray) {
3679
0
                if (PyByteArray_Resize(result, size)) {
3680
0
                    Py_DECREF(result);
3681
0
                    return NULL;
3682
0
                }
3683
0
            }
3684
190k
            else {
3685
190k
                if (_PyBytes_Resize(&result, size)) {
3686
0
                    assert(result == NULL);
3687
0
                    return NULL;
3688
0
                }
3689
190k
            }
3690
190k
        }
3691
190k
    }
3692
6.50M
    return result;
3693
6.50M
}
3694
3695
void*
3696
_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3697
                          const void *bytes, Py_ssize_t size)
3698
0
{
3699
0
    char *str = (char *)ptr;
3700
3701
0
    str = _PyBytesWriter_Prepare(writer, str, size);
3702
0
    if (str == NULL)
3703
0
        return NULL;
3704
3705
0
    memcpy(str, bytes, size);
3706
0
    str += size;
3707
3708
0
    return str;
3709
0
}
3710
3711
3712
void
3713
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3714
    const char* src, Py_ssize_t len_src)
3715
3.31k
{
3716
3.31k
    if (len_dest == 0) {
3717
0
        return;
3718
0
    }
3719
3.31k
    if (len_src == 1) {
3720
16
        memset(dest, src[0], len_dest);
3721
16
    }
3722
3.30k
    else {
3723
3.30k
        if (src != dest) {
3724
3.30k
            memcpy(dest, src, len_src);
3725
3.30k
        }
3726
3.30k
        Py_ssize_t copied = len_src;
3727
7.08k
        while (copied < len_dest) {
3728
3.78k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3729
3.78k
            memcpy(dest + copied, dest, bytes_to_copy);
3730
3.78k
            copied += bytes_to_copy;
3731
3.78k
        }
3732
3.30k
    }
3733
3.31k
}
3734