Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Objects/bytesobject.c
Line
Count
Source (jump to first uncovered line)
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
11
#include "pycore_initconfig.h"    // _PyStatus_OK()
12
#include "pycore_long.h"          // _PyLong_DigitValue
13
#include "pycore_object.h"        // _PyObject_GC_TRACK
14
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
15
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
16
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
17
18
#include <stddef.h>
19
20
/*[clinic input]
21
class bytes "PyBytesObject *" "&PyBytes_Type"
22
[clinic start generated code]*/
23
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24
25
#include "clinic/bytesobject.c.h"
26
27
/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28
   for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29
30
   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31
   3 or 7 bytes per bytes object allocation on a typical system.
32
*/
33
43.1M
/* Bytes up to the start of ob_sval, plus one byte for the trailing NUL that
   the allocators in this file store at ob_sval[size]. */
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
35
/* Forward declaration */
36
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37
                                                   char *str);
38
39
40
3.64M
/* Table of single-byte bytes singletons; callers index it with a byte
   value masked to 0..255. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* The single-byte bytes singleton for byte value `ch`.
   Expands to a plain expression with no trailing semicolon, so it can be
   used both as a statement operand and inside larger expressions. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
/* Address of the empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44
45
46
// Return a reference to the immortal empty bytes string singleton.
47
static inline PyObject* bytes_get_empty(void)
48
5.90M
{
49
5.90M
    PyObject *empty = &EMPTY->ob_base.ob_base;
50
5.90M
    assert(_Py_IsImmortal(empty));
51
5.90M
    return empty;
52
5.90M
}
53
54
55
static inline void
56
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
57
21.8M
{
58
21.8M
_Py_COMP_DIAG_PUSH
59
21.8M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
60
#ifdef Py_GIL_DISABLED
61
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
62
#else
63
21.8M
    a->ob_shash = hash;
64
21.8M
#endif
65
21.8M
_Py_COMP_DIAG_POP
66
21.8M
}
67
68
static inline Py_hash_t
69
get_ob_shash(PyBytesObject *a)
70
4.27M
{
71
4.27M
_Py_COMP_DIAG_PUSH
72
4.27M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
73
#ifdef Py_GIL_DISABLED
74
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
75
#else
76
4.27M
    return a->ob_shash;
77
4.27M
#endif
78
4.27M
_Py_COMP_DIAG_POP
79
4.27M
}
80
81
82
/*
83
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
84
   string containing exactly 'size' bytes.
85
86
   For PyBytes_FromStringAndSize(), the parameter 'str' is
87
   either NULL or else points to a string containing at least 'size' bytes.
88
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
89
   not have to be null-terminated.  (Therefore it is safe to construct a
90
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
91
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
92
   bytes (setting the last byte to the null terminating character) and you can
93
   fill in the data yourself.  If 'str' is non-NULL then the resulting
94
   PyBytes object must be treated as immutable and you must not fill in nor
95
   alter the data yourself, since the strings may be shared.
96
97
   The PyObject member 'op->ob_size', which denotes the number of "extra
98
   items" in a variable-size object, will contain the number of bytes
99
   allocated for string data, not counting the null terminating character.
100
   It is therefore equal to the 'size' parameter (for
101
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
102
   parameter (for PyBytes_FromString()).
103
*/
104
static PyObject *
105
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
106
21.4M
{
107
21.4M
    PyBytesObject *op;
108
21.4M
    assert(size >= 0);
109
110
21.4M
    if (size == 0) {
111
0
        return bytes_get_empty();
112
0
    }
113
114
21.4M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
115
0
        PyErr_SetString(PyExc_OverflowError,
116
0
                        "byte string is too large");
117
0
        return NULL;
118
0
    }
119
120
    /* Inline PyObject_NewVar */
121
21.4M
    if (use_calloc)
122
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
123
21.4M
    else
124
21.4M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
125
21.4M
    if (op == NULL) {
126
0
        return PyErr_NoMemory();
127
0
    }
128
21.4M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
129
21.4M
    set_ob_shash(op, -1);
130
21.4M
    if (!use_calloc) {
131
21.4M
        op->ob_sval[size] = '\0';
132
21.4M
    }
133
21.4M
    return (PyObject *) op;
134
21.4M
}
135
136
PyObject *
137
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
138
30.9M
{
139
30.9M
    PyBytesObject *op;
140
30.9M
    if (size < 0) {
141
0
        PyErr_SetString(PyExc_SystemError,
142
0
            "Negative size passed to PyBytes_FromStringAndSize");
143
0
        return NULL;
144
0
    }
145
30.9M
    if (size == 1 && str != NULL) {
146
3.64M
        op = CHARACTER(*str & 255);
147
3.64M
        assert(_Py_IsImmortal(op));
148
3.64M
        return (PyObject *)op;
149
3.64M
    }
150
27.3M
    if (size == 0) {
151
5.89M
        return bytes_get_empty();
152
5.89M
    }
153
154
21.4M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
155
21.4M
    if (op == NULL)
156
0
        return NULL;
157
21.4M
    if (str == NULL)
158
7.69M
        return (PyObject *) op;
159
160
13.7M
    memcpy(op->ob_sval, str, size);
161
13.7M
    return (PyObject *) op;
162
21.4M
}
163
164
PyObject *
165
PyBytes_FromString(const char *str)
166
414
{
167
414
    size_t size;
168
414
    PyBytesObject *op;
169
170
414
    assert(str != NULL);
171
414
    size = strlen(str);
172
414
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
173
0
        PyErr_SetString(PyExc_OverflowError,
174
0
            "byte string is too long");
175
0
        return NULL;
176
0
    }
177
178
414
    if (size == 0) {
179
0
        return bytes_get_empty();
180
0
    }
181
414
    else if (size == 1) {
182
0
        op = CHARACTER(*str & 255);
183
0
        assert(_Py_IsImmortal(op));
184
0
        return (PyObject *)op;
185
0
    }
186
187
    /* Inline PyObject_NewVar */
188
414
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
189
414
    if (op == NULL) {
190
0
        return PyErr_NoMemory();
191
0
    }
192
414
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
193
414
    set_ob_shash(op, -1);
194
414
    memcpy(op->ob_sval, str, size+1);
195
414
    return (PyObject *) op;
196
414
}
197
198
PyObject *
199
PyBytes_FromFormatV(const char *format, va_list vargs)
200
0
{
201
0
    char *s;
202
0
    const char *f;
203
0
    const char *p;
204
0
    Py_ssize_t prec;
205
0
    int longflag;
206
0
    int size_tflag;
207
    /* Longest 64-bit formatted numbers:
208
       - "18446744073709551615\0" (21 bytes)
209
       - "-9223372036854775808\0" (21 bytes)
210
       Decimal takes the most space (it isn't enough for octal.)
211
212
       Longest 64-bit pointer representation:
213
       "0xffffffffffffffff\0" (19 bytes). */
214
0
    char buffer[21];
215
0
    _PyBytesWriter writer;
216
217
0
    _PyBytesWriter_Init(&writer);
218
219
0
    s = _PyBytesWriter_Alloc(&writer, strlen(format));
220
0
    if (s == NULL)
221
0
        return NULL;
222
0
    writer.overallocate = 1;
223
224
0
#define WRITE_BYTES(str) \
225
0
    do { \
226
0
        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
227
0
        if (s == NULL) \
228
0
            goto error; \
229
0
    } while (0)
230
231
0
    for (f = format; *f; f++) {
232
0
        if (*f != '%') {
233
0
            *s++ = *f;
234
0
            continue;
235
0
        }
236
237
0
        p = f++;
238
239
        /* ignore the width (ex: 10 in "%10s") */
240
0
        while (Py_ISDIGIT(*f))
241
0
            f++;
242
243
        /* parse the precision (ex: 10 in "%.10s") */
244
0
        prec = 0;
245
0
        if (*f == '.') {
246
0
            f++;
247
0
            for (; Py_ISDIGIT(*f); f++) {
248
0
                prec = (prec * 10) + (*f - '0');
249
0
            }
250
0
        }
251
252
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
253
0
            f++;
254
255
        /* handle the long flag ('l'), but only for %ld and %lu.
256
           others can be added when necessary. */
257
0
        longflag = 0;
258
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259
0
            longflag = 1;
260
0
            ++f;
261
0
        }
262
263
        /* handle the size_t flag ('z'). */
264
0
        size_tflag = 0;
265
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
266
0
            size_tflag = 1;
267
0
            ++f;
268
0
        }
269
270
        /* subtract bytes preallocated for the format string
271
           (ex: 2 for "%s") */
272
0
        writer.min_size -= (f - p + 1);
273
274
0
        switch (*f) {
275
0
        case 'c':
276
0
        {
277
0
            int c = va_arg(vargs, int);
278
0
            if (c < 0 || c > 255) {
279
0
                PyErr_SetString(PyExc_OverflowError,
280
0
                                "PyBytes_FromFormatV(): %c format "
281
0
                                "expects an integer in range [0; 255]");
282
0
                goto error;
283
0
            }
284
0
            writer.min_size++;
285
0
            *s++ = (unsigned char)c;
286
0
            break;
287
0
        }
288
289
0
        case 'd':
290
0
            if (longflag) {
291
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
292
0
            }
293
0
            else if (size_tflag) {
294
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
295
0
            }
296
0
            else {
297
0
                sprintf(buffer, "%d", va_arg(vargs, int));
298
0
            }
299
0
            assert(strlen(buffer) < sizeof(buffer));
300
0
            WRITE_BYTES(buffer);
301
0
            break;
302
303
0
        case 'u':
304
0
            if (longflag) {
305
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
306
0
            }
307
0
            else if (size_tflag) {
308
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
309
0
            }
310
0
            else {
311
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
312
0
            }
313
0
            assert(strlen(buffer) < sizeof(buffer));
314
0
            WRITE_BYTES(buffer);
315
0
            break;
316
317
0
        case 'i':
318
0
            sprintf(buffer, "%i", va_arg(vargs, int));
319
0
            assert(strlen(buffer) < sizeof(buffer));
320
0
            WRITE_BYTES(buffer);
321
0
            break;
322
323
0
        case 'x':
324
0
            sprintf(buffer, "%x", va_arg(vargs, int));
325
0
            assert(strlen(buffer) < sizeof(buffer));
326
0
            WRITE_BYTES(buffer);
327
0
            break;
328
329
0
        case 's':
330
0
        {
331
0
            Py_ssize_t i;
332
333
0
            p = va_arg(vargs, const char*);
334
0
            if (prec <= 0) {
335
0
                i = strlen(p);
336
0
            }
337
0
            else {
338
0
                i = 0;
339
0
                while (i < prec && p[i]) {
340
0
                    i++;
341
0
                }
342
0
            }
343
0
            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
344
0
            if (s == NULL)
345
0
                goto error;
346
0
            break;
347
0
        }
348
349
0
        case 'p':
350
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
351
0
            assert(strlen(buffer) < sizeof(buffer));
352
            /* %p is ill-defined:  ensure leading 0x. */
353
0
            if (buffer[1] == 'X')
354
0
                buffer[1] = 'x';
355
0
            else if (buffer[1] != 'x') {
356
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
357
0
                buffer[0] = '0';
358
0
                buffer[1] = 'x';
359
0
            }
360
0
            WRITE_BYTES(buffer);
361
0
            break;
362
363
0
        case '%':
364
0
            writer.min_size++;
365
0
            *s++ = '%';
366
0
            break;
367
368
0
        default:
369
0
            if (*f == 0) {
370
                /* fix min_size if we reached the end of the format string */
371
0
                writer.min_size++;
372
0
            }
373
374
            /* invalid format string: copy unformatted string and exit */
375
0
            WRITE_BYTES(p);
376
0
            return _PyBytesWriter_Finish(&writer, s);
377
0
        }
378
0
    }
379
380
0
#undef WRITE_BYTES
381
382
0
    return _PyBytesWriter_Finish(&writer, s);
383
384
0
 error:
385
0
    _PyBytesWriter_Dealloc(&writer);
386
0
    return NULL;
387
0
}
388
389
/* Variadic front end for PyBytes_FromFormatV(): packs the trailing
   arguments into a va_list and forwards them unchanged. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list ap;

    va_start(ap, format);
    PyObject *result = PyBytes_FromFormatV(format, ap);
    va_end(ap);

    return result;
}
400
401
/* Helpers for formatstring */
402
403
/* Fetch the next formatting argument and advance *p_argidx.

   When arglen is negative, `args` itself is the single argument and is
   returned as-is; otherwise the item at the current index is fetched with
   PyTuple_GetItem().  Once the index has reached arglen, TypeError is
   raised and NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t idx = *p_argidx;

    if (idx >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = idx + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, idx);
}
418
419
/* Returns a new reference to a PyBytes object, or NULL on failure. */
420
421
static char*
422
formatfloat(PyObject *v, int flags, int prec, int type,
423
            PyObject **p_result, _PyBytesWriter *writer, char *str)
424
0
{
425
0
    char *p;
426
0
    PyObject *result;
427
0
    double x;
428
0
    size_t len;
429
0
    int dtoa_flags = 0;
430
431
0
    x = PyFloat_AsDouble(v);
432
0
    if (x == -1.0 && PyErr_Occurred()) {
433
0
        PyErr_Format(PyExc_TypeError, "float argument required, "
434
0
                     "not %.200s", Py_TYPE(v)->tp_name);
435
0
        return NULL;
436
0
    }
437
438
0
    if (prec < 0)
439
0
        prec = 6;
440
441
0
    if (flags & F_ALT) {
442
0
        dtoa_flags |= Py_DTSF_ALT;
443
0
    }
444
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
445
446
0
    if (p == NULL)
447
0
        return NULL;
448
449
0
    len = strlen(p);
450
0
    if (writer != NULL) {
451
0
        str = _PyBytesWriter_Prepare(writer, str, len);
452
0
        if (str == NULL) {
453
0
            PyMem_Free(p);
454
0
            return NULL;
455
0
        }
456
0
        memcpy(str, p, len);
457
0
        PyMem_Free(p);
458
0
        str += len;
459
0
        return str;
460
0
    }
461
462
0
    result = PyBytes_FromStringAndSize(p, len);
463
0
    PyMem_Free(p);
464
0
    *p_result = result;
465
0
    return result != NULL ? str : NULL;
466
0
}
467
468
/* Format `v` as an integer for conversion `type`.

   An int is passed straight to _PyUnicode_FormatLong().  Any other number
   is first converted: with _PyNumber_Index() for 'o', 'x' and 'X', with
   PyNumber_Long() for the remaining conversions.  If that conversion
   fails with something other than TypeError, the error propagates
   unchanged; in every other failure case a TypeError describing the
   required kind of value is raised.

   Returns the result of _PyUnicode_FormatLong(), or NULL on error. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    const int wants_index = (type == 'o' || type == 'x' || type == 'X');

    if (PyLong_Check(v)) {
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    }

    if (PyNumber_Check(v)) {
        /* make sure number is a type of integer for o, x, and X */
        PyObject *iobj = wants_index ? _PyNumber_Index(v)
                                     : PyNumber_Long(v);
        if (iobj != NULL) {
            assert(PyLong_Check(iobj));
            PyObject *result = _PyUnicode_FormatLong(iobj, flags & F_ALT,
                                                     prec, type);
            Py_DECREF(iobj);
            return result;
        }
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
        /* A TypeError falls through and is replaced below. */
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        wants_index ? "an integer" : "a real number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
496
497
static int
498
byte_converter(PyObject *arg, char *p)
499
0
{
500
0
    if (PyBytes_Check(arg)) {
501
0
        if (PyBytes_GET_SIZE(arg) != 1) {
502
0
            PyErr_Format(PyExc_TypeError,
503
0
                         "%%c requires an integer in range(256) or "
504
0
                         "a single byte, not a bytes object of length %zd",
505
0
                         PyBytes_GET_SIZE(arg));
506
0
            return 0;
507
0
        }
508
0
        *p = PyBytes_AS_STRING(arg)[0];
509
0
        return 1;
510
0
    }
511
0
    else if (PyByteArray_Check(arg)) {
512
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
513
0
            PyErr_Format(PyExc_TypeError,
514
0
                         "%%c requires an integer in range(256) or "
515
0
                         "a single byte, not a bytearray object of length %zd",
516
0
                         PyByteArray_GET_SIZE(arg));
517
0
            return 0;
518
0
        }
519
0
        *p = PyByteArray_AS_STRING(arg)[0];
520
0
        return 1;
521
0
    }
522
0
    else if (PyIndex_Check(arg)) {
523
0
        int overflow;
524
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
525
0
        if (ival == -1 && PyErr_Occurred()) {
526
0
            return 0;
527
0
        }
528
0
        if (!(0 <= ival && ival <= 255)) {
529
            /* this includes an overflow in converting to C long */
530
0
            PyErr_SetString(PyExc_OverflowError,
531
0
                            "%c arg not in range(256)");
532
0
            return 0;
533
0
        }
534
0
        *p = (char)ival;
535
0
        return 1;
536
0
    }
537
0
    PyErr_Format(PyExc_TypeError,
538
0
        "%%c requires an integer in range(256) or a single byte, not %T",
539
0
        arg);
540
0
    return 0;
541
0
}
542
543
/* Forward declaration: format_obj() below calls this helper. */
static PyObject *_PyBytes_FromBuffer(PyObject *x);
544
545
static PyObject *
546
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
547
0
{
548
0
    PyObject *func, *result;
549
    /* is it a bytes object? */
550
0
    if (PyBytes_Check(v)) {
551
0
        *pbuf = PyBytes_AS_STRING(v);
552
0
        *plen = PyBytes_GET_SIZE(v);
553
0
        return Py_NewRef(v);
554
0
    }
555
0
    if (PyByteArray_Check(v)) {
556
0
        *pbuf = PyByteArray_AS_STRING(v);
557
0
        *plen = PyByteArray_GET_SIZE(v);
558
0
        return Py_NewRef(v);
559
0
    }
560
    /* does it support __bytes__? */
561
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
562
0
    if (func != NULL) {
563
0
        result = _PyObject_CallNoArgs(func);
564
0
        Py_DECREF(func);
565
0
        if (result == NULL)
566
0
            return NULL;
567
0
        if (!PyBytes_Check(result)) {
568
0
            PyErr_Format(PyExc_TypeError,
569
0
                         "%T.__bytes__() must return a bytes, not %T",
570
0
                         v, result);
571
0
            Py_DECREF(result);
572
0
            return NULL;
573
0
        }
574
0
        *pbuf = PyBytes_AS_STRING(result);
575
0
        *plen = PyBytes_GET_SIZE(result);
576
0
        return result;
577
0
    }
578
    /* does it support buffer protocol? */
579
0
    if (PyObject_CheckBuffer(v)) {
580
        /* maybe we can avoid making a copy of the buffer object here? */
581
0
        result = _PyBytes_FromBuffer(v);
582
0
        if (result == NULL)
583
0
            return NULL;
584
0
        *pbuf = PyBytes_AS_STRING(result);
585
0
        *plen = PyBytes_GET_SIZE(result);
586
0
        return result;
587
0
    }
588
0
    PyErr_Format(PyExc_TypeError,
589
0
                 "%%b requires a bytes-like object, "
590
0
                 "or an object that implements __bytes__, not '%.100s'",
591
0
                 Py_TYPE(v)->tp_name);
592
0
    return NULL;
593
0
}
594
595
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
596
597
PyObject *
598
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599
                  PyObject *args, int use_bytearray)
600
0
{
601
0
    const char *fmt;
602
0
    char *res;
603
0
    Py_ssize_t arglen, argidx;
604
0
    Py_ssize_t fmtcnt;
605
0
    int args_owned = 0;
606
0
    PyObject *dict = NULL;
607
0
    _PyBytesWriter writer;
608
609
0
    if (args == NULL) {
610
0
        PyErr_BadInternalCall();
611
0
        return NULL;
612
0
    }
613
0
    fmt = format;
614
0
    fmtcnt = format_len;
615
616
0
    _PyBytesWriter_Init(&writer);
617
0
    writer.use_bytearray = use_bytearray;
618
619
0
    res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620
0
    if (res == NULL)
621
0
        return NULL;
622
0
    if (!use_bytearray)
623
0
        writer.overallocate = 1;
624
625
0
    if (PyTuple_Check(args)) {
626
0
        arglen = PyTuple_GET_SIZE(args);
627
0
        argidx = 0;
628
0
    }
629
0
    else {
630
0
        arglen = -1;
631
0
        argidx = -2;
632
0
    }
633
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635
0
        !PyByteArray_Check(args)) {
636
0
            dict = args;
637
0
    }
638
639
0
    while (--fmtcnt >= 0) {
640
0
        if (*fmt != '%') {
641
0
            Py_ssize_t len;
642
0
            char *pos;
643
644
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
645
0
            if (pos != NULL)
646
0
                len = pos - fmt;
647
0
            else
648
0
                len = fmtcnt + 1;
649
0
            assert(len != 0);
650
651
0
            memcpy(res, fmt, len);
652
0
            res += len;
653
0
            fmt += len;
654
0
            fmtcnt -= (len - 1);
655
0
        }
656
0
        else {
657
            /* Got a format specifier */
658
0
            int flags = 0;
659
0
            Py_ssize_t width = -1;
660
0
            int prec = -1;
661
0
            int c = '\0';
662
0
            int fill;
663
0
            PyObject *v = NULL;
664
0
            PyObject *temp = NULL;
665
0
            const char *pbuf = NULL;
666
0
            int sign;
667
0
            Py_ssize_t len = 0;
668
0
            char onechar; /* For byte_converter() */
669
0
            Py_ssize_t alloc;
670
671
0
            fmt++;
672
0
            if (*fmt == '%') {
673
0
                *res++ = '%';
674
0
                fmt++;
675
0
                fmtcnt--;
676
0
                continue;
677
0
            }
678
0
            if (*fmt == '(') {
679
0
                const char *keystart;
680
0
                Py_ssize_t keylen;
681
0
                PyObject *key;
682
0
                int pcount = 1;
683
684
0
                if (dict == NULL) {
685
0
                    PyErr_SetString(PyExc_TypeError,
686
0
                             "format requires a mapping");
687
0
                    goto error;
688
0
                }
689
0
                ++fmt;
690
0
                --fmtcnt;
691
0
                keystart = fmt;
692
                /* Skip over balanced parentheses */
693
0
                while (pcount > 0 && --fmtcnt >= 0) {
694
0
                    if (*fmt == ')')
695
0
                        --pcount;
696
0
                    else if (*fmt == '(')
697
0
                        ++pcount;
698
0
                    fmt++;
699
0
                }
700
0
                keylen = fmt - keystart - 1;
701
0
                if (fmtcnt < 0 || pcount > 0) {
702
0
                    PyErr_SetString(PyExc_ValueError,
703
0
                               "incomplete format key");
704
0
                    goto error;
705
0
                }
706
0
                key = PyBytes_FromStringAndSize(keystart,
707
0
                                                 keylen);
708
0
                if (key == NULL)
709
0
                    goto error;
710
0
                if (args_owned) {
711
0
                    Py_DECREF(args);
712
0
                    args_owned = 0;
713
0
                }
714
0
                args = PyObject_GetItem(dict, key);
715
0
                Py_DECREF(key);
716
0
                if (args == NULL) {
717
0
                    goto error;
718
0
                }
719
0
                args_owned = 1;
720
0
                arglen = -1;
721
0
                argidx = -2;
722
0
            }
723
724
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
725
0
            while (--fmtcnt >= 0) {
726
0
                switch (c = *fmt++) {
727
0
                case '-': flags |= F_LJUST; continue;
728
0
                case '+': flags |= F_SIGN; continue;
729
0
                case ' ': flags |= F_BLANK; continue;
730
0
                case '#': flags |= F_ALT; continue;
731
0
                case '0': flags |= F_ZERO; continue;
732
0
                }
733
0
                break;
734
0
            }
735
736
            /* Parse width. Example: "%10s" => width=10 */
737
0
            if (c == '*') {
738
0
                v = getnextarg(args, arglen, &argidx);
739
0
                if (v == NULL)
740
0
                    goto error;
741
0
                if (!PyLong_Check(v)) {
742
0
                    PyErr_SetString(PyExc_TypeError,
743
0
                                    "* wants int");
744
0
                    goto error;
745
0
                }
746
0
                width = PyLong_AsSsize_t(v);
747
0
                if (width == -1 && PyErr_Occurred())
748
0
                    goto error;
749
0
                if (width < 0) {
750
0
                    flags |= F_LJUST;
751
0
                    width = -width;
752
0
                }
753
0
                if (--fmtcnt >= 0)
754
0
                    c = *fmt++;
755
0
            }
756
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
757
0
                width = c - '0';
758
0
                while (--fmtcnt >= 0) {
759
0
                    c = Py_CHARMASK(*fmt++);
760
0
                    if (!Py_ISDIGIT(c))
761
0
                        break;
762
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763
0
                        PyErr_SetString(
764
0
                            PyExc_ValueError,
765
0
                            "width too big");
766
0
                        goto error;
767
0
                    }
768
0
                    width = width*10 + (c - '0');
769
0
                }
770
0
            }
771
772
            /* Parse precision. Example: "%.3f" => prec=3 */
773
0
            if (c == '.') {
774
0
                prec = 0;
775
0
                if (--fmtcnt >= 0)
776
0
                    c = *fmt++;
777
0
                if (c == '*') {
778
0
                    v = getnextarg(args, arglen, &argidx);
779
0
                    if (v == NULL)
780
0
                        goto error;
781
0
                    if (!PyLong_Check(v)) {
782
0
                        PyErr_SetString(
783
0
                            PyExc_TypeError,
784
0
                            "* wants int");
785
0
                        goto error;
786
0
                    }
787
0
                    prec = PyLong_AsInt(v);
788
0
                    if (prec == -1 && PyErr_Occurred())
789
0
                        goto error;
790
0
                    if (prec < 0)
791
0
                        prec = 0;
792
0
                    if (--fmtcnt >= 0)
793
0
                        c = *fmt++;
794
0
                }
795
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
796
0
                    prec = c - '0';
797
0
                    while (--fmtcnt >= 0) {
798
0
                        c = Py_CHARMASK(*fmt++);
799
0
                        if (!Py_ISDIGIT(c))
800
0
                            break;
801
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802
0
                            PyErr_SetString(
803
0
                                PyExc_ValueError,
804
0
                                "prec too big");
805
0
                            goto error;
806
0
                        }
807
0
                        prec = prec*10 + (c - '0');
808
0
                    }
809
0
                }
810
0
            } /* prec */
811
0
            if (fmtcnt >= 0) {
812
0
                if (c == 'h' || c == 'l' || c == 'L') {
813
0
                    if (--fmtcnt >= 0)
814
0
                        c = *fmt++;
815
0
                }
816
0
            }
817
0
            if (fmtcnt < 0) {
818
0
                PyErr_SetString(PyExc_ValueError,
819
0
                                "incomplete format");
820
0
                goto error;
821
0
            }
822
0
            v = getnextarg(args, arglen, &argidx);
823
0
            if (v == NULL)
824
0
                goto error;
825
826
0
            if (fmtcnt == 0) {
827
                /* last write: disable writer overallocation */
828
0
                writer.overallocate = 0;
829
0
            }
830
831
0
            sign = 0;
832
0
            fill = ' ';
833
0
            switch (c) {
834
0
            case 'r':
835
                // %r is only for 2/3 code; 3 only code should use %a
836
0
            case 'a':
837
0
                temp = PyObject_ASCII(v);
838
0
                if (temp == NULL)
839
0
                    goto error;
840
0
                assert(PyUnicode_IS_ASCII(temp));
841
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842
0
                len = PyUnicode_GET_LENGTH(temp);
843
0
                if (prec >= 0 && len > prec)
844
0
                    len = prec;
845
0
                break;
846
847
0
            case 's':
848
                // %s is only for 2/3 code; 3 only code should use %b
849
0
            case 'b':
850
0
                temp = format_obj(v, &pbuf, &len);
851
0
                if (temp == NULL)
852
0
                    goto error;
853
0
                if (prec >= 0 && len > prec)
854
0
                    len = prec;
855
0
                break;
856
857
0
            case 'i':
858
0
            case 'd':
859
0
            case 'u':
860
0
            case 'o':
861
0
            case 'x':
862
0
            case 'X':
863
0
                if (PyLong_CheckExact(v)
864
0
                    && width == -1 && prec == -1
865
0
                    && !(flags & (F_SIGN | F_BLANK))
866
0
                    && c != 'X')
867
0
                {
868
                    /* Fast path */
869
0
                    int alternate = flags & F_ALT;
870
0
                    int base;
871
872
0
                    switch(c)
873
0
                    {
874
0
                        default:
875
0
                            Py_UNREACHABLE();
876
0
                        case 'd':
877
0
                        case 'i':
878
0
                        case 'u':
879
0
                            base = 10;
880
0
                            break;
881
0
                        case 'o':
882
0
                            base = 8;
883
0
                            break;
884
0
                        case 'x':
885
0
                        case 'X':
886
0
                            base = 16;
887
0
                            break;
888
0
                    }
889
890
                    /* Fast path */
891
0
                    writer.min_size -= 2; /* size preallocated for "%d" */
892
0
                    res = _PyLong_FormatBytesWriter(&writer, res,
893
0
                                                    v, base, alternate);
894
0
                    if (res == NULL)
895
0
                        goto error;
896
0
                    continue;
897
0
                }
898
899
0
                temp = formatlong(v, flags, prec, c);
900
0
                if (!temp)
901
0
                    goto error;
902
0
                assert(PyUnicode_IS_ASCII(temp));
903
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904
0
                len = PyUnicode_GET_LENGTH(temp);
905
0
                sign = 1;
906
0
                if (flags & F_ZERO)
907
0
                    fill = '0';
908
0
                break;
909
910
0
            case 'e':
911
0
            case 'E':
912
0
            case 'f':
913
0
            case 'F':
914
0
            case 'g':
915
0
            case 'G':
916
0
                if (width == -1 && prec == -1
917
0
                    && !(flags & (F_SIGN | F_BLANK)))
918
0
                {
919
                    /* Fast path */
920
0
                    writer.min_size -= 2; /* size preallocated for "%f" */
921
0
                    res = formatfloat(v, flags, prec, c, NULL, &writer, res);
922
0
                    if (res == NULL)
923
0
                        goto error;
924
0
                    continue;
925
0
                }
926
927
0
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
928
0
                    goto error;
929
0
                pbuf = PyBytes_AS_STRING(temp);
930
0
                len = PyBytes_GET_SIZE(temp);
931
0
                sign = 1;
932
0
                if (flags & F_ZERO)
933
0
                    fill = '0';
934
0
                break;
935
936
0
            case 'c':
937
0
                pbuf = &onechar;
938
0
                len = byte_converter(v, &onechar);
939
0
                if (!len)
940
0
                    goto error;
941
0
                if (width == -1) {
942
                    /* Fast path */
943
0
                    *res++ = onechar;
944
0
                    continue;
945
0
                }
946
0
                break;
947
948
0
            default:
949
0
                PyErr_Format(PyExc_ValueError,
950
0
                  "unsupported format character '%c' (0x%x) "
951
0
                  "at index %zd",
952
0
                  c, c,
953
0
                  (Py_ssize_t)(fmt - 1 - format));
954
0
                goto error;
955
0
            }
956
957
0
            if (sign) {
958
0
                if (*pbuf == '-' || *pbuf == '+') {
959
0
                    sign = *pbuf++;
960
0
                    len--;
961
0
                }
962
0
                else if (flags & F_SIGN)
963
0
                    sign = '+';
964
0
                else if (flags & F_BLANK)
965
0
                    sign = ' ';
966
0
                else
967
0
                    sign = 0;
968
0
            }
969
0
            if (width < len)
970
0
                width = len;
971
972
0
            alloc = width;
973
0
            if (sign != 0 && len == width)
974
0
                alloc++;
975
            /* 2: size preallocated for %s */
976
0
            if (alloc > 2) {
977
0
                res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
978
0
                if (res == NULL)
979
0
                    goto error;
980
0
            }
981
#ifndef NDEBUG
982
            char *before = res;
983
#endif
984
985
            /* Write the sign if needed */
986
0
            if (sign) {
987
0
                if (fill != ' ')
988
0
                    *res++ = sign;
989
0
                if (width > len)
990
0
                    width--;
991
0
            }
992
993
            /* Write the numeric prefix for "x", "X" and "o" formats
994
               if the alternate form is used.
995
               For example, write "0x" for the "%#x" format. */
996
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
997
0
                assert(pbuf[0] == '0');
998
0
                assert(pbuf[1] == c);
999
0
                if (fill != ' ') {
1000
0
                    *res++ = *pbuf++;
1001
0
                    *res++ = *pbuf++;
1002
0
                }
1003
0
                width -= 2;
1004
0
                if (width < 0)
1005
0
                    width = 0;
1006
0
                len -= 2;
1007
0
            }
1008
1009
            /* Pad left with the fill character if needed */
1010
0
            if (width > len && !(flags & F_LJUST)) {
1011
0
                memset(res, fill, width - len);
1012
0
                res += (width - len);
1013
0
                width = len;
1014
0
            }
1015
1016
            /* If padding with spaces: write sign if needed and/or numeric
1017
               prefix if the alternate form is used */
1018
0
            if (fill == ' ') {
1019
0
                if (sign)
1020
0
                    *res++ = sign;
1021
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1022
0
                    assert(pbuf[0] == '0');
1023
0
                    assert(pbuf[1] == c);
1024
0
                    *res++ = *pbuf++;
1025
0
                    *res++ = *pbuf++;
1026
0
                }
1027
0
            }
1028
1029
            /* Copy bytes */
1030
0
            memcpy(res, pbuf, len);
1031
0
            res += len;
1032
1033
            /* Pad right with the fill character if needed */
1034
0
            if (width > len) {
1035
0
                memset(res, ' ', width - len);
1036
0
                res += (width - len);
1037
0
            }
1038
1039
0
            if (dict && (argidx < arglen)) {
1040
0
                PyErr_SetString(PyExc_TypeError,
1041
0
                           "not all arguments converted during bytes formatting");
1042
0
                Py_XDECREF(temp);
1043
0
                goto error;
1044
0
            }
1045
0
            Py_XDECREF(temp);
1046
1047
#ifndef NDEBUG
1048
            /* check that we computed the exact size for this write */
1049
            assert((res - before) == alloc);
1050
#endif
1051
0
        } /* '%' */
1052
1053
        /* If overallocation was disabled, ensure that it was the last
1054
           write. Otherwise, we missed an optimization */
1055
0
        assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1056
0
    } /* until end */
1057
1058
0
    if (argidx < arglen && !dict) {
1059
0
        PyErr_SetString(PyExc_TypeError,
1060
0
                        "not all arguments converted during bytes formatting");
1061
0
        goto error;
1062
0
    }
1063
1064
0
    if (args_owned) {
1065
0
        Py_DECREF(args);
1066
0
    }
1067
0
    return _PyBytesWriter_Finish(&writer, res);
1068
1069
0
 error:
1070
0
    _PyBytesWriter_Dealloc(&writer);
1071
0
    if (args_owned) {
1072
0
        Py_DECREF(args);
1073
0
    }
1074
0
    return NULL;
1075
0
}
1076
1077
/* Unescape a backslash-escaped string. */
1078
PyObject *_PyBytes_DecodeEscape2(const char *s,
1079
                                Py_ssize_t len,
1080
                                const char *errors,
1081
                                int *first_invalid_escape_char,
1082
                                const char **first_invalid_escape_ptr)
1083
3.07k
{
1084
3.07k
    int c;
1085
3.07k
    char *p;
1086
3.07k
    const char *end;
1087
3.07k
    _PyBytesWriter writer;
1088
1089
3.07k
    _PyBytesWriter_Init(&writer);
1090
1091
3.07k
    p = _PyBytesWriter_Alloc(&writer, len);
1092
3.07k
    if (p == NULL)
1093
0
        return NULL;
1094
3.07k
    writer.overallocate = 1;
1095
1096
3.07k
    *first_invalid_escape_char = -1;
1097
3.07k
    *first_invalid_escape_ptr = NULL;
1098
1099
3.07k
    end = s + len;
1100
71.4k
    while (s < end) {
1101
68.3k
        if (*s != '\\') {
1102
55.1k
            *p++ = *s++;
1103
55.1k
            continue;
1104
55.1k
        }
1105
1106
13.2k
        s++;
1107
13.2k
        if (s == end) {
1108
0
            PyErr_SetString(PyExc_ValueError,
1109
0
                            "Trailing \\ in string");
1110
0
            goto failed;
1111
0
        }
1112
1113
13.2k
        switch (*s++) {
1114
        /* XXX This assumes ASCII! */
1115
2.34k
        case '\n': break;
1116
916
        case '\\': *p++ = '\\'; break;
1117
233
        case '\'': *p++ = '\''; break;
1118
920
        case '\"': *p++ = '\"'; break;
1119
222
        case 'b': *p++ = '\b'; break;
1120
324
        case 'f': *p++ = '\014'; break; /* FF */
1121
225
        case 't': *p++ = '\t'; break;
1122
328
        case 'n': *p++ = '\n'; break;
1123
1.39k
        case 'r': *p++ = '\r'; break;
1124
248
        case 'v': *p++ = '\013'; break; /* VT */
1125
204
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1126
2.25k
        case '0': case '1': case '2': case '3':
1127
4.15k
        case '4': case '5': case '6': case '7':
1128
4.15k
            c = s[-1] - '0';
1129
4.15k
            if (s < end && '0' <= *s && *s <= '7') {
1130
2.10k
                c = (c<<3) + *s++ - '0';
1131
2.10k
                if (s < end && '0' <= *s && *s <= '7')
1132
1.10k
                    c = (c<<3) + *s++ - '0';
1133
2.10k
            }
1134
4.15k
            if (c > 0377) {
1135
597
                if (*first_invalid_escape_char == -1) {
1136
149
                    *first_invalid_escape_char = c;
1137
                    /* Back up 3 chars, since we've already incremented s. */
1138
149
                    *first_invalid_escape_ptr = s - 3;
1139
149
                }
1140
597
            }
1141
4.15k
            *p++ = c;
1142
4.15k
            break;
1143
310
        case 'x':
1144
310
            if (s+1 < end) {
1145
309
                int digit1, digit2;
1146
309
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1147
309
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1148
309
                if (digit1 < 16 && digit2 < 16) {
1149
306
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1150
306
                    s += 2;
1151
306
                    break;
1152
306
                }
1153
309
            }
1154
            /* invalid hexadecimal digits */
1155
1156
4
            if (!errors || strcmp(errors, "strict") == 0) {
1157
4
                PyErr_Format(PyExc_ValueError,
1158
4
                             "invalid \\x escape at position %zd",
1159
4
                             s - 2 - (end - len));
1160
4
                goto failed;
1161
4
            }
1162
0
            if (strcmp(errors, "replace") == 0) {
1163
0
                *p++ = '?';
1164
0
            } else if (strcmp(errors, "ignore") == 0)
1165
0
                /* do nothing */;
1166
0
            else {
1167
0
                PyErr_Format(PyExc_ValueError,
1168
0
                             "decoding error; unknown "
1169
0
                             "error handling code: %.400s",
1170
0
                             errors);
1171
0
                goto failed;
1172
0
            }
1173
            /* skip \x */
1174
0
            if (s < end && Py_ISXDIGIT(s[0]))
1175
0
                s++; /* and a hexdigit */
1176
0
            break;
1177
1178
1.40k
        default:
1179
1.40k
            if (*first_invalid_escape_char == -1) {
1180
724
                *first_invalid_escape_char = (unsigned char)s[-1];
1181
                /* Back up one char, since we've already incremented s. */
1182
724
                *first_invalid_escape_ptr = s - 1;
1183
724
            }
1184
1.40k
            *p++ = '\\';
1185
1.40k
            s--;
1186
13.2k
        }
1187
13.2k
    }
1188
1189
3.07k
    return _PyBytesWriter_Finish(&writer, p);
1190
1191
4
  failed:
1192
4
    _PyBytesWriter_Dealloc(&writer);
1193
4
    return NULL;
1194
3.07k
}
1195
1196
PyObject *PyBytes_DecodeEscape(const char *s,
1197
                                Py_ssize_t len,
1198
                                const char *errors,
1199
                                Py_ssize_t Py_UNUSED(unicode),
1200
                                const char *Py_UNUSED(recode_encoding))
1201
0
{
1202
0
    int first_invalid_escape_char;
1203
0
    const char *first_invalid_escape_ptr;
1204
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1205
0
                                             &first_invalid_escape_char,
1206
0
                                             &first_invalid_escape_ptr);
1207
0
    if (result == NULL)
1208
0
        return NULL;
1209
0
    if (first_invalid_escape_char != -1) {
1210
0
        if (first_invalid_escape_char > 0xff) {
1211
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1212
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1213
0
                                 "Such sequences will not work in the future. ",
1214
0
                                 first_invalid_escape_char) < 0)
1215
0
            {
1216
0
                Py_DECREF(result);
1217
0
                return NULL;
1218
0
            }
1219
0
        }
1220
0
        else {
1221
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1222
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1223
0
                                 "Such sequences will not work in the future. ",
1224
0
                                 first_invalid_escape_char) < 0)
1225
0
            {
1226
0
                Py_DECREF(result);
1227
0
                return NULL;
1228
0
            }
1229
0
        }
1230
0
    }
1231
0
    return result;
1232
0
}
1233
/* -------------------------------------------------------------------- */
1234
/* object api */
1235
1236
Py_ssize_t
1237
PyBytes_Size(PyObject *op)
1238
4.64k
{
1239
4.64k
    if (!PyBytes_Check(op)) {
1240
0
        PyErr_Format(PyExc_TypeError,
1241
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1242
0
        return -1;
1243
0
    }
1244
4.64k
    return Py_SIZE(op);
1245
4.64k
}
1246
1247
char *
1248
PyBytes_AsString(PyObject *op)
1249
3.02M
{
1250
3.02M
    if (!PyBytes_Check(op)) {
1251
0
        PyErr_Format(PyExc_TypeError,
1252
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1253
0
        return NULL;
1254
0
    }
1255
3.02M
    return ((PyBytesObject *)op)->ob_sval;
1256
3.02M
}
1257
1258
int
1259
PyBytes_AsStringAndSize(PyObject *obj,
1260
                         char **s,
1261
                         Py_ssize_t *len)
1262
78.0k
{
1263
78.0k
    if (s == NULL) {
1264
0
        PyErr_BadInternalCall();
1265
0
        return -1;
1266
0
    }
1267
1268
78.0k
    if (!PyBytes_Check(obj)) {
1269
0
        PyErr_Format(PyExc_TypeError,
1270
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1271
0
        return -1;
1272
0
    }
1273
1274
78.0k
    *s = PyBytes_AS_STRING(obj);
1275
78.0k
    if (len != NULL)
1276
78.0k
        *len = PyBytes_GET_SIZE(obj);
1277
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1278
0
        PyErr_SetString(PyExc_ValueError,
1279
0
                        "embedded null byte");
1280
0
        return -1;
1281
0
    }
1282
78.0k
    return 0;
1283
78.0k
}
1284
1285
/* -------------------------------------------------------------------- */
1286
/* Methods */
1287
1288
0
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1289
1290
#include "stringlib/stringdefs.h"
1291
#define STRINGLIB_MUTABLE 0
1292
1293
#include "stringlib/fastsearch.h"
1294
#include "stringlib/count.h"
1295
#include "stringlib/find.h"
1296
#include "stringlib/join.h"
1297
#include "stringlib/partition.h"
1298
#include "stringlib/split.h"
1299
#include "stringlib/ctype.h"
1300
1301
#include "stringlib/transmogrify.h"
1302
1303
#undef STRINGLIB_GET_EMPTY
1304
1305
Py_ssize_t
1306
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1307
              const char *needle, Py_ssize_t len_needle,
1308
              Py_ssize_t offset)
1309
0
{
1310
0
    assert(len_haystack >= 0);
1311
0
    assert(len_needle >= 0);
1312
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1313
0
    if (len_needle == 0) {
1314
0
        return offset;
1315
0
    }
1316
0
    if (len_needle > len_haystack) {
1317
0
        return -1;
1318
0
    }
1319
0
    assert(len_haystack >= 1);
1320
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1321
0
                                    needle, len_needle, offset);
1322
0
    if (res == -1) {
1323
0
        Py_ssize_t last_align = len_haystack - len_needle;
1324
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1325
0
            return offset + last_align;
1326
0
        }
1327
0
    }
1328
0
    return res;
1329
0
}
1330
1331
Py_ssize_t
1332
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1333
                     const char *needle, Py_ssize_t len_needle,
1334
                     Py_ssize_t offset)
1335
0
{
1336
0
    return stringlib_rfind(haystack, len_haystack,
1337
0
                           needle, len_needle, offset);
1338
0
}
1339
1340
/* Build the textual representation (b'...') of the bytes object `obj` as a
   new str object.

   When `smartquotes` is true and the data contains single quotes but no
   double quotes, double quotes are used as delimiters.  Returns NULL with
   OverflowError set if the output size would exceed PY_SSIZE_T_MAX, or
   NULL if PyUnicode_New() fails. */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    PyBytesObject *op = (PyBytesObject *)obj;
    const Py_ssize_t length = Py_SIZE(op);
    const unsigned char *bytes = (const unsigned char *)op->ob_sval;
    Py_ssize_t squotes = 0;
    Py_ssize_t dquotes = 0;
    Py_ssize_t newsize = 3;  /* b'' */
    unsigned char quote;
    PyObject *v;
    Py_UCS1 *p;
    Py_ssize_t i;

    /* First pass: compute the size of the output string. */
    for (i = 0; i < length; i++) {
        const unsigned char ch = bytes[i];
        Py_ssize_t incr = 1;

        if (ch == '\'') {
            squotes++;
        }
        else if (ch == '"') {
            dquotes++;
        }
        else if (ch == '\\' || ch == '\t' || ch == '\n' || ch == '\r') {
            incr = 2;  /* \C */
        }
        else if (ch < ' ' || ch >= 0x7f) {
            incr = 4;  /* \xHH */
        }

        if (newsize > PY_SSIZE_T_MAX - incr) {
            goto overflow;
        }
        newsize += incr;
    }

    /* Choose the delimiter; single quotes inside '...' need a backslash. */
    if (smartquotes && squotes && !dquotes) {
        quote = '"';
    }
    else {
        quote = '\'';
    }
    if (squotes && quote == '\'') {
        if (newsize > PY_SSIZE_T_MAX - squotes) {
            goto overflow;
        }
        newsize += squotes;
    }

    v = PyUnicode_New(newsize, 127);
    if (v == NULL) {
        return NULL;
    }
    p = PyUnicode_1BYTE_DATA(v);

    /* Second pass: write the prefix, the escaped payload and the suffix. */
    *p++ = 'b';
    *p++ = quote;
    for (i = 0; i < length; i++) {
        const unsigned char c = bytes[i];

        if (c == quote || c == '\\') {
            *p++ = '\\';
            *p++ = c;
            continue;
        }
        switch (c) {
        case '\t':
            *p++ = '\\';
            *p++ = 't';
            break;
        case '\n':
            *p++ = '\\';
            *p++ = 'n';
            break;
        case '\r':
            *p++ = '\\';
            *p++ = 'r';
            break;
        default:
            if (c < ' ' || c >= 0x7f) {
                *p++ = '\\';
                *p++ = 'x';
                *p++ = Py_hexdigits[(c & 0xf0) >> 4];
                *p++ = Py_hexdigits[c & 0xf];
            }
            else {
                *p++ = c;
            }
            break;
        }
    }
    *p++ = quote;
    assert(_PyUnicode_CheckConsistency(v, 1));
    return v;

  overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "bytes object is too large to make repr");
    return NULL;
}
1414
1415
/* Wrapper around PyBytes_Repr() with smart quote selection enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1420
1421
/* Return the same text as bytes_repr().  When the bytes_warning config
   field is set, a BytesWarning is issued first; a non-zero result from the
   warning call makes this function return NULL. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1) != 0)
    {
        return NULL;
    }
    return bytes_repr(op);
}
1432
1433
static Py_ssize_t
1434
bytes_length(PyObject *self)
1435
1.44M
{
1436
1.44M
    PyBytesObject *a = _PyBytes_CAST(self);
1437
1.44M
    return Py_SIZE(a);
1438
1.44M
}
1439
1440
/* This is also used by PyBytes_Concat() */
1441
static PyObject *
1442
bytes_concat(PyObject *a, PyObject *b)
1443
74.6k
{
1444
74.6k
    Py_buffer va, vb;
1445
74.6k
    PyObject *result = NULL;
1446
1447
74.6k
    va.len = -1;
1448
74.6k
    vb.len = -1;
1449
74.6k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1450
74.6k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1451
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1452
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1453
0
        goto done;
1454
0
    }
1455
1456
    /* Optimize end cases */
1457
74.6k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1458
2.24k
        result = Py_NewRef(b);
1459
2.24k
        goto done;
1460
2.24k
    }
1461
72.3k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1462
15.9k
        result = Py_NewRef(a);
1463
15.9k
        goto done;
1464
15.9k
    }
1465
1466
56.4k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1467
0
        PyErr_NoMemory();
1468
0
        goto done;
1469
0
    }
1470
1471
56.4k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1472
56.4k
    if (result != NULL) {
1473
56.4k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1474
56.4k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1475
56.4k
    }
1476
1477
74.6k
  done:
1478
74.6k
    if (va.len != -1)
1479
74.6k
        PyBuffer_Release(&va);
1480
74.6k
    if (vb.len != -1)
1481
74.6k
        PyBuffer_Release(&vb);
1482
74.6k
    return result;
1483
56.4k
}
1484
1485
static PyObject *
1486
bytes_repeat(PyObject *self, Py_ssize_t n)
1487
16
{
1488
16
    PyBytesObject *a = _PyBytes_CAST(self);
1489
16
    if (n < 0)
1490
0
        n = 0;
1491
    /* watch out for overflows:  the size can overflow int,
1492
     * and the # of bytes needed can overflow size_t
1493
     */
1494
16
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1495
0
        PyErr_SetString(PyExc_OverflowError,
1496
0
            "repeated bytes are too long");
1497
0
        return NULL;
1498
0
    }
1499
16
    Py_ssize_t size = Py_SIZE(a) * n;
1500
16
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1501
0
        return Py_NewRef(a);
1502
0
    }
1503
16
    size_t nbytes = (size_t)size;
1504
16
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1505
0
        PyErr_SetString(PyExc_OverflowError,
1506
0
            "repeated bytes are too long");
1507
0
        return NULL;
1508
0
    }
1509
16
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1510
16
    if (op == NULL) {
1511
0
        return PyErr_NoMemory();
1512
0
    }
1513
16
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1514
16
    set_ob_shash(op, -1);
1515
16
    op->ob_sval[size] = '\0';
1516
1517
16
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1518
1519
16
    return (PyObject *) op;
1520
16
}
1521
1522
static int
1523
bytes_contains(PyObject *self, PyObject *arg)
1524
3.24k
{
1525
3.24k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1526
3.24k
}
1527
1528
static PyObject *
1529
bytes_item(PyObject *self, Py_ssize_t i)
1530
0
{
1531
0
    PyBytesObject *a = _PyBytes_CAST(self);
1532
0
    if (i < 0 || i >= Py_SIZE(a)) {
1533
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1534
0
        return NULL;
1535
0
    }
1536
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1537
0
}
1538
1539
static int
1540
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1541
459k
{
1542
459k
    int cmp;
1543
459k
    Py_ssize_t len;
1544
1545
459k
    len = Py_SIZE(a);
1546
459k
    if (Py_SIZE(b) != len)
1547
341k
        return 0;
1548
1549
118k
    if (a->ob_sval[0] != b->ob_sval[0])
1550
10.3k
        return 0;
1551
1552
108k
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1553
108k
    return (cmp == 0);
1554
118k
}
1555
1556
/* Rich comparison slot.

   Returns NotImplemented unless both operands pass PyBytes_Check(); before
   doing so, and only for ==/!= with the bytes_warning config field set, a
   BytesWarning is issued if either operand is a str or an int.  For two
   bytes operands the result follows byte-wise ordering, with the lengths
   breaking ties. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    const int equality_op = (op == Py_EQ || op == Py_NE);

    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && equality_op) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        /* a byte string is equal to itself */
        if (op == Py_EQ || op == Py_LE || op == Py_GE) {
            Py_RETURN_TRUE;
        }
        if (op == Py_NE || op == Py_LT || op == Py_GT) {
            Py_RETURN_FALSE;
        }
        PyErr_BadArgument();
        return NULL;
    }

    if (equality_op) {
        int same = bytes_compare_eq(a, b);
        return PyBool_FromLong(op == Py_NE ? !same : same);
    }

    /* Ordering: compare the common prefix, then fall back to the lengths. */
    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t shared = Py_MIN(len_a, len_b);
    int diff = 0;

    if (shared > 0) {
        diff = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (diff == 0) {
            diff = memcmp(a->ob_sval, b->ob_sval, shared);
        }
    }
    if (diff != 0) {
        Py_RETURN_RICHCOMPARE(diff, 0, op);
    }
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1618
1619
static Py_hash_t
1620
bytes_hash(PyObject *self)
1621
4.27M
{
1622
4.27M
    PyBytesObject *a = _PyBytes_CAST(self);
1623
4.27M
    Py_hash_t hash = get_ob_shash(a);
1624
4.27M
    if (hash == -1) {
1625
        /* Can't fail */
1626
172k
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1627
172k
        set_ob_shash(a, hash);
1628
172k
    }
1629
4.27M
    return hash;
1630
4.27M
}
1631
1632
/* Subscript slot: implements indexing with an integer-like object (returns
   an int) or with a slice (returns bytes).

   Raises IndexError for an out-of-range index and TypeError when `item` is
   neither an index nor a slice. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        /* Negative indices count from the end. */
        if (index < 0) {
            index += PyBytes_GET_SIZE(self);
        }
        if (index < 0 || index >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    const Py_ssize_t count = PySlice_AdjustIndices(PyBytes_GET_SIZE(self),
                                                   &start, &stop, step);

    if (count <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* A slice covering a whole exact bytes object is the object itself. */
        if (start == 0 &&
            count == PyBytes_GET_SIZE(self) &&
            PyBytes_CheckExact(self))
        {
            return Py_NewRef(self);
        }
        /* Contiguous range: copy it in one go. */
        return PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self) + start,
            count);
    }

    /* Extended slice: gather the selected bytes one at a time. */
    const char *src = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, count);
    if (result == NULL) {
        return NULL;
    }
    char *dst = PyBytes_AS_STRING(result);

    size_t cursor = start;
    for (Py_ssize_t k = 0; k < count; k++) {
        dst[k] = src[cursor];
        cursor += step;
    }
    return result;
}
1697
1698
static int
1699
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1700
12.5M
{
1701
12.5M
    PyBytesObject *self = _PyBytes_CAST(op);
1702
12.5M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1703
12.5M
                             1, flags);
1704
12.5M
}
1705
1706
static PySequenceMethods bytes_as_sequence = {
1707
    bytes_length,       /*sq_length*/
1708
    bytes_concat,       /*sq_concat*/
1709
    bytes_repeat,       /*sq_repeat*/
1710
    bytes_item,         /*sq_item*/
1711
    0,                  /*sq_slice*/
1712
    0,                  /*sq_ass_item*/
1713
    0,                  /*sq_ass_slice*/
1714
    bytes_contains      /*sq_contains*/
1715
};
1716
1717
static PyMappingMethods bytes_as_mapping = {
1718
    bytes_length,
1719
    bytes_subscript,
1720
    0,
1721
};
1722
1723
static PyBufferProcs bytes_as_buffer = {
1724
    bytes_buffer_getbuffer,
1725
    NULL,
1726
};
1727
1728
1729
/*[clinic input]
1730
bytes.__bytes__
1731
Convert this value to exact type bytes.
1732
[clinic start generated code]*/
1733
1734
static PyObject *
1735
bytes___bytes___impl(PyBytesObject *self)
1736
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1737
86.5k
{
1738
86.5k
    if (PyBytes_CheckExact(self)) {
1739
86.5k
        return Py_NewRef(self);
1740
86.5k
    }
1741
0
    else {
1742
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1743
0
    }
1744
86.5k
}
1745
1746
1747
0
#define LEFTSTRIP 0
1748
0
#define RIGHTSTRIP 1
1749
0
#define BOTHSTRIP 2
1750
1751
/*[clinic input]
1752
bytes.split
1753
1754
    sep: object = None
1755
        The delimiter according which to split the bytes.
1756
        None (the default value) means split on ASCII whitespace characters
1757
        (space, tab, return, newline, formfeed, vertical tab).
1758
    maxsplit: Py_ssize_t = -1
1759
        Maximum number of splits to do.
1760
        -1 (the default value) means no limit.
1761
1762
Return a list of the sections in the bytes, using sep as the delimiter.
1763
[clinic start generated code]*/
1764
1765
static PyObject *
1766
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1767
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1768
2.77M
{
1769
2.77M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1770
2.77M
    const char *s = PyBytes_AS_STRING(self), *sub;
1771
2.77M
    Py_buffer vsub;
1772
2.77M
    PyObject *list;
1773
1774
2.77M
    if (maxsplit < 0)
1775
2.77M
        maxsplit = PY_SSIZE_T_MAX;
1776
2.77M
    if (sep == Py_None)
1777
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1778
2.77M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1779
0
        return NULL;
1780
2.77M
    sub = vsub.buf;
1781
2.77M
    n = vsub.len;
1782
1783
2.77M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1784
2.77M
    PyBuffer_Release(&vsub);
1785
2.77M
    return list;
1786
2.77M
}
1787
1788
/*[clinic input]
1789
@permit_long_docstring_body
1790
bytes.partition
1791
1792
    sep: Py_buffer
1793
    /
1794
1795
Partition the bytes into three parts using the given separator.
1796
1797
This will search for the separator sep in the bytes. If the separator is found,
1798
returns a 3-tuple containing the part before the separator, the separator
1799
itself, and the part after it.
1800
1801
If the separator is not found, returns a 3-tuple containing the original bytes
1802
object and two empty bytes objects.
1803
[clinic start generated code]*/
1804
1805
static PyObject *
1806
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1807
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1808
0
{
1809
0
    return stringlib_partition(
1810
0
        (PyObject*) self,
1811
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1812
0
        sep->obj, (const char *)sep->buf, sep->len
1813
0
        );
1814
0
}
1815
1816
/*[clinic input]
1817
@permit_long_docstring_body
1818
bytes.rpartition
1819
1820
    sep: Py_buffer
1821
    /
1822
1823
Partition the bytes into three parts using the given separator.
1824
1825
This will search for the separator sep in the bytes, starting at the end. If
1826
the separator is found, returns a 3-tuple containing the part before the
1827
separator, the separator itself, and the part after it.
1828
1829
If the separator is not found, returns a 3-tuple containing two empty bytes
1830
objects and the original bytes object.
1831
[clinic start generated code]*/
1832
1833
static PyObject *
1834
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1835
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1836
0
{
1837
0
    return stringlib_rpartition(
1838
0
        (PyObject*) self,
1839
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1840
0
        sep->obj, (const char *)sep->buf, sep->len
1841
0
        );
1842
0
}
1843
1844
/*[clinic input]
1845
@permit_long_docstring_body
1846
bytes.rsplit = bytes.split
1847
1848
Return a list of the sections in the bytes, using sep as the delimiter.
1849
1850
Splitting is done starting at the end of the bytes and working to the front.
1851
[clinic start generated code]*/
1852
1853
static PyObject *
1854
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1855
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1856
0
{
1857
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1858
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1859
0
    Py_buffer vsub;
1860
0
    PyObject *list;
1861
1862
0
    if (maxsplit < 0)
1863
0
        maxsplit = PY_SSIZE_T_MAX;
1864
0
    if (sep == Py_None)
1865
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1866
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1867
0
        return NULL;
1868
0
    sub = vsub.buf;
1869
0
    n = vsub.len;
1870
1871
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1872
0
    PyBuffer_Release(&vsub);
1873
0
    return list;
1874
0
}
1875
1876
1877
/*[clinic input]
1878
bytes.join
1879
1880
    iterable_of_bytes: object
1881
    /
1882
1883
Concatenate any number of bytes objects.
1884
1885
The bytes whose method is called is inserted in between each pair.
1886
1887
The result is returned as a new bytes object.
1888
1889
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1890
[clinic start generated code]*/
1891
1892
static PyObject *
1893
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1894
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1895
8.70k
{
1896
8.70k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1897
8.70k
}
1898
1899
/* Join the items of `iterable` using the bytes object `sep` as separator.

   Returns the result of stringlib_bytes_join(), or NULL with an exception
   set when `sep` is NULL (bad internal call) or is not a bytes object
   (TypeError). */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    /* Happy path first: a real bytes separator. */
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
1914
1915
/*[clinic input]
1916
@permit_long_summary
1917
@text_signature "($self, sub[, start[, end]], /)"
1918
bytes.find
1919
1920
    sub: object
1921
    start: slice_index(accept={int, NoneType}, c_default='0') = None
1922
         Optional start position. Default: start of the bytes.
1923
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
1924
         Optional stop position. Default: end of the bytes.
1925
    /
1926
1927
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1928
1929
Return -1 on failure.
1930
[clinic start generated code]*/
1931
1932
static PyObject *
1933
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1934
                Py_ssize_t end)
1935
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
1936
0
{
1937
0
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1938
0
                          sub, start, end);
1939
0
}
1940
1941
/*[clinic input]
1942
@permit_long_summary
1943
bytes.index = bytes.find
1944
1945
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1946
1947
Raise ValueError if the subsection is not found.
1948
[clinic start generated code]*/
1949
1950
static PyObject *
1951
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1952
                 Py_ssize_t end)
1953
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
1954
0
{
1955
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1956
0
                           sub, start, end);
1957
0
}
1958
1959
/*[clinic input]
1960
@permit_long_summary
1961
bytes.rfind = bytes.find
1962
1963
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1964
1965
Return -1 on failure.
1966
[clinic start generated code]*/
1967
1968
static PyObject *
1969
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1970
                 Py_ssize_t end)
1971
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
1972
20.3k
{
1973
20.3k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1974
20.3k
                           sub, start, end);
1975
20.3k
}
1976
1977
/*[clinic input]
1978
@permit_long_summary
1979
bytes.rindex = bytes.find
1980
1981
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1982
1983
Raise ValueError if the subsection is not found.
1984
[clinic start generated code]*/
1985
1986
static PyObject *
1987
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1988
                  Py_ssize_t end)
1989
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
1990
0
{
1991
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1992
0
                            sub, start, end);
1993
0
}
1994
1995
1996
/* Strip from `self` every leading and/or trailing byte that occurs in the
   buffer exported by `sepobj`.  `striptype` selects the side(s): the left
   side is processed unless it is RIGHTSTRIP, the right side unless it is
   LEFTSTRIP.  Returns NULL if `sepobj` does not export a simple buffer. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_buffer sep_view;

    if (PyObject_GetBuffer(sepobj, &sep_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    char *strip_set = sep_view.buf;
    Py_ssize_t strip_set_len = sep_view.len;

    /* Advance `left` past leading bytes that are members of the set. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!memchr(strip_set, Py_CHARMASK(data[left]), strip_set_len)) {
                break;
            }
        }
    }

    /* Pull `right` (one past the last kept byte) back over trailing
       members of the set, never crossing `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(strip_set, Py_CHARMASK(data[right - 1]),
                         strip_set_len))
        {
            right--;
        }
    }

    PyBuffer_Release(&sep_view);

    /* Nothing removed from an exact bytes object: hand back self. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2034
2035
2036
/* Strip bytes for which Py_ISSPACE() is true from the side(s) of `self`
   selected by `striptype` (left unless RIGHTSTRIP, right unless
   LEFTSTRIP). */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* Index of the first byte to keep. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    /* One past the last byte to keep; never moves below `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    /* Nothing removed from an exact bytes object: hand back self. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2063
2064
2065
/* Dispatch for the strip family: None selects the whitespace variant,
   anything else is treated as the set of bytes to strip. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2073
2074
/*[clinic input]
2075
@permit_long_docstring_body
2076
bytes.strip
2077
2078
    bytes: object = None
2079
    /
2080
2081
Strip leading and trailing bytes contained in the argument.
2082
2083
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2084
[clinic start generated code]*/
2085
2086
static PyObject *
2087
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2088
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2089
0
{
2090
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2091
0
}
2092
2093
/*[clinic input]
2094
bytes.lstrip
2095
2096
    bytes: object = None
2097
    /
2098
2099
Strip leading bytes contained in the argument.
2100
2101
If the argument is omitted or None, strip leading ASCII whitespace.
2102
[clinic start generated code]*/
2103
2104
static PyObject *
2105
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2106
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2107
0
{
2108
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2109
0
}
2110
2111
/*[clinic input]
2112
bytes.rstrip
2113
2114
    bytes: object = None
2115
    /
2116
2117
Strip trailing bytes contained in the argument.
2118
2119
If the argument is omitted or None, strip trailing ASCII whitespace.
2120
[clinic start generated code]*/
2121
2122
static PyObject *
2123
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2124
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2125
0
{
2126
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2127
0
}
2128
2129
2130
/*[clinic input]
2131
@permit_long_summary
2132
bytes.count = bytes.find
2133
2134
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2135
[clinic start generated code]*/
2136
2137
static PyObject *
2138
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2139
                 Py_ssize_t end)
2140
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2141
0
{
2142
0
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2143
0
                           sub, start, end);
2144
0
}
2145
2146
2147
/*[clinic input]
2148
bytes.translate
2149
2150
    table: object
2151
        Translation table, which must be a bytes object of length 256.
2152
    /
2153
    delete as deletechars: object(c_default="NULL") = b''
2154
2155
Return a copy with each character mapped by the given translation table.
2156
2157
All characters occurring in the optional argument delete are removed.
2158
The remaining characters are mapped through the given translation table.
2159
[clinic start generated code]*/
2160
2161
static PyObject *
2162
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2163
                     PyObject *deletechars)
2164
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2165
0
{
2166
0
    const char *input;
2167
0
    char *output;
2168
0
    Py_buffer table_view = {NULL, NULL};
2169
0
    Py_buffer del_table_view = {NULL, NULL};
2170
0
    const char *table_chars;
2171
0
    Py_ssize_t i, c, changed = 0;
2172
0
    PyObject *input_obj = (PyObject*)self;
2173
0
    const char *output_start, *del_table_chars=NULL;
2174
0
    Py_ssize_t inlen, tablen, dellen = 0;
2175
0
    PyObject *result;
2176
0
    int trans_table[256];
2177
2178
0
    if (PyBytes_Check(table)) {
2179
0
        table_chars = PyBytes_AS_STRING(table);
2180
0
        tablen = PyBytes_GET_SIZE(table);
2181
0
    }
2182
0
    else if (table == Py_None) {
2183
0
        table_chars = NULL;
2184
0
        tablen = 256;
2185
0
    }
2186
0
    else {
2187
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2188
0
            return NULL;
2189
0
        table_chars = table_view.buf;
2190
0
        tablen = table_view.len;
2191
0
    }
2192
2193
0
    if (tablen != 256) {
2194
0
        PyErr_SetString(PyExc_ValueError,
2195
0
          "translation table must be 256 characters long");
2196
0
        PyBuffer_Release(&table_view);
2197
0
        return NULL;
2198
0
    }
2199
2200
0
    if (deletechars != NULL) {
2201
0
        if (PyBytes_Check(deletechars)) {
2202
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2203
0
            dellen = PyBytes_GET_SIZE(deletechars);
2204
0
        }
2205
0
        else {
2206
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2207
0
                PyBuffer_Release(&table_view);
2208
0
                return NULL;
2209
0
            }
2210
0
            del_table_chars = del_table_view.buf;
2211
0
            dellen = del_table_view.len;
2212
0
        }
2213
0
    }
2214
0
    else {
2215
0
        del_table_chars = NULL;
2216
0
        dellen = 0;
2217
0
    }
2218
2219
0
    inlen = PyBytes_GET_SIZE(input_obj);
2220
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2221
0
    if (result == NULL) {
2222
0
        PyBuffer_Release(&del_table_view);
2223
0
        PyBuffer_Release(&table_view);
2224
0
        return NULL;
2225
0
    }
2226
0
    output_start = output = PyBytes_AS_STRING(result);
2227
0
    input = PyBytes_AS_STRING(input_obj);
2228
2229
0
    if (dellen == 0 && table_chars != NULL) {
2230
        /* If no deletions are required, use faster code */
2231
0
        for (i = inlen; --i >= 0; ) {
2232
0
            c = Py_CHARMASK(*input++);
2233
0
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2234
0
                changed = 1;
2235
0
        }
2236
0
        if (!changed && PyBytes_CheckExact(input_obj)) {
2237
0
            Py_SETREF(result, Py_NewRef(input_obj));
2238
0
        }
2239
0
        PyBuffer_Release(&del_table_view);
2240
0
        PyBuffer_Release(&table_view);
2241
0
        return result;
2242
0
    }
2243
2244
0
    if (table_chars == NULL) {
2245
0
        for (i = 0; i < 256; i++)
2246
0
            trans_table[i] = Py_CHARMASK(i);
2247
0
    } else {
2248
0
        for (i = 0; i < 256; i++)
2249
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2250
0
    }
2251
0
    PyBuffer_Release(&table_view);
2252
2253
0
    for (i = 0; i < dellen; i++)
2254
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2255
0
    PyBuffer_Release(&del_table_view);
2256
2257
0
    for (i = inlen; --i >= 0; ) {
2258
0
        c = Py_CHARMASK(*input++);
2259
0
        if (trans_table[c] != -1)
2260
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2261
0
                continue;
2262
0
        changed = 1;
2263
0
    }
2264
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2265
0
        Py_DECREF(result);
2266
0
        return Py_NewRef(input_obj);
2267
0
    }
2268
    /* Fix the size of the resulting byte string */
2269
0
    if (inlen > 0)
2270
0
        _PyBytes_Resize(&result, output - output_start);
2271
0
    return result;
2272
0
}
2273
2274
2275
/*[clinic input]
2276
2277
@permit_long_summary
2278
@permit_long_docstring_body
2279
@staticmethod
2280
bytes.maketrans
2281
2282
    frm: Py_buffer
2283
    to: Py_buffer
2284
    /
2285
2286
Return a translation table usable for the bytes or bytearray translate method.
2287
2288
The returned table will be one where each byte in frm is mapped to the byte at
2289
the same position in to.
2290
2291
The bytes objects frm and to must be of the same length.
2292
[clinic start generated code]*/
2293
2294
static PyObject *
2295
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2296
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2297
28
{
2298
28
    return _Py_bytes_maketrans(frm, to);
2299
28
}
2300
2301
2302
/*[clinic input]
2303
@permit_long_docstring_body
2304
bytes.replace
2305
2306
    old: Py_buffer
2307
    new: Py_buffer
2308
    count: Py_ssize_t = -1
2309
        Maximum number of occurrences to replace.
2310
        -1 (the default value) means replace all occurrences.
2311
    /
2312
2313
Return a copy with all occurrences of substring old replaced by new.
2314
2315
If the optional argument count is given, only the first count occurrences are
2316
replaced.
2317
[clinic start generated code]*/
2318
2319
static PyObject *
2320
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2321
                   Py_ssize_t count)
2322
/*[clinic end generated code: output=994fa588b6b9c104 input=8b99a9ab32bc06a2]*/
2323
34.5k
{
2324
34.5k
    return stringlib_replace((PyObject *)self,
2325
34.5k
                             (const char *)old->buf, old->len,
2326
34.5k
                             (const char *)new->buf, new->len, count);
2327
34.5k
}
2328
2329
/** End DALKE **/
2330
2331
/*[clinic input]
2332
bytes.removeprefix as bytes_removeprefix
2333
2334
    prefix: Py_buffer
2335
    /
2336
2337
Return a bytes object with the given prefix string removed if present.
2338
2339
If the bytes starts with the prefix string, return bytes[len(prefix):].
2340
Otherwise, return a copy of the original bytes.
2341
[clinic start generated code]*/
2342
2343
static PyObject *
2344
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2345
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2346
0
{
2347
0
    const char *self_start = PyBytes_AS_STRING(self);
2348
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2349
0
    const char *prefix_start = prefix->buf;
2350
0
    Py_ssize_t prefix_len = prefix->len;
2351
2352
0
    if (self_len >= prefix_len
2353
0
        && prefix_len > 0
2354
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2355
0
    {
2356
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2357
0
                                         self_len - prefix_len);
2358
0
    }
2359
2360
0
    if (PyBytes_CheckExact(self)) {
2361
0
        return Py_NewRef(self);
2362
0
    }
2363
2364
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2365
0
}
2366
2367
/*[clinic input]
2368
bytes.removesuffix as bytes_removesuffix
2369
2370
    suffix: Py_buffer
2371
    /
2372
2373
Return a bytes object with the given suffix string removed if present.
2374
2375
If the bytes ends with the suffix string and that suffix is not empty,
2376
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2377
bytes.
2378
[clinic start generated code]*/
2379
2380
static PyObject *
2381
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2382
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2383
0
{
2384
0
    const char *self_start = PyBytes_AS_STRING(self);
2385
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2386
0
    const char *suffix_start = suffix->buf;
2387
0
    Py_ssize_t suffix_len = suffix->len;
2388
2389
0
    if (self_len >= suffix_len
2390
0
        && suffix_len > 0
2391
0
        && memcmp(self_start + self_len - suffix_len,
2392
0
                  suffix_start, suffix_len) == 0)
2393
0
    {
2394
0
        return PyBytes_FromStringAndSize(self_start,
2395
0
                                         self_len - suffix_len);
2396
0
    }
2397
2398
0
    if (PyBytes_CheckExact(self)) {
2399
0
        return Py_NewRef(self);
2400
0
    }
2401
2402
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2403
0
}
2404
2405
/*[clinic input]
2406
@permit_long_summary
2407
@text_signature "($self, prefix[, start[, end]], /)"
2408
bytes.startswith
2409
2410
    prefix as subobj: object
2411
        A bytes or a tuple of bytes to try.
2412
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2413
        Optional start position. Default: start of the bytes.
2414
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2415
        Optional stop position. Default: end of the bytes.
2416
    /
2417
2418
Return True if the bytes starts with the specified prefix, False otherwise.
2419
[clinic start generated code]*/
2420
2421
static PyObject *
2422
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2423
                      Py_ssize_t start, Py_ssize_t end)
2424
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2425
485k
{
2426
485k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2427
485k
                                subobj, start, end);
2428
485k
}
2429
2430
/*[clinic input]
2431
@permit_long_summary
2432
@text_signature "($self, suffix[, start[, end]], /)"
2433
bytes.endswith
2434
2435
    suffix as subobj: object
2436
        A bytes or a tuple of bytes to try.
2437
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2438
         Optional start position. Default: start of the bytes.
2439
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2440
         Optional stop position. Default: end of the bytes.
2441
    /
2442
2443
Return True if the bytes ends with the specified suffix, False otherwise.
2444
[clinic start generated code]*/
2445
2446
static PyObject *
2447
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2448
                    Py_ssize_t end)
2449
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2450
0
{
2451
0
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2452
0
                              subobj, start, end);
2453
0
}
2454
2455
2456
/*[clinic input]
2457
bytes.decode
2458
2459
    encoding: str(c_default="NULL") = 'utf-8'
2460
        The encoding with which to decode the bytes.
2461
    errors: str(c_default="NULL") = 'strict'
2462
        The error handling scheme to use for the handling of decoding errors.
2463
        The default is 'strict' meaning that decoding errors raise a
2464
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2465
        as well as any other name registered with codecs.register_error that
2466
        can handle UnicodeDecodeErrors.
2467
2468
Decode the bytes using the codec registered for encoding.
2469
[clinic start generated code]*/
2470
2471
static PyObject *
2472
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2473
                  const char *errors)
2474
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2475
4.14M
{
2476
4.14M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2477
4.14M
}
2478
2479
2480
/*[clinic input]
2481
@permit_long_docstring_body
2482
bytes.splitlines
2483
2484
    keepends: bool = False
2485
2486
Return a list of the lines in the bytes, breaking at line boundaries.
2487
2488
Line breaks are not included in the resulting list unless keepends is given and
2489
true.
2490
[clinic start generated code]*/
2491
2492
static PyObject *
2493
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2494
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2495
0
{
2496
0
    return stringlib_splitlines(
2497
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2498
0
        PyBytes_GET_SIZE(self), keepends
2499
0
        );
2500
0
}
2501
2502
/*[clinic input]
2503
@classmethod
2504
bytes.fromhex
2505
2506
    string: object
2507
    /
2508
2509
Create a bytes object from a string of hexadecimal numbers.
2510
2511
Spaces between two numbers are accepted.
2512
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2513
[clinic start generated code]*/
2514
2515
static PyObject *
2516
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2517
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2518
34.1k
{
2519
34.1k
    PyObject *result = _PyBytes_FromHex(string, 0);
2520
34.1k
    if (type != &PyBytes_Type && result != NULL) {
2521
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2522
0
    }
2523
34.1k
    return result;
2524
34.1k
}
2525
2526
/* Parse pairs of hexadecimal digits from `string` into a new object built
   with a _PyBytesWriter.

   `string` must be a str or an object exporting a buffer; anything else
   raises TypeError.  Runs of Py_ISSPACE() characters are skipped before the
   first digit of a pair.  `use_bytearray` is stored in the writer's
   use_bytearray field.

   Returns the result of _PyBytesWriter_Finish(), or NULL with an exception
   set: ValueError for a non-hexadecimal (or non-ASCII) character or for an
   odd number of digits.

   Every read of *str is bounded by `end`: a buffer obtained through
   PyObject_GetBuffer() only provides view.len bytes, so nothing past the
   view may be inspected. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    char *buf;
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    _PyBytesWriter writer;
    Py_buffer view;
    view.obj = NULL;   /* marks "no buffer acquired" for the cleanup code */

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
    if (buf == NULL) {
        goto release_buffer;
    }

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking past `end` */
        while (str < end && Py_ISSPACE(*str)) {
            str++;
        }
        if (str >= end) {
            break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The input ended after the first digit of a pair: report an odd
           digit count without reading the byte at `end`. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return _PyBytesWriter_Finish(&writer, buf);

  error:
    /* invalid_char == -1 encodes "odd number of digits"; any other value
       is the offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    _PyBytesWriter_Dealloc(&writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2636
2637
/*[clinic input]
2638
bytes.hex
2639
2640
    sep: object = NULL
2641
        An optional single character or byte to separate hex bytes.
2642
    bytes_per_sep: int = 1
2643
        How many bytes between separators.  Positive values count from the
2644
        right, negative values count from the left.
2645
2646
Create a string of hexadecimal numbers from a bytes object.
2647
2648
Example:
2649
>>> value = b'\xb9\x01\xef'
2650
>>> value.hex()
2651
'b901ef'
2652
>>> value.hex(':')
2653
'b9:01:ef'
2654
>>> value.hex(':', 2)
2655
'b9:01ef'
2656
>>> value.hex(':', -2)
2657
'b901:ef'
2658
[clinic start generated code]*/
2659
2660
static PyObject *
2661
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2662
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2663
0
{
2664
0
    const char *argbuf = PyBytes_AS_STRING(self);
2665
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2666
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2667
0
}
2668
2669
/* __getnewargs__: build a 1-tuple holding a bytes copy of op's contents
   (format "(y#)": pointer plus explicit length). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *contents = self->ob_sval;
    Py_ssize_t length = Py_SIZE(self);

    return Py_BuildValue("(y#)", contents, length);
}
2675
2676
2677
static PyMethodDef
2678
bytes_methods[] = {
2679
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2680
    BYTES___BYTES___METHODDEF
2681
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2682
     _Py_capitalize__doc__},
2683
    STRINGLIB_CENTER_METHODDEF
2684
    BYTES_COUNT_METHODDEF
2685
    BYTES_DECODE_METHODDEF
2686
    BYTES_ENDSWITH_METHODDEF
2687
    STRINGLIB_EXPANDTABS_METHODDEF
2688
    BYTES_FIND_METHODDEF
2689
    BYTES_FROMHEX_METHODDEF
2690
    BYTES_HEX_METHODDEF
2691
    BYTES_INDEX_METHODDEF
2692
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2693
     _Py_isalnum__doc__},
2694
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2695
     _Py_isalpha__doc__},
2696
    {"isascii", stringlib_isascii, METH_NOARGS,
2697
     _Py_isascii__doc__},
2698
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2699
     _Py_isdigit__doc__},
2700
    {"islower", stringlib_islower, METH_NOARGS,
2701
     _Py_islower__doc__},
2702
    {"isspace", stringlib_isspace, METH_NOARGS,
2703
     _Py_isspace__doc__},
2704
    {"istitle", stringlib_istitle, METH_NOARGS,
2705
     _Py_istitle__doc__},
2706
    {"isupper", stringlib_isupper, METH_NOARGS,
2707
     _Py_isupper__doc__},
2708
    BYTES_JOIN_METHODDEF
2709
    STRINGLIB_LJUST_METHODDEF
2710
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2711
    BYTES_LSTRIP_METHODDEF
2712
    BYTES_MAKETRANS_METHODDEF
2713
    BYTES_PARTITION_METHODDEF
2714
    BYTES_REPLACE_METHODDEF
2715
    BYTES_REMOVEPREFIX_METHODDEF
2716
    BYTES_REMOVESUFFIX_METHODDEF
2717
    BYTES_RFIND_METHODDEF
2718
    BYTES_RINDEX_METHODDEF
2719
    STRINGLIB_RJUST_METHODDEF
2720
    BYTES_RPARTITION_METHODDEF
2721
    BYTES_RSPLIT_METHODDEF
2722
    BYTES_RSTRIP_METHODDEF
2723
    BYTES_SPLIT_METHODDEF
2724
    BYTES_SPLITLINES_METHODDEF
2725
    BYTES_STARTSWITH_METHODDEF
2726
    BYTES_STRIP_METHODDEF
2727
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2728
     _Py_swapcase__doc__},
2729
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2730
    BYTES_TRANSLATE_METHODDEF
2731
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2732
    STRINGLIB_ZFILL_METHODDEF
2733
    {NULL,     NULL}                         /* sentinel */
2734
};
2735
2736
/* nb_remainder slot: percent-formatting with a bytes left operand.
   Returns NotImplemented when `self` is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);

        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2745
2746
static PyNumberMethods bytes_as_number = {
2747
    0,              /*nb_add*/
2748
    0,              /*nb_subtract*/
2749
    0,              /*nb_multiply*/
2750
    bytes_mod,      /*nb_remainder*/
2751
};
2752
2753
static PyObject *
2754
bytes_subtype_new(PyTypeObject *, PyObject *);
2755
2756
/*[clinic input]
2757
@classmethod
2758
bytes.__new__ as bytes_new
2759
2760
    source as x: object = NULL
2761
    encoding: str = NULL
2762
    errors: str = NULL
2763
2764
[clinic start generated code]*/
2765
2766
static PyObject *
2767
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2768
               const char *errors)
2769
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2770
859k
{
2771
859k
    PyObject *bytes;
2772
859k
    PyObject *func;
2773
859k
    Py_ssize_t size;
2774
2775
859k
    if (x == NULL) {
2776
0
        if (encoding != NULL || errors != NULL) {
2777
0
            PyErr_SetString(PyExc_TypeError,
2778
0
                            encoding != NULL ?
2779
0
                            "encoding without a string argument" :
2780
0
                            "errors without a string argument");
2781
0
            return NULL;
2782
0
        }
2783
0
        bytes = PyBytes_FromStringAndSize(NULL, 0);
2784
0
    }
2785
859k
    else if (encoding != NULL) {
2786
        /* Encode via the codec registry */
2787
213k
        if (!PyUnicode_Check(x)) {
2788
0
            PyErr_SetString(PyExc_TypeError,
2789
0
                            "encoding without a string argument");
2790
0
            return NULL;
2791
0
        }
2792
213k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2793
213k
    }
2794
646k
    else if (errors != NULL) {
2795
0
        PyErr_SetString(PyExc_TypeError,
2796
0
                        PyUnicode_Check(x) ?
2797
0
                        "string argument without an encoding" :
2798
0
                        "errors without a string argument");
2799
0
        return NULL;
2800
0
    }
2801
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2802
       integer argument before deferring to PyBytes_FromObject, something
2803
       PyObject_Bytes doesn't do. */
2804
646k
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2805
86.5k
        bytes = _PyObject_CallNoArgs(func);
2806
86.5k
        Py_DECREF(func);
2807
86.5k
        if (bytes == NULL)
2808
0
            return NULL;
2809
86.5k
        if (!PyBytes_Check(bytes)) {
2810
0
            PyErr_Format(PyExc_TypeError,
2811
0
                         "%T.__bytes__() must return a bytes, not %T",
2812
0
                         x, bytes);
2813
0
            Py_DECREF(bytes);
2814
0
            return NULL;
2815
0
        }
2816
86.5k
    }
2817
560k
    else if (PyErr_Occurred())
2818
0
        return NULL;
2819
560k
    else if (PyUnicode_Check(x)) {
2820
0
        PyErr_SetString(PyExc_TypeError,
2821
0
                        "string argument without an encoding");
2822
0
        return NULL;
2823
0
    }
2824
    /* Is it an integer? */
2825
560k
    else if (_PyIndex_Check(x)) {
2826
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2827
0
        if (size == -1 && PyErr_Occurred()) {
2828
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2829
0
                return NULL;
2830
0
            PyErr_Clear();  /* fall through */
2831
0
            bytes = PyBytes_FromObject(x);
2832
0
        }
2833
0
        else {
2834
0
            if (size < 0) {
2835
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2836
0
                return NULL;
2837
0
            }
2838
0
            bytes = _PyBytes_FromSize(size, 1);
2839
0
        }
2840
0
    }
2841
560k
    else {
2842
560k
        bytes = PyBytes_FromObject(x);
2843
560k
    }
2844
2845
859k
    if (bytes != NULL && type != &PyBytes_Type) {
2846
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2847
0
    }
2848
2849
859k
    return bytes;
2850
859k
}
2851
2852
/* Create a bytes object holding a C-contiguous copy of the data exported by
   `x` through the buffer protocol (requested with PyBUF_FULL_RO).
   Returns a new reference, or NULL on failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;

    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyObject *result = PyBytes_FromStringAndSize(NULL, view.len);
    if (result != NULL) {
        char *dest = ((PyBytesObject *)result)->ob_sval;
        if (PyBuffer_ToContiguous(dest, &view, view.len, 'C') < 0) {
            /* Copy failed: drop the partially built object. */
            Py_CLEAR(result);
        }
    }

    /* The view is released on both the success and the failure path. */
    PyBuffer_Release(&view);
    return result;
}
2875
2876
/* Build a bytes object from an exact list whose items are converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256).
   Returns a new reference, or NULL with an exception set. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    _PyBytesWriter writer;
    _PyBytesWriter_Init(&writer);

    char *out = _PyBytesWriter_Alloc(&writer, PyList_GET_SIZE(x));
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    Py_ssize_t capacity = writer.allocated;

    /* The list length is re-read on every pass instead of being cached. */
    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        PyObject *elem = PyList_GET_ITEM(x, pos);

        /* Keep the item alive during the conversion (presumably because
           the conversion may run code that mutates the list). */
        Py_INCREF(elem);
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (pos >= capacity) {
            /* Out of room: grow by at least one byte. */
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) byte;
        pos++;
    }
    return _PyBytesWriter_Finish(&writer, out);

  error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2920
2921
/* Build a bytes object from an exact tuple whose items are converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256).
   Returns a new reference, or NULL with an exception set. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyObject *result = PyBytes_FromStringAndSize(NULL, count);
    if (result == NULL) {
        return NULL;
    }
    char *out = ((PyBytesObject *)result)->ob_sval;

    for (Py_ssize_t k = 0; k < count; k++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, k), NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }
        out[k] = (char) byte;
    }
    return result;

  error:
    Py_DECREF(result);
    return NULL;
}
2954
2955
/* Build a bytes object by draining iterator `it`.  `x` is only used to ask
   for a length hint (default 64) that sizes the initial buffer.  Each item
   is converted with PyNumber_AsSsize_t() and must lie in range(0, 256).
   Returns a new reference, or NULL with an exception set. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    /* For iterator version, create a bytes object and resize as needed */
    Py_ssize_t capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    _PyBytesWriter writer;
    _PyBytesWriter_Init(&writer);
    char *out = _PyBytesWriter_Alloc(&writer, capacity);
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    /* Run the iterator to exhaustion */
    Py_ssize_t written = 0;
    PyObject *elem;
    while ((elem = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte */
        if (written >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) byte;
        written++;
    }

    /* PyIter_Next() returned NULL: either exhaustion or an error. */
    if (PyErr_Occurred()) {
        goto error;
    }

    return _PyBytesWriter_Finish(&writer, out);

  error:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
3016
3017
/* Convert `x` to a bytes object.  Tried in order: exact bytes (returned as
   a new reference to the same object), buffer exporters, exact lists,
   exact tuples, then any non-str iterable.  Anything else raises TypeError.
   Returns a new reference, or NULL with an exception set. */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iter = PyObject_GetIter(x);
        if (iter == NULL) {
            /* Only a TypeError from PyObject_GetIter() is replaced by the
               generic message below; other errors propagate as they are. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iter, x);
            Py_DECREF(iter);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3058
3059
/* This allocator is needed for subclasses that don't want to use __new__.
3060
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3061
 *
3062
 * This allocator will be removed when ob_shash is removed.
3063
 */
3064
static PyObject *
3065
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3066
0
{
3067
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3068
0
    if (obj == NULL) {
3069
0
        return NULL;
3070
0
    }
3071
0
    set_ob_shash(obj, -1);
3072
0
    return (PyObject*)obj;
3073
0
}
3074
3075
static PyObject *
3076
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3077
0
{
3078
0
    PyObject *pnew;
3079
0
    Py_ssize_t n;
3080
3081
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3082
0
    assert(PyBytes_Check(tmp));
3083
0
    n = PyBytes_GET_SIZE(tmp);
3084
0
    pnew = type->tp_alloc(type, n);
3085
0
    if (pnew != NULL) {
3086
0
        memcpy(PyBytes_AS_STRING(pnew),
3087
0
                  PyBytes_AS_STRING(tmp), n+1);
3088
0
        set_ob_shash((PyBytesObject *)pnew,
3089
0
            get_ob_shash((PyBytesObject *)tmp));
3090
0
    }
3091
0
    return pnew;
3092
0
}
3093
3094
/* Docstring of the bytes type; lists the accepted constructor call forms. */
PyDoc_STRVAR(bytes_doc,
3095
"bytes(iterable_of_ints) -> bytes\n\
3096
bytes(string, encoding[, errors]) -> bytes\n\
3097
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3098
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3099
bytes() -> empty bytes object\n\
3100
\n\
3101
Construct an immutable array of bytes from:\n\
3102
  - an iterable yielding integers in range(256)\n\
3103
  - a text string encoded using the specified encoding\n\
3104
  - any object implementing the buffer API.\n\
3105
  - an integer");
3106
3107
static PyObject *bytes_iter(PyObject *seq);
3108
3109
PyTypeObject PyBytes_Type = {
3110
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3111
    "bytes",
3112
    PyBytesObject_SIZE,
3113
    sizeof(char),
3114
    0,                                          /* tp_dealloc */
3115
    0,                                          /* tp_vectorcall_offset */
3116
    0,                                          /* tp_getattr */
3117
    0,                                          /* tp_setattr */
3118
    0,                                          /* tp_as_async */
3119
    bytes_repr,                                 /* tp_repr */
3120
    &bytes_as_number,                           /* tp_as_number */
3121
    &bytes_as_sequence,                         /* tp_as_sequence */
3122
    &bytes_as_mapping,                          /* tp_as_mapping */
3123
    bytes_hash,                                 /* tp_hash */
3124
    0,                                          /* tp_call */
3125
    bytes_str,                                  /* tp_str */
3126
    PyObject_GenericGetAttr,                    /* tp_getattro */
3127
    0,                                          /* tp_setattro */
3128
    &bytes_as_buffer,                           /* tp_as_buffer */
3129
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3130
        Py_TPFLAGS_BYTES_SUBCLASS |
3131
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3132
    bytes_doc,                                  /* tp_doc */
3133
    0,                                          /* tp_traverse */
3134
    0,                                          /* tp_clear */
3135
    bytes_richcompare,                          /* tp_richcompare */
3136
    0,                                          /* tp_weaklistoffset */
3137
    bytes_iter,                                 /* tp_iter */
3138
    0,                                          /* tp_iternext */
3139
    bytes_methods,                              /* tp_methods */
3140
    0,                                          /* tp_members */
3141
    0,                                          /* tp_getset */
3142
    0,                                          /* tp_base */
3143
    0,                                          /* tp_dict */
3144
    0,                                          /* tp_descr_get */
3145
    0,                                          /* tp_descr_set */
3146
    0,                                          /* tp_dictoffset */
3147
    0,                                          /* tp_init */
3148
    bytes_alloc,                                /* tp_alloc */
3149
    bytes_new,                                  /* tp_new */
3150
    PyObject_Free,                              /* tp_free */
3151
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3152
};
3153
3154
void
3155
PyBytes_Concat(PyObject **pv, PyObject *w)
3156
3.25k
{
3157
3.25k
    assert(pv != NULL);
3158
3.25k
    if (*pv == NULL)
3159
0
        return;
3160
3.25k
    if (w == NULL) {
3161
0
        Py_CLEAR(*pv);
3162
0
        return;
3163
0
    }
3164
3165
3.25k
    if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3166
        /* Only one reference, so we can resize in place */
3167
456
        Py_ssize_t oldsize;
3168
456
        Py_buffer wb;
3169
3170
456
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3171
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3172
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3173
0
            Py_CLEAR(*pv);
3174
0
            return;
3175
0
        }
3176
3177
456
        oldsize = PyBytes_GET_SIZE(*pv);
3178
456
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3179
0
            PyErr_NoMemory();
3180
0
            goto error;
3181
0
        }
3182
456
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3183
0
            goto error;
3184
3185
456
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3186
456
        PyBuffer_Release(&wb);
3187
456
        return;
3188
3189
0
      error:
3190
0
        PyBuffer_Release(&wb);
3191
0
        Py_CLEAR(*pv);
3192
0
        return;
3193
456
    }
3194
3195
2.80k
    else {
3196
        /* Multiple references, need to create new object */
3197
2.80k
        PyObject *v;
3198
2.80k
        v = bytes_concat(*pv, w);
3199
2.80k
        Py_SETREF(*pv, v);
3200
2.80k
    }
3201
3.25k
}
3202
3203
/* Same as PyBytes_Concat(pv, w), but additionally releases one reference to
   `w` afterwards.  `w` may be NULL (Py_XDECREF is used). */
void
3204
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3205
0
{
3206
0
    PyBytes_Concat(pv, w);
3207
0
    Py_XDECREF(w);
3208
0
}
3209
3210
3211
/* The following function breaks the notion that bytes are immutable:
3212
   it changes the size of a bytes object.  You can think of it
3213
   as creating a new bytes object and destroying the old one, only
3214
   more efficiently.
3215
   Note that if there's not enough memory to resize the bytes object, the
3216
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3217
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3218
   returned, and the value in *pv may or may not be the same as on input.
3219
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3220
   does *not* include that), and a trailing \0 byte is stored.
3221
*/
3222
3223
int
3224
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3225
872k
{
3226
872k
    PyObject *v;
3227
872k
    PyBytesObject *sv;
3228
872k
    v = *pv;
3229
872k
    if (!PyBytes_Check(v) || newsize < 0) {
3230
0
        *pv = 0;
3231
0
        Py_DECREF(v);
3232
0
        PyErr_BadInternalCall();
3233
0
        return -1;
3234
0
    }
3235
872k
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3236
872k
    if (oldsize == newsize) {
3237
        /* return early if newsize equals to v->ob_size */
3238
604k
        return 0;
3239
604k
    }
3240
268k
    if (oldsize == 0) {
3241
0
        *pv = _PyBytes_FromSize(newsize, 0);
3242
0
        Py_DECREF(v);
3243
0
        return (*pv == NULL) ? -1 : 0;
3244
0
    }
3245
268k
    if (newsize == 0) {
3246
5.51k
        *pv = bytes_get_empty();
3247
5.51k
        Py_DECREF(v);
3248
5.51k
        return 0;
3249
5.51k
    }
3250
262k
    if (Py_REFCNT(v) != 1) {
3251
0
        if (oldsize < newsize) {
3252
0
            *pv = _PyBytes_FromSize(newsize, 0);
3253
0
            if (*pv) {
3254
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3255
0
            }
3256
0
        }
3257
0
        else {
3258
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3259
0
        }
3260
0
        Py_DECREF(v);
3261
0
        return (*pv == NULL) ? -1 : 0;
3262
0
    }
3263
3264
#ifdef Py_TRACE_REFS
3265
    _Py_ForgetReference(v);
3266
#endif
3267
262k
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3268
262k
    *pv = (PyObject *)
3269
262k
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3270
262k
    if (*pv == NULL) {
3271
#ifdef Py_REF_DEBUG
3272
        _Py_DecRefTotal(_PyThreadState_GET());
3273
#endif
3274
0
        PyObject_Free(v);
3275
0
        PyErr_NoMemory();
3276
0
        return -1;
3277
0
    }
3278
262k
    _Py_NewReferenceNoTotal(*pv);
3279
262k
    sv = (PyBytesObject *) *pv;
3280
262k
    Py_SET_SIZE(sv, newsize);
3281
262k
    sv->ob_sval[newsize] = '\0';
3282
262k
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3283
262k
    return 0;
3284
262k
}
3285
3286
3287
/*********************** Bytes Iterator ****************************/
3288
3289
/* State of a bytes iterator. */
typedef struct {
3290
    PyObject_HEAD
3291
    Py_ssize_t it_index;   /* Index of the next byte to yield from it_seq */
3292
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3293
} striterobject;
3294
3295
1.75k
/* Cast an object pointer to striterobject * (plain C cast, no type check). */
#define _striterobject_CAST(op)  ((striterobject *)(op))
3296
3297
static void
3298
striter_dealloc(PyObject *op)
3299
52
{
3300
52
    striterobject *it = _striterobject_CAST(op);
3301
52
    _PyObject_GC_UNTRACK(it);
3302
52
    Py_XDECREF(it->it_seq);
3303
52
    PyObject_GC_Del(it);
3304
52
}
3305
3306
static int
3307
striter_traverse(PyObject *op, visitproc visit, void *arg)
3308
0
{
3309
0
    striterobject *it = _striterobject_CAST(op);
3310
0
    Py_VISIT(it->it_seq);
3311
0
    return 0;
3312
0
}
3313
3314
/* tp_iternext for the bytes iterator: return the next byte as an int, or
   NULL (without setting an exception) once the sequence is exhausted.  On
   exhaustion the reference to the sequence is dropped and it_seq is set to
   NULL. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *it = _striterobject_CAST(op);
    assert(it != NULL);

    PyBytesObject *seq = it->it_seq;
    if (seq == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(seq));

    if (it->it_index >= PyBytes_GET_SIZE(seq)) {
        /* Past the end: detach from the sequence. */
        it->it_seq = NULL;
        Py_DECREF(seq);
        return NULL;
    }

    unsigned char byte = (unsigned char)seq->ob_sval[it->it_index];
    it->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3335
3336
/* __length_hint__ implementation: number of bytes between the current
   index and the end of the sequence, or 0 when it_seq is NULL. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *self = _striterobject_CAST(op);
    Py_ssize_t remaining = (self->it_seq != NULL)
        ? PyBytes_GET_SIZE(self->it_seq) - self->it_index
        : 0;

    return PyLong_FromSsize_t(remaining);
}
3345
3346
/* Docstring attached to the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
3347
             "Private method returning an estimate of len(list(it)).");
3348
3349
/* __reduce__ implementation: build (iter, (seq,), index) for a live
   iterator, or (iter, ((),)) once it_seq is NULL. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    PyObject *iter_func = _PyEval_GetBuiltin(&_Py_ID(iter));

    /* _PyEval_GetBuiltin can invoke arbitrary code,
     * call must be before access of iterator pointers.
     * see issue #101765 */
    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_func);
    }
    return Py_BuildValue("N(O)n", iter_func, self->it_seq, self->it_index);
}
3364
3365
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3366
3367
/* __setstate__ implementation: set the iterator position from an int,
   clamped to [0, len(seq)].  Does nothing to the position when it_seq is
   NULL.  Returns None, or NULL if `state` cannot be converted. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq != NULL) {
        const Py_ssize_t limit = PyBytes_GET_SIZE(self->it_seq);
        if (index < 0) {
            index = 0;
        }
        else if (index > limit) {
            index = limit; /* iterator exhausted */
        }
        self->it_index = index;
    }
    Py_RETURN_NONE;
}
3383
3384
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3385
3386
/* Method table of the bytes iterator: length hint plus the two methods
   whose docstrings describe them as pickling support. */
static PyMethodDef striter_methods[] = {
3387
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3388
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3389
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3390
    {NULL,              NULL}           /* sentinel */
3391
};
3392
3393
PyTypeObject PyBytesIter_Type = {
3394
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3395
    "bytes_iterator",                           /* tp_name */
3396
    sizeof(striterobject),                      /* tp_basicsize */
3397
    0,                                          /* tp_itemsize */
3398
    /* methods */
3399
    striter_dealloc,                            /* tp_dealloc */
3400
    0,                                          /* tp_vectorcall_offset */
3401
    0,                                          /* tp_getattr */
3402
    0,                                          /* tp_setattr */
3403
    0,                                          /* tp_as_async */
3404
    0,                                          /* tp_repr */
3405
    0,                                          /* tp_as_number */
3406
    0,                                          /* tp_as_sequence */
3407
    0,                                          /* tp_as_mapping */
3408
    0,                                          /* tp_hash */
3409
    0,                                          /* tp_call */
3410
    0,                                          /* tp_str */
3411
    PyObject_GenericGetAttr,                    /* tp_getattro */
3412
    0,                                          /* tp_setattro */
3413
    0,                                          /* tp_as_buffer */
3414
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3415
    0,                                          /* tp_doc */
3416
    striter_traverse,                           /* tp_traverse */
3417
    0,                                          /* tp_clear */
3418
    0,                                          /* tp_richcompare */
3419
    0,                                          /* tp_weaklistoffset */
3420
    PyObject_SelfIter,                          /* tp_iter */
3421
    striter_next,                               /* tp_iternext */
3422
    striter_methods,                            /* tp_methods */
3423
    0,
3424
};
3425
3426
/* tp_iter for bytes: create a GC-tracked iterator positioned at index 0
   that holds a new reference to `seq`.  Returns NULL on a non-bytes
   argument or when the iterator cannot be allocated. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iter = PyObject_GC_New(striterobject, &PyBytesIter_Type);
    if (iter != NULL) {
        iter->it_index = 0;
        iter->it_seq = (PyBytesObject *)Py_NewRef(seq);
        _PyObject_GC_TRACK(iter);
    }
    return (PyObject *)iter;
}
3443
3444
3445
/* _PyBytesWriter API */
3446
3447
/* The writer grows a requested size by size / OVERALLOCATE_FACTOR when
   overallocation is enabled. */
#ifdef MS_WINDOWS
3448
   /* On Windows, overallocating by 50% is the best factor */
3449
#  define OVERALLOCATE_FACTOR 2
3450
#else
3451
   /* On Linux, overallocating by 25% is the best factor */
3452
0
#  define OVERALLOCATE_FACTOR 4
3453
#endif
3454
3455
void
3456
_PyBytesWriter_Init(_PyBytesWriter *writer)
3457
6.31M
{
3458
    /* Set all attributes before small_buffer to 0 */
3459
6.31M
    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3460
#ifndef NDEBUG
3461
    memset(writer->small_buffer, PYMEM_CLEANBYTE,
3462
           sizeof(writer->small_buffer));
3463
#endif
3464
6.31M
}
3465
3466
/* Release the writer's heap buffer object, if any, and reset
   writer->buffer to NULL.  No other writer field is touched. */
void
3467
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3468
437k
{
3469
437k
    Py_CLEAR(writer->buffer);
3470
437k
}
3471
3472
Py_LOCAL_INLINE(char*)
3473
_PyBytesWriter_AsString(_PyBytesWriter *writer)
3474
6.30M
{
3475
6.30M
    if (writer->use_small_buffer) {
3476
5.92M
        assert(writer->buffer == NULL);
3477
5.92M
        return writer->small_buffer;
3478
5.92M
    }
3479
386k
    else if (writer->use_bytearray) {
3480
0
        assert(writer->buffer != NULL);
3481
0
        return PyByteArray_AS_STRING(writer->buffer);
3482
0
    }
3483
386k
    else {
3484
386k
        assert(writer->buffer != NULL);
3485
386k
        return PyBytes_AS_STRING(writer->buffer);
3486
386k
    }
3487
6.30M
}
3488
3489
Py_LOCAL_INLINE(Py_ssize_t)
3490
_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3491
6.09M
{
3492
6.09M
    const char *start = _PyBytesWriter_AsString(writer);
3493
6.09M
    assert(str != NULL);
3494
6.09M
    assert(str >= start);
3495
6.09M
    assert(str - start <= writer->allocated);
3496
6.09M
    return str - start;
3497
6.09M
}
3498
3499
#ifndef NDEBUG
3500
Py_LOCAL_INLINE(int)
3501
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3502
{
3503
    const char *start, *end;
3504
3505
    if (writer->use_small_buffer) {
3506
        assert(writer->buffer == NULL);
3507
    }
3508
    else {
3509
        assert(writer->buffer != NULL);
3510
        if (writer->use_bytearray)
3511
            assert(PyByteArray_CheckExact(writer->buffer));
3512
        else
3513
            assert(PyBytes_CheckExact(writer->buffer));
3514
        assert(Py_REFCNT(writer->buffer) == 1);
3515
    }
3516
3517
    if (writer->use_bytearray) {
3518
        /* bytearray has its own overallocation algorithm,
3519
           writer overallocation must be disabled */
3520
        assert(!writer->overallocate);
3521
    }
3522
3523
    assert(0 <= writer->allocated);
3524
    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3525
    /* the last byte must always be null */
3526
    start = _PyBytesWriter_AsString(writer);
3527
    assert(start[writer->allocated] == 0);
3528
3529
    end = start + writer->allocated;
3530
    assert(str != NULL);
3531
    assert(start <= str && str <= end);
3532
    return 1;
3533
}
3534
#endif
3535
3536
void*
3537
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3538
214k
{
3539
214k
    Py_ssize_t allocated, pos;
3540
3541
214k
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3542
214k
    assert(writer->allocated < size);
3543
3544
214k
    allocated = size;
3545
214k
    if (writer->overallocate
3546
214k
        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3547
        /* overallocate to limit the number of realloc() */
3548
0
        allocated += allocated / OVERALLOCATE_FACTOR;
3549
0
    }
3550
3551
214k
    pos = _PyBytesWriter_GetSize(writer, str);
3552
214k
    if (!writer->use_small_buffer) {
3553
0
        if (writer->use_bytearray) {
3554
0
            if (PyByteArray_Resize(writer->buffer, allocated))
3555
0
                goto error;
3556
            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3557
               but we cannot use ob_alloc because bytes may need to be moved
3558
               to use the whole buffer. bytearray uses an internal optimization
3559
               to avoid moving or copying bytes when bytes are removed at the
3560
               beginning (ex: del bytearray[:1]). */
3561
0
        }
3562
0
        else {
3563
0
            if (_PyBytes_Resize(&writer->buffer, allocated))
3564
0
                goto error;
3565
0
        }
3566
0
    }
3567
214k
    else {
3568
        /* convert from stack buffer to bytes object buffer */
3569
214k
        assert(writer->buffer == NULL);
3570
3571
214k
        if (writer->use_bytearray)
3572
0
            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3573
214k
        else
3574
214k
            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3575
214k
        if (writer->buffer == NULL)
3576
0
            goto error;
3577
3578
214k
        if (pos != 0) {
3579
0
            char *dest;
3580
0
            if (writer->use_bytearray)
3581
0
                dest = PyByteArray_AS_STRING(writer->buffer);
3582
0
            else
3583
0
                dest = PyBytes_AS_STRING(writer->buffer);
3584
0
            memcpy(dest,
3585
0
                      writer->small_buffer,
3586
0
                      pos);
3587
0
        }
3588
3589
214k
        writer->use_small_buffer = 0;
3590
#ifndef NDEBUG
3591
        memset(writer->small_buffer, PYMEM_CLEANBYTE,
3592
               sizeof(writer->small_buffer));
3593
#endif
3594
214k
    }
3595
214k
    writer->allocated = allocated;
3596
3597
214k
    str = _PyBytesWriter_AsString(writer) + pos;
3598
214k
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3599
214k
    return str;
3600
3601
0
error:
3602
0
    _PyBytesWriter_Dealloc(writer);
3603
0
    return NULL;
3604
214k
}
3605
3606
void*
3607
_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3608
6.31M
{
3609
6.31M
    Py_ssize_t new_min_size;
3610
3611
6.31M
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3612
6.31M
    assert(size >= 0);
3613
3614
6.31M
    if (size == 0) {
3615
        /* nothing to do */
3616
279
        return str;
3617
279
    }
3618
3619
6.31M
    if (writer->min_size > PY_SSIZE_T_MAX - size) {
3620
0
        PyErr_NoMemory();
3621
0
        _PyBytesWriter_Dealloc(writer);
3622
0
        return NULL;
3623
0
    }
3624
6.31M
    new_min_size = writer->min_size + size;
3625
3626
6.31M
    if (new_min_size > writer->allocated)
3627
214k
        str = _PyBytesWriter_Resize(writer, str, new_min_size);
3628
3629
6.31M
    writer->min_size = new_min_size;
3630
6.31M
    return str;
3631
6.31M
}
3632
3633
/* Allocate the buffer to write size bytes.
3634
   Return the pointer to the beginning of buffer data.
3635
   Raise an exception and return NULL on error. */
3636
void*
3637
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3638
6.31M
{
3639
    /* ensure that _PyBytesWriter_Alloc() is only called once */
3640
6.31M
    assert(writer->min_size == 0 && writer->buffer == NULL);
3641
6.31M
    assert(size >= 0);
3642
3643
6.31M
    writer->use_small_buffer = 1;
3644
#ifndef NDEBUG
3645
    writer->allocated = sizeof(writer->small_buffer) - 1;
3646
    /* In debug mode, don't use the full small buffer because it is less
3647
       efficient than bytes and bytearray objects to detect buffer underflow
3648
       and buffer overflow. Use 10 bytes of the small buffer to test also
3649
       code using the smaller buffer in debug mode.
3650
3651
       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3652
       in debug mode to also be able to detect stack overflow when running
3653
       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3654
       if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3655
       stack overflow. */
3656
    writer->allocated = Py_MIN(writer->allocated, 10);
3657
    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3658
       to detect buffer overflow */
3659
    writer->small_buffer[writer->allocated] = 0;
3660
#else
3661
6.31M
    writer->allocated = sizeof(writer->small_buffer);
3662
6.31M
#endif
3663
6.31M
    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3664
6.31M
}
3665
3666
PyObject *
3667
_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3668
5.87M
{
3669
5.87M
    Py_ssize_t size;
3670
5.87M
    PyObject *result;
3671
3672
5.87M
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3673
3674
5.87M
    size = _PyBytesWriter_GetSize(writer, str);
3675
5.87M
    if (size == 0 && !writer->use_bytearray) {
3676
1.54k
        Py_CLEAR(writer->buffer);
3677
        /* Get the empty byte string singleton */
3678
1.54k
        result = PyBytes_FromStringAndSize(NULL, 0);
3679
1.54k
    }
3680
5.87M
    else if (writer->use_small_buffer) {
3681
5.70M
        if (writer->use_bytearray) {
3682
0
            result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3683
0
        }
3684
5.70M
        else {
3685
5.70M
            result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3686
5.70M
        }
3687
5.70M
    }
3688
171k
    else {
3689
171k
        result = writer->buffer;
3690
171k
        writer->buffer = NULL;
3691
3692
171k
        if (size != writer->allocated) {
3693
171k
            if (writer->use_bytearray) {
3694
0
                if (PyByteArray_Resize(result, size)) {
3695
0
                    Py_DECREF(result);
3696
0
                    return NULL;
3697
0
                }
3698
0
            }
3699
171k
            else {
3700
171k
                if (_PyBytes_Resize(&result, size)) {
3701
0
                    assert(result == NULL);
3702
0
                    return NULL;
3703
0
                }
3704
171k
            }
3705
171k
        }
3706
171k
    }
3707
5.87M
    return result;
3708
5.87M
}
3709
3710
void*
3711
_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3712
                          const void *bytes, Py_ssize_t size)
3713
0
{
3714
0
    char *str = (char *)ptr;
3715
3716
0
    str = _PyBytesWriter_Prepare(writer, str, size);
3717
0
    if (str == NULL)
3718
0
        return NULL;
3719
3720
0
    memcpy(str, bytes, size);
3721
0
    str += size;
3722
3723
0
    return str;
3724
0
}
3725
3726
3727
void
3728
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3729
    const char* src, Py_ssize_t len_src)
3730
3.01k
{
3731
3.01k
    if (len_dest == 0) {
3732
0
        return;
3733
0
    }
3734
3.01k
    if (len_src == 1) {
3735
16
        memset(dest, src[0], len_dest);
3736
16
    }
3737
2.99k
    else {
3738
2.99k
        if (src != dest) {
3739
2.99k
            memcpy(dest, src, len_src);
3740
2.99k
        }
3741
2.99k
        Py_ssize_t copied = len_src;
3742
6.63k
        while (copied < len_dest) {
3743
3.64k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3744
3.64k
            memcpy(dest + copied, dest, bytes_to_copy);
3745
3.64k
            copied += bytes_to_copy;
3746
3.64k
        }
3747
2.99k
    }
3748
3.01k
}
3749