Coverage Report

Created: 2026-06-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_RepeatBuffer()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_list.h"          // _PyList_GetItemRef
15
#include "pycore_object.h"        // _PyObject_GC_TRACK
16
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
17
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
18
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
19
20
#include <stddef.h>
21
22
/*[clinic input]
23
class bytes "PyBytesObject *" "&PyBytes_Type"
24
[clinic start generated code]*/
25
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
26
27
#include "clinic/bytesobject.c.h"
28
29
300M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
30
31
/* Forward declaration */
32
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
33
                                                   Py_ssize_t size, void *data);
34
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
35
36
37
39.9M
#define CHARACTERS _Py_SINGLETON(bytes_characters)
38
#define CHARACTER(ch) \
39
39.9M
     ((PyBytesObject *)&(CHARACTERS[ch]));
40
8.00M
#define EMPTY (&_Py_SINGLETON(bytes_empty))
41
42
43
// Return a reference to the immortal empty bytes string singleton.
44
static inline PyObject* bytes_get_empty(void)
45
8.00M
{
46
8.00M
    PyObject *empty = &EMPTY->ob_base.ob_base;
47
8.00M
    assert(_Py_IsImmortal(empty));
48
8.00M
    return empty;
49
8.00M
}
50
51
52
static inline void
53
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
54
198M
{
55
198M
_Py_COMP_DIAG_PUSH
56
198M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
57
#ifdef Py_GIL_DISABLED
58
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
59
#else
60
198M
    a->ob_shash = hash;
61
198M
#endif
62
198M
_Py_COMP_DIAG_POP
63
198M
}
64
65
static inline Py_hash_t
66
get_ob_shash(PyBytesObject *a)
67
79.0M
{
68
79.0M
_Py_COMP_DIAG_PUSH
69
79.0M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
70
#ifdef Py_GIL_DISABLED
71
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
72
#else
73
79.0M
    return a->ob_shash;
74
79.0M
#endif
75
79.0M
_Py_COMP_DIAG_POP
76
79.0M
}
77
78
79
/*
80
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
81
   string containing exactly 'size' bytes.
82
83
   For PyBytes_FromStringAndSize(), the parameter 'str' is
84
   either NULL or else points to a string containing at least 'size' bytes.
85
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
86
   not have to be null-terminated.  (Therefore it is safe to construct a
87
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
88
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
89
   bytes (setting the last byte to the null terminating character) and you can
90
   fill in the data yourself.  If 'str' is non-NULL then the resulting
91
   PyBytes object must be treated as immutable and you must not fill in nor
92
   alter the data yourself, since the strings may be shared.
93
94
   The PyObject member 'op->ob_size', which denotes the number of "extra
95
   items" in a variable-size object, will contain the number of bytes
96
   allocated for string data, not counting the null terminating character.
97
   It is therefore equal to the 'size' parameter (for
98
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
99
   parameter (for PyBytes_FromString()).
100
*/
101
static PyObject *
102
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
103
147M
{
104
147M
    PyBytesObject *op;
105
147M
    assert(size >= 0);
106
107
147M
    if (size == 0) {
108
0
        return bytes_get_empty();
109
0
    }
110
111
147M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
112
0
        PyErr_SetString(PyExc_OverflowError,
113
0
                        "byte string is too large");
114
0
        return NULL;
115
0
    }
116
117
    /* Inline PyObject_NewVar */
118
147M
    if (use_calloc)
119
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
120
147M
    else
121
147M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
122
147M
    if (op == NULL) {
123
0
        return PyErr_NoMemory();
124
0
    }
125
147M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
126
147M
    set_ob_shash(op, -1);
127
147M
    if (!use_calloc) {
128
147M
        op->ob_sval[size] = '\0';
129
147M
    }
130
147M
    return (PyObject *) op;
131
147M
}
132
133
PyObject *
134
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
135
176M
{
136
176M
    PyBytesObject *op;
137
176M
    if (size < 0) {
138
0
        PyErr_SetString(PyExc_SystemError,
139
0
            "Negative size passed to PyBytes_FromStringAndSize");
140
0
        return NULL;
141
0
    }
142
176M
    if (size == 1 && str != NULL) {
143
39.9M
        op = CHARACTER(*str & 255);
144
39.9M
        assert(_Py_IsImmortal(op));
145
39.9M
        return (PyObject *)op;
146
39.9M
    }
147
136M
    if (size == 0) {
148
7.95M
        return bytes_get_empty();
149
7.95M
    }
150
151
128M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
152
128M
    if (op == NULL)
153
0
        return NULL;
154
128M
    if (str == NULL)
155
10.1M
        return (PyObject *) op;
156
157
118M
    memcpy(op->ob_sval, str, size);
158
118M
    return (PyObject *) op;
159
128M
}
160
161
PyObject *
162
PyBytes_FromString(const char *str)
163
90
{
164
90
    size_t size;
165
90
    PyBytesObject *op;
166
167
90
    assert(str != NULL);
168
90
    size = strlen(str);
169
90
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
170
0
        PyErr_SetString(PyExc_OverflowError,
171
0
            "byte string is too long");
172
0
        return NULL;
173
0
    }
174
175
90
    if (size == 0) {
176
0
        return bytes_get_empty();
177
0
    }
178
90
    else if (size == 1) {
179
0
        op = CHARACTER(*str & 255);
180
0
        assert(_Py_IsImmortal(op));
181
0
        return (PyObject *)op;
182
0
    }
183
184
    /* Inline PyObject_NewVar */
185
90
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
186
90
    if (op == NULL) {
187
0
        return PyErr_NoMemory();
188
0
    }
189
90
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
190
90
    set_ob_shash(op, -1);
191
90
    memcpy(op->ob_sval, str, size+1);
192
90
    return (PyObject *) op;
193
90
}
194
195
196
static char*
197
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
198
                 const char *format, va_list vargs)
199
0
{
200
0
    const char *f;
201
0
    const char *p;
202
0
    Py_ssize_t prec;
203
0
    int longflag;
204
0
    int size_tflag;
205
    /* Longest 64-bit formatted numbers:
206
       - "18446744073709551615\0" (21 bytes)
207
       - "-9223372036854775808\0" (21 bytes)
208
       Decimal takes the most space (it isn't enough for octal.)
209
210
       Longest 64-bit pointer representation:
211
       "0xffffffffffffffff\0" (19 bytes). */
212
0
    char buffer[21];
213
214
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
215
216
0
#define WRITE_BYTES_LEN(str, len_expr) \
217
0
    do { \
218
0
        size_t len = (len_expr); \
219
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
220
0
        if (s == NULL) { \
221
0
            goto error; \
222
0
        } \
223
0
        memcpy(s, (str), len); \
224
0
        s += len; \
225
0
    } while (0)
226
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
227
228
0
    for (f = format; *f; f++) {
229
0
        if (*f != '%') {
230
0
            *s++ = *f;
231
0
            continue;
232
0
        }
233
234
0
        p = f++;
235
236
        /* ignore the width (ex: 10 in "%10s") */
237
0
        while (Py_ISDIGIT(*f))
238
0
            f++;
239
240
        /* parse the precision (ex: 10 in "%.10s") */
241
0
        prec = 0;
242
0
        if (*f == '.') {
243
0
            f++;
244
0
            for (; Py_ISDIGIT(*f); f++) {
245
0
                prec = (prec * 10) + (*f - '0');
246
0
            }
247
0
        }
248
249
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
250
0
            f++;
251
252
        /* handle the long flag ('l'), but only for %ld and %lu.
253
           others can be added when necessary. */
254
0
        longflag = 0;
255
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
256
0
            longflag = 1;
257
0
            ++f;
258
0
        }
259
260
        /* handle the size_t flag ('z'). */
261
0
        size_tflag = 0;
262
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
263
0
            size_tflag = 1;
264
0
            ++f;
265
0
        }
266
267
0
        switch (*f) {
268
0
        case 'c':
269
0
        {
270
0
            int c = va_arg(vargs, int);
271
0
            if (c < 0 || c > 255) {
272
0
                PyErr_SetString(PyExc_OverflowError,
273
0
                                "PyBytes_FromFormatV(): %c format "
274
0
                                "expects an integer in range [0; 255]");
275
0
                goto error;
276
0
            }
277
0
            *s++ = (unsigned char)c;
278
0
            break;
279
0
        }
280
281
0
        case 'd':
282
0
            if (longflag) {
283
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
284
0
            }
285
0
            else if (size_tflag) {
286
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
287
0
            }
288
0
            else {
289
0
                sprintf(buffer, "%d", va_arg(vargs, int));
290
0
            }
291
0
            assert(strlen(buffer) < sizeof(buffer));
292
0
            WRITE_BYTES(buffer);
293
0
            break;
294
295
0
        case 'u':
296
0
            if (longflag) {
297
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
298
0
            }
299
0
            else if (size_tflag) {
300
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
301
0
            }
302
0
            else {
303
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
304
0
            }
305
0
            assert(strlen(buffer) < sizeof(buffer));
306
0
            WRITE_BYTES(buffer);
307
0
            break;
308
309
0
        case 'i':
310
0
            sprintf(buffer, "%i", va_arg(vargs, int));
311
0
            assert(strlen(buffer) < sizeof(buffer));
312
0
            WRITE_BYTES(buffer);
313
0
            break;
314
315
0
        case 'x':
316
0
            sprintf(buffer, "%x", va_arg(vargs, int));
317
0
            assert(strlen(buffer) < sizeof(buffer));
318
0
            WRITE_BYTES(buffer);
319
0
            break;
320
321
0
        case 's':
322
0
        {
323
0
            Py_ssize_t i;
324
325
0
            p = va_arg(vargs, const char*);
326
0
            if (prec <= 0) {
327
0
                i = strlen(p);
328
0
            }
329
0
            else {
330
0
                i = 0;
331
0
                while (i < prec && p[i]) {
332
0
                    i++;
333
0
                }
334
0
            }
335
0
            WRITE_BYTES_LEN(p, i);
336
0
            break;
337
0
        }
338
339
0
        case 'p':
340
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
341
0
            assert(strlen(buffer) < sizeof(buffer));
342
            /* %p is ill-defined:  ensure leading 0x. */
343
0
            if (buffer[1] == 'X')
344
0
                buffer[1] = 'x';
345
0
            else if (buffer[1] != 'x') {
346
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
347
0
                buffer[0] = '0';
348
0
                buffer[1] = 'x';
349
0
            }
350
0
            WRITE_BYTES(buffer);
351
0
            break;
352
353
0
        case '%':
354
0
            *s++ = '%';
355
0
            break;
356
357
0
        default:
358
            /* invalid format string: copy unformatted string and exit */
359
0
            WRITE_BYTES(p);
360
0
            return s;
361
0
        }
362
0
    }
363
364
0
#undef WRITE_BYTES
365
0
#undef WRITE_BYTES_LEN
366
367
0
    return s;
368
369
0
 error:
370
0
    return NULL;
371
0
}
372
373
374
PyObject *
375
PyBytes_FromFormatV(const char *format, va_list vargs)
376
0
{
377
0
    Py_ssize_t alloc = strlen(format);
378
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
379
0
    if (writer == NULL) {
380
0
        return NULL;
381
0
    }
382
383
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
384
0
    if (s == NULL) {
385
0
        PyBytesWriter_Discard(writer);
386
0
        return NULL;
387
0
    }
388
389
0
    return PyBytesWriter_FinishWithPointer(writer, s);
390
0
}
391
392
393
/* Variadic front end: packages the trailing arguments into a va_list and
   forwards them, together with 'format', to PyBytes_FromFormatV().
   Returns whatever that function returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
404
405
406
/* Helpers for formatstring */
407
408
0
/* Raise EXC with a message that names the offending format argument.

   The macro reads two variables from the scope it is expanded in:
   'key' (a PyObject *, or NULL) and 'argidx' (a Py_ssize_t).  The prefix is
     "format argument: "          when key is NULL and argidx is negative,
     "format argument <argidx>: " when key is NULL and argidx >= 0,
     "format argument <key!r>: "  when key is not NULL.
   FMT must be a string literal (it is concatenated onto the prefix) and at
   least one variadic argument is required. */
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (key == NULL && argidx < 0) {                                        \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
    else if (key == NULL) {                                                 \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     argidx, __VA_ARGS__);                                  \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     key, __VA_ARGS__);                                     \
    }                                                                       \
} while (0)
421
422
/* Fetch the next argument for a format conversion.

   args      The argument tuple, or the single non-tuple argument.
   arglen    Tuple length, or a negative value when 'args' is not a tuple.
   p_argidx  In/out index of the next argument; advanced by one whenever
             it is still below 'arglen'.
   allowone  Non-zero to permit returning 'args' itself in the non-tuple
             case.

   Returns the PyTuple_GetItem() result for tuples, 'args' itself for an
   allowed single argument (no new reference is created here), or NULL
   with TypeError set when no argument is available. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone)
{
    const Py_ssize_t current = *p_argidx;

    if (current < arglen) {
        *p_argidx = current + 1;
        if (arglen >= 0) {
            return PyTuple_GetItem(args, current);
        }
        if (allowone) {
            return args;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "not enough arguments for format string (got %zd)",
                 arglen < 0 ? 1 : arglen);
    return NULL;
}
440
441
/* Returns a new reference to a PyBytes object, or NULL on failure. */
442
443
static char*
444
formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key,
445
            int flags, int prec, int type,
446
            PyObject **p_result, PyBytesWriter *writer, char *str)
447
0
{
448
0
    char *p;
449
0
    PyObject *result;
450
0
    double x;
451
0
    size_t len;
452
0
    int dtoa_flags = 0;
453
454
0
    x = PyFloat_AsDouble(v);
455
0
    if (x == -1.0 && PyErr_Occurred()) {
456
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
457
0
            FORMAT_ERROR(PyExc_TypeError,
458
0
                         "%%%c requires a real number, not %T",
459
0
                         type, v);
460
0
        }
461
0
        return NULL;
462
0
    }
463
464
0
    if (prec < 0)
465
0
        prec = 6;
466
467
0
    if (flags & F_ALT) {
468
0
        dtoa_flags |= Py_DTSF_ALT;
469
0
    }
470
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
471
472
0
    if (p == NULL)
473
0
        return NULL;
474
475
0
    len = strlen(p);
476
0
    if (writer != NULL) {
477
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
478
0
        if (str == NULL) {
479
0
            PyMem_Free(p);
480
0
            return NULL;
481
0
        }
482
0
        memcpy(str, p, len);
483
0
        PyMem_Free(p);
484
0
        str += len;
485
0
        return str;
486
0
    }
487
488
0
    result = PyBytes_FromStringAndSize(p, len);
489
0
    PyMem_Free(p);
490
0
    *p_result = result;
491
0
    return result != NULL ? str : NULL;
492
0
}
493
494
static PyObject *
495
formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key,
496
           int flags, int prec, int type)
497
0
{
498
0
    PyObject *result, *iobj;
499
0
    if (PyLong_Check(v))
500
0
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
501
0
    if (PyNumber_Check(v)) {
502
        /* make sure number is a type of integer for o, x, and X */
503
0
        if (type == 'o' || type == 'x' || type == 'X')
504
0
            iobj = _PyNumber_Index(v);
505
0
        else
506
0
            iobj = PyNumber_Long(v);
507
0
        if (iobj != NULL) {
508
0
            assert(PyLong_Check(iobj));
509
0
            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
510
0
            Py_DECREF(iobj);
511
0
            return result;
512
0
        }
513
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
514
0
            return NULL;
515
0
    }
516
0
    FORMAT_ERROR(PyExc_TypeError,
517
0
                 "%%%c requires %s, not %T",
518
0
                 type,
519
0
                 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
520
0
                                                             : "a real number",
521
0
                 v);
522
0
    return NULL;
523
0
}
524
525
static int
526
byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p)
527
0
{
528
0
    if (PyBytes_Check(arg)) {
529
0
        if (PyBytes_GET_SIZE(arg) != 1) {
530
0
            FORMAT_ERROR(PyExc_TypeError,
531
0
                         "%%c requires an integer in range(256) or "
532
0
                         "a single byte, not a bytes object of length %zd",
533
0
                         PyBytes_GET_SIZE(arg));
534
0
            return 0;
535
0
        }
536
0
        *p = PyBytes_AS_STRING(arg)[0];
537
0
        return 1;
538
0
    }
539
0
    else if (PyByteArray_Check(arg)) {
540
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
541
0
            FORMAT_ERROR(PyExc_TypeError,
542
0
                         "%%c requires an integer in range(256) or "
543
0
                         "a single byte, not a bytearray object of length %zd",
544
0
                         PyByteArray_GET_SIZE(arg));
545
0
            return 0;
546
0
        }
547
0
        *p = PyByteArray_AS_STRING(arg)[0];
548
0
        return 1;
549
0
    }
550
0
    else if (PyIndex_Check(arg)) {
551
0
        int overflow;
552
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
553
0
        if (ival == -1 && PyErr_Occurred()) {
554
0
            return 0;
555
0
        }
556
0
        if (!(0 <= ival && ival <= 255)) {
557
            /* this includes an overflow in converting to C long */
558
0
            FORMAT_ERROR(PyExc_OverflowError,
559
0
                         "%%c argument not in range(256)%s", "");
560
0
            return 0;
561
0
        }
562
0
        *p = (char)ival;
563
0
        return 1;
564
0
    }
565
0
    FORMAT_ERROR(PyExc_TypeError,
566
0
                 "%%c requires an integer in range(256) or "
567
0
                 "a single byte, not %T",
568
0
                 arg);
569
0
    return 0;
570
0
}
571
572
static PyObject *_PyBytes_FromBuffer(PyObject *x);
573
574
static PyObject *
575
format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key,
576
           const char **pbuf, Py_ssize_t *plen)
577
0
{
578
0
    PyObject *func, *result;
579
    /* is it a bytes object? */
580
0
    if (PyBytes_Check(v)) {
581
0
        *pbuf = PyBytes_AS_STRING(v);
582
0
        *plen = PyBytes_GET_SIZE(v);
583
0
        return Py_NewRef(v);
584
0
    }
585
0
    if (PyByteArray_Check(v)) {
586
0
        *pbuf = PyByteArray_AS_STRING(v);
587
0
        *plen = PyByteArray_GET_SIZE(v);
588
0
        return Py_NewRef(v);
589
0
    }
590
    /* does it support __bytes__? */
591
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
592
0
    if (func != NULL) {
593
0
        result = _PyObject_CallNoArgs(func);
594
0
        Py_DECREF(func);
595
0
        if (result == NULL)
596
0
            return NULL;
597
0
        if (!PyBytes_Check(result)) {
598
0
            PyErr_Format(PyExc_TypeError,
599
0
                         "%T.__bytes__() must return a bytes, not %T",
600
0
                         v, result);
601
0
            Py_DECREF(result);
602
0
            return NULL;
603
0
        }
604
0
        *pbuf = PyBytes_AS_STRING(result);
605
0
        *plen = PyBytes_GET_SIZE(result);
606
0
        return result;
607
0
    }
608
    /* does it support buffer protocol? */
609
0
    if (PyObject_CheckBuffer(v)) {
610
        /* maybe we can avoid making a copy of the buffer object here? */
611
0
        result = _PyBytes_FromBuffer(v);
612
0
        if (result == NULL)
613
0
            return NULL;
614
0
        *pbuf = PyBytes_AS_STRING(result);
615
0
        *plen = PyBytes_GET_SIZE(result);
616
0
        return result;
617
0
    }
618
0
    FORMAT_ERROR(PyExc_TypeError,
619
0
                 "%%b requires a bytes-like object, "
620
0
                 "or an object that implements __bytes__, not %T",
621
0
                 v);
622
0
    return NULL;
623
0
}
624
625
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
626
627
PyObject *
628
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
629
                  PyObject *args, int use_bytearray)
630
0
{
631
0
    const char *fmt;
632
0
    Py_ssize_t arglen, argidx;
633
0
    Py_ssize_t fmtcnt;
634
0
    int args_owned = 0;
635
0
    PyObject *dict = NULL;
636
0
    PyObject *key = NULL;
637
638
0
    if (args == NULL) {
639
0
        PyErr_BadInternalCall();
640
0
        return NULL;
641
0
    }
642
0
    fmt = format;
643
0
    fmtcnt = format_len;
644
645
0
    PyBytesWriter *writer;
646
0
    if (use_bytearray) {
647
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
648
0
    }
649
0
    else {
650
0
        writer = PyBytesWriter_Create(fmtcnt);
651
0
    }
652
0
    if (writer == NULL) {
653
0
        return NULL;
654
0
    }
655
0
    char *res = PyBytesWriter_GetData(writer);
656
657
0
    if (PyTuple_Check(args)) {
658
0
        arglen = PyTuple_GET_SIZE(args);
659
0
        argidx = 0;
660
0
    }
661
0
    else {
662
0
        arglen = -1;
663
0
        argidx = -2;
664
0
    }
665
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
666
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
667
0
        !PyByteArray_Check(args)) {
668
0
            dict = args;
669
0
    }
670
671
0
    while (--fmtcnt >= 0) {
672
0
        if (*fmt != '%') {
673
0
            Py_ssize_t len;
674
0
            char *pos;
675
676
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
677
0
            if (pos != NULL)
678
0
                len = pos - fmt;
679
0
            else
680
0
                len = fmtcnt + 1;
681
0
            assert(len != 0);
682
683
0
            memcpy(res, fmt, len);
684
0
            res += len;
685
0
            fmt += len;
686
0
            fmtcnt -= (len - 1);
687
0
        }
688
0
        else {
689
            /* Got a format specifier */
690
0
            int flags = 0;
691
0
            Py_ssize_t width = -1;
692
0
            int prec = -1;
693
0
            int c = '\0';
694
0
            int fill;
695
0
            PyObject *v = NULL;
696
0
            PyObject *temp = NULL;
697
0
            const char *pbuf = NULL;
698
0
            int sign;
699
0
            Py_ssize_t len = 0;
700
0
            char onechar; /* For byte_converter() */
701
0
            Py_ssize_t alloc;
702
703
0
            fmt++;
704
0
            if (*fmt == '%') {
705
0
                *res++ = '%';
706
0
                fmt++;
707
0
                fmtcnt--;
708
0
                continue;
709
0
            }
710
0
            Py_CLEAR(key);
711
0
            const char *fmtstart = fmt;
712
0
            if (*fmt == '(') {
713
0
                const char *keystart;
714
0
                Py_ssize_t keylen;
715
0
                int pcount = 1;
716
717
0
                if (dict == NULL) {
718
0
                    PyErr_Format(PyExc_TypeError,
719
0
                                 "format requires a mapping, not %T",
720
0
                                 args);
721
0
                    goto error;
722
0
                }
723
0
                ++fmt;
724
0
                --fmtcnt;
725
0
                keystart = fmt;
726
                /* Skip over balanced parentheses */
727
0
                while (pcount > 0 && --fmtcnt >= 0) {
728
0
                    if (*fmt == ')')
729
0
                        --pcount;
730
0
                    else if (*fmt == '(')
731
0
                        ++pcount;
732
0
                    fmt++;
733
0
                }
734
0
                keylen = fmt - keystart - 1;
735
0
                if (fmtcnt < 0 || pcount > 0) {
736
0
                    PyErr_Format(PyExc_ValueError,
737
0
                                 "stray %% or incomplete format key "
738
0
                                 "at position %zd",
739
0
                                 (Py_ssize_t)(fmtstart - format - 1));
740
0
                    goto error;
741
0
                }
742
0
                key = PyBytes_FromStringAndSize(keystart,
743
0
                                                 keylen);
744
0
                if (key == NULL)
745
0
                    goto error;
746
0
                if (args_owned) {
747
0
                    Py_DECREF(args);
748
0
                    args_owned = 0;
749
0
                }
750
0
                args = PyObject_GetItem(dict, key);
751
0
                if (args == NULL) {
752
0
                    goto error;
753
0
                }
754
0
                args_owned = 1;
755
0
                arglen = -3;
756
0
                argidx = -4;
757
0
            }
758
0
            else {
759
0
                if (arglen < -1) {
760
0
                    PyErr_Format(PyExc_ValueError,
761
0
                                 "format requires a parenthesised mapping key "
762
0
                                 "at position %zd",
763
0
                                 (Py_ssize_t)(fmtstart - format - 1));
764
0
                    goto error;
765
0
                }
766
0
            }
767
768
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
769
0
            while (--fmtcnt >= 0) {
770
0
                switch (c = *fmt++) {
771
0
                case '-': flags |= F_LJUST; continue;
772
0
                case '+': flags |= F_SIGN; continue;
773
0
                case ' ': flags |= F_BLANK; continue;
774
0
                case '#': flags |= F_ALT; continue;
775
0
                case '0': flags |= F_ZERO; continue;
776
0
                }
777
0
                break;
778
0
            }
779
780
            /* Parse width. Example: "%10s" => width=10 */
781
0
            if (c == '*') {
782
0
                if (arglen < -1) {
783
0
                    PyErr_Format(PyExc_ValueError,
784
0
                            "* cannot be used with a parenthesised mapping key "
785
0
                            "at position %zd",
786
0
                            (Py_ssize_t)(fmtstart - format - 1));
787
0
                    goto error;
788
0
                }
789
0
                v = getnextarg(args, arglen, &argidx, 0);
790
0
                if (v == NULL)
791
0
                    goto error;
792
0
                if (!PyLong_Check(v)) {
793
0
                    FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
794
0
                    goto error;
795
0
                }
796
0
                width = PyLong_AsSsize_t(v);
797
0
                if (width == -1 && PyErr_Occurred()) {
798
0
                    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
799
0
                        FORMAT_ERROR(PyExc_OverflowError,
800
0
                                     "too big for width%s", "");
801
0
                    }
802
0
                    goto error;
803
0
                }
804
0
                if (width < 0) {
805
0
                    flags |= F_LJUST;
806
0
                    width = -width;
807
0
                }
808
0
                if (--fmtcnt >= 0)
809
0
                    c = *fmt++;
810
0
            }
811
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
812
0
                width = c - '0';
813
0
                while (--fmtcnt >= 0) {
814
0
                    c = Py_CHARMASK(*fmt++);
815
0
                    if (!Py_ISDIGIT(c))
816
0
                        break;
817
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
818
0
                        PyErr_Format(PyExc_ValueError,
819
0
                                     "width too big at position %zd",
820
0
                                     (Py_ssize_t)(fmtstart - format - 1));
821
0
                        goto error;
822
0
                    }
823
0
                    width = width*10 + (c - '0');
824
0
                }
825
0
            }
826
827
            /* Parse precision. Example: "%.3f" => prec=3 */
828
0
            if (c == '.') {
829
0
                prec = 0;
830
0
                if (--fmtcnt >= 0)
831
0
                    c = *fmt++;
832
0
                if (c == '*') {
833
0
                    if (arglen < -1) {
834
0
                        PyErr_Format(PyExc_ValueError,
835
0
                                "* cannot be used with a parenthesised mapping key "
836
0
                                "at position %zd",
837
0
                                (Py_ssize_t)(fmtstart - format - 1));
838
0
                        goto error;
839
0
                    }
840
0
                    v = getnextarg(args, arglen, &argidx, 0);
841
0
                    if (v == NULL)
842
0
                        goto error;
843
0
                    if (!PyLong_Check(v)) {
844
0
                        FORMAT_ERROR(PyExc_TypeError,
845
0
                                     "* requires int, not %T", v);
846
0
                        goto error;
847
0
                    }
848
0
                    prec = PyLong_AsInt(v);
849
0
                    if (prec == -1 && PyErr_Occurred()) {
850
0
                        if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
851
0
                            FORMAT_ERROR(PyExc_OverflowError,
852
0
                                         "too big for precision%s", "");
853
0
                        }
854
0
                        goto error;
855
0
                    }
856
0
                    if (prec < 0)
857
0
                        prec = 0;
858
0
                    if (--fmtcnt >= 0)
859
0
                        c = *fmt++;
860
0
                }
861
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
862
0
                    prec = c - '0';
863
0
                    while (--fmtcnt >= 0) {
864
0
                        c = Py_CHARMASK(*fmt++);
865
0
                        if (!Py_ISDIGIT(c))
866
0
                            break;
867
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
868
0
                            PyErr_Format(PyExc_ValueError,
869
0
                                "precision too big at position %zd",
870
0
                                (Py_ssize_t)(fmtstart - format - 1));
871
0
                            goto error;
872
0
                        }
873
0
                        prec = prec*10 + (c - '0');
874
0
                    }
875
0
                }
876
0
            } /* prec */
877
0
            if (fmtcnt >= 0) {
878
0
                if (c == 'h' || c == 'l' || c == 'L') {
879
0
                    if (--fmtcnt >= 0)
880
0
                        c = *fmt++;
881
0
                }
882
0
            }
883
0
            if (fmtcnt < 0) {
884
0
                PyErr_Format(PyExc_ValueError,
885
0
                             "stray %% at position %zd",
886
0
                             (Py_ssize_t)(fmtstart - format - 1));
887
0
                goto error;
888
0
            }
889
0
            v = getnextarg(args, arglen, &argidx, 1);
890
0
            if (v == NULL)
891
0
                goto error;
892
893
0
            if (fmtcnt == 0) {
894
                /* last write: disable writer overallocation */
895
0
                writer->overallocate = 0;
896
0
            }
897
898
0
            sign = 0;
899
0
            fill = ' ';
900
0
            switch (c) {
901
0
            case 'r':
902
                // %r is only for 2/3 code; 3 only code should use %a
903
0
            case 'a':
904
0
                temp = PyObject_ASCII(v);
905
0
                if (temp == NULL)
906
0
                    goto error;
907
0
                assert(PyUnicode_IS_ASCII(temp));
908
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
909
0
                len = PyUnicode_GET_LENGTH(temp);
910
0
                if (prec >= 0 && len > prec)
911
0
                    len = prec;
912
0
                break;
913
914
0
            case 's':
915
                // %s is only for 2/3 code; 3 only code should use %b
916
0
            case 'b':
917
0
                temp = format_obj(v, argidx, key, &pbuf, &len);
918
0
                if (temp == NULL)
919
0
                    goto error;
920
0
                if (prec >= 0 && len > prec)
921
0
                    len = prec;
922
0
                break;
923
924
0
            case 'i':
925
0
            case 'd':
926
0
            case 'u':
927
0
            case 'o':
928
0
            case 'x':
929
0
            case 'X':
930
0
                if (PyLong_CheckExact(v)
931
0
                    && width == -1 && prec == -1
932
0
                    && !(flags & (F_SIGN | F_BLANK))
933
0
                    && c != 'X')
934
0
                {
935
                    /* Fast path */
936
0
                    int alternate = flags & F_ALT;
937
0
                    int base;
938
939
0
                    switch(c)
940
0
                    {
941
0
                        default:
942
0
                            Py_UNREACHABLE();
943
0
                        case 'd':
944
0
                        case 'i':
945
0
                        case 'u':
946
0
                            base = 10;
947
0
                            break;
948
0
                        case 'o':
949
0
                            base = 8;
950
0
                            break;
951
0
                        case 'x':
952
0
                        case 'X':
953
0
                            base = 16;
954
0
                            break;
955
0
                    }
956
957
                    /* Fast path */
958
0
                    res = _PyLong_FormatBytesWriter(writer, res,
959
0
                                                    v, base, alternate);
960
0
                    if (res == NULL)
961
0
                        goto error;
962
0
                    continue;
963
0
                }
964
965
0
                temp = formatlong(v, argidx, key, flags, prec, c);
966
0
                if (!temp)
967
0
                    goto error;
968
0
                assert(PyUnicode_IS_ASCII(temp));
969
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
970
0
                len = PyUnicode_GET_LENGTH(temp);
971
0
                sign = 1;
972
0
                if (flags & F_ZERO)
973
0
                    fill = '0';
974
0
                break;
975
976
0
            case 'e':
977
0
            case 'E':
978
0
            case 'f':
979
0
            case 'F':
980
0
            case 'g':
981
0
            case 'G':
982
0
                if (width == -1 && prec == -1
983
0
                    && !(flags & (F_SIGN | F_BLANK)))
984
0
                {
985
                    /* Fast path */
986
0
                    res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res);
987
0
                    if (res == NULL)
988
0
                        goto error;
989
0
                    continue;
990
0
                }
991
992
0
                if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res))
993
0
                    goto error;
994
0
                pbuf = PyBytes_AS_STRING(temp);
995
0
                len = PyBytes_GET_SIZE(temp);
996
0
                sign = 1;
997
0
                if (flags & F_ZERO)
998
0
                    fill = '0';
999
0
                break;
1000
1001
0
            case 'c':
1002
0
                pbuf = &onechar;
1003
0
                len = byte_converter(v, argidx, key, &onechar);
1004
0
                if (!len)
1005
0
                    goto error;
1006
0
                if (width == -1) {
1007
                    /* Fast path */
1008
0
                    *res++ = onechar;
1009
0
                    continue;
1010
0
                }
1011
0
                break;
1012
1013
0
            default:
1014
0
                if (Py_ISALPHA(c)) {
1015
0
                    PyErr_Format(PyExc_ValueError,
1016
0
                                 "unsupported format %%%c at position %zd",
1017
0
                                 c, (Py_ssize_t)(fmtstart - format - 1));
1018
0
                }
1019
0
                else if (c == '\'') {
1020
0
                    PyErr_Format(PyExc_ValueError,
1021
0
                                 "stray %% at position %zd or unexpected "
1022
0
                                 "format character \"'\" "
1023
0
                                 "at position %zd",
1024
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1025
0
                                 (Py_ssize_t)(fmt - format - 1));
1026
0
                }
1027
0
                else if (c >= 32 && c < 127 && c != '\'') {
1028
0
                    PyErr_Format(PyExc_ValueError,
1029
0
                                 "stray %% at position %zd or unexpected "
1030
0
                                 "format character '%c' "
1031
0
                                 "at position %zd",
1032
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1033
0
                                 c, (Py_ssize_t)(fmt - format - 1));
1034
0
                }
1035
0
                else {
1036
0
                    PyErr_Format(PyExc_ValueError,
1037
0
                                 "stray %% at position %zd or unexpected "
1038
0
                                 "format character with code 0x%02x "
1039
0
                                 "at position %zd",
1040
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1041
0
                                 Py_CHARMASK(c),
1042
0
                                 (Py_ssize_t)(fmt - format - 1));
1043
0
                }
1044
0
                goto error;
1045
0
            }
1046
1047
0
            if (sign) {
1048
0
                if (*pbuf == '-' || *pbuf == '+') {
1049
0
                    sign = *pbuf++;
1050
0
                    len--;
1051
0
                }
1052
0
                else if (flags & F_SIGN)
1053
0
                    sign = '+';
1054
0
                else if (flags & F_BLANK)
1055
0
                    sign = ' ';
1056
0
                else
1057
0
                    sign = 0;
1058
0
            }
1059
0
            if (width < len)
1060
0
                width = len;
1061
1062
0
            alloc = width;
1063
0
            if (sign != 0 && len == width)
1064
0
                alloc++;
1065
            /* 2: size preallocated for %s */
1066
0
            if (alloc > 2) {
1067
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
1068
0
                if (res == NULL) {
1069
0
                    Py_XDECREF(temp);
1070
0
                    goto error;
1071
0
                }
1072
0
            }
1073
#ifndef NDEBUG
1074
            char *before = res;
1075
#endif
1076
1077
            /* Write the sign if needed */
1078
0
            if (sign) {
1079
0
                if (fill != ' ')
1080
0
                    *res++ = sign;
1081
0
                if (width > len)
1082
0
                    width--;
1083
0
            }
1084
1085
            /* Write the numeric prefix for "x", "X" and "o" formats
1086
               if the alternate form is used.
1087
               For example, write "0x" for the "%#x" format. */
1088
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1089
0
                assert(pbuf[0] == '0');
1090
0
                assert(pbuf[1] == c);
1091
0
                if (fill != ' ') {
1092
0
                    *res++ = *pbuf++;
1093
0
                    *res++ = *pbuf++;
1094
0
                }
1095
0
                width -= 2;
1096
0
                if (width < 0)
1097
0
                    width = 0;
1098
0
                len -= 2;
1099
0
            }
1100
1101
            /* Pad left with the fill character if needed */
1102
0
            if (width > len && !(flags & F_LJUST)) {
1103
0
                memset(res, fill, width - len);
1104
0
                res += (width - len);
1105
0
                width = len;
1106
0
            }
1107
1108
            /* If padding with spaces: write sign if needed and/or numeric
1109
               prefix if the alternate form is used */
1110
0
            if (fill == ' ') {
1111
0
                if (sign)
1112
0
                    *res++ = sign;
1113
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1114
0
                    assert(pbuf[0] == '0');
1115
0
                    assert(pbuf[1] == c);
1116
0
                    *res++ = *pbuf++;
1117
0
                    *res++ = *pbuf++;
1118
0
                }
1119
0
            }
1120
1121
            /* Copy bytes */
1122
0
            memcpy(res, pbuf, len);
1123
0
            res += len;
1124
1125
            /* Pad right with the fill character if needed */
1126
0
            if (width > len) {
1127
0
                memset(res, ' ', width - len);
1128
0
                res += (width - len);
1129
0
            }
1130
1131
0
            if (dict && (argidx < arglen)) {
1132
                // XXX: Never happens?
1133
0
                PyErr_SetString(PyExc_TypeError,
1134
0
                           "not all arguments converted during bytes formatting");
1135
0
                Py_XDECREF(temp);
1136
0
                goto error;
1137
0
            }
1138
0
            Py_XDECREF(temp);
1139
1140
#ifndef NDEBUG
1141
            /* check that we computed the exact size for this write */
1142
            assert((res - before) == alloc);
1143
#endif
1144
0
        } /* '%' */
1145
1146
        /* If overallocation was disabled, ensure that it was the last
1147
           write. Otherwise, we missed an optimization */
1148
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1149
0
    } /* until end */
1150
1151
0
    if (argidx < arglen && !dict) {
1152
0
        PyErr_Format(PyExc_TypeError,
1153
0
                     "not all arguments converted during bytes formatting "
1154
0
                     "(required %zd, got %zd)",
1155
0
                     arglen < 0 ? 0 : argidx,
1156
0
                     arglen < 0 ? 1 : arglen);
1157
0
        goto error;
1158
0
    }
1159
1160
0
    Py_XDECREF(key);
1161
0
    if (args_owned) {
1162
0
        Py_DECREF(args);
1163
0
    }
1164
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1165
1166
0
 error:
1167
0
    Py_XDECREF(key);
1168
0
    PyBytesWriter_Discard(writer);
1169
0
    if (args_owned) {
1170
0
        Py_DECREF(args);
1171
0
    }
1172
0
    return NULL;
1173
0
}
1174
1175
/* Unescape a backslash-escaped string. */
1176
PyObject *_PyBytes_DecodeEscape2(const char *s,
1177
                                Py_ssize_t len,
1178
                                const char *errors,
1179
                                int *first_invalid_escape_char,
1180
                                const char **first_invalid_escape_ptr)
1181
2.18k
{
1182
2.18k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1183
2.18k
    if (writer == NULL) {
1184
0
        return NULL;
1185
0
    }
1186
2.18k
    char *p = PyBytesWriter_GetData(writer);
1187
1188
2.18k
    *first_invalid_escape_char = -1;
1189
2.18k
    *first_invalid_escape_ptr = NULL;
1190
1191
2.18k
    const char *end = s + len;
1192
58.8k
    while (s < end) {
1193
56.6k
        if (*s != '\\') {
1194
47.8k
            *p++ = *s++;
1195
47.8k
            continue;
1196
47.8k
        }
1197
1198
8.85k
        s++;
1199
8.85k
        if (s == end) {
1200
0
            PyErr_SetString(PyExc_ValueError,
1201
0
                            "Trailing \\ in string");
1202
0
            goto failed;
1203
0
        }
1204
1205
8.85k
        switch (*s++) {
1206
        /* XXX This assumes ASCII! */
1207
724
        case '\n': break;
1208
628
        case '\\': *p++ = '\\'; break;
1209
257
        case '\'': *p++ = '\''; break;
1210
333
        case '\"': *p++ = '\"'; break;
1211
208
        case 'b': *p++ = '\b'; break;
1212
162
        case 'f': *p++ = '\014'; break; /* FF */
1213
203
        case 't': *p++ = '\t'; break;
1214
338
        case 'n': *p++ = '\n'; break;
1215
321
        case 'r': *p++ = '\r'; break;
1216
216
        case 'v': *p++ = '\013'; break; /* VT */
1217
203
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1218
2.01k
        case '0': case '1': case '2': case '3':
1219
3.91k
        case '4': case '5': case '6': case '7':
1220
3.91k
        {
1221
3.91k
            int c = s[-1] - '0';
1222
3.91k
            if (s < end && '0' <= *s && *s <= '7') {
1223
1.79k
                c = (c<<3) + *s++ - '0';
1224
1.79k
                if (s < end && '0' <= *s && *s <= '7')
1225
710
                    c = (c<<3) + *s++ - '0';
1226
1.79k
            }
1227
3.91k
            if (c > 0377) {
1228
642
                if (*first_invalid_escape_char == -1) {
1229
274
                    *first_invalid_escape_char = c;
1230
                    /* Back up 3 chars, since we've already incremented s. */
1231
274
                    *first_invalid_escape_ptr = s - 3;
1232
274
                }
1233
642
            }
1234
3.91k
            *p++ = c;
1235
3.91k
            break;
1236
3.75k
        }
1237
283
        case 'x':
1238
283
            if (s+1 < end) {
1239
282
                int digit1, digit2;
1240
282
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1241
282
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1242
282
                if (digit1 < 16 && digit2 < 16) {
1243
279
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1244
279
                    s += 2;
1245
279
                    break;
1246
279
                }
1247
282
            }
1248
            /* invalid hexadecimal digits */
1249
1250
4
            if (!errors || strcmp(errors, "strict") == 0) {
1251
4
                PyErr_Format(PyExc_ValueError,
1252
4
                             "invalid \\x escape at position %zd",
1253
4
                             s - 2 - (end - len));
1254
4
                goto failed;
1255
4
            }
1256
0
            if (strcmp(errors, "replace") == 0) {
1257
0
                *p++ = '?';
1258
0
            } else if (strcmp(errors, "ignore") == 0)
1259
0
                /* do nothing */;
1260
0
            else {
1261
0
                PyErr_Format(PyExc_ValueError,
1262
0
                             "decoding error; unknown "
1263
0
                             "error handling code: %.400s",
1264
0
                             errors);
1265
0
                goto failed;
1266
0
            }
1267
            /* skip \x */
1268
0
            if (s < end && Py_ISXDIGIT(s[0]))
1269
0
                s++; /* and a hexdigit */
1270
0
            break;
1271
1272
1.06k
        default:
1273
1.06k
            if (*first_invalid_escape_char == -1) {
1274
518
                *first_invalid_escape_char = (unsigned char)s[-1];
1275
                /* Back up one char, since we've already incremented s. */
1276
518
                *first_invalid_escape_ptr = s - 1;
1277
518
            }
1278
1.06k
            *p++ = '\\';
1279
1.06k
            s--;
1280
8.85k
        }
1281
8.85k
    }
1282
1283
2.17k
    return PyBytesWriter_FinishWithPointer(writer, p);
1284
1285
4
  failed:
1286
4
    PyBytesWriter_Discard(writer);
1287
4
    return NULL;
1288
2.18k
}
1289
1290
PyObject *PyBytes_DecodeEscape(const char *s,
1291
                                Py_ssize_t len,
1292
                                const char *errors,
1293
                                Py_ssize_t Py_UNUSED(unicode),
1294
                                const char *Py_UNUSED(recode_encoding))
1295
0
{
1296
0
    int first_invalid_escape_char;
1297
0
    const char *first_invalid_escape_ptr;
1298
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1299
0
                                             &first_invalid_escape_char,
1300
0
                                             &first_invalid_escape_ptr);
1301
0
    if (result == NULL)
1302
0
        return NULL;
1303
0
    if (first_invalid_escape_char != -1) {
1304
0
        if (first_invalid_escape_char > 0xff) {
1305
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1306
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1307
0
                                 "Such sequences will not work in the future. ",
1308
0
                                 first_invalid_escape_char) < 0)
1309
0
            {
1310
0
                Py_DECREF(result);
1311
0
                return NULL;
1312
0
            }
1313
0
        }
1314
0
        else {
1315
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1316
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1317
0
                                 "Such sequences will not work in the future. ",
1318
0
                                 first_invalid_escape_char) < 0)
1319
0
            {
1320
0
                Py_DECREF(result);
1321
0
                return NULL;
1322
0
            }
1323
0
        }
1324
0
    }
1325
0
    return result;
1326
0
}
1327
/* -------------------------------------------------------------------- */
1328
/* object api */
1329
1330
Py_ssize_t
1331
PyBytes_Size(PyObject *op)
1332
5.13k
{
1333
5.13k
    if (!PyBytes_Check(op)) {
1334
0
        PyErr_Format(PyExc_TypeError,
1335
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1336
0
        return -1;
1337
0
    }
1338
5.13k
    return Py_SIZE(op);
1339
5.13k
}
1340
1341
char *
1342
PyBytes_AsString(PyObject *op)
1343
12.8M
{
1344
12.8M
    if (!PyBytes_Check(op)) {
1345
0
        PyErr_Format(PyExc_TypeError,
1346
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1347
0
        return NULL;
1348
0
    }
1349
12.8M
    return ((PyBytesObject *)op)->ob_sval;
1350
12.8M
}
1351
1352
int
1353
PyBytes_AsStringAndSize(PyObject *obj,
1354
                         char **s,
1355
                         Py_ssize_t *len)
1356
71.7k
{
1357
71.7k
    if (s == NULL) {
1358
0
        PyErr_BadInternalCall();
1359
0
        return -1;
1360
0
    }
1361
1362
71.7k
    if (!PyBytes_Check(obj)) {
1363
0
        PyErr_Format(PyExc_TypeError,
1364
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1365
0
        return -1;
1366
0
    }
1367
1368
71.7k
    *s = PyBytes_AS_STRING(obj);
1369
71.7k
    if (len != NULL)
1370
71.7k
        *len = PyBytes_GET_SIZE(obj);
1371
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1372
0
        PyErr_SetString(PyExc_ValueError,
1373
0
                        "embedded null byte");
1374
0
        return -1;
1375
0
    }
1376
71.7k
    return 0;
1377
71.7k
}
1378
1379
/* -------------------------------------------------------------------- */
1380
/* Methods */
1381
1382
1.81k
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1383
1384
#include "stringlib/stringdefs.h"
1385
#define STRINGLIB_MUTABLE 0
1386
1387
#include "stringlib/fastsearch.h"
1388
#include "stringlib/count.h"
1389
#include "stringlib/find.h"
1390
#include "stringlib/join.h"
1391
#include "stringlib/partition.h"
1392
#include "stringlib/split.h"
1393
#include "stringlib/ctype.h"
1394
1395
#include "stringlib/transmogrify.h"
1396
1397
#undef STRINGLIB_GET_EMPTY
1398
1399
Py_ssize_t
1400
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1401
              const char *needle, Py_ssize_t len_needle,
1402
              Py_ssize_t offset)
1403
0
{
1404
0
    assert(len_haystack >= 0);
1405
0
    assert(len_needle >= 0);
1406
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1407
0
    if (len_needle == 0) {
1408
0
        return offset;
1409
0
    }
1410
0
    if (len_needle > len_haystack) {
1411
0
        return -1;
1412
0
    }
1413
0
    assert(len_haystack >= 1);
1414
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1415
0
                                    needle, len_needle, offset);
1416
0
    if (res == -1) {
1417
0
        Py_ssize_t last_align = len_haystack - len_needle;
1418
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1419
0
            return offset + last_align;
1420
0
        }
1421
0
    }
1422
0
    return res;
1423
0
}
1424
1425
Py_ssize_t
1426
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1427
                     const char *needle, Py_ssize_t len_needle,
1428
                     Py_ssize_t offset)
1429
0
{
1430
0
    return stringlib_rfind(haystack, len_haystack,
1431
0
                           needle, len_needle, offset);
1432
0
}
1433
1434
/* Build the repr of the bytes object `obj` by passing its buffer and size
   to _Py_bytes_repr() with the class name "bytes". */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *buffer = PyBytes_AS_STRING(obj);
    Py_ssize_t size = PyBytes_GET_SIZE(obj);
    return _Py_bytes_repr(buffer, size, smartquotes, "bytes");
}
1440
1441
PyObject *
1442
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1443
               const char *classname)
1444
3.28k
{
1445
3.28k
    Py_ssize_t i;
1446
3.28k
    Py_ssize_t newsize, squotes, dquotes;
1447
3.28k
    PyObject *v;
1448
3.28k
    unsigned char quote;
1449
3.28k
    Py_UCS1 *p;
1450
1451
    /* Compute size of output string */
1452
3.28k
    squotes = dquotes = 0;
1453
3.28k
    newsize = 3; /* b'' */
1454
3.35M
    for (i = 0; i < length; i++) {
1455
3.35M
        unsigned char c = data[i];
1456
3.35M
        Py_ssize_t incr = 1;
1457
3.35M
        switch(c) {
1458
4.46k
        case '\'': squotes++; break;
1459
9.60k
        case '"':  dquotes++; break;
1460
33.2k
        case '\\': case '\t': case '\n': case '\r':
1461
33.2k
            incr = 2; break; /* \C */
1462
3.30M
        default:
1463
3.30M
            if (c < ' ' || c >= 0x7f)
1464
2.47M
                incr = 4; /* \xHH */
1465
3.35M
        }
1466
3.35M
        if (newsize > PY_SSIZE_T_MAX - incr)
1467
0
            goto overflow;
1468
3.35M
        newsize += incr;
1469
3.35M
    }
1470
3.28k
    quote = '\'';
1471
3.28k
    if (smartquotes && squotes && !dquotes)
1472
119
        quote = '"';
1473
3.28k
    if (squotes && quote == '\'') {
1474
231
        if (newsize > PY_SSIZE_T_MAX - squotes)
1475
0
            goto overflow;
1476
231
        newsize += squotes;
1477
231
    }
1478
1479
3.28k
    v = PyUnicode_New(newsize, 127);
1480
3.28k
    if (v == NULL) {
1481
0
        return NULL;
1482
0
    }
1483
3.28k
    p = PyUnicode_1BYTE_DATA(v);
1484
1485
3.28k
    *p++ = 'b', *p++ = quote;
1486
3.35M
    for (i = 0; i < length; i++) {
1487
3.35M
        unsigned char c = data[i];
1488
3.35M
        if (c == quote || c == '\\')
1489
4.74k
            *p++ = '\\', *p++ = c;
1490
3.35M
        else if (c == '\t')
1491
17.7k
            *p++ = '\\', *p++ = 't';
1492
3.33M
        else if (c == '\n')
1493
5.91k
            *p++ = '\\', *p++ = 'n';
1494
3.32M
        else if (c == '\r')
1495
7.50k
            *p++ = '\\', *p++ = 'r';
1496
3.31M
        else if (c < ' ' || c >= 0x7f) {
1497
2.47M
            *p++ = '\\';
1498
2.47M
            *p++ = 'x';
1499
2.47M
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1500
2.47M
            *p++ = Py_hexdigits[c & 0xf];
1501
2.47M
        }
1502
845k
        else
1503
845k
            *p++ = c;
1504
3.35M
    }
1505
3.28k
    *p++ = quote;
1506
3.28k
    assert(_PyUnicode_CheckConsistency(v, 1));
1507
3.28k
    return v;
1508
1509
0
  overflow:
1510
0
    PyErr_Format(PyExc_OverflowError,
1511
0
                 "%s object is too large to make repr", classname);
1512
0
    return NULL;
1513
3.28k
}
1514
1515
/* repr() for bytes: calls PyBytes_Repr() with smartquotes enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1520
1521
/* str() for bytes: returns the same result as bytes_repr().

   When the interpreter config has bytes_warning set, a BytesWarning is
   issued first; NULL is returned if PyErr_WarnEx() reports non-zero. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1532
1533
static Py_ssize_t
1534
bytes_length(PyObject *self)
1535
44.3M
{
1536
44.3M
    PyBytesObject *a = _PyBytes_CAST(self);
1537
44.3M
    return Py_SIZE(a);
1538
44.3M
}
1539
1540
/* This is also used by PyBytes_Concat() and the specializing interpreter. */
1541
PyObject *
1542
_PyBytes_Concat(PyObject *a, PyObject *b)
1543
944k
{
1544
944k
    Py_buffer va, vb;
1545
944k
    PyObject *result = NULL;
1546
1547
944k
    va.len = -1;
1548
944k
    vb.len = -1;
1549
944k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1550
944k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1551
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1552
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1553
0
        goto done;
1554
0
    }
1555
1556
    /* Optimize end cases */
1557
944k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1558
135k
        result = Py_NewRef(b);
1559
135k
        goto done;
1560
135k
    }
1561
809k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1562
72.0k
        result = Py_NewRef(a);
1563
72.0k
        goto done;
1564
72.0k
    }
1565
1566
737k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1567
0
        PyErr_NoMemory();
1568
0
        goto done;
1569
0
    }
1570
1571
737k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1572
737k
    if (result != NULL) {
1573
737k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1574
737k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1575
737k
    }
1576
1577
944k
  done:
1578
944k
    if (va.len != -1)
1579
944k
        PyBuffer_Release(&va);
1580
944k
    if (vb.len != -1)
1581
944k
        PyBuffer_Release(&vb);
1582
944k
    return result;
1583
737k
}
1584
1585
PyObject *
1586
_PyBytes_Repeat(PyObject *self, Py_ssize_t n)
1587
192k
{
1588
192k
    PyBytesObject *a = _PyBytes_CAST(self);
1589
192k
    if (n < 0)
1590
0
        n = 0;
1591
    /* watch out for overflows:  the size can overflow int,
1592
     * and the # of bytes needed can overflow size_t
1593
     */
1594
192k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1595
0
        PyErr_SetString(PyExc_OverflowError,
1596
0
            "repeated bytes are too long");
1597
0
        return NULL;
1598
0
    }
1599
192k
    Py_ssize_t size = Py_SIZE(a) * n;
1600
192k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1601
6
        return Py_NewRef(a);
1602
6
    }
1603
192k
    size_t nbytes = (size_t)size;
1604
192k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1605
0
        PyErr_SetString(PyExc_OverflowError,
1606
0
            "repeated bytes are too long");
1607
0
        return NULL;
1608
0
    }
1609
192k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1610
192k
    if (op == NULL) {
1611
0
        return PyErr_NoMemory();
1612
0
    }
1613
192k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1614
192k
    set_ob_shash(op, -1);
1615
192k
    op->ob_sval[size] = '\0';
1616
1617
192k
    _PyBytes_RepeatBuffer(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1618
1619
192k
    return (PyObject *) op;
1620
192k
}
1621
1622
static int
1623
bytes_contains(PyObject *self, PyObject *arg)
1624
3.02k
{
1625
3.02k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1626
3.02k
}
1627
1628
static PyObject *
1629
bytes_item(PyObject *self, Py_ssize_t i)
1630
0
{
1631
0
    PyBytesObject *a = _PyBytes_CAST(self);
1632
0
    if (i < 0 || i >= Py_SIZE(a)) {
1633
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1634
0
        return NULL;
1635
0
    }
1636
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1637
0
}
1638
1639
static int
1640
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1641
83.9M
{
1642
83.9M
    int cmp;
1643
83.9M
    Py_ssize_t len;
1644
1645
83.9M
    len = Py_SIZE(a);
1646
83.9M
    if (Py_SIZE(b) != len)
1647
802k
        return 0;
1648
1649
83.1M
    if (a->ob_sval[0] != b->ob_sval[0])
1650
11.9M
        return 0;
1651
1652
71.2M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1653
71.2M
    return (cmp == 0);
1654
83.1M
}
1655
1656
/* Rich comparison for bytes.

   Returns NotImplemented unless both operands are bytes objects.  In that
   case, when the config has bytes_warning set and `op` is Py_EQ or Py_NE, a
   BytesWarning is issued first for comparisons involving str or int.
   Two bytes objects are compared for identity, then for equality (EQ/NE),
   otherwise ordered bytewise with the lengths as tie-breaker. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *lhs = _PyBytes_CAST(aa);
    PyBytesObject *rhs = _PyBytes_CAST(bb);

    if (lhs == rhs) {
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            /* a byte string is equal to itself */
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        int equal = bytes_compare_eq(lhs, rhs);
        /* Invert the answer for "!=". */
        equal ^= (op == Py_NE);
        return PyBool_FromLong(equal);
    }

    /* Ordering: compare the common prefix, then fall back to the lengths. */
    Py_ssize_t lhs_len = Py_SIZE(lhs);
    Py_ssize_t rhs_len = Py_SIZE(rhs);
    Py_ssize_t common = Py_MIN(lhs_len, rhs_len);
    int order = 0;
    if (common > 0) {
        order = Py_CHARMASK(*lhs->ob_sval) - Py_CHARMASK(*rhs->ob_sval);
        if (order == 0) {
            order = memcmp(lhs->ob_sval, rhs->ob_sval, common);
        }
    }
    if (order != 0) {
        Py_RETURN_RICHCOMPARE(order, 0, op);
    }
    Py_RETURN_RICHCOMPARE(lhs_len, rhs_len, op);
}
1718
1719
static Py_hash_t
1720
bytes_hash(PyObject *self)
1721
79.0M
{
1722
79.0M
    PyBytesObject *a = _PyBytes_CAST(self);
1723
79.0M
    Py_hash_t hash = get_ob_shash(a);
1724
79.0M
    if (hash == -1) {
1725
        /* Can't fail */
1726
46.7M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1727
46.7M
        set_ob_shash(a, hash);
1728
46.7M
    }
1729
79.0M
    return hash;
1730
79.0M
}
1731
1732
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    /* Implements bytes[item] for an integer-like index or a slice.
       Returns a new reference, or NULL with an exception set. */
    PyBytesObject *self = _PyBytes_CAST(op);

    /* Integer index: return the byte value as an int. */
    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (index < 0) {
            /* Negative indices count from the end. */
            index += PyBytes_GET_SIZE(self);
        }
        if (index < 0 || index >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    /* Anything that is neither an index nor a slice is a type error. */
    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    Py_ssize_t slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self),
                                                   &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* A full slice of an exact bytes object is the object itself. */
        if (start == 0 &&
            slicelength == PyBytes_GET_SIZE(self) &&
            PyBytes_CheckExact(self)) {
            return Py_NewRef(self);
        }
        /* Contiguous range: copy it in one go. */
        return PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self) + start,
            slicelength);
    }

    /* Extended slice: gather the selected bytes one at a time. */
    const char *source_buf = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }

    char *result_buf = PyBytes_AS_STRING(result);
    /* The source position is kept in a size_t, as in the original loop. */
    size_t cur = start;
    for (Py_ssize_t i = 0; i < slicelength; i++) {
        result_buf[i] = source_buf[cur];
        cur += step;
    }
    return result;
}
1797
1798
static int
1799
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1800
85.1M
{
1801
85.1M
    PyBytesObject *self = _PyBytes_CAST(op);
1802
85.1M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1803
85.1M
                             1, flags);
1804
85.1M
}
1805
1806
static PySequenceMethods bytes_as_sequence = {
1807
    bytes_length,       /*sq_length*/
1808
    _PyBytes_Concat,       /*sq_concat*/
1809
    _PyBytes_Repeat,    /*sq_repeat*/
1810
    bytes_item,         /*sq_item*/
1811
    0,                  /*sq_slice*/
1812
    0,                  /*sq_ass_item*/
1813
    0,                  /*sq_ass_slice*/
1814
    bytes_contains      /*sq_contains*/
1815
};
1816
1817
static PyMappingMethods bytes_as_mapping = {
1818
    bytes_length,
1819
    bytes_subscript,
1820
    0,
1821
};
1822
1823
static PyBufferProcs bytes_as_buffer = {
1824
    bytes_buffer_getbuffer,
1825
    NULL,
1826
};
1827
1828
1829
/*[clinic input]
1830
bytes.__bytes__
1831
Convert this value to exact type bytes.
1832
[clinic start generated code]*/
1833
1834
static PyObject *
1835
bytes___bytes___impl(PyBytesObject *self)
1836
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1837
43.3k
{
1838
43.3k
    if (PyBytes_CheckExact(self)) {
1839
43.3k
        return Py_NewRef(self);
1840
43.3k
    }
1841
0
    else {
1842
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1843
0
    }
1844
43.3k
}
1845
1846
1847
294
/* Selectors passed as `striptype` to the strip helpers. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
1850
1851
/*[clinic input]
1852
@permit_long_summary
1853
bytes.split
1854
1855
    sep: object = None
1856
        The delimiter according to which to split the bytes.
1857
        None (the default value) means split on ASCII whitespace
1858
        characters (space, tab, return, newline, formfeed, vertical tab).
1859
    maxsplit: Py_ssize_t = -1
1860
        Maximum number of splits to do.
1861
        -1 (the default value) means no limit.
1862
1863
Return a list of the sections in the bytes, using sep as the delimiter.
1864
[clinic start generated code]*/
1865
1866
static PyObject *
1867
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1868
/*[clinic end generated code: output=52126b5844c1d8ef input=330ff95d92544b05]*/
1869
3.00M
{
1870
3.00M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1871
3.00M
    const char *s = PyBytes_AS_STRING(self), *sub;
1872
3.00M
    Py_buffer vsub;
1873
3.00M
    PyObject *list;
1874
1875
3.00M
    if (maxsplit < 0)
1876
3.00M
        maxsplit = PY_SSIZE_T_MAX;
1877
3.00M
    if (sep == Py_None)
1878
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1879
3.00M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1880
0
        return NULL;
1881
3.00M
    sub = vsub.buf;
1882
3.00M
    n = vsub.len;
1883
1884
3.00M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1885
3.00M
    PyBuffer_Release(&vsub);
1886
3.00M
    return list;
1887
3.00M
}
1888
1889
/*[clinic input]
1890
bytes.partition
1891
1892
    sep: Py_buffer
1893
    /
1894
1895
Partition the bytes into three parts using the given separator.
1896
1897
This will search for the separator sep in the bytes.  If the
1898
separator is found, returns a 3-tuple containing the part before the
1899
separator, the separator itself, and the part after it.
1900
1901
If the separator is not found, returns a 3-tuple containing the
1902
original bytes object and two empty bytes objects.
1903
[clinic start generated code]*/
1904
1905
static PyObject *
1906
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1907
/*[clinic end generated code: output=f532b392a17ff695 input=2e6e551ea4f8b95a]*/
1908
387k
{
1909
387k
    return stringlib_partition(
1910
387k
        (PyObject*) self,
1911
387k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1912
387k
        sep->obj, (const char *)sep->buf, sep->len
1913
387k
        );
1914
387k
}
1915
1916
/*[clinic input]
1917
bytes.rpartition
1918
1919
    sep: Py_buffer
1920
    /
1921
1922
Partition the bytes into three parts using the given separator.
1923
1924
This will search for the separator sep in the bytes, starting at the
1925
end.  If the separator is found, returns a 3-tuple containing the
1926
part before the separator, the separator itself, and the part after
1927
it.
1928
1929
If the separator is not found, returns a 3-tuple containing two
1930
empty bytes objects and the original bytes object.
1931
[clinic start generated code]*/
1932
1933
static PyObject *
1934
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1935
/*[clinic end generated code: output=191b114cbb028e50 input=f7d24f722a5470a4]*/
1936
0
{
1937
0
    return stringlib_rpartition(
1938
0
        (PyObject*) self,
1939
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1940
0
        sep->obj, (const char *)sep->buf, sep->len
1941
0
        );
1942
0
}
1943
1944
/*[clinic input]
1945
@permit_long_summary
1946
bytes.rsplit = bytes.split
1947
1948
Return a list of the sections in the bytes, using sep as the delimiter.
1949
1950
Splitting is done starting at the end of the bytes and working to
1951
the front.
1952
[clinic start generated code]*/
1953
1954
static PyObject *
1955
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1956
/*[clinic end generated code: output=ba698d9ea01e1c8f input=ba9bee56285f43e4]*/
1957
0
{
1958
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1959
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1960
0
    Py_buffer vsub;
1961
0
    PyObject *list;
1962
1963
0
    if (maxsplit < 0)
1964
0
        maxsplit = PY_SSIZE_T_MAX;
1965
0
    if (sep == Py_None)
1966
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1967
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1968
0
        return NULL;
1969
0
    sub = vsub.buf;
1970
0
    n = vsub.len;
1971
1972
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1973
0
    PyBuffer_Release(&vsub);
1974
0
    return list;
1975
0
}
1976
1977
1978
/*[clinic input]
1979
bytes.join
1980
1981
    iterable_of_bytes: object
1982
    /
1983
1984
Concatenate any number of bytes objects.
1985
1986
The bytes whose method is called is inserted in between each pair.
1987
1988
The result is returned as a new bytes object.
1989
1990
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1991
[clinic start generated code]*/
1992
1993
static PyObject *
1994
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1995
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1996
269k
{
1997
269k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1998
269k
}
1999
2000
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    /* Join `iterable` using the bytes object `sep` as separator.
       A NULL sep raises the bad-internal-call error; a non-bytes sep
       raises TypeError.  Both error cases return NULL. */
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
2015
2016
/*[clinic input]
2017
@permit_long_summary
2018
@text_signature "($self, sub[, start[, end]], /)"
2019
bytes.find
2020
2021
    sub: object
2022
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2023
         Optional start position. Default: start of the bytes.
2024
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2025
         Optional stop position. Default: end of the bytes.
2026
    /
2027
2028
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2029
2030
Return -1 on failure.
2031
[clinic start generated code]*/
2032
2033
static PyObject *
2034
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2035
                Py_ssize_t end)
2036
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
2037
14.2M
{
2038
14.2M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2039
14.2M
                          sub, start, end);
2040
14.2M
}
2041
2042
/*[clinic input]
2043
@permit_long_summary
2044
bytes.index = bytes.find
2045
2046
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2047
2048
Raise ValueError if the subsection is not found.
2049
[clinic start generated code]*/
2050
2051
static PyObject *
2052
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2053
                 Py_ssize_t end)
2054
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
2055
0
{
2056
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2057
0
                           sub, start, end);
2058
0
}
2059
2060
/*[clinic input]
2061
@permit_long_summary
2062
bytes.rfind = bytes.find
2063
2064
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2065
2066
Return -1 on failure.
2067
[clinic start generated code]*/
2068
2069
static PyObject *
2070
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2071
                 Py_ssize_t end)
2072
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
2073
278k
{
2074
278k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2075
278k
                           sub, start, end);
2076
278k
}
2077
2078
/*[clinic input]
2079
@permit_long_summary
2080
bytes.rindex = bytes.find
2081
2082
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2083
2084
Raise ValueError if the subsection is not found.
2085
[clinic start generated code]*/
2086
2087
static PyObject *
2088
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2089
                  Py_ssize_t end)
2090
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2091
0
{
2092
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2093
0
                            sub, start, end);
2094
0
}
2095
2096
2097
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    /* Strip bytes that occur in the buffer exported by `sepobj` from the
       left end, the right end, or both ends of self, as selected by
       `striptype`.  Returns a new reference, or NULL if sepobj does not
       export a buffer. */
    const char *s = PyBytes_AS_STRING(self);
    Py_ssize_t len = PyBytes_GET_SIZE(self);

    Py_buffer vsep;
    if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    const char *chars = vsep.buf;
    Py_ssize_t nchars = vsep.len;

    /* `left` ends up at the first byte to keep. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < len; left++) {
            if (memchr(chars, Py_CHARMASK(s[left]), nchars) == NULL) {
                break;
            }
        }
    }

    /* `right` ends up one past the last byte to keep; it never moves
       below `left`. */
    Py_ssize_t right = len;
    if (striptype != LEFTSTRIP) {
        while (right > left &&
               memchr(chars, Py_CHARMASK(s[right - 1]), nchars) != NULL) {
            right--;
        }
    }

    PyBuffer_Release(&vsep);

    /* Nothing stripped from an exact bytes object: reuse it. */
    if (left == 0 && right == len && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(s + left, right - left);
}
2135
2136
2137
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    /* Strip bytes for which Py_ISSPACE() is true from the left end, the
       right end, or both ends of self, as selected by `striptype`.
       Returns a new reference. */
    const char *s = PyBytes_AS_STRING(self);
    Py_ssize_t len = PyBytes_GET_SIZE(self);

    /* `left` ends up at the first byte to keep. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < len; left++) {
            if (!Py_ISSPACE(s[left])) {
                break;
            }
        }
    }

    /* `right` ends up one past the last byte to keep; it never moves
       below `left`. */
    Py_ssize_t right = len;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(s[right - 1])) {
            right--;
        }
    }

    /* Nothing stripped from an exact bytes object: reuse it. */
    if (left == 0 && right == len && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(s + left, right - left);
}
2164
2165
2166
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    /* Dispatch: None selects whitespace stripping, anything else is
       used as the set of bytes to strip. */
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2174
2175
/*[clinic input]
2176
bytes.strip
2177
2178
    bytes: object = None
2179
    /
2180
2181
Strip leading and trailing bytes contained in the argument.
2182
2183
If the argument is omitted or None, strip leading and trailing ASCII
2184
whitespace.
2185
[clinic start generated code]*/
2186
2187
static PyObject *
2188
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2189
/*[clinic end generated code: output=c7c228d3bd104a1b input=9ffea5f752032bd0]*/
2190
0
{
2191
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2192
0
}
2193
2194
/*[clinic input]
2195
bytes.lstrip
2196
2197
    bytes: object = None
2198
    /
2199
2200
Strip leading bytes contained in the argument.
2201
2202
If the argument is omitted or None, strip leading ASCII whitespace.
2203
[clinic start generated code]*/
2204
2205
static PyObject *
2206
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2207
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2208
0
{
2209
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2210
0
}
2211
2212
/*[clinic input]
2213
bytes.rstrip
2214
2215
    bytes: object = None
2216
    /
2217
2218
Strip trailing bytes contained in the argument.
2219
2220
If the argument is omitted or None, strip trailing ASCII whitespace.
2221
[clinic start generated code]*/
2222
2223
static PyObject *
2224
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2225
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2226
294
{
2227
294
    return do_argstrip(self, RIGHTSTRIP, bytes);
2228
294
}
2229
2230
2231
/*[clinic input]
2232
@permit_long_summary
2233
bytes.count = bytes.find
2234
2235
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2236
[clinic start generated code]*/
2237
2238
static PyObject *
2239
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2240
                 Py_ssize_t end)
2241
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2242
5.94M
{
2243
5.94M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2244
5.94M
                           sub, start, end);
2245
5.94M
}
2246
2247
2248
/*[clinic input]
2249
@permit_long_summary
2250
bytes.translate
2251
2252
    table: object
2253
        Translation table, which must be a bytes object of length 256.
2254
    /
2255
    delete as deletechars: object(c_default="NULL") = b''
2256
2257
Return a copy with each character mapped by the given translation table.
2258
2259
All characters occurring in the optional argument delete are
2260
removed.  The remaining characters are mapped through the given
2261
translation table.
2262
[clinic start generated code]*/
2263
2264
static PyObject *
2265
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2266
                     PyObject *deletechars)
2267
/*[clinic end generated code: output=43be3437f1956211 input=bddcdef0a87895d2]*/
2268
0
{
2269
0
    const char *input;
2270
0
    char *output;
2271
0
    Py_buffer table_view = {NULL, NULL};
2272
0
    Py_buffer del_table_view = {NULL, NULL};
2273
0
    const char *table_chars;
2274
0
    Py_ssize_t i, c, changed = 0;
2275
0
    PyObject *input_obj = (PyObject*)self;
2276
0
    const char *output_start, *del_table_chars=NULL;
2277
0
    Py_ssize_t inlen, tablen, dellen = 0;
2278
0
    PyObject *result;
2279
0
    int trans_table[256];
2280
2281
0
    if (PyBytes_Check(table)) {
2282
0
        table_chars = PyBytes_AS_STRING(table);
2283
0
        tablen = PyBytes_GET_SIZE(table);
2284
0
    }
2285
0
    else if (table == Py_None) {
2286
0
        table_chars = NULL;
2287
0
        tablen = 256;
2288
0
    }
2289
0
    else {
2290
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2291
0
            return NULL;
2292
0
        table_chars = table_view.buf;
2293
0
        tablen = table_view.len;
2294
0
    }
2295
2296
0
    if (tablen != 256) {
2297
0
        PyErr_SetString(PyExc_ValueError,
2298
0
          "translation table must be 256 characters long");
2299
0
        PyBuffer_Release(&table_view);
2300
0
        return NULL;
2301
0
    }
2302
2303
0
    if (deletechars != NULL) {
2304
0
        if (PyBytes_Check(deletechars)) {
2305
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2306
0
            dellen = PyBytes_GET_SIZE(deletechars);
2307
0
        }
2308
0
        else {
2309
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2310
0
                PyBuffer_Release(&table_view);
2311
0
                return NULL;
2312
0
            }
2313
0
            del_table_chars = del_table_view.buf;
2314
0
            dellen = del_table_view.len;
2315
0
        }
2316
0
    }
2317
0
    else {
2318
0
        del_table_chars = NULL;
2319
0
        dellen = 0;
2320
0
    }
2321
2322
0
    inlen = PyBytes_GET_SIZE(input_obj);
2323
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2324
0
    if (result == NULL) {
2325
0
        PyBuffer_Release(&del_table_view);
2326
0
        PyBuffer_Release(&table_view);
2327
0
        return NULL;
2328
0
    }
2329
0
    output_start = output = PyBytes_AS_STRING(result);
2330
0
    input = PyBytes_AS_STRING(input_obj);
2331
2332
0
    if (dellen == 0 && table_chars != NULL) {
2333
        /* If no deletions are required, use faster code */
2334
0
        for (i = inlen; --i >= 0; ) {
2335
0
            c = Py_CHARMASK(*input++);
2336
0
            *output++ = table_chars[c];
2337
0
        }
2338
        /* Check if anything changed (for returning original object) */
2339
        /* We save this check until the end so that the compiler will */
2340
        /* unroll the loop above leading to MUCH faster code. */
2341
0
        if (PyBytes_CheckExact(input_obj)) {
2342
0
            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
2343
0
                Py_SETREF(result, Py_NewRef(input_obj));
2344
0
            }
2345
0
        }
2346
0
        PyBuffer_Release(&del_table_view);
2347
0
        PyBuffer_Release(&table_view);
2348
0
        return result;
2349
0
    }
2350
2351
0
    if (table_chars == NULL) {
2352
0
        for (i = 0; i < 256; i++)
2353
0
            trans_table[i] = Py_CHARMASK(i);
2354
0
    } else {
2355
0
        for (i = 0; i < 256; i++)
2356
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2357
0
    }
2358
0
    PyBuffer_Release(&table_view);
2359
2360
0
    for (i = 0; i < dellen; i++)
2361
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2362
0
    PyBuffer_Release(&del_table_view);
2363
2364
0
    for (i = inlen; --i >= 0; ) {
2365
0
        c = Py_CHARMASK(*input++);
2366
0
        if (trans_table[c] != -1)
2367
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2368
0
                continue;
2369
0
        changed = 1;
2370
0
    }
2371
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2372
0
        Py_DECREF(result);
2373
0
        return Py_NewRef(input_obj);
2374
0
    }
2375
    /* Fix the size of the resulting byte string */
2376
0
    if (inlen > 0)
2377
0
        _PyBytes_Resize(&result, output - output_start);
2378
0
    return result;
2379
0
}
2380
2381
2382
/*[clinic input]
2383
2384
@permit_long_summary
2385
@staticmethod
2386
bytes.maketrans
2387
2388
    frm: Py_buffer
2389
    to: Py_buffer
2390
    /
2391
2392
Return a translation table usable for the bytes or bytearray translate method.
2393
2394
The returned table will be one where each byte in frm is mapped to
2395
the byte at the same position in to.
2396
2397
The bytes objects frm and to must be of the same length.
2398
[clinic start generated code]*/
2399
2400
static PyObject *
2401
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2402
/*[clinic end generated code: output=a36f6399d4b77f6f input=3a577e5badfea8f7]*/
2403
9
{
2404
9
    return _Py_bytes_maketrans(frm, to);
2405
9
}
2406
2407
2408
/*[clinic input]
2409
bytes.replace
2410
2411
    old: Py_buffer
2412
    new: Py_buffer
2413
    /
2414
    count: Py_ssize_t = -1
2415
        Maximum number of occurrences to replace.
2416
        -1 (the default value) means replace all occurrences.
2417
2418
Return a copy with all occurrences of substring old replaced by new.
2419
2420
If count is given, only the first count occurrences are replaced.
2421
If count is not specified or -1, then all occurrences are replaced.
2422
[clinic start generated code]*/
2423
2424
static PyObject *
2425
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2426
                   Py_ssize_t count)
2427
/*[clinic end generated code: output=994fa588b6b9c104 input=cdf3cf8639297745]*/
2428
35.9k
{
2429
35.9k
    return stringlib_replace((PyObject *)self,
2430
35.9k
                             (const char *)old->buf, old->len,
2431
35.9k
                             (const char *)new->buf, new->len, count);
2432
35.9k
}
2433
2434
/** End DALKE **/
2435
2436
/*[clinic input]
2437
@permit_long_summary
2438
bytes.removeprefix as bytes_removeprefix
2439
2440
    prefix: Py_buffer
2441
    /
2442
2443
Return a bytes object with the given prefix string removed if present.
2444
2445
If the bytes starts with the prefix string, return
2446
bytes[len(prefix):].  Otherwise, return a copy of the original
2447
bytes.
2448
[clinic start generated code]*/
2449
2450
static PyObject *
2451
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2452
/*[clinic end generated code: output=f006865331a06ab6 input=3a2672bcee61d7a7]*/
2453
0
{
2454
0
    const char *self_start = PyBytes_AS_STRING(self);
2455
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2456
0
    const char *prefix_start = prefix->buf;
2457
0
    Py_ssize_t prefix_len = prefix->len;
2458
2459
0
    if (self_len >= prefix_len
2460
0
        && prefix_len > 0
2461
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2462
0
    {
2463
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2464
0
                                         self_len - prefix_len);
2465
0
    }
2466
2467
0
    if (PyBytes_CheckExact(self)) {
2468
0
        return Py_NewRef(self);
2469
0
    }
2470
2471
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2472
0
}
2473
2474
/*[clinic input]
2475
@permit_long_summary
2476
bytes.removesuffix as bytes_removesuffix
2477
2478
    suffix: Py_buffer
2479
    /
2480
2481
Return a bytes object with the given suffix string removed if present.
2482
2483
If the bytes ends with the suffix string and that suffix is not
2484
empty, return bytes[:-len(suffix)].  Otherwise, return a copy of the
2485
original bytes.
2486
[clinic start generated code]*/
2487
2488
static PyObject *
2489
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2490
/*[clinic end generated code: output=d887d308e3242eeb input=04df5f18a36f69d7]*/
2491
0
{
2492
0
    const char *self_start = PyBytes_AS_STRING(self);
2493
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2494
0
    const char *suffix_start = suffix->buf;
2495
0
    Py_ssize_t suffix_len = suffix->len;
2496
2497
0
    if (self_len >= suffix_len
2498
0
        && suffix_len > 0
2499
0
        && memcmp(self_start + self_len - suffix_len,
2500
0
                  suffix_start, suffix_len) == 0)
2501
0
    {
2502
0
        return PyBytes_FromStringAndSize(self_start,
2503
0
                                         self_len - suffix_len);
2504
0
    }
2505
2506
0
    if (PyBytes_CheckExact(self)) {
2507
0
        return Py_NewRef(self);
2508
0
    }
2509
2510
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2511
0
}
2512
2513
/*[clinic input]
2514
@permit_long_summary
2515
@text_signature "($self, prefix[, start[, end]], /)"
2516
bytes.startswith
2517
2518
    prefix as subobj: object
2519
        A bytes or a tuple of bytes to try.
2520
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2521
        Optional start position. Default: start of the bytes.
2522
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2523
        Optional stop position. Default: end of the bytes.
2524
    /
2525
2526
Return True if the bytes starts with the specified prefix, False otherwise.
2527
[clinic start generated code]*/
2528
2529
static PyObject *
2530
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2531
                      Py_ssize_t start, Py_ssize_t end)
2532
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2533
1.94M
{
2534
1.94M
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2535
1.94M
                                subobj, start, end);
2536
1.94M
}
2537
2538
/*[clinic input]
2539
@permit_long_summary
2540
@text_signature "($self, suffix[, start[, end]], /)"
2541
bytes.endswith
2542
2543
    suffix as subobj: object
2544
        A bytes or a tuple of bytes to try.
2545
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2546
         Optional start position. Default: start of the bytes.
2547
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2548
         Optional stop position. Default: end of the bytes.
2549
    /
2550
2551
Return True if the bytes ends with the specified suffix, False otherwise.
2552
[clinic start generated code]*/
2553
2554
static PyObject *
2555
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2556
                    Py_ssize_t end)
2557
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2558
315
{
2559
315
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2560
315
                              subobj, start, end);
2561
315
}
2562
2563
2564
/*[clinic input]
2565
bytes.decode
2566
2567
    encoding: str(c_default="NULL") = 'utf-8'
2568
        The encoding with which to decode the bytes.
2569
    errors: str(c_default="NULL") = 'strict'
2570
        The error handling scheme to use for the handling of decoding
2571
        errors.  The default is 'strict' meaning that decoding errors
2572
        raise a UnicodeDecodeError.  Other possible values are 'ignore'
2573
        and 'replace' as well as any other name registered with
2574
        codecs.register_error that can handle UnicodeDecodeErrors.
2575
2576
Decode the bytes using the codec registered for encoding.
2577
[clinic start generated code]*/
2578
2579
static PyObject *
2580
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2581
                  const char *errors)
2582
/*[clinic end generated code: output=5649a53dde27b314 input=94e9b8524f1d7f37]*/
2583
19.9M
{
2584
19.9M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2585
19.9M
}
2586
2587
2588
/*[clinic input]
2589
@permit_long_summary
2590
bytes.splitlines
2591
2592
    keepends: bool = False
2593
2594
Return a list of the lines in the bytes, breaking at line boundaries.
2595
2596
Line breaks are not included in the resulting list unless keepends
2597
is given and true.
2598
[clinic start generated code]*/
2599
2600
static PyObject *
2601
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2602
/*[clinic end generated code: output=3484149a5d880ffb input=8734672f34430514]*/
2603
0
{
2604
0
    return stringlib_splitlines(
2605
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2606
0
        PyBytes_GET_SIZE(self), keepends
2607
0
        );
2608
0
}
2609
2610
/*[clinic input]
2611
@classmethod
2612
bytes.fromhex
2613
2614
    string: object
2615
    /
2616
2617
Create a bytes object from a string of hexadecimal numbers.
2618
2619
Spaces between two numbers are accepted.
2620
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2621
[clinic start generated code]*/
2622
2623
static PyObject *
2624
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2625
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2626
35.1k
{
2627
35.1k
    PyObject *result = _PyBytes_FromHex(string, 0);
2628
35.1k
    if (type != &PyBytes_Type && result != NULL) {
2629
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2630
0
    }
2631
35.1k
    return result;
2632
35.1k
}
2633
2634
/* Build a bytes-like object from hexadecimal text.
 *
 * string         a str, or any object exporting the buffer protocol.
 * use_bytearray  non-zero selects _PyBytesWriter_CreateByteArray() as the
 *                writer, zero selects PyBytesWriter_Create().
 *
 * Each pair of hexadecimal digits becomes one output byte.  Characters for
 * which Py_ISSPACE() is true are skipped between pairs, but not inside a
 * pair.
 *
 * Returns whatever PyBytesWriter_FinishWithPointer() produces, or NULL with
 * an exception set:
 *   - TypeError  if string is neither str nor bytes-like;
 *   - ValueError if a non-hexadecimal character is found, or if the input
 *     ends after the first digit of a pair.
 *
 * The input is never read at or beyond `end`.  A buffer obtained through
 * PyObject_GetBuffer() is not required to be NUL-terminated, so every
 * dereference of `str` is preceded by a bounds check.
 */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;   /* marks "no buffer acquired" for the cleanup code */

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            /* writer is still NULL here; the error path hands it to
               PyBytesWriter_Discard() as-is. */
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking past `end` */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The input ended after a single digit: report the "odd number of
           digits" error without reading the byte at `end`. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 is the sentinel for "missing second digit". */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2746
2747
/*[clinic input]
2748
bytes.hex
2749
2750
    sep: object = NULL
2751
        An optional single character or byte to separate hex bytes.
2752
    bytes_per_sep: Py_ssize_t = 1
2753
        How many bytes between separators.  Positive values count from
2754
        the right, negative values count from the left.
2755
2756
Create a string of hexadecimal numbers from a bytes object.
2757
2758
Example:
2759
>>> value = b'\xb9\x01\xef'
2760
>>> value.hex()
2761
'b901ef'
2762
>>> value.hex(':')
2763
'b9:01:ef'
2764
>>> value.hex(':', 2)
2765
'b9:01ef'
2766
>>> value.hex(':', -2)
2767
'b901:ef'
2768
[clinic start generated code]*/
2769
2770
static PyObject *
2771
bytes_hex_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t bytes_per_sep)
2772
/*[clinic end generated code: output=588821f02cb9d8f5 input=b8d40cf203d172dc]*/
2773
0
{
2774
0
    const char *argbuf = PyBytes_AS_STRING(self);
2775
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2776
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2777
0
}
2778
2779
/* __getnewargs__: return a 1-tuple holding a bytes object built from this
   object's data and size (format "(y#)"). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *payload = self->ob_sval;
    Py_ssize_t length = Py_SIZE(self);

    return Py_BuildValue("(y#)", payload, length);
}
2785
2786
2787
static PyMethodDef
2788
bytes_methods[] = {
2789
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2790
    BYTES___BYTES___METHODDEF
2791
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2792
     _Py_capitalize__doc__},
2793
    STRINGLIB_CENTER_METHODDEF
2794
    BYTES_COUNT_METHODDEF
2795
    BYTES_DECODE_METHODDEF
2796
    BYTES_ENDSWITH_METHODDEF
2797
    STRINGLIB_EXPANDTABS_METHODDEF
2798
    BYTES_FIND_METHODDEF
2799
    BYTES_FROMHEX_METHODDEF
2800
    BYTES_HEX_METHODDEF
2801
    BYTES_INDEX_METHODDEF
2802
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2803
     _Py_isalnum__doc__},
2804
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2805
     _Py_isalpha__doc__},
2806
    {"isascii", stringlib_isascii, METH_NOARGS,
2807
     _Py_isascii__doc__},
2808
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2809
     _Py_isdigit__doc__},
2810
    {"islower", stringlib_islower, METH_NOARGS,
2811
     _Py_islower__doc__},
2812
    {"isspace", stringlib_isspace, METH_NOARGS,
2813
     _Py_isspace__doc__},
2814
    {"istitle", stringlib_istitle, METH_NOARGS,
2815
     _Py_istitle__doc__},
2816
    {"isupper", stringlib_isupper, METH_NOARGS,
2817
     _Py_isupper__doc__},
2818
    BYTES_JOIN_METHODDEF
2819
    STRINGLIB_LJUST_METHODDEF
2820
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2821
    BYTES_LSTRIP_METHODDEF
2822
    BYTES_MAKETRANS_METHODDEF
2823
    BYTES_PARTITION_METHODDEF
2824
    BYTES_REPLACE_METHODDEF
2825
    BYTES_REMOVEPREFIX_METHODDEF
2826
    BYTES_REMOVESUFFIX_METHODDEF
2827
    BYTES_RFIND_METHODDEF
2828
    BYTES_RINDEX_METHODDEF
2829
    STRINGLIB_RJUST_METHODDEF
2830
    BYTES_RPARTITION_METHODDEF
2831
    BYTES_RSPLIT_METHODDEF
2832
    BYTES_RSTRIP_METHODDEF
2833
    BYTES_SPLIT_METHODDEF
2834
    BYTES_SPLITLINES_METHODDEF
2835
    BYTES_STARTSWITH_METHODDEF
2836
    BYTES_STRIP_METHODDEF
2837
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2838
     _Py_swapcase__doc__},
2839
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2840
    BYTES_TRANSLATE_METHODDEF
2841
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2842
    STRINGLIB_ZFILL_METHODDEF
2843
    {NULL,     NULL}                         /* sentinel */
2844
};
2845
2846
/* nb_remainder slot: implements `bytes % arg` formatting.  Returns
   NotImplemented when the left operand is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2855
2856
static PyNumberMethods bytes_as_number = {
2857
    0,              /*nb_add*/
2858
    0,              /*nb_subtract*/
2859
    0,              /*nb_multiply*/
2860
    bytes_mod,      /*nb_remainder*/
2861
};
2862
2863
static PyObject *
2864
bytes_subtype_new(PyTypeObject *, PyObject *);
2865
2866
/*[clinic input]
2867
@classmethod
2868
bytes.__new__ as bytes_new
2869
2870
    source as x: object = NULL
2871
    encoding: str = NULL
2872
    errors: str = NULL
2873
2874
[clinic start generated code]*/
2875
2876
static PyObject *
2877
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2878
               const char *errors)
2879
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2880
17.1M
{
2881
17.1M
    PyObject *bytes;
2882
17.1M
    PyObject *func;
2883
17.1M
    Py_ssize_t size;
2884
2885
17.1M
    if (x == NULL) {
2886
0
        if (encoding != NULL || errors != NULL) {
2887
0
            PyErr_SetString(PyExc_TypeError,
2888
0
                            encoding != NULL ?
2889
0
                            "encoding without a string argument" :
2890
0
                            "errors without a string argument");
2891
0
            return NULL;
2892
0
        }
2893
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2894
0
    }
2895
17.1M
    else if (encoding != NULL) {
2896
        /* Encode via the codec registry */
2897
336k
        if (!PyUnicode_Check(x)) {
2898
0
            PyErr_SetString(PyExc_TypeError,
2899
0
                            "encoding without a string argument");
2900
0
            return NULL;
2901
0
        }
2902
336k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2903
336k
    }
2904
16.8M
    else if (errors != NULL) {
2905
0
        PyErr_SetString(PyExc_TypeError,
2906
0
                        PyUnicode_Check(x) ?
2907
0
                        "string argument without an encoding" :
2908
0
                        "errors without a string argument");
2909
0
        return NULL;
2910
0
    }
2911
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2912
       integer argument before deferring to PyBytes_FromObject, something
2913
       PyObject_Bytes doesn't do. */
2914
16.8M
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2915
43.3k
        bytes = _PyObject_CallNoArgs(func);
2916
43.3k
        Py_DECREF(func);
2917
43.3k
        if (bytes == NULL)
2918
0
            return NULL;
2919
43.3k
        if (!PyBytes_Check(bytes)) {
2920
0
            PyErr_Format(PyExc_TypeError,
2921
0
                         "%T.__bytes__() must return a bytes, not %T",
2922
0
                         x, bytes);
2923
0
            Py_DECREF(bytes);
2924
0
            return NULL;
2925
0
        }
2926
43.3k
    }
2927
16.8M
    else if (PyErr_Occurred())
2928
0
        return NULL;
2929
16.8M
    else if (PyUnicode_Check(x)) {
2930
0
        PyErr_SetString(PyExc_TypeError,
2931
0
                        "string argument without an encoding");
2932
0
        return NULL;
2933
0
    }
2934
    /* Is it an integer? */
2935
16.8M
    else if (_PyIndex_Check(x)) {
2936
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2937
0
        if (size == -1 && PyErr_Occurred()) {
2938
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2939
0
                return NULL;
2940
0
            PyErr_Clear();  /* fall through */
2941
0
            bytes = PyBytes_FromObject(x);
2942
0
        }
2943
0
        else {
2944
0
            if (size < 0) {
2945
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2946
0
                return NULL;
2947
0
            }
2948
0
            bytes = _PyBytes_FromSize(size, 1);
2949
0
        }
2950
0
    }
2951
16.8M
    else {
2952
16.8M
        bytes = PyBytes_FromObject(x);
2953
16.8M
    }
2954
2955
17.1M
    if (bytes != NULL && type != &PyBytes_Type) {
2956
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2957
0
    }
2958
2959
17.1M
    return bytes;
2960
17.1M
}
2961
2962
/* Create a bytes object holding a C-contiguous copy of the data exported
   by x through the buffer protocol.  Returns NULL on failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
    int copied = (writer != NULL
                  && PyBuffer_ToContiguous(PyBytesWriter_GetData(writer),
                                           &view, view.len, 'C') >= 0);
    if (!copied) {
        /* writer may be NULL here, exactly as on the original failure path */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&view);
        return NULL;
    }

    PyBuffer_Release(&view);
    return PyBytesWriter_Finish(writer);
}
2987
2988
/* Build a bytes object from a list whose items are converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256).  Returns NULL with an
   exception set on failure. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    /* The list length is re-read on every pass and the writer is grown on
       demand -- presumably because converting an item can run code that
       resizes the list; confirm before simplifying. */
    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        PyObject *item = _PyList_GetItemRef((PyListObject *)x, pos);
        if (item == NULL) {
            goto error;
        }
        Py_ssize_t value = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (!(0 <= value && value < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (pos >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) value;
        pos++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3030
3031
/* Build a bytes object from a tuple whose items are converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256).  Returns NULL with an
   exception set on failure. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    for (Py_ssize_t pos = 0; pos < count; pos++) {
        Py_ssize_t value = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, pos), NULL);
        if (value == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (!(0 <= value && value < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }
        out[pos] = (char) value;
    }
    /* Exactly `count` bytes were written, matching the size requested. */
    return PyBytesWriter_Finish(writer);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3063
3064
/* Build a bytes object by draining the iterator `it`.  `x` is only used to
   obtain a length hint for the initial allocation.  Each item is converted
   with PyNumber_AsSsize_t() and must lie in range(0, 256).  Returns NULL
   with an exception set on failure. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    /* Start from the length hint (64 when none is available) and grow the
       writer as needed. */
    Py_ssize_t capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(capacity);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    capacity = _PyBytesWriter_GetAllocated(writer);

    /* Run the iterator to exhaustion */
    Py_ssize_t count = 0;
    for (;;) {
        PyObject *item = PyIter_Next(it);
        if (item == NULL) {
            /* NULL means either exhaustion or an error. */
            if (PyErr_Occurred()) {
                goto error;
            }
            break;
        }

        /* Interpret it as an int (__index__) */
        Py_ssize_t value = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (!(0 <= value && value < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, growing the writer first when it is full */
        if (count >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) value;
        count++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3123
3124
/* Convert x to a bytes object.
 *
 * Conversions are tried in this order: exact bytes (returned as a new
 * reference to x itself), buffer exporter, exact list, exact tuple, and
 * finally any non-str iterable.  Returns NULL with an exception set on
 * failure; anything unconvertible yields a TypeError.
 */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iterator = PyObject_GetIter(x);
        if (iterator != NULL) {
            PyObject *converted = _PyBytes_FromIterator(iterator, x);
            Py_DECREF(iterator);
            return converted;
        }
        /* Only a TypeError from PyObject_GetIter() is replaced by the
           message below; any other exception is propagated. */
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3165
3166
/* This allocator is needed for subclasses that don't want to use __new__.
3167
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3168
 *
3169
 * This allocator will be removed when ob_shash is removed.
3170
 */
3171
static PyObject *
3172
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3173
0
{
3174
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3175
0
    if (obj == NULL) {
3176
0
        return NULL;
3177
0
    }
3178
0
    set_ob_shash(obj, -1);
3179
0
    return (PyObject*)obj;
3180
0
}
3181
3182
static PyObject *
3183
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3184
0
{
3185
0
    PyObject *pnew;
3186
0
    Py_ssize_t n;
3187
3188
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3189
0
    assert(PyBytes_Check(tmp));
3190
0
    n = PyBytes_GET_SIZE(tmp);
3191
0
    pnew = type->tp_alloc(type, n);
3192
0
    if (pnew != NULL) {
3193
0
        memcpy(PyBytes_AS_STRING(pnew),
3194
0
                  PyBytes_AS_STRING(tmp), n+1);
3195
0
        set_ob_shash((PyBytesObject *)pnew,
3196
0
            get_ob_shash((PyBytesObject *)tmp));
3197
0
    }
3198
0
    return pnew;
3199
0
}
3200
3201
PyDoc_STRVAR(bytes_doc,
3202
"bytes(iterable_of_ints) -> bytes\n\
3203
bytes(string, encoding[, errors]) -> bytes\n\
3204
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3205
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3206
bytes() -> empty bytes object\n\
3207
\n\
3208
Construct an immutable array of bytes from:\n\
3209
  - an iterable yielding integers in range(256)\n\
3210
  - a text string encoded using the specified encoding\n\
3211
  - any object implementing the buffer API.\n\
3212
  - an integer");
3213
3214
/* Forward declaration: bytes_iter is referenced by the tp_iter slot of
   PyBytes_Type before its definition appears. */
static PyObject *bytes_iter(PyObject *seq);
3215
3216
3217
static _PyObjectIndexPair
3218
bytes_iteritem(PyObject *obj, Py_ssize_t index)
3219
2.01k
{
3220
2.01k
    PyBytesObject *a = _PyBytes_CAST(obj);
3221
2.01k
    if (index >= Py_SIZE(a)) {
3222
51
        return (_PyObjectIndexPair) { .object = NULL, .index = index };
3223
51
    }
3224
1.96k
    PyObject *l = _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[index]);
3225
1.96k
    return (_PyObjectIndexPair) { .object = l, .index = index + 1 };
3226
2.01k
}
3227
3228
PyTypeObject PyBytes_Type = {
3229
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3230
    "bytes",
3231
    PyBytesObject_SIZE,
3232
    sizeof(char),
3233
    0,                                          /* tp_dealloc */
3234
    0,                                          /* tp_vectorcall_offset */
3235
    0,                                          /* tp_getattr */
3236
    0,                                          /* tp_setattr */
3237
    0,                                          /* tp_as_async */
3238
    bytes_repr,                                 /* tp_repr */
3239
    &bytes_as_number,                           /* tp_as_number */
3240
    &bytes_as_sequence,                         /* tp_as_sequence */
3241
    &bytes_as_mapping,                          /* tp_as_mapping */
3242
    bytes_hash,                                 /* tp_hash */
3243
    0,                                          /* tp_call */
3244
    bytes_str,                                  /* tp_str */
3245
    PyObject_GenericGetAttr,                    /* tp_getattro */
3246
    0,                                          /* tp_setattro */
3247
    &bytes_as_buffer,                           /* tp_as_buffer */
3248
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3249
        Py_TPFLAGS_BYTES_SUBCLASS |
3250
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3251
    bytes_doc,                                  /* tp_doc */
3252
    0,                                          /* tp_traverse */
3253
    0,                                          /* tp_clear */
3254
    bytes_richcompare,                          /* tp_richcompare */
3255
    0,                                          /* tp_weaklistoffset */
3256
    bytes_iter,                                 /* tp_iter */
3257
    0,                                          /* tp_iternext */
3258
    bytes_methods,                              /* tp_methods */
3259
    0,                                          /* tp_members */
3260
    0,                                          /* tp_getset */
3261
    0,                                          /* tp_base */
3262
    0,                                          /* tp_dict */
3263
    0,                                          /* tp_descr_get */
3264
    0,                                          /* tp_descr_set */
3265
    0,                                          /* tp_dictoffset */
3266
    0,                                          /* tp_init */
3267
    bytes_alloc,                                /* tp_alloc */
3268
    bytes_new,                                  /* tp_new */
3269
    PyObject_Free,                              /* tp_free */
3270
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3271
    ._tp_iteritem = bytes_iteritem,
3272
};
3273
3274
void
3275
PyBytes_Concat(PyObject **pv, PyObject *w)
3276
0
{
3277
0
    assert(pv != NULL);
3278
0
    if (*pv == NULL)
3279
0
        return;
3280
0
    if (w == NULL) {
3281
0
        Py_CLEAR(*pv);
3282
0
        return;
3283
0
    }
3284
3285
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3286
        /* Only one reference, so we can resize in place */
3287
0
        Py_ssize_t oldsize;
3288
0
        Py_buffer wb;
3289
3290
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3291
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3292
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3293
0
            Py_CLEAR(*pv);
3294
0
            return;
3295
0
        }
3296
3297
0
        oldsize = PyBytes_GET_SIZE(*pv);
3298
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3299
0
            PyErr_NoMemory();
3300
0
            goto error;
3301
0
        }
3302
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3303
0
            goto error;
3304
3305
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3306
0
        PyBuffer_Release(&wb);
3307
0
        return;
3308
3309
0
      error:
3310
0
        PyBuffer_Release(&wb);
3311
0
        Py_CLEAR(*pv);
3312
0
        return;
3313
0
    }
3314
3315
0
    else {
3316
        /* Multiple references, need to create new object */
3317
0
        PyObject *v;
3318
0
        v = _PyBytes_Concat(*pv, w);
3319
0
        Py_SETREF(*pv, v);
3320
0
    }
3321
0
}
3322
3323
/* Same as PyBytes_Concat(pv, w), followed by releasing one reference to w.
   Py_XDECREF is used, so w may be NULL. */
void
3324
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3325
0
{
3326
0
    PyBytes_Concat(pv, w);
3327
0
    Py_XDECREF(w);
3328
0
}
3329
3330
3331
/* The following function breaks the notion that bytes are immutable:
3332
   it changes the size of a bytes object.  You can think of it
3333
   as creating a new bytes object and destroying the old one, only
3334
   more efficiently.
3335
   Note that if there's not enough memory to resize the bytes object, the
3336
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3337
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3338
   returned, and the value in *pv may or may not be the same as on input.
3339
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3340
   does *not* include that), and a trailing \0 byte is stored.
3341
*/
3342
3343
int
3344
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3345
24.6M
{
3346
24.6M
    PyObject *v;
3347
24.6M
    PyBytesObject *sv;
3348
24.6M
    v = *pv;
3349
24.6M
    if (!PyBytes_Check(v) || newsize < 0) {
3350
0
        *pv = 0;
3351
0
        Py_DECREF(v);
3352
0
        PyErr_BadInternalCall();
3353
0
        return -1;
3354
0
    }
3355
24.6M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3356
24.6M
    if (oldsize == newsize) {
3357
        /* return early if newsize equals to v->ob_size */
3358
1.67M
        return 0;
3359
1.67M
    }
3360
22.9M
    if (oldsize == 0) {
3361
18.9M
        *pv = _PyBytes_FromSize(newsize, 0);
3362
18.9M
        Py_DECREF(v);
3363
18.9M
        return (*pv == NULL) ? -1 : 0;
3364
18.9M
    }
3365
4.01M
    if (newsize == 0) {
3366
6.97k
        *pv = bytes_get_empty();
3367
6.97k
        Py_DECREF(v);
3368
6.97k
        return 0;
3369
6.97k
    }
3370
4.00M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3371
0
        if (oldsize < newsize) {
3372
0
            *pv = _PyBytes_FromSize(newsize, 0);
3373
0
            if (*pv) {
3374
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3375
0
            }
3376
0
        }
3377
0
        else {
3378
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3379
0
        }
3380
0
        Py_DECREF(v);
3381
0
        return (*pv == NULL) ? -1 : 0;
3382
0
    }
3383
3384
#ifdef Py_TRACE_REFS
3385
    _Py_ForgetReference(v);
3386
#endif
3387
4.00M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3388
4.00M
    *pv = (PyObject *)
3389
4.00M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3390
4.00M
    if (*pv == NULL) {
3391
#ifdef Py_REF_DEBUG
3392
        _Py_DecRefTotal(_PyThreadState_GET());
3393
#endif
3394
0
        PyObject_Free(v);
3395
0
        PyErr_NoMemory();
3396
0
        return -1;
3397
0
    }
3398
4.00M
    _Py_NewReferenceNoTotal(*pv);
3399
4.00M
    sv = (PyBytesObject *) *pv;
3400
4.00M
    Py_SET_SIZE(sv, newsize);
3401
4.00M
    sv->ob_sval[newsize] = '\0';
3402
4.00M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3403
4.00M
    return 0;
3404
4.00M
}
3405
3406
3407
/*********************** Bytes Iterator ****************************/
3408
3409
/* State of an iterator over a bytes object. */
typedef struct {
3410
    PyObject_HEAD
3411
    Py_ssize_t it_index;   /* Index in it_seq of the next byte to yield */
3412
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3413
} striterobject;
3414
3415
580
/* Cast an object pointer to striterobject *; no type check is performed. */
#define _striterobject_CAST(op)  ((striterobject *)(op))
3416
3417
static void
3418
striter_dealloc(PyObject *op)
3419
44
{
3420
44
    striterobject *it = _striterobject_CAST(op);
3421
44
    _PyObject_GC_UNTRACK(it);
3422
44
    Py_XDECREF(it->it_seq);
3423
44
    PyObject_GC_Del(it);
3424
44
}
3425
3426
static int
3427
striter_traverse(PyObject *op, visitproc visit, void *arg)
3428
0
{
3429
0
    striterobject *it = _striterobject_CAST(op);
3430
0
    Py_VISIT(it->it_seq);
3431
0
    return 0;
3432
0
}
3433
3434
/* tp_iternext: return the next byte as an int object, or NULL (without
   setting an exception here) once the bytes object is exhausted. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *it = _striterobject_CAST(op);
    assert(it != NULL);

    PyBytesObject *seq = it->it_seq;
    if (seq == NULL) {
        /* Exhausted on an earlier call. */
        return NULL;
    }
    assert(PyBytes_Check(seq));

    if (it->it_index >= PyBytes_GET_SIZE(seq)) {
        /* Reached the end: drop the sequence so later calls return at once. */
        it->it_seq = NULL;
        Py_DECREF(seq);
        return NULL;
    }

    unsigned char byte = (unsigned char)seq->ob_sval[it->it_index];
    it->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3455
3456
/* __length_hint__: number of bytes not yet yielded; 0 once exhausted. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *it = _striterobject_CAST(op);

    if (it->it_seq == NULL) {
        return PyLong_FromSsize_t(0);
    }
    return PyLong_FromSsize_t(PyBytes_GET_SIZE(it->it_seq) - it->it_index);
}
3465
3466
PyDoc_STRVAR(length_hint_doc,
3467
             "Private method returning an estimate of len(list(it)).");
3468
3469
/* __reduce__: return (iter, (seq,), index) for a live iterator, or
   (iter, ((),)) for an exhausted one. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

    /* _PyEval_GetBuiltin can invoke arbitrary code,
     * call must be before access of iterator pointers.
     * see issue #101765 */
    striterobject *it = _striterobject_CAST(op);
    if (it->it_seq == NULL) {
        return Py_BuildValue("N(())", iter);
    }
    return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
}
3484
3485
/* Docstring for the bytes iterator's __reduce__ method. */
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3486
3487
/* __setstate__: restore the iteration position from an int.  The index is
   clamped to [0, len(seq)]; an exhausted iterator is left untouched. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *it = _striterobject_CAST(op);
    if (it->it_seq != NULL) {
        Py_ssize_t limit = PyBytes_GET_SIZE(it->it_seq);
        if (index < 0) {
            index = 0;
        }
        else if (index > limit) {
            index = limit; /* iterator exhausted */
        }
        it->it_index = index;
    }
    Py_RETURN_NONE;
}
3503
3504
/* Docstring for the bytes iterator's __setstate__ method. */
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3505
3506
static PyMethodDef striter_methods[] = {
3507
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3508
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3509
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3510
    {NULL,              NULL}           /* sentinel */
3511
};
3512
3513
PyTypeObject PyBytesIter_Type = {
3514
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3515
    "bytes_iterator",                           /* tp_name */
3516
    sizeof(striterobject),                      /* tp_basicsize */
3517
    0,                                          /* tp_itemsize */
3518
    /* methods */
3519
    striter_dealloc,                            /* tp_dealloc */
3520
    0,                                          /* tp_vectorcall_offset */
3521
    0,                                          /* tp_getattr */
3522
    0,                                          /* tp_setattr */
3523
    0,                                          /* tp_as_async */
3524
    0,                                          /* tp_repr */
3525
    0,                                          /* tp_as_number */
3526
    0,                                          /* tp_as_sequence */
3527
    0,                                          /* tp_as_mapping */
3528
    0,                                          /* tp_hash */
3529
    0,                                          /* tp_call */
3530
    0,                                          /* tp_str */
3531
    PyObject_GenericGetAttr,                    /* tp_getattro */
3532
    0,                                          /* tp_setattro */
3533
    0,                                          /* tp_as_buffer */
3534
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3535
    0,                                          /* tp_doc */
3536
    striter_traverse,                           /* tp_traverse */
3537
    0,                                          /* tp_clear */
3538
    0,                                          /* tp_richcompare */
3539
    0,                                          /* tp_weaklistoffset */
3540
    PyObject_SelfIter,                          /* tp_iter */
3541
    striter_next,                               /* tp_iternext */
3542
    striter_methods,                            /* tp_methods */
3543
    0,
3544
};
3545
3546
/* Create a new bytes iterator positioned at index 0 of `seq`.
 *
 * `seq` must be a bytes object; otherwise a bad-internal-call error is
 * raised. The iterator holds a new strong reference to `seq` and is
 * registered with the GC. Returns NULL on failure.
 */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject,
                                              &PyBytesIter_Type);
    if (iterator != NULL) {
        iterator->it_index = 0;
        iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
        _PyObject_GC_TRACK(iterator);
    }
    return (PyObject *)iterator;
}
3563
3564
3565
void
3566
_PyBytes_RepeatBuffer(char* dest, Py_ssize_t len_dest,
3567
    const char* src, Py_ssize_t len_src)
3568
194k
{
3569
194k
    if (len_dest == 0) {
3570
834
        return;
3571
834
    }
3572
193k
    if (len_src == 1) {
3573
191k
        memset(dest, src[0], len_dest);
3574
191k
    }
3575
2.27k
    else {
3576
2.27k
        if (src != dest) {
3577
2.27k
            memcpy(dest, src, len_src);
3578
2.27k
        }
3579
2.27k
        Py_ssize_t copied = len_src;
3580
5.41k
        while (copied < len_dest) {
3581
3.14k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3582
3.14k
            memcpy(dest + copied, dest, bytes_to_copy);
3583
3.14k
            copied += bytes_to_copy;
3584
3.14k
        }
3585
2.27k
    }
3586
193k
}
3587
3588
3589
// --- PyBytesWriter API -----------------------------------------------------
3590
3591
static inline char*
3592
byteswriter_data(PyBytesWriter *writer)
3593
37.0M
{
3594
37.0M
    return _PyBytesWriter_GetData(writer);
3595
37.0M
}
3596
3597
3598
static inline Py_ssize_t
3599
byteswriter_allocated(PyBytesWriter *writer)
3600
36.8M
{
3601
36.8M
    if (writer->obj == NULL) {
3602
36.1M
        return sizeof(writer->small_buffer);
3603
36.1M
    }
3604
661k
    else if (writer->use_bytearray) {
3605
0
        return PyByteArray_GET_SIZE(writer->obj);
3606
0
    }
3607
661k
    else {
3608
661k
        return PyBytes_GET_SIZE(writer->obj);
3609
661k
    }
3610
36.8M
}
3611
3612
3613
/* Overallocation divisor: a growing buffer gets size / OVERALLOCATE_FACTOR
   extra bytes on top of the requested size. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3620
3621
static inline int
3622
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3623
27.5M
{
3624
27.5M
    assert(size >= 0);
3625
3626
27.5M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3627
27.5M
    if (size <= old_allocated) {
3628
26.6M
        return 0;
3629
26.6M
    }
3630
3631
860k
    if (resize & writer->overallocate) {
3632
21.5k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3633
21.5k
            size += size / OVERALLOCATE_FACTOR;
3634
21.5k
        }
3635
21.5k
    }
3636
3637
860k
    if (writer->obj != NULL) {
3638
21.5k
        if (writer->use_bytearray) {
3639
0
            if (PyByteArray_Resize(writer->obj, size)) {
3640
0
                return -1;
3641
0
            }
3642
0
        }
3643
21.5k
        else {
3644
21.5k
            if (_PyBytes_Resize(&writer->obj, size)) {
3645
0
                return -1;
3646
0
            }
3647
21.5k
        }
3648
21.5k
        assert(writer->obj != NULL);
3649
21.5k
    }
3650
838k
    else if (writer->use_bytearray) {
3651
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3652
0
        if (writer->obj == NULL) {
3653
0
            return -1;
3654
0
        }
3655
0
        if (resize) {
3656
0
            assert((size_t)size > sizeof(writer->small_buffer));
3657
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3658
0
                   writer->small_buffer,
3659
0
                   sizeof(writer->small_buffer));
3660
0
        }
3661
0
    }
3662
838k
    else {
3663
838k
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3664
838k
        if (writer->obj == NULL) {
3665
0
            return -1;
3666
0
        }
3667
838k
        if (resize) {
3668
0
            assert((size_t)size > sizeof(writer->small_buffer));
3669
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3670
0
                   writer->small_buffer,
3671
0
                   sizeof(writer->small_buffer));
3672
0
        }
3673
838k
    }
3674
3675
#ifdef Py_DEBUG
3676
    Py_ssize_t allocated = byteswriter_allocated(writer);
3677
    if (resize && allocated > old_allocated) {
3678
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3679
               allocated - old_allocated);
3680
    }
3681
#endif
3682
3683
860k
    return 0;
3684
860k
}
3685
3686
3687
static PyBytesWriter*
3688
byteswriter_create(Py_ssize_t size, int use_bytearray)
3689
27.5M
{
3690
27.5M
    if (size < 0) {
3691
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3692
0
        return NULL;
3693
0
    }
3694
3695
27.5M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3696
27.5M
    if (writer == NULL) {
3697
12.9k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3698
12.9k
        if (writer == NULL) {
3699
0
            PyErr_NoMemory();
3700
0
            return NULL;
3701
0
        }
3702
12.9k
    }
3703
27.5M
    writer->obj = NULL;
3704
27.5M
    writer->size = 0;
3705
27.5M
    writer->use_bytearray = use_bytearray;
3706
27.5M
    writer->overallocate = !use_bytearray;
3707
3708
27.5M
    if (size >= 1) {
3709
27.5M
        if (byteswriter_resize(writer, size, 0) < 0) {
3710
0
            PyBytesWriter_Discard(writer);
3711
0
            return NULL;
3712
0
        }
3713
27.5M
        writer->size = size;
3714
27.5M
    }
3715
#ifdef Py_DEBUG
3716
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3717
#endif
3718
27.5M
    return writer;
3719
27.5M
}
3720
3721
PyBytesWriter*
3722
PyBytesWriter_Create(Py_ssize_t size)
3723
27.5M
{
3724
27.5M
    return byteswriter_create(size, 0);
3725
27.5M
}
3726
3727
PyBytesWriter*
3728
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3729
0
{
3730
0
    return byteswriter_create(size, 1);
3731
0
}
3732
3733
3734
void
3735
PyBytesWriter_Discard(PyBytesWriter *writer)
3736
27.6M
{
3737
27.6M
    if (writer == NULL) {
3738
155k
        return;
3739
155k
    }
3740
3741
27.5M
    Py_XDECREF(writer->obj);
3742
27.5M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3743
27.5M
}
3744
3745
3746
PyObject*
3747
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3748
26.5M
{
3749
26.5M
    PyObject *result;
3750
26.5M
    if (size == 0) {
3751
48.6k
        result = bytes_get_empty();
3752
48.6k
    }
3753
26.4M
    else if (writer->obj != NULL) {
3754
726k
        if (writer->use_bytearray) {
3755
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3756
0
                if (PyByteArray_Resize(writer->obj, size)) {
3757
0
                    goto error;
3758
0
                }
3759
0
            }
3760
0
        }
3761
726k
        else {
3762
726k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3763
682k
                if (_PyBytes_Resize(&writer->obj, size)) {
3764
0
                    goto error;
3765
0
                }
3766
682k
            }
3767
726k
        }
3768
726k
        result = writer->obj;
3769
726k
        writer->obj = NULL;
3770
726k
    }
3771
25.7M
    else if (writer->use_bytearray) {
3772
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3773
0
    }
3774
25.7M
    else {
3775
25.7M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3776
25.7M
    }
3777
26.5M
    PyBytesWriter_Discard(writer);
3778
26.5M
    return result;
3779
3780
0
error:
3781
0
    PyBytesWriter_Discard(writer);
3782
0
    return NULL;
3783
26.5M
}
3784
3785
PyObject*
3786
PyBytesWriter_Finish(PyBytesWriter *writer)
3787
17.1M
{
3788
17.1M
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3789
17.1M
}
3790
3791
3792
PyObject*
3793
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3794
9.26M
{
3795
9.26M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3796
9.26M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3797
0
        PyBytesWriter_Discard(writer);
3798
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3799
0
        return NULL;
3800
0
    }
3801
3802
9.26M
    return PyBytesWriter_FinishWithSize(writer, size);
3803
9.26M
}
3804
3805
3806
void*
3807
PyBytesWriter_GetData(PyBytesWriter *writer)
3808
27.7M
{
3809
27.7M
    return byteswriter_data(writer);
3810
27.7M
}
3811
3812
3813
Py_ssize_t
3814
PyBytesWriter_GetSize(PyBytesWriter *writer)
3815
0
{
3816
0
    return _PyBytesWriter_GetSize(writer);
3817
0
}
3818
3819
3820
static Py_ssize_t
3821
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3822
13.2k
{
3823
13.2k
    return byteswriter_allocated(writer);
3824
13.2k
}
3825
3826
3827
int
3828
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3829
0
{
3830
0
    if (size < 0) {
3831
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3832
0
        return -1;
3833
0
    }
3834
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3835
0
        return -1;
3836
0
    }
3837
0
    writer->size = size;
3838
0
    return 0;
3839
0
}
3840
3841
3842
static void*
3843
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3844
                                      void *data)
3845
0
{
3846
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3847
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3848
0
        return NULL;
3849
0
    }
3850
0
    return byteswriter_data(writer) + pos;
3851
0
}
3852
3853
3854
int
3855
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3856
21.5k
{
3857
21.5k
    if (size < 0 && writer->size + size < 0) {
3858
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3859
0
        return -1;
3860
0
    }
3861
21.5k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3862
0
        PyErr_NoMemory();
3863
0
        return -1;
3864
0
    }
3865
21.5k
    size = writer->size + size;
3866
3867
21.5k
    if (byteswriter_resize(writer, size, 1) < 0) {
3868
0
        return -1;
3869
0
    }
3870
21.5k
    writer->size = size;
3871
21.5k
    return 0;
3872
21.5k
}
3873
3874
3875
void*
3876
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3877
                                   void *buf)
3878
0
{
3879
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3880
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3881
0
        return NULL;
3882
0
    }
3883
0
    return byteswriter_data(writer) + pos;
3884
0
}
3885
3886
3887
int
3888
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3889
                         const void *bytes, Py_ssize_t size)
3890
0
{
3891
0
    if (size < 0) {
3892
0
        size_t len = strlen(bytes);
3893
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3894
0
            PyErr_NoMemory();
3895
0
            return -1;
3896
0
        }
3897
0
        size = (Py_ssize_t)len;
3898
0
    }
3899
3900
0
    Py_ssize_t pos = writer->size;
3901
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3902
0
        return -1;
3903
0
    }
3904
0
    char *buf = byteswriter_data(writer);
3905
0
    memcpy(buf + pos, bytes, size);
3906
0
    return 0;
3907
0
}
3908
3909
3910
int
3911
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3912
0
{
3913
0
    Py_ssize_t pos = writer->size;
3914
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3915
0
        return -1;
3916
0
    }
3917
3918
0
    va_list vargs;
3919
0
    va_start(vargs, format);
3920
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3921
0
    va_end(vargs);
3922
3923
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3924
0
    return PyBytesWriter_Resize(writer, size);
3925
0
}