Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
314M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
29
30
/* Forward declaration */
31
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
32
                                                   Py_ssize_t size, void *data);
33
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
34
35
36
32.1M
#define CHARACTERS _Py_SINGLETON(bytes_characters)
37
/* CHARACTER(ch): pointer, typed as PyBytesObject *, to the entry of the
   CHARACTERS table at index `ch`.

   The expansion is a plain parenthesized expression with no trailing
   semicolon, so it can be embedded in a larger expression as well as used
   as a full statement; every call site supplies its own ';'. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
39
8.03M
#define EMPTY (&_Py_SINGLETON(bytes_empty))
40
41
42
// Return a reference to the immortal empty bytes string singleton.
43
static inline PyObject* bytes_get_empty(void)
44
8.03M
{
45
8.03M
    PyObject *empty = &EMPTY->ob_base.ob_base;
46
8.03M
    assert(_Py_IsImmortal(empty));
47
8.03M
    return empty;
48
8.03M
}
49
50
51
static inline void
52
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
53
210M
{
54
210M
_Py_COMP_DIAG_PUSH
55
210M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
56
#ifdef Py_GIL_DISABLED
57
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
58
#else
59
210M
    a->ob_shash = hash;
60
210M
#endif
61
210M
_Py_COMP_DIAG_POP
62
210M
}
63
64
static inline Py_hash_t
65
get_ob_shash(PyBytesObject *a)
66
81.7M
{
67
81.7M
_Py_COMP_DIAG_PUSH
68
81.7M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
69
#ifdef Py_GIL_DISABLED
70
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
71
#else
72
81.7M
    return a->ob_shash;
73
81.7M
#endif
74
81.7M
_Py_COMP_DIAG_POP
75
81.7M
}
76
77
78
/*
79
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
80
   string containing exactly 'size' bytes.
81
82
   For PyBytes_FromStringAndSize(), the parameter 'str' is
83
   either NULL or else points to a string containing at least 'size' bytes.
84
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
85
   not have to be null-terminated.  (Therefore it is safe to construct a
86
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
87
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
88
   bytes (setting the last byte to the null terminating character) and you can
89
   fill in the data yourself.  If 'str' is non-NULL then the resulting
90
   PyBytes object must be treated as immutable and you must not fill in nor
91
   alter the data yourself, since the strings may be shared.
92
93
   The PyObject member 'op->ob_size', which denotes the number of "extra
94
   items" in a variable-size object, will contain the number of bytes
95
   allocated for string data, not counting the null terminating character.
96
   It is therefore equal to the 'size' parameter (for
97
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
98
   parameter (for PyBytes_FromString()).
99
*/
100
static PyObject *
101
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
102
154M
{
103
154M
    PyBytesObject *op;
104
154M
    assert(size >= 0);
105
106
154M
    if (size == 0) {
107
0
        return bytes_get_empty();
108
0
    }
109
110
154M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
111
0
        PyErr_SetString(PyExc_OverflowError,
112
0
                        "byte string is too large");
113
0
        return NULL;
114
0
    }
115
116
    /* Inline PyObject_NewVar */
117
154M
    if (use_calloc)
118
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
119
154M
    else
120
154M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
121
154M
    if (op == NULL) {
122
0
        return PyErr_NoMemory();
123
0
    }
124
154M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
125
154M
    set_ob_shash(op, -1);
126
154M
    if (!use_calloc) {
127
154M
        op->ob_sval[size] = '\0';
128
154M
    }
129
154M
    return (PyObject *) op;
130
154M
}
131
132
PyObject *
133
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
134
177M
{
135
177M
    PyBytesObject *op;
136
177M
    if (size < 0) {
137
0
        PyErr_SetString(PyExc_SystemError,
138
0
            "Negative size passed to PyBytes_FromStringAndSize");
139
0
        return NULL;
140
0
    }
141
177M
    if (size == 1 && str != NULL) {
142
32.1M
        op = CHARACTER(*str & 255);
143
32.1M
        assert(_Py_IsImmortal(op));
144
32.1M
        return (PyObject *)op;
145
32.1M
    }
146
145M
    if (size == 0) {
147
7.96M
        return bytes_get_empty();
148
7.96M
    }
149
150
137M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
151
137M
    if (op == NULL)
152
0
        return NULL;
153
137M
    if (str == NULL)
154
10.3M
        return (PyObject *) op;
155
156
127M
    memcpy(op->ob_sval, str, size);
157
127M
    return (PyObject *) op;
158
137M
}
159
160
PyObject *
161
PyBytes_FromString(const char *str)
162
84
{
163
84
    size_t size;
164
84
    PyBytesObject *op;
165
166
84
    assert(str != NULL);
167
84
    size = strlen(str);
168
84
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
169
0
        PyErr_SetString(PyExc_OverflowError,
170
0
            "byte string is too long");
171
0
        return NULL;
172
0
    }
173
174
84
    if (size == 0) {
175
0
        return bytes_get_empty();
176
0
    }
177
84
    else if (size == 1) {
178
0
        op = CHARACTER(*str & 255);
179
0
        assert(_Py_IsImmortal(op));
180
0
        return (PyObject *)op;
181
0
    }
182
183
    /* Inline PyObject_NewVar */
184
84
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
185
84
    if (op == NULL) {
186
0
        return PyErr_NoMemory();
187
0
    }
188
84
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
189
84
    set_ob_shash(op, -1);
190
84
    memcpy(op->ob_sval, str, size+1);
191
84
    return (PyObject *) op;
192
84
}
193
194
195
/* Expand the printf-like `format` with the arguments in `vargs`, writing the
   output into `writer` starting `writer_pos` bytes into its data.

   Handled conversions: %c, %d, %u, %i, %x, %s, %p and %%, with the 'l' and
   'z' length flags honoured only in front of 'd' and 'u'.  A field width is
   parsed and ignored; a precision is used only by %s, where it caps the
   number of bytes copied.

   Returns the position just past the last byte written, or NULL with an
   exception set on failure.  An unrecognized conversion is not an error:
   the rest of the format string, starting at the '%', is copied verbatim
   and processing stops.

   Literal bytes (and %c / %%) are stored with a bare `*s++ = ...` without
   growing the writer.  NOTE(review): this presumably relies on the caller
   having sized the writer for at least the bytes of `format`, as
   PyBytes_FromFormatV() does -- confirm for any other caller. */
static char*
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
                 const char *format, va_list vargs)
{
    const char *f;
    const char *p;
    Py_ssize_t prec;
    int longflag;
    int size_tflag;
    /* Longest 64-bit formatted numbers:
       - "18446744073709551615\0" (21 bytes)
       - "-9223372036854775808\0" (21 bytes)
       Decimal takes the most space (it isn't enough for octal.)

       Longest 64-bit pointer representation:
       "0xffffffffffffffff\0" (19 bytes). */
    char buffer[21];

    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;

/* Append `len_expr` bytes from `str`: ask the writer for more room (which may
   hand back a different `s`), then copy.  Jumps to `error` if the writer
   returns NULL.  Both macros use the locals `s` and `writer`. */
#define WRITE_BYTES_LEN(str, len_expr) \
    do { \
        size_t len = (len_expr); \
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
        if (s == NULL) { \
            goto error; \
        } \
        memcpy(s, (str), len); \
        s += len; \
    } while (0)
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))

    for (f = format; *f; f++) {
        if (*f != '%') {
            /* ordinary byte: copy through unchanged */
            *s++ = *f;
            continue;
        }

        /* remember where the specifier starts; the default case below
           copies from here */
        p = f++;

        /* ignore the width (ex: 10 in "%10s") */
        while (Py_ISDIGIT(*f))
            f++;

        /* parse the precision (ex: 10 in "%.10s") */
        prec = 0;
        if (*f == '.') {
            f++;
            for (; Py_ISDIGIT(*f); f++) {
                prec = (prec * 10) + (*f - '0');
            }
        }

        /* skip anything else up to the next letter, '%' or end of string */
        while (*f && *f != '%' && !Py_ISALPHA(*f))
            f++;

        /* handle the long flag ('l'), but only for %ld and %lu.
           others can be added when necessary. */
        longflag = 0;
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
            longflag = 1;
            ++f;
        }

        /* handle the size_t flag ('z'). */
        size_tflag = 0;
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
            size_tflag = 1;
            ++f;
        }

        switch (*f) {
        case 'c':
        {
            int c = va_arg(vargs, int);
            if (c < 0 || c > 255) {
                PyErr_SetString(PyExc_OverflowError,
                                "PyBytes_FromFormatV(): %c format "
                                "expects an integer in range [0; 255]");
                goto error;
            }
            *s++ = (unsigned char)c;
            break;
        }

        case 'd':
            /* the va_arg type must match the length flag parsed above */
            if (longflag) {
                sprintf(buffer, "%ld", va_arg(vargs, long));
            }
            else if (size_tflag) {
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
            }
            else {
                sprintf(buffer, "%d", va_arg(vargs, int));
            }
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'u':
            if (longflag) {
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
            }
            else if (size_tflag) {
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
            }
            else {
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
            }
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'i':
            sprintf(buffer, "%i", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'x':
            sprintf(buffer, "%x", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 's':
        {
            Py_ssize_t i;

            /* `p` is reused here for the string argument */
            p = va_arg(vargs, const char*);
            if (prec <= 0) {
                /* no (or zero) precision: copy the whole string */
                i = strlen(p);
            }
            else {
                /* copy at most `prec` bytes, stopping early at a '\0' */
                i = 0;
                while (i < prec && p[i]) {
                    i++;
                }
            }
            WRITE_BYTES_LEN(p, i);
            break;
        }

        case 'p':
            sprintf(buffer, "%p", va_arg(vargs, void*));
            assert(strlen(buffer) < sizeof(buffer));
            /* %p is ill-defined:  ensure leading 0x. */
            if (buffer[1] == 'X')
                buffer[1] = 'x';
            else if (buffer[1] != 'x') {
                /* shift the digits (and terminator) right to make room */
                memmove(buffer+2, buffer, strlen(buffer)+1);
                buffer[0] = '0';
                buffer[1] = 'x';
            }
            WRITE_BYTES(buffer);
            break;

        case '%':
            *s++ = '%';
            break;

        default:
            /* invalid format string: copy unformatted string and exit */
            WRITE_BYTES(p);
            return s;
        }
    }

#undef WRITE_BYTES
#undef WRITE_BYTES_LEN

    return s;

 error:
    return NULL;
}
371
372
373
PyObject *
374
PyBytes_FromFormatV(const char *format, va_list vargs)
375
0
{
376
0
    Py_ssize_t alloc = strlen(format);
377
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
378
0
    if (writer == NULL) {
379
0
        return NULL;
380
0
    }
381
382
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
383
0
    if (s == NULL) {
384
0
        PyBytesWriter_Discard(writer);
385
0
        return NULL;
386
0
    }
387
388
0
    return PyBytesWriter_FinishWithPointer(writer, s);
389
0
}
390
391
392
/* Variadic front end: collect the arguments into a va_list and delegate to
   PyBytes_FromFormatV().  Returns whatever that call returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
403
404
405
/* Helpers for formatstring */
406
407
0
/* FORMAT_ERROR(EXC, FMT, ...): raise EXC via PyErr_Format() with a message
   that names the offending format argument.

   The prefix is chosen from two variables that must be in scope at the
   expansion site:
     - `key` non-NULL  -> "format argument %R: "  (the mapping key)
     - `argidx` >= 0   -> "format argument %zd: " (the positional index)
     - otherwise       -> "format argument: "

   FMT must be a string literal, since it is pasted onto the prefix by
   literal concatenation.  At least one variadic argument is required
   because __VA_ARGS__ always follows a comma; callers with nothing to
   format end FMT with "%s" and pass "". */
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (key != NULL) {                                                      \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     key, __VA_ARGS__);                                     \
    }                                                                       \
    else if (argidx >= 0) {                                                 \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     argidx, __VA_ARGS__);                                  \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
} while (0)
420
421
/* Fetch the next format argument and advance *p_argidx.

   arglen >= 0: `args` is read with PyTuple_GetItem() at the current index.
   arglen <  0: `args` itself is the single argument; it is handed out only
                when `allowone` is set.

   The index is advanced whenever it was below `arglen`, even if no argument
   is handed out.  When nothing can be returned, TypeError is set and NULL
   is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone)
{
    const Py_ssize_t current = *p_argidx;

    if (current < arglen) {
        *p_argidx = current + 1;
        if (arglen >= 0) {
            return PyTuple_GetItem(args, current);
        }
        if (allowone) {
            return args;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "not enough arguments for format string (got %zd)",
                 arglen < 0 ? 1 : arglen);
    return NULL;
}
439
440
/* Returns a new reference to a PyBytes object, or NULL on failure. */
441
442
static char*
443
formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key,
444
            int flags, int prec, int type,
445
            PyObject **p_result, PyBytesWriter *writer, char *str)
446
0
{
447
0
    char *p;
448
0
    PyObject *result;
449
0
    double x;
450
0
    size_t len;
451
0
    int dtoa_flags = 0;
452
453
0
    x = PyFloat_AsDouble(v);
454
0
    if (x == -1.0 && PyErr_Occurred()) {
455
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
456
0
            FORMAT_ERROR(PyExc_TypeError,
457
0
                         "%%%c requires a real number, not %T",
458
0
                         type, v);
459
0
        }
460
0
        return NULL;
461
0
    }
462
463
0
    if (prec < 0)
464
0
        prec = 6;
465
466
0
    if (flags & F_ALT) {
467
0
        dtoa_flags |= Py_DTSF_ALT;
468
0
    }
469
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
470
471
0
    if (p == NULL)
472
0
        return NULL;
473
474
0
    len = strlen(p);
475
0
    if (writer != NULL) {
476
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
477
0
        if (str == NULL) {
478
0
            PyMem_Free(p);
479
0
            return NULL;
480
0
        }
481
0
        memcpy(str, p, len);
482
0
        PyMem_Free(p);
483
0
        str += len;
484
0
        return str;
485
0
    }
486
487
0
    result = PyBytes_FromStringAndSize(p, len);
488
0
    PyMem_Free(p);
489
0
    *p_result = result;
490
0
    return result != NULL ? str : NULL;
491
0
}
492
493
static PyObject *
494
formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key,
495
           int flags, int prec, int type)
496
0
{
497
0
    PyObject *result, *iobj;
498
0
    if (PyLong_Check(v))
499
0
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
500
0
    if (PyNumber_Check(v)) {
501
        /* make sure number is a type of integer for o, x, and X */
502
0
        if (type == 'o' || type == 'x' || type == 'X')
503
0
            iobj = _PyNumber_Index(v);
504
0
        else
505
0
            iobj = PyNumber_Long(v);
506
0
        if (iobj != NULL) {
507
0
            assert(PyLong_Check(iobj));
508
0
            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
509
0
            Py_DECREF(iobj);
510
0
            return result;
511
0
        }
512
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
513
0
            return NULL;
514
0
    }
515
0
    FORMAT_ERROR(PyExc_TypeError,
516
0
                 "%%%c requires %s, not %T",
517
0
                 type,
518
0
                 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
519
0
                                                             : "a real number",
520
0
                 v);
521
0
    return NULL;
522
0
}
523
524
static int
525
byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p)
526
0
{
527
0
    if (PyBytes_Check(arg)) {
528
0
        if (PyBytes_GET_SIZE(arg) != 1) {
529
0
            FORMAT_ERROR(PyExc_TypeError,
530
0
                         "%%c requires an integer in range(256) or "
531
0
                         "a single byte, not a bytes object of length %zd",
532
0
                         PyBytes_GET_SIZE(arg));
533
0
            return 0;
534
0
        }
535
0
        *p = PyBytes_AS_STRING(arg)[0];
536
0
        return 1;
537
0
    }
538
0
    else if (PyByteArray_Check(arg)) {
539
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
540
0
            FORMAT_ERROR(PyExc_TypeError,
541
0
                         "%%c requires an integer in range(256) or "
542
0
                         "a single byte, not a bytearray object of length %zd",
543
0
                         PyByteArray_GET_SIZE(arg));
544
0
            return 0;
545
0
        }
546
0
        *p = PyByteArray_AS_STRING(arg)[0];
547
0
        return 1;
548
0
    }
549
0
    else if (PyIndex_Check(arg)) {
550
0
        int overflow;
551
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
552
0
        if (ival == -1 && PyErr_Occurred()) {
553
0
            return 0;
554
0
        }
555
0
        if (!(0 <= ival && ival <= 255)) {
556
            /* this includes an overflow in converting to C long */
557
0
            FORMAT_ERROR(PyExc_OverflowError,
558
0
                         "%%c argument not in range(256)%s", "");
559
0
            return 0;
560
0
        }
561
0
        *p = (char)ival;
562
0
        return 1;
563
0
    }
564
0
    FORMAT_ERROR(PyExc_TypeError,
565
0
                 "%%c requires an integer in range(256) or "
566
0
                 "a single byte, not %T",
567
0
                 arg);
568
0
    return 0;
569
0
}
570
571
static PyObject *_PyBytes_FromBuffer(PyObject *x);
572
573
static PyObject *
574
format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key,
575
           const char **pbuf, Py_ssize_t *plen)
576
0
{
577
0
    PyObject *func, *result;
578
    /* is it a bytes object? */
579
0
    if (PyBytes_Check(v)) {
580
0
        *pbuf = PyBytes_AS_STRING(v);
581
0
        *plen = PyBytes_GET_SIZE(v);
582
0
        return Py_NewRef(v);
583
0
    }
584
0
    if (PyByteArray_Check(v)) {
585
0
        *pbuf = PyByteArray_AS_STRING(v);
586
0
        *plen = PyByteArray_GET_SIZE(v);
587
0
        return Py_NewRef(v);
588
0
    }
589
    /* does it support __bytes__? */
590
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
591
0
    if (func != NULL) {
592
0
        result = _PyObject_CallNoArgs(func);
593
0
        Py_DECREF(func);
594
0
        if (result == NULL)
595
0
            return NULL;
596
0
        if (!PyBytes_Check(result)) {
597
0
            PyErr_Format(PyExc_TypeError,
598
0
                         "%T.__bytes__() must return a bytes, not %T",
599
0
                         v, result);
600
0
            Py_DECREF(result);
601
0
            return NULL;
602
0
        }
603
0
        *pbuf = PyBytes_AS_STRING(result);
604
0
        *plen = PyBytes_GET_SIZE(result);
605
0
        return result;
606
0
    }
607
    /* does it support buffer protocol? */
608
0
    if (PyObject_CheckBuffer(v)) {
609
        /* maybe we can avoid making a copy of the buffer object here? */
610
0
        result = _PyBytes_FromBuffer(v);
611
0
        if (result == NULL)
612
0
            return NULL;
613
0
        *pbuf = PyBytes_AS_STRING(result);
614
0
        *plen = PyBytes_GET_SIZE(result);
615
0
        return result;
616
0
    }
617
0
    FORMAT_ERROR(PyExc_TypeError,
618
0
                 "%%b requires a bytes-like object, "
619
0
                 "or an object that implements __bytes__, not %T",
620
0
                 v);
621
0
    return NULL;
622
0
}
623
624
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
625
626
PyObject *
627
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
628
                  PyObject *args, int use_bytearray)
629
0
{
630
0
    const char *fmt;
631
0
    Py_ssize_t arglen, argidx;
632
0
    Py_ssize_t fmtcnt;
633
0
    int args_owned = 0;
634
0
    PyObject *dict = NULL;
635
0
    PyObject *key = NULL;
636
637
0
    if (args == NULL) {
638
0
        PyErr_BadInternalCall();
639
0
        return NULL;
640
0
    }
641
0
    fmt = format;
642
0
    fmtcnt = format_len;
643
644
0
    PyBytesWriter *writer;
645
0
    if (use_bytearray) {
646
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
647
0
    }
648
0
    else {
649
0
        writer = PyBytesWriter_Create(fmtcnt);
650
0
    }
651
0
    if (writer == NULL) {
652
0
        return NULL;
653
0
    }
654
0
    char *res = PyBytesWriter_GetData(writer);
655
656
0
    if (PyTuple_Check(args)) {
657
0
        arglen = PyTuple_GET_SIZE(args);
658
0
        argidx = 0;
659
0
    }
660
0
    else {
661
0
        arglen = -1;
662
0
        argidx = -2;
663
0
    }
664
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
665
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
666
0
        !PyByteArray_Check(args)) {
667
0
            dict = args;
668
0
    }
669
670
0
    while (--fmtcnt >= 0) {
671
0
        if (*fmt != '%') {
672
0
            Py_ssize_t len;
673
0
            char *pos;
674
675
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
676
0
            if (pos != NULL)
677
0
                len = pos - fmt;
678
0
            else
679
0
                len = fmtcnt + 1;
680
0
            assert(len != 0);
681
682
0
            memcpy(res, fmt, len);
683
0
            res += len;
684
0
            fmt += len;
685
0
            fmtcnt -= (len - 1);
686
0
        }
687
0
        else {
688
            /* Got a format specifier */
689
0
            int flags = 0;
690
0
            Py_ssize_t width = -1;
691
0
            int prec = -1;
692
0
            int c = '\0';
693
0
            int fill;
694
0
            PyObject *v = NULL;
695
0
            PyObject *temp = NULL;
696
0
            const char *pbuf = NULL;
697
0
            int sign;
698
0
            Py_ssize_t len = 0;
699
0
            char onechar; /* For byte_converter() */
700
0
            Py_ssize_t alloc;
701
702
0
            fmt++;
703
0
            if (*fmt == '%') {
704
0
                *res++ = '%';
705
0
                fmt++;
706
0
                fmtcnt--;
707
0
                continue;
708
0
            }
709
0
            Py_CLEAR(key);
710
0
            const char *fmtstart = fmt;
711
0
            if (*fmt == '(') {
712
0
                const char *keystart;
713
0
                Py_ssize_t keylen;
714
0
                int pcount = 1;
715
716
0
                if (dict == NULL) {
717
0
                    PyErr_Format(PyExc_TypeError,
718
0
                                 "format requires a mapping, not %T",
719
0
                                 args);
720
0
                    goto error;
721
0
                }
722
0
                ++fmt;
723
0
                --fmtcnt;
724
0
                keystart = fmt;
725
                /* Skip over balanced parentheses */
726
0
                while (pcount > 0 && --fmtcnt >= 0) {
727
0
                    if (*fmt == ')')
728
0
                        --pcount;
729
0
                    else if (*fmt == '(')
730
0
                        ++pcount;
731
0
                    fmt++;
732
0
                }
733
0
                keylen = fmt - keystart - 1;
734
0
                if (fmtcnt < 0 || pcount > 0) {
735
0
                    PyErr_Format(PyExc_ValueError,
736
0
                                 "stray %% or incomplete format key "
737
0
                                 "at position %zd",
738
0
                                 (Py_ssize_t)(fmtstart - format - 1));
739
0
                    goto error;
740
0
                }
741
0
                key = PyBytes_FromStringAndSize(keystart,
742
0
                                                 keylen);
743
0
                if (key == NULL)
744
0
                    goto error;
745
0
                if (args_owned) {
746
0
                    Py_DECREF(args);
747
0
                    args_owned = 0;
748
0
                }
749
0
                args = PyObject_GetItem(dict, key);
750
0
                if (args == NULL) {
751
0
                    goto error;
752
0
                }
753
0
                args_owned = 1;
754
0
                arglen = -3;
755
0
                argidx = -4;
756
0
            }
757
0
            else {
758
0
                if (arglen < -1) {
759
0
                    PyErr_Format(PyExc_ValueError,
760
0
                                 "format requires a parenthesised mapping key "
761
0
                                 "at position %zd",
762
0
                                 (Py_ssize_t)(fmtstart - format - 1));
763
0
                    goto error;
764
0
                }
765
0
            }
766
767
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
768
0
            while (--fmtcnt >= 0) {
769
0
                switch (c = *fmt++) {
770
0
                case '-': flags |= F_LJUST; continue;
771
0
                case '+': flags |= F_SIGN; continue;
772
0
                case ' ': flags |= F_BLANK; continue;
773
0
                case '#': flags |= F_ALT; continue;
774
0
                case '0': flags |= F_ZERO; continue;
775
0
                }
776
0
                break;
777
0
            }
778
779
            /* Parse width. Example: "%10s" => width=10 */
780
0
            if (c == '*') {
781
0
                if (arglen < -1) {
782
0
                    PyErr_Format(PyExc_ValueError,
783
0
                            "* cannot be used with a parenthesised mapping key "
784
0
                            "at position %zd",
785
0
                            (Py_ssize_t)(fmtstart - format - 1));
786
0
                    goto error;
787
0
                }
788
0
                v = getnextarg(args, arglen, &argidx, 0);
789
0
                if (v == NULL)
790
0
                    goto error;
791
0
                if (!PyLong_Check(v)) {
792
0
                    FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
793
0
                    goto error;
794
0
                }
795
0
                width = PyLong_AsSsize_t(v);
796
0
                if (width == -1 && PyErr_Occurred()) {
797
0
                    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
798
0
                        FORMAT_ERROR(PyExc_OverflowError,
799
0
                                     "too big for width%s", "");
800
0
                    }
801
0
                    goto error;
802
0
                }
803
0
                if (width < 0) {
804
0
                    flags |= F_LJUST;
805
0
                    width = -width;
806
0
                }
807
0
                if (--fmtcnt >= 0)
808
0
                    c = *fmt++;
809
0
            }
810
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
811
0
                width = c - '0';
812
0
                while (--fmtcnt >= 0) {
813
0
                    c = Py_CHARMASK(*fmt++);
814
0
                    if (!Py_ISDIGIT(c))
815
0
                        break;
816
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
817
0
                        PyErr_Format(PyExc_ValueError,
818
0
                                     "width too big at position %zd",
819
0
                                     (Py_ssize_t)(fmtstart - format - 1));
820
0
                        goto error;
821
0
                    }
822
0
                    width = width*10 + (c - '0');
823
0
                }
824
0
            }
825
826
            /* Parse precision. Example: "%.3f" => prec=3 */
827
0
            if (c == '.') {
828
0
                prec = 0;
829
0
                if (--fmtcnt >= 0)
830
0
                    c = *fmt++;
831
0
                if (c == '*') {
832
0
                    if (arglen < -1) {
833
0
                        PyErr_Format(PyExc_ValueError,
834
0
                                "* cannot be used with a parenthesised mapping key "
835
0
                                "at position %zd",
836
0
                                (Py_ssize_t)(fmtstart - format - 1));
837
0
                        goto error;
838
0
                    }
839
0
                    v = getnextarg(args, arglen, &argidx, 0);
840
0
                    if (v == NULL)
841
0
                        goto error;
842
0
                    if (!PyLong_Check(v)) {
843
0
                        FORMAT_ERROR(PyExc_TypeError,
844
0
                                     "* requires int, not %T", v);
845
0
                        goto error;
846
0
                    }
847
0
                    prec = PyLong_AsInt(v);
848
0
                    if (prec == -1 && PyErr_Occurred()) {
849
0
                        if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
850
0
                            FORMAT_ERROR(PyExc_OverflowError,
851
0
                                         "too big for precision%s", "");
852
0
                        }
853
0
                        goto error;
854
0
                    }
855
0
                    if (prec < 0)
856
0
                        prec = 0;
857
0
                    if (--fmtcnt >= 0)
858
0
                        c = *fmt++;
859
0
                }
860
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
861
0
                    prec = c - '0';
862
0
                    while (--fmtcnt >= 0) {
863
0
                        c = Py_CHARMASK(*fmt++);
864
0
                        if (!Py_ISDIGIT(c))
865
0
                            break;
866
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
867
0
                            PyErr_Format(PyExc_ValueError,
868
0
                                "precision too big at position %zd",
869
0
                                (Py_ssize_t)(fmtstart - format - 1));
870
0
                            goto error;
871
0
                        }
872
0
                        prec = prec*10 + (c - '0');
873
0
                    }
874
0
                }
875
0
            } /* prec */
876
0
            if (fmtcnt >= 0) {
877
0
                if (c == 'h' || c == 'l' || c == 'L') {
878
0
                    if (--fmtcnt >= 0)
879
0
                        c = *fmt++;
880
0
                }
881
0
            }
882
0
            if (fmtcnt < 0) {
883
0
                PyErr_Format(PyExc_ValueError,
884
0
                             "stray %% at position %zd",
885
0
                             (Py_ssize_t)(fmtstart - format - 1));
886
0
                goto error;
887
0
            }
888
0
            v = getnextarg(args, arglen, &argidx, 1);
889
0
            if (v == NULL)
890
0
                goto error;
891
892
0
            if (fmtcnt == 0) {
893
                /* last write: disable writer overallocation */
894
0
                writer->overallocate = 0;
895
0
            }
896
897
0
            sign = 0;
898
0
            fill = ' ';
899
0
            switch (c) {
900
0
            case 'r':
901
                // %r is only for 2/3 code; 3 only code should use %a
902
0
            case 'a':
903
0
                temp = PyObject_ASCII(v);
904
0
                if (temp == NULL)
905
0
                    goto error;
906
0
                assert(PyUnicode_IS_ASCII(temp));
907
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
908
0
                len = PyUnicode_GET_LENGTH(temp);
909
0
                if (prec >= 0 && len > prec)
910
0
                    len = prec;
911
0
                break;
912
913
0
            case 's':
914
                // %s is only for 2/3 code; 3 only code should use %b
915
0
            case 'b':
916
0
                temp = format_obj(v, argidx, key, &pbuf, &len);
917
0
                if (temp == NULL)
918
0
                    goto error;
919
0
                if (prec >= 0 && len > prec)
920
0
                    len = prec;
921
0
                break;
922
923
0
            case 'i':
924
0
            case 'd':
925
0
            case 'u':
926
0
            case 'o':
927
0
            case 'x':
928
0
            case 'X':
929
0
                if (PyLong_CheckExact(v)
930
0
                    && width == -1 && prec == -1
931
0
                    && !(flags & (F_SIGN | F_BLANK))
932
0
                    && c != 'X')
933
0
                {
934
                    /* Fast path */
935
0
                    int alternate = flags & F_ALT;
936
0
                    int base;
937
938
0
                    switch(c)
939
0
                    {
940
0
                        default:
941
0
                            Py_UNREACHABLE();
942
0
                        case 'd':
943
0
                        case 'i':
944
0
                        case 'u':
945
0
                            base = 10;
946
0
                            break;
947
0
                        case 'o':
948
0
                            base = 8;
949
0
                            break;
950
0
                        case 'x':
951
0
                        case 'X':
952
0
                            base = 16;
953
0
                            break;
954
0
                    }
955
956
                    /* Fast path */
957
0
                    res = _PyLong_FormatBytesWriter(writer, res,
958
0
                                                    v, base, alternate);
959
0
                    if (res == NULL)
960
0
                        goto error;
961
0
                    continue;
962
0
                }
963
964
0
                temp = formatlong(v, argidx, key, flags, prec, c);
965
0
                if (!temp)
966
0
                    goto error;
967
0
                assert(PyUnicode_IS_ASCII(temp));
968
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
969
0
                len = PyUnicode_GET_LENGTH(temp);
970
0
                sign = 1;
971
0
                if (flags & F_ZERO)
972
0
                    fill = '0';
973
0
                break;
974
975
0
            case 'e':
976
0
            case 'E':
977
0
            case 'f':
978
0
            case 'F':
979
0
            case 'g':
980
0
            case 'G':
981
0
                if (width == -1 && prec == -1
982
0
                    && !(flags & (F_SIGN | F_BLANK)))
983
0
                {
984
                    /* Fast path */
985
0
                    res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res);
986
0
                    if (res == NULL)
987
0
                        goto error;
988
0
                    continue;
989
0
                }
990
991
0
                if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res))
992
0
                    goto error;
993
0
                pbuf = PyBytes_AS_STRING(temp);
994
0
                len = PyBytes_GET_SIZE(temp);
995
0
                sign = 1;
996
0
                if (flags & F_ZERO)
997
0
                    fill = '0';
998
0
                break;
999
1000
0
            case 'c':
1001
0
                pbuf = &onechar;
1002
0
                len = byte_converter(v, argidx, key, &onechar);
1003
0
                if (!len)
1004
0
                    goto error;
1005
0
                if (width == -1) {
1006
                    /* Fast path */
1007
0
                    *res++ = onechar;
1008
0
                    continue;
1009
0
                }
1010
0
                break;
1011
1012
0
            default:
1013
0
                if (Py_ISALPHA(c)) {
1014
0
                    PyErr_Format(PyExc_ValueError,
1015
0
                                 "unsupported format %%%c at position %zd",
1016
0
                                 c, (Py_ssize_t)(fmtstart - format - 1));
1017
0
                }
1018
0
                else if (c == '\'') {
1019
0
                    PyErr_Format(PyExc_ValueError,
1020
0
                                 "stray %% at position %zd or unexpected "
1021
0
                                 "format character \"'\" "
1022
0
                                 "at position %zd",
1023
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1024
0
                                 (Py_ssize_t)(fmt - format - 1));
1025
0
                }
1026
0
                else if (c >= 32 && c < 127 && c != '\'') {
1027
0
                    PyErr_Format(PyExc_ValueError,
1028
0
                                 "stray %% at position %zd or unexpected "
1029
0
                                 "format character '%c' "
1030
0
                                 "at position %zd",
1031
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1032
0
                                 c, (Py_ssize_t)(fmt - format - 1));
1033
0
                }
1034
0
                else {
1035
0
                    PyErr_Format(PyExc_ValueError,
1036
0
                                 "stray %% at position %zd or unexpected "
1037
0
                                 "format character with code 0x%02x "
1038
0
                                 "at position %zd",
1039
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1040
0
                                 Py_CHARMASK(c),
1041
0
                                 (Py_ssize_t)(fmt - format - 1));
1042
0
                }
1043
0
                goto error;
1044
0
            }
1045
1046
0
            if (sign) {
1047
0
                if (*pbuf == '-' || *pbuf == '+') {
1048
0
                    sign = *pbuf++;
1049
0
                    len--;
1050
0
                }
1051
0
                else if (flags & F_SIGN)
1052
0
                    sign = '+';
1053
0
                else if (flags & F_BLANK)
1054
0
                    sign = ' ';
1055
0
                else
1056
0
                    sign = 0;
1057
0
            }
1058
0
            if (width < len)
1059
0
                width = len;
1060
1061
0
            alloc = width;
1062
0
            if (sign != 0 && len == width)
1063
0
                alloc++;
1064
            /* 2: size preallocated for %s */
1065
0
            if (alloc > 2) {
1066
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
1067
0
                if (res == NULL) {
1068
0
                    Py_XDECREF(temp);
1069
0
                    goto error;
1070
0
                }
1071
0
            }
1072
#ifndef NDEBUG
1073
            char *before = res;
1074
#endif
1075
1076
            /* Write the sign if needed */
1077
0
            if (sign) {
1078
0
                if (fill != ' ')
1079
0
                    *res++ = sign;
1080
0
                if (width > len)
1081
0
                    width--;
1082
0
            }
1083
1084
            /* Write the numeric prefix for "x", "X" and "o" formats
1085
               if the alternate form is used.
1086
               For example, write "0x" for the "%#x" format. */
1087
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1088
0
                assert(pbuf[0] == '0');
1089
0
                assert(pbuf[1] == c);
1090
0
                if (fill != ' ') {
1091
0
                    *res++ = *pbuf++;
1092
0
                    *res++ = *pbuf++;
1093
0
                }
1094
0
                width -= 2;
1095
0
                if (width < 0)
1096
0
                    width = 0;
1097
0
                len -= 2;
1098
0
            }
1099
1100
            /* Pad left with the fill character if needed */
1101
0
            if (width > len && !(flags & F_LJUST)) {
1102
0
                memset(res, fill, width - len);
1103
0
                res += (width - len);
1104
0
                width = len;
1105
0
            }
1106
1107
            /* If padding with spaces: write sign if needed and/or numeric
1108
               prefix if the alternate form is used */
1109
0
            if (fill == ' ') {
1110
0
                if (sign)
1111
0
                    *res++ = sign;
1112
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1113
0
                    assert(pbuf[0] == '0');
1114
0
                    assert(pbuf[1] == c);
1115
0
                    *res++ = *pbuf++;
1116
0
                    *res++ = *pbuf++;
1117
0
                }
1118
0
            }
1119
1120
            /* Copy bytes */
1121
0
            memcpy(res, pbuf, len);
1122
0
            res += len;
1123
1124
            /* Pad right with the fill character if needed */
1125
0
            if (width > len) {
1126
0
                memset(res, ' ', width - len);
1127
0
                res += (width - len);
1128
0
            }
1129
1130
0
            if (dict && (argidx < arglen)) {
1131
                // XXX: Never happens?
1132
0
                PyErr_SetString(PyExc_TypeError,
1133
0
                           "not all arguments converted during bytes formatting");
1134
0
                Py_XDECREF(temp);
1135
0
                goto error;
1136
0
            }
1137
0
            Py_XDECREF(temp);
1138
1139
#ifndef NDEBUG
1140
            /* check that we computed the exact size for this write */
1141
            assert((res - before) == alloc);
1142
#endif
1143
0
        } /* '%' */
1144
1145
        /* If overallocation was disabled, ensure that it was the last
1146
           write. Otherwise, we missed an optimization */
1147
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1148
0
    } /* until end */
1149
1150
0
    if (argidx < arglen && !dict) {
1151
0
        PyErr_Format(PyExc_TypeError,
1152
0
                     "not all arguments converted during bytes formatting "
1153
0
                     "(required %zd, got %zd)",
1154
0
                     arglen < 0 ? 0 : argidx,
1155
0
                     arglen < 0 ? 1 : arglen);
1156
0
        goto error;
1157
0
    }
1158
1159
0
    Py_XDECREF(key);
1160
0
    if (args_owned) {
1161
0
        Py_DECREF(args);
1162
0
    }
1163
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1164
1165
0
 error:
1166
0
    Py_XDECREF(key);
1167
0
    PyBytesWriter_Discard(writer);
1168
0
    if (args_owned) {
1169
0
        Py_DECREF(args);
1170
0
    }
1171
0
    return NULL;
1172
0
}
1173
1174
/* Unescape a backslash-escaped string. */
1175
PyObject *_PyBytes_DecodeEscape2(const char *s,
1176
                                Py_ssize_t len,
1177
                                const char *errors,
1178
                                int *first_invalid_escape_char,
1179
                                const char **first_invalid_escape_ptr)
1180
2.31k
{
1181
2.31k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1182
2.31k
    if (writer == NULL) {
1183
0
        return NULL;
1184
0
    }
1185
2.31k
    char *p = PyBytesWriter_GetData(writer);
1186
1187
2.31k
    *first_invalid_escape_char = -1;
1188
2.31k
    *first_invalid_escape_ptr = NULL;
1189
1190
2.31k
    const char *end = s + len;
1191
63.5k
    while (s < end) {
1192
61.2k
        if (*s != '\\') {
1193
51.1k
            *p++ = *s++;
1194
51.1k
            continue;
1195
51.1k
        }
1196
1197
10.1k
        s++;
1198
10.1k
        if (s == end) {
1199
0
            PyErr_SetString(PyExc_ValueError,
1200
0
                            "Trailing \\ in string");
1201
0
            goto failed;
1202
0
        }
1203
1204
10.1k
        switch (*s++) {
1205
        /* XXX This assumes ASCII! */
1206
619
        case '\n': break;
1207
955
        case '\\': *p++ = '\\'; break;
1208
234
        case '\'': *p++ = '\''; break;
1209
298
        case '\"': *p++ = '\"'; break;
1210
224
        case 'b': *p++ = '\b'; break;
1211
305
        case 'f': *p++ = '\014'; break; /* FF */
1212
214
        case 't': *p++ = '\t'; break;
1213
167
        case 'n': *p++ = '\n'; break;
1214
359
        case 'r': *p++ = '\r'; break;
1215
537
        case 'v': *p++ = '\013'; break; /* VT */
1216
205
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1217
2.42k
        case '0': case '1': case '2': case '3':
1218
4.47k
        case '4': case '5': case '6': case '7':
1219
4.47k
        {
1220
4.47k
            int c = s[-1] - '0';
1221
4.47k
            if (s < end && '0' <= *s && *s <= '7') {
1222
2.18k
                c = (c<<3) + *s++ - '0';
1223
2.18k
                if (s < end && '0' <= *s && *s <= '7')
1224
721
                    c = (c<<3) + *s++ - '0';
1225
2.18k
            }
1226
4.47k
            if (c > 0377) {
1227
675
                if (*first_invalid_escape_char == -1) {
1228
159
                    *first_invalid_escape_char = c;
1229
                    /* Back up 3 chars, since we've already incremented s. */
1230
159
                    *first_invalid_escape_ptr = s - 3;
1231
159
                }
1232
675
            }
1233
4.47k
            *p++ = c;
1234
4.47k
            break;
1235
4.23k
        }
1236
291
        case 'x':
1237
291
            if (s+1 < end) {
1238
290
                int digit1, digit2;
1239
290
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1240
290
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1241
290
                if (digit1 < 16 && digit2 < 16) {
1242
287
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1243
287
                    s += 2;
1244
287
                    break;
1245
287
                }
1246
290
            }
1247
            /* invalid hexadecimal digits */
1248
1249
4
            if (!errors || strcmp(errors, "strict") == 0) {
1250
4
                PyErr_Format(PyExc_ValueError,
1251
4
                             "invalid \\x escape at position %zd",
1252
4
                             s - 2 - (end - len));
1253
4
                goto failed;
1254
4
            }
1255
0
            if (strcmp(errors, "replace") == 0) {
1256
0
                *p++ = '?';
1257
0
            } else if (strcmp(errors, "ignore") == 0)
1258
0
                /* do nothing */;
1259
0
            else {
1260
0
                PyErr_Format(PyExc_ValueError,
1261
0
                             "decoding error; unknown "
1262
0
                             "error handling code: %.400s",
1263
0
                             errors);
1264
0
                goto failed;
1265
0
            }
1266
            /* skip \x */
1267
0
            if (s < end && Py_ISXDIGIT(s[0]))
1268
0
                s++; /* and a hexdigit */
1269
0
            break;
1270
1271
1.25k
        default:
1272
1.25k
            if (*first_invalid_escape_char == -1) {
1273
592
                *first_invalid_escape_char = (unsigned char)s[-1];
1274
                /* Back up one char, since we've already incremented s. */
1275
592
                *first_invalid_escape_ptr = s - 1;
1276
592
            }
1277
1.25k
            *p++ = '\\';
1278
1.25k
            s--;
1279
10.1k
        }
1280
10.1k
    }
1281
1282
2.31k
    return PyBytesWriter_FinishWithPointer(writer, p);
1283
1284
4
  failed:
1285
4
    PyBytesWriter_Discard(writer);
1286
4
    return NULL;
1287
2.31k
}
1288
1289
PyObject *PyBytes_DecodeEscape(const char *s,
1290
                                Py_ssize_t len,
1291
                                const char *errors,
1292
                                Py_ssize_t Py_UNUSED(unicode),
1293
                                const char *Py_UNUSED(recode_encoding))
1294
0
{
1295
0
    int first_invalid_escape_char;
1296
0
    const char *first_invalid_escape_ptr;
1297
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1298
0
                                             &first_invalid_escape_char,
1299
0
                                             &first_invalid_escape_ptr);
1300
0
    if (result == NULL)
1301
0
        return NULL;
1302
0
    if (first_invalid_escape_char != -1) {
1303
0
        if (first_invalid_escape_char > 0xff) {
1304
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1305
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1306
0
                                 "Such sequences will not work in the future. ",
1307
0
                                 first_invalid_escape_char) < 0)
1308
0
            {
1309
0
                Py_DECREF(result);
1310
0
                return NULL;
1311
0
            }
1312
0
        }
1313
0
        else {
1314
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1315
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1316
0
                                 "Such sequences will not work in the future. ",
1317
0
                                 first_invalid_escape_char) < 0)
1318
0
            {
1319
0
                Py_DECREF(result);
1320
0
                return NULL;
1321
0
            }
1322
0
        }
1323
0
    }
1324
0
    return result;
1325
0
}
1326
/* -------------------------------------------------------------------- */
1327
/* object api */
1328
1329
Py_ssize_t
1330
PyBytes_Size(PyObject *op)
1331
5.38k
{
1332
5.38k
    if (!PyBytes_Check(op)) {
1333
0
        PyErr_Format(PyExc_TypeError,
1334
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1335
0
        return -1;
1336
0
    }
1337
5.38k
    return Py_SIZE(op);
1338
5.38k
}
1339
1340
char *
1341
PyBytes_AsString(PyObject *op)
1342
12.4M
{
1343
12.4M
    if (!PyBytes_Check(op)) {
1344
0
        PyErr_Format(PyExc_TypeError,
1345
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1346
0
        return NULL;
1347
0
    }
1348
12.4M
    return ((PyBytesObject *)op)->ob_sval;
1349
12.4M
}
1350
1351
int
1352
PyBytes_AsStringAndSize(PyObject *obj,
1353
                         char **s,
1354
                         Py_ssize_t *len)
1355
70.6k
{
1356
70.6k
    if (s == NULL) {
1357
0
        PyErr_BadInternalCall();
1358
0
        return -1;
1359
0
    }
1360
1361
70.6k
    if (!PyBytes_Check(obj)) {
1362
0
        PyErr_Format(PyExc_TypeError,
1363
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1364
0
        return -1;
1365
0
    }
1366
1367
70.6k
    *s = PyBytes_AS_STRING(obj);
1368
70.6k
    if (len != NULL)
1369
70.6k
        *len = PyBytes_GET_SIZE(obj);
1370
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1371
0
        PyErr_SetString(PyExc_ValueError,
1372
0
                        "embedded null byte");
1373
0
        return -1;
1374
0
    }
1375
70.6k
    return 0;
1376
70.6k
}
1377
1378
/* -------------------------------------------------------------------- */
1379
/* Methods */
1380
1381
4.60k
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1382
1383
#include "stringlib/stringdefs.h"
1384
#define STRINGLIB_MUTABLE 0
1385
1386
#include "stringlib/fastsearch.h"
1387
#include "stringlib/count.h"
1388
#include "stringlib/find.h"
1389
#include "stringlib/join.h"
1390
#include "stringlib/partition.h"
1391
#include "stringlib/split.h"
1392
#include "stringlib/ctype.h"
1393
1394
#include "stringlib/transmogrify.h"
1395
1396
#undef STRINGLIB_GET_EMPTY
1397
1398
Py_ssize_t
1399
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1400
              const char *needle, Py_ssize_t len_needle,
1401
              Py_ssize_t offset)
1402
0
{
1403
0
    assert(len_haystack >= 0);
1404
0
    assert(len_needle >= 0);
1405
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1406
0
    if (len_needle == 0) {
1407
0
        return offset;
1408
0
    }
1409
0
    if (len_needle > len_haystack) {
1410
0
        return -1;
1411
0
    }
1412
0
    assert(len_haystack >= 1);
1413
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1414
0
                                    needle, len_needle, offset);
1415
0
    if (res == -1) {
1416
0
        Py_ssize_t last_align = len_haystack - len_needle;
1417
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1418
0
            return offset + last_align;
1419
0
        }
1420
0
    }
1421
0
    return res;
1422
0
}
1423
1424
Py_ssize_t
1425
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1426
                     const char *needle, Py_ssize_t len_needle,
1427
                     Py_ssize_t offset)
1428
0
{
1429
0
    return stringlib_rfind(haystack, len_haystack,
1430
0
                           needle, len_needle, offset);
1431
0
}
1432
1433
/* Build the repr of the bytes object `obj` by delegating to
   _Py_bytes_repr() with the class name "bytes". */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    Py_ssize_t size = PyBytes_GET_SIZE(obj);
    return _Py_bytes_repr(data, size, smartquotes, "bytes");
}
1439
1440
PyObject *
1441
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1442
               const char *classname)
1443
3.16k
{
1444
3.16k
    Py_ssize_t i;
1445
3.16k
    Py_ssize_t newsize, squotes, dquotes;
1446
3.16k
    PyObject *v;
1447
3.16k
    unsigned char quote;
1448
3.16k
    Py_UCS1 *p;
1449
1450
    /* Compute size of output string */
1451
3.16k
    squotes = dquotes = 0;
1452
3.16k
    newsize = 3; /* b'' */
1453
3.43M
    for (i = 0; i < length; i++) {
1454
3.43M
        unsigned char c = data[i];
1455
3.43M
        Py_ssize_t incr = 1;
1456
3.43M
        switch(c) {
1457
4.12k
        case '\'': squotes++; break;
1458
9.09k
        case '"':  dquotes++; break;
1459
36.2k
        case '\\': case '\t': case '\n': case '\r':
1460
36.2k
            incr = 2; break; /* \C */
1461
3.38M
        default:
1462
3.38M
            if (c < ' ' || c >= 0x7f)
1463
2.55M
                incr = 4; /* \xHH */
1464
3.43M
        }
1465
3.43M
        if (newsize > PY_SSIZE_T_MAX - incr)
1466
0
            goto overflow;
1467
3.43M
        newsize += incr;
1468
3.43M
    }
1469
3.16k
    quote = '\'';
1470
3.16k
    if (smartquotes && squotes && !dquotes)
1471
133
        quote = '"';
1472
3.16k
    if (squotes && quote == '\'') {
1473
226
        if (newsize > PY_SSIZE_T_MAX - squotes)
1474
0
            goto overflow;
1475
226
        newsize += squotes;
1476
226
    }
1477
1478
3.16k
    v = PyUnicode_New(newsize, 127);
1479
3.16k
    if (v == NULL) {
1480
0
        return NULL;
1481
0
    }
1482
3.16k
    p = PyUnicode_1BYTE_DATA(v);
1483
1484
3.16k
    *p++ = 'b', *p++ = quote;
1485
3.43M
    for (i = 0; i < length; i++) {
1486
3.43M
        unsigned char c = data[i];
1487
3.43M
        if (c == quote || c == '\\')
1488
3.94k
            *p++ = '\\', *p++ = c;
1489
3.42M
        else if (c == '\t')
1490
18.9k
            *p++ = '\\', *p++ = 't';
1491
3.40M
        else if (c == '\n')
1492
8.76k
            *p++ = '\\', *p++ = 'n';
1493
3.39M
        else if (c == '\r')
1494
6.81k
            *p++ = '\\', *p++ = 'r';
1495
3.39M
        else if (c < ' ' || c >= 0x7f) {
1496
2.55M
            *p++ = '\\';
1497
2.55M
            *p++ = 'x';
1498
2.55M
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1499
2.55M
            *p++ = Py_hexdigits[c & 0xf];
1500
2.55M
        }
1501
841k
        else
1502
841k
            *p++ = c;
1503
3.43M
    }
1504
3.16k
    *p++ = quote;
1505
3.16k
    assert(_PyUnicode_CheckConsistency(v, 1));
1506
3.16k
    return v;
1507
1508
0
  overflow:
1509
0
    PyErr_Format(PyExc_OverflowError,
1510
0
                 "%s object is too large to make repr", classname);
1511
0
    return NULL;
1512
3.16k
}
1513
1514
/* Repr of a bytes object: PyBytes_Repr() with smart quoting enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1519
1520
/* str() of a bytes object: same text as its repr.  When the interpreter's
   bytes_warning setting is non-zero, a BytesWarning is issued first; if
   the warning call reports failure, NULL is returned. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1) != 0)
    {
        return NULL;
    }
    return bytes_repr(op);
}
1531
1532
static Py_ssize_t
1533
bytes_length(PyObject *self)
1534
39.7M
{
1535
39.7M
    PyBytesObject *a = _PyBytes_CAST(self);
1536
39.7M
    return Py_SIZE(a);
1537
39.7M
}
1538
1539
/* This is also used by PyBytes_Concat() and the specializing interpreter. */
1540
PyObject *
1541
_PyBytes_Concat(PyObject *a, PyObject *b)
1542
915k
{
1543
915k
    Py_buffer va, vb;
1544
915k
    PyObject *result = NULL;
1545
1546
915k
    va.len = -1;
1547
915k
    vb.len = -1;
1548
915k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1549
915k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1550
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1551
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1552
0
        goto done;
1553
0
    }
1554
1555
    /* Optimize end cases */
1556
915k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1557
139k
        result = Py_NewRef(b);
1558
139k
        goto done;
1559
139k
    }
1560
776k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1561
60.0k
        result = Py_NewRef(a);
1562
60.0k
        goto done;
1563
60.0k
    }
1564
1565
716k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1566
0
        PyErr_NoMemory();
1567
0
        goto done;
1568
0
    }
1569
1570
716k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1571
716k
    if (result != NULL) {
1572
716k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1573
716k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1574
716k
    }
1575
1576
915k
  done:
1577
915k
    if (va.len != -1)
1578
915k
        PyBuffer_Release(&va);
1579
915k
    if (vb.len != -1)
1580
915k
        PyBuffer_Release(&vb);
1581
915k
    return result;
1582
716k
}
1583
1584
static PyObject *
1585
bytes_repeat(PyObject *self, Py_ssize_t n)
1586
188k
{
1587
188k
    PyBytesObject *a = _PyBytes_CAST(self);
1588
188k
    if (n < 0)
1589
0
        n = 0;
1590
    /* watch out for overflows:  the size can overflow int,
1591
     * and the # of bytes needed can overflow size_t
1592
     */
1593
188k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1594
0
        PyErr_SetString(PyExc_OverflowError,
1595
0
            "repeated bytes are too long");
1596
0
        return NULL;
1597
0
    }
1598
188k
    Py_ssize_t size = Py_SIZE(a) * n;
1599
188k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1600
3
        return Py_NewRef(a);
1601
3
    }
1602
188k
    size_t nbytes = (size_t)size;
1603
188k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1604
0
        PyErr_SetString(PyExc_OverflowError,
1605
0
            "repeated bytes are too long");
1606
0
        return NULL;
1607
0
    }
1608
188k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1609
188k
    if (op == NULL) {
1610
0
        return PyErr_NoMemory();
1611
0
    }
1612
188k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1613
188k
    set_ob_shash(op, -1);
1614
188k
    op->ob_sval[size] = '\0';
1615
1616
188k
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1617
1618
188k
    return (PyObject *) op;
1619
188k
}
1620
1621
static int
1622
bytes_contains(PyObject *self, PyObject *arg)
1623
2.98k
{
1624
2.98k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1625
2.98k
}
1626
1627
static PyObject *
1628
bytes_item(PyObject *self, Py_ssize_t i)
1629
0
{
1630
0
    PyBytesObject *a = _PyBytes_CAST(self);
1631
0
    if (i < 0 || i >= Py_SIZE(a)) {
1632
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1633
0
        return NULL;
1634
0
    }
1635
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1636
0
}
1637
1638
static int
1639
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1640
85.0M
{
1641
85.0M
    int cmp;
1642
85.0M
    Py_ssize_t len;
1643
1644
85.0M
    len = Py_SIZE(a);
1645
85.0M
    if (Py_SIZE(b) != len)
1646
940k
        return 0;
1647
1648
84.0M
    if (a->ob_sval[0] != b->ob_sval[0])
1649
9.24M
        return 0;
1650
1651
74.8M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1652
74.8M
    return (cmp == 0);
1653
84.0M
}
1654
1655
/* Rich comparison for bytes objects.

   Returns NotImplemented unless both operands pass PyBytes_Check().  In
   that case, when the bytes_warning setting is non-zero and the operator
   is == or !=, a BytesWarning is issued for comparisons involving str or
   int.  Otherwise the operands are compared bytewise, with the lengths
   deciding when the common prefix is equal. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        /* a byte string is equal to itself */
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        int same = bytes_compare_eq(a, b);
        if (op == Py_NE) {
            same = !same;
        }
        return PyBool_FromLong(same);
    }

    /* Ordering comparison: the first differing byte decides, then length. */
    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t common = Py_MIN(len_a, len_b);
    int diff = 0;

    if (common > 0) {
        diff = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (diff == 0) {
            diff = memcmp(a->ob_sval, b->ob_sval, common);
        }
    }
    if (diff != 0) {
        Py_RETURN_RICHCOMPARE(diff, 0, op);
    }
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1717
1718
static Py_hash_t
1719
bytes_hash(PyObject *self)
1720
81.7M
{
1721
81.7M
    PyBytesObject *a = _PyBytes_CAST(self);
1722
81.7M
    Py_hash_t hash = get_ob_shash(a);
1723
81.7M
    if (hash == -1) {
1724
        /* Can't fail */
1725
50.7M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1726
50.7M
        set_ob_shash(a, hash);
1727
50.7M
    }
1728
81.7M
    return hash;
1729
81.7M
}
1730
1731
/* Subscript a bytes object with an index-like object or a slice.
   An index yields the byte value as an int; a slice yields a bytes object.
   Returns NULL with an exception set on error. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        /* Negative indices count from the end. */
        if (index < 0) {
            index += PyBytes_GET_SIZE(self);
        }
        if (index < 0 || index >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    Py_ssize_t slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self),
                                                   &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* A full slice of an exact bytes object is the object itself. */
        if (start == 0 &&
            slicelength == PyBytes_GET_SIZE(self) &&
            PyBytes_CheckExact(self))
        {
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self) + start,
                                         slicelength);
    }

    /* Extended slice: copy one byte per step into a fresh object. */
    const char *src = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }
    char *dst = PyBytes_AS_STRING(result);

    /* The cursor is unsigned, as in the original loop, so the trailing
       `pos += step` after the last copy cannot overflow a signed type. */
    size_t pos = start;
    for (Py_ssize_t k = 0; k < slicelength; k++, pos += step) {
        dst[k] = src[pos];
    }
    return result;
}
1796
1797
static int
1798
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1799
79.6M
{
1800
79.6M
    PyBytesObject *self = _PyBytes_CAST(op);
1801
79.6M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1802
79.6M
                             1, flags);
1803
79.6M
}
1804
1805
static PySequenceMethods bytes_as_sequence = {
1806
    bytes_length,       /*sq_length*/
1807
    _PyBytes_Concat,       /*sq_concat*/
1808
    bytes_repeat,       /*sq_repeat*/
1809
    bytes_item,         /*sq_item*/
1810
    0,                  /*sq_slice*/
1811
    0,                  /*sq_ass_item*/
1812
    0,                  /*sq_ass_slice*/
1813
    bytes_contains      /*sq_contains*/
1814
};
1815
1816
static PyMappingMethods bytes_as_mapping = {
1817
    bytes_length,
1818
    bytes_subscript,
1819
    0,
1820
};
1821
1822
static PyBufferProcs bytes_as_buffer = {
1823
    bytes_buffer_getbuffer,
1824
    NULL,
1825
};
1826
1827
1828
/*[clinic input]
1829
bytes.__bytes__
1830
Convert this value to exact type bytes.
1831
[clinic start generated code]*/
1832
1833
static PyObject *
1834
bytes___bytes___impl(PyBytesObject *self)
1835
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1836
45.2k
{
1837
45.2k
    if (PyBytes_CheckExact(self)) {
1838
45.2k
        return Py_NewRef(self);
1839
45.2k
    }
1840
0
    else {
1841
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1842
0
    }
1843
45.2k
}
1844
1845
1846
294
/* Strip modes passed as the `striptype` argument of do_xstrip(),
   do_strip() and do_argstrip(). */
#define LEFTSTRIP 0
1847
588
#define RIGHTSTRIP 1
1848
0
#define BOTHSTRIP 2
1849
1850
/*[clinic input]
1851
bytes.split
1852
1853
    sep: object = None
1854
        The delimiter according to which to split the bytes.
1855
        None (the default value) means split on ASCII whitespace characters
1856
        (space, tab, return, newline, formfeed, vertical tab).
1857
    maxsplit: Py_ssize_t = -1
1858
        Maximum number of splits to do.
1859
        -1 (the default value) means no limit.
1860
1861
Return a list of the sections in the bytes, using sep as the delimiter.
1862
[clinic start generated code]*/
1863
1864
static PyObject *
1865
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1867
2.93M
{
1868
2.93M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1869
2.93M
    const char *s = PyBytes_AS_STRING(self), *sub;
1870
2.93M
    Py_buffer vsub;
1871
2.93M
    PyObject *list;
1872
1873
2.93M
    if (maxsplit < 0)
1874
2.93M
        maxsplit = PY_SSIZE_T_MAX;
1875
2.93M
    if (sep == Py_None)
1876
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1877
2.93M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1878
0
        return NULL;
1879
2.93M
    sub = vsub.buf;
1880
2.93M
    n = vsub.len;
1881
1882
2.93M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1883
2.93M
    PyBuffer_Release(&vsub);
1884
2.93M
    return list;
1885
2.93M
}
1886
1887
/*[clinic input]
1888
@permit_long_docstring_body
1889
bytes.partition
1890
1891
    sep: Py_buffer
1892
    /
1893
1894
Partition the bytes into three parts using the given separator.
1895
1896
This will search for the separator sep in the bytes. If the separator is found,
1897
returns a 3-tuple containing the part before the separator, the separator
1898
itself, and the part after it.
1899
1900
If the separator is not found, returns a 3-tuple containing the original bytes
1901
object and two empty bytes objects.
1902
[clinic start generated code]*/
1903
1904
static PyObject *
1905
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1906
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1907
505k
{
1908
505k
    return stringlib_partition(
1909
505k
        (PyObject*) self,
1910
505k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1911
505k
        sep->obj, (const char *)sep->buf, sep->len
1912
505k
        );
1913
505k
}
1914
1915
/*[clinic input]
1916
@permit_long_docstring_body
1917
bytes.rpartition
1918
1919
    sep: Py_buffer
1920
    /
1921
1922
Partition the bytes into three parts using the given separator.
1923
1924
This will search for the separator sep in the bytes, starting at the end. If
1925
the separator is found, returns a 3-tuple containing the part before the
1926
separator, the separator itself, and the part after it.
1927
1928
If the separator is not found, returns a 3-tuple containing two empty bytes
1929
objects and the original bytes object.
1930
[clinic start generated code]*/
1931
1932
static PyObject *
1933
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1934
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1935
0
{
1936
0
    return stringlib_rpartition(
1937
0
        (PyObject*) self,
1938
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1939
0
        sep->obj, (const char *)sep->buf, sep->len
1940
0
        );
1941
0
}
1942
1943
/*[clinic input]
1944
@permit_long_docstring_body
1945
bytes.rsplit = bytes.split
1946
1947
Return a list of the sections in the bytes, using sep as the delimiter.
1948
1949
Splitting is done starting at the end of the bytes and working to the front.
1950
[clinic start generated code]*/
1951
1952
static PyObject *
1953
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1954
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1955
0
{
1956
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1957
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1958
0
    Py_buffer vsub;
1959
0
    PyObject *list;
1960
1961
0
    if (maxsplit < 0)
1962
0
        maxsplit = PY_SSIZE_T_MAX;
1963
0
    if (sep == Py_None)
1964
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1965
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1966
0
        return NULL;
1967
0
    sub = vsub.buf;
1968
0
    n = vsub.len;
1969
1970
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1971
0
    PyBuffer_Release(&vsub);
1972
0
    return list;
1973
0
}
1974
1975
1976
/*[clinic input]
1977
bytes.join
1978
1979
    iterable_of_bytes: object
1980
    /
1981
1982
Concatenate any number of bytes objects.
1983
1984
The bytes whose method is called is inserted in between each pair.
1985
1986
The result is returned as a new bytes object.
1987
1988
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1989
[clinic start generated code]*/
1990
1991
static PyObject *
1992
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1993
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1994
260k
{
1995
260k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1996
260k
}
1997
1998
/* Join `iterable` using the bytes object `sep` as separator.
   Raises SystemError-style bad-internal-call for a NULL `sep` and
   TypeError when `sep` is not a bytes object; returns NULL in both cases. */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    PyErr_Format(PyExc_TypeError,
                 "sep: expected bytes, got %T", sep);
    return NULL;
}
2013
2014
/*[clinic input]
2015
@permit_long_summary
2016
@text_signature "($self, sub[, start[, end]], /)"
2017
bytes.find
2018
2019
    sub: object
2020
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2021
         Optional start position. Default: start of the bytes.
2022
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2023
         Optional stop position. Default: end of the bytes.
2024
    /
2025
2026
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2027
2028
Return -1 on failure.
2029
[clinic start generated code]*/
2030
2031
static PyObject *
2032
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2033
                Py_ssize_t end)
2034
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
2035
16.1M
{
2036
16.1M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2037
16.1M
                          sub, start, end);
2038
16.1M
}
2039
2040
/*[clinic input]
2041
@permit_long_summary
2042
bytes.index = bytes.find
2043
2044
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2045
2046
Raise ValueError if the subsection is not found.
2047
[clinic start generated code]*/
2048
2049
static PyObject *
2050
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2051
                 Py_ssize_t end)
2052
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
2053
0
{
2054
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2055
0
                           sub, start, end);
2056
0
}
2057
2058
/*[clinic input]
2059
@permit_long_summary
2060
bytes.rfind = bytes.find
2061
2062
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2063
2064
Return -1 on failure.
2065
[clinic start generated code]*/
2066
2067
static PyObject *
2068
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2069
                 Py_ssize_t end)
2070
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
2071
279k
{
2072
279k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2073
279k
                           sub, start, end);
2074
279k
}
2075
2076
/*[clinic input]
2077
@permit_long_summary
2078
bytes.rindex = bytes.find
2079
2080
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2081
2082
Raise ValueError if the subsection is not found.
2083
[clinic start generated code]*/
2084
2085
static PyObject *
2086
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2087
                  Py_ssize_t end)
2088
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2089
0
{
2090
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2091
0
                            sub, start, end);
2092
0
}
2093
2094
2095
/* Strip from `self` the leading and/or trailing bytes that occur anywhere
   in the buffer exported by `sepobj`.  `striptype` is LEFTSTRIP, RIGHTSTRIP
   or BOTHSTRIP.  Returns NULL if `sepobj` cannot export a simple buffer. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_buffer sep_view;

    if (PyObject_GetBuffer(sepobj, &sep_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    const char *strip_set = sep_view.buf;
    const Py_ssize_t strip_len = sep_view.len;

    /* Advance past leading bytes that belong to the strip set. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        while (left < size
               && memchr(strip_set, Py_CHARMASK(data[left]), strip_len))
        {
            left++;
        }
    }

    /* Retreat over trailing bytes that belong to the strip set,
       never moving before `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(strip_set, Py_CHARMASK(data[right - 1]), strip_len))
        {
            right--;
        }
    }

    PyBuffer_Release(&sep_view);

    /* Nothing stripped from an exact bytes object: reuse it. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2133
2134
2135
/* Strip leading and/or trailing bytes for which Py_ISSPACE() is true.
   `striptype` is LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* Advance past leading whitespace. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        while (left < size && Py_ISSPACE(data[left])) {
            left++;
        }
    }

    /* Retreat over trailing whitespace, never moving before `left`. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    /* Nothing stripped from an exact bytes object: reuse it. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2162
2163
2164
/* Dispatch a strip request: None selects the whitespace variant,
   anything else is used as the set of bytes to strip. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2172
2173
/*[clinic input]
2174
@permit_long_docstring_body
2175
bytes.strip
2176
2177
    bytes: object = None
2178
    /
2179
2180
Strip leading and trailing bytes contained in the argument.
2181
2182
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2183
[clinic start generated code]*/
2184
2185
static PyObject *
2186
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2187
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2188
0
{
2189
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2190
0
}
2191
2192
/*[clinic input]
2193
bytes.lstrip
2194
2195
    bytes: object = None
2196
    /
2197
2198
Strip leading bytes contained in the argument.
2199
2200
If the argument is omitted or None, strip leading ASCII whitespace.
2201
[clinic start generated code]*/
2202
2203
static PyObject *
2204
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2205
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2206
0
{
2207
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2208
0
}
2209
2210
/*[clinic input]
2211
bytes.rstrip
2212
2213
    bytes: object = None
2214
    /
2215
2216
Strip trailing bytes contained in the argument.
2217
2218
If the argument is omitted or None, strip trailing ASCII whitespace.
2219
[clinic start generated code]*/
2220
2221
static PyObject *
2222
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2223
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2224
294
{
2225
294
    return do_argstrip(self, RIGHTSTRIP, bytes);
2226
294
}
2227
2228
2229
/*[clinic input]
2230
@permit_long_summary
2231
bytes.count = bytes.find
2232
2233
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2234
[clinic start generated code]*/
2235
2236
static PyObject *
2237
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2238
                 Py_ssize_t end)
2239
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2240
6.18M
{
2241
6.18M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2242
6.18M
                           sub, start, end);
2243
6.18M
}
2244
2245
2246
/*[clinic input]
2247
bytes.translate
2248
2249
    table: object
2250
        Translation table, which must be a bytes object of length 256.
2251
    /
2252
    delete as deletechars: object(c_default="NULL") = b''
2253
2254
Return a copy with each character mapped by the given translation table.
2255
2256
All characters occurring in the optional argument delete are removed.
2257
The remaining characters are mapped through the given translation table.
2258
[clinic start generated code]*/
2259
2260
static PyObject *
2261
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2262
                     PyObject *deletechars)
2263
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2264
0
{
2265
0
    const char *input;
2266
0
    char *output;
2267
0
    Py_buffer table_view = {NULL, NULL};
2268
0
    Py_buffer del_table_view = {NULL, NULL};
2269
0
    const char *table_chars;
2270
0
    Py_ssize_t i, c, changed = 0;
2271
0
    PyObject *input_obj = (PyObject*)self;
2272
0
    const char *output_start, *del_table_chars=NULL;
2273
0
    Py_ssize_t inlen, tablen, dellen = 0;
2274
0
    PyObject *result;
2275
0
    int trans_table[256];
2276
2277
0
    if (PyBytes_Check(table)) {
2278
0
        table_chars = PyBytes_AS_STRING(table);
2279
0
        tablen = PyBytes_GET_SIZE(table);
2280
0
    }
2281
0
    else if (table == Py_None) {
2282
0
        table_chars = NULL;
2283
0
        tablen = 256;
2284
0
    }
2285
0
    else {
2286
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2287
0
            return NULL;
2288
0
        table_chars = table_view.buf;
2289
0
        tablen = table_view.len;
2290
0
    }
2291
2292
0
    if (tablen != 256) {
2293
0
        PyErr_SetString(PyExc_ValueError,
2294
0
          "translation table must be 256 characters long");
2295
0
        PyBuffer_Release(&table_view);
2296
0
        return NULL;
2297
0
    }
2298
2299
0
    if (deletechars != NULL) {
2300
0
        if (PyBytes_Check(deletechars)) {
2301
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2302
0
            dellen = PyBytes_GET_SIZE(deletechars);
2303
0
        }
2304
0
        else {
2305
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2306
0
                PyBuffer_Release(&table_view);
2307
0
                return NULL;
2308
0
            }
2309
0
            del_table_chars = del_table_view.buf;
2310
0
            dellen = del_table_view.len;
2311
0
        }
2312
0
    }
2313
0
    else {
2314
0
        del_table_chars = NULL;
2315
0
        dellen = 0;
2316
0
    }
2317
2318
0
    inlen = PyBytes_GET_SIZE(input_obj);
2319
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2320
0
    if (result == NULL) {
2321
0
        PyBuffer_Release(&del_table_view);
2322
0
        PyBuffer_Release(&table_view);
2323
0
        return NULL;
2324
0
    }
2325
0
    output_start = output = PyBytes_AS_STRING(result);
2326
0
    input = PyBytes_AS_STRING(input_obj);
2327
2328
0
    if (dellen == 0 && table_chars != NULL) {
2329
        /* If no deletions are required, use faster code */
2330
0
        for (i = inlen; --i >= 0; ) {
2331
0
            c = Py_CHARMASK(*input++);
2332
0
            *output++ = table_chars[c];
2333
0
        }
2334
        /* Check if anything changed (for returning original object) */
2335
        /* We save this check until the end so that the compiler will */
2336
        /* unroll the loop above leading to MUCH faster code. */
2337
0
        if (PyBytes_CheckExact(input_obj)) {
2338
0
            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
2339
0
                Py_SETREF(result, Py_NewRef(input_obj));
2340
0
            }
2341
0
        }
2342
0
        PyBuffer_Release(&del_table_view);
2343
0
        PyBuffer_Release(&table_view);
2344
0
        return result;
2345
0
    }
2346
2347
0
    if (table_chars == NULL) {
2348
0
        for (i = 0; i < 256; i++)
2349
0
            trans_table[i] = Py_CHARMASK(i);
2350
0
    } else {
2351
0
        for (i = 0; i < 256; i++)
2352
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2353
0
    }
2354
0
    PyBuffer_Release(&table_view);
2355
2356
0
    for (i = 0; i < dellen; i++)
2357
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2358
0
    PyBuffer_Release(&del_table_view);
2359
2360
0
    for (i = inlen; --i >= 0; ) {
2361
0
        c = Py_CHARMASK(*input++);
2362
0
        if (trans_table[c] != -1)
2363
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2364
0
                continue;
2365
0
        changed = 1;
2366
0
    }
2367
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2368
0
        Py_DECREF(result);
2369
0
        return Py_NewRef(input_obj);
2370
0
    }
2371
    /* Fix the size of the resulting byte string */
2372
0
    if (inlen > 0)
2373
0
        _PyBytes_Resize(&result, output - output_start);
2374
0
    return result;
2375
0
}
2376
2377
2378
/*[clinic input]
2379
2380
@permit_long_summary
2381
@permit_long_docstring_body
2382
@staticmethod
2383
bytes.maketrans
2384
2385
    frm: Py_buffer
2386
    to: Py_buffer
2387
    /
2388
2389
Return a translation table usable for the bytes or bytearray translate method.
2390
2391
The returned table will be one where each byte in frm is mapped to the byte at
2392
the same position in to.
2393
2394
The bytes objects frm and to must be of the same length.
2395
[clinic start generated code]*/
2396
2397
static PyObject *
2398
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2399
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2400
9
{
2401
9
    return _Py_bytes_maketrans(frm, to);
2402
9
}
2403
2404
2405
/*[clinic input]
2406
bytes.replace
2407
2408
    old: Py_buffer
2409
    new: Py_buffer
2410
    /
2411
    count: Py_ssize_t = -1
2412
        Maximum number of occurrences to replace.
2413
        -1 (the default value) means replace all occurrences.
2414
2415
Return a copy with all occurrences of substring old replaced by new.
2416
2417
If count is given, only the first count occurrences are replaced.
2418
If count is not specified or -1, then all occurrences are replaced.
2419
[clinic start generated code]*/
2420
2421
static PyObject *
2422
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2423
                   Py_ssize_t count)
2424
/*[clinic end generated code: output=994fa588b6b9c104 input=cdf3cf8639297745]*/
2425
30.7k
{
2426
30.7k
    return stringlib_replace((PyObject *)self,
2427
30.7k
                             (const char *)old->buf, old->len,
2428
30.7k
                             (const char *)new->buf, new->len, count);
2429
30.7k
}
2430
2431
/** End DALKE **/
2432
2433
/*[clinic input]
2434
bytes.removeprefix as bytes_removeprefix
2435
2436
    prefix: Py_buffer
2437
    /
2438
2439
Return a bytes object with the given prefix string removed if present.
2440
2441
If the bytes starts with the prefix string, return bytes[len(prefix):].
2442
Otherwise, return a copy of the original bytes.
2443
[clinic start generated code]*/
2444
2445
static PyObject *
2446
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2447
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2448
0
{
2449
0
    const char *self_start = PyBytes_AS_STRING(self);
2450
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2451
0
    const char *prefix_start = prefix->buf;
2452
0
    Py_ssize_t prefix_len = prefix->len;
2453
2454
0
    if (self_len >= prefix_len
2455
0
        && prefix_len > 0
2456
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2457
0
    {
2458
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2459
0
                                         self_len - prefix_len);
2460
0
    }
2461
2462
0
    if (PyBytes_CheckExact(self)) {
2463
0
        return Py_NewRef(self);
2464
0
    }
2465
2466
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2467
0
}
2468
2469
/*[clinic input]
2470
bytes.removesuffix as bytes_removesuffix
2471
2472
    suffix: Py_buffer
2473
    /
2474
2475
Return a bytes object with the given suffix string removed if present.
2476
2477
If the bytes ends with the suffix string and that suffix is not empty,
2478
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2479
bytes.
2480
[clinic start generated code]*/
2481
2482
static PyObject *
2483
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2484
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2485
0
{
2486
0
    const char *self_start = PyBytes_AS_STRING(self);
2487
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2488
0
    const char *suffix_start = suffix->buf;
2489
0
    Py_ssize_t suffix_len = suffix->len;
2490
2491
0
    if (self_len >= suffix_len
2492
0
        && suffix_len > 0
2493
0
        && memcmp(self_start + self_len - suffix_len,
2494
0
                  suffix_start, suffix_len) == 0)
2495
0
    {
2496
0
        return PyBytes_FromStringAndSize(self_start,
2497
0
                                         self_len - suffix_len);
2498
0
    }
2499
2500
0
    if (PyBytes_CheckExact(self)) {
2501
0
        return Py_NewRef(self);
2502
0
    }
2503
2504
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2505
0
}
2506
2507
/*[clinic input]
2508
@permit_long_summary
2509
@text_signature "($self, prefix[, start[, end]], /)"
2510
bytes.startswith
2511
2512
    prefix as subobj: object
2513
        A bytes or a tuple of bytes to try.
2514
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2515
        Optional start position. Default: start of the bytes.
2516
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2517
        Optional stop position. Default: end of the bytes.
2518
    /
2519
2520
Return True if the bytes starts with the specified prefix, False otherwise.
2521
[clinic start generated code]*/
2522
2523
static PyObject *
2524
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2525
                      Py_ssize_t start, Py_ssize_t end)
2526
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2527
1.93M
{
2528
1.93M
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2529
1.93M
                                subobj, start, end);
2530
1.93M
}
2531
2532
/*[clinic input]
2533
@permit_long_summary
2534
@text_signature "($self, suffix[, start[, end]], /)"
2535
bytes.endswith
2536
2537
    suffix as subobj: object
2538
        A bytes or a tuple of bytes to try.
2539
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2540
         Optional start position. Default: start of the bytes.
2541
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2542
         Optional stop position. Default: end of the bytes.
2543
    /
2544
2545
Return True if the bytes ends with the specified suffix, False otherwise.
2546
[clinic start generated code]*/
2547
2548
static PyObject *
2549
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2550
                    Py_ssize_t end)
2551
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2552
315
{
2553
315
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2554
315
                              subobj, start, end);
2555
315
}
2556
2557
2558
/*[clinic input]
2559
bytes.decode
2560
2561
    encoding: str(c_default="NULL") = 'utf-8'
2562
        The encoding with which to decode the bytes.
2563
    errors: str(c_default="NULL") = 'strict'
2564
        The error handling scheme to use for the handling of decoding errors.
2565
        The default is 'strict' meaning that decoding errors raise a
2566
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2567
        as well as any other name registered with codecs.register_error that
2568
        can handle UnicodeDecodeErrors.
2569
2570
Decode the bytes using the codec registered for encoding.
2571
[clinic start generated code]*/
2572
2573
static PyObject *
2574
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2575
                  const char *errors)
2576
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2577
22.0M
{
2578
22.0M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2579
22.0M
}
2580
2581
2582
/*[clinic input]
2583
@permit_long_docstring_body
2584
bytes.splitlines
2585
2586
    keepends: bool = False
2587
2588
Return a list of the lines in the bytes, breaking at line boundaries.
2589
2590
Line breaks are not included in the resulting list unless keepends is given and
2591
true.
2592
[clinic start generated code]*/
2593
2594
static PyObject *
2595
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2596
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2597
0
{
2598
0
    return stringlib_splitlines(
2599
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2600
0
        PyBytes_GET_SIZE(self), keepends
2601
0
        );
2602
0
}
2603
2604
/*[clinic input]
2605
@classmethod
2606
bytes.fromhex
2607
2608
    string: object
2609
    /
2610
2611
Create a bytes object from a string of hexadecimal numbers.
2612
2613
Spaces between two numbers are accepted.
2614
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2615
[clinic start generated code]*/
2616
2617
static PyObject *
2618
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2619
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2620
30.1k
{
2621
30.1k
    PyObject *result = _PyBytes_FromHex(string, 0);
2622
30.1k
    if (type != &PyBytes_Type && result != NULL) {
2623
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2624
0
    }
2625
30.1k
    return result;
2626
30.1k
}
2627
2628
/* Decode pairs of hexadecimal digits from `string` into a new object.
 *
 * `string` must be a str or an object supporting the buffer protocol;
 * anything else raises TypeError.  Characters accepted by Py_ISSPACE() are
 * skipped before each pair of digits (but not between the two digits of a
 * pair).  If `use_bytearray` is non-zero the writer is created with
 * _PyBytesWriter_CreateByteArray(), otherwise with PyBytesWriter_Create().
 *
 * Returns the writer's finished object, or NULL with an exception set:
 * ValueError for a non-hexadecimal character (including any non-ASCII
 * character in a str) or for an odd number of hexadecimal digits.
 */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input */
        if (Py_ISSPACE(*str)) {
            /* Never look at *end: a buffer exported through the buffer
               protocol is not guaranteed to be NUL-terminated. */
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* Check if we had a second digit, before dereferencing so that
           the byte past the end of the input is never read. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }
        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 marks "odd number of digits"; any other value is
       the offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    /* writer is still NULL when the non-ASCII str check jumps here. */
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2740
2741
/*[clinic input]
2742
bytes.hex
2743
2744
    sep: object = NULL
2745
        An optional single character or byte to separate hex bytes.
2746
    bytes_per_sep: Py_ssize_t = 1
2747
        How many bytes between separators.  Positive values count from the
2748
        right, negative values count from the left.
2749
2750
Create a string of hexadecimal numbers from a bytes object.
2751
2752
Example:
2753
>>> value = b'\xb9\x01\xef'
2754
>>> value.hex()
2755
'b901ef'
2756
>>> value.hex(':')
2757
'b9:01:ef'
2758
>>> value.hex(':', 2)
2759
'b9:01ef'
2760
>>> value.hex(':', -2)
2761
'b901:ef'
2762
[clinic start generated code]*/
2763
2764
static PyObject *
2765
bytes_hex_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t bytes_per_sep)
2766
/*[clinic end generated code: output=588821f02cb9d8f5 input=bd8eceb755d8230f]*/
2767
0
{
2768
0
    const char *argbuf = PyBytes_AS_STRING(self);
2769
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2770
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2771
0
}
2772
2773
/* __getnewargs__: return a 1-tuple holding a bytes copy of the contents. */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *contents = self->ob_sval;
    Py_ssize_t length = Py_SIZE(self);

    return Py_BuildValue("(y#)", contents, length);
}
2779
2780
2781
static PyMethodDef
2782
bytes_methods[] = {
2783
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2784
    BYTES___BYTES___METHODDEF
2785
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2786
     _Py_capitalize__doc__},
2787
    STRINGLIB_CENTER_METHODDEF
2788
    BYTES_COUNT_METHODDEF
2789
    BYTES_DECODE_METHODDEF
2790
    BYTES_ENDSWITH_METHODDEF
2791
    STRINGLIB_EXPANDTABS_METHODDEF
2792
    BYTES_FIND_METHODDEF
2793
    BYTES_FROMHEX_METHODDEF
2794
    BYTES_HEX_METHODDEF
2795
    BYTES_INDEX_METHODDEF
2796
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2797
     _Py_isalnum__doc__},
2798
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2799
     _Py_isalpha__doc__},
2800
    {"isascii", stringlib_isascii, METH_NOARGS,
2801
     _Py_isascii__doc__},
2802
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2803
     _Py_isdigit__doc__},
2804
    {"islower", stringlib_islower, METH_NOARGS,
2805
     _Py_islower__doc__},
2806
    {"isspace", stringlib_isspace, METH_NOARGS,
2807
     _Py_isspace__doc__},
2808
    {"istitle", stringlib_istitle, METH_NOARGS,
2809
     _Py_istitle__doc__},
2810
    {"isupper", stringlib_isupper, METH_NOARGS,
2811
     _Py_isupper__doc__},
2812
    BYTES_JOIN_METHODDEF
2813
    STRINGLIB_LJUST_METHODDEF
2814
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2815
    BYTES_LSTRIP_METHODDEF
2816
    BYTES_MAKETRANS_METHODDEF
2817
    BYTES_PARTITION_METHODDEF
2818
    BYTES_REPLACE_METHODDEF
2819
    BYTES_REMOVEPREFIX_METHODDEF
2820
    BYTES_REMOVESUFFIX_METHODDEF
2821
    BYTES_RFIND_METHODDEF
2822
    BYTES_RINDEX_METHODDEF
2823
    STRINGLIB_RJUST_METHODDEF
2824
    BYTES_RPARTITION_METHODDEF
2825
    BYTES_RSPLIT_METHODDEF
2826
    BYTES_RSTRIP_METHODDEF
2827
    BYTES_SPLIT_METHODDEF
2828
    BYTES_SPLITLINES_METHODDEF
2829
    BYTES_STARTSWITH_METHODDEF
2830
    BYTES_STRIP_METHODDEF
2831
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2832
     _Py_swapcase__doc__},
2833
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2834
    BYTES_TRANSLATE_METHODDEF
2835
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2836
    STRINGLIB_ZFILL_METHODDEF
2837
    {NULL,     NULL}                         /* sentinel */
2838
};
2839
2840
/* nb_remainder slot: `self % arg` formatting.  Returns NotImplemented when
   the left operand is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2849
2850
static PyNumberMethods bytes_as_number = {
2851
    0,              /*nb_add*/
2852
    0,              /*nb_subtract*/
2853
    0,              /*nb_multiply*/
2854
    bytes_mod,      /*nb_remainder*/
2855
};
2856
2857
static PyObject *
2858
bytes_subtype_new(PyTypeObject *, PyObject *);
2859
2860
/*[clinic input]
2861
@classmethod
2862
bytes.__new__ as bytes_new
2863
2864
    source as x: object = NULL
2865
    encoding: str = NULL
2866
    errors: str = NULL
2867
2868
[clinic start generated code]*/
2869
2870
static PyObject *
2871
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2872
               const char *errors)
2873
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2874
15.4M
{
2875
15.4M
    PyObject *bytes;
2876
15.4M
    PyObject *func;
2877
15.4M
    Py_ssize_t size;
2878
2879
15.4M
    if (x == NULL) {
2880
0
        if (encoding != NULL || errors != NULL) {
2881
0
            PyErr_SetString(PyExc_TypeError,
2882
0
                            encoding != NULL ?
2883
0
                            "encoding without a string argument" :
2884
0
                            "errors without a string argument");
2885
0
            return NULL;
2886
0
        }
2887
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2888
0
    }
2889
15.4M
    else if (encoding != NULL) {
2890
        /* Encode via the codec registry */
2891
361k
        if (!PyUnicode_Check(x)) {
2892
0
            PyErr_SetString(PyExc_TypeError,
2893
0
                            "encoding without a string argument");
2894
0
            return NULL;
2895
0
        }
2896
361k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2897
361k
    }
2898
15.0M
    else if (errors != NULL) {
2899
0
        PyErr_SetString(PyExc_TypeError,
2900
0
                        PyUnicode_Check(x) ?
2901
0
                        "string argument without an encoding" :
2902
0
                        "errors without a string argument");
2903
0
        return NULL;
2904
0
    }
2905
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2906
       integer argument before deferring to PyBytes_FromObject, something
2907
       PyObject_Bytes doesn't do. */
2908
15.0M
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2909
45.2k
        bytes = _PyObject_CallNoArgs(func);
2910
45.2k
        Py_DECREF(func);
2911
45.2k
        if (bytes == NULL)
2912
0
            return NULL;
2913
45.2k
        if (!PyBytes_Check(bytes)) {
2914
0
            PyErr_Format(PyExc_TypeError,
2915
0
                         "%T.__bytes__() must return a bytes, not %T",
2916
0
                         x, bytes);
2917
0
            Py_DECREF(bytes);
2918
0
            return NULL;
2919
0
        }
2920
45.2k
    }
2921
15.0M
    else if (PyErr_Occurred())
2922
0
        return NULL;
2923
15.0M
    else if (PyUnicode_Check(x)) {
2924
0
        PyErr_SetString(PyExc_TypeError,
2925
0
                        "string argument without an encoding");
2926
0
        return NULL;
2927
0
    }
2928
    /* Is it an integer? */
2929
15.0M
    else if (_PyIndex_Check(x)) {
2930
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2931
0
        if (size == -1 && PyErr_Occurred()) {
2932
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2933
0
                return NULL;
2934
0
            PyErr_Clear();  /* fall through */
2935
0
            bytes = PyBytes_FromObject(x);
2936
0
        }
2937
0
        else {
2938
0
            if (size < 0) {
2939
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2940
0
                return NULL;
2941
0
            }
2942
0
            bytes = _PyBytes_FromSize(size, 1);
2943
0
        }
2944
0
    }
2945
15.0M
    else {
2946
15.0M
        bytes = PyBytes_FromObject(x);
2947
15.0M
    }
2948
2949
15.4M
    if (bytes != NULL && type != &PyBytes_Type) {
2950
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2951
0
    }
2952
2953
15.4M
    return bytes;
2954
15.4M
}
2955
2956
/* Build a bytes object from the buffer exported by `x`: the buffer is
   requested with PyBUF_FULL_RO and copied out in C-contiguous order.
   Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer exported;
    if (PyObject_GetBuffer(x, &exported, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    int copied = 0;
    PyBytesWriter *writer = PyBytesWriter_Create(exported.len);
    if (writer != NULL) {
        void *dest = PyBytesWriter_GetData(writer);
        copied = (PyBuffer_ToContiguous(dest, &exported,
                                        exported.len, 'C') >= 0);
    }

    if (!copied) {
        /* writer may be NULL here if its creation failed. */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&exported);
        return NULL;
    }

    PyBuffer_Release(&exported);
    return PyBytesWriter_Finish(writer);
}
2981
2982
/* Build a bytes object from a list of integers, each of which must be in
   range(0, 256).  The list length is re-read on every iteration and the
   writer is grown when more items turn up than were initially allocated.
   Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        /* Keep the item alive for the duration of the conversion. */
        PyObject *element = Py_NewRef(PyList_GET_ITEM(x, pos));
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        Py_DECREF(element);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte > 255) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Out of room: make space for at least one more byte. */
        if (pos >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                        capacity + 1, out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        pos++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3022
3023
/* Build a bytes object from a tuple of integers, each of which must be in
   range(0, 256).  The writer is created with exactly the tuple's length.
   Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    for (Py_ssize_t pos = 0; pos < count; pos++) {
        PyObject *element = PyTuple_GET_ITEM(x, pos);
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte > 255) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }
        out[pos] = (char) byte;
    }
    return PyBytesWriter_Finish(writer);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3055
3056
/* Build a bytes object by exhausting the iterator `it`.  `x` is only used
   to obtain a length hint (default 64) for the initial allocation; the
   writer is grown whenever the iterator yields more items than fit.
   Each item must be an integer in range(0, 256).
   Returns NULL with an exception set on failure. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(capacity);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    capacity = _PyBytesWriter_GetAllocated(writer);

    /* Run the iterator to exhaustion */
    Py_ssize_t produced = 0;
    PyObject *element;
    while ((element = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        Py_DECREF(element);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (byte < 0 || byte > 255) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, growing the writer first if it is full */
        if (produced >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                        capacity + 1, out);
            if (out == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        produced++;
    }
    /* PyIter_Next() returned NULL: either exhaustion or an error. */
    if (PyErr_Occurred()) {
        goto error;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3115
3116
/* Convert `x` to a bytes object.
 *
 * Tried in order: an exact bytes object (returned as a new reference), an
 * object supporting the buffer protocol, an exact list, an exact tuple, and
 * finally any non-str iterable.  Raises TypeError when none applies.
 * Returns NULL with an exception set on failure (including x == NULL).
 */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    /* str is never iterated here; it goes straight to the TypeError. */
    if (!PyUnicode_Check(x)) {
        PyObject *iterator = PyObject_GetIter(x);
        if (iterator == NULL) {
            /* Only a TypeError is replaced by the message below; any
               other exception propagates unchanged. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iterator, x);
            Py_DECREF(iterator);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3157
3158
/* This allocator is needed for subclasses that don't want to use __new__.
3159
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3160
 *
3161
 * This allocator will be removed when ob_shash is removed.
3162
 */
3163
static PyObject *
3164
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3165
0
{
3166
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3167
0
    if (obj == NULL) {
3168
0
        return NULL;
3169
0
    }
3170
0
    set_ob_shash(obj, -1);
3171
0
    return (PyObject*)obj;
3172
0
}
3173
3174
static PyObject *
3175
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3176
0
{
3177
0
    PyObject *pnew;
3178
0
    Py_ssize_t n;
3179
3180
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3181
0
    assert(PyBytes_Check(tmp));
3182
0
    n = PyBytes_GET_SIZE(tmp);
3183
0
    pnew = type->tp_alloc(type, n);
3184
0
    if (pnew != NULL) {
3185
0
        memcpy(PyBytes_AS_STRING(pnew),
3186
0
                  PyBytes_AS_STRING(tmp), n+1);
3187
0
        set_ob_shash((PyBytesObject *)pnew,
3188
0
            get_ob_shash((PyBytesObject *)tmp));
3189
0
    }
3190
0
    return pnew;
3191
0
}
3192
3193
PyDoc_STRVAR(bytes_doc,
3194
"bytes(iterable_of_ints) -> bytes\n\
3195
bytes(string, encoding[, errors]) -> bytes\n\
3196
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3197
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3198
bytes() -> empty bytes object\n\
3199
\n\
3200
Construct an immutable array of bytes from:\n\
3201
  - an iterable yielding integers in range(256)\n\
3202
  - a text string encoded using the specified encoding\n\
3203
  - any object implementing the buffer API.\n\
3204
  - an integer");
3205
3206
static PyObject *bytes_iter(PyObject *seq);
3207
3208
3209
static _PyObjectIndexPair
3210
bytes_iteritem(PyObject *obj, Py_ssize_t index)
3211
2.06k
{
3212
2.06k
    PyBytesObject *a = _PyBytes_CAST(obj);
3213
2.06k
    if (index >= Py_SIZE(a)) {
3214
53
        return (_PyObjectIndexPair) { .object = NULL, .index = index };
3215
53
    }
3216
2.00k
    PyObject *l = _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[index]);
3217
2.00k
    return (_PyObjectIndexPair) { .object = l, .index = index + 1 };
3218
2.06k
}
3219
3220
PyTypeObject PyBytes_Type = {
3221
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3222
    "bytes",
3223
    PyBytesObject_SIZE,
3224
    sizeof(char),
3225
    0,                                          /* tp_dealloc */
3226
    0,                                          /* tp_vectorcall_offset */
3227
    0,                                          /* tp_getattr */
3228
    0,                                          /* tp_setattr */
3229
    0,                                          /* tp_as_async */
3230
    bytes_repr,                                 /* tp_repr */
3231
    &bytes_as_number,                           /* tp_as_number */
3232
    &bytes_as_sequence,                         /* tp_as_sequence */
3233
    &bytes_as_mapping,                          /* tp_as_mapping */
3234
    bytes_hash,                                 /* tp_hash */
3235
    0,                                          /* tp_call */
3236
    bytes_str,                                  /* tp_str */
3237
    PyObject_GenericGetAttr,                    /* tp_getattro */
3238
    0,                                          /* tp_setattro */
3239
    &bytes_as_buffer,                           /* tp_as_buffer */
3240
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3241
        Py_TPFLAGS_BYTES_SUBCLASS |
3242
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3243
    bytes_doc,                                  /* tp_doc */
3244
    0,                                          /* tp_traverse */
3245
    0,                                          /* tp_clear */
3246
    bytes_richcompare,                          /* tp_richcompare */
3247
    0,                                          /* tp_weaklistoffset */
3248
    bytes_iter,                                 /* tp_iter */
3249
    0,                                          /* tp_iternext */
3250
    bytes_methods,                              /* tp_methods */
3251
    0,                                          /* tp_members */
3252
    0,                                          /* tp_getset */
3253
    0,                                          /* tp_base */
3254
    0,                                          /* tp_dict */
3255
    0,                                          /* tp_descr_get */
3256
    0,                                          /* tp_descr_set */
3257
    0,                                          /* tp_dictoffset */
3258
    0,                                          /* tp_init */
3259
    bytes_alloc,                                /* tp_alloc */
3260
    bytes_new,                                  /* tp_new */
3261
    PyObject_Free,                              /* tp_free */
3262
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3263
    ._tp_iteritem = bytes_iteritem,
3264
};
3265
3266
void
3267
PyBytes_Concat(PyObject **pv, PyObject *w)
3268
0
{
3269
0
    assert(pv != NULL);
3270
0
    if (*pv == NULL)
3271
0
        return;
3272
0
    if (w == NULL) {
3273
0
        Py_CLEAR(*pv);
3274
0
        return;
3275
0
    }
3276
3277
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3278
        /* Only one reference, so we can resize in place */
3279
0
        Py_ssize_t oldsize;
3280
0
        Py_buffer wb;
3281
3282
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3283
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3284
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3285
0
            Py_CLEAR(*pv);
3286
0
            return;
3287
0
        }
3288
3289
0
        oldsize = PyBytes_GET_SIZE(*pv);
3290
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3291
0
            PyErr_NoMemory();
3292
0
            goto error;
3293
0
        }
3294
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3295
0
            goto error;
3296
3297
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3298
0
        PyBuffer_Release(&wb);
3299
0
        return;
3300
3301
0
      error:
3302
0
        PyBuffer_Release(&wb);
3303
0
        Py_CLEAR(*pv);
3304
0
        return;
3305
0
    }
3306
3307
0
    else {
3308
        /* Multiple references, need to create new object */
3309
0
        PyObject *v;
3310
0
        v = _PyBytes_Concat(*pv, w);
3311
0
        Py_SETREF(*pv, v);
3312
0
    }
3313
0
}
3314
3315
void
3316
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3317
0
{
3318
0
    PyBytes_Concat(pv, w);
3319
0
    Py_XDECREF(w);
3320
0
}
3321
3322
3323
/* The following function breaks the notion that bytes are immutable:
3324
   it changes the size of a bytes object.  You can think of it
3325
   as creating a new bytes object and destroying the old one, only
3326
   more efficiently.
3327
   Note that if there's not enough memory to resize the bytes object, the
3328
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3329
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3330
   returned, and the value in *pv may or may not be the same as on input.
3331
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3332
   does *not* include that), and a trailing \0 byte is stored.
3333
*/
3334
3335
int
3336
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3337
23.2M
{
3338
23.2M
    PyObject *v;
3339
23.2M
    PyBytesObject *sv;
3340
23.2M
    v = *pv;
3341
23.2M
    if (!PyBytes_Check(v) || newsize < 0) {
3342
0
        *pv = 0;
3343
0
        Py_DECREF(v);
3344
0
        PyErr_BadInternalCall();
3345
0
        return -1;
3346
0
    }
3347
23.2M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3348
23.2M
    if (oldsize == newsize) {
3349
        /* return early if newsize equals to v->ob_size */
3350
1.66M
        return 0;
3351
1.66M
    }
3352
21.5M
    if (oldsize == 0) {
3353
17.2M
        *pv = _PyBytes_FromSize(newsize, 0);
3354
17.2M
        Py_DECREF(v);
3355
17.2M
        return (*pv == NULL) ? -1 : 0;
3356
17.2M
    }
3357
4.27M
    if (newsize == 0) {
3358
8.40k
        *pv = bytes_get_empty();
3359
8.40k
        Py_DECREF(v);
3360
8.40k
        return 0;
3361
8.40k
    }
3362
4.27M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3363
0
        if (oldsize < newsize) {
3364
0
            *pv = _PyBytes_FromSize(newsize, 0);
3365
0
            if (*pv) {
3366
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3367
0
            }
3368
0
        }
3369
0
        else {
3370
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3371
0
        }
3372
0
        Py_DECREF(v);
3373
0
        return (*pv == NULL) ? -1 : 0;
3374
0
    }
3375
3376
#ifdef Py_TRACE_REFS
3377
    _Py_ForgetReference(v);
3378
#endif
3379
4.27M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3380
4.27M
    *pv = (PyObject *)
3381
4.27M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3382
4.27M
    if (*pv == NULL) {
3383
#ifdef Py_REF_DEBUG
3384
        _Py_DecRefTotal(_PyThreadState_GET());
3385
#endif
3386
0
        PyObject_Free(v);
3387
0
        PyErr_NoMemory();
3388
0
        return -1;
3389
0
    }
3390
4.27M
    _Py_NewReferenceNoTotal(*pv);
3391
4.27M
    sv = (PyBytesObject *) *pv;
3392
4.27M
    Py_SET_SIZE(sv, newsize);
3393
4.27M
    sv->ob_sval[newsize] = '\0';
3394
4.27M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3395
4.27M
    return 0;
3396
4.27M
}
3397
3398
3399
/*********************** Bytes Iterator ****************************/
3400
3401
typedef struct {
3402
    PyObject_HEAD
3403
    Py_ssize_t it_index;
3404
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3405
} striterobject;
3406
3407
580
#define _striterobject_CAST(op)  ((striterobject *)(op))
3408
3409
static void
3410
striter_dealloc(PyObject *op)
3411
44
{
3412
44
    striterobject *it = _striterobject_CAST(op);
3413
44
    _PyObject_GC_UNTRACK(it);
3414
44
    Py_XDECREF(it->it_seq);
3415
44
    PyObject_GC_Del(it);
3416
44
}
3417
3418
static int
3419
striter_traverse(PyObject *op, visitproc visit, void *arg)
3420
0
{
3421
0
    striterobject *it = _striterobject_CAST(op);
3422
0
    Py_VISIT(it->it_seq);
3423
0
    return 0;
3424
0
}
3425
3426
/* tp_iternext: return the next byte as an int object, or NULL (without
   setting an exception here) once the sequence has been consumed. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *self = _striterobject_CAST(op);
    assert(self != NULL);

    PyBytesObject *bytes = self->it_seq;
    if (bytes == NULL) {
        /* Already exhausted earlier. */
        return NULL;
    }
    assert(PyBytes_Check(bytes));

    if (self->it_index >= PyBytes_GET_SIZE(bytes)) {
        /* Ran off the end: release the sequence and mark the iterator
           as exhausted. */
        self->it_seq = NULL;
        Py_DECREF(bytes);
        return NULL;
    }

    unsigned char byte = (unsigned char)bytes->ob_sval[self->it_index++];
    return _PyLong_FromUnsignedChar(byte);
}
3447
3448
/* __length_hint__: number of bytes not yet yielded; 0 once exhausted. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *self = _striterobject_CAST(op);
    Py_ssize_t remaining = (self->it_seq != NULL)
        ? PyBytes_GET_SIZE(self->it_seq) - self->it_index
        : 0;

    return PyLong_FromSsize_t(remaining);
}
3457
3458
PyDoc_STRVAR(length_hint_doc,
3459
             "Private method returning an estimate of len(list(it)).");
3460
3461
/* __reduce__: return (iter, (seq,), index) for a live iterator, or
   (iter, ((),)) for an exhausted one. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    PyObject *iter_builtin = _PyEval_GetBuiltin(&_Py_ID(iter));

    /* _PyEval_GetBuiltin can invoke arbitrary code,
     * call must be before access of iterator pointers.
     * see issue #101765 */
    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_builtin);
    }
    return Py_BuildValue("N(O)n", iter_builtin, self->it_seq, self->it_index);
}
3476
3477
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3478
3479
/* __setstate__: set the iterator position from an int, clamped to
   [0, len(seq)].  An exhausted iterator is left untouched. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t position = PyLong_AsSsize_t(state);
    if (position == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq != NULL) {
        const Py_ssize_t limit = PyBytes_GET_SIZE(self->it_seq);
        if (position < 0) {
            position = 0;
        }
        else if (position > limit) {
            position = limit; /* iterator exhausted */
        }
        self->it_index = position;
    }
    Py_RETURN_NONE;
}
3495
3496
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3497
3498
static PyMethodDef striter_methods[] = {
3499
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3500
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3501
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3502
    {NULL,              NULL}           /* sentinel */
3503
};
3504
3505
PyTypeObject PyBytesIter_Type = {
3506
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3507
    "bytes_iterator",                           /* tp_name */
3508
    sizeof(striterobject),                      /* tp_basicsize */
3509
    0,                                          /* tp_itemsize */
3510
    /* methods */
3511
    striter_dealloc,                            /* tp_dealloc */
3512
    0,                                          /* tp_vectorcall_offset */
3513
    0,                                          /* tp_getattr */
3514
    0,                                          /* tp_setattr */
3515
    0,                                          /* tp_as_async */
3516
    0,                                          /* tp_repr */
3517
    0,                                          /* tp_as_number */
3518
    0,                                          /* tp_as_sequence */
3519
    0,                                          /* tp_as_mapping */
3520
    0,                                          /* tp_hash */
3521
    0,                                          /* tp_call */
3522
    0,                                          /* tp_str */
3523
    PyObject_GenericGetAttr,                    /* tp_getattro */
3524
    0,                                          /* tp_setattro */
3525
    0,                                          /* tp_as_buffer */
3526
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3527
    0,                                          /* tp_doc */
3528
    striter_traverse,                           /* tp_traverse */
3529
    0,                                          /* tp_clear */
3530
    0,                                          /* tp_richcompare */
3531
    0,                                          /* tp_weaklistoffset */
3532
    PyObject_SelfIter,                          /* tp_iter */
3533
    striter_next,                               /* tp_iternext */
3534
    striter_methods,                            /* tp_methods */
3535
    0,
3536
};
3537
3538
/* Create a new bytes iterator over `seq`.
 *
 * `seq` must pass PyBytes_Check(); otherwise a bad-internal-call error is
 * raised.  The iterator takes its own strong reference to `seq`, starts at
 * index 0 and is GC-tracked before being returned.  Returns NULL on failure.
 */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject,
                                              &PyBytesIter_Type);
    if (iterator == NULL) {
        return NULL;
    }

    iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
    iterator->it_index = 0;
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
3555
3556
3557
void
3558
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3559
    const char* src, Py_ssize_t len_src)
3560
190k
{
3561
190k
    if (len_dest == 0) {
3562
727
        return;
3563
727
    }
3564
190k
    if (len_src == 1) {
3565
187k
        memset(dest, src[0], len_dest);
3566
187k
    }
3567
2.38k
    else {
3568
2.38k
        if (src != dest) {
3569
2.38k
            memcpy(dest, src, len_src);
3570
2.38k
        }
3571
2.38k
        Py_ssize_t copied = len_src;
3572
5.80k
        while (copied < len_dest) {
3573
3.42k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3574
3.42k
            memcpy(dest + copied, dest, bytes_to_copy);
3575
3.42k
            copied += bytes_to_copy;
3576
3.42k
        }
3577
2.38k
    }
3578
190k
}
3579
3580
3581
// --- PyBytesWriter API -----------------------------------------------------
3582
3583
static inline char*
3584
byteswriter_data(PyBytesWriter *writer)
3585
36.3M
{
3586
36.3M
    return _PyBytesWriter_GetData(writer);
3587
36.3M
}
3588
3589
3590
static inline Py_ssize_t
3591
byteswriter_allocated(PyBytesWriter *writer)
3592
36.1M
{
3593
36.1M
    if (writer->obj == NULL) {
3594
35.3M
        return sizeof(writer->small_buffer);
3595
35.3M
    }
3596
883k
    else if (writer->use_bytearray) {
3597
0
        return PyByteArray_GET_SIZE(writer->obj);
3598
0
    }
3599
883k
    else {
3600
883k
        return PyBytes_GET_SIZE(writer->obj);
3601
883k
    }
3602
36.1M
}
3603
3604
3605
/* Divisor used when over-allocating: a buffer grows by an extra
 * size / OVERALLOCATE_FACTOR bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor. */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor. */
#  define OVERALLOCATE_FACTOR 4
#endif
3612
3613
static inline int
3614
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3615
26.2M
{
3616
26.2M
    assert(size >= 0);
3617
3618
26.2M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3619
26.2M
    if (size <= old_allocated) {
3620
25.2M
        return 0;
3621
25.2M
    }
3622
3623
1.07M
    if (resize & writer->overallocate) {
3624
22.1k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3625
22.1k
            size += size / OVERALLOCATE_FACTOR;
3626
22.1k
        }
3627
22.1k
    }
3628
3629
1.07M
    if (writer->obj != NULL) {
3630
22.1k
        if (writer->use_bytearray) {
3631
0
            if (PyByteArray_Resize(writer->obj, size)) {
3632
0
                return -1;
3633
0
            }
3634
0
        }
3635
22.1k
        else {
3636
22.1k
            if (_PyBytes_Resize(&writer->obj, size)) {
3637
0
                return -1;
3638
0
            }
3639
22.1k
        }
3640
22.1k
        assert(writer->obj != NULL);
3641
22.1k
    }
3642
1.05M
    else if (writer->use_bytearray) {
3643
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3644
0
        if (writer->obj == NULL) {
3645
0
            return -1;
3646
0
        }
3647
0
        if (resize) {
3648
0
            assert((size_t)size > sizeof(writer->small_buffer));
3649
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3650
0
                   writer->small_buffer,
3651
0
                   sizeof(writer->small_buffer));
3652
0
        }
3653
0
    }
3654
1.05M
    else {
3655
1.05M
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3656
1.05M
        if (writer->obj == NULL) {
3657
0
            return -1;
3658
0
        }
3659
1.05M
        if (resize) {
3660
0
            assert((size_t)size > sizeof(writer->small_buffer));
3661
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3662
0
                   writer->small_buffer,
3663
0
                   sizeof(writer->small_buffer));
3664
0
        }
3665
1.05M
    }
3666
3667
#ifdef Py_DEBUG
3668
    Py_ssize_t allocated = byteswriter_allocated(writer);
3669
    if (resize && allocated > old_allocated) {
3670
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3671
               allocated - old_allocated);
3672
    }
3673
#endif
3674
3675
1.07M
    return 0;
3676
1.07M
}
3677
3678
3679
static PyBytesWriter*
3680
byteswriter_create(Py_ssize_t size, int use_bytearray)
3681
26.2M
{
3682
26.2M
    if (size < 0) {
3683
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3684
0
        return NULL;
3685
0
    }
3686
3687
26.2M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3688
26.2M
    if (writer == NULL) {
3689
12.7k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3690
12.7k
        if (writer == NULL) {
3691
0
            PyErr_NoMemory();
3692
0
            return NULL;
3693
0
        }
3694
12.7k
    }
3695
26.2M
    writer->obj = NULL;
3696
26.2M
    writer->size = 0;
3697
26.2M
    writer->use_bytearray = use_bytearray;
3698
26.2M
    writer->overallocate = !use_bytearray;
3699
3700
26.2M
    if (size >= 1) {
3701
26.2M
        if (byteswriter_resize(writer, size, 0) < 0) {
3702
0
            PyBytesWriter_Discard(writer);
3703
0
            return NULL;
3704
0
        }
3705
26.2M
        writer->size = size;
3706
26.2M
    }
3707
#ifdef Py_DEBUG
3708
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3709
#endif
3710
26.2M
    return writer;
3711
26.2M
}
3712
3713
PyBytesWriter*
3714
PyBytesWriter_Create(Py_ssize_t size)
3715
26.2M
{
3716
26.2M
    return byteswriter_create(size, 0);
3717
26.2M
}
3718
3719
PyBytesWriter*
3720
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3721
0
{
3722
0
    return byteswriter_create(size, 1);
3723
0
}
3724
3725
3726
void
3727
PyBytesWriter_Discard(PyBytesWriter *writer)
3728
26.4M
{
3729
26.4M
    if (writer == NULL) {
3730
138k
        return;
3731
138k
    }
3732
3733
26.2M
    Py_XDECREF(writer->obj);
3734
26.2M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3735
26.2M
}
3736
3737
3738
PyObject*
3739
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3740
25.3M
{
3741
25.3M
    PyObject *result;
3742
25.3M
    if (size == 0) {
3743
48.5k
        result = bytes_get_empty();
3744
48.5k
    }
3745
25.2M
    else if (writer->obj != NULL) {
3746
950k
        if (writer->use_bytearray) {
3747
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3748
0
                if (PyByteArray_Resize(writer->obj, size)) {
3749
0
                    goto error;
3750
0
                }
3751
0
            }
3752
0
        }
3753
950k
        else {
3754
950k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3755
907k
                if (_PyBytes_Resize(&writer->obj, size)) {
3756
0
                    goto error;
3757
0
                }
3758
907k
            }
3759
950k
        }
3760
950k
        result = writer->obj;
3761
950k
        writer->obj = NULL;
3762
950k
    }
3763
24.3M
    else if (writer->use_bytearray) {
3764
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3765
0
    }
3766
24.3M
    else {
3767
24.3M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3768
24.3M
    }
3769
25.3M
    PyBytesWriter_Discard(writer);
3770
25.3M
    return result;
3771
3772
0
error:
3773
0
    PyBytesWriter_Discard(writer);
3774
0
    return NULL;
3775
25.3M
}
3776
3777
PyObject*
3778
PyBytesWriter_Finish(PyBytesWriter *writer)
3779
15.3M
{
3780
15.3M
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3781
15.3M
}
3782
3783
3784
PyObject*
3785
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3786
9.88M
{
3787
9.88M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3788
9.88M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3789
0
        PyBytesWriter_Discard(writer);
3790
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3791
0
        return NULL;
3792
0
    }
3793
3794
9.88M
    return PyBytesWriter_FinishWithSize(writer, size);
3795
9.88M
}
3796
3797
3798
void*
3799
PyBytesWriter_GetData(PyBytesWriter *writer)
3800
26.5M
{
3801
26.5M
    return byteswriter_data(writer);
3802
26.5M
}
3803
3804
3805
Py_ssize_t
3806
PyBytesWriter_GetSize(PyBytesWriter *writer)
3807
0
{
3808
0
    return _PyBytesWriter_GetSize(writer);
3809
0
}
3810
3811
3812
static Py_ssize_t
3813
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3814
11.8k
{
3815
11.8k
    return byteswriter_allocated(writer);
3816
11.8k
}
3817
3818
3819
int
3820
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3821
0
{
3822
0
    if (size < 0) {
3823
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3824
0
        return -1;
3825
0
    }
3826
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3827
0
        return -1;
3828
0
    }
3829
0
    writer->size = size;
3830
0
    return 0;
3831
0
}
3832
3833
3834
static void*
3835
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3836
                                      void *data)
3837
0
{
3838
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3839
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3840
0
        return NULL;
3841
0
    }
3842
0
    return byteswriter_data(writer) + pos;
3843
0
}
3844
3845
3846
int
3847
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3848
22.1k
{
3849
22.1k
    if (size < 0 && writer->size + size < 0) {
3850
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3851
0
        return -1;
3852
0
    }
3853
22.1k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3854
0
        PyErr_NoMemory();
3855
0
        return -1;
3856
0
    }
3857
22.1k
    size = writer->size + size;
3858
3859
22.1k
    if (byteswriter_resize(writer, size, 1) < 0) {
3860
0
        return -1;
3861
0
    }
3862
22.1k
    writer->size = size;
3863
22.1k
    return 0;
3864
22.1k
}
3865
3866
3867
void*
3868
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3869
                                   void *buf)
3870
0
{
3871
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3872
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3873
0
        return NULL;
3874
0
    }
3875
0
    return byteswriter_data(writer) + pos;
3876
0
}
3877
3878
3879
int
3880
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3881
                         const void *bytes, Py_ssize_t size)
3882
0
{
3883
0
    if (size < 0) {
3884
0
        size_t len = strlen(bytes);
3885
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3886
0
            PyErr_NoMemory();
3887
0
            return -1;
3888
0
        }
3889
0
        size = (Py_ssize_t)len;
3890
0
    }
3891
3892
0
    Py_ssize_t pos = writer->size;
3893
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3894
0
        return -1;
3895
0
    }
3896
0
    char *buf = byteswriter_data(writer);
3897
0
    memcpy(buf + pos, bytes, size);
3898
0
    return 0;
3899
0
}
3900
3901
3902
int
3903
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3904
0
{
3905
0
    Py_ssize_t pos = writer->size;
3906
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3907
0
        return -1;
3908
0
    }
3909
3910
0
    va_list vargs;
3911
0
    va_start(vargs, format);
3912
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3913
0
    va_end(vargs);
3914
3915
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3916
0
    return PyBytesWriter_Resize(writer, size);
3917
0
}