Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
215M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
29
30
/* Forward declaration */
31
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
32
                                                   Py_ssize_t size, void *data);
33
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
34
35
36
11.3M
/* Table of cached single-byte bytes objects; callers in this file index it
   with a byte value masked to 0..255. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the cached one-byte bytes object for byte value `ch`.
   The expansion is a plain expression (no trailing semicolon), so it can be
   used inside larger expressions as well as on the right of an assignment. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
/* Pointer to the empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
40
41
42
// Return a reference to the immortal empty bytes string singleton.
43
static inline PyObject* bytes_get_empty(void)
44
6.53M
{
45
6.53M
    PyObject *empty = &EMPTY->ob_base.ob_base;
46
6.53M
    assert(_Py_IsImmortal(empty));
47
6.53M
    return empty;
48
6.53M
}
49
50
51
static inline void
52
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
53
148M
{
54
148M
_Py_COMP_DIAG_PUSH
55
148M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
56
#ifdef Py_GIL_DISABLED
57
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
58
#else
59
148M
    a->ob_shash = hash;
60
148M
#endif
61
148M
_Py_COMP_DIAG_POP
62
148M
}
63
64
static inline Py_hash_t
65
get_ob_shash(PyBytesObject *a)
66
74.3M
{
67
74.3M
_Py_COMP_DIAG_PUSH
68
74.3M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
69
#ifdef Py_GIL_DISABLED
70
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
71
#else
72
74.3M
    return a->ob_shash;
73
74.3M
#endif
74
74.3M
_Py_COMP_DIAG_POP
75
74.3M
}
76
77
78
/*
79
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
80
   string whose length, as computed by strlen(), becomes the object's size.
81
82
   For PyBytes_FromStringAndSize(), the parameter 'str' is
83
   either NULL or else points to a string containing at least 'size' bytes.
84
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
85
   not have to be null-terminated.  (Therefore it is safe to construct a
86
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
87
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
88
   bytes (setting the last byte to the null terminating character) and you can
89
   fill in the data yourself.  If 'str' is non-NULL then the resulting
90
   PyBytes object must be treated as immutable and you must not fill in nor
91
   alter the data yourself, since the strings may be shared.
92
93
   The PyObject member 'op->ob_size', which denotes the number of "extra
94
   items" in a variable-size object, will contain the number of bytes
95
   allocated for string data, not counting the null terminating character.
96
   It is therefore equal to the 'size' parameter (for
97
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
98
   parameter (for PyBytes_FromString()).
99
*/
100
static PyObject *
101
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
102
106M
{
103
106M
    PyBytesObject *op;
104
106M
    assert(size >= 0);
105
106
106M
    if (size == 0) {
107
0
        return bytes_get_empty();
108
0
    }
109
110
106M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
111
0
        PyErr_SetString(PyExc_OverflowError,
112
0
                        "byte string is too large");
113
0
        return NULL;
114
0
    }
115
116
    /* Inline PyObject_NewVar */
117
106M
    if (use_calloc)
118
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
119
106M
    else
120
106M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
121
106M
    if (op == NULL) {
122
0
        return PyErr_NoMemory();
123
0
    }
124
106M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
125
106M
    set_ob_shash(op, -1);
126
106M
    if (!use_calloc) {
127
106M
        op->ob_sval[size] = '\0';
128
106M
    }
129
106M
    return (PyObject *) op;
130
106M
}
131
132
PyObject *
133
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
134
123M
{
135
123M
    PyBytesObject *op;
136
123M
    if (size < 0) {
137
0
        PyErr_SetString(PyExc_SystemError,
138
0
            "Negative size passed to PyBytes_FromStringAndSize");
139
0
        return NULL;
140
0
    }
141
123M
    if (size == 1 && str != NULL) {
142
11.3M
        op = CHARACTER(*str & 255);
143
11.3M
        assert(_Py_IsImmortal(op));
144
11.3M
        return (PyObject *)op;
145
11.3M
    }
146
111M
    if (size == 0) {
147
6.37M
        return bytes_get_empty();
148
6.37M
    }
149
150
105M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
151
105M
    if (op == NULL)
152
0
        return NULL;
153
105M
    if (str == NULL)
154
9.52M
        return (PyObject *) op;
155
156
95.9M
    memcpy(op->ob_sval, str, size);
157
95.9M
    return (PyObject *) op;
158
105M
}
159
160
PyObject *
161
PyBytes_FromString(const char *str)
162
768
{
163
768
    size_t size;
164
768
    PyBytesObject *op;
165
166
768
    assert(str != NULL);
167
768
    size = strlen(str);
168
768
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
169
0
        PyErr_SetString(PyExc_OverflowError,
170
0
            "byte string is too long");
171
0
        return NULL;
172
0
    }
173
174
768
    if (size == 0) {
175
0
        return bytes_get_empty();
176
0
    }
177
768
    else if (size == 1) {
178
0
        op = CHARACTER(*str & 255);
179
0
        assert(_Py_IsImmortal(op));
180
0
        return (PyObject *)op;
181
0
    }
182
183
    /* Inline PyObject_NewVar */
184
768
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
185
768
    if (op == NULL) {
186
0
        return PyErr_NoMemory();
187
0
    }
188
768
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
189
768
    set_ob_shash(op, -1);
190
768
    memcpy(op->ob_sval, str, size+1);
191
768
    return (PyObject *) op;
192
768
}
193
194
195
static char*
196
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
197
                 const char *format, va_list vargs)
198
0
{
199
0
    const char *f;
200
0
    const char *p;
201
0
    Py_ssize_t prec;
202
0
    int longflag;
203
0
    int size_tflag;
204
    /* Longest 64-bit formatted numbers:
205
       - "18446744073709551615\0" (21 bytes)
206
       - "-9223372036854775808\0" (21 bytes)
207
       Decimal takes the most space (it isn't enough for octal.)
208
209
       Longest 64-bit pointer representation:
210
       "0xffffffffffffffff\0" (19 bytes). */
211
0
    char buffer[21];
212
213
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
214
215
0
#define WRITE_BYTES_LEN(str, len_expr) \
216
0
    do { \
217
0
        size_t len = (len_expr); \
218
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
219
0
        if (s == NULL) { \
220
0
            goto error; \
221
0
        } \
222
0
        memcpy(s, (str), len); \
223
0
        s += len; \
224
0
    } while (0)
225
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
226
227
0
    for (f = format; *f; f++) {
228
0
        if (*f != '%') {
229
0
            *s++ = *f;
230
0
            continue;
231
0
        }
232
233
0
        p = f++;
234
235
        /* ignore the width (ex: 10 in "%10s") */
236
0
        while (Py_ISDIGIT(*f))
237
0
            f++;
238
239
        /* parse the precision (ex: 10 in "%.10s") */
240
0
        prec = 0;
241
0
        if (*f == '.') {
242
0
            f++;
243
0
            for (; Py_ISDIGIT(*f); f++) {
244
0
                prec = (prec * 10) + (*f - '0');
245
0
            }
246
0
        }
247
248
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
249
0
            f++;
250
251
        /* handle the long flag ('l'), but only for %ld and %lu.
252
           others can be added when necessary. */
253
0
        longflag = 0;
254
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
255
0
            longflag = 1;
256
0
            ++f;
257
0
        }
258
259
        /* handle the size_t flag ('z'). */
260
0
        size_tflag = 0;
261
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
262
0
            size_tflag = 1;
263
0
            ++f;
264
0
        }
265
266
0
        switch (*f) {
267
0
        case 'c':
268
0
        {
269
0
            int c = va_arg(vargs, int);
270
0
            if (c < 0 || c > 255) {
271
0
                PyErr_SetString(PyExc_OverflowError,
272
0
                                "PyBytes_FromFormatV(): %c format "
273
0
                                "expects an integer in range [0; 255]");
274
0
                goto error;
275
0
            }
276
0
            *s++ = (unsigned char)c;
277
0
            break;
278
0
        }
279
280
0
        case 'd':
281
0
            if (longflag) {
282
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
283
0
            }
284
0
            else if (size_tflag) {
285
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
286
0
            }
287
0
            else {
288
0
                sprintf(buffer, "%d", va_arg(vargs, int));
289
0
            }
290
0
            assert(strlen(buffer) < sizeof(buffer));
291
0
            WRITE_BYTES(buffer);
292
0
            break;
293
294
0
        case 'u':
295
0
            if (longflag) {
296
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
297
0
            }
298
0
            else if (size_tflag) {
299
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
300
0
            }
301
0
            else {
302
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
303
0
            }
304
0
            assert(strlen(buffer) < sizeof(buffer));
305
0
            WRITE_BYTES(buffer);
306
0
            break;
307
308
0
        case 'i':
309
0
            sprintf(buffer, "%i", va_arg(vargs, int));
310
0
            assert(strlen(buffer) < sizeof(buffer));
311
0
            WRITE_BYTES(buffer);
312
0
            break;
313
314
0
        case 'x':
315
0
            sprintf(buffer, "%x", va_arg(vargs, int));
316
0
            assert(strlen(buffer) < sizeof(buffer));
317
0
            WRITE_BYTES(buffer);
318
0
            break;
319
320
0
        case 's':
321
0
        {
322
0
            Py_ssize_t i;
323
324
0
            p = va_arg(vargs, const char*);
325
0
            if (prec <= 0) {
326
0
                i = strlen(p);
327
0
            }
328
0
            else {
329
0
                i = 0;
330
0
                while (i < prec && p[i]) {
331
0
                    i++;
332
0
                }
333
0
            }
334
0
            WRITE_BYTES_LEN(p, i);
335
0
            break;
336
0
        }
337
338
0
        case 'p':
339
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
340
0
            assert(strlen(buffer) < sizeof(buffer));
341
            /* %p is ill-defined:  ensure leading 0x. */
342
0
            if (buffer[1] == 'X')
343
0
                buffer[1] = 'x';
344
0
            else if (buffer[1] != 'x') {
345
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
346
0
                buffer[0] = '0';
347
0
                buffer[1] = 'x';
348
0
            }
349
0
            WRITE_BYTES(buffer);
350
0
            break;
351
352
0
        case '%':
353
0
            *s++ = '%';
354
0
            break;
355
356
0
        default:
357
            /* invalid format string: copy unformatted string and exit */
358
0
            WRITE_BYTES(p);
359
0
            return s;
360
0
        }
361
0
    }
362
363
0
#undef WRITE_BYTES
364
0
#undef WRITE_BYTES_LEN
365
366
0
    return s;
367
368
0
 error:
369
0
    return NULL;
370
0
}
371
372
373
PyObject *
374
PyBytes_FromFormatV(const char *format, va_list vargs)
375
0
{
376
0
    Py_ssize_t alloc = strlen(format);
377
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
378
0
    if (writer == NULL) {
379
0
        return NULL;
380
0
    }
381
382
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
383
0
    if (s == NULL) {
384
0
        PyBytesWriter_Discard(writer);
385
0
        return NULL;
386
0
    }
387
388
0
    return PyBytesWriter_FinishWithPointer(writer, s);
389
0
}
390
391
392
/* Variadic front end: collects the arguments into a va_list and forwards
   them to PyBytes_FromFormatV(), returning its result unchanged. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
403
404
405
/* Helpers for formatstring */
406
407
0
/* Raise EXC with a message that identifies the offending format argument.
   The macro must be expanded in a scope that provides:
     key    - pointer, NULL when no mapping key applies;
     argidx - signed index, negative when no positional index applies.
   The prefix is "format argument %R: " when key is set, otherwise
   "format argument %zd: " when argidx >= 0, otherwise "format argument: ".
   FMT must be a string literal, and at least one variadic argument is
   required (callers with nothing to pass use a trailing "%s" with ""). */
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (key == NULL && argidx < 0) {                                        \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
    else if (key == NULL) {                                                 \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     argidx, __VA_ARGS__);                                  \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     key, __VA_ARGS__);                                     \
    }                                                                       \
} while (0)
420
421
/* Fetch the next format argument and advance *p_argidx.

   When arglen >= 0, `args` is read with PyTuple_GetItem() at the current
   index.  When arglen is negative, `args` itself is returned, but only if
   `allowone` is non-zero.  In every other case TypeError is raised and NULL
   is returned.  Note that *p_argidx is advanced whenever the current index
   is below arglen, even if the call then fails. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone)
{
    const Py_ssize_t current = *p_argidx;

    if (current < arglen) {
        *p_argidx = current + 1;
        if (arglen >= 0) {
            return PyTuple_GetItem(args, current);
        }
        if (allowone) {
            return args;
        }
    }

    Py_ssize_t got = (arglen < 0) ? 1 : arglen;
    PyErr_Format(PyExc_TypeError,
                 "not enough arguments for format string (got %zd)",
                 got);
    return NULL;
}
439
440
/* Returns a new reference to a PyBytes object, or NULL on failure. */
441
442
static char*
443
formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key,
444
            int flags, int prec, int type,
445
            PyObject **p_result, PyBytesWriter *writer, char *str)
446
0
{
447
0
    char *p;
448
0
    PyObject *result;
449
0
    double x;
450
0
    size_t len;
451
0
    int dtoa_flags = 0;
452
453
0
    x = PyFloat_AsDouble(v);
454
0
    if (x == -1.0 && PyErr_Occurred()) {
455
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
456
0
            FORMAT_ERROR(PyExc_TypeError,
457
0
                         "%%%c requires a real number, not %T",
458
0
                         type, v);
459
0
        }
460
0
        return NULL;
461
0
    }
462
463
0
    if (prec < 0)
464
0
        prec = 6;
465
466
0
    if (flags & F_ALT) {
467
0
        dtoa_flags |= Py_DTSF_ALT;
468
0
    }
469
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
470
471
0
    if (p == NULL)
472
0
        return NULL;
473
474
0
    len = strlen(p);
475
0
    if (writer != NULL) {
476
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
477
0
        if (str == NULL) {
478
0
            PyMem_Free(p);
479
0
            return NULL;
480
0
        }
481
0
        memcpy(str, p, len);
482
0
        PyMem_Free(p);
483
0
        str += len;
484
0
        return str;
485
0
    }
486
487
0
    result = PyBytes_FromStringAndSize(p, len);
488
0
    PyMem_Free(p);
489
0
    *p_result = result;
490
0
    return result != NULL ? str : NULL;
491
0
}
492
493
static PyObject *
494
formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key,
495
           int flags, int prec, int type)
496
0
{
497
0
    PyObject *result, *iobj;
498
0
    if (PyLong_Check(v))
499
0
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
500
0
    if (PyNumber_Check(v)) {
501
        /* make sure number is a type of integer for o, x, and X */
502
0
        if (type == 'o' || type == 'x' || type == 'X')
503
0
            iobj = _PyNumber_Index(v);
504
0
        else
505
0
            iobj = PyNumber_Long(v);
506
0
        if (iobj != NULL) {
507
0
            assert(PyLong_Check(iobj));
508
0
            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
509
0
            Py_DECREF(iobj);
510
0
            return result;
511
0
        }
512
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
513
0
            return NULL;
514
0
    }
515
0
    FORMAT_ERROR(PyExc_TypeError,
516
0
                 "%%%c requires %s, not %T",
517
0
                 type,
518
0
                 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
519
0
                                                             : "a real number",
520
0
                 v);
521
0
    return NULL;
522
0
}
523
524
static int
525
byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p)
526
0
{
527
0
    if (PyBytes_Check(arg)) {
528
0
        if (PyBytes_GET_SIZE(arg) != 1) {
529
0
            FORMAT_ERROR(PyExc_TypeError,
530
0
                         "%%c requires an integer in range(256) or "
531
0
                         "a single byte, not a bytes object of length %zd",
532
0
                         PyBytes_GET_SIZE(arg));
533
0
            return 0;
534
0
        }
535
0
        *p = PyBytes_AS_STRING(arg)[0];
536
0
        return 1;
537
0
    }
538
0
    else if (PyByteArray_Check(arg)) {
539
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
540
0
            FORMAT_ERROR(PyExc_TypeError,
541
0
                         "%%c requires an integer in range(256) or "
542
0
                         "a single byte, not a bytearray object of length %zd",
543
0
                         PyByteArray_GET_SIZE(arg));
544
0
            return 0;
545
0
        }
546
0
        *p = PyByteArray_AS_STRING(arg)[0];
547
0
        return 1;
548
0
    }
549
0
    else if (PyIndex_Check(arg)) {
550
0
        int overflow;
551
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
552
0
        if (ival == -1 && PyErr_Occurred()) {
553
0
            return 0;
554
0
        }
555
0
        if (!(0 <= ival && ival <= 255)) {
556
            /* this includes an overflow in converting to C long */
557
0
            FORMAT_ERROR(PyExc_OverflowError,
558
0
                         "%%c argument not in range(256)%s", "");
559
0
            return 0;
560
0
        }
561
0
        *p = (char)ival;
562
0
        return 1;
563
0
    }
564
0
    FORMAT_ERROR(PyExc_TypeError,
565
0
                 "%%c requires an integer in range(256) or "
566
0
                 "a single byte, not %T",
567
0
                 arg);
568
0
    return 0;
569
0
}
570
571
static PyObject *_PyBytes_FromBuffer(PyObject *x);
572
573
static PyObject *
574
format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key,
575
           const char **pbuf, Py_ssize_t *plen)
576
0
{
577
0
    PyObject *func, *result;
578
    /* is it a bytes object? */
579
0
    if (PyBytes_Check(v)) {
580
0
        *pbuf = PyBytes_AS_STRING(v);
581
0
        *plen = PyBytes_GET_SIZE(v);
582
0
        return Py_NewRef(v);
583
0
    }
584
0
    if (PyByteArray_Check(v)) {
585
0
        *pbuf = PyByteArray_AS_STRING(v);
586
0
        *plen = PyByteArray_GET_SIZE(v);
587
0
        return Py_NewRef(v);
588
0
    }
589
    /* does it support __bytes__? */
590
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
591
0
    if (func != NULL) {
592
0
        result = _PyObject_CallNoArgs(func);
593
0
        Py_DECREF(func);
594
0
        if (result == NULL)
595
0
            return NULL;
596
0
        if (!PyBytes_Check(result)) {
597
0
            PyErr_Format(PyExc_TypeError,
598
0
                         "%T.__bytes__() must return a bytes, not %T",
599
0
                         v, result);
600
0
            Py_DECREF(result);
601
0
            return NULL;
602
0
        }
603
0
        *pbuf = PyBytes_AS_STRING(result);
604
0
        *plen = PyBytes_GET_SIZE(result);
605
0
        return result;
606
0
    }
607
    /* does it support buffer protocol? */
608
0
    if (PyObject_CheckBuffer(v)) {
609
        /* maybe we can avoid making a copy of the buffer object here? */
610
0
        result = _PyBytes_FromBuffer(v);
611
0
        if (result == NULL)
612
0
            return NULL;
613
0
        *pbuf = PyBytes_AS_STRING(result);
614
0
        *plen = PyBytes_GET_SIZE(result);
615
0
        return result;
616
0
    }
617
0
    FORMAT_ERROR(PyExc_TypeError,
618
0
                 "%%b requires a bytes-like object, "
619
0
                 "or an object that implements __bytes__, not %T",
620
0
                 v);
621
0
    return NULL;
622
0
}
623
624
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
625
626
PyObject *
627
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
628
                  PyObject *args, int use_bytearray)
629
0
{
630
0
    const char *fmt;
631
0
    Py_ssize_t arglen, argidx;
632
0
    Py_ssize_t fmtcnt;
633
0
    int args_owned = 0;
634
0
    PyObject *dict = NULL;
635
0
    PyObject *key = NULL;
636
637
0
    if (args == NULL) {
638
0
        PyErr_BadInternalCall();
639
0
        return NULL;
640
0
    }
641
0
    fmt = format;
642
0
    fmtcnt = format_len;
643
644
0
    PyBytesWriter *writer;
645
0
    if (use_bytearray) {
646
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
647
0
    }
648
0
    else {
649
0
        writer = PyBytesWriter_Create(fmtcnt);
650
0
    }
651
0
    if (writer == NULL) {
652
0
        return NULL;
653
0
    }
654
0
    char *res = PyBytesWriter_GetData(writer);
655
656
0
    if (PyTuple_Check(args)) {
657
0
        arglen = PyTuple_GET_SIZE(args);
658
0
        argidx = 0;
659
0
    }
660
0
    else {
661
0
        arglen = -1;
662
0
        argidx = -2;
663
0
    }
664
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
665
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
666
0
        !PyByteArray_Check(args)) {
667
0
            dict = args;
668
0
    }
669
670
0
    while (--fmtcnt >= 0) {
671
0
        if (*fmt != '%') {
672
0
            Py_ssize_t len;
673
0
            char *pos;
674
675
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
676
0
            if (pos != NULL)
677
0
                len = pos - fmt;
678
0
            else
679
0
                len = fmtcnt + 1;
680
0
            assert(len != 0);
681
682
0
            memcpy(res, fmt, len);
683
0
            res += len;
684
0
            fmt += len;
685
0
            fmtcnt -= (len - 1);
686
0
        }
687
0
        else {
688
            /* Got a format specifier */
689
0
            int flags = 0;
690
0
            Py_ssize_t width = -1;
691
0
            int prec = -1;
692
0
            int c = '\0';
693
0
            int fill;
694
0
            PyObject *v = NULL;
695
0
            PyObject *temp = NULL;
696
0
            const char *pbuf = NULL;
697
0
            int sign;
698
0
            Py_ssize_t len = 0;
699
0
            char onechar; /* For byte_converter() */
700
0
            Py_ssize_t alloc;
701
702
0
            fmt++;
703
0
            if (*fmt == '%') {
704
0
                *res++ = '%';
705
0
                fmt++;
706
0
                fmtcnt--;
707
0
                continue;
708
0
            }
709
0
            Py_CLEAR(key);
710
0
            const char *fmtstart = fmt;
711
0
            if (*fmt == '(') {
712
0
                const char *keystart;
713
0
                Py_ssize_t keylen;
714
0
                int pcount = 1;
715
716
0
                if (dict == NULL) {
717
0
                    PyErr_Format(PyExc_TypeError,
718
0
                                 "format requires a mapping, not %T",
719
0
                                 args);
720
0
                    goto error;
721
0
                }
722
0
                ++fmt;
723
0
                --fmtcnt;
724
0
                keystart = fmt;
725
                /* Skip over balanced parentheses */
726
0
                while (pcount > 0 && --fmtcnt >= 0) {
727
0
                    if (*fmt == ')')
728
0
                        --pcount;
729
0
                    else if (*fmt == '(')
730
0
                        ++pcount;
731
0
                    fmt++;
732
0
                }
733
0
                keylen = fmt - keystart - 1;
734
0
                if (fmtcnt < 0 || pcount > 0) {
735
0
                    PyErr_Format(PyExc_ValueError,
736
0
                                 "stray %% or incomplete format key "
737
0
                                 "at position %zd",
738
0
                                 (Py_ssize_t)(fmtstart - format - 1));
739
0
                    goto error;
740
0
                }
741
0
                key = PyBytes_FromStringAndSize(keystart,
742
0
                                                 keylen);
743
0
                if (key == NULL)
744
0
                    goto error;
745
0
                if (args_owned) {
746
0
                    Py_DECREF(args);
747
0
                    args_owned = 0;
748
0
                }
749
0
                args = PyObject_GetItem(dict, key);
750
0
                if (args == NULL) {
751
0
                    goto error;
752
0
                }
753
0
                args_owned = 1;
754
0
                arglen = -3;
755
0
                argidx = -4;
756
0
            }
757
0
            else {
758
0
                if (arglen < -1) {
759
0
                    PyErr_Format(PyExc_ValueError,
760
0
                                 "format requires a parenthesised mapping key "
761
0
                                 "at position %zd",
762
0
                                 (Py_ssize_t)(fmtstart - format - 1));
763
0
                    goto error;
764
0
                }
765
0
            }
766
767
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
768
0
            while (--fmtcnt >= 0) {
769
0
                switch (c = *fmt++) {
770
0
                case '-': flags |= F_LJUST; continue;
771
0
                case '+': flags |= F_SIGN; continue;
772
0
                case ' ': flags |= F_BLANK; continue;
773
0
                case '#': flags |= F_ALT; continue;
774
0
                case '0': flags |= F_ZERO; continue;
775
0
                }
776
0
                break;
777
0
            }
778
779
            /* Parse width. Example: "%10s" => width=10 */
780
0
            if (c == '*') {
781
0
                if (arglen < -1) {
782
0
                    PyErr_Format(PyExc_ValueError,
783
0
                            "* cannot be used with a parenthesised mapping key "
784
0
                            "at position %zd",
785
0
                            (Py_ssize_t)(fmtstart - format - 1));
786
0
                    goto error;
787
0
                }
788
0
                v = getnextarg(args, arglen, &argidx, 0);
789
0
                if (v == NULL)
790
0
                    goto error;
791
0
                if (!PyLong_Check(v)) {
792
0
                    FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
793
0
                    goto error;
794
0
                }
795
0
                width = PyLong_AsSsize_t(v);
796
0
                if (width == -1 && PyErr_Occurred()) {
797
0
                    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
798
0
                        FORMAT_ERROR(PyExc_OverflowError,
799
0
                                     "too big for width%s", "");
800
0
                    }
801
0
                    goto error;
802
0
                }
803
0
                if (width < 0) {
804
0
                    flags |= F_LJUST;
805
0
                    width = -width;
806
0
                }
807
0
                if (--fmtcnt >= 0)
808
0
                    c = *fmt++;
809
0
            }
810
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
811
0
                width = c - '0';
812
0
                while (--fmtcnt >= 0) {
813
0
                    c = Py_CHARMASK(*fmt++);
814
0
                    if (!Py_ISDIGIT(c))
815
0
                        break;
816
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
817
0
                        PyErr_Format(PyExc_ValueError,
818
0
                                     "width too big at position %zd",
819
0
                                     (Py_ssize_t)(fmtstart - format - 1));
820
0
                        goto error;
821
0
                    }
822
0
                    width = width*10 + (c - '0');
823
0
                }
824
0
            }
825
826
            /* Parse precision. Example: "%.3f" => prec=3 */
827
0
            if (c == '.') {
828
0
                prec = 0;
829
0
                if (--fmtcnt >= 0)
830
0
                    c = *fmt++;
831
0
                if (c == '*') {
832
0
                    if (arglen < -1) {
833
0
                        PyErr_Format(PyExc_ValueError,
834
0
                                "* cannot be used with a parenthesised mapping key "
835
0
                                "at position %zd",
836
0
                                (Py_ssize_t)(fmtstart - format - 1));
837
0
                        goto error;
838
0
                    }
839
0
                    v = getnextarg(args, arglen, &argidx, 0);
840
0
                    if (v == NULL)
841
0
                        goto error;
842
0
                    if (!PyLong_Check(v)) {
843
0
                        FORMAT_ERROR(PyExc_TypeError,
844
0
                                     "* requires int, not %T", v);
845
0
                        goto error;
846
0
                    }
847
0
                    prec = PyLong_AsInt(v);
848
0
                    if (prec == -1 && PyErr_Occurred()) {
849
0
                        if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
850
0
                            FORMAT_ERROR(PyExc_OverflowError,
851
0
                                         "too big for precision%s", "");
852
0
                        }
853
0
                        goto error;
854
0
                    }
855
0
                    if (prec < 0)
856
0
                        prec = 0;
857
0
                    if (--fmtcnt >= 0)
858
0
                        c = *fmt++;
859
0
                }
860
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
861
0
                    prec = c - '0';
862
0
                    while (--fmtcnt >= 0) {
863
0
                        c = Py_CHARMASK(*fmt++);
864
0
                        if (!Py_ISDIGIT(c))
865
0
                            break;
866
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
867
0
                            PyErr_Format(PyExc_ValueError,
868
0
                                "precision too big at position %zd",
869
0
                                (Py_ssize_t)(fmtstart - format - 1));
870
0
                            goto error;
871
0
                        }
872
0
                        prec = prec*10 + (c - '0');
873
0
                    }
874
0
                }
875
0
            } /* prec */
876
0
            if (fmtcnt >= 0) {
877
0
                if (c == 'h' || c == 'l' || c == 'L') {
878
0
                    if (--fmtcnt >= 0)
879
0
                        c = *fmt++;
880
0
                }
881
0
            }
882
0
            if (fmtcnt < 0) {
883
0
                PyErr_Format(PyExc_ValueError,
884
0
                             "stray %% at position %zd",
885
0
                             (Py_ssize_t)(fmtstart - format - 1));
886
0
                goto error;
887
0
            }
888
0
            v = getnextarg(args, arglen, &argidx, 1);
889
0
            if (v == NULL)
890
0
                goto error;
891
892
0
            if (fmtcnt == 0) {
893
                /* last write: disable writer overallocation */
894
0
                writer->overallocate = 0;
895
0
            }
896
897
0
            sign = 0;
898
0
            fill = ' ';
899
0
            switch (c) {
900
0
            case 'r':
901
                // %r is only for 2/3 code; 3 only code should use %a
902
0
            case 'a':
903
0
                temp = PyObject_ASCII(v);
904
0
                if (temp == NULL)
905
0
                    goto error;
906
0
                assert(PyUnicode_IS_ASCII(temp));
907
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
908
0
                len = PyUnicode_GET_LENGTH(temp);
909
0
                if (prec >= 0 && len > prec)
910
0
                    len = prec;
911
0
                break;
912
913
0
            case 's':
914
                // %s is only for 2/3 code; 3 only code should use %b
915
0
            case 'b':
916
0
                temp = format_obj(v, argidx, key, &pbuf, &len);
917
0
                if (temp == NULL)
918
0
                    goto error;
919
0
                if (prec >= 0 && len > prec)
920
0
                    len = prec;
921
0
                break;
922
923
0
            case 'i':
924
0
            case 'd':
925
0
            case 'u':
926
0
            case 'o':
927
0
            case 'x':
928
0
            case 'X':
929
0
                if (PyLong_CheckExact(v)
930
0
                    && width == -1 && prec == -1
931
0
                    && !(flags & (F_SIGN | F_BLANK))
932
0
                    && c != 'X')
933
0
                {
934
                    /* Fast path */
935
0
                    int alternate = flags & F_ALT;
936
0
                    int base;
937
938
0
                    switch(c)
939
0
                    {
940
0
                        default:
941
0
                            Py_UNREACHABLE();
942
0
                        case 'd':
943
0
                        case 'i':
944
0
                        case 'u':
945
0
                            base = 10;
946
0
                            break;
947
0
                        case 'o':
948
0
                            base = 8;
949
0
                            break;
950
0
                        case 'x':
951
0
                        case 'X':
952
0
                            base = 16;
953
0
                            break;
954
0
                    }
955
956
                    /* Fast path */
957
0
                    res = _PyLong_FormatBytesWriter(writer, res,
958
0
                                                    v, base, alternate);
959
0
                    if (res == NULL)
960
0
                        goto error;
961
0
                    continue;
962
0
                }
963
964
0
                temp = formatlong(v, argidx, key, flags, prec, c);
965
0
                if (!temp)
966
0
                    goto error;
967
0
                assert(PyUnicode_IS_ASCII(temp));
968
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
969
0
                len = PyUnicode_GET_LENGTH(temp);
970
0
                sign = 1;
971
0
                if (flags & F_ZERO)
972
0
                    fill = '0';
973
0
                break;
974
975
0
            case 'e':
976
0
            case 'E':
977
0
            case 'f':
978
0
            case 'F':
979
0
            case 'g':
980
0
            case 'G':
981
0
                if (width == -1 && prec == -1
982
0
                    && !(flags & (F_SIGN | F_BLANK)))
983
0
                {
984
                    /* Fast path */
985
0
                    res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res);
986
0
                    if (res == NULL)
987
0
                        goto error;
988
0
                    continue;
989
0
                }
990
991
0
                if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res))
992
0
                    goto error;
993
0
                pbuf = PyBytes_AS_STRING(temp);
994
0
                len = PyBytes_GET_SIZE(temp);
995
0
                sign = 1;
996
0
                if (flags & F_ZERO)
997
0
                    fill = '0';
998
0
                break;
999
1000
0
            case 'c':
1001
0
                pbuf = &onechar;
1002
0
                len = byte_converter(v, argidx, key, &onechar);
1003
0
                if (!len)
1004
0
                    goto error;
1005
0
                if (width == -1) {
1006
                    /* Fast path */
1007
0
                    *res++ = onechar;
1008
0
                    continue;
1009
0
                }
1010
0
                break;
1011
1012
0
            default:
1013
0
                if (Py_ISALPHA(c)) {
1014
0
                    PyErr_Format(PyExc_ValueError,
1015
0
                                 "unsupported format %%%c at position %zd",
1016
0
                                 c, (Py_ssize_t)(fmtstart - format - 1));
1017
0
                }
1018
0
                else if (c == '\'') {
1019
0
                    PyErr_Format(PyExc_ValueError,
1020
0
                                 "stray %% at position %zd or unexpected "
1021
0
                                 "format character \"'\" "
1022
0
                                 "at position %zd",
1023
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1024
0
                                 (Py_ssize_t)(fmt - format - 1));
1025
0
                }
1026
0
                else if (c >= 32 && c < 127 && c != '\'') {
1027
0
                    PyErr_Format(PyExc_ValueError,
1028
0
                                 "stray %% at position %zd or unexpected "
1029
0
                                 "format character '%c' "
1030
0
                                 "at position %zd",
1031
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1032
0
                                 c, (Py_ssize_t)(fmt - format - 1));
1033
0
                }
1034
0
                else {
1035
0
                    PyErr_Format(PyExc_ValueError,
1036
0
                                 "stray %% at position %zd or unexpected "
1037
0
                                 "format character with code 0x%02x "
1038
0
                                 "at position %zd",
1039
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1040
0
                                 Py_CHARMASK(c),
1041
0
                                 (Py_ssize_t)(fmt - format - 1));
1042
0
                }
1043
0
                goto error;
1044
0
            }
1045
1046
0
            if (sign) {
1047
0
                if (*pbuf == '-' || *pbuf == '+') {
1048
0
                    sign = *pbuf++;
1049
0
                    len--;
1050
0
                }
1051
0
                else if (flags & F_SIGN)
1052
0
                    sign = '+';
1053
0
                else if (flags & F_BLANK)
1054
0
                    sign = ' ';
1055
0
                else
1056
0
                    sign = 0;
1057
0
            }
1058
0
            if (width < len)
1059
0
                width = len;
1060
1061
0
            alloc = width;
1062
0
            if (sign != 0 && len == width)
1063
0
                alloc++;
1064
            /* 2: size preallocated for %s */
1065
0
            if (alloc > 2) {
1066
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
1067
0
                if (res == NULL) {
1068
0
                    Py_XDECREF(temp);
1069
0
                    goto error;
1070
0
                }
1071
0
            }
1072
#ifndef NDEBUG
1073
            char *before = res;
1074
#endif
1075
1076
            /* Write the sign if needed */
1077
0
            if (sign) {
1078
0
                if (fill != ' ')
1079
0
                    *res++ = sign;
1080
0
                if (width > len)
1081
0
                    width--;
1082
0
            }
1083
1084
            /* Write the numeric prefix for "x", "X" and "o" formats
1085
               if the alternate form is used.
1086
               For example, write "0x" for the "%#x" format. */
1087
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1088
0
                assert(pbuf[0] == '0');
1089
0
                assert(pbuf[1] == c);
1090
0
                if (fill != ' ') {
1091
0
                    *res++ = *pbuf++;
1092
0
                    *res++ = *pbuf++;
1093
0
                }
1094
0
                width -= 2;
1095
0
                if (width < 0)
1096
0
                    width = 0;
1097
0
                len -= 2;
1098
0
            }
1099
1100
            /* Pad left with the fill character if needed */
1101
0
            if (width > len && !(flags & F_LJUST)) {
1102
0
                memset(res, fill, width - len);
1103
0
                res += (width - len);
1104
0
                width = len;
1105
0
            }
1106
1107
            /* If padding with spaces: write sign if needed and/or numeric
1108
               prefix if the alternate form is used */
1109
0
            if (fill == ' ') {
1110
0
                if (sign)
1111
0
                    *res++ = sign;
1112
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1113
0
                    assert(pbuf[0] == '0');
1114
0
                    assert(pbuf[1] == c);
1115
0
                    *res++ = *pbuf++;
1116
0
                    *res++ = *pbuf++;
1117
0
                }
1118
0
            }
1119
1120
            /* Copy bytes */
1121
0
            memcpy(res, pbuf, len);
1122
0
            res += len;
1123
1124
            /* Pad right with the fill character if needed */
1125
0
            if (width > len) {
1126
0
                memset(res, ' ', width - len);
1127
0
                res += (width - len);
1128
0
            }
1129
1130
0
            if (dict && (argidx < arglen)) {
1131
                // XXX: Never happens?
1132
0
                PyErr_SetString(PyExc_TypeError,
1133
0
                           "not all arguments converted during bytes formatting");
1134
0
                Py_XDECREF(temp);
1135
0
                goto error;
1136
0
            }
1137
0
            Py_XDECREF(temp);
1138
1139
#ifndef NDEBUG
1140
            /* check that we computed the exact size for this write */
1141
            assert((res - before) == alloc);
1142
#endif
1143
0
        } /* '%' */
1144
1145
        /* If overallocation was disabled, ensure that it was the last
1146
           write. Otherwise, we missed an optimization */
1147
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1148
0
    } /* until end */
1149
1150
0
    if (argidx < arglen && !dict) {
1151
0
        PyErr_Format(PyExc_TypeError,
1152
0
                     "not all arguments converted during bytes formatting "
1153
0
                     "(required %zd, got %zd)",
1154
0
                     arglen < 0 ? 0 : argidx,
1155
0
                     arglen < 0 ? 1 : arglen);
1156
0
        goto error;
1157
0
    }
1158
1159
0
    Py_XDECREF(key);
1160
0
    if (args_owned) {
1161
0
        Py_DECREF(args);
1162
0
    }
1163
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1164
1165
0
 error:
1166
0
    Py_XDECREF(key);
1167
0
    PyBytesWriter_Discard(writer);
1168
0
    if (args_owned) {
1169
0
        Py_DECREF(args);
1170
0
    }
1171
0
    return NULL;
1172
0
}
1173
1174
/* Unescape a backslash-escaped string. */
1175
PyObject *_PyBytes_DecodeEscape2(const char *s,
1176
                                Py_ssize_t len,
1177
                                const char *errors,
1178
                                int *first_invalid_escape_char,
1179
                                const char **first_invalid_escape_ptr)
1180
2.09k
{
1181
2.09k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1182
2.09k
    if (writer == NULL) {
1183
0
        return NULL;
1184
0
    }
1185
2.09k
    char *p = PyBytesWriter_GetData(writer);
1186
1187
2.09k
    *first_invalid_escape_char = -1;
1188
2.09k
    *first_invalid_escape_ptr = NULL;
1189
1190
2.09k
    const char *end = s + len;
1191
42.9k
    while (s < end) {
1192
40.8k
        if (*s != '\\') {
1193
30.7k
            *p++ = *s++;
1194
30.7k
            continue;
1195
30.7k
        }
1196
1197
10.1k
        s++;
1198
10.1k
        if (s == end) {
1199
0
            PyErr_SetString(PyExc_ValueError,
1200
0
                            "Trailing \\ in string");
1201
0
            goto failed;
1202
0
        }
1203
1204
10.1k
        switch (*s++) {
1205
        /* XXX This assumes ASCII! */
1206
1.05k
        case '\n': break;
1207
327
        case '\\': *p++ = '\\'; break;
1208
43
        case '\'': *p++ = '\''; break;
1209
231
        case '\"': *p++ = '\"'; break;
1210
4.06k
        case 'b': *p++ = '\b'; break;
1211
141
        case 'f': *p++ = '\014'; break; /* FF */
1212
161
        case 't': *p++ = '\t'; break;
1213
320
        case 'n': *p++ = '\n'; break;
1214
181
        case 'r': *p++ = '\r'; break;
1215
337
        case 'v': *p++ = '\013'; break; /* VT */
1216
36
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1217
970
        case '0': case '1': case '2': case '3':
1218
2.22k
        case '4': case '5': case '6': case '7':
1219
2.22k
        {
1220
2.22k
            int c = s[-1] - '0';
1221
2.22k
            if (s < end && '0' <= *s && *s <= '7') {
1222
799
                c = (c<<3) + *s++ - '0';
1223
799
                if (s < end && '0' <= *s && *s <= '7')
1224
237
                    c = (c<<3) + *s++ - '0';
1225
799
            }
1226
2.22k
            if (c > 0377) {
1227
195
                if (*first_invalid_escape_char == -1) {
1228
90
                    *first_invalid_escape_char = c;
1229
                    /* Back up 3 chars, since we've already incremented s. */
1230
90
                    *first_invalid_escape_ptr = s - 3;
1231
90
                }
1232
195
            }
1233
2.22k
            *p++ = c;
1234
2.22k
            break;
1235
1.78k
        }
1236
353
        case 'x':
1237
353
            if (s+1 < end) {
1238
352
                int digit1, digit2;
1239
352
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1240
352
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1241
352
                if (digit1 < 16 && digit2 < 16) {
1242
349
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1243
349
                    s += 2;
1244
349
                    break;
1245
349
                }
1246
352
            }
1247
            /* invalid hexadecimal digits */
1248
1249
4
            if (!errors || strcmp(errors, "strict") == 0) {
1250
4
                PyErr_Format(PyExc_ValueError,
1251
4
                             "invalid \\x escape at position %zd",
1252
4
                             s - 2 - (end - len));
1253
4
                goto failed;
1254
4
            }
1255
0
            if (strcmp(errors, "replace") == 0) {
1256
0
                *p++ = '?';
1257
0
            } else if (strcmp(errors, "ignore") == 0)
1258
0
                /* do nothing */;
1259
0
            else {
1260
0
                PyErr_Format(PyExc_ValueError,
1261
0
                             "decoding error; unknown "
1262
0
                             "error handling code: %.400s",
1263
0
                             errors);
1264
0
                goto failed;
1265
0
            }
1266
            /* skip \x */
1267
0
            if (s < end && Py_ISXDIGIT(s[0]))
1268
0
                s++; /* and a hexdigit */
1269
0
            break;
1270
1271
628
        default:
1272
628
            if (*first_invalid_escape_char == -1) {
1273
337
                *first_invalid_escape_char = (unsigned char)s[-1];
1274
                /* Back up one char, since we've already incremented s. */
1275
337
                *first_invalid_escape_ptr = s - 1;
1276
337
            }
1277
628
            *p++ = '\\';
1278
628
            s--;
1279
10.1k
        }
1280
10.1k
    }
1281
1282
2.09k
    return PyBytesWriter_FinishWithPointer(writer, p);
1283
1284
4
  failed:
1285
4
    PyBytesWriter_Discard(writer);
1286
4
    return NULL;
1287
2.09k
}
1288
1289
PyObject *PyBytes_DecodeEscape(const char *s,
1290
                                Py_ssize_t len,
1291
                                const char *errors,
1292
                                Py_ssize_t Py_UNUSED(unicode),
1293
                                const char *Py_UNUSED(recode_encoding))
1294
0
{
1295
0
    int first_invalid_escape_char;
1296
0
    const char *first_invalid_escape_ptr;
1297
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1298
0
                                             &first_invalid_escape_char,
1299
0
                                             &first_invalid_escape_ptr);
1300
0
    if (result == NULL)
1301
0
        return NULL;
1302
0
    if (first_invalid_escape_char != -1) {
1303
0
        if (first_invalid_escape_char > 0xff) {
1304
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1305
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1306
0
                                 "Such sequences will not work in the future. ",
1307
0
                                 first_invalid_escape_char) < 0)
1308
0
            {
1309
0
                Py_DECREF(result);
1310
0
                return NULL;
1311
0
            }
1312
0
        }
1313
0
        else {
1314
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1315
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1316
0
                                 "Such sequences will not work in the future. ",
1317
0
                                 first_invalid_escape_char) < 0)
1318
0
            {
1319
0
                Py_DECREF(result);
1320
0
                return NULL;
1321
0
            }
1322
0
        }
1323
0
    }
1324
0
    return result;
1325
0
}
1326
/* -------------------------------------------------------------------- */
1327
/* object api */
1328
1329
Py_ssize_t
1330
PyBytes_Size(PyObject *op)
1331
5.98k
{
1332
5.98k
    if (!PyBytes_Check(op)) {
1333
0
        PyErr_Format(PyExc_TypeError,
1334
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1335
0
        return -1;
1336
0
    }
1337
5.98k
    return Py_SIZE(op);
1338
5.98k
}
1339
1340
char *
1341
PyBytes_AsString(PyObject *op)
1342
2.25M
{
1343
2.25M
    if (!PyBytes_Check(op)) {
1344
0
        PyErr_Format(PyExc_TypeError,
1345
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1346
0
        return NULL;
1347
0
    }
1348
2.25M
    return ((PyBytesObject *)op)->ob_sval;
1349
2.25M
}
1350
1351
int
1352
PyBytes_AsStringAndSize(PyObject *obj,
1353
                         char **s,
1354
                         Py_ssize_t *len)
1355
57.7k
{
1356
57.7k
    if (s == NULL) {
1357
0
        PyErr_BadInternalCall();
1358
0
        return -1;
1359
0
    }
1360
1361
57.7k
    if (!PyBytes_Check(obj)) {
1362
0
        PyErr_Format(PyExc_TypeError,
1363
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1364
0
        return -1;
1365
0
    }
1366
1367
57.7k
    *s = PyBytes_AS_STRING(obj);
1368
57.7k
    if (len != NULL)
1369
57.7k
        *len = PyBytes_GET_SIZE(obj);
1370
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1371
0
        PyErr_SetString(PyExc_ValueError,
1372
0
                        "embedded null byte");
1373
0
        return -1;
1374
0
    }
1375
57.7k
    return 0;
1376
57.7k
}
1377
1378
/* -------------------------------------------------------------------- */
1379
/* Methods */
1380
1381
2.54k
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1382
1383
#include "stringlib/stringdefs.h"
1384
#define STRINGLIB_MUTABLE 0
1385
1386
#include "stringlib/fastsearch.h"
1387
#include "stringlib/count.h"
1388
#include "stringlib/find.h"
1389
#include "stringlib/join.h"
1390
#include "stringlib/partition.h"
1391
#include "stringlib/split.h"
1392
#include "stringlib/ctype.h"
1393
1394
#include "stringlib/transmogrify.h"
1395
1396
#undef STRINGLIB_GET_EMPTY
1397
1398
Py_ssize_t
1399
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1400
              const char *needle, Py_ssize_t len_needle,
1401
              Py_ssize_t offset)
1402
0
{
1403
0
    assert(len_haystack >= 0);
1404
0
    assert(len_needle >= 0);
1405
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1406
0
    if (len_needle == 0) {
1407
0
        return offset;
1408
0
    }
1409
0
    if (len_needle > len_haystack) {
1410
0
        return -1;
1411
0
    }
1412
0
    assert(len_haystack >= 1);
1413
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1414
0
                                    needle, len_needle, offset);
1415
0
    if (res == -1) {
1416
0
        Py_ssize_t last_align = len_haystack - len_needle;
1417
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1418
0
            return offset + last_align;
1419
0
        }
1420
0
    }
1421
0
    return res;
1422
0
}
1423
1424
Py_ssize_t
1425
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1426
                     const char *needle, Py_ssize_t len_needle,
1427
                     Py_ssize_t offset)
1428
0
{
1429
0
    return stringlib_rfind(haystack, len_haystack,
1430
0
                           needle, len_needle, offset);
1431
0
}
1432
1433
/* Build the repr of obj by passing its data, size and the smartquotes flag
   to _Py_bytes_repr(), with "bytes" as the class name. */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    Py_ssize_t size = PyBytes_GET_SIZE(obj);

    return _Py_bytes_repr(data, size, smartquotes, "bytes");
}
1439
1440
PyObject *
1441
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1442
               const char *classname)
1443
3.07k
{
1444
3.07k
    Py_ssize_t i;
1445
3.07k
    Py_ssize_t newsize, squotes, dquotes;
1446
3.07k
    PyObject *v;
1447
3.07k
    unsigned char quote;
1448
3.07k
    Py_UCS1 *p;
1449
1450
    /* Compute size of output string */
1451
3.07k
    squotes = dquotes = 0;
1452
3.07k
    newsize = 3; /* b'' */
1453
925k
    for (i = 0; i < length; i++) {
1454
922k
        unsigned char c = data[i];
1455
922k
        Py_ssize_t incr = 1;
1456
922k
        switch(c) {
1457
1.95k
        case '\'': squotes++; break;
1458
1.92k
        case '"':  dquotes++; break;
1459
20.6k
        case '\\': case '\t': case '\n': case '\r':
1460
20.6k
            incr = 2; break; /* \C */
1461
897k
        default:
1462
897k
            if (c < ' ' || c >= 0x7f)
1463
616k
                incr = 4; /* \xHH */
1464
922k
        }
1465
922k
        if (newsize > PY_SSIZE_T_MAX - incr)
1466
0
            goto overflow;
1467
922k
        newsize += incr;
1468
922k
    }
1469
3.07k
    quote = '\'';
1470
3.07k
    if (smartquotes && squotes && !dquotes)
1471
157
        quote = '"';
1472
3.07k
    if (squotes && quote == '\'') {
1473
152
        if (newsize > PY_SSIZE_T_MAX - squotes)
1474
0
            goto overflow;
1475
152
        newsize += squotes;
1476
152
    }
1477
1478
3.07k
    v = PyUnicode_New(newsize, 127);
1479
3.07k
    if (v == NULL) {
1480
0
        return NULL;
1481
0
    }
1482
3.07k
    p = PyUnicode_1BYTE_DATA(v);
1483
1484
3.07k
    *p++ = 'b', *p++ = quote;
1485
925k
    for (i = 0; i < length; i++) {
1486
922k
        unsigned char c = data[i];
1487
922k
        if (c == quote || c == '\\')
1488
2.93k
            *p++ = '\\', *p++ = c;
1489
919k
        else if (c == '\t')
1490
10.0k
            *p++ = '\\', *p++ = 't';
1491
909k
        else if (c == '\n')
1492
8.11k
            *p++ = '\\', *p++ = 'n';
1493
900k
        else if (c == '\r')
1494
1.09k
            *p++ = '\\', *p++ = 'r';
1495
899k
        else if (c < ' ' || c >= 0x7f) {
1496
616k
            *p++ = '\\';
1497
616k
            *p++ = 'x';
1498
616k
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1499
616k
            *p++ = Py_hexdigits[c & 0xf];
1500
616k
        }
1501
282k
        else
1502
282k
            *p++ = c;
1503
922k
    }
1504
3.07k
    *p++ = quote;
1505
3.07k
    assert(_PyUnicode_CheckConsistency(v, 1));
1506
3.07k
    return v;
1507
1508
0
  overflow:
1509
0
    PyErr_Format(PyExc_OverflowError,
1510
0
                 "%s object is too large to make repr", classname);
1511
0
    return NULL;
1512
3.07k
}
1513
1514
/* repr() for bytes: PyBytes_Repr() with smartquotes enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1519
1520
/* str() for bytes: same output as bytes_repr().  When the bytes_warning
   configuration field is non-zero a BytesWarning is issued first, and NULL
   is returned if PyErr_WarnEx() reports a non-zero result. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1531
1532
static Py_ssize_t
1533
bytes_length(PyObject *self)
1534
23.8M
{
1535
23.8M
    PyBytesObject *a = _PyBytes_CAST(self);
1536
23.8M
    return Py_SIZE(a);
1537
23.8M
}
1538
1539
/* This is also used by PyBytes_Concat() */
1540
/* Concatenate the buffers exported by a and b into a new bytes object.

   If either buffer cannot be obtained, TypeError is set.  When one side
   is empty and the other is an exact bytes object, that object is returned
   with a new reference instead of copying.  A combined length above
   PY_SSIZE_T_MAX results in PyErr_NoMemory().  Returns NULL on failure. */
static PyObject *
bytes_concat(PyObject *a, PyObject *b)
{
    PyObject *joined = NULL;
    Py_buffer left, right;

    /* A length of -1 is used at the end to tell which buffers to release. */
    left.len = -1;
    right.len = -1;

    if (PyObject_GetBuffer(a, &left, PyBUF_SIMPLE) != 0 ||
        PyObject_GetBuffer(b, &right, PyBUF_SIMPLE) != 0)
    {
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
        goto done;
    }

    /* Optimize end cases */
    if (left.len == 0 && PyBytes_CheckExact(b)) {
        joined = Py_NewRef(b);
        goto done;
    }
    if (right.len == 0 && PyBytes_CheckExact(a)) {
        joined = Py_NewRef(a);
        goto done;
    }

    if (left.len > PY_SSIZE_T_MAX - right.len) {
        PyErr_NoMemory();
        goto done;
    }

    joined = PyBytes_FromStringAndSize(NULL, left.len + right.len);
    if (joined == NULL) {
        goto done;
    }
    char *dst = PyBytes_AS_STRING(joined);
    memcpy(dst, left.buf, left.len);
    memcpy(dst + left.len, right.buf, right.len);

  done:
    if (left.len != -1) {
        PyBuffer_Release(&left);
    }
    if (right.len != -1) {
        PyBuffer_Release(&right);
    }
    return joined;
}
1583
1584
static PyObject *
1585
bytes_repeat(PyObject *self, Py_ssize_t n)
1586
112k
{
1587
112k
    PyBytesObject *a = _PyBytes_CAST(self);
1588
112k
    if (n < 0)
1589
0
        n = 0;
1590
    /* watch out for overflows:  the size can overflow int,
1591
     * and the # of bytes needed can overflow size_t
1592
     */
1593
112k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1594
0
        PyErr_SetString(PyExc_OverflowError,
1595
0
            "repeated bytes are too long");
1596
0
        return NULL;
1597
0
    }
1598
112k
    Py_ssize_t size = Py_SIZE(a) * n;
1599
112k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1600
0
        return Py_NewRef(a);
1601
0
    }
1602
112k
    size_t nbytes = (size_t)size;
1603
112k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1604
0
        PyErr_SetString(PyExc_OverflowError,
1605
0
            "repeated bytes are too long");
1606
0
        return NULL;
1607
0
    }
1608
112k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1609
112k
    if (op == NULL) {
1610
0
        return PyErr_NoMemory();
1611
0
    }
1612
112k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1613
112k
    set_ob_shash(op, -1);
1614
112k
    op->ob_sval[size] = '\0';
1615
1616
112k
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1617
1618
112k
    return (PyObject *) op;
1619
112k
}
1620
1621
static int
1622
bytes_contains(PyObject *self, PyObject *arg)
1623
2.52k
{
1624
2.52k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1625
2.52k
}
1626
1627
static PyObject *
1628
bytes_item(PyObject *self, Py_ssize_t i)
1629
0
{
1630
0
    PyBytesObject *a = _PyBytes_CAST(self);
1631
0
    if (i < 0 || i >= Py_SIZE(a)) {
1632
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1633
0
        return NULL;
1634
0
    }
1635
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1636
0
}
1637
1638
static int
1639
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1640
82.4M
{
1641
82.4M
    int cmp;
1642
82.4M
    Py_ssize_t len;
1643
1644
82.4M
    len = Py_SIZE(a);
1645
82.4M
    if (Py_SIZE(b) != len)
1646
860k
        return 0;
1647
1648
81.6M
    if (a->ob_sval[0] != b->ob_sval[0])
1649
11.5M
        return 0;
1650
1651
70.0M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1652
70.0M
    return (cmp == 0);
1653
81.6M
}
1654
1655
/* Rich comparison for bytes.

   If either operand fails PyBytes_Check() the result is NotImplemented;
   before that, when bytes_warning is set and op is == or !=, a BytesWarning
   may be issued for comparisons involving str or int operands (NULL is
   returned if issuing it reports a non-zero result).

   Identical objects are answered directly from op.  == and != go through
   bytes_compare_eq().  Ordering compares the first byte, then the common
   prefix with memcmp(), and finally the lengths. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        const int eq_or_ne = (op == Py_EQ || op == Py_NE);
        if (_Py_GetConfig()->bytes_warning && eq_or_ne) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            /* a byte string is equal to itself */
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        int same = bytes_compare_eq(a, b);
        /* flip the answer for != */
        same ^= (op == Py_NE);
        return PyBool_FromLong(same);
    }

    /* Ordering comparison. */
    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t shared = Py_MIN(len_a, len_b);
    int order = 0;

    if (shared > 0) {
        order = Py_CHARMASK(a->ob_sval[0]) - Py_CHARMASK(b->ob_sval[0]);
        if (order == 0) {
            order = memcmp(a->ob_sval, b->ob_sval, shared);
        }
    }
    if (order != 0) {
        Py_RETURN_RICHCOMPARE(order, 0, op);
    }
    /* Common prefix is equal: the lengths decide. */
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1717
1718
static Py_hash_t
1719
bytes_hash(PyObject *self)
1720
74.3M
{
1721
74.3M
    PyBytesObject *a = _PyBytes_CAST(self);
1722
74.3M
    Py_hash_t hash = get_ob_shash(a);
1723
74.3M
    if (hash == -1) {
1724
        /* Can't fail */
1725
39.7M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1726
39.7M
        set_ob_shash(a, hash);
1727
39.7M
    }
1728
74.3M
    return hash;
1729
74.3M
}
1730
1731
/* Subscript handler (see bytes_as_mapping): b[index] and b[slice].

   - An index-like item returns the byte at that position as an int;
     negative indices count from the end, out-of-range raises IndexError.
   - A slice returns a bytes object holding the selected bytes.
   - Anything else raises TypeError. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        Py_ssize_t size = PyBytes_GET_SIZE(self);
        if (index < 0) {
            index += size;
        }
        if (index < 0 || index >= size) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t slicelength = PySlice_AdjustIndices(size, &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* A slice covering an exact bytes object entirely is the object
           itself; any other contiguous slice is copied in one go. */
        if (start == 0 && slicelength == size && PyBytes_CheckExact(self)) {
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self) + start,
                                         slicelength);
    }

    /* Extended slice: gather the selected bytes one at a time. */
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }
    const char *source_buf = PyBytes_AS_STRING(self);
    char *result_buf = PyBytes_AS_STRING(result);
    /* `cur` is unsigned; adding a negative step relies on modular
       arithmetic, exactly as the index sequence requires. */
    size_t cur = start;
    for (Py_ssize_t i = 0; i < slicelength; i++) {
        result_buf[i] = source_buf[cur];
        cur += step;
    }
    return result;
}
1796
1797
static int
1798
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1799
42.1M
{
1800
42.1M
    PyBytesObject *self = _PyBytes_CAST(op);
1801
42.1M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1802
42.1M
                             1, flags);
1803
42.1M
}
1804
1805
static PySequenceMethods bytes_as_sequence = {
1806
    bytes_length,       /*sq_length*/
1807
    bytes_concat,       /*sq_concat*/
1808
    bytes_repeat,       /*sq_repeat*/
1809
    bytes_item,         /*sq_item*/
1810
    0,                  /*sq_slice*/
1811
    0,                  /*sq_ass_item*/
1812
    0,                  /*sq_ass_slice*/
1813
    bytes_contains      /*sq_contains*/
1814
};
1815
1816
static PyMappingMethods bytes_as_mapping = {
1817
    bytes_length,
1818
    bytes_subscript,
1819
    0,
1820
};
1821
1822
static PyBufferProcs bytes_as_buffer = {
1823
    bytes_buffer_getbuffer,
1824
    NULL,
1825
};
1826
1827
1828
/*[clinic input]
1829
bytes.__bytes__
1830
Convert this value to exact type bytes.
1831
[clinic start generated code]*/
1832
1833
static PyObject *
1834
bytes___bytes___impl(PyBytesObject *self)
1835
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1836
54.1k
{
1837
54.1k
    if (PyBytes_CheckExact(self)) {
1838
54.1k
        return Py_NewRef(self);
1839
54.1k
    }
1840
0
    else {
1841
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1842
0
    }
1843
54.1k
}
1844
1845
1846
0
#define LEFTSTRIP 0   /* striptype: strip the leading end only (bytes.lstrip) */
1847
0
#define RIGHTSTRIP 1  /* striptype: strip the trailing end only (bytes.rstrip) */
1848
0
#define BOTHSTRIP 2   /* striptype: strip both ends (bytes.strip) */
1849
1850
/*[clinic input]
1851
bytes.split
1852
1853
    sep: object = None
1854
        The delimiter according to which to split the bytes.
1855
        None (the default value) means split on ASCII whitespace characters
1856
        (space, tab, return, newline, formfeed, vertical tab).
1857
    maxsplit: Py_ssize_t = -1
1858
        Maximum number of splits to do.
1859
        -1 (the default value) means no limit.
1860
1861
Return a list of the sections in the bytes, using sep as the delimiter.
1862
[clinic start generated code]*/
1863
1864
static PyObject *
1865
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1867
2.89M
{
1868
2.89M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1869
2.89M
    const char *s = PyBytes_AS_STRING(self), *sub;
1870
2.89M
    Py_buffer vsub;
1871
2.89M
    PyObject *list;
1872
1873
2.89M
    if (maxsplit < 0)
1874
2.89M
        maxsplit = PY_SSIZE_T_MAX;
1875
2.89M
    if (sep == Py_None)
1876
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1877
2.89M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1878
0
        return NULL;
1879
2.89M
    sub = vsub.buf;
1880
2.89M
    n = vsub.len;
1881
1882
2.89M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1883
2.89M
    PyBuffer_Release(&vsub);
1884
2.89M
    return list;
1885
2.89M
}
1886
1887
/*[clinic input]
1888
@permit_long_docstring_body
1889
bytes.partition
1890
1891
    sep: Py_buffer
1892
    /
1893
1894
Partition the bytes into three parts using the given separator.
1895
1896
This will search for the separator sep in the bytes. If the separator is found,
1897
returns a 3-tuple containing the part before the separator, the separator
1898
itself, and the part after it.
1899
1900
If the separator is not found, returns a 3-tuple containing the original bytes
1901
object and two empty bytes objects.
1902
[clinic start generated code]*/
1903
1904
static PyObject *
1905
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1906
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1907
481k
{
1908
481k
    return stringlib_partition(
1909
481k
        (PyObject*) self,
1910
481k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1911
481k
        sep->obj, (const char *)sep->buf, sep->len
1912
481k
        );
1913
481k
}
1914
1915
/*[clinic input]
1916
@permit_long_docstring_body
1917
bytes.rpartition
1918
1919
    sep: Py_buffer
1920
    /
1921
1922
Partition the bytes into three parts using the given separator.
1923
1924
This will search for the separator sep in the bytes, starting at the end. If
1925
the separator is found, returns a 3-tuple containing the part before the
1926
separator, the separator itself, and the part after it.
1927
1928
If the separator is not found, returns a 3-tuple containing two empty bytes
1929
objects and the original bytes object.
1930
[clinic start generated code]*/
1931
1932
static PyObject *
1933
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1934
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1935
0
{
1936
0
    return stringlib_rpartition(
1937
0
        (PyObject*) self,
1938
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1939
0
        sep->obj, (const char *)sep->buf, sep->len
1940
0
        );
1941
0
}
1942
1943
/*[clinic input]
1944
@permit_long_docstring_body
1945
bytes.rsplit = bytes.split
1946
1947
Return a list of the sections in the bytes, using sep as the delimiter.
1948
1949
Splitting is done starting at the end of the bytes and working to the front.
1950
[clinic start generated code]*/
1951
1952
static PyObject *
1953
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1954
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1955
0
{
1956
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1957
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1958
0
    Py_buffer vsub;
1959
0
    PyObject *list;
1960
1961
0
    if (maxsplit < 0)
1962
0
        maxsplit = PY_SSIZE_T_MAX;
1963
0
    if (sep == Py_None)
1964
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1965
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1966
0
        return NULL;
1967
0
    sub = vsub.buf;
1968
0
    n = vsub.len;
1969
1970
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1971
0
    PyBuffer_Release(&vsub);
1972
0
    return list;
1973
0
}
1974
1975
1976
/*[clinic input]
1977
bytes.join
1978
1979
    iterable_of_bytes: object
1980
    /
1981
1982
Concatenate any number of bytes objects.
1983
1984
The bytes whose method is called is inserted in between each pair.
1985
1986
The result is returned as a new bytes object.
1987
1988
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1989
[clinic start generated code]*/
1990
1991
static PyObject *
1992
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1993
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1994
8.85k
{
1995
8.85k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1996
8.85k
}
1997
1998
/* Join the items of `iterable` using the bytes object `sep` as separator.

   Returns the result of stringlib_bytes_join(), or NULL with an exception
   set when `sep` is NULL (bad internal call) or not a bytes object
   (TypeError). */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    PyErr_Format(PyExc_TypeError,
                 "sep: expected bytes, got %T", sep);
    return NULL;
}
2013
2014
/*[clinic input]
2015
@permit_long_summary
2016
@text_signature "($self, sub[, start[, end]], /)"
2017
bytes.find
2018
2019
    sub: object
2020
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2021
         Optional start position. Default: start of the bytes.
2022
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2023
         Optional stop position. Default: end of the bytes.
2024
    /
2025
2026
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2027
2028
Return -1 on failure.
2029
[clinic start generated code]*/
2030
2031
static PyObject *
2032
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2033
                Py_ssize_t end)
2034
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
2035
16.3M
{
2036
16.3M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2037
16.3M
                          sub, start, end);
2038
16.3M
}
2039
2040
/*[clinic input]
2041
@permit_long_summary
2042
bytes.index = bytes.find
2043
2044
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2045
2046
Raise ValueError if the subsection is not found.
2047
[clinic start generated code]*/
2048
2049
static PyObject *
2050
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2051
                 Py_ssize_t end)
2052
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
2053
0
{
2054
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2055
0
                           sub, start, end);
2056
0
}
2057
2058
/*[clinic input]
2059
@permit_long_summary
2060
bytes.rfind = bytes.find
2061
2062
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2063
2064
Return -1 on failure.
2065
[clinic start generated code]*/
2066
2067
static PyObject *
2068
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2069
                 Py_ssize_t end)
2070
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
2071
26.1k
{
2072
26.1k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2073
26.1k
                           sub, start, end);
2074
26.1k
}
2075
2076
/*[clinic input]
2077
@permit_long_summary
2078
bytes.rindex = bytes.find
2079
2080
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
2081
2082
Raise ValueError if the subsection is not found.
2083
[clinic start generated code]*/
2084
2085
static PyObject *
2086
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2087
                  Py_ssize_t end)
2088
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2089
0
{
2090
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2091
0
                            sub, start, end);
2092
0
}
2093
2094
2095
/* Strip from `self` every leading and/or trailing byte that occurs in the
   buffer exported by `sepobj`.  `striptype` is LEFTSTRIP, RIGHTSTRIP or
   BOTHSTRIP.  Returns a new reference, or NULL if `sepobj` does not
   provide a simple buffer. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *s = PyBytes_AS_STRING(self);
    Py_ssize_t len = PyBytes_GET_SIZE(self);

    Py_buffer vsep;
    if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    char *sep = vsep.buf;
    Py_ssize_t seplen = vsep.len;

    /* [left, right) is the range that survives stripping. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        while (left < len && memchr(sep, Py_CHARMASK(s[left]), seplen)) {
            left++;
        }
    }

    Py_ssize_t right = len;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(sep, Py_CHARMASK(s[right - 1]), seplen)) {
            right--;
        }
    }

    PyBuffer_Release(&vsep);

    /* Nothing removed from an exact bytes object: hand back the object. */
    if (left == 0 && right == len && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(s + left, right - left);
}
2133
2134
2135
/* Strip leading and/or trailing bytes for which Py_ISSPACE() is true.
   `striptype` is LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.  Returns a new
   reference. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *s = PyBytes_AS_STRING(self);
    Py_ssize_t len = PyBytes_GET_SIZE(self);

    /* [left, right) is the range that survives stripping. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        while (left < len && Py_ISSPACE(s[left])) {
            left++;
        }
    }

    Py_ssize_t right = len;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(s[right - 1])) {
            right--;
        }
    }

    /* Nothing removed from an exact bytes object: hand back the object. */
    if (left == 0 && right == len && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(s + left, right - left);
}
2162
2163
2164
/* Dispatch a strip request: with `bytes` set to None use do_strip(),
   otherwise strip the bytes found in `bytes` via do_xstrip(). */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2172
2173
/*[clinic input]
2174
@permit_long_docstring_body
2175
bytes.strip
2176
2177
    bytes: object = None
2178
    /
2179
2180
Strip leading and trailing bytes contained in the argument.
2181
2182
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2183
[clinic start generated code]*/
2184
2185
static PyObject *
2186
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2187
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2188
0
{
2189
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2190
0
}
2191
2192
/*[clinic input]
2193
bytes.lstrip
2194
2195
    bytes: object = None
2196
    /
2197
2198
Strip leading bytes contained in the argument.
2199
2200
If the argument is omitted or None, strip leading ASCII whitespace.
2201
[clinic start generated code]*/
2202
2203
static PyObject *
2204
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2205
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2206
0
{
2207
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2208
0
}
2209
2210
/*[clinic input]
2211
bytes.rstrip
2212
2213
    bytes: object = None
2214
    /
2215
2216
Strip trailing bytes contained in the argument.
2217
2218
If the argument is omitted or None, strip trailing ASCII whitespace.
2219
[clinic start generated code]*/
2220
2221
static PyObject *
2222
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2223
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2224
0
{
2225
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2226
0
}
2227
2228
2229
/*[clinic input]
2230
@permit_long_summary
2231
bytes.count = bytes.find
2232
2233
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2234
[clinic start generated code]*/
2235
2236
static PyObject *
2237
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2238
                 Py_ssize_t end)
2239
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2240
6.80M
{
2241
6.80M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2242
6.80M
                           sub, start, end);
2243
6.80M
}
2244
2245
2246
/*[clinic input]
2247
bytes.translate
2248
2249
    table: object
2250
        Translation table, which must be a bytes object of length 256.
2251
    /
2252
    delete as deletechars: object(c_default="NULL") = b''
2253
2254
Return a copy with each character mapped by the given translation table.
2255
2256
All characters occurring in the optional argument delete are removed.
2257
The remaining characters are mapped through the given translation table.
2258
[clinic start generated code]*/
2259
2260
static PyObject *
2261
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2262
                     PyObject *deletechars)
2263
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2264
0
{
2265
0
    const char *input;
2266
0
    char *output;
2267
0
    Py_buffer table_view = {NULL, NULL};
2268
0
    Py_buffer del_table_view = {NULL, NULL};
2269
0
    const char *table_chars;
2270
0
    Py_ssize_t i, c, changed = 0;
2271
0
    PyObject *input_obj = (PyObject*)self;
2272
0
    const char *output_start, *del_table_chars=NULL;
2273
0
    Py_ssize_t inlen, tablen, dellen = 0;
2274
0
    PyObject *result;
2275
0
    int trans_table[256];
2276
2277
0
    if (PyBytes_Check(table)) {
2278
0
        table_chars = PyBytes_AS_STRING(table);
2279
0
        tablen = PyBytes_GET_SIZE(table);
2280
0
    }
2281
0
    else if (table == Py_None) {
2282
0
        table_chars = NULL;
2283
0
        tablen = 256;
2284
0
    }
2285
0
    else {
2286
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2287
0
            return NULL;
2288
0
        table_chars = table_view.buf;
2289
0
        tablen = table_view.len;
2290
0
    }
2291
2292
0
    if (tablen != 256) {
2293
0
        PyErr_SetString(PyExc_ValueError,
2294
0
          "translation table must be 256 characters long");
2295
0
        PyBuffer_Release(&table_view);
2296
0
        return NULL;
2297
0
    }
2298
2299
0
    if (deletechars != NULL) {
2300
0
        if (PyBytes_Check(deletechars)) {
2301
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2302
0
            dellen = PyBytes_GET_SIZE(deletechars);
2303
0
        }
2304
0
        else {
2305
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2306
0
                PyBuffer_Release(&table_view);
2307
0
                return NULL;
2308
0
            }
2309
0
            del_table_chars = del_table_view.buf;
2310
0
            dellen = del_table_view.len;
2311
0
        }
2312
0
    }
2313
0
    else {
2314
0
        del_table_chars = NULL;
2315
0
        dellen = 0;
2316
0
    }
2317
2318
0
    inlen = PyBytes_GET_SIZE(input_obj);
2319
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2320
0
    if (result == NULL) {
2321
0
        PyBuffer_Release(&del_table_view);
2322
0
        PyBuffer_Release(&table_view);
2323
0
        return NULL;
2324
0
    }
2325
0
    output_start = output = PyBytes_AS_STRING(result);
2326
0
    input = PyBytes_AS_STRING(input_obj);
2327
2328
0
    if (dellen == 0 && table_chars != NULL) {
2329
        /* If no deletions are required, use faster code */
2330
0
        for (i = inlen; --i >= 0; ) {
2331
0
            c = Py_CHARMASK(*input++);
2332
0
            *output++ = table_chars[c];
2333
0
        }
2334
        /* Check if anything changed (for returning original object) */
2335
        /* We save this check until the end so that the compiler will */
2336
        /* unroll the loop above leading to MUCH faster code. */
2337
0
        if (PyBytes_CheckExact(input_obj)) {
2338
0
            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
2339
0
                Py_SETREF(result, Py_NewRef(input_obj));
2340
0
            }
2341
0
        }
2342
0
        PyBuffer_Release(&del_table_view);
2343
0
        PyBuffer_Release(&table_view);
2344
0
        return result;
2345
0
    }
2346
2347
0
    if (table_chars == NULL) {
2348
0
        for (i = 0; i < 256; i++)
2349
0
            trans_table[i] = Py_CHARMASK(i);
2350
0
    } else {
2351
0
        for (i = 0; i < 256; i++)
2352
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2353
0
    }
2354
0
    PyBuffer_Release(&table_view);
2355
2356
0
    for (i = 0; i < dellen; i++)
2357
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2358
0
    PyBuffer_Release(&del_table_view);
2359
2360
0
    for (i = inlen; --i >= 0; ) {
2361
0
        c = Py_CHARMASK(*input++);
2362
0
        if (trans_table[c] != -1)
2363
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2364
0
                continue;
2365
0
        changed = 1;
2366
0
    }
2367
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2368
0
        Py_DECREF(result);
2369
0
        return Py_NewRef(input_obj);
2370
0
    }
2371
    /* Fix the size of the resulting byte string */
2372
0
    if (inlen > 0)
2373
0
        _PyBytes_Resize(&result, output - output_start);
2374
0
    return result;
2375
0
}
2376
2377
2378
/*[clinic input]
2379
2380
@permit_long_summary
2381
@permit_long_docstring_body
2382
@staticmethod
2383
bytes.maketrans
2384
2385
    frm: Py_buffer
2386
    to: Py_buffer
2387
    /
2388
2389
Return a translation table usable for the bytes or bytearray translate method.
2390
2391
The returned table will be one where each byte in frm is mapped to the byte at
2392
the same position in to.
2393
2394
The bytes objects frm and to must be of the same length.
2395
[clinic start generated code]*/
2396
2397
static PyObject *
2398
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2399
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2400
18
{
2401
18
    return _Py_bytes_maketrans(frm, to);
2402
18
}
2403
2404
2405
/*[clinic input]
2406
@permit_long_docstring_body
2407
bytes.replace
2408
2409
    old: Py_buffer
2410
    new: Py_buffer
2411
    count: Py_ssize_t = -1
2412
        Maximum number of occurrences to replace.
2413
        -1 (the default value) means replace all occurrences.
2414
    /
2415
2416
Return a copy with all occurrences of substring old replaced by new.
2417
2418
If the optional argument count is given, only the first count occurrences are
2419
replaced.
2420
[clinic start generated code]*/
2421
2422
static PyObject *
2423
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2424
                   Py_ssize_t count)
2425
/*[clinic end generated code: output=994fa588b6b9c104 input=8b99a9ab32bc06a2]*/
2426
31.6k
{
2427
31.6k
    return stringlib_replace((PyObject *)self,
2428
31.6k
                             (const char *)old->buf, old->len,
2429
31.6k
                             (const char *)new->buf, new->len, count);
2430
31.6k
}
2431
2432
/** End DALKE **/
2433
2434
/*[clinic input]
2435
bytes.removeprefix as bytes_removeprefix
2436
2437
    prefix: Py_buffer
2438
    /
2439
2440
Return a bytes object with the given prefix string removed if present.
2441
2442
If the bytes starts with the prefix string, return bytes[len(prefix):].
2443
Otherwise, return a copy of the original bytes.
2444
[clinic start generated code]*/
2445
2446
static PyObject *
2447
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2448
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2449
0
{
2450
0
    const char *self_start = PyBytes_AS_STRING(self);
2451
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2452
0
    const char *prefix_start = prefix->buf;
2453
0
    Py_ssize_t prefix_len = prefix->len;
2454
2455
0
    if (self_len >= prefix_len
2456
0
        && prefix_len > 0
2457
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2458
0
    {
2459
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2460
0
                                         self_len - prefix_len);
2461
0
    }
2462
2463
0
    if (PyBytes_CheckExact(self)) {
2464
0
        return Py_NewRef(self);
2465
0
    }
2466
2467
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2468
0
}
2469
2470
/*[clinic input]
2471
bytes.removesuffix as bytes_removesuffix
2472
2473
    suffix: Py_buffer
2474
    /
2475
2476
Return a bytes object with the given suffix string removed if present.
2477
2478
If the bytes ends with the suffix string and that suffix is not empty,
2479
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2480
bytes.
2481
[clinic start generated code]*/
2482
2483
static PyObject *
2484
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2485
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2486
0
{
2487
0
    const char *self_start = PyBytes_AS_STRING(self);
2488
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2489
0
    const char *suffix_start = suffix->buf;
2490
0
    Py_ssize_t suffix_len = suffix->len;
2491
2492
0
    if (self_len >= suffix_len
2493
0
        && suffix_len > 0
2494
0
        && memcmp(self_start + self_len - suffix_len,
2495
0
                  suffix_start, suffix_len) == 0)
2496
0
    {
2497
0
        return PyBytes_FromStringAndSize(self_start,
2498
0
                                         self_len - suffix_len);
2499
0
    }
2500
2501
0
    if (PyBytes_CheckExact(self)) {
2502
0
        return Py_NewRef(self);
2503
0
    }
2504
2505
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2506
0
}
2507
2508
/*[clinic input]
2509
@permit_long_summary
2510
@text_signature "($self, prefix[, start[, end]], /)"
2511
bytes.startswith
2512
2513
    prefix as subobj: object
2514
        A bytes or a tuple of bytes to try.
2515
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2516
        Optional start position. Default: start of the bytes.
2517
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2518
        Optional stop position. Default: end of the bytes.
2519
    /
2520
2521
Return True if the bytes starts with the specified prefix, False otherwise.
2522
[clinic start generated code]*/
2523
2524
static PyObject *
2525
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2526
                      Py_ssize_t start, Py_ssize_t end)
2527
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2528
164k
{
2529
164k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2530
164k
                                subobj, start, end);
2531
164k
}
2532
2533
/*[clinic input]
2534
@permit_long_summary
2535
@text_signature "($self, suffix[, start[, end]], /)"
2536
bytes.endswith
2537
2538
    suffix as subobj: object
2539
        A bytes or a tuple of bytes to try.
2540
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2541
         Optional start position. Default: start of the bytes.
2542
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2543
         Optional stop position. Default: end of the bytes.
2544
    /
2545
2546
Return True if the bytes ends with the specified suffix, False otherwise.
2547
[clinic start generated code]*/
2548
2549
static PyObject *
2550
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2551
                    Py_ssize_t end)
2552
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2553
3
{
2554
3
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2555
3
                              subobj, start, end);
2556
3
}
2557
2558
2559
/*[clinic input]
2560
bytes.decode
2561
2562
    encoding: str(c_default="NULL") = 'utf-8'
2563
        The encoding with which to decode the bytes.
2564
    errors: str(c_default="NULL") = 'strict'
2565
        The error handling scheme to use for the handling of decoding errors.
2566
        The default is 'strict' meaning that decoding errors raise a
2567
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2568
        as well as any other name registered with codecs.register_error that
2569
        can handle UnicodeDecodeErrors.
2570
2571
Decode the bytes using the codec registered for encoding.
2572
[clinic start generated code]*/
2573
2574
static PyObject *
2575
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2576
                  const char *errors)
2577
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2578
21.6M
{
2579
21.6M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2580
21.6M
}
2581
2582
2583
/*[clinic input]
2584
@permit_long_docstring_body
2585
bytes.splitlines
2586
2587
    keepends: bool = False
2588
2589
Return a list of the lines in the bytes, breaking at line boundaries.
2590
2591
Line breaks are not included in the resulting list unless keepends is given and
2592
true.
2593
[clinic start generated code]*/
2594
2595
static PyObject *
2596
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2597
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2598
0
{
2599
0
    return stringlib_splitlines(
2600
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2601
0
        PyBytes_GET_SIZE(self), keepends
2602
0
        );
2603
0
}
2604
2605
/*[clinic input]
2606
@classmethod
2607
bytes.fromhex
2608
2609
    string: object
2610
    /
2611
2612
Create a bytes object from a string of hexadecimal numbers.
2613
2614
Spaces between two numbers are accepted.
2615
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2616
[clinic start generated code]*/
2617
2618
static PyObject *
2619
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2620
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2621
30.7k
{
2622
30.7k
    PyObject *result = _PyBytes_FromHex(string, 0);
2623
30.7k
    if (type != &PyBytes_Type && result != NULL) {
2624
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2625
0
    }
2626
30.7k
    return result;
2627
30.7k
}
2628
2629
/* Decode a sequence of hexadecimal digit pairs into a new object.
 *
 * string        a str (must be pure ASCII) or an object supporting the
 *               buffer protocol; anything else raises TypeError.
 * use_bytearray non-zero selects the bytearray-producing writer, zero the
 *               bytes-producing one.
 *
 * Runs of whitespace (as classified by Py_ISSPACE) are skipped before each
 * digit pair, but not between the two digits of a pair.
 *
 * Returns the object produced by the writer, or NULL with an exception set:
 * ValueError for a non-hexadecimal character (reporting its position) or
 * for an odd number of digits.
 *
 * The input is only ever read inside [start, end).  A buffer obtained with
 * PyBUF_SIMPLE is not guaranteed to be NUL-terminated, so every dereference
 * of `str` below is guarded by a comparison against `end`.
 */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;   /* lets the exit paths know whether to release */

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            /* writer is still NULL here; the error path discards it as is */
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking past the end */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The first digit was the last input byte: odd number of digits.
           Checked before dereferencing so that a byte lying just past a
           non-NUL-terminated buffer is never read or mistaken for data. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 is the marker for "odd number of digits" */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2741
2742
/*[clinic input]
2743
bytes.hex
2744
2745
    sep: object = NULL
2746
        An optional single character or byte to separate hex bytes.
2747
    bytes_per_sep: int = 1
2748
        How many bytes between separators.  Positive values count from the
2749
        right, negative values count from the left.
2750
2751
Create a string of hexadecimal numbers from a bytes object.
2752
2753
Example:
2754
>>> value = b'\xb9\x01\xef'
2755
>>> value.hex()
2756
'b901ef'
2757
>>> value.hex(':')
2758
'b9:01:ef'
2759
>>> value.hex(':', 2)
2760
'b9:01ef'
2761
>>> value.hex(':', -2)
2762
'b901:ef'
2763
[clinic start generated code]*/
2764
2765
static PyObject *
2766
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2767
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2768
0
{
2769
0
    const char *argbuf = PyBytes_AS_STRING(self);
2770
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2771
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2772
0
}
2773
2774
/* __getnewargs__: return a 1-tuple holding a bytes copy of the payload,
   built with the "y#" (pointer + length) format. */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *self = _PyBytes_CAST(op);
    const char *payload = self->ob_sval;
    Py_ssize_t length = Py_SIZE(self);

    return Py_BuildValue("(y#)", payload, length);
}
2780
2781
2782
static PyMethodDef
2783
bytes_methods[] = {
2784
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2785
    BYTES___BYTES___METHODDEF
2786
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2787
     _Py_capitalize__doc__},
2788
    STRINGLIB_CENTER_METHODDEF
2789
    BYTES_COUNT_METHODDEF
2790
    BYTES_DECODE_METHODDEF
2791
    BYTES_ENDSWITH_METHODDEF
2792
    STRINGLIB_EXPANDTABS_METHODDEF
2793
    BYTES_FIND_METHODDEF
2794
    BYTES_FROMHEX_METHODDEF
2795
    BYTES_HEX_METHODDEF
2796
    BYTES_INDEX_METHODDEF
2797
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2798
     _Py_isalnum__doc__},
2799
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2800
     _Py_isalpha__doc__},
2801
    {"isascii", stringlib_isascii, METH_NOARGS,
2802
     _Py_isascii__doc__},
2803
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2804
     _Py_isdigit__doc__},
2805
    {"islower", stringlib_islower, METH_NOARGS,
2806
     _Py_islower__doc__},
2807
    {"isspace", stringlib_isspace, METH_NOARGS,
2808
     _Py_isspace__doc__},
2809
    {"istitle", stringlib_istitle, METH_NOARGS,
2810
     _Py_istitle__doc__},
2811
    {"isupper", stringlib_isupper, METH_NOARGS,
2812
     _Py_isupper__doc__},
2813
    BYTES_JOIN_METHODDEF
2814
    STRINGLIB_LJUST_METHODDEF
2815
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2816
    BYTES_LSTRIP_METHODDEF
2817
    BYTES_MAKETRANS_METHODDEF
2818
    BYTES_PARTITION_METHODDEF
2819
    BYTES_REPLACE_METHODDEF
2820
    BYTES_REMOVEPREFIX_METHODDEF
2821
    BYTES_REMOVESUFFIX_METHODDEF
2822
    BYTES_RFIND_METHODDEF
2823
    BYTES_RINDEX_METHODDEF
2824
    STRINGLIB_RJUST_METHODDEF
2825
    BYTES_RPARTITION_METHODDEF
2826
    BYTES_RSPLIT_METHODDEF
2827
    BYTES_RSTRIP_METHODDEF
2828
    BYTES_SPLIT_METHODDEF
2829
    BYTES_SPLITLINES_METHODDEF
2830
    BYTES_STARTSWITH_METHODDEF
2831
    BYTES_STRIP_METHODDEF
2832
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2833
     _Py_swapcase__doc__},
2834
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2835
    BYTES_TRANSLATE_METHODDEF
2836
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2837
    STRINGLIB_ZFILL_METHODDEF
2838
    {NULL,     NULL}                         /* sentinel */
2839
};
2840
2841
/* nb_remainder slot: implements `bytes % arg` formatting.  Returns
   NotImplemented when the left operand is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2850
2851
static PyNumberMethods bytes_as_number = {
2852
    0,              /*nb_add*/
2853
    0,              /*nb_subtract*/
2854
    0,              /*nb_multiply*/
2855
    bytes_mod,      /*nb_remainder*/
2856
};
2857
2858
static PyObject *
2859
bytes_subtype_new(PyTypeObject *, PyObject *);
2860
2861
/*[clinic input]
2862
@classmethod
2863
bytes.__new__ as bytes_new
2864
2865
    source as x: object = NULL
2866
    encoding: str = NULL
2867
    errors: str = NULL
2868
2869
[clinic start generated code]*/
2870
2871
static PyObject *
2872
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2873
               const char *errors)
2874
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2875
362k
{
2876
362k
    PyObject *bytes;
2877
362k
    PyObject *func;
2878
362k
    Py_ssize_t size;
2879
2880
362k
    if (x == NULL) {
2881
0
        if (encoding != NULL || errors != NULL) {
2882
0
            PyErr_SetString(PyExc_TypeError,
2883
0
                            encoding != NULL ?
2884
0
                            "encoding without a string argument" :
2885
0
                            "errors without a string argument");
2886
0
            return NULL;
2887
0
        }
2888
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2889
0
    }
2890
362k
    else if (encoding != NULL) {
2891
        /* Encode via the codec registry */
2892
227k
        if (!PyUnicode_Check(x)) {
2893
0
            PyErr_SetString(PyExc_TypeError,
2894
0
                            "encoding without a string argument");
2895
0
            return NULL;
2896
0
        }
2897
227k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2898
227k
    }
2899
134k
    else if (errors != NULL) {
2900
0
        PyErr_SetString(PyExc_TypeError,
2901
0
                        PyUnicode_Check(x) ?
2902
0
                        "string argument without an encoding" :
2903
0
                        "errors without a string argument");
2904
0
        return NULL;
2905
0
    }
2906
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2907
       integer argument before deferring to PyBytes_FromObject, something
2908
       PyObject_Bytes doesn't do. */
2909
134k
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2910
54.1k
        bytes = _PyObject_CallNoArgs(func);
2911
54.1k
        Py_DECREF(func);
2912
54.1k
        if (bytes == NULL)
2913
0
            return NULL;
2914
54.1k
        if (!PyBytes_Check(bytes)) {
2915
0
            PyErr_Format(PyExc_TypeError,
2916
0
                         "%T.__bytes__() must return a bytes, not %T",
2917
0
                         x, bytes);
2918
0
            Py_DECREF(bytes);
2919
0
            return NULL;
2920
0
        }
2921
54.1k
    }
2922
80.7k
    else if (PyErr_Occurred())
2923
0
        return NULL;
2924
80.7k
    else if (PyUnicode_Check(x)) {
2925
0
        PyErr_SetString(PyExc_TypeError,
2926
0
                        "string argument without an encoding");
2927
0
        return NULL;
2928
0
    }
2929
    /* Is it an integer? */
2930
80.7k
    else if (_PyIndex_Check(x)) {
2931
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2932
0
        if (size == -1 && PyErr_Occurred()) {
2933
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2934
0
                return NULL;
2935
0
            PyErr_Clear();  /* fall through */
2936
0
            bytes = PyBytes_FromObject(x);
2937
0
        }
2938
0
        else {
2939
0
            if (size < 0) {
2940
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2941
0
                return NULL;
2942
0
            }
2943
0
            bytes = _PyBytes_FromSize(size, 1);
2944
0
        }
2945
0
    }
2946
80.7k
    else {
2947
80.7k
        bytes = PyBytes_FromObject(x);
2948
80.7k
    }
2949
2950
362k
    if (bytes != NULL && type != &PyBytes_Type) {
2951
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2952
0
    }
2953
2954
362k
    return bytes;
2955
362k
}
2956
2957
/* Build a bytes object from an object exporting the buffer protocol by
   copying the exported data into C-contiguous order.  Returns NULL on
   failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    int copied = 0;
    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
    if (writer != NULL) {
        void *dest = PyBytesWriter_GetData(writer);
        copied = (PyBuffer_ToContiguous(dest, &view, view.len, 'C') >= 0);
    }

    if (!copied) {
        /* writer may be NULL here when creation itself failed */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&view);
        return NULL;
    }

    PyBuffer_Release(&view);
    return PyBytesWriter_Finish(writer);
}
2982
2983
/* Build a bytes object from a list of integers in range(0, 256).

   The list length is re-read on every iteration and the writer is grown
   on demand, so the loop stays within bounds even if the list changes
   size while an element is being converted. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *cursor = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t index = 0;
    while (index < PyList_GET_SIZE(x)) {
        /* Hold a strong reference across the conversion call. */
        PyObject *element = Py_NewRef(PyList_GET_ITEM(x, index));
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        Py_DECREF(element);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        if (index >= capacity) {
            cursor = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                           capacity + 1,
                                                           cursor);
            if (cursor == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *cursor++ = (char) byte;
        index++;
    }
    return PyBytesWriter_FinishWithPointer(writer, cursor);

error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3023
3024
/* Build a bytes object from a tuple of integers in range(0, 256).
   The writer is created with exactly one byte per tuple element. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    Py_ssize_t pos = 0;
    while (pos < count) {
        PyObject *element = PyTuple_GET_ITEM(x, pos);
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        if (byte == -1 && PyErr_Occurred()) {
            break;
        }
        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            break;
        }
        out[pos] = (char) byte;
        pos++;
    }

    /* Leaving the loop early means an exception is pending. */
    if (pos < count) {
        PyBytesWriter_Discard(writer);
        return NULL;
    }
    return PyBytesWriter_Finish(writer);
}
3056
3057
/* Build a bytes object by exhausting the iterator `it`.  `x` is the
   original iterable and is only consulted for a length hint (default 64)
   used to size the initial buffer; the buffer grows on demand. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t hint = PyObject_LengthHint(x, 64);
    if (hint == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(hint);
    if (writer == NULL) {
        return NULL;
    }
    char *cursor = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    /* Run the iterator to exhaustion */
    Py_ssize_t produced = 0;
    PyObject *item;
    while ((item = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (byte == -1 && PyErr_Occurred()) {
            goto error;
        }

        /* Range check */
        if (byte < 0 || byte >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, growing the buffer first when it is full */
        if (produced >= capacity) {
            cursor = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                           capacity + 1,
                                                           cursor);
            if (cursor == NULL) {
                goto error;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *cursor++ = (char) byte;
        produced++;
    }

    /* PyIter_Next returns NULL both at exhaustion and on error; only a
       pending exception distinguishes the two. */
    if (PyErr_Occurred()) {
        goto error;
    }
    return PyBytesWriter_FinishWithPointer(writer, cursor);

  error:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3116
3117
/* Convert an arbitrary object to bytes.

   Conversion strategies are tried in this order: exact bytes (returned
   as a new reference to the same object), buffer protocol, exact list,
   exact tuple, and finally generic iteration.  str objects are never
   iterated.  Returns NULL with an exception set on failure. */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (PyUnicode_Check(x)) {
        goto unsupported;
    }

    PyObject *iterator = PyObject_GetIter(x);
    if (iterator == NULL) {
        /* Only a TypeError is replaced by the generic message below;
           any other exception propagates as is. */
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
        goto unsupported;
    }

    PyObject *converted = _PyBytes_FromIterator(iterator, x);
    Py_DECREF(iterator);
    return converted;

unsupported:
    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3158
3159
/* This allocator is needed for subclasses that don't want to use __new__.
3160
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3161
 *
3162
 * This allocator will be removed when ob_shash is removed.
3163
 */
3164
static PyObject *
3165
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3166
0
{
3167
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3168
0
    if (obj == NULL) {
3169
0
        return NULL;
3170
0
    }
3171
0
    set_ob_shash(obj, -1);
3172
0
    return (PyObject*)obj;
3173
0
}
3174
3175
static PyObject *
3176
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3177
0
{
3178
0
    PyObject *pnew;
3179
0
    Py_ssize_t n;
3180
3181
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3182
0
    assert(PyBytes_Check(tmp));
3183
0
    n = PyBytes_GET_SIZE(tmp);
3184
0
    pnew = type->tp_alloc(type, n);
3185
0
    if (pnew != NULL) {
3186
0
        memcpy(PyBytes_AS_STRING(pnew),
3187
0
                  PyBytes_AS_STRING(tmp), n+1);
3188
0
        set_ob_shash((PyBytesObject *)pnew,
3189
0
            get_ob_shash((PyBytesObject *)tmp));
3190
0
    }
3191
0
    return pnew;
3192
0
}
3193
3194
PyDoc_STRVAR(bytes_doc,
3195
"bytes(iterable_of_ints) -> bytes\n\
3196
bytes(string, encoding[, errors]) -> bytes\n\
3197
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3198
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3199
bytes() -> empty bytes object\n\
3200
\n\
3201
Construct an immutable array of bytes from:\n\
3202
  - an iterable yielding integers in range(256)\n\
3203
  - a text string encoded using the specified encoding\n\
3204
  - any object implementing the buffer API.\n\
3205
  - an integer");
3206
3207
static PyObject *bytes_iter(PyObject *seq);
3208
3209
PyTypeObject PyBytes_Type = {
3210
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3211
    "bytes",
3212
    PyBytesObject_SIZE,
3213
    sizeof(char),
3214
    0,                                          /* tp_dealloc */
3215
    0,                                          /* tp_vectorcall_offset */
3216
    0,                                          /* tp_getattr */
3217
    0,                                          /* tp_setattr */
3218
    0,                                          /* tp_as_async */
3219
    bytes_repr,                                 /* tp_repr */
3220
    &bytes_as_number,                           /* tp_as_number */
3221
    &bytes_as_sequence,                         /* tp_as_sequence */
3222
    &bytes_as_mapping,                          /* tp_as_mapping */
3223
    bytes_hash,                                 /* tp_hash */
3224
    0,                                          /* tp_call */
3225
    bytes_str,                                  /* tp_str */
3226
    PyObject_GenericGetAttr,                    /* tp_getattro */
3227
    0,                                          /* tp_setattro */
3228
    &bytes_as_buffer,                           /* tp_as_buffer */
3229
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3230
        Py_TPFLAGS_BYTES_SUBCLASS |
3231
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3232
    bytes_doc,                                  /* tp_doc */
3233
    0,                                          /* tp_traverse */
3234
    0,                                          /* tp_clear */
3235
    bytes_richcompare,                          /* tp_richcompare */
3236
    0,                                          /* tp_weaklistoffset */
3237
    bytes_iter,                                 /* tp_iter */
3238
    0,                                          /* tp_iternext */
3239
    bytes_methods,                              /* tp_methods */
3240
    0,                                          /* tp_members */
3241
    0,                                          /* tp_getset */
3242
    0,                                          /* tp_base */
3243
    0,                                          /* tp_dict */
3244
    0,                                          /* tp_descr_get */
3245
    0,                                          /* tp_descr_set */
3246
    0,                                          /* tp_dictoffset */
3247
    0,                                          /* tp_init */
3248
    bytes_alloc,                                /* tp_alloc */
3249
    bytes_new,                                  /* tp_new */
3250
    PyObject_Free,                              /* tp_free */
3251
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3252
};
3253
3254
void
3255
PyBytes_Concat(PyObject **pv, PyObject *w)
3256
0
{
3257
0
    assert(pv != NULL);
3258
0
    if (*pv == NULL)
3259
0
        return;
3260
0
    if (w == NULL) {
3261
0
        Py_CLEAR(*pv);
3262
0
        return;
3263
0
    }
3264
3265
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3266
        /* Only one reference, so we can resize in place */
3267
0
        Py_ssize_t oldsize;
3268
0
        Py_buffer wb;
3269
3270
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3271
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3272
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3273
0
            Py_CLEAR(*pv);
3274
0
            return;
3275
0
        }
3276
3277
0
        oldsize = PyBytes_GET_SIZE(*pv);
3278
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3279
0
            PyErr_NoMemory();
3280
0
            goto error;
3281
0
        }
3282
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3283
0
            goto error;
3284
3285
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3286
0
        PyBuffer_Release(&wb);
3287
0
        return;
3288
3289
0
      error:
3290
0
        PyBuffer_Release(&wb);
3291
0
        Py_CLEAR(*pv);
3292
0
        return;
3293
0
    }
3294
3295
0
    else {
3296
        /* Multiple references, need to create new object */
3297
0
        PyObject *v;
3298
0
        v = bytes_concat(*pv, w);
3299
0
        Py_SETREF(*pv, v);
3300
0
    }
3301
0
}
3302
3303
void
3304
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3305
0
{
3306
0
    PyBytes_Concat(pv, w);
3307
0
    Py_XDECREF(w);
3308
0
}
3309
3310
3311
/* The following function breaks the notion that bytes are immutable:
3312
   it changes the size of a bytes object.  You can think of it
3313
   as creating a new bytes object and destroying the old one, only
3314
   more efficiently.
3315
   Note that if there's not enough memory to resize the bytes object, the
3316
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3317
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3318
   returned, and the value in *pv may or may not be the same as on input.
3319
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3320
   does *not* include that), and a trailing \0 byte is stored.
3321
*/
3322
3323
int
3324
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3325
3.87M
{
3326
3.87M
    PyObject *v;
3327
3.87M
    PyBytesObject *sv;
3328
3.87M
    v = *pv;
3329
3.87M
    if (!PyBytes_Check(v) || newsize < 0) {
3330
0
        *pv = 0;
3331
0
        Py_DECREF(v);
3332
0
        PyErr_BadInternalCall();
3333
0
        return -1;
3334
0
    }
3335
3.87M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3336
3.87M
    if (oldsize == newsize) {
3337
        /* return early if newsize equals to v->ob_size */
3338
507k
        return 0;
3339
507k
    }
3340
3.36M
    if (oldsize == 0) {
3341
1.21M
        *pv = _PyBytes_FromSize(newsize, 0);
3342
1.21M
        Py_DECREF(v);
3343
1.21M
        return (*pv == NULL) ? -1 : 0;
3344
1.21M
    }
3345
2.15M
    if (newsize == 0) {
3346
14.2k
        *pv = bytes_get_empty();
3347
14.2k
        Py_DECREF(v);
3348
14.2k
        return 0;
3349
14.2k
    }
3350
2.13M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3351
0
        if (oldsize < newsize) {
3352
0
            *pv = _PyBytes_FromSize(newsize, 0);
3353
0
            if (*pv) {
3354
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3355
0
            }
3356
0
        }
3357
0
        else {
3358
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3359
0
        }
3360
0
        Py_DECREF(v);
3361
0
        return (*pv == NULL) ? -1 : 0;
3362
0
    }
3363
3364
#ifdef Py_TRACE_REFS
3365
    _Py_ForgetReference(v);
3366
#endif
3367
2.13M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3368
2.13M
    *pv = (PyObject *)
3369
2.13M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3370
2.13M
    if (*pv == NULL) {
3371
#ifdef Py_REF_DEBUG
3372
        _Py_DecRefTotal(_PyThreadState_GET());
3373
#endif
3374
0
        PyObject_Free(v);
3375
0
        PyErr_NoMemory();
3376
0
        return -1;
3377
0
    }
3378
2.13M
    _Py_NewReferenceNoTotal(*pv);
3379
2.13M
    sv = (PyBytesObject *) *pv;
3380
2.13M
    Py_SET_SIZE(sv, newsize);
3381
2.13M
    sv->ob_sval[newsize] = '\0';
3382
2.13M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3383
2.13M
    return 0;
3384
2.13M
}
3385
3386
3387
/*********************** Bytes Iterator ****************************/
3388
3389
/* Iterator state for iterating over a bytes object. */
typedef struct {
3390
    PyObject_HEAD
3391
    /* Index of the next byte that striter_next() will return. */
    Py_ssize_t it_index;
3392
    /* The bytes object being iterated; striter_dealloc() releases it. */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3393
} striterobject;
3394
3395
2.66k
/* Cast a generic object pointer to the iterator struct (no type check). */
#define _striterobject_CAST(op)  ((striterobject *)(op))
3396
3397
static void
3398
striter_dealloc(PyObject *op)
3399
92
{
3400
92
    striterobject *it = _striterobject_CAST(op);
3401
92
    _PyObject_GC_UNTRACK(it);
3402
92
    Py_XDECREF(it->it_seq);
3403
92
    PyObject_GC_Del(it);
3404
92
}
3405
3406
static int
3407
striter_traverse(PyObject *op, visitproc visit, void *arg)
3408
0
{
3409
0
    striterobject *it = _striterobject_CAST(op);
3410
0
    Py_VISIT(it->it_seq);
3411
0
    return 0;
3412
0
}
3413
3414
/* tp_iternext: return the next byte as an int object, or NULL (without
   setting an exception here) once the iterator is exhausted. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *self = _striterobject_CAST(op);
    assert(self != NULL);

    PyBytesObject *bytes = self->it_seq;
    if (bytes == NULL) {
        /* already exhausted on an earlier call */
        return NULL;
    }
    assert(PyBytes_Check(bytes));

    if (self->it_index >= PyBytes_GET_SIZE(bytes)) {
        /* Just ran off the end: drop the sequence so later calls take
           the fast path above. */
        self->it_seq = NULL;
        Py_DECREF(bytes);
        return NULL;
    }

    unsigned char value = (unsigned char)bytes->ob_sval[self->it_index];
    self->it_index++;
    return _PyLong_FromUnsignedChar(value);
}
3435
3436
/* __length_hint__: number of bytes not yet returned, or 0 once the
   iterator has dropped its sequence. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *self = _striterobject_CAST(op);
    Py_ssize_t remaining = (self->it_seq != NULL)
        ? PyBytes_GET_SIZE(self->it_seq) - self->it_index
        : 0;

    return PyLong_FromSsize_t(remaining);
}
3445
3446
PyDoc_STRVAR(length_hint_doc,
3447
             "Private method returning an estimate of len(list(it)).");
3448
3449
/* __reduce__: returns (iter, (bytes,), index) for a live iterator, or
   (iter, ((),)) for an exhausted one. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    PyObject *iter_builtin = _PyEval_GetBuiltin(&_Py_ID(iter));

    /* _PyEval_GetBuiltin can invoke arbitrary code,
     * call must be before access of iterator pointers.
     * see issue #101765 */
    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_builtin);
    }
    return Py_BuildValue("N(O)n", iter_builtin, self->it_seq,
                         self->it_index);
}
3464
3465
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3466
3467
/* __setstate__: restore the iteration position from an int.  The value is
   clamped to [0, len(sequence)]; an exhausted iterator is left as is. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq != NULL) {
        const Py_ssize_t limit = PyBytes_GET_SIZE(self->it_seq);
        if (index < 0) {
            index = 0;
        }
        else if (index > limit) {
            index = limit; /* iterator exhausted */
        }
        self->it_index = index;
    }
    Py_RETURN_NONE;
}
3483
3484
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3485
3486
static PyMethodDef striter_methods[] = {
3487
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3488
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3489
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3490
    {NULL,              NULL}           /* sentinel */
3491
};
3492
3493
PyTypeObject PyBytesIter_Type = {
3494
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3495
    "bytes_iterator",                           /* tp_name */
3496
    sizeof(striterobject),                      /* tp_basicsize */
3497
    0,                                          /* tp_itemsize */
3498
    /* methods */
3499
    striter_dealloc,                            /* tp_dealloc */
3500
    0,                                          /* tp_vectorcall_offset */
3501
    0,                                          /* tp_getattr */
3502
    0,                                          /* tp_setattr */
3503
    0,                                          /* tp_as_async */
3504
    0,                                          /* tp_repr */
3505
    0,                                          /* tp_as_number */
3506
    0,                                          /* tp_as_sequence */
3507
    0,                                          /* tp_as_mapping */
3508
    0,                                          /* tp_hash */
3509
    0,                                          /* tp_call */
3510
    0,                                          /* tp_str */
3511
    PyObject_GenericGetAttr,                    /* tp_getattro */
3512
    0,                                          /* tp_setattro */
3513
    0,                                          /* tp_as_buffer */
3514
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3515
    0,                                          /* tp_doc */
3516
    striter_traverse,                           /* tp_traverse */
3517
    0,                                          /* tp_clear */
3518
    0,                                          /* tp_richcompare */
3519
    0,                                          /* tp_weaklistoffset */
3520
    PyObject_SelfIter,                          /* tp_iter */
3521
    striter_next,                               /* tp_iternext */
3522
    striter_methods,                            /* tp_methods */
3523
    0,
3524
};
3525
3526
/* Create a new iterator over the bytes object `seq`.

   Raises a bad-internal-call error and returns NULL if `seq` is not a
   bytes object; returns NULL if the iterator cannot be allocated.  On
   success the iterator holds a new strong reference to `seq`, starts at
   index 0 and is tracked by the GC. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
    if (it != NULL) {
        it->it_index = 0;
        it->it_seq = (PyBytesObject *)Py_NewRef(seq);
        _PyObject_GC_TRACK(it);
    }
    return (PyObject *)it;
}
3543
3544
3545
void
3546
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3547
    const char* src, Py_ssize_t len_src)
3548
115k
{
3549
115k
    if (len_dest == 0) {
3550
567
        return;
3551
567
    }
3552
114k
    if (len_src == 1) {
3553
112k
        memset(dest, src[0], len_dest);
3554
112k
    }
3555
2.91k
    else {
3556
2.91k
        if (src != dest) {
3557
2.91k
            memcpy(dest, src, len_src);
3558
2.91k
        }
3559
2.91k
        Py_ssize_t copied = len_src;
3560
7.00k
        while (copied < len_dest) {
3561
4.08k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3562
4.08k
            memcpy(dest + copied, dest, bytes_to_copy);
3563
4.08k
            copied += bytes_to_copy;
3564
4.08k
        }
3565
2.91k
    }
3566
114k
}
3567
3568
3569
// --- PyBytesWriter API -----------------------------------------------------
3570
3571
static inline char*
3572
byteswriter_data(PyBytesWriter *writer)
3573
16.1M
{
3574
16.1M
    return _PyBytesWriter_GetData(writer);
3575
16.1M
}
3576
3577
3578
static inline Py_ssize_t
3579
byteswriter_allocated(PyBytesWriter *writer)
3580
15.9M
{
3581
15.9M
    if (writer->obj == NULL) {
3582
15.1M
        return sizeof(writer->small_buffer);
3583
15.1M
    }
3584
798k
    else if (writer->use_bytearray) {
3585
0
        return PyByteArray_GET_SIZE(writer->obj);
3586
0
    }
3587
798k
    else {
3588
798k
        return PyBytes_GET_SIZE(writer->obj);
3589
798k
    }
3590
15.9M
}
3591
3592
3593
#ifdef MS_WINDOWS
3594
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
3595
#  define OVERALLOCATE_FACTOR 2
3596
#else
3597
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
3598
46.1k
#  define OVERALLOCATE_FACTOR 4
3599
#endif
3600
3601
static inline int
3602
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3603
8.34M
{
3604
8.34M
    assert(size >= 0);
3605
3606
8.34M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3607
8.34M
    if (size <= old_allocated) {
3608
7.25M
        return 0;
3609
7.25M
    }
3610
3611
1.09M
    if (resize & writer->overallocate) {
3612
23.0k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3613
23.0k
            size += size / OVERALLOCATE_FACTOR;
3614
23.0k
        }
3615
23.0k
    }
3616
3617
1.09M
    if (writer->obj != NULL) {
3618
23.0k
        if (writer->use_bytearray) {
3619
0
            if (PyByteArray_Resize(writer->obj, size)) {
3620
0
                return -1;
3621
0
            }
3622
0
        }
3623
23.0k
        else {
3624
23.0k
            if (_PyBytes_Resize(&writer->obj, size)) {
3625
0
                return -1;
3626
0
            }
3627
23.0k
        }
3628
23.0k
        assert(writer->obj != NULL);
3629
23.0k
    }
3630
1.07M
    else if (writer->use_bytearray) {
3631
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3632
0
        if (writer->obj == NULL) {
3633
0
            return -1;
3634
0
        }
3635
0
        if (resize) {
3636
0
            assert((size_t)size > sizeof(writer->small_buffer));
3637
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3638
0
                   writer->small_buffer,
3639
0
                   sizeof(writer->small_buffer));
3640
0
        }
3641
0
    }
3642
1.07M
    else {
3643
1.07M
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3644
1.07M
        if (writer->obj == NULL) {
3645
0
            return -1;
3646
0
        }
3647
1.07M
        if (resize) {
3648
0
            assert((size_t)size > sizeof(writer->small_buffer));
3649
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3650
0
                   writer->small_buffer,
3651
0
                   sizeof(writer->small_buffer));
3652
0
        }
3653
1.07M
    }
3654
3655
#ifdef Py_DEBUG
3656
    Py_ssize_t allocated = byteswriter_allocated(writer);
3657
    if (resize && allocated > old_allocated) {
3658
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3659
               allocated - old_allocated);
3660
    }
3661
#endif
3662
3663
1.09M
    return 0;
3664
1.09M
}
3665
3666
3667
static PyBytesWriter*
3668
byteswriter_create(Py_ssize_t size, int use_bytearray)
3669
8.32M
{
3670
8.32M
    if (size < 0) {
3671
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3672
0
        return NULL;
3673
0
    }
3674
3675
8.32M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3676
8.32M
    if (writer == NULL) {
3677
14.9k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3678
14.9k
        if (writer == NULL) {
3679
0
            PyErr_NoMemory();
3680
0
            return NULL;
3681
0
        }
3682
14.9k
    }
3683
8.32M
    writer->obj = NULL;
3684
8.32M
    writer->size = 0;
3685
8.32M
    writer->use_bytearray = use_bytearray;
3686
8.32M
    writer->overallocate = !use_bytearray;
3687
3688
8.32M
    if (size >= 1) {
3689
8.32M
        if (byteswriter_resize(writer, size, 0) < 0) {
3690
0
            PyBytesWriter_Discard(writer);
3691
0
            return NULL;
3692
0
        }
3693
8.32M
        writer->size = size;
3694
8.32M
    }
3695
#ifdef Py_DEBUG
3696
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3697
#endif
3698
8.32M
    return writer;
3699
8.32M
}
3700
3701
PyBytesWriter*
3702
PyBytesWriter_Create(Py_ssize_t size)
3703
8.32M
{
3704
8.32M
    return byteswriter_create(size, 0);
3705
8.32M
}
3706
3707
PyBytesWriter*
3708
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3709
0
{
3710
0
    return byteswriter_create(size, 1);
3711
0
}
3712
3713
3714
void
3715
PyBytesWriter_Discard(PyBytesWriter *writer)
3716
8.47M
{
3717
8.47M
    if (writer == NULL) {
3718
147k
        return;
3719
147k
    }
3720
3721
8.32M
    Py_XDECREF(writer->obj);
3722
8.32M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3723
8.32M
}
3724
3725
3726
PyObject*
3727
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3728
7.90M
{
3729
7.90M
    PyObject *result;
3730
7.90M
    if (size == 0) {
3731
147k
        result = bytes_get_empty();
3732
147k
    }
3733
7.75M
    else if (writer->obj != NULL) {
3734
857k
        if (writer->use_bytearray) {
3735
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3736
0
                if (PyByteArray_Resize(writer->obj, size)) {
3737
0
                    goto error;
3738
0
                }
3739
0
            }
3740
0
        }
3741
857k
        else {
3742
857k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3743
827k
                if (_PyBytes_Resize(&writer->obj, size)) {
3744
0
                    goto error;
3745
0
                }
3746
827k
            }
3747
857k
        }
3748
857k
        result = writer->obj;
3749
857k
        writer->obj = NULL;
3750
857k
    }
3751
6.89M
    else if (writer->use_bytearray) {
3752
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3753
0
    }
3754
6.89M
    else {
3755
6.89M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3756
6.89M
    }
3757
7.90M
    PyBytesWriter_Discard(writer);
3758
7.90M
    return result;
3759
3760
0
error:
3761
0
    PyBytesWriter_Discard(writer);
3762
0
    return NULL;
3763
7.90M
}
3764
3765
PyObject*
3766
PyBytesWriter_Finish(PyBytesWriter *writer)
3767
110k
{
3768
110k
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3769
110k
}
3770
3771
3772
PyObject*
3773
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3774
7.59M
{
3775
7.59M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3776
7.59M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3777
0
        PyBytesWriter_Discard(writer);
3778
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3779
0
        return NULL;
3780
0
    }
3781
3782
7.59M
    return PyBytesWriter_FinishWithSize(writer, size);
3783
7.59M
}
3784
3785
3786
void*
3787
PyBytesWriter_GetData(PyBytesWriter *writer)
3788
8.52M
{
3789
8.52M
    return byteswriter_data(writer);
3790
8.52M
}
3791
3792
3793
Py_ssize_t
3794
PyBytesWriter_GetSize(PyBytesWriter *writer)
3795
0
{
3796
0
    return _PyBytesWriter_GetSize(writer);
3797
0
}
3798
3799
3800
static Py_ssize_t
3801
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3802
184
{
3803
184
    return byteswriter_allocated(writer);
3804
184
}
3805
3806
3807
int
3808
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3809
0
{
3810
0
    if (size < 0) {
3811
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3812
0
        return -1;
3813
0
    }
3814
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3815
0
        return -1;
3816
0
    }
3817
0
    writer->size = size;
3818
0
    return 0;
3819
0
}
3820
3821
3822
static void*
3823
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3824
                                      void *data)
3825
0
{
3826
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3827
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3828
0
        return NULL;
3829
0
    }
3830
0
    return byteswriter_data(writer) + pos;
3831
0
}
3832
3833
3834
int
3835
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3836
23.0k
{
3837
23.0k
    if (size < 0 && writer->size + size < 0) {
3838
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3839
0
        return -1;
3840
0
    }
3841
23.0k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3842
0
        PyErr_NoMemory();
3843
0
        return -1;
3844
0
    }
3845
23.0k
    size = writer->size + size;
3846
3847
23.0k
    if (byteswriter_resize(writer, size, 1) < 0) {
3848
0
        return -1;
3849
0
    }
3850
23.0k
    writer->size = size;
3851
23.0k
    return 0;
3852
23.0k
}
3853
3854
3855
void*
3856
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3857
                                   void *buf)
3858
0
{
3859
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3860
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3861
0
        return NULL;
3862
0
    }
3863
0
    return byteswriter_data(writer) + pos;
3864
0
}
3865
3866
3867
int
3868
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3869
                         const void *bytes, Py_ssize_t size)
3870
0
{
3871
0
    if (size < 0) {
3872
0
        size_t len = strlen(bytes);
3873
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3874
0
            PyErr_NoMemory();
3875
0
            return -1;
3876
0
        }
3877
0
        size = (Py_ssize_t)len;
3878
0
    }
3879
3880
0
    Py_ssize_t pos = writer->size;
3881
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3882
0
        return -1;
3883
0
    }
3884
0
    char *buf = byteswriter_data(writer);
3885
0
    memcpy(buf + pos, bytes, size);
3886
0
    return 0;
3887
0
}
3888
3889
3890
int
3891
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3892
0
{
3893
0
    Py_ssize_t pos = writer->size;
3894
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3895
0
        return -1;
3896
0
    }
3897
3898
0
    va_list vargs;
3899
0
    va_start(vargs, format);
3900
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3901
0
    va_end(vargs);
3902
3903
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3904
0
    return PyBytesWriter_Resize(writer, size);
3905
0
}