Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
307M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
29
30
/* Forward declaration */
31
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
32
                                                   Py_ssize_t size, void *data);
33
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
34
35
36
30.9M
/* Table of single-byte bytes objects kept in the bytes_characters
   singleton slot. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the single-byte bytes object for byte value `ch`.
   Expands to a plain parenthesized expression (no trailing ';') so it can be
   used wherever an expression is allowed, not only as a full statement. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[(ch)]))
/* Address of the empty bytes object kept in the bytes_empty singleton slot. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
40
41
42
// Return a reference to the immortal empty bytes string singleton.
43
static inline PyObject* bytes_get_empty(void)
44
5.08M
{
45
5.08M
    PyObject *empty = &EMPTY->ob_base.ob_base;
46
5.08M
    assert(_Py_IsImmortal(empty));
47
5.08M
    return empty;
48
5.08M
}
49
50
51
static inline void
52
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
53
205M
{
54
205M
_Py_COMP_DIAG_PUSH
55
205M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
56
#ifdef Py_GIL_DISABLED
57
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
58
#else
59
205M
    a->ob_shash = hash;
60
205M
#endif
61
205M
_Py_COMP_DIAG_POP
62
205M
}
63
64
static inline Py_hash_t
65
get_ob_shash(PyBytesObject *a)
66
80.5M
{
67
80.5M
_Py_COMP_DIAG_PUSH
68
80.5M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
69
#ifdef Py_GIL_DISABLED
70
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
71
#else
72
80.5M
    return a->ob_shash;
73
80.5M
#endif
74
80.5M
_Py_COMP_DIAG_POP
75
80.5M
}
76
77
78
/*
79
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
80
   string containing exactly 'size' bytes.
81
82
   For PyBytes_FromStringAndSize(), the parameter 'str' is
83
   either NULL or else points to a string containing at least 'size' bytes.
84
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
85
   not have to be null-terminated.  (Therefore it is safe to construct a
86
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
87
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
88
   bytes (setting the last byte to the null terminating character) and you can
89
   fill in the data yourself.  If 'str' is non-NULL then the resulting
90
   PyBytes object must be treated as immutable and you must not fill in nor
91
   alter the data yourself, since the strings may be shared.
92
93
   The PyObject member 'op->ob_size', which denotes the number of "extra
94
   items" in a variable-size object, will contain the number of bytes
95
   allocated for string data, not counting the null terminating character.
96
   It is therefore equal to the 'size' parameter (for
97
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
98
   parameter (for PyBytes_FromString()).
99
*/
100
static PyObject *
101
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
102
152M
{
103
152M
    PyBytesObject *op;
104
152M
    assert(size >= 0);
105
106
152M
    if (size == 0) {
107
0
        return bytes_get_empty();
108
0
    }
109
110
152M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
111
0
        PyErr_SetString(PyExc_OverflowError,
112
0
                        "byte string is too large");
113
0
        return NULL;
114
0
    }
115
116
    /* Inline PyObject_NewVar */
117
152M
    if (use_calloc)
118
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
119
152M
    else
120
152M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
121
152M
    if (op == NULL) {
122
0
        return PyErr_NoMemory();
123
0
    }
124
152M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
125
152M
    set_ob_shash(op, -1);
126
152M
    if (!use_calloc) {
127
152M
        op->ob_sval[size] = '\0';
128
152M
    }
129
152M
    return (PyObject *) op;
130
152M
}
131
132
PyObject *
133
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
134
172M
{
135
172M
    PyBytesObject *op;
136
172M
    if (size < 0) {
137
0
        PyErr_SetString(PyExc_SystemError,
138
0
            "Negative size passed to PyBytes_FromStringAndSize");
139
0
        return NULL;
140
0
    }
141
172M
    if (size == 1 && str != NULL) {
142
30.9M
        op = CHARACTER(*str & 255);
143
30.9M
        assert(_Py_IsImmortal(op));
144
30.9M
        return (PyObject *)op;
145
30.9M
    }
146
141M
    if (size == 0) {
147
5.01M
        return bytes_get_empty();
148
5.01M
    }
149
150
136M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
151
136M
    if (op == NULL)
152
0
        return NULL;
153
136M
    if (str == NULL)
154
9.60M
        return (PyObject *) op;
155
156
126M
    memcpy(op->ob_sval, str, size);
157
126M
    return (PyObject *) op;
158
136M
}
159
160
PyObject *
161
PyBytes_FromString(const char *str)
162
984
{
163
984
    size_t size;
164
984
    PyBytesObject *op;
165
166
984
    assert(str != NULL);
167
984
    size = strlen(str);
168
984
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
169
0
        PyErr_SetString(PyExc_OverflowError,
170
0
            "byte string is too long");
171
0
        return NULL;
172
0
    }
173
174
984
    if (size == 0) {
175
0
        return bytes_get_empty();
176
0
    }
177
984
    else if (size == 1) {
178
0
        op = CHARACTER(*str & 255);
179
0
        assert(_Py_IsImmortal(op));
180
0
        return (PyObject *)op;
181
0
    }
182
183
    /* Inline PyObject_NewVar */
184
984
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
185
984
    if (op == NULL) {
186
0
        return PyErr_NoMemory();
187
0
    }
188
984
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
189
984
    set_ob_shash(op, -1);
190
984
    memcpy(op->ob_sval, str, size+1);
191
984
    return (PyObject *) op;
192
984
}
193
194
195
static char*
196
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
197
                 const char *format, va_list vargs)
198
0
{
199
0
    const char *f;
200
0
    const char *p;
201
0
    Py_ssize_t prec;
202
0
    int longflag;
203
0
    int size_tflag;
204
    /* Longest 64-bit formatted numbers:
205
       - "18446744073709551615\0" (21 bytes)
206
       - "-9223372036854775808\0" (21 bytes)
207
       Decimal takes the most space (it isn't enough for octal.)
208
209
       Longest 64-bit pointer representation:
210
       "0xffffffffffffffff\0" (19 bytes). */
211
0
    char buffer[21];
212
213
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
214
215
0
#define WRITE_BYTES_LEN(str, len_expr) \
216
0
    do { \
217
0
        size_t len = (len_expr); \
218
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
219
0
        if (s == NULL) { \
220
0
            goto error; \
221
0
        } \
222
0
        memcpy(s, (str), len); \
223
0
        s += len; \
224
0
    } while (0)
225
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
226
227
0
    for (f = format; *f; f++) {
228
0
        if (*f != '%') {
229
0
            *s++ = *f;
230
0
            continue;
231
0
        }
232
233
0
        p = f++;
234
235
        /* ignore the width (ex: 10 in "%10s") */
236
0
        while (Py_ISDIGIT(*f))
237
0
            f++;
238
239
        /* parse the precision (ex: 10 in "%.10s") */
240
0
        prec = 0;
241
0
        if (*f == '.') {
242
0
            f++;
243
0
            for (; Py_ISDIGIT(*f); f++) {
244
0
                prec = (prec * 10) + (*f - '0');
245
0
            }
246
0
        }
247
248
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
249
0
            f++;
250
251
        /* handle the long flag ('l'), but only for %ld and %lu.
252
           others can be added when necessary. */
253
0
        longflag = 0;
254
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
255
0
            longflag = 1;
256
0
            ++f;
257
0
        }
258
259
        /* handle the size_t flag ('z'). */
260
0
        size_tflag = 0;
261
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
262
0
            size_tflag = 1;
263
0
            ++f;
264
0
        }
265
266
0
        switch (*f) {
267
0
        case 'c':
268
0
        {
269
0
            int c = va_arg(vargs, int);
270
0
            if (c < 0 || c > 255) {
271
0
                PyErr_SetString(PyExc_OverflowError,
272
0
                                "PyBytes_FromFormatV(): %c format "
273
0
                                "expects an integer in range [0; 255]");
274
0
                goto error;
275
0
            }
276
0
            *s++ = (unsigned char)c;
277
0
            break;
278
0
        }
279
280
0
        case 'd':
281
0
            if (longflag) {
282
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
283
0
            }
284
0
            else if (size_tflag) {
285
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
286
0
            }
287
0
            else {
288
0
                sprintf(buffer, "%d", va_arg(vargs, int));
289
0
            }
290
0
            assert(strlen(buffer) < sizeof(buffer));
291
0
            WRITE_BYTES(buffer);
292
0
            break;
293
294
0
        case 'u':
295
0
            if (longflag) {
296
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
297
0
            }
298
0
            else if (size_tflag) {
299
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
300
0
            }
301
0
            else {
302
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
303
0
            }
304
0
            assert(strlen(buffer) < sizeof(buffer));
305
0
            WRITE_BYTES(buffer);
306
0
            break;
307
308
0
        case 'i':
309
0
            sprintf(buffer, "%i", va_arg(vargs, int));
310
0
            assert(strlen(buffer) < sizeof(buffer));
311
0
            WRITE_BYTES(buffer);
312
0
            break;
313
314
0
        case 'x':
315
0
            sprintf(buffer, "%x", va_arg(vargs, int));
316
0
            assert(strlen(buffer) < sizeof(buffer));
317
0
            WRITE_BYTES(buffer);
318
0
            break;
319
320
0
        case 's':
321
0
        {
322
0
            Py_ssize_t i;
323
324
0
            p = va_arg(vargs, const char*);
325
0
            if (prec <= 0) {
326
0
                i = strlen(p);
327
0
            }
328
0
            else {
329
0
                i = 0;
330
0
                while (i < prec && p[i]) {
331
0
                    i++;
332
0
                }
333
0
            }
334
0
            WRITE_BYTES_LEN(p, i);
335
0
            break;
336
0
        }
337
338
0
        case 'p':
339
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
340
0
            assert(strlen(buffer) < sizeof(buffer));
341
            /* %p is ill-defined:  ensure leading 0x. */
342
0
            if (buffer[1] == 'X')
343
0
                buffer[1] = 'x';
344
0
            else if (buffer[1] != 'x') {
345
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
346
0
                buffer[0] = '0';
347
0
                buffer[1] = 'x';
348
0
            }
349
0
            WRITE_BYTES(buffer);
350
0
            break;
351
352
0
        case '%':
353
0
            *s++ = '%';
354
0
            break;
355
356
0
        default:
357
            /* invalid format string: copy unformatted string and exit */
358
0
            WRITE_BYTES(p);
359
0
            return s;
360
0
        }
361
0
    }
362
363
0
#undef WRITE_BYTES
364
0
#undef WRITE_BYTES_LEN
365
366
0
    return s;
367
368
0
 error:
369
0
    return NULL;
370
0
}
371
372
373
PyObject *
374
PyBytes_FromFormatV(const char *format, va_list vargs)
375
0
{
376
0
    Py_ssize_t alloc = strlen(format);
377
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
378
0
    if (writer == NULL) {
379
0
        return NULL;
380
0
    }
381
382
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
383
0
    if (s == NULL) {
384
0
        PyBytesWriter_Discard(writer);
385
0
        return NULL;
386
0
    }
387
388
0
    return PyBytesWriter_FinishWithPointer(writer, s);
389
0
}
390
391
392
/* Variadic front end: collect the arguments into a va_list and delegate to
   PyBytes_FromFormatV().  Returns whatever that call returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
403
404
405
/* Helpers for formatstring */
406
407
0
/* Raise EXC with a message that identifies the offending format argument.

   The expansion reads two names from the scope where it is used:
     key     mapping key object (PyObject *) or NULL
     argidx  positional argument index (Py_ssize_t)
   The prefix is "format argument <key repr>: " when key is non-NULL,
   "format argument <argidx>: " when argidx >= 0, and "format argument: "
   otherwise.  FMT must be a string literal; at least one variadic argument
   is required (callers with nothing to format pass a "%s" / "" pair). */
#define FORMAT_ERROR(EXC, FMT, ...)                                         \
    do {                                                                    \
        if (key != NULL) {                                                  \
            PyErr_Format((EXC), "format argument %R: " FMT,                 \
                         key, __VA_ARGS__);                                 \
        }                                                                   \
        else if (argidx >= 0) {                                             \
            PyErr_Format((EXC), "format argument %zd: " FMT,                \
                         argidx, __VA_ARGS__);                              \
        }                                                                   \
        else {                                                              \
            PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);      \
        }                                                                   \
    } while (0)
420
421
/* Fetch the next argument for a conversion and advance *p_argidx.

   arglen >= 0: `args` is treated as a tuple and item *p_argidx is returned
                via PyTuple_GetItem().
   arglen <  0: `args` itself is the single argument; it is returned only
                when `allowone` is true.
   In every other case (index exhausted, or a lone argument that is not
   allowed here) TypeError is set and NULL is returned.  Note that the index
   is advanced whenever it was still below arglen, even if the call then
   fails because `allowone` is false. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone)
{
    const Py_ssize_t current = *p_argidx;

    if (current < arglen) {
        *p_argidx = current + 1;
        if (arglen >= 0) {
            return PyTuple_GetItem(args, current);
        }
        if (allowone) {
            return args;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "not enough arguments for format string (got %zd)",
                 arglen < 0 ? 1 : arglen);
    return NULL;
}
439
440
/* Returns a new reference to a PyBytes object, or NULL on failure. */
441
442
static char*
443
formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key,
444
            int flags, int prec, int type,
445
            PyObject **p_result, PyBytesWriter *writer, char *str)
446
0
{
447
0
    char *p;
448
0
    PyObject *result;
449
0
    double x;
450
0
    size_t len;
451
0
    int dtoa_flags = 0;
452
453
0
    x = PyFloat_AsDouble(v);
454
0
    if (x == -1.0 && PyErr_Occurred()) {
455
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
456
0
            FORMAT_ERROR(PyExc_TypeError,
457
0
                         "%%%c requires a real number, not %T",
458
0
                         type, v);
459
0
        }
460
0
        return NULL;
461
0
    }
462
463
0
    if (prec < 0)
464
0
        prec = 6;
465
466
0
    if (flags & F_ALT) {
467
0
        dtoa_flags |= Py_DTSF_ALT;
468
0
    }
469
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
470
471
0
    if (p == NULL)
472
0
        return NULL;
473
474
0
    len = strlen(p);
475
0
    if (writer != NULL) {
476
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
477
0
        if (str == NULL) {
478
0
            PyMem_Free(p);
479
0
            return NULL;
480
0
        }
481
0
        memcpy(str, p, len);
482
0
        PyMem_Free(p);
483
0
        str += len;
484
0
        return str;
485
0
    }
486
487
0
    result = PyBytes_FromStringAndSize(p, len);
488
0
    PyMem_Free(p);
489
0
    *p_result = result;
490
0
    return result != NULL ? str : NULL;
491
0
}
492
493
static PyObject *
494
formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key,
495
           int flags, int prec, int type)
496
0
{
497
0
    PyObject *result, *iobj;
498
0
    if (PyLong_Check(v))
499
0
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
500
0
    if (PyNumber_Check(v)) {
501
        /* make sure number is a type of integer for o, x, and X */
502
0
        if (type == 'o' || type == 'x' || type == 'X')
503
0
            iobj = _PyNumber_Index(v);
504
0
        else
505
0
            iobj = PyNumber_Long(v);
506
0
        if (iobj != NULL) {
507
0
            assert(PyLong_Check(iobj));
508
0
            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
509
0
            Py_DECREF(iobj);
510
0
            return result;
511
0
        }
512
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError))
513
0
            return NULL;
514
0
    }
515
0
    FORMAT_ERROR(PyExc_TypeError,
516
0
                 "%%%c requires %s, not %T",
517
0
                 type,
518
0
                 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
519
0
                                                             : "a real number",
520
0
                 v);
521
0
    return NULL;
522
0
}
523
524
static int
525
byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p)
526
0
{
527
0
    if (PyBytes_Check(arg)) {
528
0
        if (PyBytes_GET_SIZE(arg) != 1) {
529
0
            FORMAT_ERROR(PyExc_TypeError,
530
0
                         "%%c requires an integer in range(256) or "
531
0
                         "a single byte, not a bytes object of length %zd",
532
0
                         PyBytes_GET_SIZE(arg));
533
0
            return 0;
534
0
        }
535
0
        *p = PyBytes_AS_STRING(arg)[0];
536
0
        return 1;
537
0
    }
538
0
    else if (PyByteArray_Check(arg)) {
539
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
540
0
            FORMAT_ERROR(PyExc_TypeError,
541
0
                         "%%c requires an integer in range(256) or "
542
0
                         "a single byte, not a bytearray object of length %zd",
543
0
                         PyByteArray_GET_SIZE(arg));
544
0
            return 0;
545
0
        }
546
0
        *p = PyByteArray_AS_STRING(arg)[0];
547
0
        return 1;
548
0
    }
549
0
    else if (PyIndex_Check(arg)) {
550
0
        int overflow;
551
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
552
0
        if (ival == -1 && PyErr_Occurred()) {
553
0
            return 0;
554
0
        }
555
0
        if (!(0 <= ival && ival <= 255)) {
556
            /* this includes an overflow in converting to C long */
557
0
            FORMAT_ERROR(PyExc_OverflowError,
558
0
                         "%%c argument not in range(256)%s", "");
559
0
            return 0;
560
0
        }
561
0
        *p = (char)ival;
562
0
        return 1;
563
0
    }
564
0
    FORMAT_ERROR(PyExc_TypeError,
565
0
                 "%%c requires an integer in range(256) or "
566
0
                 "a single byte, not %T",
567
0
                 arg);
568
0
    return 0;
569
0
}
570
571
static PyObject *_PyBytes_FromBuffer(PyObject *x);
572
573
static PyObject *
574
format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key,
575
           const char **pbuf, Py_ssize_t *plen)
576
0
{
577
0
    PyObject *func, *result;
578
    /* is it a bytes object? */
579
0
    if (PyBytes_Check(v)) {
580
0
        *pbuf = PyBytes_AS_STRING(v);
581
0
        *plen = PyBytes_GET_SIZE(v);
582
0
        return Py_NewRef(v);
583
0
    }
584
0
    if (PyByteArray_Check(v)) {
585
0
        *pbuf = PyByteArray_AS_STRING(v);
586
0
        *plen = PyByteArray_GET_SIZE(v);
587
0
        return Py_NewRef(v);
588
0
    }
589
    /* does it support __bytes__? */
590
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
591
0
    if (func != NULL) {
592
0
        result = _PyObject_CallNoArgs(func);
593
0
        Py_DECREF(func);
594
0
        if (result == NULL)
595
0
            return NULL;
596
0
        if (!PyBytes_Check(result)) {
597
0
            PyErr_Format(PyExc_TypeError,
598
0
                         "%T.__bytes__() must return a bytes, not %T",
599
0
                         v, result);
600
0
            Py_DECREF(result);
601
0
            return NULL;
602
0
        }
603
0
        *pbuf = PyBytes_AS_STRING(result);
604
0
        *plen = PyBytes_GET_SIZE(result);
605
0
        return result;
606
0
    }
607
    /* does it support buffer protocol? */
608
0
    if (PyObject_CheckBuffer(v)) {
609
        /* maybe we can avoid making a copy of the buffer object here? */
610
0
        result = _PyBytes_FromBuffer(v);
611
0
        if (result == NULL)
612
0
            return NULL;
613
0
        *pbuf = PyBytes_AS_STRING(result);
614
0
        *plen = PyBytes_GET_SIZE(result);
615
0
        return result;
616
0
    }
617
0
    FORMAT_ERROR(PyExc_TypeError,
618
0
                 "%%b requires a bytes-like object, "
619
0
                 "or an object that implements __bytes__, not %T",
620
0
                 v);
621
0
    return NULL;
622
0
}
623
624
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
625
626
PyObject *
627
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
628
                  PyObject *args, int use_bytearray)
629
0
{
630
0
    const char *fmt;
631
0
    Py_ssize_t arglen, argidx;
632
0
    Py_ssize_t fmtcnt;
633
0
    int args_owned = 0;
634
0
    PyObject *dict = NULL;
635
0
    PyObject *key = NULL;
636
637
0
    if (args == NULL) {
638
0
        PyErr_BadInternalCall();
639
0
        return NULL;
640
0
    }
641
0
    fmt = format;
642
0
    fmtcnt = format_len;
643
644
0
    PyBytesWriter *writer;
645
0
    if (use_bytearray) {
646
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
647
0
    }
648
0
    else {
649
0
        writer = PyBytesWriter_Create(fmtcnt);
650
0
    }
651
0
    if (writer == NULL) {
652
0
        return NULL;
653
0
    }
654
0
    char *res = PyBytesWriter_GetData(writer);
655
656
0
    if (PyTuple_Check(args)) {
657
0
        arglen = PyTuple_GET_SIZE(args);
658
0
        argidx = 0;
659
0
    }
660
0
    else {
661
0
        arglen = -1;
662
0
        argidx = -2;
663
0
    }
664
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
665
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
666
0
        !PyByteArray_Check(args)) {
667
0
            dict = args;
668
0
    }
669
670
0
    while (--fmtcnt >= 0) {
671
0
        if (*fmt != '%') {
672
0
            Py_ssize_t len;
673
0
            char *pos;
674
675
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
676
0
            if (pos != NULL)
677
0
                len = pos - fmt;
678
0
            else
679
0
                len = fmtcnt + 1;
680
0
            assert(len != 0);
681
682
0
            memcpy(res, fmt, len);
683
0
            res += len;
684
0
            fmt += len;
685
0
            fmtcnt -= (len - 1);
686
0
        }
687
0
        else {
688
            /* Got a format specifier */
689
0
            int flags = 0;
690
0
            Py_ssize_t width = -1;
691
0
            int prec = -1;
692
0
            int c = '\0';
693
0
            int fill;
694
0
            PyObject *v = NULL;
695
0
            PyObject *temp = NULL;
696
0
            const char *pbuf = NULL;
697
0
            int sign;
698
0
            Py_ssize_t len = 0;
699
0
            char onechar; /* For byte_converter() */
700
0
            Py_ssize_t alloc;
701
702
0
            fmt++;
703
0
            if (*fmt == '%') {
704
0
                *res++ = '%';
705
0
                fmt++;
706
0
                fmtcnt--;
707
0
                continue;
708
0
            }
709
0
            Py_CLEAR(key);
710
0
            const char *fmtstart = fmt;
711
0
            if (*fmt == '(') {
712
0
                const char *keystart;
713
0
                Py_ssize_t keylen;
714
0
                int pcount = 1;
715
716
0
                if (dict == NULL) {
717
0
                    PyErr_Format(PyExc_TypeError,
718
0
                                 "format requires a mapping, not %T",
719
0
                                 args);
720
0
                    goto error;
721
0
                }
722
0
                ++fmt;
723
0
                --fmtcnt;
724
0
                keystart = fmt;
725
                /* Skip over balanced parentheses */
726
0
                while (pcount > 0 && --fmtcnt >= 0) {
727
0
                    if (*fmt == ')')
728
0
                        --pcount;
729
0
                    else if (*fmt == '(')
730
0
                        ++pcount;
731
0
                    fmt++;
732
0
                }
733
0
                keylen = fmt - keystart - 1;
734
0
                if (fmtcnt < 0 || pcount > 0) {
735
0
                    PyErr_Format(PyExc_ValueError,
736
0
                                 "stray %% or incomplete format key "
737
0
                                 "at position %zd",
738
0
                                 (Py_ssize_t)(fmtstart - format - 1));
739
0
                    goto error;
740
0
                }
741
0
                key = PyBytes_FromStringAndSize(keystart,
742
0
                                                 keylen);
743
0
                if (key == NULL)
744
0
                    goto error;
745
0
                if (args_owned) {
746
0
                    Py_DECREF(args);
747
0
                    args_owned = 0;
748
0
                }
749
0
                args = PyObject_GetItem(dict, key);
750
0
                if (args == NULL) {
751
0
                    goto error;
752
0
                }
753
0
                args_owned = 1;
754
0
                arglen = -3;
755
0
                argidx = -4;
756
0
            }
757
0
            else {
758
0
                if (arglen < -1) {
759
0
                    PyErr_Format(PyExc_ValueError,
760
0
                                 "format requires a parenthesised mapping key "
761
0
                                 "at position %zd",
762
0
                                 (Py_ssize_t)(fmtstart - format - 1));
763
0
                    goto error;
764
0
                }
765
0
            }
766
767
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
768
0
            while (--fmtcnt >= 0) {
769
0
                switch (c = *fmt++) {
770
0
                case '-': flags |= F_LJUST; continue;
771
0
                case '+': flags |= F_SIGN; continue;
772
0
                case ' ': flags |= F_BLANK; continue;
773
0
                case '#': flags |= F_ALT; continue;
774
0
                case '0': flags |= F_ZERO; continue;
775
0
                }
776
0
                break;
777
0
            }
778
779
            /* Parse width. Example: "%10s" => width=10 */
780
0
            if (c == '*') {
781
0
                if (arglen < -1) {
782
0
                    PyErr_Format(PyExc_ValueError,
783
0
                            "* cannot be used with a parenthesised mapping key "
784
0
                            "at position %zd",
785
0
                            (Py_ssize_t)(fmtstart - format - 1));
786
0
                    goto error;
787
0
                }
788
0
                v = getnextarg(args, arglen, &argidx, 0);
789
0
                if (v == NULL)
790
0
                    goto error;
791
0
                if (!PyLong_Check(v)) {
792
0
                    FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
793
0
                    goto error;
794
0
                }
795
0
                width = PyLong_AsSsize_t(v);
796
0
                if (width == -1 && PyErr_Occurred()) {
797
0
                    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
798
0
                        FORMAT_ERROR(PyExc_OverflowError,
799
0
                                     "too big for width%s", "");
800
0
                    }
801
0
                    goto error;
802
0
                }
803
0
                if (width < 0) {
804
0
                    flags |= F_LJUST;
805
0
                    width = -width;
806
0
                }
807
0
                if (--fmtcnt >= 0)
808
0
                    c = *fmt++;
809
0
            }
810
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
811
0
                width = c - '0';
812
0
                while (--fmtcnt >= 0) {
813
0
                    c = Py_CHARMASK(*fmt++);
814
0
                    if (!Py_ISDIGIT(c))
815
0
                        break;
816
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
817
0
                        PyErr_Format(PyExc_ValueError,
818
0
                                     "width too big at position %zd",
819
0
                                     (Py_ssize_t)(fmtstart - format - 1));
820
0
                        goto error;
821
0
                    }
822
0
                    width = width*10 + (c - '0');
823
0
                }
824
0
            }
825
826
            /* Parse precision. Example: "%.3f" => prec=3 */
827
0
            if (c == '.') {
828
0
                prec = 0;
829
0
                if (--fmtcnt >= 0)
830
0
                    c = *fmt++;
831
0
                if (c == '*') {
832
0
                    if (arglen < -1) {
833
0
                        PyErr_Format(PyExc_ValueError,
834
0
                                "* cannot be used with a parenthesised mapping key "
835
0
                                "at position %zd",
836
0
                                (Py_ssize_t)(fmtstart - format - 1));
837
0
                        goto error;
838
0
                    }
839
0
                    v = getnextarg(args, arglen, &argidx, 0);
840
0
                    if (v == NULL)
841
0
                        goto error;
842
0
                    if (!PyLong_Check(v)) {
843
0
                        FORMAT_ERROR(PyExc_TypeError,
844
0
                                     "* requires int, not %T", v);
845
0
                        goto error;
846
0
                    }
847
0
                    prec = PyLong_AsInt(v);
848
0
                    if (prec == -1 && PyErr_Occurred()) {
849
0
                        if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
850
0
                            FORMAT_ERROR(PyExc_OverflowError,
851
0
                                         "too big for precision%s", "");
852
0
                        }
853
0
                        goto error;
854
0
                    }
855
0
                    if (prec < 0)
856
0
                        prec = 0;
857
0
                    if (--fmtcnt >= 0)
858
0
                        c = *fmt++;
859
0
                }
860
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
861
0
                    prec = c - '0';
862
0
                    while (--fmtcnt >= 0) {
863
0
                        c = Py_CHARMASK(*fmt++);
864
0
                        if (!Py_ISDIGIT(c))
865
0
                            break;
866
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
867
0
                            PyErr_Format(PyExc_ValueError,
868
0
                                "precision too big at position %zd",
869
0
                                (Py_ssize_t)(fmtstart - format - 1));
870
0
                            goto error;
871
0
                        }
872
0
                        prec = prec*10 + (c - '0');
873
0
                    }
874
0
                }
875
0
            } /* prec */
876
0
            if (fmtcnt >= 0) {
877
0
                if (c == 'h' || c == 'l' || c == 'L') {
878
0
                    if (--fmtcnt >= 0)
879
0
                        c = *fmt++;
880
0
                }
881
0
            }
882
0
            if (fmtcnt < 0) {
883
0
                PyErr_Format(PyExc_ValueError,
884
0
                             "stray %% at position %zd",
885
0
                             (Py_ssize_t)(fmtstart - format - 1));
886
0
                goto error;
887
0
            }
888
0
            v = getnextarg(args, arglen, &argidx, 1);
889
0
            if (v == NULL)
890
0
                goto error;
891
892
0
            if (fmtcnt == 0) {
893
                /* last write: disable writer overallocation */
894
0
                writer->overallocate = 0;
895
0
            }
896
897
0
            sign = 0;
898
0
            fill = ' ';
899
0
            switch (c) {
900
0
            case 'r':
901
                // %r is only for 2/3 code; 3 only code should use %a
902
0
            case 'a':
903
0
                temp = PyObject_ASCII(v);
904
0
                if (temp == NULL)
905
0
                    goto error;
906
0
                assert(PyUnicode_IS_ASCII(temp));
907
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
908
0
                len = PyUnicode_GET_LENGTH(temp);
909
0
                if (prec >= 0 && len > prec)
910
0
                    len = prec;
911
0
                break;
912
913
0
            case 's':
914
                // %s is only for 2/3 code; 3 only code should use %b
915
0
            case 'b':
916
0
                temp = format_obj(v, argidx, key, &pbuf, &len);
917
0
                if (temp == NULL)
918
0
                    goto error;
919
0
                if (prec >= 0 && len > prec)
920
0
                    len = prec;
921
0
                break;
922
923
0
            case 'i':
924
0
            case 'd':
925
0
            case 'u':
926
0
            case 'o':
927
0
            case 'x':
928
0
            case 'X':
929
0
                if (PyLong_CheckExact(v)
930
0
                    && width == -1 && prec == -1
931
0
                    && !(flags & (F_SIGN | F_BLANK))
932
0
                    && c != 'X')
933
0
                {
934
                    /* Fast path */
935
0
                    int alternate = flags & F_ALT;
936
0
                    int base;
937
938
0
                    switch(c)
939
0
                    {
940
0
                        default:
941
0
                            Py_UNREACHABLE();
942
0
                        case 'd':
943
0
                        case 'i':
944
0
                        case 'u':
945
0
                            base = 10;
946
0
                            break;
947
0
                        case 'o':
948
0
                            base = 8;
949
0
                            break;
950
0
                        case 'x':
951
0
                        case 'X':
952
0
                            base = 16;
953
0
                            break;
954
0
                    }
955
956
                    /* Fast path */
957
0
                    res = _PyLong_FormatBytesWriter(writer, res,
958
0
                                                    v, base, alternate);
959
0
                    if (res == NULL)
960
0
                        goto error;
961
0
                    continue;
962
0
                }
963
964
0
                temp = formatlong(v, argidx, key, flags, prec, c);
965
0
                if (!temp)
966
0
                    goto error;
967
0
                assert(PyUnicode_IS_ASCII(temp));
968
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
969
0
                len = PyUnicode_GET_LENGTH(temp);
970
0
                sign = 1;
971
0
                if (flags & F_ZERO)
972
0
                    fill = '0';
973
0
                break;
974
975
0
            case 'e':
976
0
            case 'E':
977
0
            case 'f':
978
0
            case 'F':
979
0
            case 'g':
980
0
            case 'G':
981
0
                if (width == -1 && prec == -1
982
0
                    && !(flags & (F_SIGN | F_BLANK)))
983
0
                {
984
                    /* Fast path */
985
0
                    res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res);
986
0
                    if (res == NULL)
987
0
                        goto error;
988
0
                    continue;
989
0
                }
990
991
0
                if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res))
992
0
                    goto error;
993
0
                pbuf = PyBytes_AS_STRING(temp);
994
0
                len = PyBytes_GET_SIZE(temp);
995
0
                sign = 1;
996
0
                if (flags & F_ZERO)
997
0
                    fill = '0';
998
0
                break;
999
1000
0
            case 'c':
1001
0
                pbuf = &onechar;
1002
0
                len = byte_converter(v, argidx, key, &onechar);
1003
0
                if (!len)
1004
0
                    goto error;
1005
0
                if (width == -1) {
1006
                    /* Fast path */
1007
0
                    *res++ = onechar;
1008
0
                    continue;
1009
0
                }
1010
0
                break;
1011
1012
0
            default:
1013
0
                if (Py_ISALPHA(c)) {
1014
0
                    PyErr_Format(PyExc_ValueError,
1015
0
                                 "unsupported format %%%c at position %zd",
1016
0
                                 c, (Py_ssize_t)(fmtstart - format - 1));
1017
0
                }
1018
0
                else if (c == '\'') {
1019
0
                    PyErr_Format(PyExc_ValueError,
1020
0
                                 "stray %% at position %zd or unexpected "
1021
0
                                 "format character \"'\" "
1022
0
                                 "at position %zd",
1023
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1024
0
                                 (Py_ssize_t)(fmt - format - 1));
1025
0
                }
1026
0
                else if (c >= 32 && c < 127 && c != '\'') {
1027
0
                    PyErr_Format(PyExc_ValueError,
1028
0
                                 "stray %% at position %zd or unexpected "
1029
0
                                 "format character '%c' "
1030
0
                                 "at position %zd",
1031
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1032
0
                                 c, (Py_ssize_t)(fmt - format - 1));
1033
0
                }
1034
0
                else {
1035
0
                    PyErr_Format(PyExc_ValueError,
1036
0
                                 "stray %% at position %zd or unexpected "
1037
0
                                 "format character with code 0x%02x "
1038
0
                                 "at position %zd",
1039
0
                                 (Py_ssize_t)(fmtstart - format - 1),
1040
0
                                 Py_CHARMASK(c),
1041
0
                                 (Py_ssize_t)(fmt - format - 1));
1042
0
                }
1043
0
                goto error;
1044
0
            }
1045
1046
0
            if (sign) {
1047
0
                if (*pbuf == '-' || *pbuf == '+') {
1048
0
                    sign = *pbuf++;
1049
0
                    len--;
1050
0
                }
1051
0
                else if (flags & F_SIGN)
1052
0
                    sign = '+';
1053
0
                else if (flags & F_BLANK)
1054
0
                    sign = ' ';
1055
0
                else
1056
0
                    sign = 0;
1057
0
            }
1058
0
            if (width < len)
1059
0
                width = len;
1060
1061
0
            alloc = width;
1062
0
            if (sign != 0 && len == width)
1063
0
                alloc++;
1064
            /* 2: size preallocated for %s */
1065
0
            if (alloc > 2) {
1066
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
1067
0
                if (res == NULL) {
1068
0
                    Py_XDECREF(temp);
1069
0
                    goto error;
1070
0
                }
1071
0
            }
1072
#ifndef NDEBUG
1073
            char *before = res;
1074
#endif
1075
1076
            /* Write the sign if needed */
1077
0
            if (sign) {
1078
0
                if (fill != ' ')
1079
0
                    *res++ = sign;
1080
0
                if (width > len)
1081
0
                    width--;
1082
0
            }
1083
1084
            /* Write the numeric prefix for "x", "X" and "o" formats
1085
               if the alternate form is used.
1086
               For example, write "0x" for the "%#x" format. */
1087
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1088
0
                assert(pbuf[0] == '0');
1089
0
                assert(pbuf[1] == c);
1090
0
                if (fill != ' ') {
1091
0
                    *res++ = *pbuf++;
1092
0
                    *res++ = *pbuf++;
1093
0
                }
1094
0
                width -= 2;
1095
0
                if (width < 0)
1096
0
                    width = 0;
1097
0
                len -= 2;
1098
0
            }
1099
1100
            /* Pad left with the fill character if needed */
1101
0
            if (width > len && !(flags & F_LJUST)) {
1102
0
                memset(res, fill, width - len);
1103
0
                res += (width - len);
1104
0
                width = len;
1105
0
            }
1106
1107
            /* If padding with spaces: write sign if needed and/or numeric
1108
               prefix if the alternate form is used */
1109
0
            if (fill == ' ') {
1110
0
                if (sign)
1111
0
                    *res++ = sign;
1112
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1113
0
                    assert(pbuf[0] == '0');
1114
0
                    assert(pbuf[1] == c);
1115
0
                    *res++ = *pbuf++;
1116
0
                    *res++ = *pbuf++;
1117
0
                }
1118
0
            }
1119
1120
            /* Copy bytes */
1121
0
            memcpy(res, pbuf, len);
1122
0
            res += len;
1123
1124
            /* Pad right with the fill character if needed */
1125
0
            if (width > len) {
1126
0
                memset(res, ' ', width - len);
1127
0
                res += (width - len);
1128
0
            }
1129
1130
0
            if (dict && (argidx < arglen)) {
1131
                // XXX: Never happens?
1132
0
                PyErr_SetString(PyExc_TypeError,
1133
0
                           "not all arguments converted during bytes formatting");
1134
0
                Py_XDECREF(temp);
1135
0
                goto error;
1136
0
            }
1137
0
            Py_XDECREF(temp);
1138
1139
#ifndef NDEBUG
1140
            /* check that we computed the exact size for this write */
1141
            assert((res - before) == alloc);
1142
#endif
1143
0
        } /* '%' */
1144
1145
        /* If overallocation was disabled, ensure that it was the last
1146
           write. Otherwise, we missed an optimization */
1147
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1148
0
    } /* until end */
1149
1150
0
    if (argidx < arglen && !dict) {
1151
0
        PyErr_Format(PyExc_TypeError,
1152
0
                     "not all arguments converted during bytes formatting "
1153
0
                     "(required %zd, got %zd)",
1154
0
                     arglen < 0 ? 0 : argidx,
1155
0
                     arglen < 0 ? 1 : arglen);
1156
0
        goto error;
1157
0
    }
1158
1159
0
    Py_XDECREF(key);
1160
0
    if (args_owned) {
1161
0
        Py_DECREF(args);
1162
0
    }
1163
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1164
1165
0
 error:
1166
0
    Py_XDECREF(key);
1167
0
    PyBytesWriter_Discard(writer);
1168
0
    if (args_owned) {
1169
0
        Py_DECREF(args);
1170
0
    }
1171
0
    return NULL;
1172
0
}
1173
1174
/* Unescape a backslash-escaped string. */
1175
PyObject *_PyBytes_DecodeEscape2(const char *s,
1176
                                Py_ssize_t len,
1177
                                const char *errors,
1178
                                int *first_invalid_escape_char,
1179
                                const char **first_invalid_escape_ptr)
1180
2.08k
{
1181
2.08k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1182
2.08k
    if (writer == NULL) {
1183
0
        return NULL;
1184
0
    }
1185
2.08k
    char *p = PyBytesWriter_GetData(writer);
1186
1187
2.08k
    *first_invalid_escape_char = -1;
1188
2.08k
    *first_invalid_escape_ptr = NULL;
1189
1190
2.08k
    const char *end = s + len;
1191
76.1k
    while (s < end) {
1192
74.0k
        if (*s != '\\') {
1193
61.5k
            *p++ = *s++;
1194
61.5k
            continue;
1195
61.5k
        }
1196
1197
12.4k
        s++;
1198
12.4k
        if (s == end) {
1199
0
            PyErr_SetString(PyExc_ValueError,
1200
0
                            "Trailing \\ in string");
1201
0
            goto failed;
1202
0
        }
1203
1204
12.4k
        switch (*s++) {
1205
        /* XXX This assumes ASCII! */
1206
621
        case '\n': break;
1207
1.52k
        case '\\': *p++ = '\\'; break;
1208
365
        case '\'': *p++ = '\''; break;
1209
312
        case '\"': *p++ = '\"'; break;
1210
223
        case 'b': *p++ = '\b'; break;
1211
322
        case 'f': *p++ = '\014'; break; /* FF */
1212
215
        case 't': *p++ = '\t'; break;
1213
400
        case 'n': *p++ = '\n'; break;
1214
449
        case 'r': *p++ = '\r'; break;
1215
589
        case 'v': *p++ = '\013'; break; /* VT */
1216
250
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1217
1.98k
        case '0': case '1': case '2': case '3':
1218
5.39k
        case '4': case '5': case '6': case '7':
1219
5.39k
        {
1220
5.39k
            int c = s[-1] - '0';
1221
5.39k
            if (s < end && '0' <= *s && *s <= '7') {
1222
1.95k
                c = (c<<3) + *s++ - '0';
1223
1.95k
                if (s < end && '0' <= *s && *s <= '7')
1224
627
                    c = (c<<3) + *s++ - '0';
1225
1.95k
            }
1226
5.39k
            if (c > 0377) {
1227
571
                if (*first_invalid_escape_char == -1) {
1228
145
                    *first_invalid_escape_char = c;
1229
                    /* Back up 3 chars, since we've already incremented s. */
1230
145
                    *first_invalid_escape_ptr = s - 3;
1231
145
                }
1232
571
            }
1233
5.39k
            *p++ = c;
1234
5.39k
            break;
1235
5.14k
        }
1236
294
        case 'x':
1237
294
            if (s+1 < end) {
1238
292
                int digit1, digit2;
1239
292
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1240
292
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1241
292
                if (digit1 < 16 && digit2 < 16) {
1242
289
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1243
289
                    s += 2;
1244
289
                    break;
1245
289
                }
1246
292
            }
1247
            /* invalid hexadecimal digits */
1248
1249
5
            if (!errors || strcmp(errors, "strict") == 0) {
1250
5
                PyErr_Format(PyExc_ValueError,
1251
5
                             "invalid \\x escape at position %zd",
1252
5
                             s - 2 - (end - len));
1253
5
                goto failed;
1254
5
            }
1255
0
            if (strcmp(errors, "replace") == 0) {
1256
0
                *p++ = '?';
1257
0
            } else if (strcmp(errors, "ignore") == 0)
1258
0
                /* do nothing */;
1259
0
            else {
1260
0
                PyErr_Format(PyExc_ValueError,
1261
0
                             "decoding error; unknown "
1262
0
                             "error handling code: %.400s",
1263
0
                             errors);
1264
0
                goto failed;
1265
0
            }
1266
            /* skip \x */
1267
0
            if (s < end && Py_ISXDIGIT(s[0]))
1268
0
                s++; /* and a hexdigit */
1269
0
            break;
1270
1271
1.53k
        default:
1272
1.53k
            if (*first_invalid_escape_char == -1) {
1273
533
                *first_invalid_escape_char = (unsigned char)s[-1];
1274
                /* Back up one char, since we've already incremented s. */
1275
533
                *first_invalid_escape_ptr = s - 1;
1276
533
            }
1277
1.53k
            *p++ = '\\';
1278
1.53k
            s--;
1279
12.4k
        }
1280
12.4k
    }
1281
1282
2.08k
    return PyBytesWriter_FinishWithPointer(writer, p);
1283
1284
5
  failed:
1285
5
    PyBytesWriter_Discard(writer);
1286
5
    return NULL;
1287
2.08k
}
1288
1289
PyObject *PyBytes_DecodeEscape(const char *s,
1290
                                Py_ssize_t len,
1291
                                const char *errors,
1292
                                Py_ssize_t Py_UNUSED(unicode),
1293
                                const char *Py_UNUSED(recode_encoding))
1294
0
{
1295
0
    int first_invalid_escape_char;
1296
0
    const char *first_invalid_escape_ptr;
1297
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1298
0
                                             &first_invalid_escape_char,
1299
0
                                             &first_invalid_escape_ptr);
1300
0
    if (result == NULL)
1301
0
        return NULL;
1302
0
    if (first_invalid_escape_char != -1) {
1303
0
        if (first_invalid_escape_char > 0xff) {
1304
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1305
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1306
0
                                 "Such sequences will not work in the future. ",
1307
0
                                 first_invalid_escape_char) < 0)
1308
0
            {
1309
0
                Py_DECREF(result);
1310
0
                return NULL;
1311
0
            }
1312
0
        }
1313
0
        else {
1314
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1315
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1316
0
                                 "Such sequences will not work in the future. ",
1317
0
                                 first_invalid_escape_char) < 0)
1318
0
            {
1319
0
                Py_DECREF(result);
1320
0
                return NULL;
1321
0
            }
1322
0
        }
1323
0
    }
1324
0
    return result;
1325
0
}
1326
/* -------------------------------------------------------------------- */
1327
/* object api */
1328
1329
Py_ssize_t
1330
PyBytes_Size(PyObject *op)
1331
5.34k
{
1332
5.34k
    if (!PyBytes_Check(op)) {
1333
0
        PyErr_Format(PyExc_TypeError,
1334
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1335
0
        return -1;
1336
0
    }
1337
5.34k
    return Py_SIZE(op);
1338
5.34k
}
1339
1340
char *
1341
PyBytes_AsString(PyObject *op)
1342
12.6M
{
1343
12.6M
    if (!PyBytes_Check(op)) {
1344
0
        PyErr_Format(PyExc_TypeError,
1345
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1346
0
        return NULL;
1347
0
    }
1348
12.6M
    return ((PyBytesObject *)op)->ob_sval;
1349
12.6M
}
1350
1351
int
1352
PyBytes_AsStringAndSize(PyObject *obj,
1353
                         char **s,
1354
                         Py_ssize_t *len)
1355
68.2k
{
1356
68.2k
    if (s == NULL) {
1357
0
        PyErr_BadInternalCall();
1358
0
        return -1;
1359
0
    }
1360
1361
68.2k
    if (!PyBytes_Check(obj)) {
1362
0
        PyErr_Format(PyExc_TypeError,
1363
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1364
0
        return -1;
1365
0
    }
1366
1367
68.2k
    *s = PyBytes_AS_STRING(obj);
1368
68.2k
    if (len != NULL)
1369
68.2k
        *len = PyBytes_GET_SIZE(obj);
1370
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1371
0
        PyErr_SetString(PyExc_ValueError,
1372
0
                        "embedded null byte");
1373
0
        return -1;
1374
0
    }
1375
68.2k
    return 0;
1376
68.2k
}
1377
1378
/* -------------------------------------------------------------------- */
1379
/* Methods */
1380
1381
4.75k
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1382
1383
#include "stringlib/stringdefs.h"
1384
#define STRINGLIB_MUTABLE 0
1385
1386
#include "stringlib/fastsearch.h"
1387
#include "stringlib/count.h"
1388
#include "stringlib/find.h"
1389
#include "stringlib/join.h"
1390
#include "stringlib/partition.h"
1391
#include "stringlib/split.h"
1392
#include "stringlib/ctype.h"
1393
1394
#include "stringlib/transmogrify.h"
1395
1396
#undef STRINGLIB_GET_EMPTY
1397
1398
Py_ssize_t
1399
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1400
              const char *needle, Py_ssize_t len_needle,
1401
              Py_ssize_t offset)
1402
0
{
1403
0
    assert(len_haystack >= 0);
1404
0
    assert(len_needle >= 0);
1405
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1406
0
    if (len_needle == 0) {
1407
0
        return offset;
1408
0
    }
1409
0
    if (len_needle > len_haystack) {
1410
0
        return -1;
1411
0
    }
1412
0
    assert(len_haystack >= 1);
1413
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1414
0
                                    needle, len_needle, offset);
1415
0
    if (res == -1) {
1416
0
        Py_ssize_t last_align = len_haystack - len_needle;
1417
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1418
0
            return offset + last_align;
1419
0
        }
1420
0
    }
1421
0
    return res;
1422
0
}
1423
1424
Py_ssize_t
1425
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1426
                     const char *needle, Py_ssize_t len_needle,
1427
                     Py_ssize_t offset)
1428
0
{
1429
0
    return stringlib_rfind(haystack, len_haystack,
1430
0
                           needle, len_needle, offset);
1431
0
}
1432
1433
/* Build the repr of the bytes object `obj` by passing its buffer and size
   to _Py_bytes_repr() with the class name "bytes".  `smartquotes` is
   forwarded unchanged.  `obj` is not type-checked here. */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *buffer = PyBytes_AS_STRING(obj);
    Py_ssize_t size = PyBytes_GET_SIZE(obj);
    return _Py_bytes_repr(buffer, size, smartquotes, "bytes");
}
1439
1440
PyObject *
1441
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1442
               const char *classname)
1443
2.35k
{
1444
2.35k
    Py_ssize_t i;
1445
2.35k
    Py_ssize_t newsize, squotes, dquotes;
1446
2.35k
    PyObject *v;
1447
2.35k
    unsigned char quote;
1448
2.35k
    Py_UCS1 *p;
1449
1450
    /* Compute size of output string */
1451
2.35k
    squotes = dquotes = 0;
1452
2.35k
    newsize = 3; /* b'' */
1453
3.35M
    for (i = 0; i < length; i++) {
1454
3.35M
        unsigned char c = data[i];
1455
3.35M
        Py_ssize_t incr = 1;
1456
3.35M
        switch(c) {
1457
4.88k
        case '\'': squotes++; break;
1458
9.09k
        case '"':  dquotes++; break;
1459
34.6k
        case '\\': case '\t': case '\n': case '\r':
1460
34.6k
            incr = 2; break; /* \C */
1461
3.30M
        default:
1462
3.30M
            if (c < ' ' || c >= 0x7f)
1463
2.52M
                incr = 4; /* \xHH */
1464
3.35M
        }
1465
3.35M
        if (newsize > PY_SSIZE_T_MAX - incr)
1466
0
            goto overflow;
1467
3.35M
        newsize += incr;
1468
3.35M
    }
1469
2.35k
    quote = '\'';
1470
2.35k
    if (smartquotes && squotes && !dquotes)
1471
113
        quote = '"';
1472
2.35k
    if (squotes && quote == '\'') {
1473
224
        if (newsize > PY_SSIZE_T_MAX - squotes)
1474
0
            goto overflow;
1475
224
        newsize += squotes;
1476
224
    }
1477
1478
2.35k
    v = PyUnicode_New(newsize, 127);
1479
2.35k
    if (v == NULL) {
1480
0
        return NULL;
1481
0
    }
1482
2.35k
    p = PyUnicode_1BYTE_DATA(v);
1483
1484
2.35k
    *p++ = 'b', *p++ = quote;
1485
3.35M
    for (i = 0; i < length; i++) {
1486
3.35M
        unsigned char c = data[i];
1487
3.35M
        if (c == quote || c == '\\')
1488
5.27k
            *p++ = '\\', *p++ = c;
1489
3.35M
        else if (c == '\t')
1490
19.2k
            *p++ = '\\', *p++ = 't';
1491
3.33M
        else if (c == '\n')
1492
6.17k
            *p++ = '\\', *p++ = 'n';
1493
3.32M
        else if (c == '\r')
1494
6.95k
            *p++ = '\\', *p++ = 'r';
1495
3.31M
        else if (c < ' ' || c >= 0x7f) {
1496
2.52M
            *p++ = '\\';
1497
2.52M
            *p++ = 'x';
1498
2.52M
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1499
2.52M
            *p++ = Py_hexdigits[c & 0xf];
1500
2.52M
        }
1501
795k
        else
1502
795k
            *p++ = c;
1503
3.35M
    }
1504
2.35k
    *p++ = quote;
1505
2.35k
    assert(_PyUnicode_CheckConsistency(v, 1));
1506
2.35k
    return v;
1507
1508
0
  overflow:
1509
0
    PyErr_Format(PyExc_OverflowError,
1510
0
                 "%s object is too large to make repr", classname);
1511
0
    return NULL;
1512
2.35k
}
1513
1514
/* repr() for bytes: delegates to PyBytes_Repr() with smartquotes on. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1519
1520
/* str() for bytes: same result as repr().  When the interpreter config
   has bytes_warning set, a BytesWarning is issued first; if issuing it
   reports failure, NULL is returned. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1531
1532
static Py_ssize_t
1533
bytes_length(PyObject *self)
1534
36.1M
{
1535
36.1M
    PyBytesObject *a = _PyBytes_CAST(self);
1536
36.1M
    return Py_SIZE(a);
1537
36.1M
}
1538
1539
/* This is also used by PyBytes_Concat() */
1540
static PyObject *
1541
bytes_concat(PyObject *a, PyObject *b)
1542
455k
{
1543
455k
    Py_buffer va, vb;
1544
455k
    PyObject *result = NULL;
1545
1546
455k
    va.len = -1;
1547
455k
    vb.len = -1;
1548
455k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1549
455k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1550
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1551
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1552
0
        goto done;
1553
0
    }
1554
1555
    /* Optimize end cases */
1556
455k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1557
145k
        result = Py_NewRef(b);
1558
145k
        goto done;
1559
145k
    }
1560
310k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1561
65.1k
        result = Py_NewRef(a);
1562
65.1k
        goto done;
1563
65.1k
    }
1564
1565
245k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1566
0
        PyErr_NoMemory();
1567
0
        goto done;
1568
0
    }
1569
1570
245k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1571
245k
    if (result != NULL) {
1572
245k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1573
245k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1574
245k
    }
1575
1576
455k
  done:
1577
455k
    if (va.len != -1)
1578
455k
        PyBuffer_Release(&va);
1579
455k
    if (vb.len != -1)
1580
455k
        PyBuffer_Release(&vb);
1581
455k
    return result;
1582
245k
}
1583
1584
static PyObject *
1585
bytes_repeat(PyObject *self, Py_ssize_t n)
1586
192k
{
1587
192k
    PyBytesObject *a = _PyBytes_CAST(self);
1588
192k
    if (n < 0)
1589
0
        n = 0;
1590
    /* watch out for overflows:  the size can overflow int,
1591
     * and the # of bytes needed can overflow size_t
1592
     */
1593
192k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1594
0
        PyErr_SetString(PyExc_OverflowError,
1595
0
            "repeated bytes are too long");
1596
0
        return NULL;
1597
0
    }
1598
192k
    Py_ssize_t size = Py_SIZE(a) * n;
1599
192k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1600
1
        return Py_NewRef(a);
1601
1
    }
1602
192k
    size_t nbytes = (size_t)size;
1603
192k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1604
0
        PyErr_SetString(PyExc_OverflowError,
1605
0
            "repeated bytes are too long");
1606
0
        return NULL;
1607
0
    }
1608
192k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1609
192k
    if (op == NULL) {
1610
0
        return PyErr_NoMemory();
1611
0
    }
1612
192k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1613
192k
    set_ob_shash(op, -1);
1614
192k
    op->ob_sval[size] = '\0';
1615
1616
192k
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1617
1618
192k
    return (PyObject *) op;
1619
192k
}
1620
1621
static int
1622
bytes_contains(PyObject *self, PyObject *arg)
1623
1.18k
{
1624
1.18k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1625
1.18k
}
1626
1627
static PyObject *
1628
bytes_item(PyObject *self, Py_ssize_t i)
1629
0
{
1630
0
    PyBytesObject *a = _PyBytes_CAST(self);
1631
0
    if (i < 0 || i >= Py_SIZE(a)) {
1632
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1633
0
        return NULL;
1634
0
    }
1635
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1636
0
}
1637
1638
static int
1639
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1640
85.6M
{
1641
85.6M
    int cmp;
1642
85.6M
    Py_ssize_t len;
1643
1644
85.6M
    len = Py_SIZE(a);
1645
85.6M
    if (Py_SIZE(b) != len)
1646
955k
        return 0;
1647
1648
84.6M
    if (a->ob_sval[0] != b->ob_sval[0])
1649
9.96M
        return 0;
1650
1651
74.6M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1652
74.6M
    return (cmp == 0);
1653
84.6M
}
1654
1655
/* Rich comparison for bytes objects.

   If either operand is not a bytes object, NotImplemented is returned;
   beforehand, when bytes_warning is enabled and the operation is == or !=,
   a BytesWarning is issued for comparisons involving str or int.
   Identical objects are answered without looking at the data, == and !=
   use bytes_compare_eq(), and the ordering operators compare the common
   prefix first and fall back to comparing the lengths. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        const int is_equality_op = (op == Py_EQ || op == Py_NE);
        if (_Py_GetConfig()->bytes_warning && is_equality_op) {
            if (PyUnicode_Check(aa) || PyUnicode_Check(bb)) {
                if (PyErr_WarnEx(PyExc_BytesWarning,
                                 "Comparison between bytes and string", 1))
                {
                    return NULL;
                }
            }
            if (PyLong_Check(aa) || PyLong_Check(bb)) {
                if (PyErr_WarnEx(PyExc_BytesWarning,
                                 "Comparison between bytes and int", 1))
                {
                    return NULL;
                }
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        /* a byte string is equal to itself */
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    if (op == Py_EQ || op == Py_NE) {
        int equal = bytes_compare_eq(a, b);
        /* flip the answer for != */
        equal ^= (op == Py_NE);
        return PyBool_FromLong(equal);
    }

    /* Ordering comparison. */
    Py_ssize_t len_a = Py_SIZE(a);
    Py_ssize_t len_b = Py_SIZE(b);
    Py_ssize_t common = Py_MIN(len_a, len_b);
    int order = 0;

    if (common > 0) {
        /* compare the first bytes before calling memcmp() */
        order = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (order == 0) {
            order = memcmp(a->ob_sval, b->ob_sval, common);
        }
    }
    if (order != 0) {
        Py_RETURN_RICHCOMPARE(order, 0, op);
    }
    /* equal over the common prefix: the lengths decide */
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1717
1718
static Py_hash_t
1719
bytes_hash(PyObject *self)
1720
80.5M
{
1721
80.5M
    PyBytesObject *a = _PyBytes_CAST(self);
1722
80.5M
    Py_hash_t hash = get_ob_shash(a);
1723
80.5M
    if (hash == -1) {
1724
        /* Can't fail */
1725
50.5M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1726
50.5M
        set_ob_shash(a, hash);
1727
50.5M
    }
1728
80.5M
    return hash;
1729
80.5M
}
1730
1731
/* Implement self[item] for bytes.
 *
 * - item supports the index protocol: return the byte at that position as an
 *   int; negative positions count from the end.  IndexError is raised when
 *   the position is outside the object.
 * - item is a slice: return the selected bytes.  An empty selection returns
 *   the empty-bytes constant, and a slice covering an entire exact bytes
 *   object returns a new reference to that object.
 * - anything else: raise TypeError.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }

        Py_ssize_t size = PyBytes_GET_SIZE(self);
        if (index < 0) {
            /* Negative positions are relative to the end. */
            index += size;
        }
        if (index < 0 || index >= size) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }

    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t slicelength = PySlice_AdjustIndices(size, &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* Contiguous slice: share the object when the slice is the whole of
           an exact bytes instance, otherwise copy the range in one go. */
        if (start == 0 && slicelength == size && PyBytes_CheckExact(self)) {
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self) + start,
                                         slicelength);
    }

    /* Extended slice: copy one byte per step into a fresh object. */
    const char *source_buf = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }

    char *result_buf = PyBytes_AS_STRING(result);
    size_t cur = start;
    for (Py_ssize_t i = 0; i < slicelength; i++) {
        result_buf[i] = source_buf[cur];
        cur += step;
    }
    return result;
}
1796
1797
static int
1798
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1799
79.4M
{
1800
79.4M
    PyBytesObject *self = _PyBytes_CAST(op);
1801
79.4M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1802
79.4M
                             1, flags);
1803
79.4M
}
1804
1805
static PySequenceMethods bytes_as_sequence = {
1806
    bytes_length,       /*sq_length*/
1807
    bytes_concat,       /*sq_concat*/
1808
    bytes_repeat,       /*sq_repeat*/
1809
    bytes_item,         /*sq_item*/
1810
    0,                  /*sq_slice*/
1811
    0,                  /*sq_ass_item*/
1812
    0,                  /*sq_ass_slice*/
1813
    bytes_contains      /*sq_contains*/
1814
};
1815
1816
static PyMappingMethods bytes_as_mapping = {
1817
    bytes_length,
1818
    bytes_subscript,
1819
    0,
1820
};
1821
1822
static PyBufferProcs bytes_as_buffer = {
1823
    bytes_buffer_getbuffer,
1824
    NULL,
1825
};
1826
1827
1828
/*[clinic input]
1829
bytes.__bytes__
1830
Convert this value to exact type bytes.
1831
[clinic start generated code]*/
1832
1833
static PyObject *
1834
bytes___bytes___impl(PyBytesObject *self)
1835
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1836
51.6k
{
1837
51.6k
    if (PyBytes_CheckExact(self)) {
1838
51.6k
        return Py_NewRef(self);
1839
51.6k
    }
1840
0
    else {
1841
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1842
0
    }
1843
51.6k
}
1844
1845
1846
294
/* Strip modes passed as `striptype` to do_xstrip(), do_strip() and
   do_argstrip().  LEFTSTRIP strips only the start, RIGHTSTRIP only the end,
   BOTHSTRIP both ends. */
#define LEFTSTRIP 0
1847
588
#define RIGHTSTRIP 1
1848
0
#define BOTHSTRIP 2
1849
1850
/*[clinic input]
1851
bytes.split
1852
1853
    sep: object = None
1854
        The delimiter according which to split the bytes.
1855
        None (the default value) means split on ASCII whitespace characters
1856
        (space, tab, return, newline, formfeed, vertical tab).
1857
    maxsplit: Py_ssize_t = -1
1858
        Maximum number of splits to do.
1859
        -1 (the default value) means no limit.
1860
1861
Return a list of the sections in the bytes, using sep as the delimiter.
1862
[clinic start generated code]*/
1863
1864
static PyObject *
1865
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1867
2.95M
{
1868
2.95M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1869
2.95M
    const char *s = PyBytes_AS_STRING(self), *sub;
1870
2.95M
    Py_buffer vsub;
1871
2.95M
    PyObject *list;
1872
1873
2.95M
    if (maxsplit < 0)
1874
2.95M
        maxsplit = PY_SSIZE_T_MAX;
1875
2.95M
    if (sep == Py_None)
1876
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1877
2.95M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1878
0
        return NULL;
1879
2.95M
    sub = vsub.buf;
1880
2.95M
    n = vsub.len;
1881
1882
2.95M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1883
2.95M
    PyBuffer_Release(&vsub);
1884
2.95M
    return list;
1885
2.95M
}
1886
1887
/*[clinic input]
1888
@permit_long_docstring_body
1889
bytes.partition
1890
1891
    sep: Py_buffer
1892
    /
1893
1894
Partition the bytes into three parts using the given separator.
1895
1896
This will search for the separator sep in the bytes. If the separator is found,
1897
returns a 3-tuple containing the part before the separator, the separator
1898
itself, and the part after it.
1899
1900
If the separator is not found, returns a 3-tuple containing the original bytes
1901
object and two empty bytes objects.
1902
[clinic start generated code]*/
1903
1904
static PyObject *
1905
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1906
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1907
515k
{
1908
515k
    return stringlib_partition(
1909
515k
        (PyObject*) self,
1910
515k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1911
515k
        sep->obj, (const char *)sep->buf, sep->len
1912
515k
        );
1913
515k
}
1914
1915
/*[clinic input]
1916
@permit_long_docstring_body
1917
bytes.rpartition
1918
1919
    sep: Py_buffer
1920
    /
1921
1922
Partition the bytes into three parts using the given separator.
1923
1924
This will search for the separator sep in the bytes, starting at the end. If
1925
the separator is found, returns a 3-tuple containing the part before the
1926
separator, the separator itself, and the part after it.
1927
1928
If the separator is not found, returns a 3-tuple containing two empty bytes
1929
objects and the original bytes object.
1930
[clinic start generated code]*/
1931
1932
static PyObject *
1933
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1934
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1935
0
{
1936
0
    return stringlib_rpartition(
1937
0
        (PyObject*) self,
1938
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1939
0
        sep->obj, (const char *)sep->buf, sep->len
1940
0
        );
1941
0
}
1942
1943
/*[clinic input]
1944
@permit_long_docstring_body
1945
bytes.rsplit = bytes.split
1946
1947
Return a list of the sections in the bytes, using sep as the delimiter.
1948
1949
Splitting is done starting at the end of the bytes and working to the front.
1950
[clinic start generated code]*/
1951
1952
static PyObject *
1953
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1954
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1955
0
{
1956
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1957
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1958
0
    Py_buffer vsub;
1959
0
    PyObject *list;
1960
1961
0
    if (maxsplit < 0)
1962
0
        maxsplit = PY_SSIZE_T_MAX;
1963
0
    if (sep == Py_None)
1964
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1965
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1966
0
        return NULL;
1967
0
    sub = vsub.buf;
1968
0
    n = vsub.len;
1969
1970
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1971
0
    PyBuffer_Release(&vsub);
1972
0
    return list;
1973
0
}
1974
1975
1976
/*[clinic input]
1977
bytes.join
1978
1979
    iterable_of_bytes: object
1980
    /
1981
1982
Concatenate any number of bytes objects.
1983
1984
The bytes whose method is called is inserted in between each pair.
1985
1986
The result is returned as a new bytes object.
1987
1988
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1989
[clinic start generated code]*/
1990
1991
static PyObject *
1992
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1993
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1994
271k
{
1995
271k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1996
271k
}
1997
1998
/* Join the items of `iterable` using the bytes object `sep` as separator.
 *
 * A NULL `sep` is reported through PyErr_BadInternalCall(); a `sep` that is
 * not a bytes object raises TypeError.  Both cases return NULL.  Otherwise
 * the result of stringlib_bytes_join() is returned.
 */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    PyErr_Format(PyExc_TypeError,
                 "sep: expected bytes, got %T", sep);
    return NULL;
}
2013
2014
/*[clinic input]
2015
@permit_long_summary
2016
@text_signature "($self, sub[, start[, end]], /)"
2017
bytes.find
2018
2019
    sub: object
2020
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2021
         Optional start position. Default: start of the bytes.
2022
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2023
         Optional stop position. Default: end of the bytes.
2024
    /
2025
2026
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
2027
2028
Return -1 on failure.
2029
[clinic start generated code]*/
2030
2031
static PyObject *
2032
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2033
                Py_ssize_t end)
2034
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
2035
16.8M
{
2036
16.8M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2037
16.8M
                          sub, start, end);
2038
16.8M
}
2039
2040
/*[clinic input]
2041
@permit_long_summary
2042
bytes.index = bytes.find
2043
2044
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
2045
2046
Raise ValueError if the subsection is not found.
2047
[clinic start generated code]*/
2048
2049
static PyObject *
2050
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2051
                 Py_ssize_t end)
2052
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
2053
0
{
2054
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2055
0
                           sub, start, end);
2056
0
}
2057
2058
/*[clinic input]
2059
@permit_long_summary
2060
bytes.rfind = bytes.find
2061
2062
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
2063
2064
Return -1 on failure.
2065
[clinic start generated code]*/
2066
2067
static PyObject *
2068
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2069
                 Py_ssize_t end)
2070
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
2071
14.2k
{
2072
14.2k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2073
14.2k
                           sub, start, end);
2074
14.2k
}
2075
2076
/*[clinic input]
2077
@permit_long_summary
2078
bytes.rindex = bytes.find
2079
2080
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
2081
2082
Raise ValueError if the subsection is not found.
2083
[clinic start generated code]*/
2084
2085
static PyObject *
2086
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2087
                  Py_ssize_t end)
2088
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2089
0
{
2090
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2091
0
                            sub, start, end);
2092
0
}
2093
2094
2095
/* Strip from `self` every leading and/or trailing byte that occurs in the
 * buffer exported by `sepobj`.
 *
 * striptype selects the ends to process: the start is skipped for
 * RIGHTSTRIP and the end is skipped for LEFTSTRIP.  When nothing is removed
 * and `self` is an exact bytes object, a new reference to `self` is
 * returned; otherwise the remaining range is copied into a new object.
 * Returns NULL if `sepobj` does not provide a simple buffer.
 */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_buffer sep_view;

    if (PyObject_GetBuffer(sepobj, &sep_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    char *strip_set = sep_view.buf;
    Py_ssize_t strip_set_len = sep_view.len;

    /* [left, right) is the range that survives stripping. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!memchr(strip_set, Py_CHARMASK(data[left]), strip_set_len)) {
                break;
            }
        }
    }

    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(strip_set, Py_CHARMASK(data[right - 1]),
                         strip_set_len))
        {
            right--;
        }
    }

    PyBuffer_Release(&sep_view);

    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2133
2134
2135
/* Strip bytes for which Py_ISSPACE() is true from the start and/or end of
 * `self`.
 *
 * striptype selects the ends to process: the start is skipped for
 * RIGHTSTRIP and the end is skipped for LEFTSTRIP.  When nothing is removed
 * and `self` is an exact bytes object, a new reference to `self` is
 * returned; otherwise the remaining range is copied into a new object.
 */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* [left, right) is the range that survives stripping. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2162
2163
2164
/* Dispatch a strip request: with `bytes` set to None use do_strip(),
   otherwise strip the bytes contained in `bytes` via do_xstrip(). */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2172
2173
/*[clinic input]
2174
@permit_long_docstring_body
2175
bytes.strip
2176
2177
    bytes: object = None
2178
    /
2179
2180
Strip leading and trailing bytes contained in the argument.
2181
2182
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2183
[clinic start generated code]*/
2184
2185
static PyObject *
2186
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2187
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2188
0
{
2189
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2190
0
}
2191
2192
/*[clinic input]
2193
bytes.lstrip
2194
2195
    bytes: object = None
2196
    /
2197
2198
Strip leading bytes contained in the argument.
2199
2200
If the argument is omitted or None, strip leading  ASCII whitespace.
2201
[clinic start generated code]*/
2202
2203
static PyObject *
2204
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2205
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2206
0
{
2207
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2208
0
}
2209
2210
/*[clinic input]
2211
bytes.rstrip
2212
2213
    bytes: object = None
2214
    /
2215
2216
Strip trailing bytes contained in the argument.
2217
2218
If the argument is omitted or None, strip trailing ASCII whitespace.
2219
[clinic start generated code]*/
2220
2221
static PyObject *
2222
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2223
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2224
294
{
2225
294
    return do_argstrip(self, RIGHTSTRIP, bytes);
2226
294
}
2227
2228
2229
/*[clinic input]
2230
@permit_long_summary
2231
bytes.count = bytes.find
2232
2233
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2234
[clinic start generated code]*/
2235
2236
static PyObject *
2237
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2238
                 Py_ssize_t end)
2239
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2240
6.52M
{
2241
6.52M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2242
6.52M
                           sub, start, end);
2243
6.52M
}
2244
2245
2246
/*[clinic input]
2247
bytes.translate
2248
2249
    table: object
2250
        Translation table, which must be a bytes object of length 256.
2251
    /
2252
    delete as deletechars: object(c_default="NULL") = b''
2253
2254
Return a copy with each character mapped by the given translation table.
2255
2256
All characters occurring in the optional argument delete are removed.
2257
The remaining characters are mapped through the given translation table.
2258
[clinic start generated code]*/
2259
2260
static PyObject *
2261
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2262
                     PyObject *deletechars)
2263
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2264
0
{
2265
0
    const char *input;
2266
0
    char *output;
2267
0
    Py_buffer table_view = {NULL, NULL};
2268
0
    Py_buffer del_table_view = {NULL, NULL};
2269
0
    const char *table_chars;
2270
0
    Py_ssize_t i, c, changed = 0;
2271
0
    PyObject *input_obj = (PyObject*)self;
2272
0
    const char *output_start, *del_table_chars=NULL;
2273
0
    Py_ssize_t inlen, tablen, dellen = 0;
2274
0
    PyObject *result;
2275
0
    int trans_table[256];
2276
2277
0
    if (PyBytes_Check(table)) {
2278
0
        table_chars = PyBytes_AS_STRING(table);
2279
0
        tablen = PyBytes_GET_SIZE(table);
2280
0
    }
2281
0
    else if (table == Py_None) {
2282
0
        table_chars = NULL;
2283
0
        tablen = 256;
2284
0
    }
2285
0
    else {
2286
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2287
0
            return NULL;
2288
0
        table_chars = table_view.buf;
2289
0
        tablen = table_view.len;
2290
0
    }
2291
2292
0
    if (tablen != 256) {
2293
0
        PyErr_SetString(PyExc_ValueError,
2294
0
          "translation table must be 256 characters long");
2295
0
        PyBuffer_Release(&table_view);
2296
0
        return NULL;
2297
0
    }
2298
2299
0
    if (deletechars != NULL) {
2300
0
        if (PyBytes_Check(deletechars)) {
2301
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2302
0
            dellen = PyBytes_GET_SIZE(deletechars);
2303
0
        }
2304
0
        else {
2305
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2306
0
                PyBuffer_Release(&table_view);
2307
0
                return NULL;
2308
0
            }
2309
0
            del_table_chars = del_table_view.buf;
2310
0
            dellen = del_table_view.len;
2311
0
        }
2312
0
    }
2313
0
    else {
2314
0
        del_table_chars = NULL;
2315
0
        dellen = 0;
2316
0
    }
2317
2318
0
    inlen = PyBytes_GET_SIZE(input_obj);
2319
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2320
0
    if (result == NULL) {
2321
0
        PyBuffer_Release(&del_table_view);
2322
0
        PyBuffer_Release(&table_view);
2323
0
        return NULL;
2324
0
    }
2325
0
    output_start = output = PyBytes_AS_STRING(result);
2326
0
    input = PyBytes_AS_STRING(input_obj);
2327
2328
0
    if (dellen == 0 && table_chars != NULL) {
2329
        /* If no deletions are required, use faster code */
2330
0
        for (i = inlen; --i >= 0; ) {
2331
0
            c = Py_CHARMASK(*input++);
2332
0
            *output++ = table_chars[c];
2333
0
        }
2334
        /* Check if anything changed (for returning original object) */
2335
        /* We save this check until the end so that the compiler will */
2336
        /* unroll the loop above leading to MUCH faster code. */
2337
0
        if (PyBytes_CheckExact(input_obj)) {
2338
0
            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
2339
0
                Py_SETREF(result, Py_NewRef(input_obj));
2340
0
            }
2341
0
        }
2342
0
        PyBuffer_Release(&del_table_view);
2343
0
        PyBuffer_Release(&table_view);
2344
0
        return result;
2345
0
    }
2346
2347
0
    if (table_chars == NULL) {
2348
0
        for (i = 0; i < 256; i++)
2349
0
            trans_table[i] = Py_CHARMASK(i);
2350
0
    } else {
2351
0
        for (i = 0; i < 256; i++)
2352
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2353
0
    }
2354
0
    PyBuffer_Release(&table_view);
2355
2356
0
    for (i = 0; i < dellen; i++)
2357
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2358
0
    PyBuffer_Release(&del_table_view);
2359
2360
0
    for (i = inlen; --i >= 0; ) {
2361
0
        c = Py_CHARMASK(*input++);
2362
0
        if (trans_table[c] != -1)
2363
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2364
0
                continue;
2365
0
        changed = 1;
2366
0
    }
2367
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2368
0
        Py_DECREF(result);
2369
0
        return Py_NewRef(input_obj);
2370
0
    }
2371
    /* Fix the size of the resulting byte string */
2372
0
    if (inlen > 0)
2373
0
        _PyBytes_Resize(&result, output - output_start);
2374
0
    return result;
2375
0
}
2376
2377
2378
/*[clinic input]
2379
2380
@permit_long_summary
2381
@permit_long_docstring_body
2382
@staticmethod
2383
bytes.maketrans
2384
2385
    frm: Py_buffer
2386
    to: Py_buffer
2387
    /
2388
2389
Return a translation table usable for the bytes or bytearray translate method.
2390
2391
The returned table will be one where each byte in frm is mapped to the byte at
2392
the same position in to.
2393
2394
The bytes objects frm and to must be of the same length.
2395
[clinic start generated code]*/
2396
2397
static PyObject *
2398
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2399
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2400
9
{
2401
9
    return _Py_bytes_maketrans(frm, to);
2402
9
}
2403
2404
2405
/*[clinic input]
2406
bytes.replace
2407
2408
    old: Py_buffer
2409
    new: Py_buffer
2410
    /
2411
    count: Py_ssize_t = -1
2412
        Maximum number of occurrences to replace.
2413
        -1 (the default value) means replace all occurrences.
2414
2415
Return a copy with all occurrences of substring old replaced by new.
2416
2417
If count is given, only the first count occurrences are replaced.
2418
If count is not specified or -1, then all occurrences are replaced.
2419
[clinic start generated code]*/
2420
2421
static PyObject *
2422
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2423
                   Py_ssize_t count)
2424
/*[clinic end generated code: output=994fa588b6b9c104 input=cdf3cf8639297745]*/
2425
32.5k
{
2426
32.5k
    return stringlib_replace((PyObject *)self,
2427
32.5k
                             (const char *)old->buf, old->len,
2428
32.5k
                             (const char *)new->buf, new->len, count);
2429
32.5k
}
2430
2431
/** End DALKE **/
2432
2433
/*[clinic input]
2434
bytes.removeprefix as bytes_removeprefix
2435
2436
    prefix: Py_buffer
2437
    /
2438
2439
Return a bytes object with the given prefix string removed if present.
2440
2441
If the bytes starts with the prefix string, return bytes[len(prefix):].
2442
Otherwise, return a copy of the original bytes.
2443
[clinic start generated code]*/
2444
2445
static PyObject *
2446
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2447
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2448
0
{
2449
0
    const char *self_start = PyBytes_AS_STRING(self);
2450
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2451
0
    const char *prefix_start = prefix->buf;
2452
0
    Py_ssize_t prefix_len = prefix->len;
2453
2454
0
    if (self_len >= prefix_len
2455
0
        && prefix_len > 0
2456
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2457
0
    {
2458
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2459
0
                                         self_len - prefix_len);
2460
0
    }
2461
2462
0
    if (PyBytes_CheckExact(self)) {
2463
0
        return Py_NewRef(self);
2464
0
    }
2465
2466
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2467
0
}
2468
2469
/*[clinic input]
2470
bytes.removesuffix as bytes_removesuffix
2471
2472
    suffix: Py_buffer
2473
    /
2474
2475
Return a bytes object with the given suffix string removed if present.
2476
2477
If the bytes ends with the suffix string and that suffix is not empty,
2478
return bytes[:-len(prefix)].  Otherwise, return a copy of the original
2479
bytes.
2480
[clinic start generated code]*/
2481
2482
static PyObject *
2483
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2484
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2485
0
{
2486
0
    const char *self_start = PyBytes_AS_STRING(self);
2487
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2488
0
    const char *suffix_start = suffix->buf;
2489
0
    Py_ssize_t suffix_len = suffix->len;
2490
2491
0
    if (self_len >= suffix_len
2492
0
        && suffix_len > 0
2493
0
        && memcmp(self_start + self_len - suffix_len,
2494
0
                  suffix_start, suffix_len) == 0)
2495
0
    {
2496
0
        return PyBytes_FromStringAndSize(self_start,
2497
0
                                         self_len - suffix_len);
2498
0
    }
2499
2500
0
    if (PyBytes_CheckExact(self)) {
2501
0
        return Py_NewRef(self);
2502
0
    }
2503
2504
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2505
0
}
2506
2507
/*[clinic input]
2508
@permit_long_summary
2509
@text_signature "($self, prefix[, start[, end]], /)"
2510
bytes.startswith
2511
2512
    prefix as subobj: object
2513
        A bytes or a tuple of bytes to try.
2514
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2515
        Optional start position. Default: start of the bytes.
2516
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2517
        Optional stop position. Default: end of the bytes.
2518
    /
2519
2520
Return True if the bytes starts with the specified prefix, False otherwise.
2521
[clinic start generated code]*/
2522
2523
static PyObject *
2524
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2525
                      Py_ssize_t start, Py_ssize_t end)
2526
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2527
43.6k
{
2528
43.6k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2529
43.6k
                                subobj, start, end);
2530
43.6k
}
2531
2532
/*[clinic input]
2533
@permit_long_summary
2534
@text_signature "($self, suffix[, start[, end]], /)"
2535
bytes.endswith
2536
2537
    suffix as subobj: object
2538
        A bytes or a tuple of bytes to try.
2539
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2540
         Optional start position. Default: start of the bytes.
2541
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2542
         Optional stop position. Default: end of the bytes.
2543
    /
2544
2545
Return True if the bytes ends with the specified suffix, False otherwise.
2546
[clinic start generated code]*/
2547
2548
static PyObject *
2549
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2550
                    Py_ssize_t end)
2551
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2552
315
{
2553
315
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2554
315
                              subobj, start, end);
2555
315
}
2556
2557
2558
/*[clinic input]
2559
bytes.decode
2560
2561
    encoding: str(c_default="NULL") = 'utf-8'
2562
        The encoding with which to decode the bytes.
2563
    errors: str(c_default="NULL") = 'strict'
2564
        The error handling scheme to use for the handling of decoding errors.
2565
        The default is 'strict' meaning that decoding errors raise a
2566
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2567
        as well as any other name registered with codecs.register_error that
2568
        can handle UnicodeDecodeErrors.
2569
2570
Decode the bytes using the codec registered for encoding.
2571
[clinic start generated code]*/
2572
2573
static PyObject *
2574
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2575
                  const char *errors)
2576
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2577
22.4M
{
2578
22.4M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2579
22.4M
}
2580
2581
2582
/*[clinic input]
2583
@permit_long_docstring_body
2584
bytes.splitlines
2585
2586
    keepends: bool = False
2587
2588
Return a list of the lines in the bytes, breaking at line boundaries.
2589
2590
Line breaks are not included in the resulting list unless keepends is given and
2591
true.
2592
[clinic start generated code]*/
2593
2594
static PyObject *
2595
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2596
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2597
0
{
2598
0
    return stringlib_splitlines(
2599
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2600
0
        PyBytes_GET_SIZE(self), keepends
2601
0
        );
2602
0
}
2603
2604
/*[clinic input]
2605
@classmethod
2606
bytes.fromhex
2607
2608
    string: object
2609
    /
2610
2611
Create a bytes object from a string of hexadecimal numbers.
2612
2613
Spaces between two numbers are accepted.
2614
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2615
[clinic start generated code]*/
2616
2617
static PyObject *
2618
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2619
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2620
31.8k
{
2621
31.8k
    PyObject *result = _PyBytes_FromHex(string, 0);
2622
31.8k
    if (type != &PyBytes_Type && result != NULL) {
2623
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2624
0
    }
2625
31.8k
    return result;
2626
31.8k
}
2627
2628
/* Decode pairs of hexadecimal digits from `string` into a new object.
 *
 * `string` must be a str or an object supporting the buffer protocol;
 * anything else raises TypeError.  Runs of whitespace (Py_ISSPACE) are
 * skipped before each digit pair, but not between the two digits of a pair.
 * When `use_bytearray` is non-zero the output writer is created with
 * _PyBytesWriter_CreateByteArray(), otherwise with PyBytesWriter_Create().
 *
 * Returns the object produced by PyBytesWriter_FinishWithPointer(), or NULL
 * with an exception set:
 *   - ValueError "non-hexadecimal number found ..." for an invalid character
 *     (including the first non-ASCII character of a str);
 *   - ValueError "... even number of hexadecimal digits" when the input ends
 *     after the first digit of a pair.
 *
 * Every read of the input is bounded by `end`.  A str is NUL-terminated, but
 * the memory exported through the buffer protocol need not be, so reading
 * `*end` is not allowed for buffer input. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking past `end` */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The input ended after a single digit: odd number of digits.
           Checked before the lookup so that `*end` is never read. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    /* `writer` is still NULL when the non-ASCII check jumps here. */
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2740
2741
/*[clinic input]
2742
bytes.hex
2743
2744
    sep: object = NULL
2745
        An optional single character or byte to separate hex bytes.
2746
    bytes_per_sep: Py_ssize_t = 1
2747
        How many bytes between separators.  Positive values count from the
2748
        right, negative values count from the left.
2749
2750
Create a string of hexadecimal numbers from a bytes object.
2751
2752
Example:
2753
>>> value = b'\xb9\x01\xef'
2754
>>> value.hex()
2755
'b901ef'
2756
>>> value.hex(':')
2757
'b9:01:ef'
2758
>>> value.hex(':', 2)
2759
'b9:01ef'
2760
>>> value.hex(':', -2)
2761
'b901:ef'
2762
[clinic start generated code]*/
2763
2764
static PyObject *
2765
bytes_hex_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t bytes_per_sep)
2766
/*[clinic end generated code: output=588821f02cb9d8f5 input=bd8eceb755d8230f]*/
2767
0
{
2768
0
    const char *argbuf = PyBytes_AS_STRING(self);
2769
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2770
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2771
0
}
2772
2773
/* __getnewargs__: return a one-element tuple holding a bytes copy of the
 * object's contents, built with the "(y#)" format. */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    const char *contents = PyBytes_AS_STRING(op);
    Py_ssize_t length = PyBytes_GET_SIZE(op);

    return Py_BuildValue("(y#)", contents, length);
}
2779
2780
2781
/* Method table for the bytes type; it is installed as tp_methods of
 * PyBytes_Type.  Entries are either explicit {name, function, flags, doc}
 * initializers or *_METHODDEF macros defined outside this section.  The
 * table ends with a {NULL, NULL} sentinel. */
static PyMethodDef
2782
bytes_methods[] = {
2783
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2784
    BYTES___BYTES___METHODDEF
2785
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2786
     _Py_capitalize__doc__},
2787
    STRINGLIB_CENTER_METHODDEF
2788
    BYTES_COUNT_METHODDEF
2789
    BYTES_DECODE_METHODDEF
2790
    BYTES_ENDSWITH_METHODDEF
2791
    STRINGLIB_EXPANDTABS_METHODDEF
2792
    BYTES_FIND_METHODDEF
2793
    BYTES_FROMHEX_METHODDEF
2794
    BYTES_HEX_METHODDEF
2795
    BYTES_INDEX_METHODDEF
2796
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2797
     _Py_isalnum__doc__},
2798
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2799
     _Py_isalpha__doc__},
2800
    {"isascii", stringlib_isascii, METH_NOARGS,
2801
     _Py_isascii__doc__},
2802
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2803
     _Py_isdigit__doc__},
2804
    {"islower", stringlib_islower, METH_NOARGS,
2805
     _Py_islower__doc__},
2806
    {"isspace", stringlib_isspace, METH_NOARGS,
2807
     _Py_isspace__doc__},
2808
    {"istitle", stringlib_istitle, METH_NOARGS,
2809
     _Py_istitle__doc__},
2810
    {"isupper", stringlib_isupper, METH_NOARGS,
2811
     _Py_isupper__doc__},
2812
    BYTES_JOIN_METHODDEF
2813
    STRINGLIB_LJUST_METHODDEF
2814
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2815
    BYTES_LSTRIP_METHODDEF
2816
    BYTES_MAKETRANS_METHODDEF
2817
    BYTES_PARTITION_METHODDEF
2818
    BYTES_REPLACE_METHODDEF
2819
    BYTES_REMOVEPREFIX_METHODDEF
2820
    BYTES_REMOVESUFFIX_METHODDEF
2821
    BYTES_RFIND_METHODDEF
2822
    BYTES_RINDEX_METHODDEF
2823
    STRINGLIB_RJUST_METHODDEF
2824
    BYTES_RPARTITION_METHODDEF
2825
    BYTES_RSPLIT_METHODDEF
2826
    BYTES_RSTRIP_METHODDEF
2827
    BYTES_SPLIT_METHODDEF
2828
    BYTES_SPLITLINES_METHODDEF
2829
    BYTES_STARTSWITH_METHODDEF
2830
    BYTES_STRIP_METHODDEF
2831
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2832
     _Py_swapcase__doc__},
2833
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2834
    BYTES_TRANSLATE_METHODDEF
2835
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2836
    STRINGLIB_ZFILL_METHODDEF
2837
    {NULL,     NULL}                         /* sentinel */
2838
};
2839
2840
/* nb_remainder slot (the % operator).  Formatting is performed only when the
 * left operand is a bytes object; any other left operand yields
 * NotImplemented. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);

        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2849
2850
static PyNumberMethods bytes_as_number = {
2851
    0,              /*nb_add*/
2852
    0,              /*nb_subtract*/
2853
    0,              /*nb_multiply*/
2854
    bytes_mod,      /*nb_remainder*/
2855
};
2856
2857
/* Forward declaration: bytes_new_impl() below calls this to build an
 * instance of a bytes subtype from an already-constructed bytes object. */
static PyObject *
2858
bytes_subtype_new(PyTypeObject *, PyObject *);
2859
2860
/*[clinic input]
2861
@classmethod
2862
bytes.__new__ as bytes_new
2863
2864
    source as x: object = NULL
2865
    encoding: str = NULL
2866
    errors: str = NULL
2867
2868
[clinic start generated code]*/
2869
2870
static PyObject *
2871
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2872
               const char *errors)
2873
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2874
15.5M
{
2875
15.5M
    PyObject *bytes;
2876
15.5M
    PyObject *func;
2877
15.5M
    Py_ssize_t size;
2878
2879
15.5M
    if (x == NULL) {
2880
0
        if (encoding != NULL || errors != NULL) {
2881
0
            PyErr_SetString(PyExc_TypeError,
2882
0
                            encoding != NULL ?
2883
0
                            "encoding without a string argument" :
2884
0
                            "errors without a string argument");
2885
0
            return NULL;
2886
0
        }
2887
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2888
0
    }
2889
15.5M
    else if (encoding != NULL) {
2890
        /* Encode via the codec registry */
2891
364k
        if (!PyUnicode_Check(x)) {
2892
0
            PyErr_SetString(PyExc_TypeError,
2893
0
                            "encoding without a string argument");
2894
0
            return NULL;
2895
0
        }
2896
364k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2897
364k
    }
2898
15.2M
    else if (errors != NULL) {
2899
0
        PyErr_SetString(PyExc_TypeError,
2900
0
                        PyUnicode_Check(x) ?
2901
0
                        "string argument without an encoding" :
2902
0
                        "errors without a string argument");
2903
0
        return NULL;
2904
0
    }
2905
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2906
       integer argument before deferring to PyBytes_FromObject, something
2907
       PyObject_Bytes doesn't do. */
2908
15.2M
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2909
51.6k
        bytes = _PyObject_CallNoArgs(func);
2910
51.6k
        Py_DECREF(func);
2911
51.6k
        if (bytes == NULL)
2912
0
            return NULL;
2913
51.6k
        if (!PyBytes_Check(bytes)) {
2914
0
            PyErr_Format(PyExc_TypeError,
2915
0
                         "%T.__bytes__() must return a bytes, not %T",
2916
0
                         x, bytes);
2917
0
            Py_DECREF(bytes);
2918
0
            return NULL;
2919
0
        }
2920
51.6k
    }
2921
15.1M
    else if (PyErr_Occurred())
2922
0
        return NULL;
2923
15.1M
    else if (PyUnicode_Check(x)) {
2924
0
        PyErr_SetString(PyExc_TypeError,
2925
0
                        "string argument without an encoding");
2926
0
        return NULL;
2927
0
    }
2928
    /* Is it an integer? */
2929
15.1M
    else if (_PyIndex_Check(x)) {
2930
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2931
0
        if (size == -1 && PyErr_Occurred()) {
2932
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2933
0
                return NULL;
2934
0
            PyErr_Clear();  /* fall through */
2935
0
            bytes = PyBytes_FromObject(x);
2936
0
        }
2937
0
        else {
2938
0
            if (size < 0) {
2939
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2940
0
                return NULL;
2941
0
            }
2942
0
            bytes = _PyBytes_FromSize(size, 1);
2943
0
        }
2944
0
    }
2945
15.1M
    else {
2946
15.1M
        bytes = PyBytes_FromObject(x);
2947
15.1M
    }
2948
2949
15.5M
    if (bytes != NULL && type != &PyBytes_Type) {
2950
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2951
0
    }
2952
2953
15.5M
    return bytes;
2954
15.5M
}
2955
2956
/* Create an object from `x` through the buffer protocol: acquire a
 * PyBUF_FULL_RO view, copy its contents in C-contiguous order into a writer
 * sized to the view, release the view and finish the writer.  Returns NULL
 * with an exception set if any step fails. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    int copied = 0;
    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
    if (writer != NULL) {
        void *dest = PyBytesWriter_GetData(writer);
        copied = (PyBuffer_ToContiguous(dest, &view, view.len, 'C') >= 0);
    }

    if (!copied) {
        /* Either the writer could not be created or the copy failed. */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&view);
        return NULL;
    }

    PyBuffer_Release(&view);
    return PyBytesWriter_Finish(writer);
}
2981
2982
/* Create an object from a list whose items are converted with
 * PyNumber_AsSsize_t() and must lie in range(0, 256).
 *
 * The list length is re-read on every iteration and each item is held by a
 * strong reference while it is converted; the output buffer is grown if the
 * position reaches the allocated size.  Returns NULL with an exception set
 * on failure. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        PyObject *elem = Py_NewRef(PyList_GET_ITEM(x, pos));
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        if (pos >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto fail;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        pos++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3022
3023
/* Create an object from a tuple whose items are converted with
 * PyNumber_AsSsize_t() and must lie in range(0, 256).  The writer is created
 * with exactly the tuple's length.  Returns NULL with an exception set on
 * failure. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    for (Py_ssize_t pos = 0; pos < count; pos++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, pos), NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }
        out[pos] = (char) byte;
    }
    return PyBytesWriter_Finish(writer);

  fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3055
3056
/* Create an object by draining iterator `it`.
 *
 * `x` is only used to obtain a length hint (default 64) for the initial
 * writer size; the buffer grows when more items arrive than were allocated.
 * Each item is converted with PyNumber_AsSsize_t() and must lie in
 * range(0, 256).  Returns NULL with an exception set on failure. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t hint = PyObject_LengthHint(x, 64);
    if (hint == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(hint);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);
    Py_ssize_t produced = 0;

    /* Run the iterator to exhaustion */
    PyObject *item;
    while ((item = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        /* Range check */
        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        /* Append the byte */
        if (produced >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto fail;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        produced++;
    }

    /* PyIter_Next() returned NULL: either exhaustion or an error. */
    if (PyErr_Occurred()) {
        goto fail;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

  fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3115
3116
/* Convert `x` to a bytes object.
 *
 * Conversion strategies, tried in order:
 *   - an exact bytes object is returned as a new reference to itself;
 *   - an object supporting the buffer protocol is copied;
 *   - an exact list or exact tuple is converted item by item;
 *   - any other non-str object is iterated.
 * A NULL argument is reported as a bad internal call.  A str, or an object
 * whose iteration attempt fails with TypeError, produces the
 * "cannot convert" TypeError; other iteration errors are propagated. */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iter = PyObject_GetIter(x);
        if (iter == NULL) {
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
            /* A TypeError is replaced by the message below. */
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iter, x);
            Py_DECREF(iter);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3157
3158
/* This allocator is needed for subclasses that don't want to use __new__.
3159
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3160
 *
3161
 * This allocator will be removed when ob_shash is removed.
3162
 */
3163
static PyObject *
3164
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3165
0
{
3166
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3167
0
    if (obj == NULL) {
3168
0
        return NULL;
3169
0
    }
3170
0
    set_ob_shash(obj, -1);
3171
0
    return (PyObject*)obj;
3172
0
}
3173
3174
static PyObject *
3175
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3176
0
{
3177
0
    PyObject *pnew;
3178
0
    Py_ssize_t n;
3179
3180
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3181
0
    assert(PyBytes_Check(tmp));
3182
0
    n = PyBytes_GET_SIZE(tmp);
3183
0
    pnew = type->tp_alloc(type, n);
3184
0
    if (pnew != NULL) {
3185
0
        memcpy(PyBytes_AS_STRING(pnew),
3186
0
                  PyBytes_AS_STRING(tmp), n+1);
3187
0
        set_ob_shash((PyBytesObject *)pnew,
3188
0
            get_ob_shash((PyBytesObject *)tmp));
3189
0
    }
3190
0
    return pnew;
3191
0
}
3192
3193
/* Docstring of the bytes type; installed as tp_doc of PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
3194
"bytes(iterable_of_ints) -> bytes\n\
3195
bytes(string, encoding[, errors]) -> bytes\n\
3196
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3197
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3198
bytes() -> empty bytes object\n\
3199
\n\
3200
Construct an immutable array of bytes from:\n\
3201
  - an iterable yielding integers in range(256)\n\
3202
  - a text string encoded using the specified encoding\n\
3203
  - any object implementing the buffer API.\n\
3204
  - an integer");
3205
3206
/* Forward declaration: referenced as the tp_iter slot of PyBytes_Type. */
static PyObject *bytes_iter(PyObject *seq);
3207
3208
/* Type object for bytes.  Slots are filled positionally; the trailing
 * tp_version_tag uses a designated initializer. */
PyTypeObject PyBytes_Type = {
3209
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3210
    "bytes",                                    /* tp_name */
3211
    PyBytesObject_SIZE,                         /* tp_basicsize */
3212
    sizeof(char),                               /* tp_itemsize */
3213
    0,                                          /* tp_dealloc */
3214
    0,                                          /* tp_vectorcall_offset */
3215
    0,                                          /* tp_getattr */
3216
    0,                                          /* tp_setattr */
3217
    0,                                          /* tp_as_async */
3218
    bytes_repr,                                 /* tp_repr */
3219
    &bytes_as_number,                           /* tp_as_number */
3220
    &bytes_as_sequence,                         /* tp_as_sequence */
3221
    &bytes_as_mapping,                          /* tp_as_mapping */
3222
    bytes_hash,                                 /* tp_hash */
3223
    0,                                          /* tp_call */
3224
    bytes_str,                                  /* tp_str */
3225
    PyObject_GenericGetAttr,                    /* tp_getattro */
3226
    0,                                          /* tp_setattro */
3227
    &bytes_as_buffer,                           /* tp_as_buffer */
3228
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3229
        Py_TPFLAGS_BYTES_SUBCLASS |
3230
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3231
    bytes_doc,                                  /* tp_doc */
3232
    0,                                          /* tp_traverse */
3233
    0,                                          /* tp_clear */
3234
    bytes_richcompare,                          /* tp_richcompare */
3235
    0,                                          /* tp_weaklistoffset */
3236
    bytes_iter,                                 /* tp_iter */
3237
    0,                                          /* tp_iternext */
3238
    bytes_methods,                              /* tp_methods */
3239
    0,                                          /* tp_members */
3240
    0,                                          /* tp_getset */
3241
    0,                                          /* tp_base */
3242
    0,                                          /* tp_dict */
3243
    0,                                          /* tp_descr_get */
3244
    0,                                          /* tp_descr_set */
3245
    0,                                          /* tp_dictoffset */
3246
    0,                                          /* tp_init */
3247
    bytes_alloc,                                /* tp_alloc */
3248
    bytes_new,                                  /* tp_new */
3249
    PyObject_Free,                              /* tp_free */
3250
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3251
};
3252
3253
void
3254
PyBytes_Concat(PyObject **pv, PyObject *w)
3255
0
{
3256
0
    assert(pv != NULL);
3257
0
    if (*pv == NULL)
3258
0
        return;
3259
0
    if (w == NULL) {
3260
0
        Py_CLEAR(*pv);
3261
0
        return;
3262
0
    }
3263
3264
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3265
        /* Only one reference, so we can resize in place */
3266
0
        Py_ssize_t oldsize;
3267
0
        Py_buffer wb;
3268
3269
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3270
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3271
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3272
0
            Py_CLEAR(*pv);
3273
0
            return;
3274
0
        }
3275
3276
0
        oldsize = PyBytes_GET_SIZE(*pv);
3277
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3278
0
            PyErr_NoMemory();
3279
0
            goto error;
3280
0
        }
3281
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3282
0
            goto error;
3283
3284
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3285
0
        PyBuffer_Release(&wb);
3286
0
        return;
3287
3288
0
      error:
3289
0
        PyBuffer_Release(&wb);
3290
0
        Py_CLEAR(*pv);
3291
0
        return;
3292
0
    }
3293
3294
0
    else {
3295
        /* Multiple references, need to create new object */
3296
0
        PyObject *v;
3297
0
        v = bytes_concat(*pv, w);
3298
0
        Py_SETREF(*pv, v);
3299
0
    }
3300
0
}
3301
3302
void
3303
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3304
0
{
3305
0
    PyBytes_Concat(pv, w);
3306
0
    Py_XDECREF(w);
3307
0
}
3308
3309
3310
/* The following function breaks the notion that bytes are immutable:
3311
   it changes the size of a bytes object.  You can think of it
3312
   as creating a new bytes object and destroying the old one, only
3313
   more efficiently.
3314
   Note that if there's not enough memory to resize the bytes object, the
3315
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3316
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3317
   returned, and the value in *pv may or may not be the same as on input.
3318
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3319
   does *not* include that), and a trailing \0 byte is stored.
3320
*/
3321
3322
int
3323
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3324
18.6M
{
3325
18.6M
    PyObject *v;
3326
18.6M
    PyBytesObject *sv;
3327
18.6M
    v = *pv;
3328
18.6M
    if (!PyBytes_Check(v) || newsize < 0) {
3329
0
        *pv = 0;
3330
0
        Py_DECREF(v);
3331
0
        PyErr_BadInternalCall();
3332
0
        return -1;
3333
0
    }
3334
18.6M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3335
18.6M
    if (oldsize == newsize) {
3336
        /* return early if newsize equals to v->ob_size */
3337
241k
        return 0;
3338
241k
    }
3339
18.4M
    if (oldsize == 0) {
3340
16.1M
        *pv = _PyBytes_FromSize(newsize, 0);
3341
16.1M
        Py_DECREF(v);
3342
16.1M
        return (*pv == NULL) ? -1 : 0;
3343
16.1M
    }
3344
2.26M
    if (newsize == 0) {
3345
8.34k
        *pv = bytes_get_empty();
3346
8.34k
        Py_DECREF(v);
3347
8.34k
        return 0;
3348
8.34k
    }
3349
2.25M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3350
0
        if (oldsize < newsize) {
3351
0
            *pv = _PyBytes_FromSize(newsize, 0);
3352
0
            if (*pv) {
3353
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3354
0
            }
3355
0
        }
3356
0
        else {
3357
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3358
0
        }
3359
0
        Py_DECREF(v);
3360
0
        return (*pv == NULL) ? -1 : 0;
3361
0
    }
3362
3363
#ifdef Py_TRACE_REFS
3364
    _Py_ForgetReference(v);
3365
#endif
3366
2.25M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3367
2.25M
    *pv = (PyObject *)
3368
2.25M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3369
2.25M
    if (*pv == NULL) {
3370
#ifdef Py_REF_DEBUG
3371
        _Py_DecRefTotal(_PyThreadState_GET());
3372
#endif
3373
0
        PyObject_Free(v);
3374
0
        PyErr_NoMemory();
3375
0
        return -1;
3376
0
    }
3377
2.25M
    _Py_NewReferenceNoTotal(*pv);
3378
2.25M
    sv = (PyBytesObject *) *pv;
3379
2.25M
    Py_SET_SIZE(sv, newsize);
3380
2.25M
    sv->ob_sval[newsize] = '\0';
3381
2.25M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3382
2.25M
    return 0;
3383
2.25M
}
3384
3385
3386
/*********************** Bytes Iterator ****************************/
3387
3388
/* State of a bytes iterator: the bytes object being iterated and the index
 * of the next byte to yield. */
typedef struct {
3389
    PyObject_HEAD
3390
    Py_ssize_t it_index;   /* Index of the next byte returned by striter_next() */
3391
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3392
} striterobject;
3393
3394
2.72k
/* Unchecked cast from a generic object pointer to striterobject *. */
#define _striterobject_CAST(op)  ((striterobject *)(op))
3395
3396
static void
3397
striter_dealloc(PyObject *op)
3398
98
{
3399
98
    striterobject *it = _striterobject_CAST(op);
3400
98
    _PyObject_GC_UNTRACK(it);
3401
98
    Py_XDECREF(it->it_seq);
3402
98
    PyObject_GC_Del(it);
3403
98
}
3404
3405
static int
3406
striter_traverse(PyObject *op, visitproc visit, void *arg)
3407
0
{
3408
0
    striterobject *it = _striterobject_CAST(op);
3409
0
    Py_VISIT(it->it_seq);
3410
0
    return 0;
3411
0
}
3412
3413
/* tp_iternext: return the next byte as an int object, or NULL when the
 * iterator is exhausted.  On exhaustion the reference to the bytes object
 * is dropped and it_seq is set to NULL, so later calls return NULL
 * immediately. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *it = _striterobject_CAST(op);
    assert(it != NULL);

    PyBytesObject *seq = it->it_seq;
    if (seq == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(seq));

    if (it->it_index >= PyBytes_GET_SIZE(seq)) {
        /* Past the last byte: mark the iterator as exhausted. */
        it->it_seq = NULL;
        Py_DECREF(seq);
        return NULL;
    }

    unsigned char byte = (unsigned char)seq->ob_sval[it->it_index];
    it->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3434
3435
/* __length_hint__: size of the iterated bytes object minus the current
 * index, or 0 once the iterator no longer holds a bytes object. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *it = _striterobject_CAST(op);
    Py_ssize_t remaining = (it->it_seq != NULL)
        ? PyBytes_GET_SIZE(it->it_seq) - it->it_index
        : 0;

    return PyLong_FromSsize_t(remaining);
}
3444
3445
/* Docstring attached to the "__length_hint__" entry of striter_methods. */
PyDoc_STRVAR(length_hint_doc,
3446
             "Private method returning an estimate of len(list(it)).");
3447
3448
/* __reduce__: describe the iterator for pickling.
 *
 * A live iterator reduces to (iter, (bytes,), index); an exhausted one
 * reduces to (iter, ((),)). */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    PyObject *iter_builtin = _PyEval_GetBuiltin(&_Py_ID(iter));

    /* _PyEval_GetBuiltin can invoke arbitrary code,
     * call must be before access of iterator pointers.
     * see issue #101765 */
    striterobject *it = _striterobject_CAST(op);
    if (it->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_builtin);
    }
    return Py_BuildValue("N(O)n", iter_builtin, it->it_seq, it->it_index);
}
3463
3464
/* Docstring attached to the "__reduce__" entry of striter_methods. */
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3465
3466
/* __setstate__: restore the iteration index from an int.
 *
 * The index is clamped to [0, len(bytes)].  An iterator that no longer
 * holds a bytes object is left unchanged.  Returns None, or NULL if `state`
 * cannot be converted to Py_ssize_t. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *it = _striterobject_CAST(op);
    if (it->it_seq != NULL) {
        const Py_ssize_t limit = PyBytes_GET_SIZE(it->it_seq);
        if (index < 0) {
            index = 0;
        }
        else if (index > limit) {
            index = limit; /* iterator exhausted */
        }
        it->it_index = index;
    }
    Py_RETURN_NONE;
}
3482
3483
/* Docstring attached to the "__setstate__" entry of striter_methods. */
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3484
3485
static PyMethodDef striter_methods[] = {
3486
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3487
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3488
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3489
    {NULL,              NULL}           /* sentinel */
3490
};
3491
3492
PyTypeObject PyBytesIter_Type = {
3493
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3494
    "bytes_iterator",                           /* tp_name */
3495
    sizeof(striterobject),                      /* tp_basicsize */
3496
    0,                                          /* tp_itemsize */
3497
    /* methods */
3498
    striter_dealloc,                            /* tp_dealloc */
3499
    0,                                          /* tp_vectorcall_offset */
3500
    0,                                          /* tp_getattr */
3501
    0,                                          /* tp_setattr */
3502
    0,                                          /* tp_as_async */
3503
    0,                                          /* tp_repr */
3504
    0,                                          /* tp_as_number */
3505
    0,                                          /* tp_as_sequence */
3506
    0,                                          /* tp_as_mapping */
3507
    0,                                          /* tp_hash */
3508
    0,                                          /* tp_call */
3509
    0,                                          /* tp_str */
3510
    PyObject_GenericGetAttr,                    /* tp_getattro */
3511
    0,                                          /* tp_setattro */
3512
    0,                                          /* tp_as_buffer */
3513
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3514
    0,                                          /* tp_doc */
3515
    striter_traverse,                           /* tp_traverse */
3516
    0,                                          /* tp_clear */
3517
    0,                                          /* tp_richcompare */
3518
    0,                                          /* tp_weaklistoffset */
3519
    PyObject_SelfIter,                          /* tp_iter */
3520
    striter_next,                               /* tp_iternext */
3521
    striter_methods,                            /* tp_methods */
3522
    0,
3523
};
3524
3525
// Create a new bytes iterator over `seq`.
//
// `seq` must be a bytes object; otherwise a bad-internal-call error is set
// and NULL is returned.  NULL is also returned if the iterator object cannot
// be allocated.  On success the iterator starts at index 0, owns a new
// reference to `seq`, and is tracked by the GC.
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iter = PyObject_GC_New(striterobject, &PyBytesIter_Type);
    if (iter == NULL) {
        return NULL;
    }

    iter->it_index = 0;
    iter->it_seq = (PyBytesObject *)Py_NewRef(seq);
    _PyObject_GC_TRACK(iter);
    return (PyObject *)iter;
}
3542
3543
3544
void
3545
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3546
    const char* src, Py_ssize_t len_src)
3547
194k
{
3548
194k
    if (len_dest == 0) {
3549
717
        return;
3550
717
    }
3551
193k
    if (len_src == 1) {
3552
191k
        memset(dest, src[0], len_dest);
3553
191k
    }
3554
2.40k
    else {
3555
2.40k
        if (src != dest) {
3556
2.40k
            memcpy(dest, src, len_src);
3557
2.40k
        }
3558
2.40k
        Py_ssize_t copied = len_src;
3559
5.66k
        while (copied < len_dest) {
3560
3.26k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3561
3.26k
            memcpy(dest + copied, dest, bytes_to_copy);
3562
3.26k
            copied += bytes_to_copy;
3563
3.26k
        }
3564
2.40k
    }
3565
193k
}
3566
3567
3568
// --- PyBytesWriter API -----------------------------------------------------
3569
3570
static inline char*
3571
byteswriter_data(PyBytesWriter *writer)
3572
36.1M
{
3573
36.1M
    return _PyBytesWriter_GetData(writer);
3574
36.1M
}
3575
3576
3577
static inline Py_ssize_t
3578
byteswriter_allocated(PyBytesWriter *writer)
3579
35.9M
{
3580
35.9M
    if (writer->obj == NULL) {
3581
35.0M
        return sizeof(writer->small_buffer);
3582
35.0M
    }
3583
911k
    else if (writer->use_bytearray) {
3584
0
        return PyByteArray_GET_SIZE(writer->obj);
3585
0
    }
3586
911k
    else {
3587
911k
        return PyBytes_GET_SIZE(writer->obj);
3588
911k
    }
3589
35.9M
}
3590
3591
3592
#ifdef MS_WINDOWS
3593
   /* On Windows, overallocate by 50% is the best factor */
3594
#  define OVERALLOCATE_FACTOR 2
3595
#else
3596
   /* On Linux, overallocate by 25% is the best factor */
3597
46.3k
#  define OVERALLOCATE_FACTOR 4
3598
#endif
3599
3600
static inline int
3601
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3602
25.9M
{
3603
25.9M
    assert(size >= 0);
3604
3605
25.9M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3606
25.9M
    if (size <= old_allocated) {
3607
24.8M
        return 0;
3608
24.8M
    }
3609
3610
1.11M
    if (resize & writer->overallocate) {
3611
23.1k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3612
23.1k
            size += size / OVERALLOCATE_FACTOR;
3613
23.1k
        }
3614
23.1k
    }
3615
3616
1.11M
    if (writer->obj != NULL) {
3617
23.1k
        if (writer->use_bytearray) {
3618
0
            if (PyByteArray_Resize(writer->obj, size)) {
3619
0
                return -1;
3620
0
            }
3621
0
        }
3622
23.1k
        else {
3623
23.1k
            if (_PyBytes_Resize(&writer->obj, size)) {
3624
0
                return -1;
3625
0
            }
3626
23.1k
        }
3627
23.1k
        assert(writer->obj != NULL);
3628
23.1k
    }
3629
1.09M
    else if (writer->use_bytearray) {
3630
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3631
0
        if (writer->obj == NULL) {
3632
0
            return -1;
3633
0
        }
3634
0
        if (resize) {
3635
0
            assert((size_t)size > sizeof(writer->small_buffer));
3636
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3637
0
                   writer->small_buffer,
3638
0
                   sizeof(writer->small_buffer));
3639
0
        }
3640
0
    }
3641
1.09M
    else {
3642
1.09M
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3643
1.09M
        if (writer->obj == NULL) {
3644
0
            return -1;
3645
0
        }
3646
1.09M
        if (resize) {
3647
0
            assert((size_t)size > sizeof(writer->small_buffer));
3648
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3649
0
                   writer->small_buffer,
3650
0
                   sizeof(writer->small_buffer));
3651
0
        }
3652
1.09M
    }
3653
3654
#ifdef Py_DEBUG
3655
    Py_ssize_t allocated = byteswriter_allocated(writer);
3656
    if (resize && allocated > old_allocated) {
3657
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3658
               allocated - old_allocated);
3659
    }
3660
#endif
3661
3662
1.11M
    return 0;
3663
1.11M
}
3664
3665
3666
static PyBytesWriter*
3667
byteswriter_create(Py_ssize_t size, int use_bytearray)
3668
25.9M
{
3669
25.9M
    if (size < 0) {
3670
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3671
0
        return NULL;
3672
0
    }
3673
3674
25.9M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3675
25.9M
    if (writer == NULL) {
3676
13.2k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3677
13.2k
        if (writer == NULL) {
3678
0
            PyErr_NoMemory();
3679
0
            return NULL;
3680
0
        }
3681
13.2k
    }
3682
25.9M
    writer->obj = NULL;
3683
25.9M
    writer->size = 0;
3684
25.9M
    writer->use_bytearray = use_bytearray;
3685
25.9M
    writer->overallocate = !use_bytearray;
3686
3687
25.9M
    if (size >= 1) {
3688
25.9M
        if (byteswriter_resize(writer, size, 0) < 0) {
3689
0
            PyBytesWriter_Discard(writer);
3690
0
            return NULL;
3691
0
        }
3692
25.9M
        writer->size = size;
3693
25.9M
    }
3694
#ifdef Py_DEBUG
3695
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3696
#endif
3697
25.9M
    return writer;
3698
25.9M
}
3699
3700
PyBytesWriter*
3701
PyBytesWriter_Create(Py_ssize_t size)
3702
25.9M
{
3703
25.9M
    return byteswriter_create(size, 0);
3704
25.9M
}
3705
3706
PyBytesWriter*
3707
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3708
0
{
3709
0
    return byteswriter_create(size, 1);
3710
0
}
3711
3712
3713
void
3714
PyBytesWriter_Discard(PyBytesWriter *writer)
3715
26.1M
{
3716
26.1M
    if (writer == NULL) {
3717
151k
        return;
3718
151k
    }
3719
3720
25.9M
    Py_XDECREF(writer->obj);
3721
25.9M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3722
25.9M
}
3723
3724
3725
PyObject*
3726
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3727
25.5M
{
3728
25.5M
    PyObject *result;
3729
25.5M
    if (size == 0) {
3730
54.2k
        result = bytes_get_empty();
3731
54.2k
    }
3732
25.4M
    else if (writer->obj != NULL) {
3733
980k
        if (writer->use_bytearray) {
3734
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3735
0
                if (PyByteArray_Resize(writer->obj, size)) {
3736
0
                    goto error;
3737
0
                }
3738
0
            }
3739
0
        }
3740
980k
        else {
3741
980k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3742
935k
                if (_PyBytes_Resize(&writer->obj, size)) {
3743
0
                    goto error;
3744
0
                }
3745
935k
            }
3746
980k
        }
3747
980k
        result = writer->obj;
3748
980k
        writer->obj = NULL;
3749
980k
    }
3750
24.5M
    else if (writer->use_bytearray) {
3751
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3752
0
    }
3753
24.5M
    else {
3754
24.5M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3755
24.5M
    }
3756
25.5M
    PyBytesWriter_Discard(writer);
3757
25.5M
    return result;
3758
3759
0
error:
3760
0
    PyBytesWriter_Discard(writer);
3761
0
    return NULL;
3762
25.5M
}
3763
3764
PyObject*
3765
PyBytesWriter_Finish(PyBytesWriter *writer)
3766
15.4M
{
3767
15.4M
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3768
15.4M
}
3769
3770
3771
PyObject*
3772
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3773
9.92M
{
3774
9.92M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3775
9.92M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3776
0
        PyBytesWriter_Discard(writer);
3777
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3778
0
        return NULL;
3779
0
    }
3780
3781
9.92M
    return PyBytesWriter_FinishWithSize(writer, size);
3782
9.92M
}
3783
3784
3785
void*
3786
PyBytesWriter_GetData(PyBytesWriter *writer)
3787
26.1M
{
3788
26.1M
    return byteswriter_data(writer);
3789
26.1M
}
3790
3791
3792
Py_ssize_t
3793
PyBytesWriter_GetSize(PyBytesWriter *writer)
3794
0
{
3795
0
    return _PyBytesWriter_GetSize(writer);
3796
0
}
3797
3798
3799
static Py_ssize_t
3800
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3801
12.0k
{
3802
12.0k
    return byteswriter_allocated(writer);
3803
12.0k
}
3804
3805
3806
int
3807
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3808
0
{
3809
0
    if (size < 0) {
3810
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3811
0
        return -1;
3812
0
    }
3813
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3814
0
        return -1;
3815
0
    }
3816
0
    writer->size = size;
3817
0
    return 0;
3818
0
}
3819
3820
3821
static void*
3822
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3823
                                      void *data)
3824
0
{
3825
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3826
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3827
0
        return NULL;
3828
0
    }
3829
0
    return byteswriter_data(writer) + pos;
3830
0
}
3831
3832
3833
int
3834
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3835
23.1k
{
3836
23.1k
    if (size < 0 && writer->size + size < 0) {
3837
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3838
0
        return -1;
3839
0
    }
3840
23.1k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3841
0
        PyErr_NoMemory();
3842
0
        return -1;
3843
0
    }
3844
23.1k
    size = writer->size + size;
3845
3846
23.1k
    if (byteswriter_resize(writer, size, 1) < 0) {
3847
0
        return -1;
3848
0
    }
3849
23.1k
    writer->size = size;
3850
23.1k
    return 0;
3851
23.1k
}
3852
3853
3854
void*
3855
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3856
                                   void *buf)
3857
0
{
3858
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3859
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3860
0
        return NULL;
3861
0
    }
3862
0
    return byteswriter_data(writer) + pos;
3863
0
}
3864
3865
3866
int
3867
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3868
                         const void *bytes, Py_ssize_t size)
3869
0
{
3870
0
    if (size < 0) {
3871
0
        size_t len = strlen(bytes);
3872
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3873
0
            PyErr_NoMemory();
3874
0
            return -1;
3875
0
        }
3876
0
        size = (Py_ssize_t)len;
3877
0
    }
3878
3879
0
    Py_ssize_t pos = writer->size;
3880
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3881
0
        return -1;
3882
0
    }
3883
0
    char *buf = byteswriter_data(writer);
3884
0
    memcpy(buf + pos, bytes, size);
3885
0
    return 0;
3886
0
}
3887
3888
3889
int
3890
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3891
0
{
3892
0
    Py_ssize_t pos = writer->size;
3893
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3894
0
        return -1;
3895
0
    }
3896
3897
0
    va_list vargs;
3898
0
    va_start(vargs, format);
3899
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3900
0
    va_end(vargs);
3901
3902
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3903
0
    return PyBytesWriter_Resize(writer, size);
3904
0
}