Coverage Report

Created: 2025-12-07 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
107M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
29
30
/* Forward declaration */
31
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
32
                                                   Py_ssize_t size, void *data);
33
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
34
35
36
7.25M
/* Shorthands for the statically allocated bytes singletons.
   CHARACTER(ch) yields a pointer to the entry of the CHARACTERS table at
   index 'ch'.  The macro deliberately does NOT end with a semicolon so that
   it can be used inside larger expressions as well as in plain statements. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
#define EMPTY (&_Py_SINGLETON(bytes_empty))
40
41
42
// Return a reference to the immortal empty bytes string singleton.
43
static inline PyObject* bytes_get_empty(void)
44
5.32M
{
45
5.32M
    PyObject *empty = &EMPTY->ob_base.ob_base;
46
5.32M
    assert(_Py_IsImmortal(empty));
47
5.32M
    return empty;
48
5.32M
}
49
50
51
static inline void
52
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
53
71.6M
{
54
71.6M
_Py_COMP_DIAG_PUSH
55
71.6M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
56
#ifdef Py_GIL_DISABLED
57
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
58
#else
59
71.6M
    a->ob_shash = hash;
60
71.6M
#endif
61
71.6M
_Py_COMP_DIAG_POP
62
71.6M
}
63
64
static inline Py_hash_t
65
get_ob_shash(PyBytesObject *a)
66
37.2M
{
67
37.2M
_Py_COMP_DIAG_PUSH
68
37.2M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
69
#ifdef Py_GIL_DISABLED
70
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
71
#else
72
37.2M
    return a->ob_shash;
73
37.2M
#endif
74
37.2M
_Py_COMP_DIAG_POP
75
37.2M
}
76
77
78
/*
79
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
80
   string containing exactly 'size' bytes.
81
82
   For PyBytes_FromStringAndSize(), the parameter 'str' is
83
   either NULL or else points to a string containing at least 'size' bytes.
84
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
85
   not have to be null-terminated.  (Therefore it is safe to construct a
86
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
87
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
88
   bytes (setting the last byte to the null terminating character) and you can
89
   fill in the data yourself.  If 'str' is non-NULL then the resulting
90
   PyBytes object must be treated as immutable and you must not fill in nor
91
   alter the data yourself, since the strings may be shared.
92
93
   The PyObject member 'op->ob_size', which denotes the number of "extra
94
   items" in a variable-size object, will contain the number of bytes
95
   allocated for string data, not counting the null terminating character.
96
   It is therefore equal to the 'size' parameter (for
97
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
98
   parameter (for PyBytes_FromString()).
99
*/
100
static PyObject *
101
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
102
52.6M
{
103
52.6M
    PyBytesObject *op;
104
52.6M
    assert(size >= 0);
105
106
52.6M
    if (size == 0) {
107
0
        return bytes_get_empty();
108
0
    }
109
110
52.6M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
111
0
        PyErr_SetString(PyExc_OverflowError,
112
0
                        "byte string is too large");
113
0
        return NULL;
114
0
    }
115
116
    /* Inline PyObject_NewVar */
117
52.6M
    if (use_calloc)
118
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
119
52.6M
    else
120
52.6M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
121
52.6M
    if (op == NULL) {
122
0
        return PyErr_NoMemory();
123
0
    }
124
52.6M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
125
52.6M
    set_ob_shash(op, -1);
126
52.6M
    if (!use_calloc) {
127
52.6M
        op->ob_sval[size] = '\0';
128
52.6M
    }
129
52.6M
    return (PyObject *) op;
130
52.6M
}
131
132
PyObject *
133
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
134
63.7M
{
135
63.7M
    PyBytesObject *op;
136
63.7M
    if (size < 0) {
137
0
        PyErr_SetString(PyExc_SystemError,
138
0
            "Negative size passed to PyBytes_FromStringAndSize");
139
0
        return NULL;
140
0
    }
141
63.7M
    if (size == 1 && str != NULL) {
142
7.25M
        op = CHARACTER(*str & 255);
143
7.25M
        assert(_Py_IsImmortal(op));
144
7.25M
        return (PyObject *)op;
145
7.25M
    }
146
56.4M
    if (size == 0) {
147
5.29M
        return bytes_get_empty();
148
5.29M
    }
149
150
51.1M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
151
51.1M
    if (op == NULL)
152
0
        return NULL;
153
51.1M
    if (str == NULL)
154
7.20M
        return (PyObject *) op;
155
156
43.9M
    memcpy(op->ob_sval, str, size);
157
43.9M
    return (PyObject *) op;
158
51.1M
}
159
160
PyObject *
161
PyBytes_FromString(const char *str)
162
714
{
163
714
    size_t size;
164
714
    PyBytesObject *op;
165
166
714
    assert(str != NULL);
167
714
    size = strlen(str);
168
714
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
169
0
        PyErr_SetString(PyExc_OverflowError,
170
0
            "byte string is too long");
171
0
        return NULL;
172
0
    }
173
174
714
    if (size == 0) {
175
0
        return bytes_get_empty();
176
0
    }
177
714
    else if (size == 1) {
178
0
        op = CHARACTER(*str & 255);
179
0
        assert(_Py_IsImmortal(op));
180
0
        return (PyObject *)op;
181
0
    }
182
183
    /* Inline PyObject_NewVar */
184
714
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
185
714
    if (op == NULL) {
186
0
        return PyErr_NoMemory();
187
0
    }
188
714
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
189
714
    set_ob_shash(op, -1);
190
714
    memcpy(op->ob_sval, str, size+1);
191
714
    return (PyObject *) op;
192
714
}
193
194
195
static char*
196
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
197
                 const char *format, va_list vargs)
198
0
{
199
0
    const char *f;
200
0
    const char *p;
201
0
    Py_ssize_t prec;
202
0
    int longflag;
203
0
    int size_tflag;
204
    /* Longest 64-bit formatted numbers:
205
       - "18446744073709551615\0" (21 bytes)
206
       - "-9223372036854775808\0" (21 bytes)
207
       Decimal takes the most space (it isn't enough for octal.)
208
209
       Longest 64-bit pointer representation:
210
       "0xffffffffffffffff\0" (19 bytes). */
211
0
    char buffer[21];
212
213
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
214
215
0
#define WRITE_BYTES_LEN(str, len_expr) \
216
0
    do { \
217
0
        size_t len = (len_expr); \
218
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
219
0
        if (s == NULL) { \
220
0
            goto error; \
221
0
        } \
222
0
        memcpy(s, (str), len); \
223
0
        s += len; \
224
0
    } while (0)
225
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
226
227
0
    for (f = format; *f; f++) {
228
0
        if (*f != '%') {
229
0
            *s++ = *f;
230
0
            continue;
231
0
        }
232
233
0
        p = f++;
234
235
        /* ignore the width (ex: 10 in "%10s") */
236
0
        while (Py_ISDIGIT(*f))
237
0
            f++;
238
239
        /* parse the precision (ex: 10 in "%.10s") */
240
0
        prec = 0;
241
0
        if (*f == '.') {
242
0
            f++;
243
0
            for (; Py_ISDIGIT(*f); f++) {
244
0
                prec = (prec * 10) + (*f - '0');
245
0
            }
246
0
        }
247
248
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
249
0
            f++;
250
251
        /* handle the long flag ('l'), but only for %ld and %lu.
252
           others can be added when necessary. */
253
0
        longflag = 0;
254
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
255
0
            longflag = 1;
256
0
            ++f;
257
0
        }
258
259
        /* handle the size_t flag ('z'). */
260
0
        size_tflag = 0;
261
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
262
0
            size_tflag = 1;
263
0
            ++f;
264
0
        }
265
266
0
        switch (*f) {
267
0
        case 'c':
268
0
        {
269
0
            int c = va_arg(vargs, int);
270
0
            if (c < 0 || c > 255) {
271
0
                PyErr_SetString(PyExc_OverflowError,
272
0
                                "PyBytes_FromFormatV(): %c format "
273
0
                                "expects an integer in range [0; 255]");
274
0
                goto error;
275
0
            }
276
0
            *s++ = (unsigned char)c;
277
0
            break;
278
0
        }
279
280
0
        case 'd':
281
0
            if (longflag) {
282
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
283
0
            }
284
0
            else if (size_tflag) {
285
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
286
0
            }
287
0
            else {
288
0
                sprintf(buffer, "%d", va_arg(vargs, int));
289
0
            }
290
0
            assert(strlen(buffer) < sizeof(buffer));
291
0
            WRITE_BYTES(buffer);
292
0
            break;
293
294
0
        case 'u':
295
0
            if (longflag) {
296
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
297
0
            }
298
0
            else if (size_tflag) {
299
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
300
0
            }
301
0
            else {
302
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
303
0
            }
304
0
            assert(strlen(buffer) < sizeof(buffer));
305
0
            WRITE_BYTES(buffer);
306
0
            break;
307
308
0
        case 'i':
309
0
            sprintf(buffer, "%i", va_arg(vargs, int));
310
0
            assert(strlen(buffer) < sizeof(buffer));
311
0
            WRITE_BYTES(buffer);
312
0
            break;
313
314
0
        case 'x':
315
0
            sprintf(buffer, "%x", va_arg(vargs, int));
316
0
            assert(strlen(buffer) < sizeof(buffer));
317
0
            WRITE_BYTES(buffer);
318
0
            break;
319
320
0
        case 's':
321
0
        {
322
0
            Py_ssize_t i;
323
324
0
            p = va_arg(vargs, const char*);
325
0
            if (prec <= 0) {
326
0
                i = strlen(p);
327
0
            }
328
0
            else {
329
0
                i = 0;
330
0
                while (i < prec && p[i]) {
331
0
                    i++;
332
0
                }
333
0
            }
334
0
            WRITE_BYTES_LEN(p, i);
335
0
            break;
336
0
        }
337
338
0
        case 'p':
339
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
340
0
            assert(strlen(buffer) < sizeof(buffer));
341
            /* %p is ill-defined:  ensure leading 0x. */
342
0
            if (buffer[1] == 'X')
343
0
                buffer[1] = 'x';
344
0
            else if (buffer[1] != 'x') {
345
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
346
0
                buffer[0] = '0';
347
0
                buffer[1] = 'x';
348
0
            }
349
0
            WRITE_BYTES(buffer);
350
0
            break;
351
352
0
        case '%':
353
0
            *s++ = '%';
354
0
            break;
355
356
0
        default:
357
            /* invalid format string: copy unformatted string and exit */
358
0
            WRITE_BYTES(p);
359
0
            return s;
360
0
        }
361
0
    }
362
363
0
#undef WRITE_BYTES
364
0
#undef WRITE_BYTES_LEN
365
366
0
    return s;
367
368
0
 error:
369
0
    return NULL;
370
0
}
371
372
373
PyObject *
374
PyBytes_FromFormatV(const char *format, va_list vargs)
375
0
{
376
0
    Py_ssize_t alloc = strlen(format);
377
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
378
0
    if (writer == NULL) {
379
0
        return NULL;
380
0
    }
381
382
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
383
0
    if (s == NULL) {
384
0
        PyBytesWriter_Discard(writer);
385
0
        return NULL;
386
0
    }
387
388
0
    return PyBytesWriter_FinishWithPointer(writer, s);
389
0
}
390
391
392
/* Variadic front end: packs the arguments into a va_list and forwards to
   PyBytes_FromFormatV(), returning whatever it returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *result = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
403
404
405
/* Helpers for formatstring */
406
407
/* Fetch the next format argument and advance *p_argidx.

   When arglen is negative, 'args' itself is the argument and is returned
   as-is; otherwise the item at the current index is fetched with
   PyTuple_GetItem().  Once *p_argidx has reached arglen, TypeError is set
   and NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}
422
423
/* Returns a new reference to a PyBytes object, or NULL on failure. */
424
425
static char*
426
formatfloat(PyObject *v, int flags, int prec, int type,
427
            PyObject **p_result, PyBytesWriter *writer, char *str)
428
0
{
429
0
    char *p;
430
0
    PyObject *result;
431
0
    double x;
432
0
    size_t len;
433
0
    int dtoa_flags = 0;
434
435
0
    x = PyFloat_AsDouble(v);
436
0
    if (x == -1.0 && PyErr_Occurred()) {
437
0
        PyErr_Format(PyExc_TypeError, "float argument required, "
438
0
                     "not %.200s", Py_TYPE(v)->tp_name);
439
0
        return NULL;
440
0
    }
441
442
0
    if (prec < 0)
443
0
        prec = 6;
444
445
0
    if (flags & F_ALT) {
446
0
        dtoa_flags |= Py_DTSF_ALT;
447
0
    }
448
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
449
450
0
    if (p == NULL)
451
0
        return NULL;
452
453
0
    len = strlen(p);
454
0
    if (writer != NULL) {
455
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
456
0
        if (str == NULL) {
457
0
            PyMem_Free(p);
458
0
            return NULL;
459
0
        }
460
0
        memcpy(str, p, len);
461
0
        PyMem_Free(p);
462
0
        str += len;
463
0
        return str;
464
0
    }
465
466
0
    result = PyBytes_FromStringAndSize(p, len);
467
0
    PyMem_Free(p);
468
0
    *p_result = result;
469
0
    return result != NULL ? str : NULL;
470
0
}
471
472
/* Format 'v' as an integer for conversion character 'type'.

   Exact or subclass ints go straight to _PyUnicode_FormatLong().  Other
   objects passing PyNumber_Check() are first converted: with
   _PyNumber_Index() for 'o', 'x' and 'X', with PyNumber_Long() otherwise.
   A conversion failure other than TypeError is propagated unchanged; a
   TypeError (or a non-number argument) is reported with a format-specific
   TypeError.  Returns the result of _PyUnicode_FormatLong(), or NULL. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    const int integer_only = (type == 'o' || type == 'x' || type == 'X');

    if (PyLong_Check(v)) {
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    }

    if (PyNumber_Check(v)) {
        /* make sure number is a type of integer for o, x, and X */
        PyObject *iobj = integer_only ? _PyNumber_Index(v)
                                      : PyNumber_Long(v);
        if (iobj != NULL) {
            assert(PyLong_Check(iobj));
            PyObject *result = _PyUnicode_FormatLong(iobj, flags & F_ALT,
                                                     prec, type);
            Py_DECREF(iobj);
            return result;
        }
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        integer_only ? "an integer" : "a real number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
500
501
static int
502
byte_converter(PyObject *arg, char *p)
503
0
{
504
0
    if (PyBytes_Check(arg)) {
505
0
        if (PyBytes_GET_SIZE(arg) != 1) {
506
0
            PyErr_Format(PyExc_TypeError,
507
0
                         "%%c requires an integer in range(256) or "
508
0
                         "a single byte, not a bytes object of length %zd",
509
0
                         PyBytes_GET_SIZE(arg));
510
0
            return 0;
511
0
        }
512
0
        *p = PyBytes_AS_STRING(arg)[0];
513
0
        return 1;
514
0
    }
515
0
    else if (PyByteArray_Check(arg)) {
516
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
517
0
            PyErr_Format(PyExc_TypeError,
518
0
                         "%%c requires an integer in range(256) or "
519
0
                         "a single byte, not a bytearray object of length %zd",
520
0
                         PyByteArray_GET_SIZE(arg));
521
0
            return 0;
522
0
        }
523
0
        *p = PyByteArray_AS_STRING(arg)[0];
524
0
        return 1;
525
0
    }
526
0
    else if (PyIndex_Check(arg)) {
527
0
        int overflow;
528
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
529
0
        if (ival == -1 && PyErr_Occurred()) {
530
0
            return 0;
531
0
        }
532
0
        if (!(0 <= ival && ival <= 255)) {
533
            /* this includes an overflow in converting to C long */
534
0
            PyErr_SetString(PyExc_OverflowError,
535
0
                            "%c arg not in range(256)");
536
0
            return 0;
537
0
        }
538
0
        *p = (char)ival;
539
0
        return 1;
540
0
    }
541
0
    PyErr_Format(PyExc_TypeError,
542
0
        "%%c requires an integer in range(256) or a single byte, not %T",
543
0
        arg);
544
0
    return 0;
545
0
}
546
547
static PyObject *_PyBytes_FromBuffer(PyObject *x);
548
549
static PyObject *
550
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
551
0
{
552
0
    PyObject *func, *result;
553
    /* is it a bytes object? */
554
0
    if (PyBytes_Check(v)) {
555
0
        *pbuf = PyBytes_AS_STRING(v);
556
0
        *plen = PyBytes_GET_SIZE(v);
557
0
        return Py_NewRef(v);
558
0
    }
559
0
    if (PyByteArray_Check(v)) {
560
0
        *pbuf = PyByteArray_AS_STRING(v);
561
0
        *plen = PyByteArray_GET_SIZE(v);
562
0
        return Py_NewRef(v);
563
0
    }
564
    /* does it support __bytes__? */
565
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
566
0
    if (func != NULL) {
567
0
        result = _PyObject_CallNoArgs(func);
568
0
        Py_DECREF(func);
569
0
        if (result == NULL)
570
0
            return NULL;
571
0
        if (!PyBytes_Check(result)) {
572
0
            PyErr_Format(PyExc_TypeError,
573
0
                         "%T.__bytes__() must return a bytes, not %T",
574
0
                         v, result);
575
0
            Py_DECREF(result);
576
0
            return NULL;
577
0
        }
578
0
        *pbuf = PyBytes_AS_STRING(result);
579
0
        *plen = PyBytes_GET_SIZE(result);
580
0
        return result;
581
0
    }
582
    /* does it support buffer protocol? */
583
0
    if (PyObject_CheckBuffer(v)) {
584
        /* maybe we can avoid making a copy of the buffer object here? */
585
0
        result = _PyBytes_FromBuffer(v);
586
0
        if (result == NULL)
587
0
            return NULL;
588
0
        *pbuf = PyBytes_AS_STRING(result);
589
0
        *plen = PyBytes_GET_SIZE(result);
590
0
        return result;
591
0
    }
592
0
    PyErr_Format(PyExc_TypeError,
593
0
                 "%%b requires a bytes-like object, "
594
0
                 "or an object that implements __bytes__, not '%.100s'",
595
0
                 Py_TYPE(v)->tp_name);
596
0
    return NULL;
597
0
}
598
599
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
600
601
PyObject *
602
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
603
                  PyObject *args, int use_bytearray)
604
0
{
605
0
    const char *fmt;
606
0
    Py_ssize_t arglen, argidx;
607
0
    Py_ssize_t fmtcnt;
608
0
    int args_owned = 0;
609
0
    PyObject *dict = NULL;
610
611
0
    if (args == NULL) {
612
0
        PyErr_BadInternalCall();
613
0
        return NULL;
614
0
    }
615
0
    fmt = format;
616
0
    fmtcnt = format_len;
617
618
0
    PyBytesWriter *writer;
619
0
    if (use_bytearray) {
620
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
621
0
    }
622
0
    else {
623
0
        writer = PyBytesWriter_Create(fmtcnt);
624
0
    }
625
0
    if (writer == NULL) {
626
0
        return NULL;
627
0
    }
628
0
    char *res = PyBytesWriter_GetData(writer);
629
630
0
    if (PyTuple_Check(args)) {
631
0
        arglen = PyTuple_GET_SIZE(args);
632
0
        argidx = 0;
633
0
    }
634
0
    else {
635
0
        arglen = -1;
636
0
        argidx = -2;
637
0
    }
638
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
639
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
640
0
        !PyByteArray_Check(args)) {
641
0
            dict = args;
642
0
    }
643
644
0
    while (--fmtcnt >= 0) {
645
0
        if (*fmt != '%') {
646
0
            Py_ssize_t len;
647
0
            char *pos;
648
649
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
650
0
            if (pos != NULL)
651
0
                len = pos - fmt;
652
0
            else
653
0
                len = fmtcnt + 1;
654
0
            assert(len != 0);
655
656
0
            memcpy(res, fmt, len);
657
0
            res += len;
658
0
            fmt += len;
659
0
            fmtcnt -= (len - 1);
660
0
        }
661
0
        else {
662
            /* Got a format specifier */
663
0
            int flags = 0;
664
0
            Py_ssize_t width = -1;
665
0
            int prec = -1;
666
0
            int c = '\0';
667
0
            int fill;
668
0
            PyObject *v = NULL;
669
0
            PyObject *temp = NULL;
670
0
            const char *pbuf = NULL;
671
0
            int sign;
672
0
            Py_ssize_t len = 0;
673
0
            char onechar; /* For byte_converter() */
674
0
            Py_ssize_t alloc;
675
676
0
            fmt++;
677
0
            if (*fmt == '%') {
678
0
                *res++ = '%';
679
0
                fmt++;
680
0
                fmtcnt--;
681
0
                continue;
682
0
            }
683
0
            if (*fmt == '(') {
684
0
                const char *keystart;
685
0
                Py_ssize_t keylen;
686
0
                PyObject *key;
687
0
                int pcount = 1;
688
689
0
                if (dict == NULL) {
690
0
                    PyErr_SetString(PyExc_TypeError,
691
0
                             "format requires a mapping");
692
0
                    goto error;
693
0
                }
694
0
                ++fmt;
695
0
                --fmtcnt;
696
0
                keystart = fmt;
697
                /* Skip over balanced parentheses */
698
0
                while (pcount > 0 && --fmtcnt >= 0) {
699
0
                    if (*fmt == ')')
700
0
                        --pcount;
701
0
                    else if (*fmt == '(')
702
0
                        ++pcount;
703
0
                    fmt++;
704
0
                }
705
0
                keylen = fmt - keystart - 1;
706
0
                if (fmtcnt < 0 || pcount > 0) {
707
0
                    PyErr_SetString(PyExc_ValueError,
708
0
                               "incomplete format key");
709
0
                    goto error;
710
0
                }
711
0
                key = PyBytes_FromStringAndSize(keystart,
712
0
                                                 keylen);
713
0
                if (key == NULL)
714
0
                    goto error;
715
0
                if (args_owned) {
716
0
                    Py_DECREF(args);
717
0
                    args_owned = 0;
718
0
                }
719
0
                args = PyObject_GetItem(dict, key);
720
0
                Py_DECREF(key);
721
0
                if (args == NULL) {
722
0
                    goto error;
723
0
                }
724
0
                args_owned = 1;
725
0
                arglen = -1;
726
0
                argidx = -2;
727
0
            }
728
729
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
730
0
            while (--fmtcnt >= 0) {
731
0
                switch (c = *fmt++) {
732
0
                case '-': flags |= F_LJUST; continue;
733
0
                case '+': flags |= F_SIGN; continue;
734
0
                case ' ': flags |= F_BLANK; continue;
735
0
                case '#': flags |= F_ALT; continue;
736
0
                case '0': flags |= F_ZERO; continue;
737
0
                }
738
0
                break;
739
0
            }
740
741
            /* Parse width. Example: "%10s" => width=10 */
742
0
            if (c == '*') {
743
0
                v = getnextarg(args, arglen, &argidx);
744
0
                if (v == NULL)
745
0
                    goto error;
746
0
                if (!PyLong_Check(v)) {
747
0
                    PyErr_SetString(PyExc_TypeError,
748
0
                                    "* wants int");
749
0
                    goto error;
750
0
                }
751
0
                width = PyLong_AsSsize_t(v);
752
0
                if (width == -1 && PyErr_Occurred())
753
0
                    goto error;
754
0
                if (width < 0) {
755
0
                    flags |= F_LJUST;
756
0
                    width = -width;
757
0
                }
758
0
                if (--fmtcnt >= 0)
759
0
                    c = *fmt++;
760
0
            }
761
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
762
0
                width = c - '0';
763
0
                while (--fmtcnt >= 0) {
764
0
                    c = Py_CHARMASK(*fmt++);
765
0
                    if (!Py_ISDIGIT(c))
766
0
                        break;
767
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
768
0
                        PyErr_SetString(
769
0
                            PyExc_ValueError,
770
0
                            "width too big");
771
0
                        goto error;
772
0
                    }
773
0
                    width = width*10 + (c - '0');
774
0
                }
775
0
            }
776
777
            /* Parse precision. Example: "%.3f" => prec=3 */
778
0
            if (c == '.') {
779
0
                prec = 0;
780
0
                if (--fmtcnt >= 0)
781
0
                    c = *fmt++;
782
0
                if (c == '*') {
783
0
                    v = getnextarg(args, arglen, &argidx);
784
0
                    if (v == NULL)
785
0
                        goto error;
786
0
                    if (!PyLong_Check(v)) {
787
0
                        PyErr_SetString(
788
0
                            PyExc_TypeError,
789
0
                            "* wants int");
790
0
                        goto error;
791
0
                    }
792
0
                    prec = PyLong_AsInt(v);
793
0
                    if (prec == -1 && PyErr_Occurred())
794
0
                        goto error;
795
0
                    if (prec < 0)
796
0
                        prec = 0;
797
0
                    if (--fmtcnt >= 0)
798
0
                        c = *fmt++;
799
0
                }
800
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
801
0
                    prec = c - '0';
802
0
                    while (--fmtcnt >= 0) {
803
0
                        c = Py_CHARMASK(*fmt++);
804
0
                        if (!Py_ISDIGIT(c))
805
0
                            break;
806
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
807
0
                            PyErr_SetString(
808
0
                                PyExc_ValueError,
809
0
                                "prec too big");
810
0
                            goto error;
811
0
                        }
812
0
                        prec = prec*10 + (c - '0');
813
0
                    }
814
0
                }
815
0
            } /* prec */
816
0
            if (fmtcnt >= 0) {
817
0
                if (c == 'h' || c == 'l' || c == 'L') {
818
0
                    if (--fmtcnt >= 0)
819
0
                        c = *fmt++;
820
0
                }
821
0
            }
822
0
            if (fmtcnt < 0) {
823
0
                PyErr_SetString(PyExc_ValueError,
824
0
                                "incomplete format");
825
0
                goto error;
826
0
            }
827
0
            v = getnextarg(args, arglen, &argidx);
828
0
            if (v == NULL)
829
0
                goto error;
830
831
0
            if (fmtcnt == 0) {
832
                /* last write: disable writer overallocation */
833
0
                writer->overallocate = 0;
834
0
            }
835
836
0
            sign = 0;
837
0
            fill = ' ';
838
0
            switch (c) {
839
0
            case 'r':
840
                // %r is only for 2/3 code; 3 only code should use %a
841
0
            case 'a':
842
0
                temp = PyObject_ASCII(v);
843
0
                if (temp == NULL)
844
0
                    goto error;
845
0
                assert(PyUnicode_IS_ASCII(temp));
846
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
847
0
                len = PyUnicode_GET_LENGTH(temp);
848
0
                if (prec >= 0 && len > prec)
849
0
                    len = prec;
850
0
                break;
851
852
0
            case 's':
853
                // %s is only for 2/3 code; 3 only code should use %b
854
0
            case 'b':
855
0
                temp = format_obj(v, &pbuf, &len);
856
0
                if (temp == NULL)
857
0
                    goto error;
858
0
                if (prec >= 0 && len > prec)
859
0
                    len = prec;
860
0
                break;
861
862
0
            case 'i':
863
0
            case 'd':
864
0
            case 'u':
865
0
            case 'o':
866
0
            case 'x':
867
0
            case 'X':
868
0
                if (PyLong_CheckExact(v)
869
0
                    && width == -1 && prec == -1
870
0
                    && !(flags & (F_SIGN | F_BLANK))
871
0
                    && c != 'X')
872
0
                {
873
                    /* Fast path */
874
0
                    int alternate = flags & F_ALT;
875
0
                    int base;
876
877
0
                    switch(c)
878
0
                    {
879
0
                        default:
880
0
                            Py_UNREACHABLE();
881
0
                        case 'd':
882
0
                        case 'i':
883
0
                        case 'u':
884
0
                            base = 10;
885
0
                            break;
886
0
                        case 'o':
887
0
                            base = 8;
888
0
                            break;
889
0
                        case 'x':
890
0
                        case 'X':
891
0
                            base = 16;
892
0
                            break;
893
0
                    }
894
895
                    /* Fast path */
896
0
                    res = _PyLong_FormatBytesWriter(writer, res,
897
0
                                                    v, base, alternate);
898
0
                    if (res == NULL)
899
0
                        goto error;
900
0
                    continue;
901
0
                }
902
903
0
                temp = formatlong(v, flags, prec, c);
904
0
                if (!temp)
905
0
                    goto error;
906
0
                assert(PyUnicode_IS_ASCII(temp));
907
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
908
0
                len = PyUnicode_GET_LENGTH(temp);
909
0
                sign = 1;
910
0
                if (flags & F_ZERO)
911
0
                    fill = '0';
912
0
                break;
913
914
0
            case 'e':
915
0
            case 'E':
916
0
            case 'f':
917
0
            case 'F':
918
0
            case 'g':
919
0
            case 'G':
920
0
                if (width == -1 && prec == -1
921
0
                    && !(flags & (F_SIGN | F_BLANK)))
922
0
                {
923
                    /* Fast path */
924
0
                    res = formatfloat(v, flags, prec, c, NULL, writer, res);
925
0
                    if (res == NULL)
926
0
                        goto error;
927
0
                    continue;
928
0
                }
929
930
0
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
931
0
                    goto error;
932
0
                pbuf = PyBytes_AS_STRING(temp);
933
0
                len = PyBytes_GET_SIZE(temp);
934
0
                sign = 1;
935
0
                if (flags & F_ZERO)
936
0
                    fill = '0';
937
0
                break;
938
939
0
            case 'c':
940
0
                pbuf = &onechar;
941
0
                len = byte_converter(v, &onechar);
942
0
                if (!len)
943
0
                    goto error;
944
0
                if (width == -1) {
945
                    /* Fast path */
946
0
                    *res++ = onechar;
947
0
                    continue;
948
0
                }
949
0
                break;
950
951
0
            default:
952
0
                PyErr_Format(PyExc_ValueError,
953
0
                  "unsupported format character '%c' (0x%x) "
954
0
                  "at index %zd",
955
0
                  c, c,
956
0
                  (Py_ssize_t)(fmt - 1 - format));
957
0
                goto error;
958
0
            }
959
960
0
            if (sign) {
961
0
                if (*pbuf == '-' || *pbuf == '+') {
962
0
                    sign = *pbuf++;
963
0
                    len--;
964
0
                }
965
0
                else if (flags & F_SIGN)
966
0
                    sign = '+';
967
0
                else if (flags & F_BLANK)
968
0
                    sign = ' ';
969
0
                else
970
0
                    sign = 0;
971
0
            }
972
0
            if (width < len)
973
0
                width = len;
974
975
0
            alloc = width;
976
0
            if (sign != 0 && len == width)
977
0
                alloc++;
978
            /* 2: size preallocated for %s */
979
0
            if (alloc > 2) {
980
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
981
0
                if (res == NULL) {
982
0
                    Py_XDECREF(temp);
983
0
                    goto error;
984
0
                }
985
0
            }
986
#ifndef NDEBUG
987
            char *before = res;
988
#endif
989
990
            /* Write the sign if needed */
991
0
            if (sign) {
992
0
                if (fill != ' ')
993
0
                    *res++ = sign;
994
0
                if (width > len)
995
0
                    width--;
996
0
            }
997
998
            /* Write the numeric prefix for "x", "X" and "o" formats
999
               if the alternate form is used.
1000
               For example, write "0x" for the "%#x" format. */
1001
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1002
0
                assert(pbuf[0] == '0');
1003
0
                assert(pbuf[1] == c);
1004
0
                if (fill != ' ') {
1005
0
                    *res++ = *pbuf++;
1006
0
                    *res++ = *pbuf++;
1007
0
                }
1008
0
                width -= 2;
1009
0
                if (width < 0)
1010
0
                    width = 0;
1011
0
                len -= 2;
1012
0
            }
1013
1014
            /* Pad left with the fill character if needed */
1015
0
            if (width > len && !(flags & F_LJUST)) {
1016
0
                memset(res, fill, width - len);
1017
0
                res += (width - len);
1018
0
                width = len;
1019
0
            }
1020
1021
            /* If padding with spaces: write sign if needed and/or numeric
1022
               prefix if the alternate form is used */
1023
0
            if (fill == ' ') {
1024
0
                if (sign)
1025
0
                    *res++ = sign;
1026
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1027
0
                    assert(pbuf[0] == '0');
1028
0
                    assert(pbuf[1] == c);
1029
0
                    *res++ = *pbuf++;
1030
0
                    *res++ = *pbuf++;
1031
0
                }
1032
0
            }
1033
1034
            /* Copy bytes */
1035
0
            memcpy(res, pbuf, len);
1036
0
            res += len;
1037
1038
            /* Pad right with the fill character if needed */
1039
0
            if (width > len) {
1040
0
                memset(res, ' ', width - len);
1041
0
                res += (width - len);
1042
0
            }
1043
1044
0
            if (dict && (argidx < arglen)) {
1045
0
                PyErr_SetString(PyExc_TypeError,
1046
0
                           "not all arguments converted during bytes formatting");
1047
0
                Py_XDECREF(temp);
1048
0
                goto error;
1049
0
            }
1050
0
            Py_XDECREF(temp);
1051
1052
#ifndef NDEBUG
1053
            /* check that we computed the exact size for this write */
1054
            assert((res - before) == alloc);
1055
#endif
1056
0
        } /* '%' */
1057
1058
        /* If overallocation was disabled, ensure that it was the last
1059
           write. Otherwise, we missed an optimization */
1060
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1061
0
    } /* until end */
1062
1063
0
    if (argidx < arglen && !dict) {
1064
0
        PyErr_SetString(PyExc_TypeError,
1065
0
                        "not all arguments converted during bytes formatting");
1066
0
        goto error;
1067
0
    }
1068
1069
0
    if (args_owned) {
1070
0
        Py_DECREF(args);
1071
0
    }
1072
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1073
1074
0
 error:
1075
0
    PyBytesWriter_Discard(writer);
1076
0
    if (args_owned) {
1077
0
        Py_DECREF(args);
1078
0
    }
1079
0
    return NULL;
1080
0
}
1081
1082
/* Unescape a backslash-escaped string. */
1083
PyObject *_PyBytes_DecodeEscape2(const char *s,
1084
                                Py_ssize_t len,
1085
                                const char *errors,
1086
                                int *first_invalid_escape_char,
1087
                                const char **first_invalid_escape_ptr)
1088
2.37k
{
1089
2.37k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1090
2.37k
    if (writer == NULL) {
1091
0
        return NULL;
1092
0
    }
1093
2.37k
    char *p = PyBytesWriter_GetData(writer);
1094
1095
2.37k
    *first_invalid_escape_char = -1;
1096
2.37k
    *first_invalid_escape_ptr = NULL;
1097
1098
2.37k
    const char *end = s + len;
1099
62.3k
    while (s < end) {
1100
60.0k
        if (*s != '\\') {
1101
48.7k
            *p++ = *s++;
1102
48.7k
            continue;
1103
48.7k
        }
1104
1105
11.2k
        s++;
1106
11.2k
        if (s == end) {
1107
0
            PyErr_SetString(PyExc_ValueError,
1108
0
                            "Trailing \\ in string");
1109
0
            goto failed;
1110
0
        }
1111
1112
11.2k
        switch (*s++) {
1113
        /* XXX This assumes ASCII! */
1114
922
        case '\n': break;
1115
663
        case '\\': *p++ = '\\'; break;
1116
263
        case '\'': *p++ = '\''; break;
1117
704
        case '\"': *p++ = '\"'; break;
1118
228
        case 'b': *p++ = '\b'; break;
1119
407
        case 'f': *p++ = '\014'; break; /* FF */
1120
234
        case 't': *p++ = '\t'; break;
1121
404
        case 'n': *p++ = '\n'; break;
1122
515
        case 'r': *p++ = '\r'; break;
1123
230
        case 'v': *p++ = '\013'; break; /* VT */
1124
201
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1125
1.96k
        case '0': case '1': case '2': case '3':
1126
4.12k
        case '4': case '5': case '6': case '7':
1127
4.12k
        {
1128
4.12k
            int c = s[-1] - '0';
1129
4.12k
            if (s < end && '0' <= *s && *s <= '7') {
1130
1.56k
                c = (c<<3) + *s++ - '0';
1131
1.56k
                if (s < end && '0' <= *s && *s <= '7')
1132
766
                    c = (c<<3) + *s++ - '0';
1133
1.56k
            }
1134
4.12k
            if (c > 0377) {
1135
578
                if (*first_invalid_escape_char == -1) {
1136
146
                    *first_invalid_escape_char = c;
1137
                    /* Back up 3 chars, since we've already incremented s. */
1138
146
                    *first_invalid_escape_ptr = s - 3;
1139
146
                }
1140
578
            }
1141
4.12k
            *p++ = c;
1142
4.12k
            break;
1143
3.30k
        }
1144
1.16k
        case 'x':
1145
1.16k
            if (s+1 < end) {
1146
1.16k
                int digit1, digit2;
1147
1.16k
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1148
1.16k
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1149
1.16k
                if (digit1 < 16 && digit2 < 16) {
1150
1.16k
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1151
1.16k
                    s += 2;
1152
1.16k
                    break;
1153
1.16k
                }
1154
1.16k
            }
1155
            /* invalid hexadecimal digits */
1156
1157
5
            if (!errors || strcmp(errors, "strict") == 0) {
1158
5
                PyErr_Format(PyExc_ValueError,
1159
5
                             "invalid \\x escape at position %zd",
1160
5
                             s - 2 - (end - len));
1161
5
                goto failed;
1162
5
            }
1163
0
            if (strcmp(errors, "replace") == 0) {
1164
0
                *p++ = '?';
1165
0
            } else if (strcmp(errors, "ignore") == 0)
1166
0
                /* do nothing */;
1167
0
            else {
1168
0
                PyErr_Format(PyExc_ValueError,
1169
0
                             "decoding error; unknown "
1170
0
                             "error handling code: %.400s",
1171
0
                             errors);
1172
0
                goto failed;
1173
0
            }
1174
            /* skip \x */
1175
0
            if (s < end && Py_ISXDIGIT(s[0]))
1176
0
                s++; /* and a hexdigit */
1177
0
            break;
1178
1179
1.20k
        default:
1180
1.20k
            if (*first_invalid_escape_char == -1) {
1181
626
                *first_invalid_escape_char = (unsigned char)s[-1];
1182
                /* Back up one char, since we've already incremented s. */
1183
626
                *first_invalid_escape_ptr = s - 1;
1184
626
            }
1185
1.20k
            *p++ = '\\';
1186
1.20k
            s--;
1187
11.2k
        }
1188
11.2k
    }
1189
1190
2.36k
    return PyBytesWriter_FinishWithPointer(writer, p);
1191
1192
5
  failed:
1193
5
    PyBytesWriter_Discard(writer);
1194
5
    return NULL;
1195
2.37k
}
1196
1197
PyObject *PyBytes_DecodeEscape(const char *s,
1198
                                Py_ssize_t len,
1199
                                const char *errors,
1200
                                Py_ssize_t Py_UNUSED(unicode),
1201
                                const char *Py_UNUSED(recode_encoding))
1202
0
{
1203
0
    int first_invalid_escape_char;
1204
0
    const char *first_invalid_escape_ptr;
1205
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1206
0
                                             &first_invalid_escape_char,
1207
0
                                             &first_invalid_escape_ptr);
1208
0
    if (result == NULL)
1209
0
        return NULL;
1210
0
    if (first_invalid_escape_char != -1) {
1211
0
        if (first_invalid_escape_char > 0xff) {
1212
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1213
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1214
0
                                 "Such sequences will not work in the future. ",
1215
0
                                 first_invalid_escape_char) < 0)
1216
0
            {
1217
0
                Py_DECREF(result);
1218
0
                return NULL;
1219
0
            }
1220
0
        }
1221
0
        else {
1222
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1223
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1224
0
                                 "Such sequences will not work in the future. ",
1225
0
                                 first_invalid_escape_char) < 0)
1226
0
            {
1227
0
                Py_DECREF(result);
1228
0
                return NULL;
1229
0
            }
1230
0
        }
1231
0
    }
1232
0
    return result;
1233
0
}
1234
/* -------------------------------------------------------------------- */
1235
/* object api */
1236
1237
Py_ssize_t
1238
PyBytes_Size(PyObject *op)
1239
5.12k
{
1240
5.12k
    if (!PyBytes_Check(op)) {
1241
0
        PyErr_Format(PyExc_TypeError,
1242
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1243
0
        return -1;
1244
0
    }
1245
5.12k
    return Py_SIZE(op);
1246
5.12k
}
1247
1248
char *
1249
PyBytes_AsString(PyObject *op)
1250
3.02M
{
1251
3.02M
    if (!PyBytes_Check(op)) {
1252
0
        PyErr_Format(PyExc_TypeError,
1253
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1254
0
        return NULL;
1255
0
    }
1256
3.02M
    return ((PyBytesObject *)op)->ob_sval;
1257
3.02M
}
1258
1259
int
1260
PyBytes_AsStringAndSize(PyObject *obj,
1261
                         char **s,
1262
                         Py_ssize_t *len)
1263
76.4k
{
1264
76.4k
    if (s == NULL) {
1265
0
        PyErr_BadInternalCall();
1266
0
        return -1;
1267
0
    }
1268
1269
76.4k
    if (!PyBytes_Check(obj)) {
1270
0
        PyErr_Format(PyExc_TypeError,
1271
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1272
0
        return -1;
1273
0
    }
1274
1275
76.4k
    *s = PyBytes_AS_STRING(obj);
1276
76.4k
    if (len != NULL)
1277
76.4k
        *len = PyBytes_GET_SIZE(obj);
1278
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1279
0
        PyErr_SetString(PyExc_ValueError,
1280
0
                        "embedded null byte");
1281
0
        return -1;
1282
0
    }
1283
76.4k
    return 0;
1284
76.4k
}
1285
1286
/* -------------------------------------------------------------------- */
1287
/* Methods */
1288
1289
11
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1290
1291
#include "stringlib/stringdefs.h"
1292
#define STRINGLIB_MUTABLE 0
1293
1294
#include "stringlib/fastsearch.h"
1295
#include "stringlib/count.h"
1296
#include "stringlib/find.h"
1297
#include "stringlib/join.h"
1298
#include "stringlib/partition.h"
1299
#include "stringlib/split.h"
1300
#include "stringlib/ctype.h"
1301
1302
#include "stringlib/transmogrify.h"
1303
1304
#undef STRINGLIB_GET_EMPTY
1305
1306
Py_ssize_t
1307
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1308
              const char *needle, Py_ssize_t len_needle,
1309
              Py_ssize_t offset)
1310
0
{
1311
0
    assert(len_haystack >= 0);
1312
0
    assert(len_needle >= 0);
1313
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1314
0
    if (len_needle == 0) {
1315
0
        return offset;
1316
0
    }
1317
0
    if (len_needle > len_haystack) {
1318
0
        return -1;
1319
0
    }
1320
0
    assert(len_haystack >= 1);
1321
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1322
0
                                    needle, len_needle, offset);
1323
0
    if (res == -1) {
1324
0
        Py_ssize_t last_align = len_haystack - len_needle;
1325
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1326
0
            return offset + last_align;
1327
0
        }
1328
0
    }
1329
0
    return res;
1330
0
}
1331
1332
Py_ssize_t
1333
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1334
                     const char *needle, Py_ssize_t len_needle,
1335
                     Py_ssize_t offset)
1336
0
{
1337
0
    return stringlib_rfind(haystack, len_haystack,
1338
0
                           needle, len_needle, offset);
1339
0
}
1340
1341
/* Return the repr of the bytes object `obj`, labelled "bytes" in any
   overflow error.  `smartquotes` is passed through to _Py_bytes_repr(). */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    const Py_ssize_t size = PyBytes_GET_SIZE(obj);
    return _Py_bytes_repr(data, size, smartquotes, "bytes");
}
1347
1348
PyObject *
1349
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1350
               const char *classname)
1351
2.99k
{
1352
2.99k
    Py_ssize_t i;
1353
2.99k
    Py_ssize_t newsize, squotes, dquotes;
1354
2.99k
    PyObject *v;
1355
2.99k
    unsigned char quote;
1356
2.99k
    Py_UCS1 *p;
1357
1358
    /* Compute size of output string */
1359
2.99k
    squotes = dquotes = 0;
1360
2.99k
    newsize = 3; /* b'' */
1361
901k
    for (i = 0; i < length; i++) {
1362
898k
        unsigned char c = data[i];
1363
898k
        Py_ssize_t incr = 1;
1364
898k
        switch(c) {
1365
3.37k
        case '\'': squotes++; break;
1366
3.39k
        case '"':  dquotes++; break;
1367
44.6k
        case '\\': case '\t': case '\n': case '\r':
1368
44.6k
            incr = 2; break; /* \C */
1369
847k
        default:
1370
847k
            if (c < ' ' || c >= 0x7f)
1371
517k
                incr = 4; /* \xHH */
1372
898k
        }
1373
898k
        if (newsize > PY_SSIZE_T_MAX - incr)
1374
0
            goto overflow;
1375
898k
        newsize += incr;
1376
898k
    }
1377
2.99k
    quote = '\'';
1378
2.99k
    if (smartquotes && squotes && !dquotes)
1379
122
        quote = '"';
1380
2.99k
    if (squotes && quote == '\'') {
1381
407
        if (newsize > PY_SSIZE_T_MAX - squotes)
1382
0
            goto overflow;
1383
407
        newsize += squotes;
1384
407
    }
1385
1386
2.99k
    v = PyUnicode_New(newsize, 127);
1387
2.99k
    if (v == NULL) {
1388
0
        return NULL;
1389
0
    }
1390
2.99k
    p = PyUnicode_1BYTE_DATA(v);
1391
1392
2.99k
    *p++ = 'b', *p++ = quote;
1393
901k
    for (i = 0; i < length; i++) {
1394
898k
        unsigned char c = data[i];
1395
898k
        if (c == quote || c == '\\')
1396
5.24k
            *p++ = '\\', *p++ = c;
1397
893k
        else if (c == '\t')
1398
31.4k
            *p++ = '\\', *p++ = 't';
1399
862k
        else if (c == '\n')
1400
8.33k
            *p++ = '\\', *p++ = 'n';
1401
853k
        else if (c == '\r')
1402
2.60k
            *p++ = '\\', *p++ = 'r';
1403
851k
        else if (c < ' ' || c >= 0x7f) {
1404
517k
            *p++ = '\\';
1405
517k
            *p++ = 'x';
1406
517k
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1407
517k
            *p++ = Py_hexdigits[c & 0xf];
1408
517k
        }
1409
333k
        else
1410
333k
            *p++ = c;
1411
898k
    }
1412
2.99k
    *p++ = quote;
1413
2.99k
    assert(_PyUnicode_CheckConsistency(v, 1));
1414
2.99k
    return v;
1415
1416
0
  overflow:
1417
0
    PyErr_Format(PyExc_OverflowError,
1418
0
                 "%s object is too large to make repr", classname);
1419
0
    return NULL;
1420
2.99k
}
1421
1422
/* repr() implementation: PyBytes_Repr() with smart quoting enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1427
1428
/* str() implementation: same text as repr().  When the bytes_warning
   config flag is set a BytesWarning is issued first; NULL is returned if
   issuing that warning fails. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1439
1440
static Py_ssize_t
1441
bytes_length(PyObject *self)
1442
13.9M
{
1443
13.9M
    PyBytesObject *a = _PyBytes_CAST(self);
1444
13.9M
    return Py_SIZE(a);
1445
13.9M
}
1446
1447
/* This is also used by PyBytes_Concat() */
1448
static PyObject *
1449
bytes_concat(PyObject *a, PyObject *b)
1450
132k
{
1451
132k
    Py_buffer va, vb;
1452
132k
    PyObject *result = NULL;
1453
1454
132k
    va.len = -1;
1455
132k
    vb.len = -1;
1456
132k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1457
132k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1458
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1459
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1460
0
        goto done;
1461
0
    }
1462
1463
    /* Optimize end cases */
1464
132k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1465
17.5k
        result = Py_NewRef(b);
1466
17.5k
        goto done;
1467
17.5k
    }
1468
115k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1469
44.7k
        result = Py_NewRef(a);
1470
44.7k
        goto done;
1471
44.7k
    }
1472
1473
70.3k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1474
0
        PyErr_NoMemory();
1475
0
        goto done;
1476
0
    }
1477
1478
70.3k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1479
70.3k
    if (result != NULL) {
1480
70.3k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1481
70.3k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1482
70.3k
    }
1483
1484
132k
  done:
1485
132k
    if (va.len != -1)
1486
132k
        PyBuffer_Release(&va);
1487
132k
    if (vb.len != -1)
1488
132k
        PyBuffer_Release(&vb);
1489
132k
    return result;
1490
70.3k
}
1491
1492
static PyObject *
1493
bytes_repeat(PyObject *self, Py_ssize_t n)
1494
67.6k
{
1495
67.6k
    PyBytesObject *a = _PyBytes_CAST(self);
1496
67.6k
    if (n < 0)
1497
0
        n = 0;
1498
    /* watch out for overflows:  the size can overflow int,
1499
     * and the # of bytes needed can overflow size_t
1500
     */
1501
67.6k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1502
0
        PyErr_SetString(PyExc_OverflowError,
1503
0
            "repeated bytes are too long");
1504
0
        return NULL;
1505
0
    }
1506
67.6k
    Py_ssize_t size = Py_SIZE(a) * n;
1507
67.6k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1508
0
        return Py_NewRef(a);
1509
0
    }
1510
67.6k
    size_t nbytes = (size_t)size;
1511
67.6k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1512
0
        PyErr_SetString(PyExc_OverflowError,
1513
0
            "repeated bytes are too long");
1514
0
        return NULL;
1515
0
    }
1516
67.6k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1517
67.6k
    if (op == NULL) {
1518
0
        return PyErr_NoMemory();
1519
0
    }
1520
67.6k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1521
67.6k
    set_ob_shash(op, -1);
1522
67.6k
    op->ob_sval[size] = '\0';
1523
1524
67.6k
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1525
1526
67.6k
    return (PyObject *) op;
1527
67.6k
}
1528
1529
static int
1530
bytes_contains(PyObject *self, PyObject *arg)
1531
2.80k
{
1532
2.80k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1533
2.80k
}
1534
1535
static PyObject *
1536
bytes_item(PyObject *self, Py_ssize_t i)
1537
0
{
1538
0
    PyBytesObject *a = _PyBytes_CAST(self);
1539
0
    if (i < 0 || i >= Py_SIZE(a)) {
1540
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1541
0
        return NULL;
1542
0
    }
1543
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1544
0
}
1545
1546
static int
1547
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548
36.8M
{
1549
36.8M
    int cmp;
1550
36.8M
    Py_ssize_t len;
1551
1552
36.8M
    len = Py_SIZE(a);
1553
36.8M
    if (Py_SIZE(b) != len)
1554
414k
        return 0;
1555
1556
36.4M
    if (a->ob_sval[0] != b->ob_sval[0])
1557
2.77M
        return 0;
1558
1559
33.6M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560
33.6M
    return (cmp == 0);
1561
36.4M
}
1562
1563
/* Rich comparison slot for bytes.

   Returns NotImplemented unless both operands are bytes objects; in that
   case, with the bytes_warning config flag set, an == or != comparison
   against a str or an int first issues a BytesWarning.  Two bytes objects
   compare by content, with length breaking ties between a string and its
   prefix. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if (PyUnicode_Check(aa) || PyUnicode_Check(bb)) {
                if (PyErr_WarnEx(PyExc_BytesWarning,
                                 "Comparison between bytes and string", 1))
                {
                    return NULL;
                }
            }
            if (PyLong_Check(aa) || PyLong_Check(bb)) {
                if (PyErr_WarnEx(PyExc_BytesWarning,
                                 "Comparison between bytes and int", 1))
                {
                    return NULL;
                }
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        /* a byte string is equal to itself */
        if (op == Py_EQ || op == Py_LE || op == Py_GE) {
            Py_RETURN_TRUE;
        }
        if (op == Py_NE || op == Py_LT || op == Py_GT) {
            Py_RETURN_FALSE;
        }
        PyErr_BadArgument();
        return NULL;
    }

    if (op == Py_EQ || op == Py_NE) {
        const int equal = bytes_compare_eq(a, b);
        /* for Py_NE the answer is the negation of equality */
        return PyBool_FromLong(equal != (op == Py_NE));
    }

    /* Ordering comparison. */
    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t min_len = Py_MIN(len_a, len_b);
    int c = 0;
    if (min_len > 0) {
        /* check the first byte before calling memcmp() */
        c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (c == 0) {
            c = memcmp(a->ob_sval, b->ob_sval, min_len);
        }
    }
    if (c != 0) {
        Py_RETURN_RICHCOMPARE(c, 0, op);
    }
    /* common prefix is identical: the lengths decide */
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1625
1626
static Py_hash_t
1627
bytes_hash(PyObject *self)
1628
37.2M
{
1629
37.2M
    PyBytesObject *a = _PyBytes_CAST(self);
1630
37.2M
    Py_hash_t hash = get_ob_shash(a);
1631
37.2M
    if (hash == -1) {
1632
        /* Can't fail */
1633
17.1M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1634
17.1M
        set_ob_shash(a, hash);
1635
17.1M
    }
1636
37.2M
    return hash;
1637
37.2M
}
1638
1639
/* Subscript slot: implements b[i] and b[start:stop:step].

   An index yields the byte value as an int (negative indices count from
   the end; IndexError when out of range).  A slice yields a bytes object:
   the empty constant for an empty slice, `op` itself for a full step-1
   slice of an exact bytes object, otherwise a copy.  Any other `item`
   type raises TypeError. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (index < 0) {
            index += PyBytes_GET_SIZE(self);
        }
        if (index < 0 || index >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    const Py_ssize_t slicelength =
        PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* contiguous slice */
        if (start == 0
            && slicelength == PyBytes_GET_SIZE(self)
            && PyBytes_CheckExact(self))
        {
            /* the slice covers the whole object: share it */
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self) + start,
            slicelength);
    }

    /* extended slice: gather one byte per step */
    const char *source = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }
    char *dest = PyBytes_AS_STRING(result);
    size_t src = start;
    for (Py_ssize_t k = 0; k < slicelength; k++, src += step) {
        dest[k] = source[src];
    }
    return result;
}
1704
1705
static int
1706
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1707
21.4M
{
1708
21.4M
    PyBytesObject *self = _PyBytes_CAST(op);
1709
21.4M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1710
21.4M
                             1, flags);
1711
21.4M
}
1712
1713
static PySequenceMethods bytes_as_sequence = {
1714
    bytes_length,       /*sq_length*/
1715
    bytes_concat,       /*sq_concat*/
1716
    bytes_repeat,       /*sq_repeat*/
1717
    bytes_item,         /*sq_item*/
1718
    0,                  /*sq_slice*/
1719
    0,                  /*sq_ass_item*/
1720
    0,                  /*sq_ass_slice*/
1721
    bytes_contains      /*sq_contains*/
1722
};
1723
1724
static PyMappingMethods bytes_as_mapping = {
1725
    bytes_length,
1726
    bytes_subscript,
1727
    0,
1728
};
1729
1730
static PyBufferProcs bytes_as_buffer = {
1731
    bytes_buffer_getbuffer,
1732
    NULL,
1733
};
1734
1735
1736
/*[clinic input]
1737
bytes.__bytes__
1738
Convert this value to exact type bytes.
1739
[clinic start generated code]*/
1740
1741
static PyObject *
1742
bytes___bytes___impl(PyBytesObject *self)
1743
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1744
44.8k
{
1745
44.8k
    if (PyBytes_CheckExact(self)) {
1746
44.8k
        return Py_NewRef(self);
1747
44.8k
    }
1748
0
    else {
1749
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1750
0
    }
1751
44.8k
}
1752
1753
1754
0
/* Selector constants for left / right / both-sides stripping.
   NOTE(review): their users are not visible in this part of the file;
   presumably the strip helpers defined further down -- confirm. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1757
1758
/*[clinic input]
1759
bytes.split
1760
1761
    sep: object = None
1762
        The delimiter according which to split the bytes.
1763
        None (the default value) means split on ASCII whitespace characters
1764
        (space, tab, return, newline, formfeed, vertical tab).
1765
    maxsplit: Py_ssize_t = -1
1766
        Maximum number of splits to do.
1767
        -1 (the default value) means no limit.
1768
1769
Return a list of the sections in the bytes, using sep as the delimiter.
1770
[clinic start generated code]*/
1771
1772
static PyObject *
1773
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1774
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1775
3.13M
{
1776
3.13M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1777
3.13M
    const char *s = PyBytes_AS_STRING(self), *sub;
1778
3.13M
    Py_buffer vsub;
1779
3.13M
    PyObject *list;
1780
1781
3.13M
    if (maxsplit < 0)
1782
3.13M
        maxsplit = PY_SSIZE_T_MAX;
1783
3.13M
    if (sep == Py_None)
1784
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1785
3.13M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1786
0
        return NULL;
1787
3.13M
    sub = vsub.buf;
1788
3.13M
    n = vsub.len;
1789
1790
3.13M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1791
3.13M
    PyBuffer_Release(&vsub);
1792
3.13M
    return list;
1793
3.13M
}
1794
1795
/*[clinic input]
1796
@permit_long_docstring_body
1797
bytes.partition
1798
1799
    sep: Py_buffer
1800
    /
1801
1802
Partition the bytes into three parts using the given separator.
1803
1804
This will search for the separator sep in the bytes. If the separator is found,
1805
returns a 3-tuple containing the part before the separator, the separator
1806
itself, and the part after it.
1807
1808
If the separator is not found, returns a 3-tuple containing the original bytes
1809
object and two empty bytes objects.
1810
[clinic start generated code]*/
1811
1812
static PyObject *
1813
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1814
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1815
9.94k
{
1816
9.94k
    return stringlib_partition(
1817
9.94k
        (PyObject*) self,
1818
9.94k
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1819
9.94k
        sep->obj, (const char *)sep->buf, sep->len
1820
9.94k
        );
1821
9.94k
}
1822
1823
/*[clinic input]
1824
@permit_long_docstring_body
1825
bytes.rpartition
1826
1827
    sep: Py_buffer
1828
    /
1829
1830
Partition the bytes into three parts using the given separator.
1831
1832
This will search for the separator sep in the bytes, starting at the end. If
1833
the separator is found, returns a 3-tuple containing the part before the
1834
separator, the separator itself, and the part after it.
1835
1836
If the separator is not found, returns a 3-tuple containing two empty bytes
1837
objects and the original bytes object.
1838
[clinic start generated code]*/
1839
1840
static PyObject *
1841
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1842
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1843
0
{
1844
0
    return stringlib_rpartition(
1845
0
        (PyObject*) self,
1846
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1847
0
        sep->obj, (const char *)sep->buf, sep->len
1848
0
        );
1849
0
}
1850
1851
/*[clinic input]
1852
@permit_long_docstring_body
1853
bytes.rsplit = bytes.split
1854
1855
Return a list of the sections in the bytes, using sep as the delimiter.
1856
1857
Splitting is done starting at the end of the bytes and working to the front.
1858
[clinic start generated code]*/
1859
1860
static PyObject *
1861
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1862
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1863
0
{
1864
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1865
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1866
0
    Py_buffer vsub;
1867
0
    PyObject *list;
1868
1869
0
    if (maxsplit < 0)
1870
0
        maxsplit = PY_SSIZE_T_MAX;
1871
0
    if (sep == Py_None)
1872
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1873
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1874
0
        return NULL;
1875
0
    sub = vsub.buf;
1876
0
    n = vsub.len;
1877
1878
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1879
0
    PyBuffer_Release(&vsub);
1880
0
    return list;
1881
0
}
1882
1883
1884
/*[clinic input]
1885
bytes.join
1886
1887
    iterable_of_bytes: object
1888
    /
1889
1890
Concatenate any number of bytes objects.
1891
1892
The bytes whose method is called is inserted in between each pair.
1893
1894
The result is returned as a new bytes object.
1895
1896
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1897
[clinic start generated code]*/
1898
1899
static PyObject *
1900
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1901
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1902
8.70k
{
1903
8.70k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1904
8.70k
}
1905
1906
/* Join `iterable` using the bytes object `sep` as separator.

   Returns whatever stringlib_bytes_join() returns.  A NULL `sep` is reported
   via PyErr_BadInternalCall(); a non-bytes `sep` raises TypeError.  Both
   error cases return NULL. */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
1921
1922
/*[clinic input]
1923
@permit_long_summary
1924
@text_signature "($self, sub[, start[, end]], /)"
1925
bytes.find
1926
1927
    sub: object
1928
    start: slice_index(accept={int, NoneType}, c_default='0') = None
1929
         Optional start position. Default: start of the bytes.
1930
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
1931
         Optional stop position. Default: end of the bytes.
1932
    /
1933
1934
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1935
1936
Return -1 on failure.
1937
[clinic start generated code]*/
1938
1939
static PyObject *
1940
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1941
                Py_ssize_t end)
1942
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
1943
3.20M
{
1944
3.20M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1945
3.20M
                          sub, start, end);
1946
3.20M
}
1947
1948
/*[clinic input]
1949
@permit_long_summary
1950
bytes.index = bytes.find
1951
1952
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1953
1954
Raise ValueError if the subsection is not found.
1955
[clinic start generated code]*/
1956
1957
static PyObject *
1958
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1959
                 Py_ssize_t end)
1960
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
1961
0
{
1962
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1963
0
                           sub, start, end);
1964
0
}
1965
1966
/*[clinic input]
1967
@permit_long_summary
1968
bytes.rfind = bytes.find
1969
1970
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1971
1972
Return -1 on failure.
1973
[clinic start generated code]*/
1974
1975
static PyObject *
1976
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1977
                 Py_ssize_t end)
1978
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
1979
26.5k
{
1980
26.5k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1981
26.5k
                           sub, start, end);
1982
26.5k
}
1983
1984
/*[clinic input]
1985
@permit_long_summary
1986
bytes.rindex = bytes.find
1987
1988
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1989
1990
Raise ValueError if the subsection is not found.
1991
[clinic start generated code]*/
1992
1993
static PyObject *
1994
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1995
                  Py_ssize_t end)
1996
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
1997
0
{
1998
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1999
0
                            sub, start, end);
2000
0
}
2001
2002
2003
/* Strip bytes that occur in `sepobj` (read through the buffer protocol) from
   the left and/or right end of `self`, depending on `striptype`.

   Returns NULL if the buffer of `sepobj` cannot be obtained.  When nothing
   is removed and `self` is an exact bytes object, a new reference to `self`
   is returned instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_buffer set_view;

    if (PyObject_GetBuffer(sepobj, &set_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    const char *set = set_view.buf;
    const Py_ssize_t set_len = set_view.len;

    /* [left, right) is the slice that survives stripping. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (memchr(set, Py_CHARMASK(data[left]), set_len) == NULL) {
                break;
            }
        }
    }

    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        /* Never walk back past `left`, so the slice length stays >= 0. */
        while (right > left
               && memchr(set, Py_CHARMASK(data[right - 1]), set_len) != NULL)
        {
            right--;
        }
    }

    PyBuffer_Release(&set_view);

    if (left != 0 || right != size || !PyBytes_CheckExact(self)) {
        return PyBytes_FromStringAndSize(data + left, right - left);
    }
    return Py_NewRef(self);
}
2041
2042
2043
/* Strip bytes for which Py_ISSPACE() is true from the left and/or right end
   of `self`, depending on `striptype`.

   When nothing is removed and `self` is an exact bytes object, a new
   reference to `self` is returned instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* [left, right) is the slice that survives stripping. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        /* Never walk back past `left`, so the slice length stays >= 0. */
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    if (left != 0 || right != size || !PyBytes_CheckExact(self)) {
        return PyBytes_FromStringAndSize(data + left, right - left);
    }
    return Py_NewRef(self);
}
2070
2071
2072
/* Dispatch for the strip family: None selects do_strip(), anything else is
   passed to do_xstrip() as the set of bytes to remove. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2080
2081
/*[clinic input]
2082
@permit_long_docstring_body
2083
bytes.strip
2084
2085
    bytes: object = None
2086
    /
2087
2088
Strip leading and trailing bytes contained in the argument.
2089
2090
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2091
[clinic start generated code]*/
2092
2093
static PyObject *
2094
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2095
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2096
0
{
2097
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2098
0
}
2099
2100
/*[clinic input]
2101
bytes.lstrip
2102
2103
    bytes: object = None
2104
    /
2105
2106
Strip leading bytes contained in the argument.
2107
2108
If the argument is omitted or None, strip leading  ASCII whitespace.
2109
[clinic start generated code]*/
2110
2111
static PyObject *
2112
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2113
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2114
0
{
2115
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2116
0
}
2117
2118
/*[clinic input]
2119
bytes.rstrip
2120
2121
    bytes: object = None
2122
    /
2123
2124
Strip trailing bytes contained in the argument.
2125
2126
If the argument is omitted or None, strip trailing ASCII whitespace.
2127
[clinic start generated code]*/
2128
2129
static PyObject *
2130
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2131
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2132
0
{
2133
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2134
0
}
2135
2136
2137
/*[clinic input]
2138
@permit_long_summary
2139
bytes.count = bytes.find
2140
2141
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2142
[clinic start generated code]*/
2143
2144
static PyObject *
2145
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2146
                 Py_ssize_t end)
2147
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2148
3.88M
{
2149
3.88M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2150
3.88M
                           sub, start, end);
2151
3.88M
}
2152
2153
2154
/*[clinic input]
2155
bytes.translate
2156
2157
    table: object
2158
        Translation table, which must be a bytes object of length 256.
2159
    /
2160
    delete as deletechars: object(c_default="NULL") = b''
2161
2162
Return a copy with each character mapped by the given translation table.
2163
2164
All characters occurring in the optional argument delete are removed.
2165
The remaining characters are mapped through the given translation table.
2166
[clinic start generated code]*/
2167
2168
static PyObject *
2169
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2170
                     PyObject *deletechars)
2171
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2172
0
{
2173
0
    const char *input;
2174
0
    char *output;
2175
0
    Py_buffer table_view = {NULL, NULL};
2176
0
    Py_buffer del_table_view = {NULL, NULL};
2177
0
    const char *table_chars;
2178
0
    Py_ssize_t i, c, changed = 0;
2179
0
    PyObject *input_obj = (PyObject*)self;
2180
0
    const char *output_start, *del_table_chars=NULL;
2181
0
    Py_ssize_t inlen, tablen, dellen = 0;
2182
0
    PyObject *result;
2183
0
    int trans_table[256];
2184
2185
0
    if (PyBytes_Check(table)) {
2186
0
        table_chars = PyBytes_AS_STRING(table);
2187
0
        tablen = PyBytes_GET_SIZE(table);
2188
0
    }
2189
0
    else if (table == Py_None) {
2190
0
        table_chars = NULL;
2191
0
        tablen = 256;
2192
0
    }
2193
0
    else {
2194
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2195
0
            return NULL;
2196
0
        table_chars = table_view.buf;
2197
0
        tablen = table_view.len;
2198
0
    }
2199
2200
0
    if (tablen != 256) {
2201
0
        PyErr_SetString(PyExc_ValueError,
2202
0
          "translation table must be 256 characters long");
2203
0
        PyBuffer_Release(&table_view);
2204
0
        return NULL;
2205
0
    }
2206
2207
0
    if (deletechars != NULL) {
2208
0
        if (PyBytes_Check(deletechars)) {
2209
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2210
0
            dellen = PyBytes_GET_SIZE(deletechars);
2211
0
        }
2212
0
        else {
2213
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2214
0
                PyBuffer_Release(&table_view);
2215
0
                return NULL;
2216
0
            }
2217
0
            del_table_chars = del_table_view.buf;
2218
0
            dellen = del_table_view.len;
2219
0
        }
2220
0
    }
2221
0
    else {
2222
0
        del_table_chars = NULL;
2223
0
        dellen = 0;
2224
0
    }
2225
2226
0
    inlen = PyBytes_GET_SIZE(input_obj);
2227
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2228
0
    if (result == NULL) {
2229
0
        PyBuffer_Release(&del_table_view);
2230
0
        PyBuffer_Release(&table_view);
2231
0
        return NULL;
2232
0
    }
2233
0
    output_start = output = PyBytes_AS_STRING(result);
2234
0
    input = PyBytes_AS_STRING(input_obj);
2235
2236
0
    if (dellen == 0 && table_chars != NULL) {
2237
        /* If no deletions are required, use faster code */
2238
0
        for (i = inlen; --i >= 0; ) {
2239
0
            c = Py_CHARMASK(*input++);
2240
0
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2241
0
                changed = 1;
2242
0
        }
2243
0
        if (!changed && PyBytes_CheckExact(input_obj)) {
2244
0
            Py_SETREF(result, Py_NewRef(input_obj));
2245
0
        }
2246
0
        PyBuffer_Release(&del_table_view);
2247
0
        PyBuffer_Release(&table_view);
2248
0
        return result;
2249
0
    }
2250
2251
0
    if (table_chars == NULL) {
2252
0
        for (i = 0; i < 256; i++)
2253
0
            trans_table[i] = Py_CHARMASK(i);
2254
0
    } else {
2255
0
        for (i = 0; i < 256; i++)
2256
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2257
0
    }
2258
0
    PyBuffer_Release(&table_view);
2259
2260
0
    for (i = 0; i < dellen; i++)
2261
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2262
0
    PyBuffer_Release(&del_table_view);
2263
2264
0
    for (i = inlen; --i >= 0; ) {
2265
0
        c = Py_CHARMASK(*input++);
2266
0
        if (trans_table[c] != -1)
2267
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2268
0
                continue;
2269
0
        changed = 1;
2270
0
    }
2271
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2272
0
        Py_DECREF(result);
2273
0
        return Py_NewRef(input_obj);
2274
0
    }
2275
    /* Fix the size of the resulting byte string */
2276
0
    if (inlen > 0)
2277
0
        _PyBytes_Resize(&result, output - output_start);
2278
0
    return result;
2279
0
}
2280
2281
2282
/*[clinic input]
2283
2284
@permit_long_summary
2285
@permit_long_docstring_body
2286
@staticmethod
2287
bytes.maketrans
2288
2289
    frm: Py_buffer
2290
    to: Py_buffer
2291
    /
2292
2293
Return a translation table usable for the bytes or bytearray translate method.
2294
2295
The returned table will be one where each byte in frm is mapped to the byte at
2296
the same position in to.
2297
2298
The bytes objects frm and to must be of the same length.
2299
[clinic start generated code]*/
2300
2301
static PyObject *
2302
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2303
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2304
28
{
2305
28
    return _Py_bytes_maketrans(frm, to);
2306
28
}
2307
2308
2309
/*[clinic input]
2310
@permit_long_docstring_body
2311
bytes.replace
2312
2313
    old: Py_buffer
2314
    new: Py_buffer
2315
    count: Py_ssize_t = -1
2316
        Maximum number of occurrences to replace.
2317
        -1 (the default value) means replace all occurrences.
2318
    /
2319
2320
Return a copy with all occurrences of substring old replaced by new.
2321
2322
If the optional argument count is given, only the first count occurrences are
2323
replaced.
2324
[clinic start generated code]*/
2325
2326
static PyObject *
2327
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2328
                   Py_ssize_t count)
2329
/*[clinic end generated code: output=994fa588b6b9c104 input=8b99a9ab32bc06a2]*/
2330
42.6k
{
2331
42.6k
    return stringlib_replace((PyObject *)self,
2332
42.6k
                             (const char *)old->buf, old->len,
2333
42.6k
                             (const char *)new->buf, new->len, count);
2334
42.6k
}
2335
2336
/** End DALKE **/
2337
2338
/*[clinic input]
2339
bytes.removeprefix as bytes_removeprefix
2340
2341
    prefix: Py_buffer
2342
    /
2343
2344
Return a bytes object with the given prefix string removed if present.
2345
2346
If the bytes starts with the prefix string, return bytes[len(prefix):].
2347
Otherwise, return a copy of the original bytes.
2348
[clinic start generated code]*/
2349
2350
static PyObject *
2351
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2352
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2353
0
{
2354
0
    const char *self_start = PyBytes_AS_STRING(self);
2355
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2356
0
    const char *prefix_start = prefix->buf;
2357
0
    Py_ssize_t prefix_len = prefix->len;
2358
2359
0
    if (self_len >= prefix_len
2360
0
        && prefix_len > 0
2361
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2362
0
    {
2363
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2364
0
                                         self_len - prefix_len);
2365
0
    }
2366
2367
0
    if (PyBytes_CheckExact(self)) {
2368
0
        return Py_NewRef(self);
2369
0
    }
2370
2371
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2372
0
}
2373
2374
/*[clinic input]
2375
bytes.removesuffix as bytes_removesuffix
2376
2377
    suffix: Py_buffer
2378
    /
2379
2380
Return a bytes object with the given suffix string removed if present.
2381
2382
If the bytes ends with the suffix string and that suffix is not empty,
2383
return bytes[:-len(prefix)].  Otherwise, return a copy of the original
2384
bytes.
2385
[clinic start generated code]*/
2386
2387
static PyObject *
2388
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2389
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2390
0
{
2391
0
    const char *self_start = PyBytes_AS_STRING(self);
2392
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2393
0
    const char *suffix_start = suffix->buf;
2394
0
    Py_ssize_t suffix_len = suffix->len;
2395
2396
0
    if (self_len >= suffix_len
2397
0
        && suffix_len > 0
2398
0
        && memcmp(self_start + self_len - suffix_len,
2399
0
                  suffix_start, suffix_len) == 0)
2400
0
    {
2401
0
        return PyBytes_FromStringAndSize(self_start,
2402
0
                                         self_len - suffix_len);
2403
0
    }
2404
2405
0
    if (PyBytes_CheckExact(self)) {
2406
0
        return Py_NewRef(self);
2407
0
    }
2408
2409
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2410
0
}
2411
2412
/*[clinic input]
2413
@permit_long_summary
2414
@text_signature "($self, prefix[, start[, end]], /)"
2415
bytes.startswith
2416
2417
    prefix as subobj: object
2418
        A bytes or a tuple of bytes to try.
2419
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2420
        Optional start position. Default: start of the bytes.
2421
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2422
        Optional stop position. Default: end of the bytes.
2423
    /
2424
2425
Return True if the bytes starts with the specified prefix, False otherwise.
2426
[clinic start generated code]*/
2427
2428
static PyObject *
2429
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2430
                      Py_ssize_t start, Py_ssize_t end)
2431
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2432
394k
{
2433
394k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2434
394k
                                subobj, start, end);
2435
394k
}
2436
2437
/*[clinic input]
2438
@permit_long_summary
2439
@text_signature "($self, suffix[, start[, end]], /)"
2440
bytes.endswith
2441
2442
    suffix as subobj: object
2443
        A bytes or a tuple of bytes to try.
2444
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2445
         Optional start position. Default: start of the bytes.
2446
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2447
         Optional stop position. Default: end of the bytes.
2448
    /
2449
2450
Return True if the bytes ends with the specified suffix, False otherwise.
2451
[clinic start generated code]*/
2452
2453
static PyObject *
2454
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2455
                    Py_ssize_t end)
2456
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2457
0
{
2458
0
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2459
0
                              subobj, start, end);
2460
0
}
2461
2462
2463
/*[clinic input]
2464
bytes.decode
2465
2466
    encoding: str(c_default="NULL") = 'utf-8'
2467
        The encoding with which to decode the bytes.
2468
    errors: str(c_default="NULL") = 'strict'
2469
        The error handling scheme to use for the handling of decoding errors.
2470
        The default is 'strict' meaning that decoding errors raise a
2471
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2472
        as well as any other name registered with codecs.register_error that
2473
        can handle UnicodeDecodeErrors.
2474
2475
Decode the bytes using the codec registered for encoding.
2476
[clinic start generated code]*/
2477
2478
static PyObject *
2479
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2480
                  const char *errors)
2481
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2482
7.35M
{
2483
7.35M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2484
7.35M
}
2485
2486
2487
/*[clinic input]
2488
@permit_long_docstring_body
2489
bytes.splitlines
2490
2491
    keepends: bool = False
2492
2493
Return a list of the lines in the bytes, breaking at line boundaries.
2494
2495
Line breaks are not included in the resulting list unless keepends is given and
2496
true.
2497
[clinic start generated code]*/
2498
2499
static PyObject *
2500
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2501
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2502
0
{
2503
0
    return stringlib_splitlines(
2504
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2505
0
        PyBytes_GET_SIZE(self), keepends
2506
0
        );
2507
0
}
2508
2509
/*[clinic input]
2510
@classmethod
2511
bytes.fromhex
2512
2513
    string: object
2514
    /
2515
2516
Create a bytes object from a string of hexadecimal numbers.
2517
2518
Spaces between two numbers are accepted.
2519
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2520
[clinic start generated code]*/
2521
2522
static PyObject *
2523
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2524
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2525
41.3k
{
2526
41.3k
    PyObject *result = _PyBytes_FromHex(string, 0);
2527
41.3k
    if (type != &PyBytes_Type && result != NULL) {
2528
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2529
0
    }
2530
41.3k
    return result;
2531
41.3k
}
2532
2533
/* Decode pairs of hexadecimal digits from `string` into bytes.

   `string` must be a str or an object supporting the buffer protocol.
   Runs of Py_ISSPACE() characters are skipped before each pair (not between
   the two digits of a pair).  When `use_bytearray` is non-zero the writer is
   created with _PyBytesWriter_CreateByteArray(), otherwise with
   PyBytesWriter_Create().

   Returns the finished writer's object, or NULL with an exception set:
     - TypeError  if `string` is neither str nor bytes-like;
     - ValueError for a non-hexadecimal character (with its position) or an
       odd number of hexadecimal digits.

   Every read of *str is guarded by `str < end`: a buffer obtained through
   the buffer protocol is not guaranteed to be followed by a NUL byte, so the
   scan must never look at (or consume) data past the view's length. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;   /* lets the exit paths tell whether a view is held */

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            /* writer is still NULL here; the error path discards it as-is. */
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* Check if we had a second digit */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }
        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 encodes "ran out of input after the first digit". */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2645
2646
/*[clinic input]
2647
bytes.hex
2648
2649
    sep: object = NULL
2650
        An optional single character or byte to separate hex bytes.
2651
    bytes_per_sep: int = 1
2652
        How many bytes between separators.  Positive values count from the
2653
        right, negative values count from the left.
2654
2655
Create a string of hexadecimal numbers from a bytes object.
2656
2657
Example:
2658
>>> value = b'\xb9\x01\xef'
2659
>>> value.hex()
2660
'b901ef'
2661
>>> value.hex(':')
2662
'b9:01:ef'
2663
>>> value.hex(':', 2)
2664
'b9:01ef'
2665
>>> value.hex(':', -2)
2666
'b901:ef'
2667
[clinic start generated code]*/
2668
2669
static PyObject *
2670
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2671
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2672
0
{
2673
0
    const char *argbuf = PyBytes_AS_STRING(self);
2674
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2675
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2676
0
}
2677
2678
/* __getnewargs__: build a 1-tuple holding a bytes copy of op's data (the
   "(y#)" format takes a char pointer plus a length). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *bytes_obj = _PyBytes_CAST(op);
    const char *payload = bytes_obj->ob_sval;
    Py_ssize_t payload_len = Py_SIZE(bytes_obj);

    return Py_BuildValue("(y#)", payload, payload_len);
}
2684
2685
2686
static PyMethodDef
2687
bytes_methods[] = {
2688
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2689
    BYTES___BYTES___METHODDEF
2690
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2691
     _Py_capitalize__doc__},
2692
    STRINGLIB_CENTER_METHODDEF
2693
    BYTES_COUNT_METHODDEF
2694
    BYTES_DECODE_METHODDEF
2695
    BYTES_ENDSWITH_METHODDEF
2696
    STRINGLIB_EXPANDTABS_METHODDEF
2697
    BYTES_FIND_METHODDEF
2698
    BYTES_FROMHEX_METHODDEF
2699
    BYTES_HEX_METHODDEF
2700
    BYTES_INDEX_METHODDEF
2701
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2702
     _Py_isalnum__doc__},
2703
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2704
     _Py_isalpha__doc__},
2705
    {"isascii", stringlib_isascii, METH_NOARGS,
2706
     _Py_isascii__doc__},
2707
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2708
     _Py_isdigit__doc__},
2709
    {"islower", stringlib_islower, METH_NOARGS,
2710
     _Py_islower__doc__},
2711
    {"isspace", stringlib_isspace, METH_NOARGS,
2712
     _Py_isspace__doc__},
2713
    {"istitle", stringlib_istitle, METH_NOARGS,
2714
     _Py_istitle__doc__},
2715
    {"isupper", stringlib_isupper, METH_NOARGS,
2716
     _Py_isupper__doc__},
2717
    BYTES_JOIN_METHODDEF
2718
    STRINGLIB_LJUST_METHODDEF
2719
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2720
    BYTES_LSTRIP_METHODDEF
2721
    BYTES_MAKETRANS_METHODDEF
2722
    BYTES_PARTITION_METHODDEF
2723
    BYTES_REPLACE_METHODDEF
2724
    BYTES_REMOVEPREFIX_METHODDEF
2725
    BYTES_REMOVESUFFIX_METHODDEF
2726
    BYTES_RFIND_METHODDEF
2727
    BYTES_RINDEX_METHODDEF
2728
    STRINGLIB_RJUST_METHODDEF
2729
    BYTES_RPARTITION_METHODDEF
2730
    BYTES_RSPLIT_METHODDEF
2731
    BYTES_RSTRIP_METHODDEF
2732
    BYTES_SPLIT_METHODDEF
2733
    BYTES_SPLITLINES_METHODDEF
2734
    BYTES_STARTSWITH_METHODDEF
2735
    BYTES_STRIP_METHODDEF
2736
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2737
     _Py_swapcase__doc__},
2738
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2739
    BYTES_TRANSLATE_METHODDEF
2740
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2741
    STRINGLIB_ZFILL_METHODDEF
2742
    {NULL,     NULL}                         /* sentinel */
2743
};
2744
2745
/* nb_remainder slot: `self % arg`.  Formats only when the left operand is a
   bytes object; otherwise returns NotImplemented. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2754
2755
static PyNumberMethods bytes_as_number = {
2756
    0,              /*nb_add*/
2757
    0,              /*nb_subtract*/
2758
    0,              /*nb_multiply*/
2759
    bytes_mod,      /*nb_remainder*/
2760
};
2761
2762
static PyObject *
2763
bytes_subtype_new(PyTypeObject *, PyObject *);
2764
2765
/*[clinic input]
2766
@classmethod
2767
bytes.__new__ as bytes_new
2768
2769
    source as x: object = NULL
2770
    encoding: str = NULL
2771
    errors: str = NULL
2772
2773
[clinic start generated code]*/
2774
2775
static PyObject *
2776
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2777
               const char *errors)
2778
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2779
363k
{
2780
363k
    PyObject *bytes;
2781
363k
    PyObject *func;
2782
363k
    Py_ssize_t size;
2783
2784
363k
    if (x == NULL) {
2785
0
        if (encoding != NULL || errors != NULL) {
2786
0
            PyErr_SetString(PyExc_TypeError,
2787
0
                            encoding != NULL ?
2788
0
                            "encoding without a string argument" :
2789
0
                            "errors without a string argument");
2790
0
            return NULL;
2791
0
        }
2792
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2793
0
    }
2794
363k
    else if (encoding != NULL) {
2795
        /* Encode via the codec registry */
2796
261k
        if (!PyUnicode_Check(x)) {
2797
0
            PyErr_SetString(PyExc_TypeError,
2798
0
                            "encoding without a string argument");
2799
0
            return NULL;
2800
0
        }
2801
261k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2802
261k
    }
2803
101k
    else if (errors != NULL) {
2804
0
        PyErr_SetString(PyExc_TypeError,
2805
0
                        PyUnicode_Check(x) ?
2806
0
                        "string argument without an encoding" :
2807
0
                        "errors without a string argument");
2808
0
        return NULL;
2809
0
    }
2810
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2811
       integer argument before deferring to PyBytes_FromObject, something
2812
       PyObject_Bytes doesn't do. */
2813
101k
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2814
44.8k
        bytes = _PyObject_CallNoArgs(func);
2815
44.8k
        Py_DECREF(func);
2816
44.8k
        if (bytes == NULL)
2817
0
            return NULL;
2818
44.8k
        if (!PyBytes_Check(bytes)) {
2819
0
            PyErr_Format(PyExc_TypeError,
2820
0
                         "%T.__bytes__() must return a bytes, not %T",
2821
0
                         x, bytes);
2822
0
            Py_DECREF(bytes);
2823
0
            return NULL;
2824
0
        }
2825
44.8k
    }
2826
56.7k
    else if (PyErr_Occurred())
2827
0
        return NULL;
2828
56.7k
    else if (PyUnicode_Check(x)) {
2829
0
        PyErr_SetString(PyExc_TypeError,
2830
0
                        "string argument without an encoding");
2831
0
        return NULL;
2832
0
    }
2833
    /* Is it an integer? */
2834
56.7k
    else if (_PyIndex_Check(x)) {
2835
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2836
0
        if (size == -1 && PyErr_Occurred()) {
2837
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2838
0
                return NULL;
2839
0
            PyErr_Clear();  /* fall through */
2840
0
            bytes = PyBytes_FromObject(x);
2841
0
        }
2842
0
        else {
2843
0
            if (size < 0) {
2844
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2845
0
                return NULL;
2846
0
            }
2847
0
            bytes = _PyBytes_FromSize(size, 1);
2848
0
        }
2849
0
    }
2850
56.7k
    else {
2851
56.7k
        bytes = PyBytes_FromObject(x);
2852
56.7k
    }
2853
2854
363k
    if (bytes != NULL && type != &PyBytes_Type) {
2855
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2856
0
    }
2857
2858
363k
    return bytes;
2859
363k
}
2860
2861
/* Create a bytes object holding a C-contiguous copy of the buffer exported
 * by `x`.  Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
    int copied = (writer != NULL
                  && PyBuffer_ToContiguous(PyBytesWriter_GetData(writer),
                                           &view, view.len, 'C') >= 0);
    if (!copied) {
        /* PyBytesWriter_Discard() accepts NULL, so this also covers the
           case where the writer could not be created. */
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&view);
        return NULL;
    }

    PyBuffer_Release(&view);
    return PyBytesWriter_Finish(writer);
}
2886
2887
/* Create a bytes object from a list whose items are integers in
 * range(0, 256).  The list length is re-read on every iteration and the
 * writer is grown on demand, so the loop tolerates the list changing size
 * while items are being converted. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t pos = 0;
    while (pos < PyList_GET_SIZE(x)) {
        /* Hold a reference to the item while it is being converted. */
        PyObject *elem = PyList_GET_ITEM(x, pos);
        Py_INCREF(elem);
        Py_ssize_t byte = PyNumber_AsSsize_t(elem, NULL);
        Py_DECREF(elem);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        if (pos >= capacity) {
            /* Out of room: grow the writer and pick up the relocated
               write pointer. */
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto fail;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        pos++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
2927
2928
/* Create a bytes object from a tuple whose items are integers in
 * range(0, 256).  Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t count = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(count);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    for (Py_ssize_t pos = 0; pos < count; pos++) {
        Py_ssize_t byte = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, pos), NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }
        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }
        out[pos] = (char) byte;
    }
    return PyBytesWriter_Finish(writer);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
2960
2961
/* Create a bytes object by draining the iterator `it`.  `x` is only used to
 * obtain a length hint (default 64) for the initial writer size; the writer
 * is grown whenever more items arrive than it can hold. */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t hint = PyObject_LengthHint(x, 64);
    if (hint == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(hint);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);
    Py_ssize_t written = 0;

    /* Run the iterator to exhaustion. */
    for (;;) {
        PyObject *item = PyIter_Next(it);
        if (item == NULL) {
            /* NULL means either exhaustion or an error. */
            if (PyErr_Occurred()) {
                goto fail;
            }
            break;
        }

        /* Interpret the item as an int (__index__). */
        Py_ssize_t byte = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        /* Make room if needed, then append the byte. */
        if (written >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer, capacity + 1,
                                                        out);
            if (out == NULL) {
                goto fail;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        written++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3020
3021
/* Convert `x` to a bytes object.
 *
 * Tried in order: an exact bytes instance is returned as a new reference to
 * itself; buffer exporters are copied; exact lists and exact tuples use
 * dedicated loops; any other non-str object is iterated.  Everything else
 * raises TypeError.  Returns NULL with an exception set on failure.
 */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iter = PyObject_GetIter(x);
        if (iter == NULL) {
            /* A TypeError is replaced by the message below; any other
               exception propagates unchanged. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iter, x);
            Py_DECREF(iter);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3062
3063
/* This allocator is needed for subclasses that don't want to use __new__.
3064
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3065
 *
3066
 * This allocator will be removed when ob_shash is removed.
3067
 */
3068
static PyObject *
3069
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3070
0
{
3071
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3072
0
    if (obj == NULL) {
3073
0
        return NULL;
3074
0
    }
3075
0
    set_ob_shash(obj, -1);
3076
0
    return (PyObject*)obj;
3077
0
}
3078
3079
static PyObject *
3080
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3081
0
{
3082
0
    PyObject *pnew;
3083
0
    Py_ssize_t n;
3084
3085
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3086
0
    assert(PyBytes_Check(tmp));
3087
0
    n = PyBytes_GET_SIZE(tmp);
3088
0
    pnew = type->tp_alloc(type, n);
3089
0
    if (pnew != NULL) {
3090
0
        memcpy(PyBytes_AS_STRING(pnew),
3091
0
                  PyBytes_AS_STRING(tmp), n+1);
3092
0
        set_ob_shash((PyBytesObject *)pnew,
3093
0
            get_ob_shash((PyBytesObject *)tmp));
3094
0
    }
3095
0
    return pnew;
3096
0
}
3097
3098
PyDoc_STRVAR(bytes_doc,
3099
"bytes(iterable_of_ints) -> bytes\n\
3100
bytes(string, encoding[, errors]) -> bytes\n\
3101
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3102
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3103
bytes() -> empty bytes object\n\
3104
\n\
3105
Construct an immutable array of bytes from:\n\
3106
  - an iterable yielding integers in range(256)\n\
3107
  - a text string encoded using the specified encoding\n\
3108
  - any object implementing the buffer API.\n\
3109
  - an integer");
3110
3111
static PyObject *bytes_iter(PyObject *seq);
3112
3113
PyTypeObject PyBytes_Type = {
3114
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3115
    "bytes",
3116
    PyBytesObject_SIZE,
3117
    sizeof(char),
3118
    0,                                          /* tp_dealloc */
3119
    0,                                          /* tp_vectorcall_offset */
3120
    0,                                          /* tp_getattr */
3121
    0,                                          /* tp_setattr */
3122
    0,                                          /* tp_as_async */
3123
    bytes_repr,                                 /* tp_repr */
3124
    &bytes_as_number,                           /* tp_as_number */
3125
    &bytes_as_sequence,                         /* tp_as_sequence */
3126
    &bytes_as_mapping,                          /* tp_as_mapping */
3127
    bytes_hash,                                 /* tp_hash */
3128
    0,                                          /* tp_call */
3129
    bytes_str,                                  /* tp_str */
3130
    PyObject_GenericGetAttr,                    /* tp_getattro */
3131
    0,                                          /* tp_setattro */
3132
    &bytes_as_buffer,                           /* tp_as_buffer */
3133
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3134
        Py_TPFLAGS_BYTES_SUBCLASS |
3135
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3136
    bytes_doc,                                  /* tp_doc */
3137
    0,                                          /* tp_traverse */
3138
    0,                                          /* tp_clear */
3139
    bytes_richcompare,                          /* tp_richcompare */
3140
    0,                                          /* tp_weaklistoffset */
3141
    bytes_iter,                                 /* tp_iter */
3142
    0,                                          /* tp_iternext */
3143
    bytes_methods,                              /* tp_methods */
3144
    0,                                          /* tp_members */
3145
    0,                                          /* tp_getset */
3146
    0,                                          /* tp_base */
3147
    0,                                          /* tp_dict */
3148
    0,                                          /* tp_descr_get */
3149
    0,                                          /* tp_descr_set */
3150
    0,                                          /* tp_dictoffset */
3151
    0,                                          /* tp_init */
3152
    bytes_alloc,                                /* tp_alloc */
3153
    bytes_new,                                  /* tp_new */
3154
    PyObject_Free,                              /* tp_free */
3155
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3156
};
3157
3158
void
3159
PyBytes_Concat(PyObject **pv, PyObject *w)
3160
0
{
3161
0
    assert(pv != NULL);
3162
0
    if (*pv == NULL)
3163
0
        return;
3164
0
    if (w == NULL) {
3165
0
        Py_CLEAR(*pv);
3166
0
        return;
3167
0
    }
3168
3169
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3170
        /* Only one reference, so we can resize in place */
3171
0
        Py_ssize_t oldsize;
3172
0
        Py_buffer wb;
3173
3174
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3175
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3176
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3177
0
            Py_CLEAR(*pv);
3178
0
            return;
3179
0
        }
3180
3181
0
        oldsize = PyBytes_GET_SIZE(*pv);
3182
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3183
0
            PyErr_NoMemory();
3184
0
            goto error;
3185
0
        }
3186
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3187
0
            goto error;
3188
3189
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3190
0
        PyBuffer_Release(&wb);
3191
0
        return;
3192
3193
0
      error:
3194
0
        PyBuffer_Release(&wb);
3195
0
        Py_CLEAR(*pv);
3196
0
        return;
3197
0
    }
3198
3199
0
    else {
3200
        /* Multiple references, need to create new object */
3201
0
        PyObject *v;
3202
0
        v = bytes_concat(*pv, w);
3203
0
        Py_SETREF(*pv, v);
3204
0
    }
3205
0
}
3206
3207
void
3208
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3209
0
{
3210
0
    PyBytes_Concat(pv, w);
3211
0
    Py_XDECREF(w);
3212
0
}
3213
3214
3215
/* The following function breaks the notion that bytes are immutable:
3216
   it changes the size of a bytes object.  You can think of it
3217
   as creating a new bytes object and destroying the old one, only
3218
   more efficiently.
3219
   Note that if there's not enough memory to resize the bytes object, the
3220
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3221
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3222
   returned, and the value in *pv may or may not be the same as on input.
3223
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3224
   does *not* include that), and a trailing \0 byte is stored.
3225
*/
3226
3227
int
3228
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3229
3.77M
{
3230
3.77M
    PyObject *v;
3231
3.77M
    PyBytesObject *sv;
3232
3.77M
    v = *pv;
3233
3.77M
    if (!PyBytes_Check(v) || newsize < 0) {
3234
0
        *pv = 0;
3235
0
        Py_DECREF(v);
3236
0
        PyErr_BadInternalCall();
3237
0
        return -1;
3238
0
    }
3239
3.77M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3240
3.77M
    if (oldsize == newsize) {
3241
        /* return early if newsize equals to v->ob_size */
3242
434k
        return 0;
3243
434k
    }
3244
3.33M
    if (oldsize == 0) {
3245
1.42M
        *pv = _PyBytes_FromSize(newsize, 0);
3246
1.42M
        Py_DECREF(v);
3247
1.42M
        return (*pv == NULL) ? -1 : 0;
3248
1.42M
    }
3249
1.90M
    if (newsize == 0) {
3250
21.6k
        *pv = bytes_get_empty();
3251
21.6k
        Py_DECREF(v);
3252
21.6k
        return 0;
3253
21.6k
    }
3254
1.88M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3255
0
        if (oldsize < newsize) {
3256
0
            *pv = _PyBytes_FromSize(newsize, 0);
3257
0
            if (*pv) {
3258
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3259
0
            }
3260
0
        }
3261
0
        else {
3262
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3263
0
        }
3264
0
        Py_DECREF(v);
3265
0
        return (*pv == NULL) ? -1 : 0;
3266
0
    }
3267
3268
#ifdef Py_TRACE_REFS
3269
    _Py_ForgetReference(v);
3270
#endif
3271
1.88M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3272
1.88M
    *pv = (PyObject *)
3273
1.88M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3274
1.88M
    if (*pv == NULL) {
3275
#ifdef Py_REF_DEBUG
3276
        _Py_DecRefTotal(_PyThreadState_GET());
3277
#endif
3278
0
        PyObject_Free(v);
3279
0
        PyErr_NoMemory();
3280
0
        return -1;
3281
0
    }
3282
1.88M
    _Py_NewReferenceNoTotal(*pv);
3283
1.88M
    sv = (PyBytesObject *) *pv;
3284
1.88M
    Py_SET_SIZE(sv, newsize);
3285
1.88M
    sv->ob_sval[newsize] = '\0';
3286
1.88M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3287
1.88M
    return 0;
3288
1.88M
}
3289
3290
3291
/*********************** Bytes Iterator ****************************/
3292
3293
typedef struct {
3294
    PyObject_HEAD
3295
    Py_ssize_t it_index;
3296
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3297
} striterobject;
3298
3299
2.08k
#define _striterobject_CAST(op)  ((striterobject *)(op))
3300
3301
static void
3302
striter_dealloc(PyObject *op)
3303
76
{
3304
76
    striterobject *it = _striterobject_CAST(op);
3305
76
    _PyObject_GC_UNTRACK(it);
3306
76
    Py_XDECREF(it->it_seq);
3307
76
    PyObject_GC_Del(it);
3308
76
}
3309
3310
static int
3311
striter_traverse(PyObject *op, visitproc visit, void *arg)
3312
0
{
3313
0
    striterobject *it = _striterobject_CAST(op);
3314
0
    Py_VISIT(it->it_seq);
3315
0
    return 0;
3316
0
}
3317
3318
/* tp_iternext for bytes_iterator: return the next byte as an int object, or
 * NULL (without setting an exception here) once the sequence is exhausted. */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *self = _striterobject_CAST(op);
    assert(self != NULL);

    PyBytesObject *bytes = self->it_seq;
    if (bytes == NULL) {
        /* Already exhausted on an earlier call. */
        return NULL;
    }
    assert(PyBytes_Check(bytes));

    if (self->it_index >= PyBytes_GET_SIZE(bytes)) {
        /* Exhausted now: drop the sequence reference and mark the
           iterator as finished. */
        self->it_seq = NULL;
        Py_DECREF(bytes);
        return NULL;
    }

    unsigned char byte = (unsigned char)bytes->ob_sval[self->it_index];
    self->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3339
3340
/* __length_hint__: number of items left, or 0 once the iterator is
 * exhausted. */
static PyObject *
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return PyLong_FromSsize_t(0);
    }
    Py_ssize_t remaining = PyBytes_GET_SIZE(self->it_seq) - self->it_index;
    return PyLong_FromSsize_t(remaining);
}
3349
3350
PyDoc_STRVAR(length_hint_doc,
3351
             "Private method returning an estimate of len(list(it)).");
3352
3353
/* __reduce__: build (iter, (seq,), index) for a live iterator, or
 * (iter, ((),)) for an exhausted one. */
static PyObject *
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
{
    /* _PyEval_GetBuiltin can invoke arbitrary code, so it must be called
     * before the iterator's fields are read.
     * see issue #101765 */
    PyObject *iter_builtin = _PyEval_GetBuiltin(&_Py_ID(iter));

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq == NULL) {
        return Py_BuildValue("N(())", iter_builtin);
    }
    return Py_BuildValue("N(O)n", iter_builtin, self->it_seq, self->it_index);
}
3368
3369
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3370
3371
/* __setstate__: set the iterator position from an int, clamped to
 * [0, len(seq)].  Has no effect on an exhausted iterator. */
static PyObject *
striter_setstate(PyObject *op, PyObject *state)
{
    Py_ssize_t index = PyLong_AsSsize_t(state);
    if (index == -1 && PyErr_Occurred()) {
        return NULL;
    }

    striterobject *self = _striterobject_CAST(op);
    if (self->it_seq != NULL) {
        Py_ssize_t limit = PyBytes_GET_SIZE(self->it_seq);
        /* An index equal to the length leaves the iterator exhausted. */
        self->it_index = (index < 0) ? 0
                       : (index > limit) ? limit
                       : index;
    }
    Py_RETURN_NONE;
}
3387
3388
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3389
3390
static PyMethodDef striter_methods[] = {
3391
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3392
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3393
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3394
    {NULL,              NULL}           /* sentinel */
3395
};
3396
3397
PyTypeObject PyBytesIter_Type = {
3398
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3399
    "bytes_iterator",                           /* tp_name */
3400
    sizeof(striterobject),                      /* tp_basicsize */
3401
    0,                                          /* tp_itemsize */
3402
    /* methods */
3403
    striter_dealloc,                            /* tp_dealloc */
3404
    0,                                          /* tp_vectorcall_offset */
3405
    0,                                          /* tp_getattr */
3406
    0,                                          /* tp_setattr */
3407
    0,                                          /* tp_as_async */
3408
    0,                                          /* tp_repr */
3409
    0,                                          /* tp_as_number */
3410
    0,                                          /* tp_as_sequence */
3411
    0,                                          /* tp_as_mapping */
3412
    0,                                          /* tp_hash */
3413
    0,                                          /* tp_call */
3414
    0,                                          /* tp_str */
3415
    PyObject_GenericGetAttr,                    /* tp_getattro */
3416
    0,                                          /* tp_setattro */
3417
    0,                                          /* tp_as_buffer */
3418
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3419
    0,                                          /* tp_doc */
3420
    striter_traverse,                           /* tp_traverse */
3421
    0,                                          /* tp_clear */
3422
    0,                                          /* tp_richcompare */
3423
    0,                                          /* tp_weaklistoffset */
3424
    PyObject_SelfIter,                          /* tp_iter */
3425
    striter_next,                               /* tp_iternext */
3426
    striter_methods,                            /* tp_methods */
3427
    0,
3428
};
3429
3430
/* tp_iter for bytes: create a GC-tracked iterator positioned at index 0
 * that holds a new reference to `seq`. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iter = PyObject_GC_New(striterobject, &PyBytesIter_Type);
    if (iter == NULL) {
        return NULL;
    }
    iter->it_index = 0;
    iter->it_seq = (PyBytesObject *)Py_NewRef(seq);
    _PyObject_GC_TRACK(iter);
    return (PyObject *)iter;
}
3447
3448
3449
void
3450
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3451
    const char* src, Py_ssize_t len_src)
3452
69.9k
{
3453
69.9k
    if (len_dest == 0) {
3454
481
        return;
3455
481
    }
3456
69.4k
    if (len_src == 1) {
3457
67.1k
        memset(dest, src[0], len_dest);
3458
67.1k
    }
3459
2.35k
    else {
3460
2.35k
        if (src != dest) {
3461
2.35k
            memcpy(dest, src, len_src);
3462
2.35k
        }
3463
2.35k
        Py_ssize_t copied = len_src;
3464
5.34k
        while (copied < len_dest) {
3465
2.99k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3466
2.99k
            memcpy(dest + copied, dest, bytes_to_copy);
3467
2.99k
            copied += bytes_to_copy;
3468
2.99k
        }
3469
2.35k
    }
3470
69.4k
}
3471
3472
3473
// --- PyBytesWriter API -----------------------------------------------------
3474
3475
static inline char*
3476
byteswriter_data(PyBytesWriter *writer)
3477
17.2M
{
3478
17.2M
    return _PyBytesWriter_GetData(writer);
3479
17.2M
}
3480
3481
3482
static inline Py_ssize_t
3483
byteswriter_allocated(PyBytesWriter *writer)
3484
17.0M
{
3485
17.0M
    if (writer->obj == NULL) {
3486
16.3M
        return sizeof(writer->small_buffer);
3487
16.3M
    }
3488
744k
    else if (writer->use_bytearray) {
3489
0
        return PyByteArray_GET_SIZE(writer->obj);
3490
0
    }
3491
744k
    else {
3492
744k
        return PyBytes_GET_SIZE(writer->obj);
3493
744k
    }
3494
17.0M
}
3495
3496
3497
#ifdef MS_WINDOWS
3498
   /* On Windows, overallocating by 50% is the best factor */
3499
#  define OVERALLOCATE_FACTOR 2
3500
#else
3501
   /* On Linux, overallocating by 25% is the best factor */
3502
5.03k
#  define OVERALLOCATE_FACTOR 4
3503
#endif
3504
3505
static inline int
3506
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3507
8.81M
{
3508
8.81M
    assert(size >= 0);
3509
3510
8.81M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3511
8.81M
    if (size <= old_allocated) {
3512
7.96M
        return 0;
3513
7.96M
    }
3514
3515
853k
    if (resize & writer->overallocate) {
3516
2.51k
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3517
2.51k
            size += size / OVERALLOCATE_FACTOR;
3518
2.51k
        }
3519
2.51k
    }
3520
3521
853k
    if (writer->obj != NULL) {
3522
2.51k
        if (writer->use_bytearray) {
3523
0
            if (PyByteArray_Resize(writer->obj, size)) {
3524
0
                return -1;
3525
0
            }
3526
0
        }
3527
2.51k
        else {
3528
2.51k
            if (_PyBytes_Resize(&writer->obj, size)) {
3529
0
                return -1;
3530
0
            }
3531
2.51k
        }
3532
2.51k
        assert(writer->obj != NULL);
3533
2.51k
    }
3534
850k
    else if (writer->use_bytearray) {
3535
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3536
0
        if (writer->obj == NULL) {
3537
0
            return -1;
3538
0
        }
3539
0
        if (resize) {
3540
0
            assert((size_t)size > sizeof(writer->small_buffer));
3541
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3542
0
                   writer->small_buffer,
3543
0
                   sizeof(writer->small_buffer));
3544
0
        }
3545
0
    }
3546
850k
    else {
3547
850k
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3548
850k
        if (writer->obj == NULL) {
3549
0
            return -1;
3550
0
        }
3551
850k
        if (resize) {
3552
0
            assert((size_t)size > sizeof(writer->small_buffer));
3553
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3554
0
                   writer->small_buffer,
3555
0
                   sizeof(writer->small_buffer));
3556
0
        }
3557
850k
    }
3558
3559
#ifdef Py_DEBUG
3560
    Py_ssize_t allocated = byteswriter_allocated(writer);
3561
    if (resize && allocated > old_allocated) {
3562
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3563
               allocated - old_allocated);
3564
    }
3565
#endif
3566
3567
853k
    return 0;
3568
853k
}
3569
3570
3571
static PyBytesWriter*
3572
byteswriter_create(Py_ssize_t size, int use_bytearray)
3573
8.81M
{
3574
8.81M
    if (size < 0) {
3575
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3576
0
        return NULL;
3577
0
    }
3578
3579
8.81M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3580
8.81M
    if (writer == NULL) {
3581
3.45k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3582
3.45k
        if (writer == NULL) {
3583
0
            PyErr_NoMemory();
3584
0
            return NULL;
3585
0
        }
3586
3.45k
    }
3587
8.81M
    writer->obj = NULL;
3588
8.81M
    writer->size = 0;
3589
8.81M
    writer->use_bytearray = use_bytearray;
3590
8.81M
    writer->overallocate = !use_bytearray;
3591
3592
8.81M
    if (size >= 1) {
3593
8.81M
        if (byteswriter_resize(writer, size, 0) < 0) {
3594
0
            PyBytesWriter_Discard(writer);
3595
0
            return NULL;
3596
0
        }
3597
8.81M
        writer->size = size;
3598
8.81M
    }
3599
#ifdef Py_DEBUG
3600
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3601
#endif
3602
8.81M
    return writer;
3603
8.81M
}
3604
3605
PyBytesWriter*
3606
PyBytesWriter_Create(Py_ssize_t size)
3607
8.81M
{
3608
8.81M
    return byteswriter_create(size, 0);
3609
8.81M
}
3610
3611
PyBytesWriter*
3612
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3613
0
{
3614
0
    return byteswriter_create(size, 1);
3615
0
}
3616
3617
3618
void
3619
PyBytesWriter_Discard(PyBytesWriter *writer)
3620
8.97M
{
3621
8.97M
    if (writer == NULL) {
3622
161k
        return;
3623
161k
    }
3624
3625
8.81M
    Py_XDECREF(writer->obj);
3626
8.81M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3627
8.81M
}
3628
3629
3630
PyObject*
3631
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3632
8.37M
{
3633
8.37M
    PyObject *result;
3634
8.37M
    if (size == 0) {
3635
9.26k
        result = bytes_get_empty();
3636
9.26k
    }
3637
8.36M
    else if (writer->obj != NULL) {
3638
773k
        if (writer->use_bytearray) {
3639
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3640
0
                if (PyByteArray_Resize(writer->obj, size)) {
3641
0
                    goto error;
3642
0
                }
3643
0
            }
3644
0
        }
3645
773k
        else {
3646
773k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3647
755k
                if (_PyBytes_Resize(&writer->obj, size)) {
3648
0
                    goto error;
3649
0
                }
3650
755k
            }
3651
773k
        }
3652
773k
        result = writer->obj;
3653
773k
        writer->obj = NULL;
3654
773k
    }
3655
7.59M
    else if (writer->use_bytearray) {
3656
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3657
0
    }
3658
7.59M
    else {
3659
7.59M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3660
7.59M
    }
3661
8.37M
    PyBytesWriter_Discard(writer);
3662
8.37M
    return result;
3663
3664
0
error:
3665
0
    PyBytesWriter_Discard(writer);
3666
0
    return NULL;
3667
8.37M
}
3668
3669
PyObject*
3670
PyBytesWriter_Finish(PyBytesWriter *writer)
3671
74.6k
{
3672
74.6k
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3673
74.6k
}
3674
3675
3676
PyObject*
3677
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3678
8.27M
{
3679
8.27M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3680
8.27M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3681
0
        PyBytesWriter_Discard(writer);
3682
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3683
0
        return NULL;
3684
0
    }
3685
3686
8.27M
    return PyBytesWriter_FinishWithSize(writer, size);
3687
8.27M
}
3688
3689
3690
void*
3691
PyBytesWriter_GetData(PyBytesWriter *writer)
3692
8.98M
{
3693
8.98M
    return byteswriter_data(writer);
3694
8.98M
}
3695
3696
3697
Py_ssize_t
3698
PyBytesWriter_GetSize(PyBytesWriter *writer)
3699
0
{
3700
0
    return _PyBytesWriter_GetSize(writer);
3701
0
}
3702
3703
3704
static Py_ssize_t
3705
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3706
138
{
3707
138
    return byteswriter_allocated(writer);
3708
138
}
3709
3710
3711
int
3712
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3713
0
{
3714
0
    if (size < 0) {
3715
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3716
0
        return -1;
3717
0
    }
3718
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3719
0
        return -1;
3720
0
    }
3721
0
    writer->size = size;
3722
0
    return 0;
3723
0
}
3724
3725
3726
static void*
3727
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3728
                                      void *data)
3729
0
{
3730
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3731
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3732
0
        return NULL;
3733
0
    }
3734
0
    return byteswriter_data(writer) + pos;
3735
0
}
3736
3737
3738
int
3739
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3740
2.51k
{
3741
2.51k
    if (size < 0 && writer->size + size < 0) {
3742
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3743
0
        return -1;
3744
0
    }
3745
2.51k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3746
0
        PyErr_NoMemory();
3747
0
        return -1;
3748
0
    }
3749
2.51k
    size = writer->size + size;
3750
3751
2.51k
    if (byteswriter_resize(writer, size, 1) < 0) {
3752
0
        return -1;
3753
0
    }
3754
2.51k
    writer->size = size;
3755
2.51k
    return 0;
3756
2.51k
}
3757
3758
3759
void*
3760
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3761
                                   void *buf)
3762
0
{
3763
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3764
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3765
0
        return NULL;
3766
0
    }
3767
0
    return byteswriter_data(writer) + pos;
3768
0
}
3769
3770
3771
int
3772
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3773
                         const void *bytes, Py_ssize_t size)
3774
0
{
3775
0
    if (size < 0) {
3776
0
        size_t len = strlen(bytes);
3777
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3778
0
            PyErr_NoMemory();
3779
0
            return -1;
3780
0
        }
3781
0
        size = (Py_ssize_t)len;
3782
0
    }
3783
3784
0
    Py_ssize_t pos = writer->size;
3785
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3786
0
        return -1;
3787
0
    }
3788
0
    char *buf = byteswriter_data(writer);
3789
0
    memcpy(buf + pos, bytes, size);
3790
0
    return 0;
3791
0
}
3792
3793
3794
int
3795
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3796
0
{
3797
0
    Py_ssize_t pos = writer->size;
3798
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3799
0
        return -1;
3800
0
    }
3801
3802
0
    va_list vargs;
3803
0
    va_start(vargs, format);
3804
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3805
0
    va_end(vargs);
3806
3807
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3808
0
    return PyBytesWriter_Resize(writer, size);
3809
0
}