Coverage Report

Created: 2025-11-11 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
29
   for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
30
31
   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32
   3 or 7 bytes per bytes object allocation on a typical system.
33
*/
34
26.7M
/* Offset of the data array plus one: the "+ 1" reserves the byte that holds
   the null terminator stored right after the 'size' data bytes. */
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
36
/* Forward declaration */
37
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
38
                                                   Py_ssize_t size, void *data);
39
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
40
41
42
11.3M
/* Shorthands for the bytes singletons.
   CHARACTERS    - table of the single-byte bytes objects.
   CHARACTER(ch) - entry of that table for index 'ch', as a PyBytesObject *.
                   Callers in this file mask the index with 255.
   EMPTY         - address of the empty bytes singleton. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Expands to a plain expression: there is deliberately no trailing ';' so the
   macro can be used inside larger expressions (casts, returns, arguments). */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
#define EMPTY (&_Py_SINGLETON(bytes_empty))
46
47
48
// Return a reference to the immortal empty bytes string singleton.
49
static inline PyObject* bytes_get_empty(void)
50
4.70M
{
51
4.70M
    PyObject *empty = &EMPTY->ob_base.ob_base;
52
4.70M
    assert(_Py_IsImmortal(empty));
53
4.70M
    return empty;
54
4.70M
}
55
56
57
static inline void
58
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
59
23.0M
{
60
23.0M
_Py_COMP_DIAG_PUSH
61
23.0M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
62
#ifdef Py_GIL_DISABLED
63
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
64
#else
65
23.0M
    a->ob_shash = hash;
66
23.0M
#endif
67
23.0M
_Py_COMP_DIAG_POP
68
23.0M
}
69
70
static inline Py_hash_t
71
get_ob_shash(PyBytesObject *a)
72
18.6M
{
73
18.6M
_Py_COMP_DIAG_PUSH
74
18.6M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
75
#ifdef Py_GIL_DISABLED
76
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
77
#else
78
18.6M
    return a->ob_shash;
79
18.6M
#endif
80
18.6M
_Py_COMP_DIAG_POP
81
18.6M
}
82
83
84
/*
85
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
86
   string containing exactly 'size' bytes.
87
88
   For PyBytes_FromStringAndSize(), the parameter 'str' is
89
   either NULL or else points to a string containing at least 'size' bytes.
90
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
91
   not have to be null-terminated.  (Therefore it is safe to construct a
92
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
93
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
94
   bytes (setting the last byte to the null terminating character) and you can
95
   fill in the data yourself.  If 'str' is non-NULL then the resulting
96
   PyBytes object must be treated as immutable and you must not fill in nor
97
   alter the data yourself, since the strings may be shared.
98
99
   The PyObject member 'op->ob_size', which denotes the number of "extra
100
   items" in a variable-size object, will contain the number of bytes
101
   allocated for string data, not counting the null terminating character.
102
   It is therefore equal to the 'size' parameter (for
103
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
104
   parameter (for PyBytes_FromString()).
105
*/
106
static PyObject *
107
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
108
13.2M
{
109
13.2M
    PyBytesObject *op;
110
13.2M
    assert(size >= 0);
111
112
13.2M
    if (size == 0) {
113
0
        return bytes_get_empty();
114
0
    }
115
116
13.2M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
117
0
        PyErr_SetString(PyExc_OverflowError,
118
0
                        "byte string is too large");
119
0
        return NULL;
120
0
    }
121
122
    /* Inline PyObject_NewVar */
123
13.2M
    if (use_calloc)
124
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
125
13.2M
    else
126
13.2M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
127
13.2M
    if (op == NULL) {
128
0
        return PyErr_NoMemory();
129
0
    }
130
13.2M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
131
13.2M
    set_ob_shash(op, -1);
132
13.2M
    if (!use_calloc) {
133
13.2M
        op->ob_sval[size] = '\0';
134
13.2M
    }
135
13.2M
    return (PyObject *) op;
136
13.2M
}
137
138
PyObject *
139
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
140
29.3M
{
141
29.3M
    PyBytesObject *op;
142
29.3M
    if (size < 0) {
143
0
        PyErr_SetString(PyExc_SystemError,
144
0
            "Negative size passed to PyBytes_FromStringAndSize");
145
0
        return NULL;
146
0
    }
147
29.3M
    if (size == 1 && str != NULL) {
148
11.3M
        op = CHARACTER(*str & 255);
149
11.3M
        assert(_Py_IsImmortal(op));
150
11.3M
        return (PyObject *)op;
151
11.3M
    }
152
17.9M
    if (size == 0) {
153
4.65M
        return bytes_get_empty();
154
4.65M
    }
155
156
13.2M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
157
13.2M
    if (op == NULL)
158
0
        return NULL;
159
13.2M
    if (str == NULL)
160
2.03M
        return (PyObject *) op;
161
162
11.2M
    memcpy(op->ob_sval, str, size);
163
11.2M
    return (PyObject *) op;
164
13.2M
}
165
166
PyObject *
167
PyBytes_FromString(const char *str)
168
528
{
169
528
    size_t size;
170
528
    PyBytesObject *op;
171
172
528
    assert(str != NULL);
173
528
    size = strlen(str);
174
528
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
175
0
        PyErr_SetString(PyExc_OverflowError,
176
0
            "byte string is too long");
177
0
        return NULL;
178
0
    }
179
180
528
    if (size == 0) {
181
0
        return bytes_get_empty();
182
0
    }
183
528
    else if (size == 1) {
184
0
        op = CHARACTER(*str & 255);
185
0
        assert(_Py_IsImmortal(op));
186
0
        return (PyObject *)op;
187
0
    }
188
189
    /* Inline PyObject_NewVar */
190
528
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
191
528
    if (op == NULL) {
192
0
        return PyErr_NoMemory();
193
0
    }
194
528
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
195
528
    set_ob_shash(op, -1);
196
528
    memcpy(op->ob_sval, str, size+1);
197
528
    return (PyObject *) op;
198
528
}
199
200
201
static char*
202
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
203
                 const char *format, va_list vargs)
204
0
{
205
0
    const char *f;
206
0
    const char *p;
207
0
    Py_ssize_t prec;
208
0
    int longflag;
209
0
    int size_tflag;
210
    /* Longest 64-bit formatted numbers:
211
       - "18446744073709551615\0" (21 bytes)
212
       - "-9223372036854775808\0" (21 bytes)
213
       Decimal takes the most space (it isn't enough for octal.)
214
215
       Longest 64-bit pointer representation:
216
       "0xffffffffffffffff\0" (19 bytes). */
217
0
    char buffer[21];
218
219
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
220
221
0
#define WRITE_BYTES_LEN(str, len_expr) \
222
0
    do { \
223
0
        size_t len = (len_expr); \
224
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
225
0
        if (s == NULL) { \
226
0
            goto error; \
227
0
        } \
228
0
        memcpy(s, (str), len); \
229
0
        s += len; \
230
0
    } while (0)
231
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
232
233
0
    for (f = format; *f; f++) {
234
0
        if (*f != '%') {
235
0
            *s++ = *f;
236
0
            continue;
237
0
        }
238
239
0
        p = f++;
240
241
        /* ignore the width (ex: 10 in "%10s") */
242
0
        while (Py_ISDIGIT(*f))
243
0
            f++;
244
245
        /* parse the precision (ex: 10 in "%.10s") */
246
0
        prec = 0;
247
0
        if (*f == '.') {
248
0
            f++;
249
0
            for (; Py_ISDIGIT(*f); f++) {
250
0
                prec = (prec * 10) + (*f - '0');
251
0
            }
252
0
        }
253
254
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
255
0
            f++;
256
257
        /* handle the long flag ('l'), but only for %ld and %lu.
258
           others can be added when necessary. */
259
0
        longflag = 0;
260
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
261
0
            longflag = 1;
262
0
            ++f;
263
0
        }
264
265
        /* handle the size_t flag ('z'). */
266
0
        size_tflag = 0;
267
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268
0
            size_tflag = 1;
269
0
            ++f;
270
0
        }
271
272
0
        switch (*f) {
273
0
        case 'c':
274
0
        {
275
0
            int c = va_arg(vargs, int);
276
0
            if (c < 0 || c > 255) {
277
0
                PyErr_SetString(PyExc_OverflowError,
278
0
                                "PyBytes_FromFormatV(): %c format "
279
0
                                "expects an integer in range [0; 255]");
280
0
                goto error;
281
0
            }
282
0
            *s++ = (unsigned char)c;
283
0
            break;
284
0
        }
285
286
0
        case 'd':
287
0
            if (longflag) {
288
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
289
0
            }
290
0
            else if (size_tflag) {
291
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
292
0
            }
293
0
            else {
294
0
                sprintf(buffer, "%d", va_arg(vargs, int));
295
0
            }
296
0
            assert(strlen(buffer) < sizeof(buffer));
297
0
            WRITE_BYTES(buffer);
298
0
            break;
299
300
0
        case 'u':
301
0
            if (longflag) {
302
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
303
0
            }
304
0
            else if (size_tflag) {
305
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
306
0
            }
307
0
            else {
308
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
309
0
            }
310
0
            assert(strlen(buffer) < sizeof(buffer));
311
0
            WRITE_BYTES(buffer);
312
0
            break;
313
314
0
        case 'i':
315
0
            sprintf(buffer, "%i", va_arg(vargs, int));
316
0
            assert(strlen(buffer) < sizeof(buffer));
317
0
            WRITE_BYTES(buffer);
318
0
            break;
319
320
0
        case 'x':
321
0
            sprintf(buffer, "%x", va_arg(vargs, int));
322
0
            assert(strlen(buffer) < sizeof(buffer));
323
0
            WRITE_BYTES(buffer);
324
0
            break;
325
326
0
        case 's':
327
0
        {
328
0
            Py_ssize_t i;
329
330
0
            p = va_arg(vargs, const char*);
331
0
            if (prec <= 0) {
332
0
                i = strlen(p);
333
0
            }
334
0
            else {
335
0
                i = 0;
336
0
                while (i < prec && p[i]) {
337
0
                    i++;
338
0
                }
339
0
            }
340
0
            WRITE_BYTES_LEN(p, i);
341
0
            break;
342
0
        }
343
344
0
        case 'p':
345
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
346
0
            assert(strlen(buffer) < sizeof(buffer));
347
            /* %p is ill-defined:  ensure leading 0x. */
348
0
            if (buffer[1] == 'X')
349
0
                buffer[1] = 'x';
350
0
            else if (buffer[1] != 'x') {
351
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
352
0
                buffer[0] = '0';
353
0
                buffer[1] = 'x';
354
0
            }
355
0
            WRITE_BYTES(buffer);
356
0
            break;
357
358
0
        case '%':
359
0
            *s++ = '%';
360
0
            break;
361
362
0
        default:
363
            /* invalid format string: copy unformatted string and exit */
364
0
            WRITE_BYTES(p);
365
0
            return s;
366
0
        }
367
0
    }
368
369
0
#undef WRITE_BYTES
370
0
#undef WRITE_BYTES_LEN
371
372
0
    return s;
373
374
0
 error:
375
0
    return NULL;
376
0
}
377
378
379
PyObject *
380
PyBytes_FromFormatV(const char *format, va_list vargs)
381
0
{
382
0
    Py_ssize_t alloc = strlen(format);
383
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
384
0
    if (writer == NULL) {
385
0
        return NULL;
386
0
    }
387
388
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
389
0
    if (s == NULL) {
390
0
        PyBytesWriter_Discard(writer);
391
0
        return NULL;
392
0
    }
393
394
0
    return PyBytesWriter_FinishWithPointer(writer, s);
395
0
}
396
397
398
/* Variadic front end for PyBytes_FromFormatV(): collects the arguments that
   follow 'format' into a va_list, forwards them, and returns whatever
   PyBytes_FromFormatV() returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list ap;

    va_start(ap, format);
    PyObject *formatted = PyBytes_FromFormatV(format, ap);
    va_end(ap);

    return formatted;
}
409
410
411
/* Helpers for formatstring */
412
413
/* Fetch the next argument for a format specifier and advance *p_argidx.

   A negative 'arglen' means 'args' is a single, non-tuple argument, which is
   returned as is; otherwise item *p_argidx of the tuple 'args' is returned.
   When *p_argidx is not below 'arglen' there is nothing left: TypeError is
   set and NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}
428
429
/* Returns a new reference to a PyBytes object, or NULL on failure. */
430
431
static char*
432
formatfloat(PyObject *v, int flags, int prec, int type,
433
            PyObject **p_result, PyBytesWriter *writer, char *str)
434
0
{
435
0
    char *p;
436
0
    PyObject *result;
437
0
    double x;
438
0
    size_t len;
439
0
    int dtoa_flags = 0;
440
441
0
    x = PyFloat_AsDouble(v);
442
0
    if (x == -1.0 && PyErr_Occurred()) {
443
0
        PyErr_Format(PyExc_TypeError, "float argument required, "
444
0
                     "not %.200s", Py_TYPE(v)->tp_name);
445
0
        return NULL;
446
0
    }
447
448
0
    if (prec < 0)
449
0
        prec = 6;
450
451
0
    if (flags & F_ALT) {
452
0
        dtoa_flags |= Py_DTSF_ALT;
453
0
    }
454
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
455
456
0
    if (p == NULL)
457
0
        return NULL;
458
459
0
    len = strlen(p);
460
0
    if (writer != NULL) {
461
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
462
0
        if (str == NULL) {
463
0
            PyMem_Free(p);
464
0
            return NULL;
465
0
        }
466
0
        memcpy(str, p, len);
467
0
        PyMem_Free(p);
468
0
        str += len;
469
0
        return str;
470
0
    }
471
472
0
    result = PyBytes_FromStringAndSize(p, len);
473
0
    PyMem_Free(p);
474
0
    *p_result = result;
475
0
    return result != NULL ? str : NULL;
476
0
}
477
478
/* Format 'v' as an integer for conversion 'type' via _PyUnicode_FormatLong().

   Ints are formatted directly.  Other numbers are first converted with
   _PyNumber_Index() for 'o', 'x' and 'X', or with PyNumber_Long() for the
   remaining types.  If that conversion fails with TypeError, or 'v' is not a
   number at all, a TypeError naming the required kind of value is set.

   Returns the formatted object, or NULL with an exception set. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    if (PyLong_Check(v)) {
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    }

    const int wants_integer = (type == 'o' || type == 'x' || type == 'X');

    if (PyNumber_Check(v)) {
        /* make sure number is a type of integer for o, x, and X */
        PyObject *as_int = wants_integer ? _PyNumber_Index(v)
                                         : PyNumber_Long(v);
        if (as_int != NULL) {
            assert(PyLong_Check(as_int));
            PyObject *formatted = _PyUnicode_FormatLong(as_int, flags & F_ALT,
                                                        prec, type);
            Py_DECREF(as_int);
            return formatted;
        }
        /* Only a TypeError is replaced by the message below. */
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        wants_integer ? "an integer" : "a real number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
506
507
static int
508
byte_converter(PyObject *arg, char *p)
509
0
{
510
0
    if (PyBytes_Check(arg)) {
511
0
        if (PyBytes_GET_SIZE(arg) != 1) {
512
0
            PyErr_Format(PyExc_TypeError,
513
0
                         "%%c requires an integer in range(256) or "
514
0
                         "a single byte, not a bytes object of length %zd",
515
0
                         PyBytes_GET_SIZE(arg));
516
0
            return 0;
517
0
        }
518
0
        *p = PyBytes_AS_STRING(arg)[0];
519
0
        return 1;
520
0
    }
521
0
    else if (PyByteArray_Check(arg)) {
522
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
523
0
            PyErr_Format(PyExc_TypeError,
524
0
                         "%%c requires an integer in range(256) or "
525
0
                         "a single byte, not a bytearray object of length %zd",
526
0
                         PyByteArray_GET_SIZE(arg));
527
0
            return 0;
528
0
        }
529
0
        *p = PyByteArray_AS_STRING(arg)[0];
530
0
        return 1;
531
0
    }
532
0
    else if (PyIndex_Check(arg)) {
533
0
        int overflow;
534
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
535
0
        if (ival == -1 && PyErr_Occurred()) {
536
0
            return 0;
537
0
        }
538
0
        if (!(0 <= ival && ival <= 255)) {
539
            /* this includes an overflow in converting to C long */
540
0
            PyErr_SetString(PyExc_OverflowError,
541
0
                            "%c arg not in range(256)");
542
0
            return 0;
543
0
        }
544
0
        *p = (char)ival;
545
0
        return 1;
546
0
    }
547
0
    PyErr_Format(PyExc_TypeError,
548
0
        "%%c requires an integer in range(256) or a single byte, not %T",
549
0
        arg);
550
0
    return 0;
551
0
}
552
553
static PyObject *_PyBytes_FromBuffer(PyObject *x);
554
555
static PyObject *
556
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
557
0
{
558
0
    PyObject *func, *result;
559
    /* is it a bytes object? */
560
0
    if (PyBytes_Check(v)) {
561
0
        *pbuf = PyBytes_AS_STRING(v);
562
0
        *plen = PyBytes_GET_SIZE(v);
563
0
        return Py_NewRef(v);
564
0
    }
565
0
    if (PyByteArray_Check(v)) {
566
0
        *pbuf = PyByteArray_AS_STRING(v);
567
0
        *plen = PyByteArray_GET_SIZE(v);
568
0
        return Py_NewRef(v);
569
0
    }
570
    /* does it support __bytes__? */
571
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
572
0
    if (func != NULL) {
573
0
        result = _PyObject_CallNoArgs(func);
574
0
        Py_DECREF(func);
575
0
        if (result == NULL)
576
0
            return NULL;
577
0
        if (!PyBytes_Check(result)) {
578
0
            PyErr_Format(PyExc_TypeError,
579
0
                         "%T.__bytes__() must return a bytes, not %T",
580
0
                         v, result);
581
0
            Py_DECREF(result);
582
0
            return NULL;
583
0
        }
584
0
        *pbuf = PyBytes_AS_STRING(result);
585
0
        *plen = PyBytes_GET_SIZE(result);
586
0
        return result;
587
0
    }
588
    /* does it support buffer protocol? */
589
0
    if (PyObject_CheckBuffer(v)) {
590
        /* maybe we can avoid making a copy of the buffer object here? */
591
0
        result = _PyBytes_FromBuffer(v);
592
0
        if (result == NULL)
593
0
            return NULL;
594
0
        *pbuf = PyBytes_AS_STRING(result);
595
0
        *plen = PyBytes_GET_SIZE(result);
596
0
        return result;
597
0
    }
598
0
    PyErr_Format(PyExc_TypeError,
599
0
                 "%%b requires a bytes-like object, "
600
0
                 "or an object that implements __bytes__, not '%.100s'",
601
0
                 Py_TYPE(v)->tp_name);
602
0
    return NULL;
603
0
}
604
605
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
606
607
PyObject *
608
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
609
                  PyObject *args, int use_bytearray)
610
0
{
611
0
    const char *fmt;
612
0
    Py_ssize_t arglen, argidx;
613
0
    Py_ssize_t fmtcnt;
614
0
    int args_owned = 0;
615
0
    PyObject *dict = NULL;
616
617
0
    if (args == NULL) {
618
0
        PyErr_BadInternalCall();
619
0
        return NULL;
620
0
    }
621
0
    fmt = format;
622
0
    fmtcnt = format_len;
623
624
0
    PyBytesWriter *writer;
625
0
    if (use_bytearray) {
626
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
627
0
    }
628
0
    else {
629
0
        writer = PyBytesWriter_Create(fmtcnt);
630
0
    }
631
0
    if (writer == NULL) {
632
0
        return NULL;
633
0
    }
634
0
    char *res = PyBytesWriter_GetData(writer);
635
636
0
    if (PyTuple_Check(args)) {
637
0
        arglen = PyTuple_GET_SIZE(args);
638
0
        argidx = 0;
639
0
    }
640
0
    else {
641
0
        arglen = -1;
642
0
        argidx = -2;
643
0
    }
644
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
645
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
646
0
        !PyByteArray_Check(args)) {
647
0
            dict = args;
648
0
    }
649
650
0
    while (--fmtcnt >= 0) {
651
0
        if (*fmt != '%') {
652
0
            Py_ssize_t len;
653
0
            char *pos;
654
655
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
656
0
            if (pos != NULL)
657
0
                len = pos - fmt;
658
0
            else
659
0
                len = fmtcnt + 1;
660
0
            assert(len != 0);
661
662
0
            memcpy(res, fmt, len);
663
0
            res += len;
664
0
            fmt += len;
665
0
            fmtcnt -= (len - 1);
666
0
        }
667
0
        else {
668
            /* Got a format specifier */
669
0
            int flags = 0;
670
0
            Py_ssize_t width = -1;
671
0
            int prec = -1;
672
0
            int c = '\0';
673
0
            int fill;
674
0
            PyObject *v = NULL;
675
0
            PyObject *temp = NULL;
676
0
            const char *pbuf = NULL;
677
0
            int sign;
678
0
            Py_ssize_t len = 0;
679
0
            char onechar; /* For byte_converter() */
680
0
            Py_ssize_t alloc;
681
682
0
            fmt++;
683
0
            if (*fmt == '%') {
684
0
                *res++ = '%';
685
0
                fmt++;
686
0
                fmtcnt--;
687
0
                continue;
688
0
            }
689
0
            if (*fmt == '(') {
690
0
                const char *keystart;
691
0
                Py_ssize_t keylen;
692
0
                PyObject *key;
693
0
                int pcount = 1;
694
695
0
                if (dict == NULL) {
696
0
                    PyErr_SetString(PyExc_TypeError,
697
0
                             "format requires a mapping");
698
0
                    goto error;
699
0
                }
700
0
                ++fmt;
701
0
                --fmtcnt;
702
0
                keystart = fmt;
703
                /* Skip over balanced parentheses */
704
0
                while (pcount > 0 && --fmtcnt >= 0) {
705
0
                    if (*fmt == ')')
706
0
                        --pcount;
707
0
                    else if (*fmt == '(')
708
0
                        ++pcount;
709
0
                    fmt++;
710
0
                }
711
0
                keylen = fmt - keystart - 1;
712
0
                if (fmtcnt < 0 || pcount > 0) {
713
0
                    PyErr_SetString(PyExc_ValueError,
714
0
                               "incomplete format key");
715
0
                    goto error;
716
0
                }
717
0
                key = PyBytes_FromStringAndSize(keystart,
718
0
                                                 keylen);
719
0
                if (key == NULL)
720
0
                    goto error;
721
0
                if (args_owned) {
722
0
                    Py_DECREF(args);
723
0
                    args_owned = 0;
724
0
                }
725
0
                args = PyObject_GetItem(dict, key);
726
0
                Py_DECREF(key);
727
0
                if (args == NULL) {
728
0
                    goto error;
729
0
                }
730
0
                args_owned = 1;
731
0
                arglen = -1;
732
0
                argidx = -2;
733
0
            }
734
735
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
736
0
            while (--fmtcnt >= 0) {
737
0
                switch (c = *fmt++) {
738
0
                case '-': flags |= F_LJUST; continue;
739
0
                case '+': flags |= F_SIGN; continue;
740
0
                case ' ': flags |= F_BLANK; continue;
741
0
                case '#': flags |= F_ALT; continue;
742
0
                case '0': flags |= F_ZERO; continue;
743
0
                }
744
0
                break;
745
0
            }
746
747
            /* Parse width. Example: "%10s" => width=10 */
748
0
            if (c == '*') {
749
0
                v = getnextarg(args, arglen, &argidx);
750
0
                if (v == NULL)
751
0
                    goto error;
752
0
                if (!PyLong_Check(v)) {
753
0
                    PyErr_SetString(PyExc_TypeError,
754
0
                                    "* wants int");
755
0
                    goto error;
756
0
                }
757
0
                width = PyLong_AsSsize_t(v);
758
0
                if (width == -1 && PyErr_Occurred())
759
0
                    goto error;
760
0
                if (width < 0) {
761
0
                    flags |= F_LJUST;
762
0
                    width = -width;
763
0
                }
764
0
                if (--fmtcnt >= 0)
765
0
                    c = *fmt++;
766
0
            }
767
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
768
0
                width = c - '0';
769
0
                while (--fmtcnt >= 0) {
770
0
                    c = Py_CHARMASK(*fmt++);
771
0
                    if (!Py_ISDIGIT(c))
772
0
                        break;
773
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
774
0
                        PyErr_SetString(
775
0
                            PyExc_ValueError,
776
0
                            "width too big");
777
0
                        goto error;
778
0
                    }
779
0
                    width = width*10 + (c - '0');
780
0
                }
781
0
            }
782
783
            /* Parse precision. Example: "%.3f" => prec=3 */
784
0
            if (c == '.') {
785
0
                prec = 0;
786
0
                if (--fmtcnt >= 0)
787
0
                    c = *fmt++;
788
0
                if (c == '*') {
789
0
                    v = getnextarg(args, arglen, &argidx);
790
0
                    if (v == NULL)
791
0
                        goto error;
792
0
                    if (!PyLong_Check(v)) {
793
0
                        PyErr_SetString(
794
0
                            PyExc_TypeError,
795
0
                            "* wants int");
796
0
                        goto error;
797
0
                    }
798
0
                    prec = PyLong_AsInt(v);
799
0
                    if (prec == -1 && PyErr_Occurred())
800
0
                        goto error;
801
0
                    if (prec < 0)
802
0
                        prec = 0;
803
0
                    if (--fmtcnt >= 0)
804
0
                        c = *fmt++;
805
0
                }
806
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
807
0
                    prec = c - '0';
808
0
                    while (--fmtcnt >= 0) {
809
0
                        c = Py_CHARMASK(*fmt++);
810
0
                        if (!Py_ISDIGIT(c))
811
0
                            break;
812
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
813
0
                            PyErr_SetString(
814
0
                                PyExc_ValueError,
815
0
                                "prec too big");
816
0
                            goto error;
817
0
                        }
818
0
                        prec = prec*10 + (c - '0');
819
0
                    }
820
0
                }
821
0
            } /* prec */
822
0
            if (fmtcnt >= 0) {
823
0
                if (c == 'h' || c == 'l' || c == 'L') {
824
0
                    if (--fmtcnt >= 0)
825
0
                        c = *fmt++;
826
0
                }
827
0
            }
828
0
            if (fmtcnt < 0) {
829
0
                PyErr_SetString(PyExc_ValueError,
830
0
                                "incomplete format");
831
0
                goto error;
832
0
            }
833
0
            v = getnextarg(args, arglen, &argidx);
834
0
            if (v == NULL)
835
0
                goto error;
836
837
0
            if (fmtcnt == 0) {
838
                /* last write: disable writer overallocation */
839
0
                writer->overallocate = 0;
840
0
            }
841
842
0
            sign = 0;
843
0
            fill = ' ';
844
0
            switch (c) {
845
0
            case 'r':
846
                // %r is only for 2/3 code; 3 only code should use %a
847
0
            case 'a':
848
0
                temp = PyObject_ASCII(v);
849
0
                if (temp == NULL)
850
0
                    goto error;
851
0
                assert(PyUnicode_IS_ASCII(temp));
852
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
853
0
                len = PyUnicode_GET_LENGTH(temp);
854
0
                if (prec >= 0 && len > prec)
855
0
                    len = prec;
856
0
                break;
857
858
0
            case 's':
859
                // %s is only for 2/3 code; 3 only code should use %b
860
0
            case 'b':
861
0
                temp = format_obj(v, &pbuf, &len);
862
0
                if (temp == NULL)
863
0
                    goto error;
864
0
                if (prec >= 0 && len > prec)
865
0
                    len = prec;
866
0
                break;
867
868
0
            case 'i':
869
0
            case 'd':
870
0
            case 'u':
871
0
            case 'o':
872
0
            case 'x':
873
0
            case 'X':
874
0
                if (PyLong_CheckExact(v)
875
0
                    && width == -1 && prec == -1
876
0
                    && !(flags & (F_SIGN | F_BLANK))
877
0
                    && c != 'X')
878
0
                {
879
                    /* Fast path */
880
0
                    int alternate = flags & F_ALT;
881
0
                    int base;
882
883
0
                    switch(c)
884
0
                    {
885
0
                        default:
886
0
                            Py_UNREACHABLE();
887
0
                        case 'd':
888
0
                        case 'i':
889
0
                        case 'u':
890
0
                            base = 10;
891
0
                            break;
892
0
                        case 'o':
893
0
                            base = 8;
894
0
                            break;
895
0
                        case 'x':
896
0
                        case 'X':
897
0
                            base = 16;
898
0
                            break;
899
0
                    }
900
901
                    /* Fast path */
902
0
                    res = _PyLong_FormatBytesWriter(writer, res,
903
0
                                                    v, base, alternate);
904
0
                    if (res == NULL)
905
0
                        goto error;
906
0
                    continue;
907
0
                }
908
909
0
                temp = formatlong(v, flags, prec, c);
910
0
                if (!temp)
911
0
                    goto error;
912
0
                assert(PyUnicode_IS_ASCII(temp));
913
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
914
0
                len = PyUnicode_GET_LENGTH(temp);
915
0
                sign = 1;
916
0
                if (flags & F_ZERO)
917
0
                    fill = '0';
918
0
                break;
919
920
0
            case 'e':
921
0
            case 'E':
922
0
            case 'f':
923
0
            case 'F':
924
0
            case 'g':
925
0
            case 'G':
926
0
                if (width == -1 && prec == -1
927
0
                    && !(flags & (F_SIGN | F_BLANK)))
928
0
                {
929
                    /* Fast path */
930
0
                    res = formatfloat(v, flags, prec, c, NULL, writer, res);
931
0
                    if (res == NULL)
932
0
                        goto error;
933
0
                    continue;
934
0
                }
935
936
0
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
937
0
                    goto error;
938
0
                pbuf = PyBytes_AS_STRING(temp);
939
0
                len = PyBytes_GET_SIZE(temp);
940
0
                sign = 1;
941
0
                if (flags & F_ZERO)
942
0
                    fill = '0';
943
0
                break;
944
945
0
            case 'c':
946
0
                pbuf = &onechar;
947
0
                len = byte_converter(v, &onechar);
948
0
                if (!len)
949
0
                    goto error;
950
0
                if (width == -1) {
951
                    /* Fast path */
952
0
                    *res++ = onechar;
953
0
                    continue;
954
0
                }
955
0
                break;
956
957
0
            default:
958
0
                PyErr_Format(PyExc_ValueError,
959
0
                  "unsupported format character '%c' (0x%x) "
960
0
                  "at index %zd",
961
0
                  c, c,
962
0
                  (Py_ssize_t)(fmt - 1 - format));
963
0
                goto error;
964
0
            }
965
966
0
            if (sign) {
967
0
                if (*pbuf == '-' || *pbuf == '+') {
968
0
                    sign = *pbuf++;
969
0
                    len--;
970
0
                }
971
0
                else if (flags & F_SIGN)
972
0
                    sign = '+';
973
0
                else if (flags & F_BLANK)
974
0
                    sign = ' ';
975
0
                else
976
0
                    sign = 0;
977
0
            }
978
0
            if (width < len)
979
0
                width = len;
980
981
0
            alloc = width;
982
0
            if (sign != 0 && len == width)
983
0
                alloc++;
984
            /* 2: size preallocated for %s */
985
0
            if (alloc > 2) {
986
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
987
0
                if (res == NULL) {
988
0
                    Py_XDECREF(temp);
989
0
                    goto error;
990
0
                }
991
0
            }
992
0
#ifndef NDEBUG
993
0
            char *before = res;
994
0
#endif
995
996
            /* Write the sign if needed */
997
0
            if (sign) {
998
0
                if (fill != ' ')
999
0
                    *res++ = sign;
1000
0
                if (width > len)
1001
0
                    width--;
1002
0
            }
1003
1004
            /* Write the numeric prefix for "x", "X" and "o" formats
1005
               if the alternate form is used.
1006
               For example, write "0x" for the "%#x" format. */
1007
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1008
0
                assert(pbuf[0] == '0');
1009
0
                assert(pbuf[1] == c);
1010
0
                if (fill != ' ') {
1011
0
                    *res++ = *pbuf++;
1012
0
                    *res++ = *pbuf++;
1013
0
                }
1014
0
                width -= 2;
1015
0
                if (width < 0)
1016
0
                    width = 0;
1017
0
                len -= 2;
1018
0
            }
1019
1020
            /* Pad left with the fill character if needed */
1021
0
            if (width > len && !(flags & F_LJUST)) {
1022
0
                memset(res, fill, width - len);
1023
0
                res += (width - len);
1024
0
                width = len;
1025
0
            }
1026
1027
            /* If padding with spaces: write sign if needed and/or numeric
1028
               prefix if the alternate form is used */
1029
0
            if (fill == ' ') {
1030
0
                if (sign)
1031
0
                    *res++ = sign;
1032
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1033
0
                    assert(pbuf[0] == '0');
1034
0
                    assert(pbuf[1] == c);
1035
0
                    *res++ = *pbuf++;
1036
0
                    *res++ = *pbuf++;
1037
0
                }
1038
0
            }
1039
1040
            /* Copy bytes */
1041
0
            memcpy(res, pbuf, len);
1042
0
            res += len;
1043
1044
            /* Pad right with the fill character if needed */
1045
0
            if (width > len) {
1046
0
                memset(res, ' ', width - len);
1047
0
                res += (width - len);
1048
0
            }
1049
1050
0
            if (dict && (argidx < arglen)) {
1051
0
                PyErr_SetString(PyExc_TypeError,
1052
0
                           "not all arguments converted during bytes formatting");
1053
0
                Py_XDECREF(temp);
1054
0
                goto error;
1055
0
            }
1056
0
            Py_XDECREF(temp);
1057
1058
0
#ifndef NDEBUG
1059
            /* check that we computed the exact size for this write */
1060
0
            assert((res - before) == alloc);
1061
0
#endif
1062
0
        } /* '%' */
1063
1064
        /* If overallocation was disabled, ensure that it was the last
1065
           write. Otherwise, we missed an optimization */
1066
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1067
0
    } /* until end */
1068
1069
0
    if (argidx < arglen && !dict) {
1070
0
        PyErr_SetString(PyExc_TypeError,
1071
0
                        "not all arguments converted during bytes formatting");
1072
0
        goto error;
1073
0
    }
1074
1075
0
    if (args_owned) {
1076
0
        Py_DECREF(args);
1077
0
    }
1078
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1079
1080
0
 error:
1081
0
    PyBytesWriter_Discard(writer);
1082
0
    if (args_owned) {
1083
0
        Py_DECREF(args);
1084
0
    }
1085
0
    return NULL;
1086
0
}
1087
1088
/* Unescape a backslash-escaped string. */
1089
PyObject *_PyBytes_DecodeEscape2(const char *s,
1090
                                Py_ssize_t len,
1091
                                const char *errors,
1092
                                int *first_invalid_escape_char,
1093
                                const char **first_invalid_escape_ptr)
1094
5.55k
{
1095
5.55k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1096
5.55k
    if (writer == NULL) {
1097
0
        return NULL;
1098
0
    }
1099
5.55k
    char *p = PyBytesWriter_GetData(writer);
1100
1101
5.55k
    *first_invalid_escape_char = -1;
1102
5.55k
    *first_invalid_escape_ptr = NULL;
1103
1104
5.55k
    const char *end = s + len;
1105
1.25M
    while (s < end) {
1106
1.25M
        if (*s != '\\') {
1107
1.22M
            *p++ = *s++;
1108
1.22M
            continue;
1109
1.22M
        }
1110
1111
21.8k
        s++;
1112
21.8k
        if (s == end) {
1113
0
            PyErr_SetString(PyExc_ValueError,
1114
0
                            "Trailing \\ in string");
1115
0
            goto failed;
1116
0
        }
1117
1118
21.8k
        switch (*s++) {
1119
        /* XXX This assumes ASCII! */
1120
764
        case '\n': break;
1121
2.03k
        case '\\': *p++ = '\\'; break;
1122
313
        case '\'': *p++ = '\''; break;
1123
502
        case '\"': *p++ = '\"'; break;
1124
112
        case 'b': *p++ = '\b'; break;
1125
396
        case 'f': *p++ = '\014'; break; /* FF */
1126
320
        case 't': *p++ = '\t'; break;
1127
730
        case 'n': *p++ = '\n'; break;
1128
2.19k
        case 'r': *p++ = '\r'; break;
1129
114
        case 'v': *p++ = '\013'; break; /* VT */
1130
859
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1131
3.56k
        case '0': case '1': case '2': case '3':
1132
8.79k
        case '4': case '5': case '6': case '7':
1133
8.79k
        {
1134
8.79k
            int c = s[-1] - '0';
1135
8.79k
            if (s < end && '0' <= *s && *s <= '7') {
1136
5.58k
                c = (c<<3) + *s++ - '0';
1137
5.58k
                if (s < end && '0' <= *s && *s <= '7')
1138
3.00k
                    c = (c<<3) + *s++ - '0';
1139
5.58k
            }
1140
8.79k
            if (c > 0377) {
1141
2.78k
                if (*first_invalid_escape_char == -1) {
1142
2.08k
                    *first_invalid_escape_char = c;
1143
                    /* Back up 3 chars, since we've already incremented s. */
1144
2.08k
                    *first_invalid_escape_ptr = s - 3;
1145
2.08k
                }
1146
2.78k
            }
1147
8.79k
            *p++ = c;
1148
8.79k
            break;
1149
7.18k
        }
1150
421
        case 'x':
1151
421
            if (s+1 < end) {
1152
419
                int digit1, digit2;
1153
419
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1154
419
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1155
419
                if (digit1 < 16 && digit2 < 16) {
1156
416
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1157
416
                    s += 2;
1158
416
                    break;
1159
416
                }
1160
419
            }
1161
            /* invalid hexadecimal digits */
1162
1163
5
            if (!errors || strcmp(errors, "strict") == 0) {
1164
5
                PyErr_Format(PyExc_ValueError,
1165
5
                             "invalid \\x escape at position %zd",
1166
5
                             s - 2 - (end - len));
1167
5
                goto failed;
1168
5
            }
1169
0
            if (strcmp(errors, "replace") == 0) {
1170
0
                *p++ = '?';
1171
0
            } else if (strcmp(errors, "ignore") == 0)
1172
0
                /* do nothing */;
1173
0
            else {
1174
0
                PyErr_Format(PyExc_ValueError,
1175
0
                             "decoding error; unknown "
1176
0
                             "error handling code: %.400s",
1177
0
                             errors);
1178
0
                goto failed;
1179
0
            }
1180
            /* skip \x */
1181
0
            if (s < end && Py_ISXDIGIT(s[0]))
1182
0
                s++; /* and a hexdigit */
1183
0
            break;
1184
1185
4.26k
        default:
1186
4.26k
            if (*first_invalid_escape_char == -1) {
1187
1.48k
                *first_invalid_escape_char = (unsigned char)s[-1];
1188
                /* Back up one char, since we've already incremented s. */
1189
1.48k
                *first_invalid_escape_ptr = s - 1;
1190
1.48k
            }
1191
4.26k
            *p++ = '\\';
1192
4.26k
            s--;
1193
21.8k
        }
1194
21.8k
    }
1195
1196
5.54k
    return PyBytesWriter_FinishWithPointer(writer, p);
1197
1198
5
  failed:
1199
5
    PyBytesWriter_Discard(writer);
1200
5
    return NULL;
1201
5.55k
}
1202
1203
PyObject *PyBytes_DecodeEscape(const char *s,
1204
                                Py_ssize_t len,
1205
                                const char *errors,
1206
                                Py_ssize_t Py_UNUSED(unicode),
1207
                                const char *Py_UNUSED(recode_encoding))
1208
0
{
1209
0
    int first_invalid_escape_char;
1210
0
    const char *first_invalid_escape_ptr;
1211
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1212
0
                                             &first_invalid_escape_char,
1213
0
                                             &first_invalid_escape_ptr);
1214
0
    if (result == NULL)
1215
0
        return NULL;
1216
0
    if (first_invalid_escape_char != -1) {
1217
0
        if (first_invalid_escape_char > 0xff) {
1218
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1219
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1220
0
                                 "Such sequences will not work in the future. ",
1221
0
                                 first_invalid_escape_char) < 0)
1222
0
            {
1223
0
                Py_DECREF(result);
1224
0
                return NULL;
1225
0
            }
1226
0
        }
1227
0
        else {
1228
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1229
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1230
0
                                 "Such sequences will not work in the future. ",
1231
0
                                 first_invalid_escape_char) < 0)
1232
0
            {
1233
0
                Py_DECREF(result);
1234
0
                return NULL;
1235
0
            }
1236
0
        }
1237
0
    }
1238
0
    return result;
1239
0
}
1240
/* -------------------------------------------------------------------- */
1241
/* object api */
1242
1243
Py_ssize_t
1244
PyBytes_Size(PyObject *op)
1245
10.3k
{
1246
10.3k
    if (!PyBytes_Check(op)) {
1247
0
        PyErr_Format(PyExc_TypeError,
1248
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1249
0
        return -1;
1250
0
    }
1251
10.3k
    return Py_SIZE(op);
1252
10.3k
}
1253
1254
char *
1255
PyBytes_AsString(PyObject *op)
1256
5.61M
{
1257
5.61M
    if (!PyBytes_Check(op)) {
1258
0
        PyErr_Format(PyExc_TypeError,
1259
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1260
0
        return NULL;
1261
0
    }
1262
5.61M
    return ((PyBytesObject *)op)->ob_sval;
1263
5.61M
}
1264
1265
int
1266
PyBytes_AsStringAndSize(PyObject *obj,
1267
                         char **s,
1268
                         Py_ssize_t *len)
1269
107k
{
1270
107k
    if (s == NULL) {
1271
0
        PyErr_BadInternalCall();
1272
0
        return -1;
1273
0
    }
1274
1275
107k
    if (!PyBytes_Check(obj)) {
1276
0
        PyErr_Format(PyExc_TypeError,
1277
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1278
0
        return -1;
1279
0
    }
1280
1281
107k
    *s = PyBytes_AS_STRING(obj);
1282
107k
    if (len != NULL)
1283
107k
        *len = PyBytes_GET_SIZE(obj);
1284
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1285
0
        PyErr_SetString(PyExc_ValueError,
1286
0
                        "embedded null byte");
1287
0
        return -1;
1288
0
    }
1289
107k
    return 0;
1290
107k
}
1291
1292
/* -------------------------------------------------------------------- */
1293
/* Methods */
1294
1295
0
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1296
1297
#include "stringlib/stringdefs.h"
1298
#define STRINGLIB_MUTABLE 0
1299
1300
#include "stringlib/fastsearch.h"
1301
#include "stringlib/count.h"
1302
#include "stringlib/find.h"
1303
#include "stringlib/join.h"
1304
#include "stringlib/partition.h"
1305
#include "stringlib/split.h"
1306
#include "stringlib/ctype.h"
1307
1308
#include "stringlib/transmogrify.h"
1309
1310
#undef STRINGLIB_GET_EMPTY
1311
1312
Py_ssize_t
1313
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1314
              const char *needle, Py_ssize_t len_needle,
1315
              Py_ssize_t offset)
1316
0
{
1317
0
    assert(len_haystack >= 0);
1318
0
    assert(len_needle >= 0);
1319
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1320
0
    if (len_needle == 0) {
1321
0
        return offset;
1322
0
    }
1323
0
    if (len_needle > len_haystack) {
1324
0
        return -1;
1325
0
    }
1326
0
    assert(len_haystack >= 1);
1327
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1328
0
                                    needle, len_needle, offset);
1329
0
    if (res == -1) {
1330
0
        Py_ssize_t last_align = len_haystack - len_needle;
1331
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1332
0
            return offset + last_align;
1333
0
        }
1334
0
    }
1335
0
    return res;
1336
0
}
1337
1338
Py_ssize_t
1339
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1340
                     const char *needle, Py_ssize_t len_needle,
1341
                     Py_ssize_t offset)
1342
0
{
1343
0
    return stringlib_rfind(haystack, len_haystack,
1344
0
                           needle, len_needle, offset);
1345
0
}
1346
1347
/* Build the repr of the bytes object `obj` by passing its buffer and size
   to _Py_bytes_repr() with the class name "bytes". */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    Py_ssize_t length = PyBytes_GET_SIZE(obj);

    return _Py_bytes_repr(data, length, smartquotes, "bytes");
}
1353
1354
PyObject *
1355
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1356
               const char *classname)
1357
3.42k
{
1358
3.42k
    Py_ssize_t i;
1359
3.42k
    Py_ssize_t newsize, squotes, dquotes;
1360
3.42k
    PyObject *v;
1361
3.42k
    unsigned char quote;
1362
3.42k
    Py_UCS1 *p;
1363
1364
    /* Compute size of output string */
1365
3.42k
    squotes = dquotes = 0;
1366
3.42k
    newsize = 3; /* b'' */
1367
47.9M
    for (i = 0; i < length; i++) {
1368
47.9M
        unsigned char c = data[i];
1369
47.9M
        Py_ssize_t incr = 1;
1370
47.9M
        switch(c) {
1371
347k
        case '\'': squotes++; break;
1372
1.17M
        case '"':  dquotes++; break;
1373
101k
        case '\\': case '\t': case '\n': case '\r':
1374
101k
            incr = 2; break; /* \C */
1375
46.3M
        default:
1376
46.3M
            if (c < ' ' || c >= 0x7f)
1377
29.7M
                incr = 4; /* \xHH */
1378
47.9M
        }
1379
47.9M
        if (newsize > PY_SSIZE_T_MAX - incr)
1380
0
            goto overflow;
1381
47.9M
        newsize += incr;
1382
47.9M
    }
1383
3.42k
    quote = '\'';
1384
3.42k
    if (smartquotes && squotes && !dquotes)
1385
100
        quote = '"';
1386
3.42k
    if (squotes && quote == '\'') {
1387
511
        if (newsize > PY_SSIZE_T_MAX - squotes)
1388
0
            goto overflow;
1389
511
        newsize += squotes;
1390
511
    }
1391
1392
3.42k
    v = PyUnicode_New(newsize, 127);
1393
3.42k
    if (v == NULL) {
1394
0
        return NULL;
1395
0
    }
1396
3.42k
    p = PyUnicode_1BYTE_DATA(v);
1397
1398
3.42k
    *p++ = 'b', *p++ = quote;
1399
47.9M
    for (i = 0; i < length; i++) {
1400
47.9M
        unsigned char c = data[i];
1401
47.9M
        if (c == quote || c == '\\')
1402
362k
            *p++ = '\\', *p++ = c;
1403
47.6M
        else if (c == '\t')
1404
4.57k
            *p++ = '\\', *p++ = 't';
1405
47.5M
        else if (c == '\n')
1406
76.8k
            *p++ = '\\', *p++ = 'n';
1407
47.5M
        else if (c == '\r')
1408
4.75k
            *p++ = '\\', *p++ = 'r';
1409
47.5M
        else if (c < ' ' || c >= 0x7f) {
1410
29.7M
            *p++ = '\\';
1411
29.7M
            *p++ = 'x';
1412
29.7M
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1413
29.7M
            *p++ = Py_hexdigits[c & 0xf];
1414
29.7M
        }
1415
17.7M
        else
1416
17.7M
            *p++ = c;
1417
47.9M
    }
1418
3.42k
    *p++ = quote;
1419
3.42k
    assert(_PyUnicode_CheckConsistency(v, 1));
1420
3.42k
    return v;
1421
1422
0
  overflow:
1423
0
    PyErr_Format(PyExc_OverflowError,
1424
0
                 "%s object is too large to make repr", classname);
1425
0
    return NULL;
1426
3.42k
}
1427
1428
/* repr handler: PyBytes_Repr() with smartquotes enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1433
1434
/* str handler: same result as bytes_repr().  When the bytes_warning config
   flag is set, a BytesWarning is issued first, and NULL is returned if
   PyErr_WarnEx() reports a nonzero status. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1445
1446
static Py_ssize_t
1447
bytes_length(PyObject *self)
1448
267k
{
1449
267k
    PyBytesObject *a = _PyBytes_CAST(self);
1450
267k
    return Py_SIZE(a);
1451
267k
}
1452
1453
/* This is also used by PyBytes_Concat() */
1454
static PyObject *
1455
bytes_concat(PyObject *a, PyObject *b)
1456
19.6k
{
1457
19.6k
    Py_buffer va, vb;
1458
19.6k
    PyObject *result = NULL;
1459
1460
19.6k
    va.len = -1;
1461
19.6k
    vb.len = -1;
1462
19.6k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1463
19.6k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1464
501
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1465
501
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1466
501
        goto done;
1467
501
    }
1468
1469
    /* Optimize end cases */
1470
19.1k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1471
4
        result = Py_NewRef(b);
1472
4
        goto done;
1473
4
    }
1474
19.1k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1475
3
        result = Py_NewRef(a);
1476
3
        goto done;
1477
3
    }
1478
1479
19.1k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1480
0
        PyErr_NoMemory();
1481
0
        goto done;
1482
0
    }
1483
1484
19.1k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1485
19.1k
    if (result != NULL) {
1486
19.1k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1487
19.1k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1488
19.1k
    }
1489
1490
19.6k
  done:
1491
19.6k
    if (va.len != -1)
1492
19.6k
        PyBuffer_Release(&va);
1493
19.6k
    if (vb.len != -1)
1494
19.1k
        PyBuffer_Release(&vb);
1495
19.6k
    return result;
1496
19.1k
}
1497
1498
static PyObject *
1499
bytes_repeat(PyObject *self, Py_ssize_t n)
1500
8.04k
{
1501
8.04k
    PyBytesObject *a = _PyBytes_CAST(self);
1502
8.04k
    if (n < 0)
1503
2.24k
        n = 0;
1504
    /* watch out for overflows:  the size can overflow int,
1505
     * and the # of bytes needed can overflow size_t
1506
     */
1507
8.04k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1508
0
        PyErr_SetString(PyExc_OverflowError,
1509
0
            "repeated bytes are too long");
1510
0
        return NULL;
1511
0
    }
1512
8.04k
    Py_ssize_t size = Py_SIZE(a) * n;
1513
8.04k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1514
5.13k
        return Py_NewRef(a);
1515
5.13k
    }
1516
2.91k
    size_t nbytes = (size_t)size;
1517
2.91k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1518
0
        PyErr_SetString(PyExc_OverflowError,
1519
0
            "repeated bytes are too long");
1520
0
        return NULL;
1521
0
    }
1522
2.91k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1523
2.91k
    if (op == NULL) {
1524
0
        return PyErr_NoMemory();
1525
0
    }
1526
2.91k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1527
2.91k
    set_ob_shash(op, -1);
1528
2.91k
    op->ob_sval[size] = '\0';
1529
1530
2.91k
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1531
1532
2.91k
    return (PyObject *) op;
1533
2.91k
}
1534
1535
static int
1536
bytes_contains(PyObject *self, PyObject *arg)
1537
3.28k
{
1538
3.28k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1539
3.28k
}
1540
1541
static PyObject *
1542
bytes_item(PyObject *self, Py_ssize_t i)
1543
0
{
1544
0
    PyBytesObject *a = _PyBytes_CAST(self);
1545
0
    if (i < 0 || i >= Py_SIZE(a)) {
1546
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1547
0
        return NULL;
1548
0
    }
1549
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1550
0
}
1551
1552
static int
1553
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1554
9.43M
{
1555
9.43M
    int cmp;
1556
9.43M
    Py_ssize_t len;
1557
1558
9.43M
    len = Py_SIZE(a);
1559
9.43M
    if (Py_SIZE(b) != len)
1560
0
        return 0;
1561
1562
9.43M
    if (a->ob_sval[0] != b->ob_sval[0])
1563
0
        return 0;
1564
1565
9.43M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1566
9.43M
    return (cmp == 0);
1567
9.43M
}
1568
1569
/* Rich comparison handler for bytes.

   Returns NotImplemented unless both operands pass PyBytes_Check().  In
   that case, when the bytes_warning config flag is set and `op` is Py_EQ
   or Py_NE, a BytesWarning is issued for comparisons involving str or int
   operands.  Two bytes objects are ordered by their common prefix first
   and by length second. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are strings. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *lhs = _PyBytes_CAST(aa);
    PyBytesObject *rhs = _PyBytes_CAST(bb);

    /* Identity shortcut: a byte string is equal to itself. */
    if (lhs == rhs) {
        switch (op) {
        case Py_EQ:
        case Py_LE:
        case Py_GE:
            Py_RETURN_TRUE;
        case Py_NE:
        case Py_LT:
        case Py_GT:
            Py_RETURN_FALSE;
        default:
            PyErr_BadArgument();
            return NULL;
        }
    }

    /* Equality and inequality only need the equality helper. */
    if (op == Py_EQ || op == Py_NE) {
        int equal = bytes_compare_eq(lhs, rhs);
        if (op == Py_NE) {
            equal = !equal;
        }
        return PyBool_FromLong(equal);
    }

    /* Ordering: compare the shared prefix, then fall back to the lengths. */
    const Py_ssize_t len_lhs = Py_SIZE(lhs);
    const Py_ssize_t len_rhs = Py_SIZE(rhs);
    const Py_ssize_t shared = Py_MIN(len_lhs, len_rhs);
    int order = 0;

    if (shared > 0) {
        order = Py_CHARMASK(*lhs->ob_sval) - Py_CHARMASK(*rhs->ob_sval);
        if (order == 0) {
            order = memcmp(lhs->ob_sval, rhs->ob_sval, shared);
        }
    }
    if (order != 0) {
        Py_RETURN_RICHCOMPARE(order, 0, op);
    }
    Py_RETURN_RICHCOMPARE(len_lhs, len_rhs, op);
}
1631
1632
static Py_hash_t
1633
bytes_hash(PyObject *self)
1634
18.6M
{
1635
18.6M
    PyBytesObject *a = _PyBytes_CAST(self);
1636
0
    Py_hash_t hash = get_ob_shash(a);
1637
18.6M
    if (hash == -1) {
1638
        /* Can't fail */
1639
9.54M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1640
9.54M
        set_ob_shash(a, hash);
1641
9.54M
    }
1642
18.6M
    return hash;
1643
18.6M
}
1644
1645
/* Subscript handler supporting integer-like indices and slices.

   An index yields the byte value as an int object; negative indices are
   offset by the size, and out-of-range indices set IndexError.  A slice
   yields a bytes object: the empty-bytes constant for an empty slice,
   `self` itself for a full step-1 slice of an exact bytes object, or a
   copy otherwise.  Any other `item` sets TypeError. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (index < 0) {
            index += PyBytes_GET_SIZE(self);
        }
        if (index < 0 || index >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    Py_ssize_t count = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
                                             &stop, step);

    if (count <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }
    if (start == 0 && step == 1 &&
        count == PyBytes_GET_SIZE(self) &&
        PyBytes_CheckExact(self))
    {
        /* The slice covers the whole exact-bytes object. */
        return Py_NewRef(self);
    }
    if (step == 1) {
        /* Contiguous range: copy it in one call. */
        return PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self) + start,
            count);
    }

    /* Strided slice: copy one byte at a time. */
    const char *src = PyBytes_AS_STRING(self);
    PyObject *sliced = PyBytes_FromStringAndSize(NULL, count);
    if (sliced == NULL) {
        return NULL;
    }
    char *dst = PyBytes_AS_STRING(sliced);

    /* The source position is kept in a size_t, as in the original loop. */
    size_t cur = start;
    Py_ssize_t written = 0;
    while (written < count) {
        dst[written] = src[cur];
        cur += step;
        written++;
    }
    return sliced;
}
1710
1711
static int
1712
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1713
534k
{
1714
534k
    PyBytesObject *self = _PyBytes_CAST(op);
1715
534k
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1716
534k
                             1, flags);
1717
534k
}
1718
1719
static PySequenceMethods bytes_as_sequence = {
1720
    bytes_length,       /*sq_length*/
1721
    bytes_concat,       /*sq_concat*/
1722
    bytes_repeat,       /*sq_repeat*/
1723
    bytes_item,         /*sq_item*/
1724
    0,                  /*sq_slice*/
1725
    0,                  /*sq_ass_item*/
1726
    0,                  /*sq_ass_slice*/
1727
    bytes_contains      /*sq_contains*/
1728
};
1729
1730
static PyMappingMethods bytes_as_mapping = {
1731
    bytes_length,
1732
    bytes_subscript,
1733
    0,
1734
};
1735
1736
static PyBufferProcs bytes_as_buffer = {
1737
    bytes_buffer_getbuffer,
1738
    NULL,
1739
};
1740
1741
1742
/*[clinic input]
1743
bytes.__bytes__
1744
Convert this value to exact type bytes.
1745
[clinic start generated code]*/
1746
1747
static PyObject *
1748
bytes___bytes___impl(PyBytesObject *self)
1749
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1750
0
{
1751
0
    if (PyBytes_CheckExact(self)) {
1752
0
        return Py_NewRef(self);
1753
0
    }
1754
0
    else {
1755
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1756
0
    }
1757
0
}
1758
1759
1760
0
#define LEFTSTRIP 0
1761
0
#define RIGHTSTRIP 1
1762
0
#define BOTHSTRIP 2
1763
1764
/*[clinic input]
1765
bytes.split
1766
1767
    sep: object = None
1768
        The delimiter according which to split the bytes.
1769
        None (the default value) means split on ASCII whitespace characters
1770
        (space, tab, return, newline, formfeed, vertical tab).
1771
    maxsplit: Py_ssize_t = -1
1772
        Maximum number of splits to do.
1773
        -1 (the default value) means no limit.
1774
1775
Return a list of the sections in the bytes, using sep as the delimiter.
1776
[clinic start generated code]*/
1777
1778
static PyObject *
1779
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1780
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1781
1.10k
{
1782
1.10k
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1783
1.10k
    const char *s = PyBytes_AS_STRING(self), *sub;
1784
1.10k
    Py_buffer vsub;
1785
1.10k
    PyObject *list;
1786
1787
1.10k
    if (maxsplit < 0)
1788
1.10k
        maxsplit = PY_SSIZE_T_MAX;
1789
1.10k
    if (sep == Py_None)
1790
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1791
1.10k
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1792
0
        return NULL;
1793
1.10k
    sub = vsub.buf;
1794
1.10k
    n = vsub.len;
1795
1796
1.10k
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1797
1.10k
    PyBuffer_Release(&vsub);
1798
1.10k
    return list;
1799
1.10k
}
1800
1801
/*[clinic input]
1802
@permit_long_docstring_body
1803
bytes.partition
1804
1805
    sep: Py_buffer
1806
    /
1807
1808
Partition the bytes into three parts using the given separator.
1809
1810
This will search for the separator sep in the bytes. If the separator is found,
1811
returns a 3-tuple containing the part before the separator, the separator
1812
itself, and the part after it.
1813
1814
If the separator is not found, returns a 3-tuple containing the original bytes
1815
object and two empty bytes objects.
1816
[clinic start generated code]*/
1817
1818
static PyObject *
1819
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1820
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1821
0
{
1822
0
    return stringlib_partition(
1823
0
        (PyObject*) self,
1824
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1825
0
        sep->obj, (const char *)sep->buf, sep->len
1826
0
        );
1827
0
}
1828
1829
/*[clinic input]
1830
@permit_long_docstring_body
1831
bytes.rpartition
1832
1833
    sep: Py_buffer
1834
    /
1835
1836
Partition the bytes into three parts using the given separator.
1837
1838
This will search for the separator sep in the bytes, starting at the end. If
1839
the separator is found, returns a 3-tuple containing the part before the
1840
separator, the separator itself, and the part after it.
1841
1842
If the separator is not found, returns a 3-tuple containing two empty bytes
1843
objects and the original bytes object.
1844
[clinic start generated code]*/
1845
1846
static PyObject *
1847
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1848
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1849
0
{
1850
0
    return stringlib_rpartition(
1851
0
        (PyObject*) self,
1852
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1853
0
        sep->obj, (const char *)sep->buf, sep->len
1854
0
        );
1855
0
}
1856
1857
/*[clinic input]
1858
@permit_long_docstring_body
1859
bytes.rsplit = bytes.split
1860
1861
Return a list of the sections in the bytes, using sep as the delimiter.
1862
1863
Splitting is done starting at the end of the bytes and working to the front.
1864
[clinic start generated code]*/
1865
1866
static PyObject *
1867
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1868
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1869
0
{
1870
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1871
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1872
0
    Py_buffer vsub;
1873
0
    PyObject *list;
1874
1875
0
    if (maxsplit < 0)
1876
0
        maxsplit = PY_SSIZE_T_MAX;
1877
0
    if (sep == Py_None)
1878
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1879
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1880
0
        return NULL;
1881
0
    sub = vsub.buf;
1882
0
    n = vsub.len;
1883
1884
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1885
0
    PyBuffer_Release(&vsub);
1886
0
    return list;
1887
0
}
1888
1889
1890
/*[clinic input]
1891
bytes.join
1892
1893
    iterable_of_bytes: object
1894
    /
1895
1896
Concatenate any number of bytes objects.
1897
1898
The bytes whose method is called is inserted in between each pair.
1899
1900
The result is returned as a new bytes object.
1901
1902
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1903
[clinic start generated code]*/
1904
1905
static PyObject *
1906
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1907
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1908
0
{
1909
0
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1910
0
}
1911
1912
/* Join `iterable` using `sep` as the separator.
 *
 * `sep` must be a non-NULL bytes object (or subclass instance).  A NULL
 * `sep` raises through PyErr_BadInternalCall(); any other non-bytes `sep`
 * raises TypeError.  Returns NULL in both error cases.
 */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
1927
1928
/*[clinic input]
1929
@permit_long_summary
1930
@text_signature "($self, sub[, start[, end]], /)"
1931
bytes.find
1932
1933
    sub: object
1934
    start: slice_index(accept={int, NoneType}, c_default='0') = None
1935
         Optional start position. Default: start of the bytes.
1936
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
1937
         Optional stop position. Default: end of the bytes.
1938
    /
1939
1940
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1941
1942
Return -1 on failure.
1943
[clinic start generated code]*/
1944
1945
static PyObject *
1946
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1947
                Py_ssize_t end)
1948
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
1949
0
{
1950
0
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1951
0
                          sub, start, end);
1952
0
}
1953
1954
/*[clinic input]
1955
@permit_long_summary
1956
bytes.index = bytes.find
1957
1958
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1959
1960
Raise ValueError if the subsection is not found.
1961
[clinic start generated code]*/
1962
1963
static PyObject *
1964
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1965
                 Py_ssize_t end)
1966
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
1967
0
{
1968
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1969
0
                           sub, start, end);
1970
0
}
1971
1972
/*[clinic input]
1973
@permit_long_summary
1974
bytes.rfind = bytes.find
1975
1976
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1977
1978
Return -1 on failure.
1979
[clinic start generated code]*/
1980
1981
static PyObject *
1982
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1983
                 Py_ssize_t end)
1984
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
1985
14.9k
{
1986
14.9k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1987
14.9k
                           sub, start, end);
1988
14.9k
}
1989
1990
/*[clinic input]
1991
@permit_long_summary
1992
bytes.rindex = bytes.find
1993
1994
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1995
1996
Raise ValueError if the subsection is not found.
1997
[clinic start generated code]*/
1998
1999
static PyObject *
2000
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2001
                  Py_ssize_t end)
2002
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
2003
0
{
2004
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2005
0
                            sub, start, end);
2006
0
}
2007
2008
2009
/* Strip from `self` every leading and/or trailing byte that occurs in the
 * buffer exported by `sepobj`.  `striptype` selects the side(s): the left
 * side is scanned unless it is RIGHTSTRIP, the right side unless it is
 * LEFTSTRIP.  Returns NULL if `sepobj` does not export a simple buffer.
 */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t left = 0;
    Py_ssize_t right = size;
    Py_buffer strip_view;
    char *strip_set;
    Py_ssize_t strip_len;

    if (PyObject_GetBuffer(sepobj, &strip_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    strip_set = strip_view.buf;
    strip_len = strip_view.len;

    /* Advance past leading bytes that are members of the strip set. */
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!memchr(strip_set, Py_CHARMASK(data[left]), strip_len)) {
                break;
            }
        }
    }

    /* Retreat over trailing members, never crossing the left boundary. */
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(strip_set, Py_CHARMASK(data[right - 1]), strip_len))
        {
            right--;
        }
    }

    PyBuffer_Release(&strip_view);

    /* Nothing removed from an exact bytes object: return it unchanged. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2047
2048
2049
/* Strip leading and/or trailing bytes for which Py_ISSPACE() is true.
 * `striptype` selects the side(s): the left side is scanned unless it is
 * RIGHTSTRIP, the right side unless it is LEFTSTRIP.
 */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t left = 0;
    Py_ssize_t right = size;

    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    /* Retreat over trailing whitespace, never crossing the left boundary. */
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    /* Nothing removed from an exact bytes object: return it unchanged. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2076
2077
2078
/* Dispatch a strip request: None selects the whitespace variant
 * (do_strip), anything else is treated as the set of bytes to remove
 * (do_xstrip).
 */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2086
2087
/*[clinic input]
2088
@permit_long_docstring_body
2089
bytes.strip
2090
2091
    bytes: object = None
2092
    /
2093
2094
Strip leading and trailing bytes contained in the argument.
2095
2096
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2097
[clinic start generated code]*/
2098
2099
static PyObject *
2100
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2101
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2102
0
{
2103
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2104
0
}
2105
2106
/*[clinic input]
2107
bytes.lstrip
2108
2109
    bytes: object = None
2110
    /
2111
2112
Strip leading bytes contained in the argument.
2113
2114
If the argument is omitted or None, strip leading ASCII whitespace.
2115
[clinic start generated code]*/
2116
2117
static PyObject *
2118
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2119
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2120
0
{
2121
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2122
0
}
2123
2124
/*[clinic input]
2125
bytes.rstrip
2126
2127
    bytes: object = None
2128
    /
2129
2130
Strip trailing bytes contained in the argument.
2131
2132
If the argument is omitted or None, strip trailing ASCII whitespace.
2133
[clinic start generated code]*/
2134
2135
static PyObject *
2136
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2137
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2138
0
{
2139
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2140
0
}
2141
2142
2143
/*[clinic input]
2144
@permit_long_summary
2145
bytes.count = bytes.find
2146
2147
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2148
[clinic start generated code]*/
2149
2150
static PyObject *
2151
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2152
                 Py_ssize_t end)
2153
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2154
2.11k
{
2155
2.11k
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2156
2.11k
                           sub, start, end);
2157
2.11k
}
2158
2159
2160
/*[clinic input]
2161
bytes.translate
2162
2163
    table: object
2164
        Translation table, which must be a bytes object of length 256.
2165
    /
2166
    delete as deletechars: object(c_default="NULL") = b''
2167
2168
Return a copy with each character mapped by the given translation table.
2169
2170
All characters occurring in the optional argument delete are removed.
2171
The remaining characters are mapped through the given translation table.
2172
[clinic start generated code]*/
2173
2174
static PyObject *
2175
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2176
                     PyObject *deletechars)
2177
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2178
0
{
2179
0
    const char *input;
2180
0
    char *output;
2181
0
    Py_buffer table_view = {NULL, NULL};
2182
0
    Py_buffer del_table_view = {NULL, NULL};
2183
0
    const char *table_chars;
2184
0
    Py_ssize_t i, c, changed = 0;
2185
0
    PyObject *input_obj = (PyObject*)self;
2186
0
    const char *output_start, *del_table_chars=NULL;
2187
0
    Py_ssize_t inlen, tablen, dellen = 0;
2188
0
    PyObject *result;
2189
0
    int trans_table[256];
2190
2191
0
    if (PyBytes_Check(table)) {
2192
0
        table_chars = PyBytes_AS_STRING(table);
2193
0
        tablen = PyBytes_GET_SIZE(table);
2194
0
    }
2195
0
    else if (table == Py_None) {
2196
0
        table_chars = NULL;
2197
0
        tablen = 256;
2198
0
    }
2199
0
    else {
2200
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2201
0
            return NULL;
2202
0
        table_chars = table_view.buf;
2203
0
        tablen = table_view.len;
2204
0
    }
2205
2206
0
    if (tablen != 256) {
2207
0
        PyErr_SetString(PyExc_ValueError,
2208
0
          "translation table must be 256 characters long");
2209
0
        PyBuffer_Release(&table_view);
2210
0
        return NULL;
2211
0
    }
2212
2213
0
    if (deletechars != NULL) {
2214
0
        if (PyBytes_Check(deletechars)) {
2215
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2216
0
            dellen = PyBytes_GET_SIZE(deletechars);
2217
0
        }
2218
0
        else {
2219
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2220
0
                PyBuffer_Release(&table_view);
2221
0
                return NULL;
2222
0
            }
2223
0
            del_table_chars = del_table_view.buf;
2224
0
            dellen = del_table_view.len;
2225
0
        }
2226
0
    }
2227
0
    else {
2228
0
        del_table_chars = NULL;
2229
0
        dellen = 0;
2230
0
    }
2231
2232
0
    inlen = PyBytes_GET_SIZE(input_obj);
2233
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2234
0
    if (result == NULL) {
2235
0
        PyBuffer_Release(&del_table_view);
2236
0
        PyBuffer_Release(&table_view);
2237
0
        return NULL;
2238
0
    }
2239
0
    output_start = output = PyBytes_AS_STRING(result);
2240
0
    input = PyBytes_AS_STRING(input_obj);
2241
2242
0
    if (dellen == 0 && table_chars != NULL) {
2243
        /* If no deletions are required, use faster code */
2244
0
        for (i = inlen; --i >= 0; ) {
2245
0
            c = Py_CHARMASK(*input++);
2246
0
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2247
0
                changed = 1;
2248
0
        }
2249
0
        if (!changed && PyBytes_CheckExact(input_obj)) {
2250
0
            Py_SETREF(result, Py_NewRef(input_obj));
2251
0
        }
2252
0
        PyBuffer_Release(&del_table_view);
2253
0
        PyBuffer_Release(&table_view);
2254
0
        return result;
2255
0
    }
2256
2257
0
    if (table_chars == NULL) {
2258
0
        for (i = 0; i < 256; i++)
2259
0
            trans_table[i] = Py_CHARMASK(i);
2260
0
    } else {
2261
0
        for (i = 0; i < 256; i++)
2262
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2263
0
    }
2264
0
    PyBuffer_Release(&table_view);
2265
2266
0
    for (i = 0; i < dellen; i++)
2267
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2268
0
    PyBuffer_Release(&del_table_view);
2269
2270
0
    for (i = inlen; --i >= 0; ) {
2271
0
        c = Py_CHARMASK(*input++);
2272
0
        if (trans_table[c] != -1)
2273
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2274
0
                continue;
2275
0
        changed = 1;
2276
0
    }
2277
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2278
0
        Py_DECREF(result);
2279
0
        return Py_NewRef(input_obj);
2280
0
    }
2281
    /* Fix the size of the resulting byte string */
2282
0
    if (inlen > 0)
2283
0
        _PyBytes_Resize(&result, output - output_start);
2284
0
    return result;
2285
0
}
2286
2287
2288
/*[clinic input]
2289
2290
@permit_long_summary
2291
@permit_long_docstring_body
2292
@staticmethod
2293
bytes.maketrans
2294
2295
    frm: Py_buffer
2296
    to: Py_buffer
2297
    /
2298
2299
Return a translation table usable for the bytes or bytearray translate method.
2300
2301
The returned table will be one where each byte in frm is mapped to the byte at
2302
the same position in to.
2303
2304
The bytes objects frm and to must be of the same length.
2305
[clinic start generated code]*/
2306
2307
static PyObject *
2308
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2309
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2310
0
{
2311
0
    return _Py_bytes_maketrans(frm, to);
2312
0
}
2313
2314
2315
/*[clinic input]
2316
@permit_long_docstring_body
2317
bytes.replace
2318
2319
    old: Py_buffer
2320
    new: Py_buffer
2321
    count: Py_ssize_t = -1
2322
        Maximum number of occurrences to replace.
2323
        -1 (the default value) means replace all occurrences.
2324
    /
2325
2326
Return a copy with all occurrences of substring old replaced by new.
2327
2328
If the optional argument count is given, only the first count occurrences are
2329
replaced.
2330
[clinic start generated code]*/
2331
2332
static PyObject *
2333
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2334
                   Py_ssize_t count)
2335
/*[clinic end generated code: output=994fa588b6b9c104 input=8b99a9ab32bc06a2]*/
2336
28
{
2337
28
    return stringlib_replace((PyObject *)self,
2338
28
                             (const char *)old->buf, old->len,
2339
28
                             (const char *)new->buf, new->len, count);
2340
28
}
2341
2342
/** End DALKE **/
2343
2344
/*[clinic input]
2345
bytes.removeprefix as bytes_removeprefix
2346
2347
    prefix: Py_buffer
2348
    /
2349
2350
Return a bytes object with the given prefix string removed if present.
2351
2352
If the bytes starts with the prefix string, return bytes[len(prefix):].
2353
Otherwise, return a copy of the original bytes.
2354
[clinic start generated code]*/
2355
2356
static PyObject *
2357
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2358
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2359
0
{
2360
0
    const char *self_start = PyBytes_AS_STRING(self);
2361
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2362
0
    const char *prefix_start = prefix->buf;
2363
0
    Py_ssize_t prefix_len = prefix->len;
2364
2365
0
    if (self_len >= prefix_len
2366
0
        && prefix_len > 0
2367
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2368
0
    {
2369
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2370
0
                                         self_len - prefix_len);
2371
0
    }
2372
2373
0
    if (PyBytes_CheckExact(self)) {
2374
0
        return Py_NewRef(self);
2375
0
    }
2376
2377
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2378
0
}
2379
2380
/*[clinic input]
2381
bytes.removesuffix as bytes_removesuffix
2382
2383
    suffix: Py_buffer
2384
    /
2385
2386
Return a bytes object with the given suffix string removed if present.
2387
2388
If the bytes ends with the suffix string and that suffix is not empty,
2389
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2390
bytes.
2391
[clinic start generated code]*/
2392
2393
static PyObject *
2394
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2395
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2396
0
{
2397
0
    const char *self_start = PyBytes_AS_STRING(self);
2398
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2399
0
    const char *suffix_start = suffix->buf;
2400
0
    Py_ssize_t suffix_len = suffix->len;
2401
2402
0
    if (self_len >= suffix_len
2403
0
        && suffix_len > 0
2404
0
        && memcmp(self_start + self_len - suffix_len,
2405
0
                  suffix_start, suffix_len) == 0)
2406
0
    {
2407
0
        return PyBytes_FromStringAndSize(self_start,
2408
0
                                         self_len - suffix_len);
2409
0
    }
2410
2411
0
    if (PyBytes_CheckExact(self)) {
2412
0
        return Py_NewRef(self);
2413
0
    }
2414
2415
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2416
0
}
2417
2418
/*[clinic input]
2419
@permit_long_summary
2420
@text_signature "($self, prefix[, start[, end]], /)"
2421
bytes.startswith
2422
2423
    prefix as subobj: object
2424
        A bytes or a tuple of bytes to try.
2425
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2426
        Optional start position. Default: start of the bytes.
2427
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2428
        Optional stop position. Default: end of the bytes.
2429
    /
2430
2431
Return True if the bytes starts with the specified prefix, False otherwise.
2432
[clinic start generated code]*/
2433
2434
static PyObject *
2435
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2436
                      Py_ssize_t start, Py_ssize_t end)
2437
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2438
67.8k
{
2439
67.8k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2440
67.8k
                                subobj, start, end);
2441
67.8k
}
2442
2443
/*[clinic input]
2444
@permit_long_summary
2445
@text_signature "($self, suffix[, start[, end]], /)"
2446
bytes.endswith
2447
2448
    suffix as subobj: object
2449
        A bytes or a tuple of bytes to try.
2450
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2451
         Optional start position. Default: start of the bytes.
2452
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2453
         Optional stop position. Default: end of the bytes.
2454
    /
2455
2456
Return True if the bytes ends with the specified suffix, False otherwise.
2457
[clinic start generated code]*/
2458
2459
static PyObject *
2460
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2461
                    Py_ssize_t end)
2462
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2463
0
{
2464
0
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2465
0
                              subobj, start, end);
2466
0
}
2467
2468
2469
/*[clinic input]
2470
bytes.decode
2471
2472
    encoding: str(c_default="NULL") = 'utf-8'
2473
        The encoding with which to decode the bytes.
2474
    errors: str(c_default="NULL") = 'strict'
2475
        The error handling scheme to use for the handling of decoding errors.
2476
        The default is 'strict' meaning that decoding errors raise a
2477
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2478
        as well as any other name registered with codecs.register_error that
2479
        can handle UnicodeDecodeErrors.
2480
2481
Decode the bytes using the codec registered for encoding.
2482
[clinic start generated code]*/
2483
2484
static PyObject *
2485
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2486
                  const char *errors)
2487
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2488
23.2k
{
2489
23.2k
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2490
23.2k
}
2491
2492
2493
/*[clinic input]
2494
@permit_long_docstring_body
2495
bytes.splitlines
2496
2497
    keepends: bool = False
2498
2499
Return a list of the lines in the bytes, breaking at line boundaries.
2500
2501
Line breaks are not included in the resulting list unless keepends is given and
2502
true.
2503
[clinic start generated code]*/
2504
2505
static PyObject *
2506
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2507
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2508
0
{
2509
0
    return stringlib_splitlines(
2510
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2511
0
        PyBytes_GET_SIZE(self), keepends
2512
0
        );
2513
0
}
2514
2515
/*[clinic input]
2516
@classmethod
2517
bytes.fromhex
2518
2519
    string: object
2520
    /
2521
2522
Create a bytes object from a string of hexadecimal numbers.
2523
2524
Spaces between two numbers are accepted.
2525
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2526
[clinic start generated code]*/
2527
2528
static PyObject *
2529
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2530
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2531
0
{
2532
0
    PyObject *result = _PyBytes_FromHex(string, 0);
2533
0
    if (type != &PyBytes_Type && result != NULL) {
2534
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2535
0
    }
2536
0
    return result;
2537
0
}
2538
2539
/* Decode a string of hexadecimal digit pairs into a new object.
 *
 * `string` is either a str (which must be pure ASCII) or an object
 * exporting a simple buffer.  Runs of bytes for which Py_ISSPACE() is true
 * are skipped before each digit pair (not between the two digits of one
 * pair).  When `use_bytearray` is non-zero the writer is created with
 * _PyBytesWriter_CreateByteArray(), otherwise with PyBytesWriter_Create().
 *
 * Returns the object produced by PyBytesWriter_FinishWithPointer(), or NULL
 * with an exception set:
 *   - TypeError if `string` is neither str nor a buffer exporter;
 *   - ValueError if a non-hexadecimal character is found, or if the input
 *     ends after the first digit of a pair.
 *
 * Every dereference of `str` is bounds-checked against `end`: input obtained
 * through the buffer protocol is not guaranteed to be NUL-terminated, so the
 * byte at `end` must never be read.
 */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            /* writer is still NULL here; the error path passes it to
               PyBytesWriter_Discard() as-is. */
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input, never looking at or past `end` */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The input ended after a single digit: report the odd length
           without reading the byte at `end`. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 marks "odd number of digits"; any other value is
       the offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2651
2652
/*[clinic input]
2653
bytes.hex
2654
2655
    sep: object = NULL
2656
        An optional single character or byte to separate hex bytes.
2657
    bytes_per_sep: int = 1
2658
        How many bytes between separators.  Positive values count from the
2659
        right, negative values count from the left.
2660
2661
Create a string of hexadecimal numbers from a bytes object.
2662
2663
Example:
2664
>>> value = b'\xb9\x01\xef'
2665
>>> value.hex()
2666
'b901ef'
2667
>>> value.hex(':')
2668
'b9:01:ef'
2669
>>> value.hex(':', 2)
2670
'b9:01ef'
2671
>>> value.hex(':', -2)
2672
'b901:ef'
2673
[clinic start generated code]*/
2674
2675
static PyObject *
2676
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2677
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2678
0
{
2679
0
    const char *argbuf = PyBytes_AS_STRING(self);
2680
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2681
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2682
0
}
2683
2684
/* __getnewargs__: build a 1-tuple holding a bytes object made from this
   object's data and size ("(y#)" format). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *bytes_obj = _PyBytes_CAST(op);

    return Py_BuildValue("(y#)", bytes_obj->ob_sval, Py_SIZE(bytes_obj));
}
2690
2691
2692
static PyMethodDef
2693
bytes_methods[] = {
2694
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2695
    BYTES___BYTES___METHODDEF
2696
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2697
     _Py_capitalize__doc__},
2698
    STRINGLIB_CENTER_METHODDEF
2699
    BYTES_COUNT_METHODDEF
2700
    BYTES_DECODE_METHODDEF
2701
    BYTES_ENDSWITH_METHODDEF
2702
    STRINGLIB_EXPANDTABS_METHODDEF
2703
    BYTES_FIND_METHODDEF
2704
    BYTES_FROMHEX_METHODDEF
2705
    BYTES_HEX_METHODDEF
2706
    BYTES_INDEX_METHODDEF
2707
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2708
     _Py_isalnum__doc__},
2709
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2710
     _Py_isalpha__doc__},
2711
    {"isascii", stringlib_isascii, METH_NOARGS,
2712
     _Py_isascii__doc__},
2713
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2714
     _Py_isdigit__doc__},
2715
    {"islower", stringlib_islower, METH_NOARGS,
2716
     _Py_islower__doc__},
2717
    {"isspace", stringlib_isspace, METH_NOARGS,
2718
     _Py_isspace__doc__},
2719
    {"istitle", stringlib_istitle, METH_NOARGS,
2720
     _Py_istitle__doc__},
2721
    {"isupper", stringlib_isupper, METH_NOARGS,
2722
     _Py_isupper__doc__},
2723
    BYTES_JOIN_METHODDEF
2724
    STRINGLIB_LJUST_METHODDEF
2725
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2726
    BYTES_LSTRIP_METHODDEF
2727
    BYTES_MAKETRANS_METHODDEF
2728
    BYTES_PARTITION_METHODDEF
2729
    BYTES_REPLACE_METHODDEF
2730
    BYTES_REMOVEPREFIX_METHODDEF
2731
    BYTES_REMOVESUFFIX_METHODDEF
2732
    BYTES_RFIND_METHODDEF
2733
    BYTES_RINDEX_METHODDEF
2734
    STRINGLIB_RJUST_METHODDEF
2735
    BYTES_RPARTITION_METHODDEF
2736
    BYTES_RSPLIT_METHODDEF
2737
    BYTES_RSTRIP_METHODDEF
2738
    BYTES_SPLIT_METHODDEF
2739
    BYTES_SPLITLINES_METHODDEF
2740
    BYTES_STARTSWITH_METHODDEF
2741
    BYTES_STRIP_METHODDEF
2742
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2743
     _Py_swapcase__doc__},
2744
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2745
    BYTES_TRANSLATE_METHODDEF
2746
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2747
    STRINGLIB_ZFILL_METHODDEF
2748
    {NULL,     NULL}                         /* sentinel */
2749
};
2750
2751
/* nb_remainder slot: apply `self % arg` formatting when `self` is a bytes
   object; otherwise return NotImplemented. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);

        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2760
2761
static PyNumberMethods bytes_as_number = {
2762
    0,              /*nb_add*/
2763
    0,              /*nb_subtract*/
2764
    0,              /*nb_multiply*/
2765
    bytes_mod,      /*nb_remainder*/
2766
};
2767
2768
static PyObject *
2769
bytes_subtype_new(PyTypeObject *, PyObject *);
2770
2771
/*[clinic input]
2772
@classmethod
2773
bytes.__new__ as bytes_new
2774
2775
    source as x: object = NULL
2776
    encoding: str = NULL
2777
    errors: str = NULL
2778
2779
[clinic start generated code]*/
2780
2781
static PyObject *
2782
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2783
               const char *errors)
2784
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2785
193k
{
2786
193k
    PyObject *bytes;
2787
193k
    PyObject *func;
2788
193k
    Py_ssize_t size;
2789
2790
193k
    if (x == NULL) {
2791
0
        if (encoding != NULL || errors != NULL) {
2792
0
            PyErr_SetString(PyExc_TypeError,
2793
0
                            encoding != NULL ?
2794
0
                            "encoding without a string argument" :
2795
0
                            "errors without a string argument");
2796
0
            return NULL;
2797
0
        }
2798
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2799
0
    }
2800
193k
    else if (encoding != NULL) {
2801
        /* Encode via the codec registry */
2802
0
        if (!PyUnicode_Check(x)) {
2803
0
            PyErr_SetString(PyExc_TypeError,
2804
0
                            "encoding without a string argument");
2805
0
            return NULL;
2806
0
        }
2807
0
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2808
0
    }
2809
193k
    else if (errors != NULL) {
2810
0
        PyErr_SetString(PyExc_TypeError,
2811
0
                        PyUnicode_Check(x) ?
2812
0
                        "string argument without an encoding" :
2813
0
                        "errors without a string argument");
2814
0
        return NULL;
2815
0
    }
2816
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2817
       integer argument before deferring to PyBytes_FromObject, something
2818
       PyObject_Bytes doesn't do. */
2819
193k
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2820
0
        bytes = _PyObject_CallNoArgs(func);
2821
0
        Py_DECREF(func);
2822
0
        if (bytes == NULL)
2823
0
            return NULL;
2824
0
        if (!PyBytes_Check(bytes)) {
2825
0
            PyErr_Format(PyExc_TypeError,
2826
0
                         "%T.__bytes__() must return a bytes, not %T",
2827
0
                         x, bytes);
2828
0
            Py_DECREF(bytes);
2829
0
            return NULL;
2830
0
        }
2831
0
    }
2832
193k
    else if (PyErr_Occurred())
2833
0
        return NULL;
2834
193k
    else if (PyUnicode_Check(x)) {
2835
0
        PyErr_SetString(PyExc_TypeError,
2836
0
                        "string argument without an encoding");
2837
0
        return NULL;
2838
0
    }
2839
    /* Is it an integer? */
2840
193k
    else if (_PyIndex_Check(x)) {
2841
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2842
0
        if (size == -1 && PyErr_Occurred()) {
2843
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2844
0
                return NULL;
2845
0
            PyErr_Clear();  /* fall through */
2846
0
            bytes = PyBytes_FromObject(x);
2847
0
        }
2848
0
        else {
2849
0
            if (size < 0) {
2850
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2851
0
                return NULL;
2852
0
            }
2853
0
            bytes = _PyBytes_FromSize(size, 1);
2854
0
        }
2855
0
    }
2856
193k
    else {
2857
193k
        bytes = PyBytes_FromObject(x);
2858
193k
    }
2859
2860
193k
    if (bytes != NULL && type != &PyBytes_Type) {
2861
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2862
0
    }
2863
2864
193k
    return bytes;
2865
193k
}
2866
2867
static PyObject*
2868
_PyBytes_FromBuffer(PyObject *x)
2869
193k
{
2870
193k
    Py_buffer view;
2871
193k
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2872
0
        return NULL;
2873
2874
193k
    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
2875
193k
    if (writer == NULL) {
2876
0
        goto fail;
2877
0
    }
2878
2879
193k
    if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer),
2880
193k
                              &view, view.len, 'C') < 0) {
2881
0
        goto fail;
2882
0
    }
2883
2884
193k
    PyBuffer_Release(&view);
2885
193k
    return PyBytesWriter_Finish(writer);
2886
2887
0
fail:
2888
0
    PyBytesWriter_Discard(writer);
2889
0
    PyBuffer_Release(&view);
2890
0
    return NULL;
2891
193k
}
2892
2893
static PyObject*
2894
_PyBytes_FromList(PyObject *x)
2895
0
{
2896
0
    Py_ssize_t size = PyList_GET_SIZE(x);
2897
0
    PyBytesWriter *writer = PyBytesWriter_Create(size);
2898
0
    if (writer == NULL) {
2899
0
        return NULL;
2900
0
    }
2901
0
    char *str = PyBytesWriter_GetData(writer);
2902
0
    size = _PyBytesWriter_GetAllocated(writer);
2903
2904
0
    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(x); i++) {
2905
0
        PyObject *item = PyList_GET_ITEM(x, i);
2906
0
        Py_INCREF(item);
2907
0
        Py_ssize_t value = PyNumber_AsSsize_t(item, NULL);
2908
0
        Py_DECREF(item);
2909
0
        if (value == -1 && PyErr_Occurred())
2910
0
            goto error;
2911
2912
0
        if (value < 0 || value >= 256) {
2913
0
            PyErr_SetString(PyExc_ValueError,
2914
0
                            "bytes must be in range(0, 256)");
2915
0
            goto error;
2916
0
        }
2917
2918
0
        if (i >= size) {
2919
0
            str = _PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str);
2920
0
            if (str == NULL) {
2921
0
                goto error;
2922
0
            }
2923
0
            size = _PyBytesWriter_GetAllocated(writer);
2924
0
        }
2925
0
        *str++ = (char) value;
2926
0
    }
2927
0
    return PyBytesWriter_FinishWithPointer(writer, str);
2928
2929
0
error:
2930
0
    PyBytesWriter_Discard(writer);
2931
0
    return NULL;
2932
0
}
2933
2934
static PyObject*
2935
_PyBytes_FromTuple(PyObject *x)
2936
0
{
2937
0
    Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2938
0
    Py_ssize_t value;
2939
0
    PyObject *item;
2940
2941
0
    PyBytesWriter *writer = PyBytesWriter_Create(size);
2942
0
    if (writer == NULL) {
2943
0
        return NULL;
2944
0
    }
2945
0
    char *str = PyBytesWriter_GetData(writer);
2946
2947
0
    for (i = 0; i < size; i++) {
2948
0
        item = PyTuple_GET_ITEM(x, i);
2949
0
        value = PyNumber_AsSsize_t(item, NULL);
2950
0
        if (value == -1 && PyErr_Occurred())
2951
0
            goto error;
2952
2953
0
        if (value < 0 || value >= 256) {
2954
0
            PyErr_SetString(PyExc_ValueError,
2955
0
                            "bytes must be in range(0, 256)");
2956
0
            goto error;
2957
0
        }
2958
0
        *str++ = (char) value;
2959
0
    }
2960
0
    return PyBytesWriter_Finish(writer);
2961
2962
0
  error:
2963
0
    PyBytesWriter_Discard(writer);
2964
0
    return NULL;
2965
0
}
2966
2967
static PyObject *
2968
_PyBytes_FromIterator(PyObject *it, PyObject *x)
2969
0
{
2970
0
    Py_ssize_t i, size;
2971
2972
    /* For the iterator version, create a bytes writer and resize it as needed */
2973
0
    size = PyObject_LengthHint(x, 64);
2974
0
    if (size == -1 && PyErr_Occurred())
2975
0
        return NULL;
2976
2977
0
    PyBytesWriter *writer = PyBytesWriter_Create(size);
2978
0
    if (writer == NULL) {
2979
0
        return NULL;
2980
0
    }
2981
0
    char *str = PyBytesWriter_GetData(writer);
2982
0
    size = _PyBytesWriter_GetAllocated(writer);
2983
2984
    /* Run the iterator to exhaustion */
2985
0
    for (i = 0; ; i++) {
2986
0
        PyObject *item;
2987
0
        Py_ssize_t value;
2988
2989
        /* Get the next item */
2990
0
        item = PyIter_Next(it);
2991
0
        if (item == NULL) {
2992
0
            if (PyErr_Occurred())
2993
0
                goto error;
2994
0
            break;
2995
0
        }
2996
2997
        /* Interpret it as an int (__index__) */
2998
0
        value = PyNumber_AsSsize_t(item, NULL);
2999
0
        Py_DECREF(item);
3000
0
        if (value == -1 && PyErr_Occurred())
3001
0
            goto error;
3002
3003
        /* Range check */
3004
0
        if (value < 0 || value >= 256) {
3005
0
            PyErr_SetString(PyExc_ValueError,
3006
0
                            "bytes must be in range(0, 256)");
3007
0
            goto error;
3008
0
        }
3009
3010
        /* Append the byte */
3011
0
        if (i >= size) {
3012
0
            str = _PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str);
3013
0
            if (str == NULL) {
3014
0
                goto error;
3015
0
            }
3016
0
            size = _PyBytesWriter_GetAllocated(writer);
3017
0
        }
3018
0
        *str++ = (char) value;
3019
0
    }
3020
0
    return PyBytesWriter_FinishWithPointer(writer, str);
3021
3022
0
  error:
3023
0
    PyBytesWriter_Discard(writer);
3024
0
    return NULL;
3025
0
}
3026
3027
PyObject *
3028
PyBytes_FromObject(PyObject *x)
3029
193k
{
3030
193k
    PyObject *it, *result;
3031
3032
193k
    if (x == NULL) {
3033
0
        PyErr_BadInternalCall();
3034
0
        return NULL;
3035
0
    }
3036
3037
193k
    if (PyBytes_CheckExact(x)) {
3038
0
        return Py_NewRef(x);
3039
0
    }
3040
3041
    /* Use the modern buffer interface */
3042
193k
    if (PyObject_CheckBuffer(x))
3043
193k
        return _PyBytes_FromBuffer(x);
3044
3045
0
    if (PyList_CheckExact(x))
3046
0
        return _PyBytes_FromList(x);
3047
3048
0
    if (PyTuple_CheckExact(x))
3049
0
        return _PyBytes_FromTuple(x);
3050
3051
0
    if (!PyUnicode_Check(x)) {
3052
0
        it = PyObject_GetIter(x);
3053
0
        if (it != NULL) {
3054
0
            result = _PyBytes_FromIterator(it, x);
3055
0
            Py_DECREF(it);
3056
0
            return result;
3057
0
        }
3058
0
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
3059
0
            return NULL;
3060
0
        }
3061
0
    }
3062
3063
0
    PyErr_Format(PyExc_TypeError,
3064
0
                 "cannot convert '%.200s' object to bytes",
3065
0
                 Py_TYPE(x)->tp_name);
3066
0
    return NULL;
3067
0
}
3068
3069
/* This allocator is needed for subclasses that don't want to use __new__.
3070
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3071
 *
3072
 * This allocator will be removed when ob_shash is removed.
3073
 */
3074
static PyObject *
3075
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3076
0
{
3077
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3078
0
    if (obj == NULL) {
3079
0
        return NULL;
3080
0
    }
3081
0
    set_ob_shash(obj, -1);
3082
0
    return (PyObject*)obj;
3083
0
}
3084
3085
static PyObject *
3086
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3087
0
{
3088
0
    PyObject *pnew;
3089
0
    Py_ssize_t n;
3090
3091
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3092
0
    assert(PyBytes_Check(tmp));
3093
0
    n = PyBytes_GET_SIZE(tmp);
3094
0
    pnew = type->tp_alloc(type, n);
3095
0
    if (pnew != NULL) {
3096
0
        memcpy(PyBytes_AS_STRING(pnew),
3097
0
                  PyBytes_AS_STRING(tmp), n+1);
3098
0
        set_ob_shash((PyBytesObject *)pnew,
3099
0
            get_ob_shash((PyBytesObject *)tmp));
3100
0
    }
3101
0
    return pnew;
3102
0
}
3103
3104
PyDoc_STRVAR(bytes_doc,
3105
"bytes(iterable_of_ints) -> bytes\n\
3106
bytes(string, encoding[, errors]) -> bytes\n\
3107
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3108
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3109
bytes() -> empty bytes object\n\
3110
\n\
3111
Construct an immutable array of bytes from:\n\
3112
  - an iterable yielding integers in range(256)\n\
3113
  - a text string encoded using the specified encoding\n\
3114
  - any object implementing the buffer API.\n\
3115
  - an integer");
3116
3117
static PyObject *bytes_iter(PyObject *seq);
3118
3119
PyTypeObject PyBytes_Type = {
3120
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3121
    "bytes",
3122
    PyBytesObject_SIZE,
3123
    sizeof(char),
3124
    0,                                          /* tp_dealloc */
3125
    0,                                          /* tp_vectorcall_offset */
3126
    0,                                          /* tp_getattr */
3127
    0,                                          /* tp_setattr */
3128
    0,                                          /* tp_as_async */
3129
    bytes_repr,                                 /* tp_repr */
3130
    &bytes_as_number,                           /* tp_as_number */
3131
    &bytes_as_sequence,                         /* tp_as_sequence */
3132
    &bytes_as_mapping,                          /* tp_as_mapping */
3133
    bytes_hash,                                 /* tp_hash */
3134
    0,                                          /* tp_call */
3135
    bytes_str,                                  /* tp_str */
3136
    PyObject_GenericGetAttr,                    /* tp_getattro */
3137
    0,                                          /* tp_setattro */
3138
    &bytes_as_buffer,                           /* tp_as_buffer */
3139
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3140
        Py_TPFLAGS_BYTES_SUBCLASS |
3141
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3142
    bytes_doc,                                  /* tp_doc */
3143
    0,                                          /* tp_traverse */
3144
    0,                                          /* tp_clear */
3145
    bytes_richcompare,                          /* tp_richcompare */
3146
    0,                                          /* tp_weaklistoffset */
3147
    bytes_iter,                                 /* tp_iter */
3148
    0,                                          /* tp_iternext */
3149
    bytes_methods,                              /* tp_methods */
3150
    0,                                          /* tp_members */
3151
    0,                                          /* tp_getset */
3152
    0,                                          /* tp_base */
3153
    0,                                          /* tp_dict */
3154
    0,                                          /* tp_descr_get */
3155
    0,                                          /* tp_descr_set */
3156
    0,                                          /* tp_dictoffset */
3157
    0,                                          /* tp_init */
3158
    bytes_alloc,                                /* tp_alloc */
3159
    bytes_new,                                  /* tp_new */
3160
    PyObject_Free,                              /* tp_free */
3161
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3162
};
3163
3164
void
3165
PyBytes_Concat(PyObject **pv, PyObject *w)
3166
0
{
3167
0
    assert(pv != NULL);
3168
0
    if (*pv == NULL)
3169
0
        return;
3170
0
    if (w == NULL) {
3171
0
        Py_CLEAR(*pv);
3172
0
        return;
3173
0
    }
3174
3175
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3176
        /* Only one reference, so we can resize in place */
3177
0
        Py_ssize_t oldsize;
3178
0
        Py_buffer wb;
3179
3180
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3181
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3182
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3183
0
            Py_CLEAR(*pv);
3184
0
            return;
3185
0
        }
3186
3187
0
        oldsize = PyBytes_GET_SIZE(*pv);
3188
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3189
0
            PyErr_NoMemory();
3190
0
            goto error;
3191
0
        }
3192
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3193
0
            goto error;
3194
3195
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3196
0
        PyBuffer_Release(&wb);
3197
0
        return;
3198
3199
0
      error:
3200
0
        PyBuffer_Release(&wb);
3201
0
        Py_CLEAR(*pv);
3202
0
        return;
3203
0
    }
3204
3205
0
    else {
3206
        /* Multiple references, need to create new object */
3207
0
        PyObject *v;
3208
0
        v = bytes_concat(*pv, w);
3209
0
        Py_SETREF(*pv, v);
3210
0
    }
3211
0
}
3212
3213
void
3214
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3215
0
{
3216
0
    PyBytes_Concat(pv, w);
3217
0
    Py_XDECREF(w);
3218
0
}
3219
3220
3221
/* The following function breaks the notion that bytes are immutable:
3222
   it changes the size of a bytes object.  You can think of it
3223
   as creating a new bytes object and destroying the old one, only
3224
   more efficiently.
3225
   Note that if there's not enough memory to resize the bytes object, the
3226
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3227
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3228
   returned, and the value in *pv may or may not be the same as on input.
3229
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3230
   does *not* include that), and a trailing \0 byte is stored.
3231
*/
3232
3233
int
3234
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3235
223k
{
3236
223k
    PyObject *v;
3237
223k
    PyBytesObject *sv;
3238
223k
    v = *pv;
3239
223k
    if (!PyBytes_Check(v) || newsize < 0) {
3240
0
        *pv = 0;
3241
0
        Py_DECREF(v);
3242
0
        PyErr_BadInternalCall();
3243
0
        return -1;
3244
0
    }
3245
223k
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3246
223k
    if (oldsize == newsize) {
3247
        /* return early if newsize is equal to v->ob_size */
3248
0
        return 0;
3249
0
    }
3250
223k
    if (oldsize == 0) {
3251
0
        *pv = _PyBytes_FromSize(newsize, 0);
3252
0
        Py_DECREF(v);
3253
0
        return (*pv == NULL) ? -1 : 0;
3254
0
    }
3255
223k
    if (newsize == 0) {
3256
42.8k
        *pv = bytes_get_empty();
3257
42.8k
        Py_DECREF(v);
3258
42.8k
        return 0;
3259
42.8k
    }
3260
181k
    if (!_PyObject_IsUniquelyReferenced(v)) {
3261
0
        if (oldsize < newsize) {
3262
0
            *pv = _PyBytes_FromSize(newsize, 0);
3263
0
            if (*pv) {
3264
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3265
0
            }
3266
0
        }
3267
0
        else {
3268
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3269
0
        }
3270
0
        Py_DECREF(v);
3271
0
        return (*pv == NULL) ? -1 : 0;
3272
0
    }
3273
3274
#ifdef Py_TRACE_REFS
3275
    _Py_ForgetReference(v);
3276
#endif
3277
181k
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3278
181k
    *pv = (PyObject *)
3279
181k
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3280
181k
    if (*pv == NULL) {
3281
#ifdef Py_REF_DEBUG
3282
        _Py_DecRefTotal(_PyThreadState_GET());
3283
#endif
3284
0
        PyObject_Free(v);
3285
0
        PyErr_NoMemory();
3286
0
        return -1;
3287
0
    }
3288
181k
    _Py_NewReferenceNoTotal(*pv);
3289
181k
    sv = (PyBytesObject *) *pv;
3290
181k
    Py_SET_SIZE(sv, newsize);
3291
181k
    sv->ob_sval[newsize] = '\0';
3292
181k
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3293
181k
    return 0;
3294
181k
}
3295
3296
3297
/*********************** Bytes Iterator ****************************/
3298
3299
typedef struct {
3300
    PyObject_HEAD
3301
    Py_ssize_t it_index;
3302
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3303
} striterobject;
3304
3305
204
#define _striterobject_CAST(op)  ((striterobject *)(op))
3306
3307
static void
3308
striter_dealloc(PyObject *op)
3309
29
{
3310
29
    striterobject *it = _striterobject_CAST(op);
3311
29
    _PyObject_GC_UNTRACK(it);
3312
29
    Py_XDECREF(it->it_seq);
3313
29
    PyObject_GC_Del(it);
3314
29
}
3315
3316
static int
3317
striter_traverse(PyObject *op, visitproc visit, void *arg)
3318
0
{
3319
0
    striterobject *it = _striterobject_CAST(op);
3320
0
    Py_VISIT(it->it_seq);
3321
0
    return 0;
3322
0
}
3323
3324
static PyObject *
3325
striter_next(PyObject *op)
3326
175
{
3327
175
    striterobject *it = _striterobject_CAST(op);
3328
175
    PyBytesObject *seq;
3329
3330
175
    assert(it != NULL);
3331
175
    seq = it->it_seq;
3332
175
    if (seq == NULL)
3333
0
        return NULL;
3334
175
    assert(PyBytes_Check(seq));
3335
3336
175
    if (it->it_index < PyBytes_GET_SIZE(seq)) {
3337
168
        return _PyLong_FromUnsignedChar(
3338
168
            (unsigned char)seq->ob_sval[it->it_index++]);
3339
168
    }
3340
3341
7
    it->it_seq = NULL;
3342
7
    Py_DECREF(seq);
3343
7
    return NULL;
3344
175
}
3345
3346
static PyObject *
3347
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
3348
0
{
3349
0
    striterobject *it = _striterobject_CAST(op);
3350
0
    Py_ssize_t len = 0;
3351
0
    if (it->it_seq)
3352
0
        len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3353
0
    return PyLong_FromSsize_t(len);
3354
0
}
3355
3356
PyDoc_STRVAR(length_hint_doc,
3357
             "Private method returning an estimate of len(list(it)).");
3358
3359
static PyObject *
3360
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
3361
0
{
3362
0
    PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3363
3364
    /* _PyEval_GetBuiltin can invoke arbitrary code,
3365
     * so the call must happen before the iterator's pointers are accessed.
3366
     * see issue #101765 */
3367
0
    striterobject *it = _striterobject_CAST(op);
3368
0
    if (it->it_seq != NULL) {
3369
0
        return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3370
0
    } else {
3371
0
        return Py_BuildValue("N(())", iter);
3372
0
    }
3373
0
}
3374
3375
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3376
3377
static PyObject *
3378
striter_setstate(PyObject *op, PyObject *state)
3379
0
{
3380
0
    Py_ssize_t index = PyLong_AsSsize_t(state);
3381
0
    if (index == -1 && PyErr_Occurred())
3382
0
        return NULL;
3383
0
    striterobject *it = _striterobject_CAST(op);
3384
0
    if (it->it_seq != NULL) {
3385
0
        if (index < 0)
3386
0
            index = 0;
3387
0
        else if (index > PyBytes_GET_SIZE(it->it_seq))
3388
0
            index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3389
0
        it->it_index = index;
3390
0
    }
3391
0
    Py_RETURN_NONE;
3392
0
}
3393
3394
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3395
3396
static PyMethodDef striter_methods[] = {
3397
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3398
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3399
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3400
    {NULL,              NULL}           /* sentinel */
3401
};
3402
3403
PyTypeObject PyBytesIter_Type = {
3404
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3405
    "bytes_iterator",                           /* tp_name */
3406
    sizeof(striterobject),                      /* tp_basicsize */
3407
    0,                                          /* tp_itemsize */
3408
    /* methods */
3409
    striter_dealloc,                            /* tp_dealloc */
3410
    0,                                          /* tp_vectorcall_offset */
3411
    0,                                          /* tp_getattr */
3412
    0,                                          /* tp_setattr */
3413
    0,                                          /* tp_as_async */
3414
    0,                                          /* tp_repr */
3415
    0,                                          /* tp_as_number */
3416
    0,                                          /* tp_as_sequence */
3417
    0,                                          /* tp_as_mapping */
3418
    0,                                          /* tp_hash */
3419
    0,                                          /* tp_call */
3420
    0,                                          /* tp_str */
3421
    PyObject_GenericGetAttr,                    /* tp_getattro */
3422
    0,                                          /* tp_setattro */
3423
    0,                                          /* tp_as_buffer */
3424
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3425
    0,                                          /* tp_doc */
3426
    striter_traverse,                           /* tp_traverse */
3427
    0,                                          /* tp_clear */
3428
    0,                                          /* tp_richcompare */
3429
    0,                                          /* tp_weaklistoffset */
3430
    PyObject_SelfIter,                          /* tp_iter */
3431
    striter_next,                               /* tp_iternext */
3432
    striter_methods,                            /* tp_methods */
3433
    0,
3434
};
3435
3436
static PyObject *
3437
bytes_iter(PyObject *seq)
3438
29
{
3439
29
    striterobject *it;
3440
3441
29
    if (!PyBytes_Check(seq)) {
3442
0
        PyErr_BadInternalCall();
3443
0
        return NULL;
3444
0
    }
3445
29
    it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3446
29
    if (it == NULL)
3447
0
        return NULL;
3448
29
    it->it_index = 0;
3449
29
    it->it_seq = (PyBytesObject *)Py_NewRef(seq);
3450
29
    _PyObject_GC_TRACK(it);
3451
29
    return (PyObject *)it;
3452
29
}
3453
3454
3455
void
3456
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3457
    const char* src, Py_ssize_t len_src)
3458
10.0k
{
3459
10.0k
    if (len_dest == 0) {
3460
4.26k
        return;
3461
4.26k
    }
3462
5.77k
    if (len_src == 1) {
3463
1.05k
        memset(dest, src[0], len_dest);
3464
1.05k
    }
3465
4.72k
    else {
3466
4.72k
        if (src != dest) {
3467
4.72k
            memcpy(dest, src, len_src);
3468
4.72k
        }
3469
4.72k
        Py_ssize_t copied = len_src;
3470
24.4k
        while (copied < len_dest) {
3471
19.7k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3472
19.7k
            memcpy(dest + copied, dest, bytes_to_copy);
3473
19.7k
            copied += bytes_to_copy;
3474
19.7k
        }
3475
4.72k
    }
3476
5.77k
}
3477
3478
3479
// --- PyBytesWriter API -----------------------------------------------------
3480
3481
static inline char*
3482
byteswriter_data(PyBytesWriter *writer)
3483
266k
{
3484
266k
    return _PyBytesWriter_GetData(writer);
3485
266k
}
3486
3487
3488
static inline Py_ssize_t
3489
byteswriter_allocated(PyBytesWriter *writer)
3490
241k
{
3491
241k
    if (writer->obj == NULL) {
3492
231k
        return sizeof(writer->small_buffer);
3493
231k
    }
3494
10.2k
    else if (writer->use_bytearray) {
3495
0
        return PyByteArray_GET_SIZE(writer->obj);
3496
0
    }
3497
10.2k
    else {
3498
10.2k
        return PyBytes_GET_SIZE(writer->obj);
3499
10.2k
    }
3500
241k
}
3501
3502
3503
#ifdef MS_WINDOWS
3504
   /* On Windows, overallocating by 50% is the best factor */
3505
#  define OVERALLOCATE_FACTOR 2
3506
#else
3507
   /* On Linux, overallocating by 25% is the best factor */
3508
582
#  define OVERALLOCATE_FACTOR 4
3509
#endif
3510
3511
static inline int
3512
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3513
233k
{
3514
233k
    assert(size >= 0);
3515
3516
233k
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3517
233k
    if (size <= old_allocated) {
3518
229k
        return 0;
3519
229k
    }
3520
3521
4.36k
    if (resize & writer->overallocate) {
3522
291
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3523
291
            size += size / OVERALLOCATE_FACTOR;
3524
291
        }
3525
291
    }
3526
3527
4.36k
    if (writer->obj != NULL) {
3528
268
        if (writer->use_bytearray) {
3529
0
            if (PyByteArray_Resize(writer->obj, size)) {
3530
0
                return -1;
3531
0
            }
3532
0
        }
3533
268
        else {
3534
268
            if (_PyBytes_Resize(&writer->obj, size)) {
3535
0
                return -1;
3536
0
            }
3537
268
        }
3538
268
        assert(writer->obj != NULL);
3539
268
    }
3540
4.09k
    else if (writer->use_bytearray) {
3541
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3542
0
        if (writer->obj == NULL) {
3543
0
            return -1;
3544
0
        }
3545
0
        if (resize) {
3546
0
            assert((size_t)size > sizeof(writer->small_buffer));
3547
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3548
0
                   writer->small_buffer,
3549
0
                   sizeof(writer->small_buffer));
3550
0
        }
3551
0
    }
3552
4.09k
    else {
3553
4.09k
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3554
4.09k
        if (writer->obj == NULL) {
3555
0
            return -1;
3556
0
        }
3557
4.09k
        if (resize) {
3558
23
            assert((size_t)size > sizeof(writer->small_buffer));
3559
23
            memcpy(PyBytes_AS_STRING(writer->obj),
3560
23
                   writer->small_buffer,
3561
23
                   sizeof(writer->small_buffer));
3562
23
        }
3563
4.09k
    }
3564
3565
#ifdef Py_DEBUG
3566
    Py_ssize_t allocated = byteswriter_allocated(writer);
3567
    if (resize && allocated > old_allocated) {
3568
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3569
               allocated - old_allocated);
3570
    }
3571
#endif
3572
3573
4.36k
    return 0;
3574
4.36k
}
3575
3576
3577
static PyBytesWriter*
3578
byteswriter_create(Py_ssize_t size, int use_bytearray)
3579
234k
{
3580
234k
    if (size < 0) {
3581
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3582
0
        return NULL;
3583
0
    }
3584
3585
234k
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3586
234k
    if (writer == NULL) {
3587
22
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3588
22
        if (writer == NULL) {
3589
0
            PyErr_NoMemory();
3590
0
            return NULL;
3591
0
        }
3592
22
    }
3593
234k
    writer->obj = NULL;
3594
234k
    writer->size = 0;
3595
234k
    writer->use_bytearray = use_bytearray;
3596
234k
    writer->overallocate = !use_bytearray;
3597
3598
234k
    if (size >= 1) {
3599
224k
        if (byteswriter_resize(writer, size, 0) < 0) {
3600
0
            PyBytesWriter_Discard(writer);
3601
0
            return NULL;
3602
0
        }
3603
224k
        writer->size = size;
3604
224k
    }
3605
#ifdef Py_DEBUG
3606
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3607
#endif
3608
234k
    return writer;
3609
234k
}
3610
3611
PyBytesWriter*
3612
PyBytesWriter_Create(Py_ssize_t size)
3613
234k
{
3614
234k
    return byteswriter_create(size, 0);
3615
234k
}
3616
3617
PyBytesWriter*
3618
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3619
0
{
3620
0
    return byteswriter_create(size, 1);
3621
0
}
3622
3623
3624
void
3625
PyBytesWriter_Discard(PyBytesWriter *writer)
3626
234k
{
3627
234k
    if (writer == NULL) {
3628
36
        return;
3629
36
    }
3630
3631
234k
    Py_XDECREF(writer->obj);
3632
234k
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3633
234k
}
3634
3635
3636
PyObject*
3637
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3638
202k
{
3639
202k
    PyObject *result;
3640
202k
    if (size == 0) {
3641
9.79k
        result = bytes_get_empty();
3642
9.79k
    }
3643
192k
    else if (writer->obj != NULL) {
3644
2.97k
        if (writer->use_bytearray) {
3645
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3646
0
                if (PyByteArray_Resize(writer->obj, size)) {
3647
0
                    goto error;
3648
0
                }
3649
0
            }
3650
0
        }
3651
2.97k
        else {
3652
2.97k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3653
2.28k
                if (_PyBytes_Resize(&writer->obj, size)) {
3654
0
                    goto error;
3655
0
                }
3656
2.28k
            }
3657
2.97k
        }
3658
2.97k
        result = writer->obj;
3659
2.97k
        writer->obj = NULL;
3660
2.97k
    }
3661
189k
    else if (writer->use_bytearray) {
3662
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3663
0
    }
3664
189k
    else {
3665
189k
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3666
189k
    }
3667
202k
    PyBytesWriter_Discard(writer);
3668
202k
    return result;
3669
3670
0
error:
3671
0
    PyBytesWriter_Discard(writer);
3672
0
    return NULL;
3673
202k
}
3674
3675
PyObject*
3676
PyBytesWriter_Finish(PyBytesWriter *writer)
3677
193k
{
3678
193k
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3679
193k
}
3680
3681
3682
PyObject*
3683
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3684
8.36k
{
3685
8.36k
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3686
8.36k
    if (size < 0 || size > byteswriter_allocated(writer)) {
3687
0
        PyBytesWriter_Discard(writer);
3688
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3689
0
        return NULL;
3690
0
    }
3691
3692
8.36k
    return PyBytesWriter_FinishWithSize(writer, size);
3693
8.36k
}
3694
3695
3696
void*
3697
PyBytesWriter_GetData(PyBytesWriter *writer)
3698
240k
{
3699
240k
    return byteswriter_data(writer);
3700
240k
}
3701
3702
3703
Py_ssize_t
3704
PyBytesWriter_GetSize(PyBytesWriter *writer)
3705
0
{
3706
0
    return _PyBytesWriter_GetSize(writer);
3707
0
}
3708
3709
3710
static Py_ssize_t
3711
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3712
0
{
3713
0
    return byteswriter_allocated(writer);
3714
0
}
3715
3716
3717
int
3718
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3719
0
{
3720
0
    if (size < 0) {
3721
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3722
0
        return -1;
3723
0
    }
3724
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3725
0
        return -1;
3726
0
    }
3727
0
    writer->size = size;
3728
0
    return 0;
3729
0
}
3730
3731
3732
static void*
3733
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3734
                                      void *data)
3735
0
{
3736
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3737
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3738
0
        return NULL;
3739
0
    }
3740
0
    return byteswriter_data(writer) + pos;
3741
0
}
3742
3743
3744
int
3745
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3746
8.48k
{
3747
8.48k
    if (size < 0 && writer->size + size < 0) {
3748
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3749
0
        return -1;
3750
0
    }
3751
8.48k
    if (size > PY_SSIZE_T_MAX - writer->size) {
3752
0
        PyErr_NoMemory();
3753
0
        return -1;
3754
0
    }
3755
8.48k
    size = writer->size + size;
3756
3757
8.48k
    if (byteswriter_resize(writer, size, 1) < 0) {
3758
0
        return -1;
3759
0
    }
3760
8.48k
    writer->size = size;
3761
8.48k
    return 0;
3762
8.48k
}
3763
3764
3765
void*
3766
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3767
                                   void *buf)
3768
8.48k
{
3769
8.48k
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3770
8.48k
    if (PyBytesWriter_Grow(writer, size) < 0) {
3771
0
        return NULL;
3772
0
    }
3773
8.48k
    return byteswriter_data(writer) + pos;
3774
8.48k
}
3775
3776
3777
int
3778
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3779
                         const void *bytes, Py_ssize_t size)
3780
0
{
3781
0
    if (size < 0) {
3782
0
        size_t len = strlen(bytes);
3783
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3784
0
            PyErr_NoMemory();
3785
0
            return -1;
3786
0
        }
3787
0
        size = (Py_ssize_t)len;
3788
0
    }
3789
3790
0
    Py_ssize_t pos = writer->size;
3791
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3792
0
        return -1;
3793
0
    }
3794
0
    char *buf = byteswriter_data(writer);
3795
0
    memcpy(buf + pos, bytes, size);
3796
0
    return 0;
3797
0
}
3798
3799
3800
int
3801
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3802
0
{
3803
0
    Py_ssize_t pos = writer->size;
3804
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3805
0
        return -1;
3806
0
    }
3807
3808
0
    va_list vargs;
3809
0
    va_start(vargs, format);
3810
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3811
0
    va_end(vargs);
3812
3813
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3814
0
    return PyBytesWriter_Resize(writer, size);
3815
0
}