Coverage Report

Created: 2025-11-24 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/bytesobject.c
Line
Count
Source
1
/* bytes object implementation */
2
3
#include "Python.h"
4
#include "pycore_abstract.h"      // _PyIndex_Check()
5
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
6
#include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
7
#include "pycore_call.h"          // _PyObject_CallNoArgs()
8
#include "pycore_ceval.h"         // _PyEval_GetBuiltin()
9
#include "pycore_format.h"        // F_LJUST
10
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
11
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT()
12
#include "pycore_initconfig.h"    // _PyStatus_OK()
13
#include "pycore_long.h"          // _PyLong_DigitValue
14
#include "pycore_object.h"        // _PyObject_GC_TRACK
15
#include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16
#include "pycore_strhex.h"        // _Py_strhex_with_sep()
17
#include "pycore_unicodeobject.h" // _PyUnicode_FormatLong()
18
19
#include <stddef.h>
20
21
/*[clinic input]
22
class bytes "PyBytesObject *" "&PyBytes_Type"
23
[clinic start generated code]*/
24
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
25
26
#include "clinic/bytesobject.c.h"
27
28
100M
#define PyBytesObject_SIZE _PyBytesObject_SIZE
29
30
/* Forward declaration */
31
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
32
                                                   Py_ssize_t size, void *data);
33
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
34
35
36
7.92M
#define CHARACTERS _Py_SINGLETON(bytes_characters)
37
/* Pointer to the entry of the CHARACTERS table indexed by 'ch', typed as
   PyBytesObject *.  The expansion is a plain parenthesized expression with
   no trailing ';', so it can be used anywhere an expression is allowed
   (as an operand, an argument, or the body of an unbraced 'if'). */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
39
5.78M
#define EMPTY (&_Py_SINGLETON(bytes_empty))
40
41
42
// Return a reference to the immortal empty bytes string singleton.
43
static inline PyObject* bytes_get_empty(void)
44
5.78M
{
45
5.78M
    PyObject *empty = &EMPTY->ob_base.ob_base;
46
5.78M
    assert(_Py_IsImmortal(empty));
47
5.78M
    return empty;
48
5.78M
}
49
50
51
static inline void
52
set_ob_shash(PyBytesObject *a, Py_hash_t hash)
53
64.5M
{
54
64.5M
_Py_COMP_DIAG_PUSH
55
64.5M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
56
#ifdef Py_GIL_DISABLED
57
    _Py_atomic_store_ssize_relaxed(&a->ob_shash, hash);
58
#else
59
64.5M
    a->ob_shash = hash;
60
64.5M
#endif
61
64.5M
_Py_COMP_DIAG_POP
62
64.5M
}
63
64
static inline Py_hash_t
65
get_ob_shash(PyBytesObject *a)
66
30.7M
{
67
30.7M
_Py_COMP_DIAG_PUSH
68
30.7M
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
69
#ifdef Py_GIL_DISABLED
70
    return _Py_atomic_load_ssize_relaxed(&a->ob_shash);
71
#else
72
30.7M
    return a->ob_shash;
73
30.7M
#endif
74
30.7M
_Py_COMP_DIAG_POP
75
30.7M
}
76
77
78
/*
79
   For PyBytes_FromString(), the parameter 'str' points to a null-terminated
80
   string containing exactly 'size' bytes.
81
82
   For PyBytes_FromStringAndSize(), the parameter 'str' is
83
   either NULL or else points to a string containing at least 'size' bytes.
84
   For PyBytes_FromStringAndSize(), the string in the 'str' parameter does
85
   not have to be null-terminated.  (Therefore it is safe to construct a
86
   substring by calling 'PyBytes_FromStringAndSize(origstring, substrlen)'.)
87
   If 'str' is NULL then PyBytes_FromStringAndSize() will allocate 'size+1'
88
   bytes (setting the last byte to the null terminating character) and you can
89
   fill in the data yourself.  If 'str' is non-NULL then the resulting
90
   PyBytes object must be treated as immutable and you must not fill in nor
91
   alter the data yourself, since the strings may be shared.
92
93
   The PyObject member 'op->ob_size', which denotes the number of "extra
94
   items" in a variable-size object, will contain the number of bytes
95
   allocated for string data, not counting the null terminating character.
96
   It is therefore equal to the 'size' parameter (for
97
   PyBytes_FromStringAndSize()) or the length of the string in the 'str'
98
   parameter (for PyBytes_FromString()).
99
*/
100
static PyObject *
101
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
102
49.2M
{
103
49.2M
    PyBytesObject *op;
104
49.2M
    assert(size >= 0);
105
106
49.2M
    if (size == 0) {
107
0
        return bytes_get_empty();
108
0
    }
109
110
49.2M
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
111
0
        PyErr_SetString(PyExc_OverflowError,
112
0
                        "byte string is too large");
113
0
        return NULL;
114
0
    }
115
116
    /* Inline PyObject_NewVar */
117
49.2M
    if (use_calloc)
118
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
119
49.2M
    else
120
49.2M
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
121
49.2M
    if (op == NULL) {
122
0
        return PyErr_NoMemory();
123
0
    }
124
49.2M
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
125
49.2M
    set_ob_shash(op, -1);
126
49.2M
    if (!use_calloc) {
127
49.2M
        op->ob_sval[size] = '\0';
128
49.2M
    }
129
49.2M
    return (PyObject *) op;
130
49.2M
}
131
132
PyObject *
133
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
134
61.3M
{
135
61.3M
    PyBytesObject *op;
136
61.3M
    if (size < 0) {
137
0
        PyErr_SetString(PyExc_SystemError,
138
0
            "Negative size passed to PyBytes_FromStringAndSize");
139
0
        return NULL;
140
0
    }
141
61.3M
    if (size == 1 && str != NULL) {
142
7.92M
        op = CHARACTER(*str & 255);
143
7.92M
        assert(_Py_IsImmortal(op));
144
7.92M
        return (PyObject *)op;
145
7.92M
    }
146
53.4M
    if (size == 0) {
147
5.74M
        return bytes_get_empty();
148
5.74M
    }
149
150
47.7M
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
151
47.7M
    if (op == NULL)
152
0
        return NULL;
153
47.7M
    if (str == NULL)
154
6.02M
        return (PyObject *) op;
155
156
41.6M
    memcpy(op->ob_sval, str, size);
157
41.6M
    return (PyObject *) op;
158
47.7M
}
159
160
PyObject *
161
PyBytes_FromString(const char *str)
162
696
{
163
696
    size_t size;
164
696
    PyBytesObject *op;
165
166
696
    assert(str != NULL);
167
696
    size = strlen(str);
168
696
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
169
0
        PyErr_SetString(PyExc_OverflowError,
170
0
            "byte string is too long");
171
0
        return NULL;
172
0
    }
173
174
696
    if (size == 0) {
175
0
        return bytes_get_empty();
176
0
    }
177
696
    else if (size == 1) {
178
0
        op = CHARACTER(*str & 255);
179
0
        assert(_Py_IsImmortal(op));
180
0
        return (PyObject *)op;
181
0
    }
182
183
    /* Inline PyObject_NewVar */
184
696
    op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
185
696
    if (op == NULL) {
186
0
        return PyErr_NoMemory();
187
0
    }
188
696
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
189
696
    set_ob_shash(op, -1);
190
696
    memcpy(op->ob_sval, str, size+1);
191
696
    return (PyObject *) op;
192
696
}
193
194
195
static char*
196
bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos,
197
                 const char *format, va_list vargs)
198
0
{
199
0
    const char *f;
200
0
    const char *p;
201
0
    Py_ssize_t prec;
202
0
    int longflag;
203
0
    int size_tflag;
204
    /* Longest 64-bit formatted numbers:
205
       - "18446744073709551615\0" (21 bytes)
206
       - "-9223372036854775808\0" (21 bytes)
207
       Decimal takes the most space (it isn't enough for octal.)
208
209
       Longest 64-bit pointer representation:
210
       "0xffffffffffffffff\0" (19 bytes). */
211
0
    char buffer[21];
212
213
0
    char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos;
214
215
0
#define WRITE_BYTES_LEN(str, len_expr) \
216
0
    do { \
217
0
        size_t len = (len_expr); \
218
0
        s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \
219
0
        if (s == NULL) { \
220
0
            goto error; \
221
0
        } \
222
0
        memcpy(s, (str), len); \
223
0
        s += len; \
224
0
    } while (0)
225
0
#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str))
226
227
0
    for (f = format; *f; f++) {
228
0
        if (*f != '%') {
229
0
            *s++ = *f;
230
0
            continue;
231
0
        }
232
233
0
        p = f++;
234
235
        /* ignore the width (ex: 10 in "%10s") */
236
0
        while (Py_ISDIGIT(*f))
237
0
            f++;
238
239
        /* parse the precision (ex: 10 in "%.10s") */
240
0
        prec = 0;
241
0
        if (*f == '.') {
242
0
            f++;
243
0
            for (; Py_ISDIGIT(*f); f++) {
244
0
                prec = (prec * 10) + (*f - '0');
245
0
            }
246
0
        }
247
248
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
249
0
            f++;
250
251
        /* handle the long flag ('l'), but only for %ld and %lu.
252
           others can be added when necessary. */
253
0
        longflag = 0;
254
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
255
0
            longflag = 1;
256
0
            ++f;
257
0
        }
258
259
        /* handle the size_t flag ('z'). */
260
0
        size_tflag = 0;
261
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
262
0
            size_tflag = 1;
263
0
            ++f;
264
0
        }
265
266
0
        switch (*f) {
267
0
        case 'c':
268
0
        {
269
0
            int c = va_arg(vargs, int);
270
0
            if (c < 0 || c > 255) {
271
0
                PyErr_SetString(PyExc_OverflowError,
272
0
                                "PyBytes_FromFormatV(): %c format "
273
0
                                "expects an integer in range [0; 255]");
274
0
                goto error;
275
0
            }
276
0
            *s++ = (unsigned char)c;
277
0
            break;
278
0
        }
279
280
0
        case 'd':
281
0
            if (longflag) {
282
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
283
0
            }
284
0
            else if (size_tflag) {
285
0
                sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
286
0
            }
287
0
            else {
288
0
                sprintf(buffer, "%d", va_arg(vargs, int));
289
0
            }
290
0
            assert(strlen(buffer) < sizeof(buffer));
291
0
            WRITE_BYTES(buffer);
292
0
            break;
293
294
0
        case 'u':
295
0
            if (longflag) {
296
0
                sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
297
0
            }
298
0
            else if (size_tflag) {
299
0
                sprintf(buffer, "%zu", va_arg(vargs, size_t));
300
0
            }
301
0
            else {
302
0
                sprintf(buffer, "%u", va_arg(vargs, unsigned int));
303
0
            }
304
0
            assert(strlen(buffer) < sizeof(buffer));
305
0
            WRITE_BYTES(buffer);
306
0
            break;
307
308
0
        case 'i':
309
0
            sprintf(buffer, "%i", va_arg(vargs, int));
310
0
            assert(strlen(buffer) < sizeof(buffer));
311
0
            WRITE_BYTES(buffer);
312
0
            break;
313
314
0
        case 'x':
315
0
            sprintf(buffer, "%x", va_arg(vargs, int));
316
0
            assert(strlen(buffer) < sizeof(buffer));
317
0
            WRITE_BYTES(buffer);
318
0
            break;
319
320
0
        case 's':
321
0
        {
322
0
            Py_ssize_t i;
323
324
0
            p = va_arg(vargs, const char*);
325
0
            if (prec <= 0) {
326
0
                i = strlen(p);
327
0
            }
328
0
            else {
329
0
                i = 0;
330
0
                while (i < prec && p[i]) {
331
0
                    i++;
332
0
                }
333
0
            }
334
0
            WRITE_BYTES_LEN(p, i);
335
0
            break;
336
0
        }
337
338
0
        case 'p':
339
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
340
0
            assert(strlen(buffer) < sizeof(buffer));
341
            /* %p is ill-defined:  ensure leading 0x. */
342
0
            if (buffer[1] == 'X')
343
0
                buffer[1] = 'x';
344
0
            else if (buffer[1] != 'x') {
345
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
346
0
                buffer[0] = '0';
347
0
                buffer[1] = 'x';
348
0
            }
349
0
            WRITE_BYTES(buffer);
350
0
            break;
351
352
0
        case '%':
353
0
            *s++ = '%';
354
0
            break;
355
356
0
        default:
357
            /* invalid format string: copy unformatted string and exit */
358
0
            WRITE_BYTES(p);
359
0
            return s;
360
0
        }
361
0
    }
362
363
0
#undef WRITE_BYTES
364
0
#undef WRITE_BYTES_LEN
365
366
0
    return s;
367
368
0
 error:
369
0
    return NULL;
370
0
}
371
372
373
PyObject *
374
PyBytes_FromFormatV(const char *format, va_list vargs)
375
0
{
376
0
    Py_ssize_t alloc = strlen(format);
377
0
    PyBytesWriter *writer = PyBytesWriter_Create(alloc);
378
0
    if (writer == NULL) {
379
0
        return NULL;
380
0
    }
381
382
0
    char *s = bytes_fromformat(writer, 0, format, vargs);
383
0
    if (s == NULL) {
384
0
        PyBytesWriter_Discard(writer);
385
0
        return NULL;
386
0
    }
387
388
0
    return PyBytesWriter_FinishWithPointer(writer, s);
389
0
}
390
391
392
/* Variadic front end: packs the trailing arguments into a va_list and
   forwards to PyBytes_FromFormatV(), returning whatever it returns. */
PyObject *
PyBytes_FromFormat(const char *format, ...)
{
    va_list vargs;

    va_start(vargs, format);
    PyObject *formatted = PyBytes_FromFormatV(format, vargs);
    va_end(vargs);

    return formatted;
}
403
404
405
/* Helpers for formatstring */
406
407
/* Fetch the next format argument and advance *p_argidx.

   When arglen >= 0, 'args' is indexed with PyTuple_GetItem().  When
   arglen < 0, 'args' itself is the one and only argument; the caller
   starts *p_argidx below arglen so that exactly one fetch succeeds.
   Raises TypeError and returns NULL once the arguments are exhausted. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}
422
423
/* Returns a new reference to a PyBytes object, or NULL on failure. */
424
425
static char*
426
formatfloat(PyObject *v, int flags, int prec, int type,
427
            PyObject **p_result, PyBytesWriter *writer, char *str)
428
0
{
429
0
    char *p;
430
0
    PyObject *result;
431
0
    double x;
432
0
    size_t len;
433
0
    int dtoa_flags = 0;
434
435
0
    x = PyFloat_AsDouble(v);
436
0
    if (x == -1.0 && PyErr_Occurred()) {
437
0
        PyErr_Format(PyExc_TypeError, "float argument required, "
438
0
                     "not %.200s", Py_TYPE(v)->tp_name);
439
0
        return NULL;
440
0
    }
441
442
0
    if (prec < 0)
443
0
        prec = 6;
444
445
0
    if (flags & F_ALT) {
446
0
        dtoa_flags |= Py_DTSF_ALT;
447
0
    }
448
0
    p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
449
450
0
    if (p == NULL)
451
0
        return NULL;
452
453
0
    len = strlen(p);
454
0
    if (writer != NULL) {
455
0
        str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str);
456
0
        if (str == NULL) {
457
0
            PyMem_Free(p);
458
0
            return NULL;
459
0
        }
460
0
        memcpy(str, p, len);
461
0
        PyMem_Free(p);
462
0
        str += len;
463
0
        return str;
464
0
    }
465
466
0
    result = PyBytes_FromStringAndSize(p, len);
467
0
    PyMem_Free(p);
468
0
    *p_result = result;
469
0
    return result != NULL ? str : NULL;
470
0
}
471
472
/* Format 'v' as an integer with conversion character 'type' and return the
   result of _PyUnicode_FormatLong().

   An int is formatted directly.  Any other number is first converted:
   with _PyNumber_Index() for 'o', 'x' and 'X', with PyNumber_Long() for the
   remaining conversions.  A TypeError from that conversion, or a 'v' that
   is not a number at all, is reported with a format-specific TypeError;
   other conversion errors are propagated unchanged. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    if (PyLong_Check(v)) {
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    }

    /* make sure number is a type of integer for o, x, and X */
    const int needs_index = (type == 'o' || type == 'x' || type == 'X');

    if (PyNumber_Check(v)) {
        PyObject *iobj = needs_index ? _PyNumber_Index(v)
                                     : PyNumber_Long(v);
        if (iobj != NULL) {
            assert(PyLong_Check(iobj));
            PyObject *result = _PyUnicode_FormatLong(iobj, flags & F_ALT,
                                                     prec, type);
            Py_DECREF(iobj);
            return result;
        }
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        needs_index ? "an integer" : "a real number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
500
501
static int
502
byte_converter(PyObject *arg, char *p)
503
0
{
504
0
    if (PyBytes_Check(arg)) {
505
0
        if (PyBytes_GET_SIZE(arg) != 1) {
506
0
            PyErr_Format(PyExc_TypeError,
507
0
                         "%%c requires an integer in range(256) or "
508
0
                         "a single byte, not a bytes object of length %zd",
509
0
                         PyBytes_GET_SIZE(arg));
510
0
            return 0;
511
0
        }
512
0
        *p = PyBytes_AS_STRING(arg)[0];
513
0
        return 1;
514
0
    }
515
0
    else if (PyByteArray_Check(arg)) {
516
0
        if (PyByteArray_GET_SIZE(arg) != 1) {
517
0
            PyErr_Format(PyExc_TypeError,
518
0
                         "%%c requires an integer in range(256) or "
519
0
                         "a single byte, not a bytearray object of length %zd",
520
0
                         PyByteArray_GET_SIZE(arg));
521
0
            return 0;
522
0
        }
523
0
        *p = PyByteArray_AS_STRING(arg)[0];
524
0
        return 1;
525
0
    }
526
0
    else if (PyIndex_Check(arg)) {
527
0
        int overflow;
528
0
        long ival = PyLong_AsLongAndOverflow(arg, &overflow);
529
0
        if (ival == -1 && PyErr_Occurred()) {
530
0
            return 0;
531
0
        }
532
0
        if (!(0 <= ival && ival <= 255)) {
533
            /* this includes an overflow in converting to C long */
534
0
            PyErr_SetString(PyExc_OverflowError,
535
0
                            "%c arg not in range(256)");
536
0
            return 0;
537
0
        }
538
0
        *p = (char)ival;
539
0
        return 1;
540
0
    }
541
0
    PyErr_Format(PyExc_TypeError,
542
0
        "%%c requires an integer in range(256) or a single byte, not %T",
543
0
        arg);
544
0
    return 0;
545
0
}
546
547
static PyObject *_PyBytes_FromBuffer(PyObject *x);
548
549
static PyObject *
550
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
551
0
{
552
0
    PyObject *func, *result;
553
    /* is it a bytes object? */
554
0
    if (PyBytes_Check(v)) {
555
0
        *pbuf = PyBytes_AS_STRING(v);
556
0
        *plen = PyBytes_GET_SIZE(v);
557
0
        return Py_NewRef(v);
558
0
    }
559
0
    if (PyByteArray_Check(v)) {
560
0
        *pbuf = PyByteArray_AS_STRING(v);
561
0
        *plen = PyByteArray_GET_SIZE(v);
562
0
        return Py_NewRef(v);
563
0
    }
564
    /* does it support __bytes__? */
565
0
    func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
566
0
    if (func != NULL) {
567
0
        result = _PyObject_CallNoArgs(func);
568
0
        Py_DECREF(func);
569
0
        if (result == NULL)
570
0
            return NULL;
571
0
        if (!PyBytes_Check(result)) {
572
0
            PyErr_Format(PyExc_TypeError,
573
0
                         "%T.__bytes__() must return a bytes, not %T",
574
0
                         v, result);
575
0
            Py_DECREF(result);
576
0
            return NULL;
577
0
        }
578
0
        *pbuf = PyBytes_AS_STRING(result);
579
0
        *plen = PyBytes_GET_SIZE(result);
580
0
        return result;
581
0
    }
582
    /* does it support buffer protocol? */
583
0
    if (PyObject_CheckBuffer(v)) {
584
        /* maybe we can avoid making a copy of the buffer object here? */
585
0
        result = _PyBytes_FromBuffer(v);
586
0
        if (result == NULL)
587
0
            return NULL;
588
0
        *pbuf = PyBytes_AS_STRING(result);
589
0
        *plen = PyBytes_GET_SIZE(result);
590
0
        return result;
591
0
    }
592
0
    PyErr_Format(PyExc_TypeError,
593
0
                 "%%b requires a bytes-like object, "
594
0
                 "or an object that implements __bytes__, not '%.100s'",
595
0
                 Py_TYPE(v)->tp_name);
596
0
    return NULL;
597
0
}
598
599
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
600
601
PyObject *
602
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
603
                  PyObject *args, int use_bytearray)
604
0
{
605
0
    const char *fmt;
606
0
    Py_ssize_t arglen, argidx;
607
0
    Py_ssize_t fmtcnt;
608
0
    int args_owned = 0;
609
0
    PyObject *dict = NULL;
610
611
0
    if (args == NULL) {
612
0
        PyErr_BadInternalCall();
613
0
        return NULL;
614
0
    }
615
0
    fmt = format;
616
0
    fmtcnt = format_len;
617
618
0
    PyBytesWriter *writer;
619
0
    if (use_bytearray) {
620
0
        writer = _PyBytesWriter_CreateByteArray(fmtcnt);
621
0
    }
622
0
    else {
623
0
        writer = PyBytesWriter_Create(fmtcnt);
624
0
    }
625
0
    if (writer == NULL) {
626
0
        return NULL;
627
0
    }
628
0
    char *res = PyBytesWriter_GetData(writer);
629
630
0
    if (PyTuple_Check(args)) {
631
0
        arglen = PyTuple_GET_SIZE(args);
632
0
        argidx = 0;
633
0
    }
634
0
    else {
635
0
        arglen = -1;
636
0
        argidx = -2;
637
0
    }
638
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
639
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
640
0
        !PyByteArray_Check(args)) {
641
0
            dict = args;
642
0
    }
643
644
0
    while (--fmtcnt >= 0) {
645
0
        if (*fmt != '%') {
646
0
            Py_ssize_t len;
647
0
            char *pos;
648
649
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
650
0
            if (pos != NULL)
651
0
                len = pos - fmt;
652
0
            else
653
0
                len = fmtcnt + 1;
654
0
            assert(len != 0);
655
656
0
            memcpy(res, fmt, len);
657
0
            res += len;
658
0
            fmt += len;
659
0
            fmtcnt -= (len - 1);
660
0
        }
661
0
        else {
662
            /* Got a format specifier */
663
0
            int flags = 0;
664
0
            Py_ssize_t width = -1;
665
0
            int prec = -1;
666
0
            int c = '\0';
667
0
            int fill;
668
0
            PyObject *v = NULL;
669
0
            PyObject *temp = NULL;
670
0
            const char *pbuf = NULL;
671
0
            int sign;
672
0
            Py_ssize_t len = 0;
673
0
            char onechar; /* For byte_converter() */
674
0
            Py_ssize_t alloc;
675
676
0
            fmt++;
677
0
            if (*fmt == '%') {
678
0
                *res++ = '%';
679
0
                fmt++;
680
0
                fmtcnt--;
681
0
                continue;
682
0
            }
683
0
            if (*fmt == '(') {
684
0
                const char *keystart;
685
0
                Py_ssize_t keylen;
686
0
                PyObject *key;
687
0
                int pcount = 1;
688
689
0
                if (dict == NULL) {
690
0
                    PyErr_SetString(PyExc_TypeError,
691
0
                             "format requires a mapping");
692
0
                    goto error;
693
0
                }
694
0
                ++fmt;
695
0
                --fmtcnt;
696
0
                keystart = fmt;
697
                /* Skip over balanced parentheses */
698
0
                while (pcount > 0 && --fmtcnt >= 0) {
699
0
                    if (*fmt == ')')
700
0
                        --pcount;
701
0
                    else if (*fmt == '(')
702
0
                        ++pcount;
703
0
                    fmt++;
704
0
                }
705
0
                keylen = fmt - keystart - 1;
706
0
                if (fmtcnt < 0 || pcount > 0) {
707
0
                    PyErr_SetString(PyExc_ValueError,
708
0
                               "incomplete format key");
709
0
                    goto error;
710
0
                }
711
0
                key = PyBytes_FromStringAndSize(keystart,
712
0
                                                 keylen);
713
0
                if (key == NULL)
714
0
                    goto error;
715
0
                if (args_owned) {
716
0
                    Py_DECREF(args);
717
0
                    args_owned = 0;
718
0
                }
719
0
                args = PyObject_GetItem(dict, key);
720
0
                Py_DECREF(key);
721
0
                if (args == NULL) {
722
0
                    goto error;
723
0
                }
724
0
                args_owned = 1;
725
0
                arglen = -1;
726
0
                argidx = -2;
727
0
            }
728
729
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
730
0
            while (--fmtcnt >= 0) {
731
0
                switch (c = *fmt++) {
732
0
                case '-': flags |= F_LJUST; continue;
733
0
                case '+': flags |= F_SIGN; continue;
734
0
                case ' ': flags |= F_BLANK; continue;
735
0
                case '#': flags |= F_ALT; continue;
736
0
                case '0': flags |= F_ZERO; continue;
737
0
                }
738
0
                break;
739
0
            }
740
741
            /* Parse width. Example: "%10s" => width=10 */
742
0
            if (c == '*') {
743
0
                v = getnextarg(args, arglen, &argidx);
744
0
                if (v == NULL)
745
0
                    goto error;
746
0
                if (!PyLong_Check(v)) {
747
0
                    PyErr_SetString(PyExc_TypeError,
748
0
                                    "* wants int");
749
0
                    goto error;
750
0
                }
751
0
                width = PyLong_AsSsize_t(v);
752
0
                if (width == -1 && PyErr_Occurred())
753
0
                    goto error;
754
0
                if (width < 0) {
755
0
                    flags |= F_LJUST;
756
0
                    width = -width;
757
0
                }
758
0
                if (--fmtcnt >= 0)
759
0
                    c = *fmt++;
760
0
            }
761
0
            else if (c >= 0 && Py_ISDIGIT(c)) {
762
0
                width = c - '0';
763
0
                while (--fmtcnt >= 0) {
764
0
                    c = Py_CHARMASK(*fmt++);
765
0
                    if (!Py_ISDIGIT(c))
766
0
                        break;
767
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
768
0
                        PyErr_SetString(
769
0
                            PyExc_ValueError,
770
0
                            "width too big");
771
0
                        goto error;
772
0
                    }
773
0
                    width = width*10 + (c - '0');
774
0
                }
775
0
            }
776
777
            /* Parse precision. Example: "%.3f" => prec=3 */
778
0
            if (c == '.') {
779
0
                prec = 0;
780
0
                if (--fmtcnt >= 0)
781
0
                    c = *fmt++;
782
0
                if (c == '*') {
783
0
                    v = getnextarg(args, arglen, &argidx);
784
0
                    if (v == NULL)
785
0
                        goto error;
786
0
                    if (!PyLong_Check(v)) {
787
0
                        PyErr_SetString(
788
0
                            PyExc_TypeError,
789
0
                            "* wants int");
790
0
                        goto error;
791
0
                    }
792
0
                    prec = PyLong_AsInt(v);
793
0
                    if (prec == -1 && PyErr_Occurred())
794
0
                        goto error;
795
0
                    if (prec < 0)
796
0
                        prec = 0;
797
0
                    if (--fmtcnt >= 0)
798
0
                        c = *fmt++;
799
0
                }
800
0
                else if (c >= 0 && Py_ISDIGIT(c)) {
801
0
                    prec = c - '0';
802
0
                    while (--fmtcnt >= 0) {
803
0
                        c = Py_CHARMASK(*fmt++);
804
0
                        if (!Py_ISDIGIT(c))
805
0
                            break;
806
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
807
0
                            PyErr_SetString(
808
0
                                PyExc_ValueError,
809
0
                                "prec too big");
810
0
                            goto error;
811
0
                        }
812
0
                        prec = prec*10 + (c - '0');
813
0
                    }
814
0
                }
815
0
            } /* prec */
816
0
            if (fmtcnt >= 0) {
817
0
                if (c == 'h' || c == 'l' || c == 'L') {
818
0
                    if (--fmtcnt >= 0)
819
0
                        c = *fmt++;
820
0
                }
821
0
            }
822
0
            if (fmtcnt < 0) {
823
0
                PyErr_SetString(PyExc_ValueError,
824
0
                                "incomplete format");
825
0
                goto error;
826
0
            }
827
0
            v = getnextarg(args, arglen, &argidx);
828
0
            if (v == NULL)
829
0
                goto error;
830
831
0
            if (fmtcnt == 0) {
832
                /* last write: disable writer overallocation */
833
0
                writer->overallocate = 0;
834
0
            }
835
836
0
            sign = 0;
837
0
            fill = ' ';
838
0
            switch (c) {
839
0
            case 'r':
840
                // %r is only for 2/3 code; 3 only code should use %a
841
0
            case 'a':
842
0
                temp = PyObject_ASCII(v);
843
0
                if (temp == NULL)
844
0
                    goto error;
845
0
                assert(PyUnicode_IS_ASCII(temp));
846
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
847
0
                len = PyUnicode_GET_LENGTH(temp);
848
0
                if (prec >= 0 && len > prec)
849
0
                    len = prec;
850
0
                break;
851
852
0
            case 's':
853
                // %s is only for 2/3 code; 3 only code should use %b
854
0
            case 'b':
855
0
                temp = format_obj(v, &pbuf, &len);
856
0
                if (temp == NULL)
857
0
                    goto error;
858
0
                if (prec >= 0 && len > prec)
859
0
                    len = prec;
860
0
                break;
861
862
0
            case 'i':
863
0
            case 'd':
864
0
            case 'u':
865
0
            case 'o':
866
0
            case 'x':
867
0
            case 'X':
868
0
                if (PyLong_CheckExact(v)
869
0
                    && width == -1 && prec == -1
870
0
                    && !(flags & (F_SIGN | F_BLANK))
871
0
                    && c != 'X')
872
0
                {
873
                    /* Fast path */
874
0
                    int alternate = flags & F_ALT;
875
0
                    int base;
876
877
0
                    switch(c)
878
0
                    {
879
0
                        default:
880
0
                            Py_UNREACHABLE();
881
0
                        case 'd':
882
0
                        case 'i':
883
0
                        case 'u':
884
0
                            base = 10;
885
0
                            break;
886
0
                        case 'o':
887
0
                            base = 8;
888
0
                            break;
889
0
                        case 'x':
890
0
                        case 'X':
891
0
                            base = 16;
892
0
                            break;
893
0
                    }
894
895
                    /* Fast path */
896
0
                    res = _PyLong_FormatBytesWriter(writer, res,
897
0
                                                    v, base, alternate);
898
0
                    if (res == NULL)
899
0
                        goto error;
900
0
                    continue;
901
0
                }
902
903
0
                temp = formatlong(v, flags, prec, c);
904
0
                if (!temp)
905
0
                    goto error;
906
0
                assert(PyUnicode_IS_ASCII(temp));
907
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
908
0
                len = PyUnicode_GET_LENGTH(temp);
909
0
                sign = 1;
910
0
                if (flags & F_ZERO)
911
0
                    fill = '0';
912
0
                break;
913
914
0
            case 'e':
915
0
            case 'E':
916
0
            case 'f':
917
0
            case 'F':
918
0
            case 'g':
919
0
            case 'G':
920
0
                if (width == -1 && prec == -1
921
0
                    && !(flags & (F_SIGN | F_BLANK)))
922
0
                {
923
                    /* Fast path */
924
0
                    res = formatfloat(v, flags, prec, c, NULL, writer, res);
925
0
                    if (res == NULL)
926
0
                        goto error;
927
0
                    continue;
928
0
                }
929
930
0
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
931
0
                    goto error;
932
0
                pbuf = PyBytes_AS_STRING(temp);
933
0
                len = PyBytes_GET_SIZE(temp);
934
0
                sign = 1;
935
0
                if (flags & F_ZERO)
936
0
                    fill = '0';
937
0
                break;
938
939
0
            case 'c':
940
0
                pbuf = &onechar;
941
0
                len = byte_converter(v, &onechar);
942
0
                if (!len)
943
0
                    goto error;
944
0
                if (width == -1) {
945
                    /* Fast path */
946
0
                    *res++ = onechar;
947
0
                    continue;
948
0
                }
949
0
                break;
950
951
0
            default:
952
0
                PyErr_Format(PyExc_ValueError,
953
0
                  "unsupported format character '%c' (0x%x) "
954
0
                  "at index %zd",
955
0
                  c, c,
956
0
                  (Py_ssize_t)(fmt - 1 - format));
957
0
                goto error;
958
0
            }
959
960
0
            if (sign) {
961
0
                if (*pbuf == '-' || *pbuf == '+') {
962
0
                    sign = *pbuf++;
963
0
                    len--;
964
0
                }
965
0
                else if (flags & F_SIGN)
966
0
                    sign = '+';
967
0
                else if (flags & F_BLANK)
968
0
                    sign = ' ';
969
0
                else
970
0
                    sign = 0;
971
0
            }
972
0
            if (width < len)
973
0
                width = len;
974
975
0
            alloc = width;
976
0
            if (sign != 0 && len == width)
977
0
                alloc++;
978
            /* 2: size preallocated for %s */
979
0
            if (alloc > 2) {
980
0
                res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res);
981
0
                if (res == NULL) {
982
0
                    Py_XDECREF(temp);
983
0
                    goto error;
984
0
                }
985
0
            }
986
#ifndef NDEBUG
987
            char *before = res;
988
#endif
989
990
            /* Write the sign if needed */
991
0
            if (sign) {
992
0
                if (fill != ' ')
993
0
                    *res++ = sign;
994
0
                if (width > len)
995
0
                    width--;
996
0
            }
997
998
            /* Write the numeric prefix for "x", "X" and "o" formats
999
               if the alternate form is used.
1000
               For example, write "0x" for the "%#x" format. */
1001
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1002
0
                assert(pbuf[0] == '0');
1003
0
                assert(pbuf[1] == c);
1004
0
                if (fill != ' ') {
1005
0
                    *res++ = *pbuf++;
1006
0
                    *res++ = *pbuf++;
1007
0
                }
1008
0
                width -= 2;
1009
0
                if (width < 0)
1010
0
                    width = 0;
1011
0
                len -= 2;
1012
0
            }
1013
1014
            /* Pad left with the fill character if needed */
1015
0
            if (width > len && !(flags & F_LJUST)) {
1016
0
                memset(res, fill, width - len);
1017
0
                res += (width - len);
1018
0
                width = len;
1019
0
            }
1020
1021
            /* If padding with spaces: write sign if needed and/or numeric
1022
               prefix if the alternate form is used */
1023
0
            if (fill == ' ') {
1024
0
                if (sign)
1025
0
                    *res++ = sign;
1026
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1027
0
                    assert(pbuf[0] == '0');
1028
0
                    assert(pbuf[1] == c);
1029
0
                    *res++ = *pbuf++;
1030
0
                    *res++ = *pbuf++;
1031
0
                }
1032
0
            }
1033
1034
            /* Copy bytes */
1035
0
            memcpy(res, pbuf, len);
1036
0
            res += len;
1037
1038
            /* Pad right with the fill character if needed */
1039
0
            if (width > len) {
1040
0
                memset(res, ' ', width - len);
1041
0
                res += (width - len);
1042
0
            }
1043
1044
0
            if (dict && (argidx < arglen)) {
1045
0
                PyErr_SetString(PyExc_TypeError,
1046
0
                           "not all arguments converted during bytes formatting");
1047
0
                Py_XDECREF(temp);
1048
0
                goto error;
1049
0
            }
1050
0
            Py_XDECREF(temp);
1051
1052
#ifndef NDEBUG
1053
            /* check that we computed the exact size for this write */
1054
            assert((res - before) == alloc);
1055
#endif
1056
0
        } /* '%' */
1057
1058
        /* If overallocation was disabled, ensure that it was the last
1059
           write. Otherwise, we missed an optimization */
1060
0
        assert(writer->overallocate || fmtcnt == 0 || use_bytearray);
1061
0
    } /* until end */
1062
1063
0
    if (argidx < arglen && !dict) {
1064
0
        PyErr_SetString(PyExc_TypeError,
1065
0
                        "not all arguments converted during bytes formatting");
1066
0
        goto error;
1067
0
    }
1068
1069
0
    if (args_owned) {
1070
0
        Py_DECREF(args);
1071
0
    }
1072
0
    return PyBytesWriter_FinishWithPointer(writer, res);
1073
1074
0
 error:
1075
0
    PyBytesWriter_Discard(writer);
1076
0
    if (args_owned) {
1077
0
        Py_DECREF(args);
1078
0
    }
1079
0
    return NULL;
1080
0
}
1081
1082
/* Unescape a backslash-escaped string. */
1083
PyObject *_PyBytes_DecodeEscape2(const char *s,
1084
                                Py_ssize_t len,
1085
                                const char *errors,
1086
                                int *first_invalid_escape_char,
1087
                                const char **first_invalid_escape_ptr)
1088
2.41k
{
1089
2.41k
    PyBytesWriter *writer = PyBytesWriter_Create(len);
1090
2.41k
    if (writer == NULL) {
1091
0
        return NULL;
1092
0
    }
1093
2.41k
    char *p = PyBytesWriter_GetData(writer);
1094
1095
2.41k
    *first_invalid_escape_char = -1;
1096
2.41k
    *first_invalid_escape_ptr = NULL;
1097
1098
2.41k
    const char *end = s + len;
1099
63.8k
    while (s < end) {
1100
61.4k
        if (*s != '\\') {
1101
50.4k
            *p++ = *s++;
1102
50.4k
            continue;
1103
50.4k
        }
1104
1105
10.9k
        s++;
1106
10.9k
        if (s == end) {
1107
0
            PyErr_SetString(PyExc_ValueError,
1108
0
                            "Trailing \\ in string");
1109
0
            goto failed;
1110
0
        }
1111
1112
10.9k
        switch (*s++) {
1113
        /* XXX This assumes ASCII! */
1114
922
        case '\n': break;
1115
596
        case '\\': *p++ = '\\'; break;
1116
262
        case '\'': *p++ = '\''; break;
1117
525
        case '\"': *p++ = '\"'; break;
1118
228
        case 'b': *p++ = '\b'; break;
1119
323
        case 'f': *p++ = '\014'; break; /* FF */
1120
234
        case 't': *p++ = '\t'; break;
1121
402
        case 'n': *p++ = '\n'; break;
1122
498
        case 'r': *p++ = '\r'; break;
1123
230
        case 'v': *p++ = '\013'; break; /* VT */
1124
201
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1125
2.17k
        case '0': case '1': case '2': case '3':
1126
4.09k
        case '4': case '5': case '6': case '7':
1127
4.09k
        {
1128
4.09k
            int c = s[-1] - '0';
1129
4.09k
            if (s < end && '0' <= *s && *s <= '7') {
1130
1.60k
                c = (c<<3) + *s++ - '0';
1131
1.60k
                if (s < end && '0' <= *s && *s <= '7')
1132
933
                    c = (c<<3) + *s++ - '0';
1133
1.60k
            }
1134
4.09k
            if (c > 0377) {
1135
587
                if (*first_invalid_escape_char == -1) {
1136
152
                    *first_invalid_escape_char = c;
1137
                    /* Back up 3 chars, since we've already incremented s. */
1138
152
                    *first_invalid_escape_ptr = s - 3;
1139
152
                }
1140
587
            }
1141
4.09k
            *p++ = c;
1142
4.09k
            break;
1143
3.52k
        }
1144
1.16k
        case 'x':
1145
1.16k
            if (s+1 < end) {
1146
1.16k
                int digit1, digit2;
1147
1.16k
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1148
1.16k
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1149
1.16k
                if (digit1 < 16 && digit2 < 16) {
1150
1.16k
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1151
1.16k
                    s += 2;
1152
1.16k
                    break;
1153
1.16k
                }
1154
1.16k
            }
1155
            /* invalid hexadecimal digits */
1156
1157
5
            if (!errors || strcmp(errors, "strict") == 0) {
1158
5
                PyErr_Format(PyExc_ValueError,
1159
5
                             "invalid \\x escape at position %zd",
1160
5
                             s - 2 - (end - len));
1161
5
                goto failed;
1162
5
            }
1163
0
            if (strcmp(errors, "replace") == 0) {
1164
0
                *p++ = '?';
1165
0
            } else if (strcmp(errors, "ignore") == 0)
1166
0
                /* do nothing */;
1167
0
            else {
1168
0
                PyErr_Format(PyExc_ValueError,
1169
0
                             "decoding error; unknown "
1170
0
                             "error handling code: %.400s",
1171
0
                             errors);
1172
0
                goto failed;
1173
0
            }
1174
            /* skip \x */
1175
0
            if (s < end && Py_ISXDIGIT(s[0]))
1176
0
                s++; /* and a hexdigit */
1177
0
            break;
1178
1179
1.27k
        default:
1180
1.27k
            if (*first_invalid_escape_char == -1) {
1181
711
                *first_invalid_escape_char = (unsigned char)s[-1];
1182
                /* Back up one char, since we've already incremented s. */
1183
711
                *first_invalid_escape_ptr = s - 1;
1184
711
            }
1185
1.27k
            *p++ = '\\';
1186
1.27k
            s--;
1187
10.9k
        }
1188
10.9k
    }
1189
1190
2.40k
    return PyBytesWriter_FinishWithPointer(writer, p);
1191
1192
5
  failed:
1193
5
    PyBytesWriter_Discard(writer);
1194
5
    return NULL;
1195
2.41k
}
1196
1197
PyObject *PyBytes_DecodeEscape(const char *s,
1198
                                Py_ssize_t len,
1199
                                const char *errors,
1200
                                Py_ssize_t Py_UNUSED(unicode),
1201
                                const char *Py_UNUSED(recode_encoding))
1202
0
{
1203
0
    int first_invalid_escape_char;
1204
0
    const char *first_invalid_escape_ptr;
1205
0
    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
1206
0
                                             &first_invalid_escape_char,
1207
0
                                             &first_invalid_escape_ptr);
1208
0
    if (result == NULL)
1209
0
        return NULL;
1210
0
    if (first_invalid_escape_char != -1) {
1211
0
        if (first_invalid_escape_char > 0xff) {
1212
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1213
0
                                 "b\"\\%o\" is an invalid octal escape sequence. "
1214
0
                                 "Such sequences will not work in the future. ",
1215
0
                                 first_invalid_escape_char) < 0)
1216
0
            {
1217
0
                Py_DECREF(result);
1218
0
                return NULL;
1219
0
            }
1220
0
        }
1221
0
        else {
1222
0
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1223
0
                                 "b\"\\%c\" is an invalid escape sequence. "
1224
0
                                 "Such sequences will not work in the future. ",
1225
0
                                 first_invalid_escape_char) < 0)
1226
0
            {
1227
0
                Py_DECREF(result);
1228
0
                return NULL;
1229
0
            }
1230
0
        }
1231
0
    }
1232
0
    return result;
1233
0
}
1234
/* -------------------------------------------------------------------- */
1235
/* object api */
1236
1237
Py_ssize_t
1238
PyBytes_Size(PyObject *op)
1239
5.07k
{
1240
5.07k
    if (!PyBytes_Check(op)) {
1241
0
        PyErr_Format(PyExc_TypeError,
1242
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1243
0
        return -1;
1244
0
    }
1245
5.07k
    return Py_SIZE(op);
1246
5.07k
}
1247
1248
char *
1249
PyBytes_AsString(PyObject *op)
1250
2.97M
{
1251
2.97M
    if (!PyBytes_Check(op)) {
1252
0
        PyErr_Format(PyExc_TypeError,
1253
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1254
0
        return NULL;
1255
0
    }
1256
2.97M
    return ((PyBytesObject *)op)->ob_sval;
1257
2.97M
}
1258
1259
int
1260
PyBytes_AsStringAndSize(PyObject *obj,
1261
                         char **s,
1262
                         Py_ssize_t *len)
1263
74.8k
{
1264
74.8k
    if (s == NULL) {
1265
0
        PyErr_BadInternalCall();
1266
0
        return -1;
1267
0
    }
1268
1269
74.8k
    if (!PyBytes_Check(obj)) {
1270
0
        PyErr_Format(PyExc_TypeError,
1271
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1272
0
        return -1;
1273
0
    }
1274
1275
74.8k
    *s = PyBytes_AS_STRING(obj);
1276
74.8k
    if (len != NULL)
1277
74.8k
        *len = PyBytes_GET_SIZE(obj);
1278
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1279
0
        PyErr_SetString(PyExc_ValueError,
1280
0
                        "embedded null byte");
1281
0
        return -1;
1282
0
    }
1283
74.8k
    return 0;
1284
74.8k
}
1285
1286
/* -------------------------------------------------------------------- */
1287
/* Methods */
1288
1289
0
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
1290
1291
#include "stringlib/stringdefs.h"
1292
#define STRINGLIB_MUTABLE 0
1293
1294
#include "stringlib/fastsearch.h"
1295
#include "stringlib/count.h"
1296
#include "stringlib/find.h"
1297
#include "stringlib/join.h"
1298
#include "stringlib/partition.h"
1299
#include "stringlib/split.h"
1300
#include "stringlib/ctype.h"
1301
1302
#include "stringlib/transmogrify.h"
1303
1304
#undef STRINGLIB_GET_EMPTY
1305
1306
Py_ssize_t
1307
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1308
              const char *needle, Py_ssize_t len_needle,
1309
              Py_ssize_t offset)
1310
0
{
1311
0
    assert(len_haystack >= 0);
1312
0
    assert(len_needle >= 0);
1313
    // Extra checks because stringlib_find accesses haystack[len_haystack].
1314
0
    if (len_needle == 0) {
1315
0
        return offset;
1316
0
    }
1317
0
    if (len_needle > len_haystack) {
1318
0
        return -1;
1319
0
    }
1320
0
    assert(len_haystack >= 1);
1321
0
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1322
0
                                    needle, len_needle, offset);
1323
0
    if (res == -1) {
1324
0
        Py_ssize_t last_align = len_haystack - len_needle;
1325
0
        if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1326
0
            return offset + last_align;
1327
0
        }
1328
0
    }
1329
0
    return res;
1330
0
}
1331
1332
Py_ssize_t
1333
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1334
                     const char *needle, Py_ssize_t len_needle,
1335
                     Py_ssize_t offset)
1336
0
{
1337
0
    return stringlib_rfind(haystack, len_haystack,
1338
0
                           needle, len_needle, offset);
1339
0
}
1340
1341
/* Build the repr of the bytes object `obj` by handing its buffer and size
   to _Py_bytes_repr() with the class name "bytes". */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    const char *data = PyBytes_AS_STRING(obj);
    Py_ssize_t length = PyBytes_GET_SIZE(obj);

    return _Py_bytes_repr(data, length, smartquotes, "bytes");
}
1347
1348
PyObject *
1349
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1350
               const char *classname)
1351
2.97k
{
1352
2.97k
    Py_ssize_t i;
1353
2.97k
    Py_ssize_t newsize, squotes, dquotes;
1354
2.97k
    PyObject *v;
1355
2.97k
    unsigned char quote;
1356
2.97k
    Py_UCS1 *p;
1357
1358
    /* Compute size of output string */
1359
2.97k
    squotes = dquotes = 0;
1360
2.97k
    newsize = 3; /* b'' */
1361
1.10M
    for (i = 0; i < length; i++) {
1362
1.09M
        unsigned char c = data[i];
1363
1.09M
        Py_ssize_t incr = 1;
1364
1.09M
        switch(c) {
1365
3.38k
        case '\'': squotes++; break;
1366
3.03k
        case '"':  dquotes++; break;
1367
79.2k
        case '\\': case '\t': case '\n': case '\r':
1368
79.2k
            incr = 2; break; /* \C */
1369
1.01M
        default:
1370
1.01M
            if (c < ' ' || c >= 0x7f)
1371
665k
                incr = 4; /* \xHH */
1372
1.09M
        }
1373
1.09M
        if (newsize > PY_SSIZE_T_MAX - incr)
1374
0
            goto overflow;
1375
1.09M
        newsize += incr;
1376
1.09M
    }
1377
2.97k
    quote = '\'';
1378
2.97k
    if (smartquotes && squotes && !dquotes)
1379
126
        quote = '"';
1380
2.97k
    if (squotes && quote == '\'') {
1381
362
        if (newsize > PY_SSIZE_T_MAX - squotes)
1382
0
            goto overflow;
1383
362
        newsize += squotes;
1384
362
    }
1385
1386
2.97k
    v = PyUnicode_New(newsize, 127);
1387
2.97k
    if (v == NULL) {
1388
0
        return NULL;
1389
0
    }
1390
2.97k
    p = PyUnicode_1BYTE_DATA(v);
1391
1392
2.97k
    *p++ = 'b', *p++ = quote;
1393
1.10M
    for (i = 0; i < length; i++) {
1394
1.09M
        unsigned char c = data[i];
1395
1.09M
        if (c == quote || c == '\\')
1396
5.19k
            *p++ = '\\', *p++ = c;
1397
1.09M
        else if (c == '\t')
1398
66.7k
            *p++ = '\\', *p++ = 't';
1399
1.02M
        else if (c == '\n')
1400
7.36k
            *p++ = '\\', *p++ = 'n';
1401
1.02M
        else if (c == '\r')
1402
2.92k
            *p++ = '\\', *p++ = 'r';
1403
1.01M
        else if (c < ' ' || c >= 0x7f) {
1404
665k
            *p++ = '\\';
1405
665k
            *p++ = 'x';
1406
665k
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1407
665k
            *p++ = Py_hexdigits[c & 0xf];
1408
665k
        }
1409
351k
        else
1410
351k
            *p++ = c;
1411
1.09M
    }
1412
2.97k
    *p++ = quote;
1413
2.97k
    assert(_PyUnicode_CheckConsistency(v, 1));
1414
2.97k
    return v;
1415
1416
0
  overflow:
1417
0
    PyErr_Format(PyExc_OverflowError,
1418
0
                 "%s object is too large to make repr", classname);
1419
0
    return NULL;
1420
2.97k
}
1421
1422
/* repr() implementation: PyBytes_Repr() with smart quotes enabled. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;
    return PyBytes_Repr(op, smartquotes);
}
1427
1428
/* str() implementation: same as repr(), preceded by a BytesWarning when
   the bytes_warning configuration option is set.  Returns NULL if issuing
   the warning fails. */
static PyObject *
bytes_str(PyObject *op)
{
    if (_Py_GetConfig()->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1439
1440
static Py_ssize_t
1441
bytes_length(PyObject *self)
1442
16.0M
{
1443
16.0M
    PyBytesObject *a = _PyBytes_CAST(self);
1444
16.0M
    return Py_SIZE(a);
1445
16.0M
}
1446
1447
/* This is also used by PyBytes_Concat() */
1448
static PyObject *
1449
bytes_concat(PyObject *a, PyObject *b)
1450
109k
{
1451
109k
    Py_buffer va, vb;
1452
109k
    PyObject *result = NULL;
1453
1454
109k
    va.len = -1;
1455
109k
    vb.len = -1;
1456
109k
    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1457
109k
        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1458
0
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1459
0
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1460
0
        goto done;
1461
0
    }
1462
1463
    /* Optimize end cases */
1464
109k
    if (va.len == 0 && PyBytes_CheckExact(b)) {
1465
5.98k
        result = Py_NewRef(b);
1466
5.98k
        goto done;
1467
5.98k
    }
1468
103k
    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1469
37.1k
        result = Py_NewRef(a);
1470
37.1k
        goto done;
1471
37.1k
    }
1472
1473
66.1k
    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1474
0
        PyErr_NoMemory();
1475
0
        goto done;
1476
0
    }
1477
1478
66.1k
    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1479
66.1k
    if (result != NULL) {
1480
66.1k
        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1481
66.1k
        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1482
66.1k
    }
1483
1484
109k
  done:
1485
109k
    if (va.len != -1)
1486
109k
        PyBuffer_Release(&va);
1487
109k
    if (vb.len != -1)
1488
109k
        PyBuffer_Release(&vb);
1489
109k
    return result;
1490
66.1k
}
1491
1492
static PyObject *
1493
bytes_repeat(PyObject *self, Py_ssize_t n)
1494
52.2k
{
1495
52.2k
    PyBytesObject *a = _PyBytes_CAST(self);
1496
52.2k
    if (n < 0)
1497
0
        n = 0;
1498
    /* watch out for overflows:  the size can overflow int,
1499
     * and the # of bytes needed can overflow size_t
1500
     */
1501
52.2k
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1502
0
        PyErr_SetString(PyExc_OverflowError,
1503
0
            "repeated bytes are too long");
1504
0
        return NULL;
1505
0
    }
1506
52.2k
    Py_ssize_t size = Py_SIZE(a) * n;
1507
52.2k
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1508
0
        return Py_NewRef(a);
1509
0
    }
1510
52.2k
    size_t nbytes = (size_t)size;
1511
52.2k
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1512
0
        PyErr_SetString(PyExc_OverflowError,
1513
0
            "repeated bytes are too long");
1514
0
        return NULL;
1515
0
    }
1516
52.2k
    PyBytesObject *op = PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1517
52.2k
    if (op == NULL) {
1518
0
        return PyErr_NoMemory();
1519
0
    }
1520
52.2k
    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1521
52.2k
    set_ob_shash(op, -1);
1522
52.2k
    op->ob_sval[size] = '\0';
1523
1524
52.2k
    _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1525
1526
52.2k
    return (PyObject *) op;
1527
52.2k
}
1528
1529
static int
1530
bytes_contains(PyObject *self, PyObject *arg)
1531
2.80k
{
1532
2.80k
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1533
2.80k
}
1534
1535
static PyObject *
1536
bytes_item(PyObject *self, Py_ssize_t i)
1537
0
{
1538
0
    PyBytesObject *a = _PyBytes_CAST(self);
1539
0
    if (i < 0 || i >= Py_SIZE(a)) {
1540
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1541
0
        return NULL;
1542
0
    }
1543
0
    return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1544
0
}
1545
1546
static int
1547
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548
29.2M
{
1549
29.2M
    int cmp;
1550
29.2M
    Py_ssize_t len;
1551
1552
29.2M
    len = Py_SIZE(a);
1553
29.2M
    if (Py_SIZE(b) != len)
1554
421k
        return 0;
1555
1556
28.8M
    if (a->ob_sval[0] != b->ob_sval[0])
1557
2.59M
        return 0;
1558
1559
26.2M
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560
26.2M
    return (cmp == 0);
1561
28.8M
}
1562
1563
/* Rich comparison slot.

   Returns NotImplemented unless both operands are bytes objects (after
   optionally emitting a BytesWarning for ==/!= against str or int when the
   bytes_warning option is set).  Identical objects are answered without
   looking at the data, ==/!= go through bytes_compare_eq(), and ordering
   compares the common prefix first and the lengths second. */
static PyObject*
bytes_richcompare(PyObject *aa, PyObject *bb, int op)
{
    /* Make sure both arguments are bytes objects. */
    if (!PyBytes_Check(aa) || !PyBytes_Check(bb)) {
        if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
            if ((PyUnicode_Check(aa) || PyUnicode_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and string", 1))
            {
                return NULL;
            }
            if ((PyLong_Check(aa) || PyLong_Check(bb))
                && PyErr_WarnEx(PyExc_BytesWarning,
                                "Comparison between bytes and int", 1))
            {
                return NULL;
            }
        }
        Py_RETURN_NOTIMPLEMENTED;
    }

    PyBytesObject *a = _PyBytes_CAST(aa);
    PyBytesObject *b = _PyBytes_CAST(bb);

    if (a == b) {
        /* a byte string is equal to itself */
        if (op == Py_EQ || op == Py_LE || op == Py_GE) {
            Py_RETURN_TRUE;
        }
        if (op == Py_NE || op == Py_LT || op == Py_GT) {
            Py_RETURN_FALSE;
        }
        PyErr_BadArgument();
        return NULL;
    }

    if (op == Py_EQ || op == Py_NE) {
        const int equal = bytes_compare_eq(a, b);
        /* flip the answer for != */
        return PyBool_FromLong(equal ^ (op == Py_NE));
    }

    const Py_ssize_t len_a = Py_SIZE(a);
    const Py_ssize_t len_b = Py_SIZE(b);
    const Py_ssize_t min_len = Py_MIN(len_a, len_b);
    int c = 0;
    if (min_len > 0) {
        /* Check the first byte before calling memcmp(). */
        c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (c == 0) {
            c = memcmp(a->ob_sval, b->ob_sval, min_len);
        }
    }
    if (c != 0) {
        Py_RETURN_RICHCOMPARE(c, 0, op);
    }
    /* Common prefix is equal: the lengths decide. */
    Py_RETURN_RICHCOMPARE(len_a, len_b, op);
}
1625
1626
static Py_hash_t
1627
bytes_hash(PyObject *self)
1628
30.7M
{
1629
30.7M
    PyBytesObject *a = _PyBytes_CAST(self);
1630
30.7M
    Py_hash_t hash = get_ob_shash(a);
1631
30.7M
    if (hash == -1) {
1632
        /* Can't fail */
1633
13.3M
        hash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
1634
13.3M
        set_ob_shash(a, hash);
1635
13.3M
    }
1636
30.7M
    return hash;
1637
30.7M
}
1638
1639
/* Subscript slot: bytes[index] and bytes[slice].

   An index yields an int object (negative indices count from the end,
   IndexError when out of range).  A slice yields a bytes object: the empty
   constant for an empty slice, `op` itself for a full slice of an exact
   bytes object, a contiguous copy for step 1, and a byte-by-byte copy for
   any other step.  Any other `item` type raises TypeError. */
static PyObject*
bytes_subscript(PyObject *op, PyObject* item)
{
    PyBytesObject *self = _PyBytes_CAST(op);

    if (_PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (index < 0) {
            index += PyBytes_GET_SIZE(self);
        }
        if (index < 0 || index >= PyBytes_GET_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError,
                            "index out of range");
            return NULL;
        }
        return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[index]);
    }

    if (!PySlice_Check(item)) {
        PyErr_Format(PyExc_TypeError,
                     "byte indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }

    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    const Py_ssize_t slicelength =
        PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start, &stop, step);

    if (slicelength <= 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (step == 1) {
        /* Whole-object slice of an exact bytes: no copy needed. */
        if (start == 0 &&
            slicelength == PyBytes_GET_SIZE(self) &&
            PyBytes_CheckExact(self))
        {
            return Py_NewRef(self);
        }
        return PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self) + start,
            slicelength);
    }

    /* Extended slice: gather every step-th byte. */
    const char *source_buf = PyBytes_AS_STRING(self);
    PyObject *result = PyBytes_FromStringAndSize(NULL, slicelength);
    if (result == NULL) {
        return NULL;
    }
    char *result_buf = PyBytes_AS_STRING(result);

    size_t cur = start;
    for (Py_ssize_t k = 0; k < slicelength; k++, cur += step) {
        result_buf[k] = source_buf[cur];
    }
    return result;
}
1704
1705
static int
1706
bytes_buffer_getbuffer(PyObject *op, Py_buffer *view, int flags)
1707
25.5M
{
1708
25.5M
    PyBytesObject *self = _PyBytes_CAST(op);
1709
25.5M
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1710
25.5M
                             1, flags);
1711
25.5M
}
1712
1713
static PySequenceMethods bytes_as_sequence = {
1714
    bytes_length,       /*sq_length*/
1715
    bytes_concat,       /*sq_concat*/
1716
    bytes_repeat,       /*sq_repeat*/
1717
    bytes_item,         /*sq_item*/
1718
    0,                  /*sq_slice*/
1719
    0,                  /*sq_ass_item*/
1720
    0,                  /*sq_ass_slice*/
1721
    bytes_contains      /*sq_contains*/
1722
};
1723
1724
static PyMappingMethods bytes_as_mapping = {
1725
    bytes_length,
1726
    bytes_subscript,
1727
    0,
1728
};
1729
1730
static PyBufferProcs bytes_as_buffer = {
1731
    bytes_buffer_getbuffer,
1732
    NULL,
1733
};
1734
1735
1736
/*[clinic input]
1737
bytes.__bytes__
1738
Convert this value to exact type bytes.
1739
[clinic start generated code]*/
1740
1741
static PyObject *
1742
bytes___bytes___impl(PyBytesObject *self)
1743
/*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1744
56.3k
{
1745
56.3k
    if (PyBytes_CheckExact(self)) {
1746
56.3k
        return Py_NewRef(self);
1747
56.3k
    }
1748
0
    else {
1749
0
        return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1750
0
    }
1751
56.3k
}
1752
1753
1754
0
/* Selector constants naming the left, right and both-ends strip modes. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
1757
1758
/*[clinic input]
1759
bytes.split
1760
1761
    sep: object = None
1762
        The delimiter according which to split the bytes.
1763
        None (the default value) means split on ASCII whitespace characters
1764
        (space, tab, return, newline, formfeed, vertical tab).
1765
    maxsplit: Py_ssize_t = -1
1766
        Maximum number of splits to do.
1767
        -1 (the default value) means no limit.
1768
1769
Return a list of the sections in the bytes, using sep as the delimiter.
1770
[clinic start generated code]*/
1771
1772
static PyObject *
1773
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1774
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1775
3.28M
{
1776
3.28M
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1777
3.28M
    const char *s = PyBytes_AS_STRING(self), *sub;
1778
3.28M
    Py_buffer vsub;
1779
3.28M
    PyObject *list;
1780
1781
3.28M
    if (maxsplit < 0)
1782
3.28M
        maxsplit = PY_SSIZE_T_MAX;
1783
3.28M
    if (sep == Py_None)
1784
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1785
3.28M
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1786
0
        return NULL;
1787
3.28M
    sub = vsub.buf;
1788
3.28M
    n = vsub.len;
1789
1790
3.28M
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1791
3.28M
    PyBuffer_Release(&vsub);
1792
3.28M
    return list;
1793
3.28M
}
1794
1795
/*[clinic input]
1796
@permit_long_docstring_body
1797
bytes.partition
1798
1799
    sep: Py_buffer
1800
    /
1801
1802
Partition the bytes into three parts using the given separator.
1803
1804
This will search for the separator sep in the bytes. If the separator is found,
1805
returns a 3-tuple containing the part before the separator, the separator
1806
itself, and the part after it.
1807
1808
If the separator is not found, returns a 3-tuple containing the original bytes
1809
object and two empty bytes objects.
1810
[clinic start generated code]*/
1811
1812
static PyObject *
1813
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1814
/*[clinic end generated code: output=f532b392a17ff695 input=31c55a0cebaf7722]*/
1815
0
{
1816
0
    return stringlib_partition(
1817
0
        (PyObject*) self,
1818
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1819
0
        sep->obj, (const char *)sep->buf, sep->len
1820
0
        );
1821
0
}
1822
1823
/*[clinic input]
1824
@permit_long_docstring_body
1825
bytes.rpartition
1826
1827
    sep: Py_buffer
1828
    /
1829
1830
Partition the bytes into three parts using the given separator.
1831
1832
This will search for the separator sep in the bytes, starting at the end. If
1833
the separator is found, returns a 3-tuple containing the part before the
1834
separator, the separator itself, and the part after it.
1835
1836
If the separator is not found, returns a 3-tuple containing two empty bytes
1837
objects and the original bytes object.
1838
[clinic start generated code]*/
1839
1840
static PyObject *
1841
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1842
/*[clinic end generated code: output=191b114cbb028e50 input=9ea5a3ab0b02bf52]*/
1843
0
{
1844
0
    return stringlib_rpartition(
1845
0
        (PyObject*) self,
1846
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1847
0
        sep->obj, (const char *)sep->buf, sep->len
1848
0
        );
1849
0
}
1850
1851
/*[clinic input]
1852
@permit_long_docstring_body
1853
bytes.rsplit = bytes.split
1854
1855
Return a list of the sections in the bytes, using sep as the delimiter.
1856
1857
Splitting is done starting at the end of the bytes and working to the front.
1858
[clinic start generated code]*/
1859
1860
static PyObject *
1861
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1862
/*[clinic end generated code: output=ba698d9ea01e1c8f input=55b6eaea1f3d7046]*/
1863
0
{
1864
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1865
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1866
0
    Py_buffer vsub;
1867
0
    PyObject *list;
1868
1869
0
    if (maxsplit < 0)
1870
0
        maxsplit = PY_SSIZE_T_MAX;
1871
0
    if (sep == Py_None)
1872
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1873
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1874
0
        return NULL;
1875
0
    sub = vsub.buf;
1876
0
    n = vsub.len;
1877
1878
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1879
0
    PyBuffer_Release(&vsub);
1880
0
    return list;
1881
0
}
1882
1883
1884
/*[clinic input]
1885
bytes.join
1886
1887
    iterable_of_bytes: object
1888
    /
1889
1890
Concatenate any number of bytes objects.
1891
1892
The bytes whose method is called is inserted in between each pair.
1893
1894
The result is returned as a new bytes object.
1895
1896
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1897
[clinic start generated code]*/
1898
1899
static PyObject *
1900
bytes_join_impl(PyBytesObject *self, PyObject *iterable_of_bytes)
1901
/*[clinic end generated code: output=0687abb94d7d438e input=7fe377b95bd549d2]*/
1902
8.87k
{
1903
8.87k
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1904
8.87k
}
1905
1906
/* Join the items of iterable using sep as the separator.

   sep must be a non-NULL bytes object: a NULL sep reports a bad internal
   call, any other non-bytes sep raises TypeError.  In both cases NULL is
   returned.  Otherwise the result of stringlib_bytes_join() is returned. */
PyObject *
PyBytes_Join(PyObject *sep, PyObject *iterable)
{
    if (sep != NULL && PyBytes_Check(sep)) {
        return stringlib_bytes_join(sep, iterable);
    }

    if (sep == NULL) {
        PyErr_BadInternalCall();
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "sep: expected bytes, got %T", sep);
    }
    return NULL;
}
1921
1922
/*[clinic input]
1923
@permit_long_summary
1924
@text_signature "($self, sub[, start[, end]], /)"
1925
bytes.find
1926
1927
    sub: object
1928
    start: slice_index(accept={int, NoneType}, c_default='0') = None
1929
         Optional start position. Default: start of the bytes.
1930
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
1931
         Optional stop position. Default: end of the bytes.
1932
    /
1933
1934
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1935
1936
Return -1 on failure.
1937
[clinic start generated code]*/
1938
1939
static PyObject *
1940
bytes_find_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1941
                Py_ssize_t end)
1942
/*[clinic end generated code: output=d5961a1c77b472a1 input=47d0929adafc6b0b]*/
1943
4.02M
{
1944
4.02M
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1945
4.02M
                          sub, start, end);
1946
4.02M
}
1947
1948
/*[clinic input]
1949
@permit_long_summary
1950
bytes.index = bytes.find
1951
1952
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1953
1954
Raise ValueError if the subsection is not found.
1955
[clinic start generated code]*/
1956
1957
static PyObject *
1958
bytes_index_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1959
                 Py_ssize_t end)
1960
/*[clinic end generated code: output=0da25cc74683ba42 input=1cb45ce71456a269]*/
1961
0
{
1962
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1963
0
                           sub, start, end);
1964
0
}
1965
1966
/*[clinic input]
1967
@permit_long_summary
1968
bytes.rfind = bytes.find
1969
1970
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1971
1972
Return -1 on failure.
1973
[clinic start generated code]*/
1974
1975
static PyObject *
1976
bytes_rfind_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1977
                 Py_ssize_t end)
1978
/*[clinic end generated code: output=51b60fa4ad011c09 input=c9473d714251f1ab]*/
1979
25.7k
{
1980
25.7k
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1981
25.7k
                           sub, start, end);
1982
25.7k
}
1983
1984
/*[clinic input]
1985
@permit_long_summary
1986
bytes.rindex = bytes.find
1987
1988
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start,end].
1989
1990
Raise ValueError if the subsection is not found.
1991
[clinic start generated code]*/
1992
1993
static PyObject *
1994
bytes_rindex_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
1995
                  Py_ssize_t end)
1996
/*[clinic end generated code: output=42bf674e0a0aabf6 input=bb5f473c64610c43]*/
1997
0
{
1998
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1999
0
                            sub, start, end);
2000
0
}
2001
2002
2003
/* Strip from self every leading and/or trailing byte that occurs in the
   buffer exported by sepobj.  striptype selects the side(s): the left side
   is processed unless it is RIGHTSTRIP, the right side unless it is
   LEFTSTRIP.  Returns NULL if sepobj does not export a simple buffer. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_buffer chars_view;

    if (PyObject_GetBuffer(sepobj, &chars_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    const char *chars = chars_view.buf;
    const Py_ssize_t nchars = chars_view.len;

    /* left: index of the first byte that is kept. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (memchr(chars, Py_CHARMASK(data[left]), nchars) == NULL) {
                break;
            }
        }
    }

    /* right: one past the last byte that is kept; never drops below left. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left
               && memchr(chars, Py_CHARMASK(data[right - 1]), nchars) != NULL)
        {
            right--;
        }
    }

    PyBuffer_Release(&chars_view);

    /* Nothing was removed from an exact bytes object: reuse it. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2041
2042
2043
/* Strip leading and/or trailing bytes for which Py_ISSPACE() is true.
   striptype selects the side(s): the left side is processed unless it is
   RIGHTSTRIP, the right side unless it is LEFTSTRIP. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    const char *data = PyBytes_AS_STRING(self);
    const Py_ssize_t size = PyBytes_GET_SIZE(self);

    /* left: index of the first byte that is kept. */
    Py_ssize_t left = 0;
    if (striptype != RIGHTSTRIP) {
        for (; left < size; left++) {
            if (!Py_ISSPACE(data[left])) {
                break;
            }
        }
    }

    /* right: one past the last byte that is kept; never drops below left. */
    Py_ssize_t right = size;
    if (striptype != LEFTSTRIP) {
        while (right > left && Py_ISSPACE(data[right - 1])) {
            right--;
        }
    }

    /* Nothing was removed from an exact bytes object: reuse it. */
    if (left == 0 && right == size && PyBytes_CheckExact(self)) {
        return Py_NewRef(self);
    }
    return PyBytes_FromStringAndSize(data + left, right - left);
}
2070
2071
2072
/* Dispatch a strip request: None selects do_strip(), any other argument is
   passed to do_xstrip() as the set of bytes to remove. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2080
2081
/*[clinic input]
2082
@permit_long_docstring_body
2083
bytes.strip
2084
2085
    bytes: object = None
2086
    /
2087
2088
Strip leading and trailing bytes contained in the argument.
2089
2090
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2091
[clinic start generated code]*/
2092
2093
static PyObject *
2094
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2095
/*[clinic end generated code: output=c7c228d3bd104a1b input=71904cd278c0ee03]*/
2096
0
{
2097
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2098
0
}
2099
2100
/*[clinic input]
2101
bytes.lstrip
2102
2103
    bytes: object = None
2104
    /
2105
2106
Strip leading bytes contained in the argument.
2107
2108
If the argument is omitted or None, strip leading  ASCII whitespace.
2109
[clinic start generated code]*/
2110
2111
static PyObject *
2112
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2113
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2114
0
{
2115
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2116
0
}
2117
2118
/*[clinic input]
2119
bytes.rstrip
2120
2121
    bytes: object = None
2122
    /
2123
2124
Strip trailing bytes contained in the argument.
2125
2126
If the argument is omitted or None, strip trailing ASCII whitespace.
2127
[clinic start generated code]*/
2128
2129
static PyObject *
2130
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2131
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2132
0
{
2133
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2134
0
}
2135
2136
2137
/*[clinic input]
2138
@permit_long_summary
2139
bytes.count = bytes.find
2140
2141
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
2142
[clinic start generated code]*/
2143
2144
static PyObject *
2145
bytes_count_impl(PyBytesObject *self, PyObject *sub, Py_ssize_t start,
2146
                 Py_ssize_t end)
2147
/*[clinic end generated code: output=9848140b9be17d0f input=bb2f136f83f0d30e]*/
2148
4.95M
{
2149
4.95M
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2150
4.95M
                           sub, start, end);
2151
4.95M
}
2152
2153
2154
/*[clinic input]
2155
bytes.translate
2156
2157
    table: object
2158
        Translation table, which must be a bytes object of length 256.
2159
    /
2160
    delete as deletechars: object(c_default="NULL") = b''
2161
2162
Return a copy with each character mapped by the given translation table.
2163
2164
All characters occurring in the optional argument delete are removed.
2165
The remaining characters are mapped through the given translation table.
2166
[clinic start generated code]*/
2167
2168
static PyObject *
2169
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2170
                     PyObject *deletechars)
2171
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2172
0
{
2173
0
    const char *input;
2174
0
    char *output;
2175
0
    Py_buffer table_view = {NULL, NULL};
2176
0
    Py_buffer del_table_view = {NULL, NULL};
2177
0
    const char *table_chars;
2178
0
    Py_ssize_t i, c, changed = 0;
2179
0
    PyObject *input_obj = (PyObject*)self;
2180
0
    const char *output_start, *del_table_chars=NULL;
2181
0
    Py_ssize_t inlen, tablen, dellen = 0;
2182
0
    PyObject *result;
2183
0
    int trans_table[256];
2184
2185
0
    if (PyBytes_Check(table)) {
2186
0
        table_chars = PyBytes_AS_STRING(table);
2187
0
        tablen = PyBytes_GET_SIZE(table);
2188
0
    }
2189
0
    else if (table == Py_None) {
2190
0
        table_chars = NULL;
2191
0
        tablen = 256;
2192
0
    }
2193
0
    else {
2194
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2195
0
            return NULL;
2196
0
        table_chars = table_view.buf;
2197
0
        tablen = table_view.len;
2198
0
    }
2199
2200
0
    if (tablen != 256) {
2201
0
        PyErr_SetString(PyExc_ValueError,
2202
0
          "translation table must be 256 characters long");
2203
0
        PyBuffer_Release(&table_view);
2204
0
        return NULL;
2205
0
    }
2206
2207
0
    if (deletechars != NULL) {
2208
0
        if (PyBytes_Check(deletechars)) {
2209
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2210
0
            dellen = PyBytes_GET_SIZE(deletechars);
2211
0
        }
2212
0
        else {
2213
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2214
0
                PyBuffer_Release(&table_view);
2215
0
                return NULL;
2216
0
            }
2217
0
            del_table_chars = del_table_view.buf;
2218
0
            dellen = del_table_view.len;
2219
0
        }
2220
0
    }
2221
0
    else {
2222
0
        del_table_chars = NULL;
2223
0
        dellen = 0;
2224
0
    }
2225
2226
0
    inlen = PyBytes_GET_SIZE(input_obj);
2227
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2228
0
    if (result == NULL) {
2229
0
        PyBuffer_Release(&del_table_view);
2230
0
        PyBuffer_Release(&table_view);
2231
0
        return NULL;
2232
0
    }
2233
0
    output_start = output = PyBytes_AS_STRING(result);
2234
0
    input = PyBytes_AS_STRING(input_obj);
2235
2236
0
    if (dellen == 0 && table_chars != NULL) {
2237
        /* If no deletions are required, use faster code */
2238
0
        for (i = inlen; --i >= 0; ) {
2239
0
            c = Py_CHARMASK(*input++);
2240
0
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2241
0
                changed = 1;
2242
0
        }
2243
0
        if (!changed && PyBytes_CheckExact(input_obj)) {
2244
0
            Py_SETREF(result, Py_NewRef(input_obj));
2245
0
        }
2246
0
        PyBuffer_Release(&del_table_view);
2247
0
        PyBuffer_Release(&table_view);
2248
0
        return result;
2249
0
    }
2250
2251
0
    if (table_chars == NULL) {
2252
0
        for (i = 0; i < 256; i++)
2253
0
            trans_table[i] = Py_CHARMASK(i);
2254
0
    } else {
2255
0
        for (i = 0; i < 256; i++)
2256
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2257
0
    }
2258
0
    PyBuffer_Release(&table_view);
2259
2260
0
    for (i = 0; i < dellen; i++)
2261
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2262
0
    PyBuffer_Release(&del_table_view);
2263
2264
0
    for (i = inlen; --i >= 0; ) {
2265
0
        c = Py_CHARMASK(*input++);
2266
0
        if (trans_table[c] != -1)
2267
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2268
0
                continue;
2269
0
        changed = 1;
2270
0
    }
2271
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2272
0
        Py_DECREF(result);
2273
0
        return Py_NewRef(input_obj);
2274
0
    }
2275
    /* Fix the size of the resulting byte string */
2276
0
    if (inlen > 0)
2277
0
        _PyBytes_Resize(&result, output - output_start);
2278
0
    return result;
2279
0
}
2280
2281
2282
/*[clinic input]
2283
2284
@permit_long_summary
2285
@permit_long_docstring_body
2286
@staticmethod
2287
bytes.maketrans
2288
2289
    frm: Py_buffer
2290
    to: Py_buffer
2291
    /
2292
2293
Return a translation table usable for the bytes or bytearray translate method.
2294
2295
The returned table will be one where each byte in frm is mapped to the byte at
2296
the same position in to.
2297
2298
The bytes objects frm and to must be of the same length.
2299
[clinic start generated code]*/
2300
2301
static PyObject *
2302
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2303
/*[clinic end generated code: output=a36f6399d4b77f6f input=a06b75f44d933fb3]*/
2304
28
{
2305
28
    return _Py_bytes_maketrans(frm, to);
2306
28
}
2307
2308
2309
/*[clinic input]
2310
@permit_long_docstring_body
2311
bytes.replace
2312
2313
    old: Py_buffer
2314
    new: Py_buffer
2315
    count: Py_ssize_t = -1
2316
        Maximum number of occurrences to replace.
2317
        -1 (the default value) means replace all occurrences.
2318
    /
2319
2320
Return a copy with all occurrences of substring old replaced by new.
2321
2322
If the optional argument count is given, only the first count occurrences are
2323
replaced.
2324
[clinic start generated code]*/
2325
2326
static PyObject *
2327
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2328
                   Py_ssize_t count)
2329
/*[clinic end generated code: output=994fa588b6b9c104 input=8b99a9ab32bc06a2]*/
2330
46.7k
{
2331
46.7k
    return stringlib_replace((PyObject *)self,
2332
46.7k
                             (const char *)old->buf, old->len,
2333
46.7k
                             (const char *)new->buf, new->len, count);
2334
46.7k
}
2335
2336
/** End DALKE **/
2337
2338
/*[clinic input]
2339
bytes.removeprefix as bytes_removeprefix
2340
2341
    prefix: Py_buffer
2342
    /
2343
2344
Return a bytes object with the given prefix string removed if present.
2345
2346
If the bytes starts with the prefix string, return bytes[len(prefix):].
2347
Otherwise, return a copy of the original bytes.
2348
[clinic start generated code]*/
2349
2350
static PyObject *
2351
bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2352
/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2353
0
{
2354
0
    const char *self_start = PyBytes_AS_STRING(self);
2355
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2356
0
    const char *prefix_start = prefix->buf;
2357
0
    Py_ssize_t prefix_len = prefix->len;
2358
2359
0
    if (self_len >= prefix_len
2360
0
        && prefix_len > 0
2361
0
        && memcmp(self_start, prefix_start, prefix_len) == 0)
2362
0
    {
2363
0
        return PyBytes_FromStringAndSize(self_start + prefix_len,
2364
0
                                         self_len - prefix_len);
2365
0
    }
2366
2367
0
    if (PyBytes_CheckExact(self)) {
2368
0
        return Py_NewRef(self);
2369
0
    }
2370
2371
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2372
0
}
2373
2374
/*[clinic input]
2375
bytes.removesuffix as bytes_removesuffix
2376
2377
    suffix: Py_buffer
2378
    /
2379
2380
Return a bytes object with the given suffix string removed if present.
2381
2382
If the bytes ends with the suffix string and that suffix is not empty,
2383
return bytes[:-len(prefix)].  Otherwise, return a copy of the original
2384
bytes.
2385
[clinic start generated code]*/
2386
2387
static PyObject *
2388
bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2389
/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2390
0
{
2391
0
    const char *self_start = PyBytes_AS_STRING(self);
2392
0
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2393
0
    const char *suffix_start = suffix->buf;
2394
0
    Py_ssize_t suffix_len = suffix->len;
2395
2396
0
    if (self_len >= suffix_len
2397
0
        && suffix_len > 0
2398
0
        && memcmp(self_start + self_len - suffix_len,
2399
0
                  suffix_start, suffix_len) == 0)
2400
0
    {
2401
0
        return PyBytes_FromStringAndSize(self_start,
2402
0
                                         self_len - suffix_len);
2403
0
    }
2404
2405
0
    if (PyBytes_CheckExact(self)) {
2406
0
        return Py_NewRef(self);
2407
0
    }
2408
2409
0
    return PyBytes_FromStringAndSize(self_start, self_len);
2410
0
}
2411
2412
/*[clinic input]
2413
@permit_long_summary
2414
@text_signature "($self, prefix[, start[, end]], /)"
2415
bytes.startswith
2416
2417
    prefix as subobj: object
2418
        A bytes or a tuple of bytes to try.
2419
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2420
        Optional start position. Default: start of the bytes.
2421
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2422
        Optional stop position. Default: end of the bytes.
2423
    /
2424
2425
Return True if the bytes starts with the specified prefix, False otherwise.
2426
[clinic start generated code]*/
2427
2428
static PyObject *
2429
bytes_startswith_impl(PyBytesObject *self, PyObject *subobj,
2430
                      Py_ssize_t start, Py_ssize_t end)
2431
/*[clinic end generated code: output=b1e8da1cbd528e8c input=a14efd070f15be80]*/
2432
398k
{
2433
398k
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2434
398k
                                subobj, start, end);
2435
398k
}
2436
2437
/*[clinic input]
2438
@permit_long_summary
2439
@text_signature "($self, suffix[, start[, end]], /)"
2440
bytes.endswith
2441
2442
    suffix as subobj: object
2443
        A bytes or a tuple of bytes to try.
2444
    start: slice_index(accept={int, NoneType}, c_default='0') = None
2445
         Optional start position. Default: start of the bytes.
2446
    end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
2447
         Optional stop position. Default: end of the bytes.
2448
    /
2449
2450
Return True if the bytes ends with the specified suffix, False otherwise.
2451
[clinic start generated code]*/
2452
2453
static PyObject *
2454
bytes_endswith_impl(PyBytesObject *self, PyObject *subobj, Py_ssize_t start,
2455
                    Py_ssize_t end)
2456
/*[clinic end generated code: output=038b633111f3629d input=49e383eaaf292713]*/
2457
0
{
2458
0
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2459
0
                              subobj, start, end);
2460
0
}
2461
2462
2463
/*[clinic input]
2464
bytes.decode
2465
2466
    encoding: str(c_default="NULL") = 'utf-8'
2467
        The encoding with which to decode the bytes.
2468
    errors: str(c_default="NULL") = 'strict'
2469
        The error handling scheme to use for the handling of decoding errors.
2470
        The default is 'strict' meaning that decoding errors raise a
2471
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2472
        as well as any other name registered with codecs.register_error that
2473
        can handle UnicodeDecodeErrors.
2474
2475
Decode the bytes using the codec registered for encoding.
2476
[clinic start generated code]*/
2477
2478
static PyObject *
2479
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2480
                  const char *errors)
2481
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2482
8.25M
{
2483
8.25M
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2484
8.25M
}
2485
2486
2487
/*[clinic input]
2488
@permit_long_docstring_body
2489
bytes.splitlines
2490
2491
    keepends: bool = False
2492
2493
Return a list of the lines in the bytes, breaking at line boundaries.
2494
2495
Line breaks are not included in the resulting list unless keepends is given and
2496
true.
2497
[clinic start generated code]*/
2498
2499
static PyObject *
2500
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2501
/*[clinic end generated code: output=3484149a5d880ffb input=d17968d2a355fe55]*/
2502
0
{
2503
0
    return stringlib_splitlines(
2504
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2505
0
        PyBytes_GET_SIZE(self), keepends
2506
0
        );
2507
0
}
2508
2509
/*[clinic input]
2510
@classmethod
2511
bytes.fromhex
2512
2513
    string: object
2514
    /
2515
2516
Create a bytes object from a string of hexadecimal numbers.
2517
2518
Spaces between two numbers are accepted.
2519
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2520
[clinic start generated code]*/
2521
2522
static PyObject *
2523
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2524
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
2525
45.7k
{
2526
45.7k
    PyObject *result = _PyBytes_FromHex(string, 0);
2527
45.7k
    if (type != &PyBytes_Type && result != NULL) {
2528
0
        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2529
0
    }
2530
45.7k
    return result;
2531
45.7k
}
2532
2533
/* Decode pairs of hexadecimal digits from `string` into a new object.

   `string` must be a str or an object exporting a buffer; any other type
   raises TypeError.  Runs of ASCII whitespace between digit pairs are
   skipped.  When `use_bytearray` is non-zero the output is accumulated with
   _PyBytesWriter_CreateByteArray(), otherwise with PyBytesWriter_Create().

   Returns the finished writer's object, or NULL with an exception set:
   ValueError for a non-hexadecimal character (including any non-ASCII
   character in a str) or for an odd number of digits.

   A buffer obtained with PyBUF_SIMPLE is not guaranteed to be followed by a
   NUL byte, so every read of *str is bounded by `end`. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *start, *end;
    PyBytesWriter *writer = NULL;
    Py_buffer view;
    view.obj = NULL;

    if (PyUnicode_Check(string)) {
        hexlen = PyUnicode_GET_LENGTH(string);

        if (!PyUnicode_IS_ASCII(string)) {
            const void *data = PyUnicode_DATA(string);
            int kind = PyUnicode_KIND(string);
            Py_ssize_t i;

            /* search for the first non-ASCII character */
            for (i = 0; i < hexlen; i++) {
                if (PyUnicode_READ(kind, data, i) >= 128)
                    break;
            }
            invalid_char = i;
            goto error;
        }

        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
        str = PyUnicode_1BYTE_DATA(string);
    }
    else if (PyObject_CheckBuffer(string)) {
        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
            return NULL;
        }
        hexlen = view.len;
        str = view.buf;
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "fromhex() argument must be str or bytes-like, not %T",
                     string);
        return NULL;
    }

    /* This overestimates if there are spaces */
    if (use_bytearray) {
        writer = _PyBytesWriter_CreateByteArray(hexlen / 2);
    }
    else {
        writer = PyBytesWriter_Create(hexlen / 2);
    }
    if (writer == NULL) {
        goto release_buffer;
    }
    char *buf = PyBytesWriter_GetData(writer);

    start = str;
    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input; never look at or past `end` */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (str < end && Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        /* The input ended after a single digit: report the odd length
           without reading the byte at `end`. */
        if (str >= end) {
            invalid_char = -1;
            goto error;
        }

        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - start;
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    if (view.obj != NULL) {
       PyBuffer_Release(&view);
    }
    return PyBytesWriter_FinishWithPointer(writer, buf);

  error:
    /* invalid_char == -1 encodes "odd number of digits"; any other value is
       the offset of the offending character. */
    if (invalid_char == -1) {
        PyErr_SetString(PyExc_ValueError,
                        "fromhex() arg must contain an even number of hexadecimal digits");
    } else {
        PyErr_Format(PyExc_ValueError,
                     "non-hexadecimal number found in "
                     "fromhex() arg at position %zd", invalid_char);
    }
    /* writer is still NULL when the non-ASCII str branch jumps here. */
    PyBytesWriter_Discard(writer);

  release_buffer:
    if (view.obj != NULL) {
        PyBuffer_Release(&view);
    }
    return NULL;
}
2645
2646
/*[clinic input]
2647
bytes.hex
2648
2649
    sep: object = NULL
2650
        An optional single character or byte to separate hex bytes.
2651
    bytes_per_sep: int = 1
2652
        How many bytes between separators.  Positive values count from the
2653
        right, negative values count from the left.
2654
2655
Create a string of hexadecimal numbers from a bytes object.
2656
2657
Example:
2658
>>> value = b'\xb9\x01\xef'
2659
>>> value.hex()
2660
'b901ef'
2661
>>> value.hex(':')
2662
'b9:01:ef'
2663
>>> value.hex(':', 2)
2664
'b9:01ef'
2665
>>> value.hex(':', -2)
2666
'b901:ef'
2667
[clinic start generated code]*/
2668
2669
static PyObject *
2670
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2671
/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2672
0
{
2673
0
    const char *argbuf = PyBytes_AS_STRING(self);
2674
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2675
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2676
0
}
2677
2678
/* __getnewargs__: build a 1-tuple holding a bytes object made from this
   object's data and size (the "y#" format of Py_BuildValue). */
static PyObject *
bytes_getnewargs(PyObject *op, PyObject *Py_UNUSED(dummy))
{
    PyBytesObject *bytes_obj = _PyBytes_CAST(op);
    PyObject *args = Py_BuildValue("(y#)", bytes_obj->ob_sval,
                                   Py_SIZE(bytes_obj));
    return args;
}
2684
2685
2686
static PyMethodDef
2687
bytes_methods[] = {
2688
    {"__getnewargs__", bytes_getnewargs,  METH_NOARGS},
2689
    BYTES___BYTES___METHODDEF
2690
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2691
     _Py_capitalize__doc__},
2692
    STRINGLIB_CENTER_METHODDEF
2693
    BYTES_COUNT_METHODDEF
2694
    BYTES_DECODE_METHODDEF
2695
    BYTES_ENDSWITH_METHODDEF
2696
    STRINGLIB_EXPANDTABS_METHODDEF
2697
    BYTES_FIND_METHODDEF
2698
    BYTES_FROMHEX_METHODDEF
2699
    BYTES_HEX_METHODDEF
2700
    BYTES_INDEX_METHODDEF
2701
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2702
     _Py_isalnum__doc__},
2703
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2704
     _Py_isalpha__doc__},
2705
    {"isascii", stringlib_isascii, METH_NOARGS,
2706
     _Py_isascii__doc__},
2707
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2708
     _Py_isdigit__doc__},
2709
    {"islower", stringlib_islower, METH_NOARGS,
2710
     _Py_islower__doc__},
2711
    {"isspace", stringlib_isspace, METH_NOARGS,
2712
     _Py_isspace__doc__},
2713
    {"istitle", stringlib_istitle, METH_NOARGS,
2714
     _Py_istitle__doc__},
2715
    {"isupper", stringlib_isupper, METH_NOARGS,
2716
     _Py_isupper__doc__},
2717
    BYTES_JOIN_METHODDEF
2718
    STRINGLIB_LJUST_METHODDEF
2719
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2720
    BYTES_LSTRIP_METHODDEF
2721
    BYTES_MAKETRANS_METHODDEF
2722
    BYTES_PARTITION_METHODDEF
2723
    BYTES_REPLACE_METHODDEF
2724
    BYTES_REMOVEPREFIX_METHODDEF
2725
    BYTES_REMOVESUFFIX_METHODDEF
2726
    BYTES_RFIND_METHODDEF
2727
    BYTES_RINDEX_METHODDEF
2728
    STRINGLIB_RJUST_METHODDEF
2729
    BYTES_RPARTITION_METHODDEF
2730
    BYTES_RSPLIT_METHODDEF
2731
    BYTES_RSTRIP_METHODDEF
2732
    BYTES_SPLIT_METHODDEF
2733
    BYTES_SPLITLINES_METHODDEF
2734
    BYTES_STARTSWITH_METHODDEF
2735
    BYTES_STRIP_METHODDEF
2736
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2737
     _Py_swapcase__doc__},
2738
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2739
    BYTES_TRANSLATE_METHODDEF
2740
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2741
    STRINGLIB_ZFILL_METHODDEF
2742
    {NULL,     NULL}                         /* sentinel */
2743
};
2744
2745
/* nb_remainder slot: format `arg` using self as the format string.
   Returns NotImplemented when self is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);
        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2754
2755
static PyNumberMethods bytes_as_number = {
2756
    0,              /*nb_add*/
2757
    0,              /*nb_subtract*/
2758
    0,              /*nb_multiply*/
2759
    bytes_mod,      /*nb_remainder*/
2760
};
2761
2762
static PyObject *
2763
bytes_subtype_new(PyTypeObject *, PyObject *);
2764
2765
/*[clinic input]
2766
@classmethod
2767
bytes.__new__ as bytes_new
2768
2769
    source as x: object = NULL
2770
    encoding: str = NULL
2771
    errors: str = NULL
2772
2773
[clinic start generated code]*/
2774
2775
static PyObject *
2776
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2777
               const char *errors)
2778
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2779
801k
{
2780
801k
    PyObject *bytes;
2781
801k
    PyObject *func;
2782
801k
    Py_ssize_t size;
2783
2784
801k
    if (x == NULL) {
2785
0
        if (encoding != NULL || errors != NULL) {
2786
0
            PyErr_SetString(PyExc_TypeError,
2787
0
                            encoding != NULL ?
2788
0
                            "encoding without a string argument" :
2789
0
                            "errors without a string argument");
2790
0
            return NULL;
2791
0
        }
2792
0
        bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
2793
0
    }
2794
801k
    else if (encoding != NULL) {
2795
        /* Encode via the codec registry */
2796
253k
        if (!PyUnicode_Check(x)) {
2797
0
            PyErr_SetString(PyExc_TypeError,
2798
0
                            "encoding without a string argument");
2799
0
            return NULL;
2800
0
        }
2801
253k
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2802
253k
    }
2803
547k
    else if (errors != NULL) {
2804
0
        PyErr_SetString(PyExc_TypeError,
2805
0
                        PyUnicode_Check(x) ?
2806
0
                        "string argument without an encoding" :
2807
0
                        "errors without a string argument");
2808
0
        return NULL;
2809
0
    }
2810
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2811
       integer argument before deferring to PyBytes_FromObject, something
2812
       PyObject_Bytes doesn't do. */
2813
547k
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2814
56.3k
        bytes = _PyObject_CallNoArgs(func);
2815
56.3k
        Py_DECREF(func);
2816
56.3k
        if (bytes == NULL)
2817
0
            return NULL;
2818
56.3k
        if (!PyBytes_Check(bytes)) {
2819
0
            PyErr_Format(PyExc_TypeError,
2820
0
                         "%T.__bytes__() must return a bytes, not %T",
2821
0
                         x, bytes);
2822
0
            Py_DECREF(bytes);
2823
0
            return NULL;
2824
0
        }
2825
56.3k
    }
2826
491k
    else if (PyErr_Occurred())
2827
0
        return NULL;
2828
491k
    else if (PyUnicode_Check(x)) {
2829
0
        PyErr_SetString(PyExc_TypeError,
2830
0
                        "string argument without an encoding");
2831
0
        return NULL;
2832
0
    }
2833
    /* Is it an integer? */
2834
491k
    else if (_PyIndex_Check(x)) {
2835
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2836
0
        if (size == -1 && PyErr_Occurred()) {
2837
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2838
0
                return NULL;
2839
0
            PyErr_Clear();  /* fall through */
2840
0
            bytes = PyBytes_FromObject(x);
2841
0
        }
2842
0
        else {
2843
0
            if (size < 0) {
2844
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2845
0
                return NULL;
2846
0
            }
2847
0
            bytes = _PyBytes_FromSize(size, 1);
2848
0
        }
2849
0
    }
2850
491k
    else {
2851
491k
        bytes = PyBytes_FromObject(x);
2852
491k
    }
2853
2854
801k
    if (bytes != NULL && type != &PyBytes_Type) {
2855
0
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2856
0
    }
2857
2858
801k
    return bytes;
2859
801k
}
2860
2861
/* Build a bytes object from an object exporting the buffer protocol.
 *
 * The buffer is requested with PyBUF_FULL_RO and copied into a fresh
 * writer in C-contiguous order.  Returns a new reference, or NULL with an
 * exception set.  The buffer view is released on every path after it has
 * been acquired.
 */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(view.len);
    if (writer == NULL) {
        PyBuffer_Release(&view);
        return NULL;
    }

    void *dest = PyBytesWriter_GetData(writer);
    if (PyBuffer_ToContiguous(dest, &view, view.len, 'C') < 0) {
        PyBytesWriter_Discard(writer);
        PyBuffer_Release(&view);
        return NULL;
    }

    PyBuffer_Release(&view);
    return PyBytesWriter_Finish(writer);
}
2886
2887
/* Build a bytes object from a list of integers in range(0, 256).
 *
 * The list length is re-read on every iteration and the writer is grown
 * on demand, so the conversion copes with the list changing size while
 * items are being converted.  Returns a new reference, or NULL with an
 * exception set.
 */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    PyBytesWriter *writer = PyBytesWriter_Create(PyList_GET_SIZE(x));
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    Py_ssize_t index = 0;
    while (index < PyList_GET_SIZE(x)) {
        /* Hold a reference to the item while it is being converted. */
        PyObject *element = PyList_GET_ITEM(x, index);
        Py_INCREF(element);
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        Py_DECREF(element);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        /* Out of room: ask for one more byte and refresh the capacity. */
        if (index >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                        capacity + 1, out);
            if (out == NULL) {
                goto fail;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        index++;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
2927
2928
/* Build a bytes object from a tuple of integers in range(0, 256).
 *
 * The writer is created with the tuple's length and filled in order.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    const Py_ssize_t length = PyTuple_GET_SIZE(x);

    PyBytesWriter *writer = PyBytesWriter_Create(length);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);

    Py_ssize_t index = 0;
    while (index < length) {
        PyObject *element = PyTuple_GET_ITEM(x, index);
        Py_ssize_t byte = PyNumber_AsSsize_t(element, NULL);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }
        *out++ = (char) byte;
        index++;
    }
    return PyBytesWriter_Finish(writer);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
2960
2961
/* Build a bytes object by exhausting the iterator `it`.
 *
 * `x` is only used to obtain a length hint (default 64) for the initial
 * allocation; the writer is grown whenever more items arrive than fit.
 * Every item must convert to an integer in range(0, 256).  Returns a new
 * reference, or NULL with an exception set.
 */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    Py_ssize_t hint = PyObject_LengthHint(x, 64);
    if (hint == -1 && PyErr_Occurred()) {
        return NULL;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(hint);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    Py_ssize_t capacity = _PyBytesWriter_GetAllocated(writer);

    /* Run the iterator to exhaustion */
    Py_ssize_t count = 0;
    PyObject *item;
    while ((item = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t byte = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (byte == -1 && PyErr_Occurred()) {
            goto fail;
        }

        /* Range check */
        if (!(0 <= byte && byte < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        /* Append the byte, growing the writer first when it is full */
        if (count >= capacity) {
            out = _PyBytesWriter_ResizeAndUpdatePointer(writer,
                                                        capacity + 1, out);
            if (out == NULL) {
                goto fail;
            }
            capacity = _PyBytesWriter_GetAllocated(writer);
        }
        *out++ = (char) byte;
        count++;
    }
    /* PyIter_Next() returned NULL: distinguish an error from exhaustion. */
    if (PyErr_Occurred()) {
        goto fail;
    }
    return PyBytesWriter_FinishWithPointer(writer, out);

fail:
    PyBytesWriter_Discard(writer);
    return NULL;
}
3020
3021
/* Convert an arbitrary object to bytes.
 *
 * Tried in order: an exact bytes object (returned as a new reference to
 * the same object), a buffer exporter, an exact list, an exact tuple, and
 * finally any non-str iterable.  Anything else raises TypeError.
 * Returns a new reference, or NULL with an exception set.
 */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        return Py_NewRef(x);
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        return _PyBytes_FromBuffer(x);
    }

    if (PyList_CheckExact(x)) {
        return _PyBytes_FromList(x);
    }

    if (PyTuple_CheckExact(x)) {
        return _PyBytes_FromTuple(x);
    }

    if (!PyUnicode_Check(x)) {
        PyObject *iterator = PyObject_GetIter(x);
        if (iterator == NULL) {
            /* Only a TypeError is replaced by the generic message below;
               any other failure is propagated as is. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
        }
        else {
            PyObject *converted = _PyBytes_FromIterator(iterator, x);
            Py_DECREF(iterator);
            return converted;
        }
    }

    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
3062
3063
/* This allocator is needed for subclasses that don't want to use __new__.
3064
 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
3065
 *
3066
 * This allocator will be removed when ob_shash is removed.
3067
 */
3068
static PyObject *
3069
bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
3070
0
{
3071
0
    PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
3072
0
    if (obj == NULL) {
3073
0
        return NULL;
3074
0
    }
3075
0
    set_ob_shash(obj, -1);
3076
0
    return (PyObject*)obj;
3077
0
}
3078
3079
static PyObject *
3080
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
3081
0
{
3082
0
    PyObject *pnew;
3083
0
    Py_ssize_t n;
3084
3085
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
3086
0
    assert(PyBytes_Check(tmp));
3087
0
    n = PyBytes_GET_SIZE(tmp);
3088
0
    pnew = type->tp_alloc(type, n);
3089
0
    if (pnew != NULL) {
3090
0
        memcpy(PyBytes_AS_STRING(pnew),
3091
0
                  PyBytes_AS_STRING(tmp), n+1);
3092
0
        set_ob_shash((PyBytesObject *)pnew,
3093
0
            get_ob_shash((PyBytesObject *)tmp));
3094
0
    }
3095
0
    return pnew;
3096
0
}
3097
3098
PyDoc_STRVAR(bytes_doc,
3099
"bytes(iterable_of_ints) -> bytes\n\
3100
bytes(string, encoding[, errors]) -> bytes\n\
3101
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3102
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3103
bytes() -> empty bytes object\n\
3104
\n\
3105
Construct an immutable array of bytes from:\n\
3106
  - an iterable yielding integers in range(256)\n\
3107
  - a text string encoded using the specified encoding\n\
3108
  - any object implementing the buffer API.\n\
3109
  - an integer");
3110
3111
static PyObject *bytes_iter(PyObject *seq);
3112
3113
PyTypeObject PyBytes_Type = {
3114
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3115
    "bytes",
3116
    PyBytesObject_SIZE,
3117
    sizeof(char),
3118
    0,                                          /* tp_dealloc */
3119
    0,                                          /* tp_vectorcall_offset */
3120
    0,                                          /* tp_getattr */
3121
    0,                                          /* tp_setattr */
3122
    0,                                          /* tp_as_async */
3123
    bytes_repr,                                 /* tp_repr */
3124
    &bytes_as_number,                           /* tp_as_number */
3125
    &bytes_as_sequence,                         /* tp_as_sequence */
3126
    &bytes_as_mapping,                          /* tp_as_mapping */
3127
    bytes_hash,                                 /* tp_hash */
3128
    0,                                          /* tp_call */
3129
    bytes_str,                                  /* tp_str */
3130
    PyObject_GenericGetAttr,                    /* tp_getattro */
3131
    0,                                          /* tp_setattro */
3132
    &bytes_as_buffer,                           /* tp_as_buffer */
3133
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3134
        Py_TPFLAGS_BYTES_SUBCLASS |
3135
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
3136
    bytes_doc,                                  /* tp_doc */
3137
    0,                                          /* tp_traverse */
3138
    0,                                          /* tp_clear */
3139
    bytes_richcompare,                          /* tp_richcompare */
3140
    0,                                          /* tp_weaklistoffset */
3141
    bytes_iter,                                 /* tp_iter */
3142
    0,                                          /* tp_iternext */
3143
    bytes_methods,                              /* tp_methods */
3144
    0,                                          /* tp_members */
3145
    0,                                          /* tp_getset */
3146
    0,                                          /* tp_base */
3147
    0,                                          /* tp_dict */
3148
    0,                                          /* tp_descr_get */
3149
    0,                                          /* tp_descr_set */
3150
    0,                                          /* tp_dictoffset */
3151
    0,                                          /* tp_init */
3152
    bytes_alloc,                                /* tp_alloc */
3153
    bytes_new,                                  /* tp_new */
3154
    PyObject_Free,                              /* tp_free */
3155
    .tp_version_tag = _Py_TYPE_VERSION_BYTES,
3156
};
3157
3158
void
3159
PyBytes_Concat(PyObject **pv, PyObject *w)
3160
0
{
3161
0
    assert(pv != NULL);
3162
0
    if (*pv == NULL)
3163
0
        return;
3164
0
    if (w == NULL) {
3165
0
        Py_CLEAR(*pv);
3166
0
        return;
3167
0
    }
3168
3169
0
    if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) {
3170
        /* Only one reference, so we can resize in place */
3171
0
        Py_ssize_t oldsize;
3172
0
        Py_buffer wb;
3173
3174
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
3175
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3176
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3177
0
            Py_CLEAR(*pv);
3178
0
            return;
3179
0
        }
3180
3181
0
        oldsize = PyBytes_GET_SIZE(*pv);
3182
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3183
0
            PyErr_NoMemory();
3184
0
            goto error;
3185
0
        }
3186
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3187
0
            goto error;
3188
3189
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3190
0
        PyBuffer_Release(&wb);
3191
0
        return;
3192
3193
0
      error:
3194
0
        PyBuffer_Release(&wb);
3195
0
        Py_CLEAR(*pv);
3196
0
        return;
3197
0
    }
3198
3199
0
    else {
3200
        /* Multiple references, need to create new object */
3201
0
        PyObject *v;
3202
0
        v = bytes_concat(*pv, w);
3203
0
        Py_SETREF(*pv, v);
3204
0
    }
3205
0
}
3206
3207
void
3208
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3209
0
{
3210
0
    PyBytes_Concat(pv, w);
3211
0
    Py_XDECREF(w);
3212
0
}
3213
3214
3215
/* The following function breaks the notion that bytes are immutable:
3216
   it changes the size of a bytes object.  You can think of it
3217
   as creating a new bytes object and destroying the old one, only
3218
   more efficiently.
3219
   Note that if there's not enough memory to resize the bytes object, the
3220
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3221
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3222
   returned, and the value in *pv may or may not be the same as on input.
3223
   As always, an extra byte is allocated for a trailing \0 byte (newsize
3224
   does *not* include that), and a trailing \0 byte is stored.
3225
*/
3226
3227
int
3228
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3229
3.45M
{
3230
3.45M
    PyObject *v;
3231
3.45M
    PyBytesObject *sv;
3232
3.45M
    v = *pv;
3233
3.45M
    if (!PyBytes_Check(v) || newsize < 0) {
3234
0
        *pv = 0;
3235
0
        Py_DECREF(v);
3236
0
        PyErr_BadInternalCall();
3237
0
        return -1;
3238
0
    }
3239
3.45M
    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
3240
3.45M
    if (oldsize == newsize) {
3241
        /* return early if newsize equals to v->ob_size */
3242
17.8k
        return 0;
3243
17.8k
    }
3244
3.43M
    if (oldsize == 0) {
3245
1.50M
        *pv = _PyBytes_FromSize(newsize, 0);
3246
1.50M
        Py_DECREF(v);
3247
1.50M
        return (*pv == NULL) ? -1 : 0;
3248
1.50M
    }
3249
1.93M
    if (newsize == 0) {
3250
24.4k
        *pv = bytes_get_empty();
3251
24.4k
        Py_DECREF(v);
3252
24.4k
        return 0;
3253
24.4k
    }
3254
1.91M
    if (!_PyObject_IsUniquelyReferenced(v)) {
3255
0
        if (oldsize < newsize) {
3256
0
            *pv = _PyBytes_FromSize(newsize, 0);
3257
0
            if (*pv) {
3258
0
                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
3259
0
            }
3260
0
        }
3261
0
        else {
3262
0
            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
3263
0
        }
3264
0
        Py_DECREF(v);
3265
0
        return (*pv == NULL) ? -1 : 0;
3266
0
    }
3267
3268
#ifdef Py_TRACE_REFS
3269
    _Py_ForgetReference(v);
3270
#endif
3271
1.91M
    _PyReftracerTrack(v, PyRefTracer_DESTROY);
3272
1.91M
    *pv = (PyObject *)
3273
1.91M
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3274
1.91M
    if (*pv == NULL) {
3275
#ifdef Py_REF_DEBUG
3276
        _Py_DecRefTotal(_PyThreadState_GET());
3277
#endif
3278
0
        PyObject_Free(v);
3279
0
        PyErr_NoMemory();
3280
0
        return -1;
3281
0
    }
3282
1.91M
    _Py_NewReferenceNoTotal(*pv);
3283
1.91M
    sv = (PyBytesObject *) *pv;
3284
1.91M
    Py_SET_SIZE(sv, newsize);
3285
1.91M
    sv->ob_sval[newsize] = '\0';
3286
1.91M
    set_ob_shash(sv, -1);          /* invalidate cached hash value */
3287
1.91M
    return 0;
3288
1.91M
}
3289
3290
3291
/*********************** Bytes Iterator ****************************/
3292
3293
/* State of an iterator over a bytes object (see PyBytesIter_Type). */
typedef struct {
3294
    PyObject_HEAD
3295
    Py_ssize_t it_index;   /* Index of the next byte striter_next() yields */
3296
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3297
} striterobject;
3298
3299
2.08k
/* Unchecked cast from a generic object pointer to the iterator struct. */
#define _striterobject_CAST(op)  ((striterobject *)(op))
3300
3301
static void
3302
striter_dealloc(PyObject *op)
3303
76
{
3304
76
    striterobject *it = _striterobject_CAST(op);
3305
76
    _PyObject_GC_UNTRACK(it);
3306
76
    Py_XDECREF(it->it_seq);
3307
76
    PyObject_GC_Del(it);
3308
76
}
3309
3310
static int
3311
striter_traverse(PyObject *op, visitproc visit, void *arg)
3312
0
{
3313
0
    striterobject *it = _striterobject_CAST(op);
3314
0
    Py_VISIT(it->it_seq);
3315
0
    return 0;
3316
0
}
3317
3318
/* tp_iternext of the bytes iterator.
 *
 * Returns the next byte as an int object and advances the index.  Once
 * the end is reached, the reference to the bytes object is released,
 * it_seq is set to NULL, and NULL is returned without setting an
 * exception; later calls return NULL immediately.
 */
static PyObject *
striter_next(PyObject *op)
{
    striterobject *self = _striterobject_CAST(op);
    assert(self != NULL);

    PyBytesObject *bytes = self->it_seq;
    if (bytes == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(bytes));

    if (self->it_index >= PyBytes_GET_SIZE(bytes)) {
        /* Exhausted: let go of the sequence. */
        self->it_seq = NULL;
        Py_DECREF(bytes);
        return NULL;
    }

    unsigned char byte = (unsigned char)bytes->ob_sval[self->it_index];
    self->it_index++;
    return _PyLong_FromUnsignedChar(byte);
}
3339
3340
static PyObject *
3341
striter_len(PyObject *op, PyObject *Py_UNUSED(ignored))
3342
0
{
3343
0
    striterobject *it = _striterobject_CAST(op);
3344
0
    Py_ssize_t len = 0;
3345
0
    if (it->it_seq)
3346
0
        len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3347
0
    return PyLong_FromSsize_t(len);
3348
0
}
3349
3350
PyDoc_STRVAR(length_hint_doc,
3351
             "Private method returning an estimate of len(list(it)).");
3352
3353
static PyObject *
3354
striter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
3355
0
{
3356
0
    PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3357
3358
    /* _PyEval_GetBuiltin can invoke arbitrary code,
3359
     * call must be before access of iterator pointers.
3360
     * see issue #101765 */
3361
0
    striterobject *it = _striterobject_CAST(op);
3362
0
    if (it->it_seq != NULL) {
3363
0
        return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3364
0
    } else {
3365
0
        return Py_BuildValue("N(())", iter);
3366
0
    }
3367
0
}
3368
3369
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3370
3371
static PyObject *
3372
striter_setstate(PyObject *op, PyObject *state)
3373
0
{
3374
0
    Py_ssize_t index = PyLong_AsSsize_t(state);
3375
0
    if (index == -1 && PyErr_Occurred())
3376
0
        return NULL;
3377
0
    striterobject *it = _striterobject_CAST(op);
3378
0
    if (it->it_seq != NULL) {
3379
0
        if (index < 0)
3380
0
            index = 0;
3381
0
        else if (index > PyBytes_GET_SIZE(it->it_seq))
3382
0
            index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3383
0
        it->it_index = index;
3384
0
    }
3385
0
    Py_RETURN_NONE;
3386
0
}
3387
3388
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3389
3390
static PyMethodDef striter_methods[] = {
3391
    {"__length_hint__", striter_len, METH_NOARGS, length_hint_doc},
3392
    {"__reduce__",      striter_reduce, METH_NOARGS, reduce_doc},
3393
    {"__setstate__",    striter_setstate, METH_O, setstate_doc},
3394
    {NULL,              NULL}           /* sentinel */
3395
};
3396
3397
PyTypeObject PyBytesIter_Type = {
3398
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3399
    "bytes_iterator",                           /* tp_name */
3400
    sizeof(striterobject),                      /* tp_basicsize */
3401
    0,                                          /* tp_itemsize */
3402
    /* methods */
3403
    striter_dealloc,                            /* tp_dealloc */
3404
    0,                                          /* tp_vectorcall_offset */
3405
    0,                                          /* tp_getattr */
3406
    0,                                          /* tp_setattr */
3407
    0,                                          /* tp_as_async */
3408
    0,                                          /* tp_repr */
3409
    0,                                          /* tp_as_number */
3410
    0,                                          /* tp_as_sequence */
3411
    0,                                          /* tp_as_mapping */
3412
    0,                                          /* tp_hash */
3413
    0,                                          /* tp_call */
3414
    0,                                          /* tp_str */
3415
    PyObject_GenericGetAttr,                    /* tp_getattro */
3416
    0,                                          /* tp_setattro */
3417
    0,                                          /* tp_as_buffer */
3418
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3419
    0,                                          /* tp_doc */
3420
    striter_traverse,                           /* tp_traverse */
3421
    0,                                          /* tp_clear */
3422
    0,                                          /* tp_richcompare */
3423
    0,                                          /* tp_weaklistoffset */
3424
    PyObject_SelfIter,                          /* tp_iter */
3425
    striter_next,                               /* tp_iternext */
3426
    striter_methods,                            /* tp_methods */
3427
    0,
3428
};
3429
3430
/* tp_iter of bytes: create a GC-tracked iterator positioned at index 0
 * that holds a new reference to `seq`.  Returns NULL with an exception
 * set if `seq` is not a bytes object or the allocation fails. */
static PyObject *
bytes_iter(PyObject *seq)
{
    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    striterobject *iterator = PyObject_GC_New(striterobject,
                                              &PyBytesIter_Type);
    if (iterator == NULL) {
        return NULL;
    }

    iterator->it_index = 0;
    iterator->it_seq = (PyBytesObject *)Py_NewRef(seq);
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
3447
3448
3449
void
3450
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3451
    const char* src, Py_ssize_t len_src)
3452
54.6k
{
3453
54.6k
    if (len_dest == 0) {
3454
312
        return;
3455
312
    }
3456
54.3k
    if (len_src == 1) {
3457
51.9k
        memset(dest, src[0], len_dest);
3458
51.9k
    }
3459
2.36k
    else {
3460
2.36k
        if (src != dest) {
3461
2.36k
            memcpy(dest, src, len_src);
3462
2.36k
        }
3463
2.36k
        Py_ssize_t copied = len_src;
3464
5.36k
        while (copied < len_dest) {
3465
2.99k
            Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3466
2.99k
            memcpy(dest + copied, dest, bytes_to_copy);
3467
2.99k
            copied += bytes_to_copy;
3468
2.99k
        }
3469
2.36k
    }
3470
54.3k
}
3471
3472
3473
// --- PyBytesWriter API -----------------------------------------------------
3474
3475
static inline char*
3476
byteswriter_data(PyBytesWriter *writer)
3477
17.1M
{
3478
17.1M
    return _PyBytesWriter_GetData(writer);
3479
17.1M
}
3480
3481
3482
static inline Py_ssize_t
3483
byteswriter_allocated(PyBytesWriter *writer)
3484
16.9M
{
3485
16.9M
    if (writer->obj == NULL) {
3486
16.1M
        return sizeof(writer->small_buffer);
3487
16.1M
    }
3488
778k
    else if (writer->use_bytearray) {
3489
0
        return PyByteArray_GET_SIZE(writer->obj);
3490
0
    }
3491
778k
    else {
3492
778k
        return PyBytes_GET_SIZE(writer->obj);
3493
778k
    }
3494
16.9M
}
3495
3496
3497
/* Divisor used by byteswriter_resize() when overallocating: a request of
   `size` bytes is grown by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
3498
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
3499
#  define OVERALLOCATE_FACTOR 2
3500
#else
3501
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
3502
1.39k
#  define OVERALLOCATE_FACTOR 4
3503
#endif
3504
3505
static inline int
3506
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
3507
8.95M
{
3508
8.95M
    assert(size >= 0);
3509
3510
8.95M
    Py_ssize_t old_allocated = byteswriter_allocated(writer);
3511
8.95M
    if (size <= old_allocated) {
3512
8.01M
        return 0;
3513
8.01M
    }
3514
3515
945k
    if (resize & writer->overallocate) {
3516
699
        if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) {
3517
699
            size += size / OVERALLOCATE_FACTOR;
3518
699
        }
3519
699
    }
3520
3521
945k
    if (writer->obj != NULL) {
3522
699
        if (writer->use_bytearray) {
3523
0
            if (PyByteArray_Resize(writer->obj, size)) {
3524
0
                return -1;
3525
0
            }
3526
0
        }
3527
699
        else {
3528
699
            if (_PyBytes_Resize(&writer->obj, size)) {
3529
0
                return -1;
3530
0
            }
3531
699
        }
3532
699
        assert(writer->obj != NULL);
3533
699
    }
3534
945k
    else if (writer->use_bytearray) {
3535
0
        writer->obj = PyByteArray_FromStringAndSize(NULL, size);
3536
0
        if (writer->obj == NULL) {
3537
0
            return -1;
3538
0
        }
3539
0
        if (resize) {
3540
0
            assert((size_t)size > sizeof(writer->small_buffer));
3541
0
            memcpy(PyByteArray_AS_STRING(writer->obj),
3542
0
                   writer->small_buffer,
3543
0
                   sizeof(writer->small_buffer));
3544
0
        }
3545
0
    }
3546
945k
    else {
3547
945k
        writer->obj = PyBytes_FromStringAndSize(NULL, size);
3548
945k
        if (writer->obj == NULL) {
3549
0
            return -1;
3550
0
        }
3551
945k
        if (resize) {
3552
0
            assert((size_t)size > sizeof(writer->small_buffer));
3553
0
            memcpy(PyBytes_AS_STRING(writer->obj),
3554
0
                   writer->small_buffer,
3555
0
                   sizeof(writer->small_buffer));
3556
0
        }
3557
945k
    }
3558
3559
#ifdef Py_DEBUG
3560
    Py_ssize_t allocated = byteswriter_allocated(writer);
3561
    if (resize && allocated > old_allocated) {
3562
        memset(byteswriter_data(writer) + old_allocated, 0xff,
3563
               allocated - old_allocated);
3564
    }
3565
#endif
3566
3567
945k
    return 0;
3568
945k
}
3569
3570
3571
static PyBytesWriter*
3572
byteswriter_create(Py_ssize_t size, int use_bytearray)
3573
8.97M
{
3574
8.97M
    if (size < 0) {
3575
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3576
0
        return NULL;
3577
0
    }
3578
3579
8.97M
    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
3580
8.97M
    if (writer == NULL) {
3581
2.75k
        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
3582
2.75k
        if (writer == NULL) {
3583
0
            PyErr_NoMemory();
3584
0
            return NULL;
3585
0
        }
3586
2.75k
    }
3587
8.97M
    writer->obj = NULL;
3588
8.97M
    writer->size = 0;
3589
8.97M
    writer->use_bytearray = use_bytearray;
3590
8.97M
    writer->overallocate = !use_bytearray;
3591
3592
8.97M
    if (size >= 1) {
3593
8.95M
        if (byteswriter_resize(writer, size, 0) < 0) {
3594
0
            PyBytesWriter_Discard(writer);
3595
0
            return NULL;
3596
0
        }
3597
8.95M
        writer->size = size;
3598
8.95M
    }
3599
#ifdef Py_DEBUG
3600
    memset(byteswriter_data(writer), 0xff, byteswriter_allocated(writer));
3601
#endif
3602
8.97M
    return writer;
3603
8.97M
}
3604
3605
PyBytesWriter*
3606
PyBytesWriter_Create(Py_ssize_t size)
3607
8.97M
{
3608
8.97M
    return byteswriter_create(size, 0);
3609
8.97M
}
3610
3611
PyBytesWriter*
3612
_PyBytesWriter_CreateByteArray(Py_ssize_t size)
3613
0
{
3614
0
    return byteswriter_create(size, 1);
3615
0
}
3616
3617
3618
void
3619
PyBytesWriter_Discard(PyBytesWriter *writer)
3620
9.15M
{
3621
9.15M
    if (writer == NULL) {
3622
187k
        return;
3623
187k
    }
3624
3625
8.97M
    Py_XDECREF(writer->obj);
3626
8.97M
    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
3627
8.97M
}
3628
3629
3630
PyObject*
3631
PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
3632
8.50M
{
3633
8.50M
    PyObject *result;
3634
8.50M
    if (size == 0) {
3635
15.5k
        result = bytes_get_empty();
3636
15.5k
    }
3637
8.48M
    else if (writer->obj != NULL) {
3638
859k
        if (writer->use_bytearray) {
3639
0
            if (size != PyByteArray_GET_SIZE(writer->obj)) {
3640
0
                if (PyByteArray_Resize(writer->obj, size)) {
3641
0
                    goto error;
3642
0
                }
3643
0
            }
3644
0
        }
3645
859k
        else {
3646
859k
            if (size != PyBytes_GET_SIZE(writer->obj)) {
3647
785k
                if (_PyBytes_Resize(&writer->obj, size)) {
3648
0
                    goto error;
3649
0
                }
3650
785k
            }
3651
859k
        }
3652
859k
        result = writer->obj;
3653
859k
        writer->obj = NULL;
3654
859k
    }
3655
7.62M
    else if (writer->use_bytearray) {
3656
0
        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3657
0
    }
3658
7.62M
    else {
3659
7.62M
        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3660
7.62M
    }
3661
8.50M
    PyBytesWriter_Discard(writer);
3662
8.50M
    return result;
3663
3664
0
error:
3665
0
    PyBytesWriter_Discard(writer);
3666
0
    return NULL;
3667
8.50M
}
3668
3669
PyObject*
3670
PyBytesWriter_Finish(PyBytesWriter *writer)
3671
513k
{
3672
513k
    return PyBytesWriter_FinishWithSize(writer, writer->size);
3673
513k
}
3674
3675
3676
PyObject*
3677
PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
3678
7.98M
{
3679
7.98M
    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
3680
7.98M
    if (size < 0 || size > byteswriter_allocated(writer)) {
3681
0
        PyBytesWriter_Discard(writer);
3682
0
        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
3683
0
        return NULL;
3684
0
    }
3685
3686
7.98M
    return PyBytesWriter_FinishWithSize(writer, size);
3687
7.98M
}
3688
3689
3690
void*
3691
PyBytesWriter_GetData(PyBytesWriter *writer)
3692
9.15M
{
3693
9.15M
    return byteswriter_data(writer);
3694
9.15M
}
3695
3696
3697
Py_ssize_t
3698
PyBytesWriter_GetSize(PyBytesWriter *writer)
3699
0
{
3700
0
    return _PyBytesWriter_GetSize(writer);
3701
0
}
3702
3703
3704
static Py_ssize_t
3705
_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
3706
138
{
3707
138
    return byteswriter_allocated(writer);
3708
138
}
3709
3710
3711
int
3712
PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
3713
0
{
3714
0
    if (size < 0) {
3715
0
        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
3716
0
        return -1;
3717
0
    }
3718
0
    if (byteswriter_resize(writer, size, 1) < 0) {
3719
0
        return -1;
3720
0
    }
3721
0
    writer->size = size;
3722
0
    return 0;
3723
0
}
3724
3725
3726
static void*
3727
_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3728
                                      void *data)
3729
0
{
3730
0
    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
3731
0
    if (PyBytesWriter_Resize(writer, size) < 0) {
3732
0
        return NULL;
3733
0
    }
3734
0
    return byteswriter_data(writer) + pos;
3735
0
}
3736
3737
3738
int
3739
PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
3740
699
{
3741
699
    if (size < 0 && writer->size + size < 0) {
3742
0
        PyErr_SetString(PyExc_ValueError, "invalid size");
3743
0
        return -1;
3744
0
    }
3745
699
    if (size > PY_SSIZE_T_MAX - writer->size) {
3746
0
        PyErr_NoMemory();
3747
0
        return -1;
3748
0
    }
3749
699
    size = writer->size + size;
3750
3751
699
    if (byteswriter_resize(writer, size, 1) < 0) {
3752
0
        return -1;
3753
0
    }
3754
699
    writer->size = size;
3755
699
    return 0;
3756
699
}
3757
3758
3759
void*
3760
PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
3761
                                   void *buf)
3762
0
{
3763
0
    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
3764
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3765
0
        return NULL;
3766
0
    }
3767
0
    return byteswriter_data(writer) + pos;
3768
0
}
3769
3770
3771
int
3772
PyBytesWriter_WriteBytes(PyBytesWriter *writer,
3773
                         const void *bytes, Py_ssize_t size)
3774
0
{
3775
0
    if (size < 0) {
3776
0
        size_t len = strlen(bytes);
3777
0
        if (len > (size_t)PY_SSIZE_T_MAX) {
3778
0
            PyErr_NoMemory();
3779
0
            return -1;
3780
0
        }
3781
0
        size = (Py_ssize_t)len;
3782
0
    }
3783
3784
0
    Py_ssize_t pos = writer->size;
3785
0
    if (PyBytesWriter_Grow(writer, size) < 0) {
3786
0
        return -1;
3787
0
    }
3788
0
    char *buf = byteswriter_data(writer);
3789
0
    memcpy(buf + pos, bytes, size);
3790
0
    return 0;
3791
0
}
3792
3793
3794
int
3795
PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
3796
0
{
3797
0
    Py_ssize_t pos = writer->size;
3798
0
    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
3799
0
        return -1;
3800
0
    }
3801
3802
0
    va_list vargs;
3803
0
    va_start(vargs, format);
3804
0
    char *buf = bytes_fromformat(writer, pos, format, vargs);
3805
0
    va_end(vargs);
3806
3807
0
    Py_ssize_t size = buf - byteswriter_data(writer);
3808
0
    return PyBytesWriter_Resize(writer, size);
3809
0
}