Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Objects/bytesobject.c
Line
Count
Source (jump to first uncovered line)
1
/* bytes object implementation */
2
3
#define PY_SSIZE_T_CLEAN
4
5
#include "Python.h"
6
#include "pycore_object.h"
7
#include "pycore_pymem.h"
8
#include "pycore_pystate.h"
9
10
#include "bytes_methods.h"
11
#include "pystrhex.h"
12
#include <stddef.h>
13
14
/*[clinic input]
15
class bytes "PyBytesObject *" "&PyBytes_Type"
16
[clinic start generated code]*/
17
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
18
19
#include "clinic/bytesobject.c.h"
20
21
#ifdef COUNT_ALLOCS
22
Py_ssize_t _Py_null_strings, _Py_one_strings;
23
#endif
24
25
static PyBytesObject *characters[UCHAR_MAX + 1];
26
static PyBytesObject *nullstring;
27
28
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29
   for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31
   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32
   3 bytes per string allocation on a typical system.
33
*/
34
342k
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
36
/* Forward declaration */
37
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38
                                                   char *str);
39
40
/*
41
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
42
   string containing exactly `size' bytes.
43
44
   For PyBytes_FromStringAndSize(), the parameter `str' is
45
   either NULL or else points to a string containing at least `size' bytes.
46
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
47
   not have to be null-terminated.  (Therefore it is safe to construct a
48
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
49
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
50
   bytes (setting the last byte to the null terminating character) and you can
51
   fill in the data yourself.  If `str' is non-NULL then the resulting
52
   PyBytes object must be treated as immutable and you must not fill in nor
53
   alter the data yourself, since the strings may be shared.
54
55
   The PyObject member `op->ob_size', which denotes the number of "extra
56
   items" in a variable-size object, will contain the number of bytes
57
   allocated for string data, not counting the null terminating character.
58
   It is therefore equal to the `size' parameter (for
59
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
60
   parameter (for PyBytes_FromString()).
61
*/
62
static PyObject *
63
_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
64
221k
{
65
221k
    PyBytesObject *op;
66
221k
    assert(size >= 0);
67
68
221k
    if (size == 0 && (op = nullstring) != NULL) {
69
#ifdef COUNT_ALLOCS
70
        _Py_null_strings++;
71
#endif
72
50.8k
        Py_INCREF(op);
73
50.8k
        return (PyObject *)op;
74
50.8k
    }
75
76
170k
    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
77
0
        PyErr_SetString(PyExc_OverflowError,
78
0
                        "byte string is too large");
79
0
        return NULL;
80
0
    }
81
82
    /* Inline PyObject_NewVar */
83
170k
    if (use_calloc)
84
0
        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85
170k
    else
86
170k
        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
87
170k
    if (op == NULL)
88
0
        return PyErr_NoMemory();
89
170k
    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
90
170k
    op->ob_shash = -1;
91
170k
    if (!use_calloc)
92
170k
        op->ob_sval[size] = '\0';
93
    /* empty byte string singleton */
94
170k
    if (size == 0) {
95
14
        nullstring = op;
96
14
        Py_INCREF(op);
97
14
    }
98
170k
    return (PyObject *) op;
99
170k
}
100
101
PyObject *
102
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103
238k
{
104
238k
    PyBytesObject *op;
105
238k
    if (size < 0) {
106
0
        PyErr_SetString(PyExc_SystemError,
107
0
            "Negative size passed to PyBytes_FromStringAndSize");
108
0
        return NULL;
109
0
    }
110
238k
    if (size == 1 && str != NULL &&
111
238k
        (op = characters[*str & UCHAR_MAX]) != NULL)
112
16.9k
    {
113
#ifdef COUNT_ALLOCS
114
        _Py_one_strings++;
115
#endif
116
16.9k
        Py_INCREF(op);
117
16.9k
        return (PyObject *)op;
118
16.9k
    }
119
120
221k
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121
221k
    if (op == NULL)
122
0
        return NULL;
123
221k
    if (str == NULL)
124
26.1k
        return (PyObject *) op;
125
126
195k
    memcpy(op->ob_sval, str, size);
127
    /* share short strings */
128
195k
    if (size == 1) {
129
340
        characters[*str & UCHAR_MAX] = op;
130
340
        Py_INCREF(op);
131
340
    }
132
195k
    return (PyObject *) op;
133
221k
}
134
135
PyObject *
136
PyBytes_FromString(const char *str)
137
350
{
138
350
    size_t size;
139
350
    PyBytesObject *op;
140
141
350
    assert(str != NULL);
142
350
    size = strlen(str);
143
350
    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144
0
        PyErr_SetString(PyExc_OverflowError,
145
0
            "byte string is too long");
146
0
        return NULL;
147
0
    }
148
350
    if (size == 0 && (op = nullstring) != NULL) {
149
#ifdef COUNT_ALLOCS
150
        _Py_null_strings++;
151
#endif
152
0
        Py_INCREF(op);
153
0
        return (PyObject *)op;
154
0
    }
155
350
    if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
156
#ifdef COUNT_ALLOCS
157
        _Py_one_strings++;
158
#endif
159
0
        Py_INCREF(op);
160
0
        return (PyObject *)op;
161
0
    }
162
163
    /* Inline PyObject_NewVar */
164
350
    op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165
350
    if (op == NULL)
166
0
        return PyErr_NoMemory();
167
350
    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
168
350
    op->ob_shash = -1;
169
350
    memcpy(op->ob_sval, str, size+1);
170
    /* share short strings */
171
350
    if (size == 0) {
172
0
        nullstring = op;
173
0
        Py_INCREF(op);
174
350
    } else if (size == 1) {
175
0
        characters[*str & UCHAR_MAX] = op;
176
0
        Py_INCREF(op);
177
0
    }
178
350
    return (PyObject *) op;
179
350
}
180
181
PyObject *
182
PyBytes_FromFormatV(const char *format, va_list vargs)
183
0
{
184
0
    char *s;
185
0
    const char *f;
186
0
    const char *p;
187
0
    Py_ssize_t prec;
188
0
    int longflag;
189
0
    int size_tflag;
190
    /* Longest 64-bit formatted numbers:
191
       - "18446744073709551615\0" (21 bytes)
192
       - "-9223372036854775808\0" (21 bytes)
193
       Decimal takes the most space (it isn't enough for octal.)
194
195
       Longest 64-bit pointer representation:
196
       "0xffffffffffffffff\0" (19 bytes). */
197
0
    char buffer[21];
198
0
    _PyBytesWriter writer;
199
200
0
    _PyBytesWriter_Init(&writer);
201
202
0
    s = _PyBytesWriter_Alloc(&writer, strlen(format));
203
0
    if (s == NULL)
204
0
        return NULL;
205
0
    writer.overallocate = 1;
206
207
0
#define WRITE_BYTES(str) \
208
0
    do { \
209
0
        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210
0
        if (s == NULL) \
211
0
            goto error; \
212
0
    } while (0)
213
214
0
    for (f = format; *f; f++) {
215
0
        if (*f != '%') {
216
0
            *s++ = *f;
217
0
            continue;
218
0
        }
219
220
0
        p = f++;
221
222
        /* ignore the width (ex: 10 in "%10s") */
223
0
        while (Py_ISDIGIT(*f))
224
0
            f++;
225
226
        /* parse the precision (ex: 10 in "%.10s") */
227
0
        prec = 0;
228
0
        if (*f == '.') {
229
0
            f++;
230
0
            for (; Py_ISDIGIT(*f); f++) {
231
0
                prec = (prec * 10) + (*f - '0');
232
0
            }
233
0
        }
234
235
0
        while (*f && *f != '%' && !Py_ISALPHA(*f))
236
0
            f++;
237
238
        /* handle the long flag ('l'), but only for %ld and %lu.
239
           others can be added when necessary. */
240
0
        longflag = 0;
241
0
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242
0
            longflag = 1;
243
0
            ++f;
244
0
        }
245
246
        /* handle the size_t flag ('z'). */
247
0
        size_tflag = 0;
248
0
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249
0
            size_tflag = 1;
250
0
            ++f;
251
0
        }
252
253
        /* subtract bytes preallocated for the format string
254
           (ex: 2 for "%s") */
255
0
        writer.min_size -= (f - p + 1);
256
257
0
        switch (*f) {
258
0
        case 'c':
259
0
        {
260
0
            int c = va_arg(vargs, int);
261
0
            if (c < 0 || c > 255) {
262
0
                PyErr_SetString(PyExc_OverflowError,
263
0
                                "PyBytes_FromFormatV(): %c format "
264
0
                                "expects an integer in range [0; 255]");
265
0
                goto error;
266
0
            }
267
0
            writer.min_size++;
268
0
            *s++ = (unsigned char)c;
269
0
            break;
270
0
        }
271
272
0
        case 'd':
273
0
            if (longflag)
274
0
                sprintf(buffer, "%ld", va_arg(vargs, long));
275
0
            else if (size_tflag)
276
0
                sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277
0
                    va_arg(vargs, Py_ssize_t));
278
0
            else
279
0
                sprintf(buffer, "%d", va_arg(vargs, int));
280
0
            assert(strlen(buffer) < sizeof(buffer));
281
0
            WRITE_BYTES(buffer);
282
0
            break;
283
284
0
        case 'u':
285
0
            if (longflag)
286
0
                sprintf(buffer, "%lu",
287
0
                    va_arg(vargs, unsigned long));
288
0
            else if (size_tflag)
289
0
                sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290
0
                    va_arg(vargs, size_t));
291
0
            else
292
0
                sprintf(buffer, "%u",
293
0
                    va_arg(vargs, unsigned int));
294
0
            assert(strlen(buffer) < sizeof(buffer));
295
0
            WRITE_BYTES(buffer);
296
0
            break;
297
298
0
        case 'i':
299
0
            sprintf(buffer, "%i", va_arg(vargs, int));
300
0
            assert(strlen(buffer) < sizeof(buffer));
301
0
            WRITE_BYTES(buffer);
302
0
            break;
303
304
0
        case 'x':
305
0
            sprintf(buffer, "%x", va_arg(vargs, int));
306
0
            assert(strlen(buffer) < sizeof(buffer));
307
0
            WRITE_BYTES(buffer);
308
0
            break;
309
310
0
        case 's':
311
0
        {
312
0
            Py_ssize_t i;
313
314
0
            p = va_arg(vargs, const char*);
315
0
            if (prec <= 0) {
316
0
                i = strlen(p);
317
0
            }
318
0
            else {
319
0
                i = 0;
320
0
                while (i < prec && p[i]) {
321
0
                    i++;
322
0
                }
323
0
            }
324
0
            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325
0
            if (s == NULL)
326
0
                goto error;
327
0
            break;
328
0
        }
329
330
0
        case 'p':
331
0
            sprintf(buffer, "%p", va_arg(vargs, void*));
332
0
            assert(strlen(buffer) < sizeof(buffer));
333
            /* %p is ill-defined:  ensure leading 0x. */
334
0
            if (buffer[1] == 'X')
335
0
                buffer[1] = 'x';
336
0
            else if (buffer[1] != 'x') {
337
0
                memmove(buffer+2, buffer, strlen(buffer)+1);
338
0
                buffer[0] = '0';
339
0
                buffer[1] = 'x';
340
0
            }
341
0
            WRITE_BYTES(buffer);
342
0
            break;
343
344
0
        case '%':
345
0
            writer.min_size++;
346
0
            *s++ = '%';
347
0
            break;
348
349
0
        default:
350
0
            if (*f == 0) {
351
                /* fix min_size if we reached the end of the format string */
352
0
                writer.min_size++;
353
0
            }
354
355
            /* invalid format string: copy unformatted string and exit */
356
0
            WRITE_BYTES(p);
357
0
            return _PyBytesWriter_Finish(&writer, s);
358
0
        }
359
0
    }
360
361
0
#undef WRITE_BYTES
362
363
0
    return _PyBytesWriter_Finish(&writer, s);
364
365
0
 error:
366
0
    _PyBytesWriter_Dealloc(&writer);
367
0
    return NULL;
368
0
}
369
370
PyObject *
371
PyBytes_FromFormat(const char *format, ...)
372
0
{
373
0
    PyObject* ret;
374
0
    va_list vargs;
375
376
0
#ifdef HAVE_STDARG_PROTOTYPES
377
0
    va_start(vargs, format);
378
#else
379
    va_start(vargs);
380
#endif
381
0
    ret = PyBytes_FromFormatV(format, vargs);
382
0
    va_end(vargs);
383
0
    return ret;
384
0
}
385
386
/* Helpers for formatstring */
387
388
/* Fetch the next argument for a format directive and advance *p_argidx.

   When `arglen` is negative, `args` itself is returned as the argument;
   otherwise the item at the current index is fetched with
   PyTuple_GetItem().  When the index is not below `arglen`, TypeError
   ("not enough arguments for format string") is set and NULL is returned
   without advancing the index. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}
403
404
/* Format codes
405
 * F_LJUST      '-'
406
 * F_SIGN       '+'
407
 * F_BLANK      ' '
408
 * F_ALT        '#'
409
 * F_ZERO       '0'
410
 */
411
0
#define F_LJUST (1<<0)
412
0
#define F_SIGN  (1<<1)
413
0
#define F_BLANK (1<<2)
414
0
#define F_ALT   (1<<3)
415
0
#define F_ZERO  (1<<4)
416
417
/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
419
static char*
420
formatfloat(PyObject *v, int flags, int prec, int type,
421
            PyObject **p_result, _PyBytesWriter *writer, char *str)
422
0
{
423
0
    char *p;
424
0
    PyObject *result;
425
0
    double x;
426
0
    size_t len;
427
428
0
    x = PyFloat_AsDouble(v);
429
0
    if (x == -1.0 && PyErr_Occurred()) {
430
0
        PyErr_Format(PyExc_TypeError, "float argument required, "
431
0
                     "not %.200s", Py_TYPE(v)->tp_name);
432
0
        return NULL;
433
0
    }
434
435
0
    if (prec < 0)
436
0
        prec = 6;
437
438
0
    p = PyOS_double_to_string(x, type, prec,
439
0
                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441
0
    if (p == NULL)
442
0
        return NULL;
443
444
0
    len = strlen(p);
445
0
    if (writer != NULL) {
446
0
        str = _PyBytesWriter_Prepare(writer, str, len);
447
0
        if (str == NULL)
448
0
            return NULL;
449
0
        memcpy(str, p, len);
450
0
        PyMem_Free(p);
451
0
        str += len;
452
0
        return str;
453
0
    }
454
455
0
    result = PyBytes_FromStringAndSize(p, len);
456
0
    PyMem_Free(p);
457
0
    *p_result = result;
458
0
    return result != NULL ? str : NULL;
459
0
}
460
461
/* Format `v` as an integer for a %d/%i/%u/%o/%x/%X directive.

   'i' is treated as 'd'.  An int is formatted directly.  Any other object
   that passes PyNumber_Check() is first converted: with PyNumber_Index()
   for 'o', 'x' and 'X', with PyNumber_Long() otherwise.  Returns whatever
   _PyUnicode_FormatLong() produces, or NULL with an exception set.  A
   failed conversion that raised TypeError, a conversion result that is not
   an int, or a non-number argument all end in the TypeError built at the
   bottom; any other conversion error is propagated unchanged. */
static PyObject *
formatlong(PyObject *v, int flags, int prec, int type)
{
    int integer_only;

    if (type == 'i') {
        type = 'd';
    }
    if (PyLong_Check(v)) {
        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    }

    /* o, x and X insist on a real integer; the others accept any number */
    integer_only = (type == 'o' || type == 'x' || type == 'X');

    if (PyNumber_Check(v)) {
        PyObject *as_int = integer_only ? PyNumber_Index(v)
                                        : PyNumber_Long(v);

        if (as_int == NULL) {
            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
                return NULL;
            }
            /* TypeError: replaced by the more specific message below */
        }
        else if (PyLong_Check(as_int)) {
            PyObject *text = _PyUnicode_FormatLong(as_int, flags & F_ALT,
                                                   prec, type);
            Py_DECREF(as_int);
            return text;
        }
        else {
            /* conversion returned something that is not an int */
            Py_DECREF(as_int);
        }
    }

    PyErr_Format(PyExc_TypeError,
        "%%%c format: %s is required, not %.200s", type,
        integer_only ? "an integer" : "a number",
        Py_TYPE(v)->tp_name);
    return NULL;
}
494
495
static int
496
byte_converter(PyObject *arg, char *p)
497
0
{
498
0
    if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
499
0
        *p = PyBytes_AS_STRING(arg)[0];
500
0
        return 1;
501
0
    }
502
0
    else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
503
0
        *p = PyByteArray_AS_STRING(arg)[0];
504
0
        return 1;
505
0
    }
506
0
    else {
507
0
        PyObject *iobj;
508
0
        long ival;
509
0
        int overflow;
510
        /* make sure number is a type of integer */
511
0
        if (PyLong_Check(arg)) {
512
0
            ival = PyLong_AsLongAndOverflow(arg, &overflow);
513
0
        }
514
0
        else {
515
0
            iobj = PyNumber_Index(arg);
516
0
            if (iobj == NULL) {
517
0
                if (!PyErr_ExceptionMatches(PyExc_TypeError))
518
0
                    return 0;
519
0
                goto onError;
520
0
            }
521
0
            ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522
0
            Py_DECREF(iobj);
523
0
        }
524
0
        if (!overflow && ival == -1 && PyErr_Occurred())
525
0
            goto onError;
526
0
        if (overflow || !(0 <= ival && ival <= 255)) {
527
0
            PyErr_SetString(PyExc_OverflowError,
528
0
                            "%c arg not in range(256)");
529
0
            return 0;
530
0
        }
531
0
        *p = (char)ival;
532
0
        return 1;
533
0
    }
534
0
  onError:
535
0
    PyErr_SetString(PyExc_TypeError,
536
0
        "%c requires an integer in range(256) or a single byte");
537
0
    return 0;
538
0
}
539
540
static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
542
static PyObject *
543
format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
544
0
{
545
0
    PyObject *func, *result;
546
0
    _Py_IDENTIFIER(__bytes__);
547
    /* is it a bytes object? */
548
0
    if (PyBytes_Check(v)) {
549
0
        *pbuf = PyBytes_AS_STRING(v);
550
0
        *plen = PyBytes_GET_SIZE(v);
551
0
        Py_INCREF(v);
552
0
        return v;
553
0
    }
554
0
    if (PyByteArray_Check(v)) {
555
0
        *pbuf = PyByteArray_AS_STRING(v);
556
0
        *plen = PyByteArray_GET_SIZE(v);
557
0
        Py_INCREF(v);
558
0
        return v;
559
0
    }
560
    /* does it support __bytes__? */
561
0
    func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562
0
    if (func != NULL) {
563
0
        result = _PyObject_CallNoArg(func);
564
0
        Py_DECREF(func);
565
0
        if (result == NULL)
566
0
            return NULL;
567
0
        if (!PyBytes_Check(result)) {
568
0
            PyErr_Format(PyExc_TypeError,
569
0
                         "__bytes__ returned non-bytes (type %.200s)",
570
0
                         Py_TYPE(result)->tp_name);
571
0
            Py_DECREF(result);
572
0
            return NULL;
573
0
        }
574
0
        *pbuf = PyBytes_AS_STRING(result);
575
0
        *plen = PyBytes_GET_SIZE(result);
576
0
        return result;
577
0
    }
578
    /* does it support buffer protocol? */
579
0
    if (PyObject_CheckBuffer(v)) {
580
        /* maybe we can avoid making a copy of the buffer object here? */
581
0
        result = _PyBytes_FromBuffer(v);
582
0
        if (result == NULL)
583
0
            return NULL;
584
0
        *pbuf = PyBytes_AS_STRING(result);
585
0
        *plen = PyBytes_GET_SIZE(result);
586
0
        return result;
587
0
    }
588
0
    PyErr_Format(PyExc_TypeError,
589
0
                 "%%b requires a bytes-like object, "
590
0
                 "or an object that implements __bytes__, not '%.100s'",
591
0
                 Py_TYPE(v)->tp_name);
592
0
    return NULL;
593
0
}
594
595
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
596
597
PyObject *
598
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599
                  PyObject *args, int use_bytearray)
600
0
{
601
0
    const char *fmt;
602
0
    char *res;
603
0
    Py_ssize_t arglen, argidx;
604
0
    Py_ssize_t fmtcnt;
605
0
    int args_owned = 0;
606
0
    PyObject *dict = NULL;
607
0
    _PyBytesWriter writer;
608
609
0
    if (args == NULL) {
610
0
        PyErr_BadInternalCall();
611
0
        return NULL;
612
0
    }
613
0
    fmt = format;
614
0
    fmtcnt = format_len;
615
616
0
    _PyBytesWriter_Init(&writer);
617
0
    writer.use_bytearray = use_bytearray;
618
619
0
    res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620
0
    if (res == NULL)
621
0
        return NULL;
622
0
    if (!use_bytearray)
623
0
        writer.overallocate = 1;
624
625
0
    if (PyTuple_Check(args)) {
626
0
        arglen = PyTuple_GET_SIZE(args);
627
0
        argidx = 0;
628
0
    }
629
0
    else {
630
0
        arglen = -1;
631
0
        argidx = -2;
632
0
    }
633
0
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634
0
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635
0
        !PyByteArray_Check(args)) {
636
0
            dict = args;
637
0
    }
638
639
0
    while (--fmtcnt >= 0) {
640
0
        if (*fmt != '%') {
641
0
            Py_ssize_t len;
642
0
            char *pos;
643
644
0
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
645
0
            if (pos != NULL)
646
0
                len = pos - fmt;
647
0
            else
648
0
                len = fmtcnt + 1;
649
0
            assert(len != 0);
650
651
0
            memcpy(res, fmt, len);
652
0
            res += len;
653
0
            fmt += len;
654
0
            fmtcnt -= (len - 1);
655
0
        }
656
0
        else {
657
            /* Got a format specifier */
658
0
            int flags = 0;
659
0
            Py_ssize_t width = -1;
660
0
            int prec = -1;
661
0
            int c = '\0';
662
0
            int fill;
663
0
            PyObject *v = NULL;
664
0
            PyObject *temp = NULL;
665
0
            const char *pbuf = NULL;
666
0
            int sign;
667
0
            Py_ssize_t len = 0;
668
0
            char onechar; /* For byte_converter() */
669
0
            Py_ssize_t alloc;
670
671
0
            fmt++;
672
0
            if (*fmt == '%') {
673
0
                *res++ = '%';
674
0
                fmt++;
675
0
                fmtcnt--;
676
0
                continue;
677
0
            }
678
0
            if (*fmt == '(') {
679
0
                const char *keystart;
680
0
                Py_ssize_t keylen;
681
0
                PyObject *key;
682
0
                int pcount = 1;
683
684
0
                if (dict == NULL) {
685
0
                    PyErr_SetString(PyExc_TypeError,
686
0
                             "format requires a mapping");
687
0
                    goto error;
688
0
                }
689
0
                ++fmt;
690
0
                --fmtcnt;
691
0
                keystart = fmt;
692
                /* Skip over balanced parentheses */
693
0
                while (pcount > 0 && --fmtcnt >= 0) {
694
0
                    if (*fmt == ')')
695
0
                        --pcount;
696
0
                    else if (*fmt == '(')
697
0
                        ++pcount;
698
0
                    fmt++;
699
0
                }
700
0
                keylen = fmt - keystart - 1;
701
0
                if (fmtcnt < 0 || pcount > 0) {
702
0
                    PyErr_SetString(PyExc_ValueError,
703
0
                               "incomplete format key");
704
0
                    goto error;
705
0
                }
706
0
                key = PyBytes_FromStringAndSize(keystart,
707
0
                                                 keylen);
708
0
                if (key == NULL)
709
0
                    goto error;
710
0
                if (args_owned) {
711
0
                    Py_DECREF(args);
712
0
                    args_owned = 0;
713
0
                }
714
0
                args = PyObject_GetItem(dict, key);
715
0
                Py_DECREF(key);
716
0
                if (args == NULL) {
717
0
                    goto error;
718
0
                }
719
0
                args_owned = 1;
720
0
                arglen = -1;
721
0
                argidx = -2;
722
0
            }
723
724
            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
725
0
            while (--fmtcnt >= 0) {
726
0
                switch (c = *fmt++) {
727
0
                case '-': flags |= F_LJUST; continue;
728
0
                case '+': flags |= F_SIGN; continue;
729
0
                case ' ': flags |= F_BLANK; continue;
730
0
                case '#': flags |= F_ALT; continue;
731
0
                case '0': flags |= F_ZERO; continue;
732
0
                }
733
0
                break;
734
0
            }
735
736
            /* Parse width. Example: "%10s" => width=10 */
737
0
            if (c == '*') {
738
0
                v = getnextarg(args, arglen, &argidx);
739
0
                if (v == NULL)
740
0
                    goto error;
741
0
                if (!PyLong_Check(v)) {
742
0
                    PyErr_SetString(PyExc_TypeError,
743
0
                                    "* wants int");
744
0
                    goto error;
745
0
                }
746
0
                width = PyLong_AsSsize_t(v);
747
0
                if (width == -1 && PyErr_Occurred())
748
0
                    goto error;
749
0
                if (width < 0) {
750
0
                    flags |= F_LJUST;
751
0
                    width = -width;
752
0
                }
753
0
                if (--fmtcnt >= 0)
754
0
                    c = *fmt++;
755
0
            }
756
0
            else if (c >= 0 && isdigit(c)) {
757
0
                width = c - '0';
758
0
                while (--fmtcnt >= 0) {
759
0
                    c = Py_CHARMASK(*fmt++);
760
0
                    if (!isdigit(c))
761
0
                        break;
762
0
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763
0
                        PyErr_SetString(
764
0
                            PyExc_ValueError,
765
0
                            "width too big");
766
0
                        goto error;
767
0
                    }
768
0
                    width = width*10 + (c - '0');
769
0
                }
770
0
            }
771
772
            /* Parse precision. Example: "%.3f" => prec=3 */
773
0
            if (c == '.') {
774
0
                prec = 0;
775
0
                if (--fmtcnt >= 0)
776
0
                    c = *fmt++;
777
0
                if (c == '*') {
778
0
                    v = getnextarg(args, arglen, &argidx);
779
0
                    if (v == NULL)
780
0
                        goto error;
781
0
                    if (!PyLong_Check(v)) {
782
0
                        PyErr_SetString(
783
0
                            PyExc_TypeError,
784
0
                            "* wants int");
785
0
                        goto error;
786
0
                    }
787
0
                    prec = _PyLong_AsInt(v);
788
0
                    if (prec == -1 && PyErr_Occurred())
789
0
                        goto error;
790
0
                    if (prec < 0)
791
0
                        prec = 0;
792
0
                    if (--fmtcnt >= 0)
793
0
                        c = *fmt++;
794
0
                }
795
0
                else if (c >= 0 && isdigit(c)) {
796
0
                    prec = c - '0';
797
0
                    while (--fmtcnt >= 0) {
798
0
                        c = Py_CHARMASK(*fmt++);
799
0
                        if (!isdigit(c))
800
0
                            break;
801
0
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802
0
                            PyErr_SetString(
803
0
                                PyExc_ValueError,
804
0
                                "prec too big");
805
0
                            goto error;
806
0
                        }
807
0
                        prec = prec*10 + (c - '0');
808
0
                    }
809
0
                }
810
0
            } /* prec */
811
0
            if (fmtcnt >= 0) {
812
0
                if (c == 'h' || c == 'l' || c == 'L') {
813
0
                    if (--fmtcnt >= 0)
814
0
                        c = *fmt++;
815
0
                }
816
0
            }
817
0
            if (fmtcnt < 0) {
818
0
                PyErr_SetString(PyExc_ValueError,
819
0
                                "incomplete format");
820
0
                goto error;
821
0
            }
822
0
            v = getnextarg(args, arglen, &argidx);
823
0
            if (v == NULL)
824
0
                goto error;
825
826
0
            if (fmtcnt == 0) {
827
                /* last write: disable writer overallocation */
828
0
                writer.overallocate = 0;
829
0
            }
830
831
0
            sign = 0;
832
0
            fill = ' ';
833
0
            switch (c) {
834
0
            case 'r':
835
                // %r is only for 2/3 code; 3 only code should use %a
836
0
            case 'a':
837
0
                temp = PyObject_ASCII(v);
838
0
                if (temp == NULL)
839
0
                    goto error;
840
0
                assert(PyUnicode_IS_ASCII(temp));
841
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842
0
                len = PyUnicode_GET_LENGTH(temp);
843
0
                if (prec >= 0 && len > prec)
844
0
                    len = prec;
845
0
                break;
846
847
0
            case 's':
848
                // %s is only for 2/3 code; 3 only code should use %b
849
0
            case 'b':
850
0
                temp = format_obj(v, &pbuf, &len);
851
0
                if (temp == NULL)
852
0
                    goto error;
853
0
                if (prec >= 0 && len > prec)
854
0
                    len = prec;
855
0
                break;
856
857
0
            case 'i':
858
0
            case 'd':
859
0
            case 'u':
860
0
            case 'o':
861
0
            case 'x':
862
0
            case 'X':
863
0
                if (PyLong_CheckExact(v)
864
0
                    && width == -1 && prec == -1
865
0
                    && !(flags & (F_SIGN | F_BLANK))
866
0
                    && c != 'X')
867
0
                {
868
                    /* Fast path */
869
0
                    int alternate = flags & F_ALT;
870
0
                    int base;
871
872
0
                    switch(c)
873
0
                    {
874
0
                        default:
875
0
                            Py_UNREACHABLE();
876
0
                        case 'd':
877
0
                        case 'i':
878
0
                        case 'u':
879
0
                            base = 10;
880
0
                            break;
881
0
                        case 'o':
882
0
                            base = 8;
883
0
                            break;
884
0
                        case 'x':
885
0
                        case 'X':
886
0
                            base = 16;
887
0
                            break;
888
0
                    }
889
890
                    /* Fast path */
891
0
                    writer.min_size -= 2; /* size preallocated for "%d" */
892
0
                    res = _PyLong_FormatBytesWriter(&writer, res,
893
0
                                                    v, base, alternate);
894
0
                    if (res == NULL)
895
0
                        goto error;
896
0
                    continue;
897
0
                }
898
899
0
                temp = formatlong(v, flags, prec, c);
900
0
                if (!temp)
901
0
                    goto error;
902
0
                assert(PyUnicode_IS_ASCII(temp));
903
0
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904
0
                len = PyUnicode_GET_LENGTH(temp);
905
0
                sign = 1;
906
0
                if (flags & F_ZERO)
907
0
                    fill = '0';
908
0
                break;
909
910
0
            case 'e':
911
0
            case 'E':
912
0
            case 'f':
913
0
            case 'F':
914
0
            case 'g':
915
0
            case 'G':
916
0
                if (width == -1 && prec == -1
917
0
                    && !(flags & (F_SIGN | F_BLANK)))
918
0
                {
919
                    /* Fast path */
920
0
                    writer.min_size -= 2; /* size preallocated for "%f" */
921
0
                    res = formatfloat(v, flags, prec, c, NULL, &writer, res);
922
0
                    if (res == NULL)
923
0
                        goto error;
924
0
                    continue;
925
0
                }
926
927
0
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
928
0
                    goto error;
929
0
                pbuf = PyBytes_AS_STRING(temp);
930
0
                len = PyBytes_GET_SIZE(temp);
931
0
                sign = 1;
932
0
                if (flags & F_ZERO)
933
0
                    fill = '0';
934
0
                break;
935
936
0
            case 'c':
937
0
                pbuf = &onechar;
938
0
                len = byte_converter(v, &onechar);
939
0
                if (!len)
940
0
                    goto error;
941
0
                if (width == -1) {
942
                    /* Fast path */
943
0
                    *res++ = onechar;
944
0
                    continue;
945
0
                }
946
0
                break;
947
948
0
            default:
949
0
                PyErr_Format(PyExc_ValueError,
950
0
                  "unsupported format character '%c' (0x%x) "
951
0
                  "at index %zd",
952
0
                  c, c,
953
0
                  (Py_ssize_t)(fmt - 1 - format));
954
0
                goto error;
955
0
            }
956
957
0
            if (sign) {
958
0
                if (*pbuf == '-' || *pbuf == '+') {
959
0
                    sign = *pbuf++;
960
0
                    len--;
961
0
                }
962
0
                else if (flags & F_SIGN)
963
0
                    sign = '+';
964
0
                else if (flags & F_BLANK)
965
0
                    sign = ' ';
966
0
                else
967
0
                    sign = 0;
968
0
            }
969
0
            if (width < len)
970
0
                width = len;
971
972
0
            alloc = width;
973
0
            if (sign != 0 && len == width)
974
0
                alloc++;
975
            /* 2: size preallocated for %s */
976
0
            if (alloc > 2) {
977
0
                res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
978
0
                if (res == NULL)
979
0
                    goto error;
980
0
            }
981
#ifndef NDEBUG
982
            char *before = res;
983
#endif
984
985
            /* Write the sign if needed */
986
0
            if (sign) {
987
0
                if (fill != ' ')
988
0
                    *res++ = sign;
989
0
                if (width > len)
990
0
                    width--;
991
0
            }
992
993
            /* Write the numeric prefix for "x", "X" and "o" formats
994
               if the alternate form is used.
995
               For example, write "0x" for the "%#x" format. */
996
0
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
997
0
                assert(pbuf[0] == '0');
998
0
                assert(pbuf[1] == c);
999
0
                if (fill != ' ') {
1000
0
                    *res++ = *pbuf++;
1001
0
                    *res++ = *pbuf++;
1002
0
                }
1003
0
                width -= 2;
1004
0
                if (width < 0)
1005
0
                    width = 0;
1006
0
                len -= 2;
1007
0
            }
1008
1009
            /* Pad left with the fill character if needed */
1010
0
            if (width > len && !(flags & F_LJUST)) {
1011
0
                memset(res, fill, width - len);
1012
0
                res += (width - len);
1013
0
                width = len;
1014
0
            }
1015
1016
            /* If padding with spaces: write sign if needed and/or numeric
1017
               prefix if the alternate form is used */
1018
0
            if (fill == ' ') {
1019
0
                if (sign)
1020
0
                    *res++ = sign;
1021
0
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1022
0
                    assert(pbuf[0] == '0');
1023
0
                    assert(pbuf[1] == c);
1024
0
                    *res++ = *pbuf++;
1025
0
                    *res++ = *pbuf++;
1026
0
                }
1027
0
            }
1028
1029
            /* Copy bytes */
1030
0
            memcpy(res, pbuf, len);
1031
0
            res += len;
1032
1033
            /* Pad right with the fill character if needed */
1034
0
            if (width > len) {
1035
0
                memset(res, ' ', width - len);
1036
0
                res += (width - len);
1037
0
            }
1038
1039
0
            if (dict && (argidx < arglen)) {
1040
0
                PyErr_SetString(PyExc_TypeError,
1041
0
                           "not all arguments converted during bytes formatting");
1042
0
                Py_XDECREF(temp);
1043
0
                goto error;
1044
0
            }
1045
0
            Py_XDECREF(temp);
1046
1047
#ifndef NDEBUG
1048
            /* check that we computed the exact size for this write */
1049
            assert((res - before) == alloc);
1050
#endif
1051
0
        } /* '%' */
1052
1053
        /* If overallocation was disabled, ensure that it was the last
1054
           write. Otherwise, we missed an optimization */
1055
0
        assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1056
0
    } /* until end */
1057
1058
0
    if (argidx < arglen && !dict) {
1059
0
        PyErr_SetString(PyExc_TypeError,
1060
0
                        "not all arguments converted during bytes formatting");
1061
0
        goto error;
1062
0
    }
1063
1064
0
    if (args_owned) {
1065
0
        Py_DECREF(args);
1066
0
    }
1067
0
    return _PyBytesWriter_Finish(&writer, res);
1068
1069
0
 error:
1070
0
    _PyBytesWriter_Dealloc(&writer);
1071
0
    if (args_owned) {
1072
0
        Py_DECREF(args);
1073
0
    }
1074
0
    return NULL;
1075
0
}
1076
1077
/* Unescape a backslash-escaped string. If unicode is non-zero,
1078
   the string is a u-literal. If recode_encoding is non-zero,
1079
   the string is UTF-8 encoded and should be re-encoded in the
1080
   specified encoding.  */
1081
1082
static char *
1083
_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1084
                            const char *errors, const char *recode_encoding,
1085
                            _PyBytesWriter *writer, char *p)
1086
0
{
1087
0
    PyObject *u, *w;
1088
0
    const char* t;
1089
1090
0
    t = *s;
1091
    /* Decode non-ASCII bytes as UTF-8. */
1092
0
    while (t < end && (*t & 0x80))
1093
0
        t++;
1094
0
    u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1095
0
    if (u == NULL)
1096
0
        return NULL;
1097
1098
    /* Recode them in target encoding. */
1099
0
    w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1100
0
    Py_DECREF(u);
1101
0
    if  (w == NULL)
1102
0
        return NULL;
1103
0
    assert(PyBytes_Check(w));
1104
1105
    /* Append bytes to output buffer. */
1106
0
    writer->min_size--;   /* subtract 1 preallocated byte */
1107
0
    p = _PyBytesWriter_WriteBytes(writer, p,
1108
0
                                  PyBytes_AS_STRING(w),
1109
0
                                  PyBytes_GET_SIZE(w));
1110
0
    Py_DECREF(w);
1111
0
    if (p == NULL)
1112
0
        return NULL;
1113
1114
0
    *s = t;
1115
0
    return p;
1116
0
}
1117
1118
PyObject *_PyBytes_DecodeEscape(const char *s,
1119
                                Py_ssize_t len,
1120
                                const char *errors,
1121
                                Py_ssize_t unicode,
1122
                                const char *recode_encoding,
1123
                                const char **first_invalid_escape)
1124
2
{
1125
2
    int c;
1126
2
    char *p;
1127
2
    const char *end;
1128
2
    _PyBytesWriter writer;
1129
1130
2
    _PyBytesWriter_Init(&writer);
1131
1132
2
    p = _PyBytesWriter_Alloc(&writer, len);
1133
2
    if (p == NULL)
1134
0
        return NULL;
1135
2
    writer.overallocate = 1;
1136
1137
2
    *first_invalid_escape = NULL;
1138
1139
2
    end = s + len;
1140
4
    while (s < end) {
1141
2
        if (*s != '\\') {
1142
0
          non_esc:
1143
0
            if (!(recode_encoding && (*s & 0x80))) {
1144
0
                *p++ = *s++;
1145
0
            }
1146
0
            else {
1147
                /* non-ASCII character and need to recode */
1148
0
                p = _PyBytes_DecodeEscapeRecode(&s, end,
1149
0
                                                errors, recode_encoding,
1150
0
                                                &writer, p);
1151
0
                if (p == NULL)
1152
0
                    goto failed;
1153
0
            }
1154
0
            continue;
1155
0
        }
1156
1157
2
        s++;
1158
2
        if (s == end) {
1159
0
            PyErr_SetString(PyExc_ValueError,
1160
0
                            "Trailing \\ in string");
1161
0
            goto failed;
1162
0
        }
1163
1164
2
        switch (*s++) {
1165
        /* XXX This assumes ASCII! */
1166
0
        case '\n': break;
1167
0
        case '\\': *p++ = '\\'; break;
1168
0
        case '\'': *p++ = '\''; break;
1169
0
        case '\"': *p++ = '\"'; break;
1170
0
        case 'b': *p++ = '\b'; break;
1171
0
        case 'f': *p++ = '\014'; break; /* FF */
1172
0
        case 't': *p++ = '\t'; break;
1173
0
        case 'n': *p++ = '\n'; break;
1174
0
        case 'r': *p++ = '\r'; break;
1175
0
        case 'v': *p++ = '\013'; break; /* VT */
1176
0
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1177
2
        case '0': case '1': case '2': case '3':
1178
2
        case '4': case '5': case '6': case '7':
1179
2
            c = s[-1] - '0';
1180
2
            if (s < end && '0' <= *s && *s <= '7') {
1181
0
                c = (c<<3) + *s++ - '0';
1182
0
                if (s < end && '0' <= *s && *s <= '7')
1183
0
                    c = (c<<3) + *s++ - '0';
1184
0
            }
1185
2
            *p++ = c;
1186
2
            break;
1187
0
        case 'x':
1188
0
            if (s+1 < end) {
1189
0
                int digit1, digit2;
1190
0
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1191
0
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1192
0
                if (digit1 < 16 && digit2 < 16) {
1193
0
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1194
0
                    s += 2;
1195
0
                    break;
1196
0
                }
1197
0
            }
1198
            /* invalid hexadecimal digits */
1199
1200
0
            if (!errors || strcmp(errors, "strict") == 0) {
1201
0
                PyErr_Format(PyExc_ValueError,
1202
0
                             "invalid \\x escape at position %zd",
1203
0
                             s - 2 - (end - len));
1204
0
                goto failed;
1205
0
            }
1206
0
            if (strcmp(errors, "replace") == 0) {
1207
0
                *p++ = '?';
1208
0
            } else if (strcmp(errors, "ignore") == 0)
1209
0
                /* do nothing */;
1210
0
            else {
1211
0
                PyErr_Format(PyExc_ValueError,
1212
0
                             "decoding error; unknown "
1213
0
                             "error handling code: %.400s",
1214
0
                             errors);
1215
0
                goto failed;
1216
0
            }
1217
            /* skip \x */
1218
0
            if (s < end && Py_ISXDIGIT(s[0]))
1219
0
                s++; /* and a hexdigit */
1220
0
            break;
1221
1222
0
        default:
1223
0
            if (*first_invalid_escape == NULL) {
1224
0
                *first_invalid_escape = s-1; /* Back up one char, since we've
1225
                                                already incremented s. */
1226
0
            }
1227
0
            *p++ = '\\';
1228
0
            s--;
1229
0
            goto non_esc; /* an arbitrary number of unescaped
1230
                             UTF-8 bytes may follow. */
1231
2
        }
1232
2
    }
1233
1234
2
    return _PyBytesWriter_Finish(&writer, p);
1235
1236
0
  failed:
1237
0
    _PyBytesWriter_Dealloc(&writer);
1238
0
    return NULL;
1239
2
}
1240
1241
PyObject *PyBytes_DecodeEscape(const char *s,
1242
                                Py_ssize_t len,
1243
                                const char *errors,
1244
                                Py_ssize_t unicode,
1245
                                const char *recode_encoding)
1246
0
{
1247
0
    const char* first_invalid_escape;
1248
0
    PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1249
0
                                             recode_encoding,
1250
0
                                             &first_invalid_escape);
1251
0
    if (result == NULL)
1252
0
        return NULL;
1253
0
    if (first_invalid_escape != NULL) {
1254
0
        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1255
0
                             "invalid escape sequence '\\%c'",
1256
0
                             (unsigned char)*first_invalid_escape) < 0) {
1257
0
            Py_DECREF(result);
1258
0
            return NULL;
1259
0
        }
1260
0
    }
1261
0
    return result;
1262
1263
0
}
1264
/* -------------------------------------------------------------------- */
1265
/* object api */
1266
1267
Py_ssize_t
1268
PyBytes_Size(PyObject *op)
1269
3.55k
{
1270
3.55k
    if (!PyBytes_Check(op)) {
1271
0
        PyErr_Format(PyExc_TypeError,
1272
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273
0
        return -1;
1274
0
    }
1275
3.55k
    return Py_SIZE(op);
1276
3.55k
}
1277
1278
char *
1279
PyBytes_AsString(PyObject *op)
1280
3.55k
{
1281
3.55k
    if (!PyBytes_Check(op)) {
1282
0
        PyErr_Format(PyExc_TypeError,
1283
0
             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1284
0
        return NULL;
1285
0
    }
1286
3.55k
    return ((PyBytesObject *)op)->ob_sval;
1287
3.55k
}
1288
1289
int
1290
PyBytes_AsStringAndSize(PyObject *obj,
1291
                         char **s,
1292
                         Py_ssize_t *len)
1293
1.84k
{
1294
1.84k
    if (s == NULL) {
1295
0
        PyErr_BadInternalCall();
1296
0
        return -1;
1297
0
    }
1298
1299
1.84k
    if (!PyBytes_Check(obj)) {
1300
0
        PyErr_Format(PyExc_TypeError,
1301
0
             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1302
0
        return -1;
1303
0
    }
1304
1305
1.84k
    *s = PyBytes_AS_STRING(obj);
1306
1.84k
    if (len != NULL)
1307
1.84k
        *len = PyBytes_GET_SIZE(obj);
1308
0
    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1309
0
        PyErr_SetString(PyExc_ValueError,
1310
0
                        "embedded null byte");
1311
0
        return -1;
1312
0
    }
1313
1.84k
    return 0;
1314
1.84k
}
1315
1316
/* -------------------------------------------------------------------- */
1317
/* Methods */
1318
1319
#include "stringlib/stringdefs.h"
1320
1321
#include "stringlib/fastsearch.h"
1322
#include "stringlib/count.h"
1323
#include "stringlib/find.h"
1324
#include "stringlib/join.h"
1325
#include "stringlib/partition.h"
1326
#include "stringlib/split.h"
1327
#include "stringlib/ctype.h"
1328
1329
#include "stringlib/transmogrify.h"
1330
1331
/* Build the repr of the bytes object `obj` as a new str object of the form
   b'...' (or b"..." when `smartquotes` is true and the data contains single
   quotes but no double quotes).

   The quote character and backslash are backslash-escaped; tab, newline and
   carriage return become \t, \n and \r; any other byte below ' ' or at or
   above 0x7f becomes \xHH.  Returns NULL with OverflowError set if the
   computed length would exceed PY_SSIZE_T_MAX, or NULL if the str
   allocation fails. */
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
    PyBytesObject *op = (PyBytesObject *)obj;
    Py_ssize_t length = Py_SIZE(op);
    Py_ssize_t squotes = 0;
    Py_ssize_t dquotes = 0;
    Py_ssize_t newsize = 3; /* b'' */
    Py_ssize_t i;
    unsigned char *s = (unsigned char *)op->ob_sval;
    unsigned char *p;
    unsigned char quote;
    PyObject *v;

    /* Compute size of output string */
    for (i = 0; i < length; i++) {
        unsigned char ch = s[i];
        Py_ssize_t incr;

        if (ch == '\'') {
            squotes++;
            incr = 1;
        }
        else if (ch == '"') {
            dquotes++;
            incr = 1;
        }
        else if (ch == '\\' || ch == '\t' || ch == '\n' || ch == '\r') {
            incr = 2; /* \C */
        }
        else if (ch < ' ' || ch >= 0x7f) {
            incr = 4; /* \xHH */
        }
        else {
            incr = 1;
        }

        if (newsize > PY_SSIZE_T_MAX - incr) {
            goto overflow;
        }
        newsize += incr;
    }

    /* Pick the quote; single quotes in the data cost one extra backslash
       each when the single quote is also the delimiter. */
    quote = (smartquotes && squotes && !dquotes) ? '"' : '\'';
    if (squotes && quote == '\'') {
        if (newsize > PY_SSIZE_T_MAX - squotes) {
            goto overflow;
        }
        newsize += squotes;
    }

    v = PyUnicode_New(newsize, 127);
    if (v == NULL) {
        return NULL;
    }
    p = PyUnicode_1BYTE_DATA(v);

    *p++ = 'b';
    *p++ = quote;
    for (i = 0; i < length; i++) {
        unsigned char c = s[i];

        if (c == quote || c == '\\') {
            *p++ = '\\';
            *p++ = c;
        }
        else if (c == '\t') {
            *p++ = '\\';
            *p++ = 't';
        }
        else if (c == '\n') {
            *p++ = '\\';
            *p++ = 'n';
        }
        else if (c == '\r') {
            *p++ = '\\';
            *p++ = 'r';
        }
        else if (c < ' ' || c >= 0x7f) {
            *p++ = '\\';
            *p++ = 'x';
            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
            *p++ = Py_hexdigits[c & 0xf];
        }
        else {
            *p++ = c;
        }
    }
    *p++ = quote;
    assert(_PyUnicode_CheckConsistency(v, 1));
    return v;

  overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "bytes object is too large to make repr");
    return NULL;
}
1403
1404
/* repr() implementation for bytes: PyBytes_Repr() with smart quoting on. */
static PyObject *
bytes_repr(PyObject *op)
{
    const int smartquotes = 1;

    return PyBytes_Repr(op, smartquotes);
}
1409
1410
/* str() implementation for bytes.  When the interpreter config has
   bytes_warning set, a BytesWarning is issued first; if that warning call
   returns non-zero, NULL is returned.  Otherwise the result is the same as
   bytes_repr(). */
static PyObject *
bytes_str(PyObject *op)
{
    PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;

    if (config->bytes_warning
        && PyErr_WarnEx(PyExc_BytesWarning,
                        "str() on a bytes instance", 1))
    {
        return NULL;
    }
    return bytes_repr(op);
}
1422
1423
static Py_ssize_t
1424
bytes_length(PyBytesObject *a)
1425
942
{
1426
942
    return Py_SIZE(a);
1427
942
}
1428
1429
/* This is also used by PyBytes_Concat() */
1430
static PyObject *
bytes_concat(PyObject *a, PyObject *b)
{
    /* Concatenate the buffer contents of `a` and `b` into a new bytes
       object.  Both operands are accessed through PyBUF_SIMPLE buffer
       views; if either view cannot be obtained, TypeError is set.  When one
       side is empty and the other is an exact bytes object, that object is
       returned with a new reference instead of copying.  Returns NULL with
       an exception set on failure. */
    Py_buffer left, right;
    PyObject *result = NULL;
    int have_views;

    /* len == -1 marks a view that was never acquired, so the cleanup code
       below knows which views to release. */
    left.len = -1;
    right.len = -1;

    have_views = (PyObject_GetBuffer(a, &left, PyBUF_SIMPLE) == 0
                  && PyObject_GetBuffer(b, &right, PyBUF_SIMPLE) == 0);
    if (!have_views) {
        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                     Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
        goto done;
    }

    /* Optimize end cases */
    if (left.len == 0 && PyBytes_CheckExact(b)) {
        Py_INCREF(b);
        result = b;
        goto done;
    }
    if (right.len == 0 && PyBytes_CheckExact(a)) {
        Py_INCREF(a);
        result = a;
        goto done;
    }

    /* Guard the length addition against overflow. */
    if (left.len > PY_SSIZE_T_MAX - right.len) {
        PyErr_NoMemory();
        goto done;
    }

    result = PyBytes_FromStringAndSize(NULL, left.len + right.len);
    if (result != NULL) {
        char *dest = PyBytes_AS_STRING(result);

        memcpy(dest, left.buf, left.len);
        memcpy(dest + left.len, right.buf, right.len);
    }

  done:
    if (left.len != -1) {
        PyBuffer_Release(&left);
    }
    if (right.len != -1) {
        PyBuffer_Release(&right);
    }
    return result;
}
1475
1476
static PyObject *
1477
bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1478
2
{
1479
2
    Py_ssize_t i;
1480
2
    Py_ssize_t j;
1481
2
    Py_ssize_t size;
1482
2
    PyBytesObject *op;
1483
2
    size_t nbytes;
1484
2
    if (n < 0)
1485
0
        n = 0;
1486
    /* watch out for overflows:  the size can overflow int,
1487
     * and the # of bytes needed can overflow size_t
1488
     */
1489
2
    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1490
0
        PyErr_SetString(PyExc_OverflowError,
1491
0
            "repeated bytes are too long");
1492
0
        return NULL;
1493
0
    }
1494
2
    size = Py_SIZE(a) * n;
1495
2
    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1496
0
        Py_INCREF(a);
1497
0
        return (PyObject *)a;
1498
0
    }
1499
2
    nbytes = (size_t)size;
1500
2
    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1501
0
        PyErr_SetString(PyExc_OverflowError,
1502
0
            "repeated bytes are too long");
1503
0
        return NULL;
1504
0
    }
1505
2
    op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1506
2
    if (op == NULL)
1507
0
        return PyErr_NoMemory();
1508
2
    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1509
2
    op->ob_shash = -1;
1510
2
    op->ob_sval[size] = '\0';
1511
2
    if (Py_SIZE(a) == 1 && n > 0) {
1512
2
        memset(op->ob_sval, a->ob_sval[0] , n);
1513
2
        return (PyObject *) op;
1514
2
    }
1515
0
    i = 0;
1516
0
    if (i < size) {
1517
0
        memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1518
0
        i = Py_SIZE(a);
1519
0
    }
1520
0
    while (i < size) {
1521
0
        j = (i <= size-i)  ?  i  :  size-i;
1522
0
        memcpy(op->ob_sval+i, op->ob_sval, j);
1523
0
        i += j;
1524
0
    }
1525
0
    return (PyObject *) op;
1526
2
}
1527
1528
static int
1529
bytes_contains(PyObject *self, PyObject *arg)
1530
0
{
1531
0
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1532
0
}
1533
1534
static PyObject *
1535
bytes_item(PyBytesObject *a, Py_ssize_t i)
1536
0
{
1537
0
    if (i < 0 || i >= Py_SIZE(a)) {
1538
0
        PyErr_SetString(PyExc_IndexError, "index out of range");
1539
0
        return NULL;
1540
0
    }
1541
0
    return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1542
0
}
1543
1544
static int
1545
bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1546
83.8k
{
1547
83.8k
    int cmp;
1548
83.8k
    Py_ssize_t len;
1549
1550
83.8k
    len = Py_SIZE(a);
1551
83.8k
    if (Py_SIZE(b) != len)
1552
0
        return 0;
1553
1554
83.8k
    if (a->ob_sval[0] != b->ob_sval[0])
1555
0
        return 0;
1556
1557
83.8k
    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1558
83.8k
    return (cmp == 0);
1559
83.8k
}
1560
1561
static PyObject*
1562
bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1563
83.8k
{
1564
83.8k
    int c;
1565
83.8k
    Py_ssize_t len_a, len_b;
1566
83.8k
    Py_ssize_t min_len;
1567
83.8k
    int rc;
1568
1569
    /* Make sure both arguments are strings. */
1570
83.8k
    if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1571
0
        PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
1572
0
        if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1573
0
            rc = PyObject_IsInstance((PyObject*)a,
1574
0
                                     (PyObject*)&PyUnicode_Type);
1575
0
            if (!rc)
1576
0
                rc = PyObject_IsInstance((PyObject*)b,
1577
0
                                         (PyObject*)&PyUnicode_Type);
1578
0
            if (rc < 0)
1579
0
                return NULL;
1580
0
            if (rc) {
1581
0
                if (PyErr_WarnEx(PyExc_BytesWarning,
1582
0
                                 "Comparison between bytes and string", 1))
1583
0
                    return NULL;
1584
0
            }
1585
0
            else {
1586
0
                rc = PyObject_IsInstance((PyObject*)a,
1587
0
                                         (PyObject*)&PyLong_Type);
1588
0
                if (!rc)
1589
0
                    rc = PyObject_IsInstance((PyObject*)b,
1590
0
                                             (PyObject*)&PyLong_Type);
1591
0
                if (rc < 0)
1592
0
                    return NULL;
1593
0
                if (rc) {
1594
0
                    if (PyErr_WarnEx(PyExc_BytesWarning,
1595
0
                                     "Comparison between bytes and int", 1))
1596
0
                        return NULL;
1597
0
                }
1598
0
            }
1599
0
        }
1600
0
        Py_RETURN_NOTIMPLEMENTED;
1601
0
    }
1602
83.8k
    else if (a == b) {
1603
0
        switch (op) {
1604
0
        case Py_EQ:
1605
0
        case Py_LE:
1606
0
        case Py_GE:
1607
            /* a string is equal to itself */
1608
0
            Py_RETURN_TRUE;
1609
0
        case Py_NE:
1610
0
        case Py_LT:
1611
0
        case Py_GT:
1612
0
            Py_RETURN_FALSE;
1613
0
        default:
1614
0
            PyErr_BadArgument();
1615
0
            return NULL;
1616
0
        }
1617
0
    }
1618
83.8k
    else if (op == Py_EQ || op == Py_NE) {
1619
83.8k
        int eq = bytes_compare_eq(a, b);
1620
83.8k
        eq ^= (op == Py_NE);
1621
83.8k
        return PyBool_FromLong(eq);
1622
83.8k
    }
1623
0
    else {
1624
0
        len_a = Py_SIZE(a);
1625
0
        len_b = Py_SIZE(b);
1626
0
        min_len = Py_MIN(len_a, len_b);
1627
0
        if (min_len > 0) {
1628
0
            c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1629
0
            if (c == 0)
1630
0
                c = memcmp(a->ob_sval, b->ob_sval, min_len);
1631
0
        }
1632
0
        else
1633
0
            c = 0;
1634
0
        if (c != 0)
1635
0
            Py_RETURN_RICHCOMPARE(c, 0, op);
1636
0
        Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1637
0
    }
1638
83.8k
}
1639
1640
static Py_hash_t
1641
bytes_hash(PyBytesObject *a)
1642
206k
{
1643
206k
    if (a->ob_shash == -1) {
1644
        /* Can't fail */
1645
113k
        a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1646
113k
    }
1647
206k
    return a->ob_shash;
1648
206k
}
1649
1650
static PyObject*
1651
bytes_subscript(PyBytesObject* self, PyObject* item)
1652
1.45k
{
1653
1.45k
    if (PyIndex_Check(item)) {
1654
0
        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1655
0
        if (i == -1 && PyErr_Occurred())
1656
0
            return NULL;
1657
0
        if (i < 0)
1658
0
            i += PyBytes_GET_SIZE(self);
1659
0
        if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1660
0
            PyErr_SetString(PyExc_IndexError,
1661
0
                            "index out of range");
1662
0
            return NULL;
1663
0
        }
1664
0
        return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1665
0
    }
1666
1.45k
    else if (PySlice_Check(item)) {
1667
1.45k
        Py_ssize_t start, stop, step, slicelength, i;
1668
1.45k
        size_t cur;
1669
1.45k
        char* source_buf;
1670
1.45k
        char* result_buf;
1671
1.45k
        PyObject* result;
1672
1673
1.45k
        if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1674
0
            return NULL;
1675
0
        }
1676
1.45k
        slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1677
1.45k
                                            &stop, step);
1678
1679
1.45k
        if (slicelength <= 0) {
1680
3
            return PyBytes_FromStringAndSize("", 0);
1681
3
        }
1682
1.45k
        else if (start == 0 && step == 1 &&
1683
1.45k
                 slicelength == PyBytes_GET_SIZE(self) &&
1684
1.45k
                 PyBytes_CheckExact(self)) {
1685
0
            Py_INCREF(self);
1686
0
            return (PyObject *)self;
1687
0
        }
1688
1.45k
        else if (step == 1) {
1689
1.45k
            return PyBytes_FromStringAndSize(
1690
1.45k
                PyBytes_AS_STRING(self) + start,
1691
1.45k
                slicelength);
1692
1.45k
        }
1693
0
        else {
1694
0
            source_buf = PyBytes_AS_STRING(self);
1695
0
            result = PyBytes_FromStringAndSize(NULL, slicelength);
1696
0
            if (result == NULL)
1697
0
                return NULL;
1698
1699
0
            result_buf = PyBytes_AS_STRING(result);
1700
0
            for (cur = start, i = 0; i < slicelength;
1701
0
                 cur += step, i++) {
1702
0
                result_buf[i] = source_buf[cur];
1703
0
            }
1704
1705
0
            return result;
1706
0
        }
1707
1.45k
    }
1708
0
    else {
1709
0
        PyErr_Format(PyExc_TypeError,
1710
0
                     "byte indices must be integers or slices, not %.200s",
1711
0
                     Py_TYPE(item)->tp_name);
1712
0
        return NULL;
1713
0
    }
1714
1.45k
}
1715
1716
static int
1717
bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1718
2.73k
{
1719
2.73k
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720
2.73k
                             1, flags);
1721
2.73k
}
1722
1723
static PySequenceMethods bytes_as_sequence = {
1724
    (lenfunc)bytes_length, /*sq_length*/
1725
    (binaryfunc)bytes_concat, /*sq_concat*/
1726
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727
    (ssizeargfunc)bytes_item, /*sq_item*/
1728
    0,                  /*sq_slice*/
1729
    0,                  /*sq_ass_item*/
1730
    0,                  /*sq_ass_slice*/
1731
    (objobjproc)bytes_contains /*sq_contains*/
1732
};
1733
1734
static PyMappingMethods bytes_as_mapping = {
1735
    (lenfunc)bytes_length,
1736
    (binaryfunc)bytes_subscript,
1737
    0,
1738
};
1739
1740
static PyBufferProcs bytes_as_buffer = {
1741
    (getbufferproc)bytes_buffer_getbuffer,
1742
    NULL,
1743
};
1744
1745
1746
0
/* `striptype` selectors passed to do_xstrip(): it trims the leading end
   unless the value is RIGHTSTRIP, and takes its trailing-end branch unless
   the value is LEFTSTRIP. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1749
1750
/*[clinic input]
1751
bytes.split
1752
1753
    sep: object = None
1754
        The delimiter according which to split the bytes.
1755
        None (the default value) means split on ASCII whitespace characters
1756
        (space, tab, return, newline, formfeed, vertical tab).
1757
    maxsplit: Py_ssize_t = -1
1758
        Maximum number of splits to do.
1759
        -1 (the default value) means no limit.
1760
1761
Return a list of the sections in the bytes, using sep as the delimiter.
1762
[clinic start generated code]*/
1763
1764
static PyObject *
1765
bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766
/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1767
155
{
1768
155
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1769
155
    const char *s = PyBytes_AS_STRING(self), *sub;
1770
155
    Py_buffer vsub;
1771
155
    PyObject *list;
1772
1773
155
    if (maxsplit < 0)
1774
77
        maxsplit = PY_SSIZE_T_MAX;
1775
155
    if (sep == Py_None)
1776
0
        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1777
155
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1778
155
        return NULL;
1779
0
    sub = vsub.buf;
1780
0
    n = vsub.len;
1781
1782
0
    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783
0
    PyBuffer_Release(&vsub);
1784
0
    return list;
1785
155
}
1786
1787
/*[clinic input]
1788
bytes.partition
1789
1790
    sep: Py_buffer
1791
    /
1792
1793
Partition the bytes into three parts using the given separator.
1794
1795
This will search for the separator sep in the bytes. If the separator is found,
1796
returns a 3-tuple containing the part before the separator, the separator
1797
itself, and the part after it.
1798
1799
If the separator is not found, returns a 3-tuple containing the original bytes
1800
object and two empty bytes objects.
1801
[clinic start generated code]*/
1802
1803
static PyObject *
1804
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1805
/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1806
0
{
1807
0
    return stringlib_partition(
1808
0
        (PyObject*) self,
1809
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1810
0
        sep->obj, (const char *)sep->buf, sep->len
1811
0
        );
1812
0
}
1813
1814
/*[clinic input]
1815
bytes.rpartition
1816
1817
    sep: Py_buffer
1818
    /
1819
1820
Partition the bytes into three parts using the given separator.
1821
1822
This will search for the separator sep in the bytes, starting at the end. If
1823
the separator is found, returns a 3-tuple containing the part before the
1824
separator, the separator itself, and the part after it.
1825
1826
If the separator is not found, returns a 3-tuple containing two empty bytes
1827
objects and the original bytes object.
1828
[clinic start generated code]*/
1829
1830
static PyObject *
1831
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1832
/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1833
0
{
1834
0
    return stringlib_rpartition(
1835
0
        (PyObject*) self,
1836
0
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1837
0
        sep->obj, (const char *)sep->buf, sep->len
1838
0
        );
1839
0
}
1840
1841
/*[clinic input]
1842
bytes.rsplit = bytes.split
1843
1844
Return a list of the sections in the bytes, using sep as the delimiter.
1845
1846
Splitting is done starting at the end of the bytes and working to the front.
1847
[clinic start generated code]*/
1848
1849
static PyObject *
1850
bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851
/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1852
0
{
1853
0
    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1854
0
    const char *s = PyBytes_AS_STRING(self), *sub;
1855
0
    Py_buffer vsub;
1856
0
    PyObject *list;
1857
1858
0
    if (maxsplit < 0)
1859
0
        maxsplit = PY_SSIZE_T_MAX;
1860
0
    if (sep == Py_None)
1861
0
        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1862
0
    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1863
0
        return NULL;
1864
0
    sub = vsub.buf;
1865
0
    n = vsub.len;
1866
1867
0
    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868
0
    PyBuffer_Release(&vsub);
1869
0
    return list;
1870
0
}
1871
1872
1873
/*[clinic input]
1874
bytes.join
1875
1876
    iterable_of_bytes: object
1877
    /
1878
1879
Concatenate any number of bytes objects.
1880
1881
The bytes whose method is called is inserted in between each pair.
1882
1883
The result is returned as a new bytes object.
1884
1885
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886
[clinic start generated code]*/
1887
1888
static PyObject *
1889
bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890
/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1891
1
{
1892
1
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1893
1
}
1894
1895
/* Non-static entry point for bytes.join(): `sep` must be a bytes object
   and `x` the object to join; both are only checked with assert(). */
PyObject *
_PyBytes_Join(PyObject *sep, PyObject *x)
{
    PyBytesObject *separator;

    assert(sep != NULL && PyBytes_Check(sep));
    assert(x != NULL);

    separator = (PyBytesObject *)sep;
    return bytes_join(separator, x);
}
1902
1903
static PyObject *
1904
bytes_find(PyBytesObject *self, PyObject *args)
1905
0
{
1906
0
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907
0
}
1908
1909
static PyObject *
1910
bytes_index(PyBytesObject *self, PyObject *args)
1911
0
{
1912
0
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913
0
}
1914
1915
1916
static PyObject *
1917
bytes_rfind(PyBytesObject *self, PyObject *args)
1918
0
{
1919
0
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920
0
}
1921
1922
1923
static PyObject *
1924
bytes_rindex(PyBytesObject *self, PyObject *args)
1925
0
{
1926
0
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927
0
}
1928
1929
1930
/* Strip from `self` every leading and/or trailing byte that occurs in the
   buffer exported by `sepobj`.  `striptype` selects the side(s): the left
   side is skipped for RIGHTSTRIP and the right side for LEFTSTRIP.
   Returns a new reference, or NULL if `sepobj` cannot provide a simple
   buffer. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
{
    char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t start = 0;
    Py_ssize_t stop = size;
    Py_buffer strip_view;
    const char *strip_set;
    Py_ssize_t strip_len;

    if (PyObject_GetBuffer(sepobj, &strip_view, PyBUF_SIMPLE) != 0) {
        return NULL;
    }
    strip_set = strip_view.buf;
    strip_len = strip_view.len;

    /* Advance `start` past every leading byte found in the strip set. */
    if (striptype != RIGHTSTRIP) {
        for (; start < size; start++) {
            if (memchr(strip_set, Py_CHARMASK(data[start]),
                       strip_len) == NULL) {
                break;
            }
        }
    }

    /* Pull `stop` back over every trailing byte found in the strip set,
       never crossing `start`. */
    if (striptype != LEFTSTRIP) {
        while (stop > start &&
               memchr(strip_set, Py_CHARMASK(data[stop - 1]),
                      strip_len) != NULL) {
            stop--;
        }
    }

    PyBuffer_Release(&strip_view);

    /* Anything removed, or self not an exact bytes: build a new object. */
    if (start != 0 || stop != size || !PyBytes_CheckExact(self)) {
        return PyBytes_FromStringAndSize(data + start, stop - start);
    }
    Py_INCREF(self);
    return (PyObject *)self;
}
1969
1970
1971
/* Strip bytes for which Py_ISSPACE() is true from the left and/or right
   end of `self`.  `striptype` selects the side(s): the left side is
   skipped for RIGHTSTRIP and the right side for LEFTSTRIP.  Returns a new
   reference. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject *self, int striptype)
{
    char *data = PyBytes_AS_STRING(self);
    Py_ssize_t size = PyBytes_GET_SIZE(self);
    Py_ssize_t start = 0;
    Py_ssize_t stop = size;

    /* Advance `start` past leading whitespace. */
    if (striptype != RIGHTSTRIP) {
        for (; start < size; start++) {
            if (!Py_ISSPACE(data[start])) {
                break;
            }
        }
    }

    /* Pull `stop` back over trailing whitespace, never crossing `start`. */
    if (striptype != LEFTSTRIP) {
        while (stop > start && Py_ISSPACE(data[stop - 1])) {
            stop--;
        }
    }

    /* Anything removed, or self not an exact bytes: build a new object. */
    if (start != 0 || stop != size || !PyBytes_CheckExact(self)) {
        return PyBytes_FromStringAndSize(data + start, stop - start);
    }
    Py_INCREF(self);
    return (PyObject *)self;
}
1999
2000
2001
/* Dispatch for the strip family: None selects the whitespace stripper,
   any other object is treated as the set of bytes to strip. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
    if (bytes == Py_None) {
        return do_strip(self, striptype);
    }
    return do_xstrip(self, striptype, bytes);
}
2009
2010
/*[clinic input]
2011
bytes.strip
2012
2013
    bytes: object = None
2014
    /
2015
2016
Strip leading and trailing bytes contained in the argument.
2017
2018
If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019
[clinic start generated code]*/
2020
2021
static PyObject *
2022
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2023
/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2024
0
{
2025
0
    return do_argstrip(self, BOTHSTRIP, bytes);
2026
0
}
2027
2028
/*[clinic input]
2029
bytes.lstrip
2030
2031
    bytes: object = None
2032
    /
2033
2034
Strip leading bytes contained in the argument.
2035
2036
If the argument is omitted or None, strip leading  ASCII whitespace.
2037
[clinic start generated code]*/
2038
2039
static PyObject *
2040
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2041
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2042
0
{
2043
0
    return do_argstrip(self, LEFTSTRIP, bytes);
2044
0
}
2045
2046
/*[clinic input]
2047
bytes.rstrip
2048
2049
    bytes: object = None
2050
    /
2051
2052
Strip trailing bytes contained in the argument.
2053
2054
If the argument is omitted or None, strip trailing ASCII whitespace.
2055
[clinic start generated code]*/
2056
2057
static PyObject *
2058
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2059
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2060
0
{
2061
0
    return do_argstrip(self, RIGHTSTRIP, bytes);
2062
0
}
2063
2064
2065
static PyObject *
2066
bytes_count(PyBytesObject *self, PyObject *args)
2067
0
{
2068
0
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069
0
}
2070
2071
2072
/*[clinic input]
2073
bytes.translate
2074
2075
    table: object
2076
        Translation table, which must be a bytes object of length 256.
2077
    /
2078
    delete as deletechars: object(c_default="NULL") = b''
2079
2080
Return a copy with each character mapped by the given translation table.
2081
2082
All characters occurring in the optional argument delete are removed.
2083
The remaining characters are mapped through the given translation table.
2084
[clinic start generated code]*/
2085
2086
static PyObject *
2087
bytes_translate_impl(PyBytesObject *self, PyObject *table,
2088
                     PyObject *deletechars)
2089
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2090
0
{
2091
0
    char *input, *output;
2092
0
    Py_buffer table_view = {NULL, NULL};
2093
0
    Py_buffer del_table_view = {NULL, NULL};
2094
0
    const char *table_chars;
2095
0
    Py_ssize_t i, c, changed = 0;
2096
0
    PyObject *input_obj = (PyObject*)self;
2097
0
    const char *output_start, *del_table_chars=NULL;
2098
0
    Py_ssize_t inlen, tablen, dellen = 0;
2099
0
    PyObject *result;
2100
0
    int trans_table[256];
2101
2102
0
    if (PyBytes_Check(table)) {
2103
0
        table_chars = PyBytes_AS_STRING(table);
2104
0
        tablen = PyBytes_GET_SIZE(table);
2105
0
    }
2106
0
    else if (table == Py_None) {
2107
0
        table_chars = NULL;
2108
0
        tablen = 256;
2109
0
    }
2110
0
    else {
2111
0
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2112
0
            return NULL;
2113
0
        table_chars = table_view.buf;
2114
0
        tablen = table_view.len;
2115
0
    }
2116
2117
0
    if (tablen != 256) {
2118
0
        PyErr_SetString(PyExc_ValueError,
2119
0
          "translation table must be 256 characters long");
2120
0
        PyBuffer_Release(&table_view);
2121
0
        return NULL;
2122
0
    }
2123
2124
0
    if (deletechars != NULL) {
2125
0
        if (PyBytes_Check(deletechars)) {
2126
0
            del_table_chars = PyBytes_AS_STRING(deletechars);
2127
0
            dellen = PyBytes_GET_SIZE(deletechars);
2128
0
        }
2129
0
        else {
2130
0
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2131
0
                PyBuffer_Release(&table_view);
2132
0
                return NULL;
2133
0
            }
2134
0
            del_table_chars = del_table_view.buf;
2135
0
            dellen = del_table_view.len;
2136
0
        }
2137
0
    }
2138
0
    else {
2139
0
        del_table_chars = NULL;
2140
0
        dellen = 0;
2141
0
    }
2142
2143
0
    inlen = PyBytes_GET_SIZE(input_obj);
2144
0
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2145
0
    if (result == NULL) {
2146
0
        PyBuffer_Release(&del_table_view);
2147
0
        PyBuffer_Release(&table_view);
2148
0
        return NULL;
2149
0
    }
2150
0
    output_start = output = PyBytes_AS_STRING(result);
2151
0
    input = PyBytes_AS_STRING(input_obj);
2152
2153
0
    if (dellen == 0 && table_chars != NULL) {
2154
        /* If no deletions are required, use faster code */
2155
0
        for (i = inlen; --i >= 0; ) {
2156
0
            c = Py_CHARMASK(*input++);
2157
0
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2158
0
                changed = 1;
2159
0
        }
2160
0
        if (!changed && PyBytes_CheckExact(input_obj)) {
2161
0
            Py_INCREF(input_obj);
2162
0
            Py_DECREF(result);
2163
0
            result = input_obj;
2164
0
        }
2165
0
        PyBuffer_Release(&del_table_view);
2166
0
        PyBuffer_Release(&table_view);
2167
0
        return result;
2168
0
    }
2169
2170
0
    if (table_chars == NULL) {
2171
0
        for (i = 0; i < 256; i++)
2172
0
            trans_table[i] = Py_CHARMASK(i);
2173
0
    } else {
2174
0
        for (i = 0; i < 256; i++)
2175
0
            trans_table[i] = Py_CHARMASK(table_chars[i]);
2176
0
    }
2177
0
    PyBuffer_Release(&table_view);
2178
2179
0
    for (i = 0; i < dellen; i++)
2180
0
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2181
0
    PyBuffer_Release(&del_table_view);
2182
2183
0
    for (i = inlen; --i >= 0; ) {
2184
0
        c = Py_CHARMASK(*input++);
2185
0
        if (trans_table[c] != -1)
2186
0
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187
0
                continue;
2188
0
        changed = 1;
2189
0
    }
2190
0
    if (!changed && PyBytes_CheckExact(input_obj)) {
2191
0
        Py_DECREF(result);
2192
0
        Py_INCREF(input_obj);
2193
0
        return input_obj;
2194
0
    }
2195
    /* Fix the size of the resulting string */
2196
0
    if (inlen > 0)
2197
0
        _PyBytes_Resize(&result, output - output_start);
2198
0
    return result;
2199
0
}
2200
2201
2202
/*[clinic input]
2203
2204
@staticmethod
2205
bytes.maketrans
2206
2207
    frm: Py_buffer
2208
    to: Py_buffer
2209
    /
2210
2211
Return a translation table useable for the bytes or bytearray translate method.
2212
2213
The returned table will be one where each byte in frm is mapped to the byte at
2214
the same position in to.
2215
2216
The bytes objects frm and to must be of the same length.
2217
[clinic start generated code]*/
2218
2219
static PyObject *
2220
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2221
/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2222
0
{
2223
0
    return _Py_bytes_maketrans(frm, to);
2224
0
}
2225
2226
2227
/*[clinic input]
2228
bytes.replace
2229
2230
    old: Py_buffer
2231
    new: Py_buffer
2232
    count: Py_ssize_t = -1
2233
        Maximum number of occurrences to replace.
2234
        -1 (the default value) means replace all occurrences.
2235
    /
2236
2237
Return a copy with all occurrences of substring old replaced by new.
2238
2239
If the optional argument count is given, only the first count occurrences are
2240
replaced.
2241
[clinic start generated code]*/
2242
2243
static PyObject *
2244
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2245
                   Py_ssize_t count)
2246
/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2247
0
{
2248
0
    return stringlib_replace((PyObject *)self,
2249
0
                             (const char *)old->buf, old->len,
2250
0
                             (const char *)new->buf, new->len, count);
2251
0
}
2252
2253
/** End DALKE **/
2254
2255
2256
static PyObject *
2257
bytes_startswith(PyBytesObject *self, PyObject *args)
2258
1
{
2259
1
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260
1
}
2261
2262
static PyObject *
2263
bytes_endswith(PyBytesObject *self, PyObject *args)
2264
0
{
2265
0
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266
0
}
2267
2268
2269
/*[clinic input]
2270
bytes.decode
2271
2272
    encoding: str(c_default="NULL") = 'utf-8'
2273
        The encoding with which to decode the bytes.
2274
    errors: str(c_default="NULL") = 'strict'
2275
        The error handling scheme to use for the handling of decoding errors.
2276
        The default is 'strict' meaning that decoding errors raise a
2277
        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278
        as well as any other name registered with codecs.register_error that
2279
        can handle UnicodeDecodeErrors.
2280
2281
Decode the bytes using the codec registered for encoding.
2282
[clinic start generated code]*/
2283
2284
static PyObject *
2285
bytes_decode_impl(PyBytesObject *self, const char *encoding,
2286
                  const char *errors)
2287
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2288
16
{
2289
16
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2290
16
}
2291
2292
2293
/*[clinic input]
2294
bytes.splitlines
2295
2296
    keepends: bool(accept={int}) = False
2297
2298
Return a list of the lines in the bytes, breaking at line boundaries.
2299
2300
Line breaks are not included in the resulting list unless keepends is given and
2301
true.
2302
[clinic start generated code]*/
2303
2304
static PyObject *
2305
bytes_splitlines_impl(PyBytesObject *self, int keepends)
2306
/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2307
0
{
2308
0
    return stringlib_splitlines(
2309
0
        (PyObject*) self, PyBytes_AS_STRING(self),
2310
0
        PyBytes_GET_SIZE(self), keepends
2311
0
        );
2312
0
}
2313
2314
/*[clinic input]
2315
@classmethod
2316
bytes.fromhex
2317
2318
    string: unicode
2319
    /
2320
2321
Create a bytes object from a string of hexadecimal numbers.
2322
2323
Spaces between two numbers are accepted.
2324
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325
[clinic start generated code]*/
2326
2327
static PyObject *
2328
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2329
/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2330
0
{
2331
0
    PyObject *result = _PyBytes_FromHex(string, 0);
2332
0
    if (type != &PyBytes_Type && result != NULL) {
2333
0
        Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334
0
                                                       result, NULL));
2335
0
    }
2336
0
    return result;
2337
0
}
2338
2339
/* Convert the str object `string`, made of pairs of hexadecimal digits
   optionally separated by whitespace, into a bytes-like object produced by
   a _PyBytesWriter whose use_bytearray flag is set from `use_bytearray`.
   On a bad character, raises ValueError naming its position and returns
   NULL. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    _PyBytesWriter writer;
    Py_ssize_t hexlen, invalid_char;
    Py_UCS1 *base, *cursor, *end;
    unsigned int top, bot;
    char *out;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    assert(PyUnicode_Check(string));
    if (PyUnicode_READY(string))
        return NULL;
    hexlen = PyUnicode_GET_LENGTH(string);

    if (!PyUnicode_IS_ASCII(string)) {
        /* A non-ASCII string is always an error; locate the first
           non-ASCII character so it can be reported. */
        void *data = PyUnicode_DATA(string);
        unsigned int kind = PyUnicode_KIND(string);
        Py_ssize_t pos = 0;

        while (pos < hexlen && PyUnicode_READ(kind, data, pos) < 128) {
            pos++;
        }
        invalid_char = pos;
        goto error;
    }

    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
    base = PyUnicode_1BYTE_DATA(string);

    /* This overestimates if there are spaces */
    out = _PyBytesWriter_Alloc(&writer, hexlen / 2);
    if (out == NULL)
        return NULL;

    cursor = base;
    end = base + hexlen;
    while (cursor < end) {
        /* skip over spaces in the input */
        while (Py_ISSPACE(*cursor)) {
            cursor++;
        }
        if (cursor >= end) {
            break;
        }

        /* high nibble */
        top = _PyLong_DigitValue[*cursor];
        if (top >= 16) {
            invalid_char = cursor - base;
            goto error;
        }
        cursor++;

        /* low nibble */
        bot = _PyLong_DigitValue[*cursor];
        if (bot >= 16) {
            invalid_char = cursor - base;
            goto error;
        }
        cursor++;

        *out++ = (unsigned char)((top << 4) + bot);
    }

    return _PyBytesWriter_Finish(&writer, out);

  error:
    PyErr_Format(PyExc_ValueError,
                 "non-hexadecimal number found in "
                 "fromhex() arg at position %zd", invalid_char);
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2415
2416
/*[clinic input]
2417
bytes.hex
2418
2419
    sep: object = NULL
2420
        An optional single character or byte to separate hex bytes.
2421
    bytes_per_sep: int = 1
2422
        How many bytes between separators.  Positive values count from the
2423
        right, negative values count from the left.
2424
2425
Create a str of hexadecimal numbers from a bytes object.
2426
2427
Example:
2428
>>> value = b'\xb9\x01\xef'
2429
>>> value.hex()
2430
'b901ef'
2431
>>> value.hex(':')
2432
'b9:01:ef'
2433
>>> value.hex(':', 2)
2434
'b9:01ef'
2435
>>> value.hex(':', -2)
2436
'b901:ef'
2437
[clinic start generated code]*/
2438
2439
static PyObject *
2440
bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2441
/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
2442
0
{
2443
0
    char* argbuf = PyBytes_AS_STRING(self);
2444
0
    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2445
0
    return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2446
0
}
2447
2448
static PyObject *
2449
bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2450
0
{
2451
0
    return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2452
0
}
2453
2454
2455
static PyMethodDef
2456
bytes_methods[] = {
2457
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2458
    {"capitalize", stringlib_capitalize, METH_NOARGS,
2459
     _Py_capitalize__doc__},
2460
    STRINGLIB_CENTER_METHODDEF
2461
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
2462
     _Py_count__doc__},
2463
    BYTES_DECODE_METHODDEF
2464
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2465
     _Py_endswith__doc__},
2466
    STRINGLIB_EXPANDTABS_METHODDEF
2467
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
2468
     _Py_find__doc__},
2469
    BYTES_FROMHEX_METHODDEF
2470
    BYTES_HEX_METHODDEF
2471
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2472
    {"isalnum", stringlib_isalnum, METH_NOARGS,
2473
     _Py_isalnum__doc__},
2474
    {"isalpha", stringlib_isalpha, METH_NOARGS,
2475
     _Py_isalpha__doc__},
2476
    {"isascii", stringlib_isascii, METH_NOARGS,
2477
     _Py_isascii__doc__},
2478
    {"isdigit", stringlib_isdigit, METH_NOARGS,
2479
     _Py_isdigit__doc__},
2480
    {"islower", stringlib_islower, METH_NOARGS,
2481
     _Py_islower__doc__},
2482
    {"isspace", stringlib_isspace, METH_NOARGS,
2483
     _Py_isspace__doc__},
2484
    {"istitle", stringlib_istitle, METH_NOARGS,
2485
     _Py_istitle__doc__},
2486
    {"isupper", stringlib_isupper, METH_NOARGS,
2487
     _Py_isupper__doc__},
2488
    BYTES_JOIN_METHODDEF
2489
    STRINGLIB_LJUST_METHODDEF
2490
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2491
    BYTES_LSTRIP_METHODDEF
2492
    BYTES_MAKETRANS_METHODDEF
2493
    BYTES_PARTITION_METHODDEF
2494
    BYTES_REPLACE_METHODDEF
2495
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2496
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2497
    STRINGLIB_RJUST_METHODDEF
2498
    BYTES_RPARTITION_METHODDEF
2499
    BYTES_RSPLIT_METHODDEF
2500
    BYTES_RSTRIP_METHODDEF
2501
    BYTES_SPLIT_METHODDEF
2502
    BYTES_SPLITLINES_METHODDEF
2503
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2504
     _Py_startswith__doc__},
2505
    BYTES_STRIP_METHODDEF
2506
    {"swapcase", stringlib_swapcase, METH_NOARGS,
2507
     _Py_swapcase__doc__},
2508
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2509
    BYTES_TRANSLATE_METHODDEF
2510
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2511
    STRINGLIB_ZFILL_METHODDEF
2512
    {NULL,     NULL}                         /* sentinel */
2513
};
2514
2515
/* nb_remainder slot: `self % arg` formatting.  Returns NotImplemented
   when the left operand is not a bytes object. */
static PyObject *
bytes_mod(PyObject *self, PyObject *arg)
{
    if (PyBytes_Check(self)) {
        const char *format = PyBytes_AS_STRING(self);
        Py_ssize_t format_len = PyBytes_GET_SIZE(self);

        return _PyBytes_FormatEx(format, format_len, arg, 0);
    }
    Py_RETURN_NOTIMPLEMENTED;
}
2524
2525
static PyNumberMethods bytes_as_number = {
2526
    0,              /*nb_add*/
2527
    0,              /*nb_subtract*/
2528
    0,              /*nb_multiply*/
2529
    bytes_mod,      /*nb_remainder*/
2530
};
2531
2532
static PyObject *
2533
bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2534
2535
static PyObject *
2536
bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2537
4
{
2538
4
    PyObject *x = NULL;
2539
4
    const char *encoding = NULL;
2540
4
    const char *errors = NULL;
2541
4
    PyObject *new = NULL;
2542
4
    PyObject *func;
2543
4
    Py_ssize_t size;
2544
4
    static char *kwlist[] = {"source", "encoding", "errors", 0};
2545
4
    _Py_IDENTIFIER(__bytes__);
2546
2547
4
    if (type != &PyBytes_Type)
2548
0
        return bytes_subtype_new(type, args, kwds);
2549
4
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2550
4
                                     &encoding, &errors))
2551
0
        return NULL;
2552
4
    if (x == NULL) {
2553
0
        if (encoding != NULL || errors != NULL) {
2554
0
            PyErr_SetString(PyExc_TypeError,
2555
0
                            encoding != NULL ?
2556
0
                            "encoding without a string argument" :
2557
0
                            "errors without a string argument");
2558
0
            return NULL;
2559
0
        }
2560
0
        return PyBytes_FromStringAndSize(NULL, 0);
2561
0
    }
2562
2563
4
    if (encoding != NULL) {
2564
        /* Encode via the codec registry */
2565
0
        if (!PyUnicode_Check(x)) {
2566
0
            PyErr_SetString(PyExc_TypeError,
2567
0
                            "encoding without a string argument");
2568
0
            return NULL;
2569
0
        }
2570
0
        new = PyUnicode_AsEncodedString(x, encoding, errors);
2571
0
        if (new == NULL)
2572
0
            return NULL;
2573
0
        assert(PyBytes_Check(new));
2574
0
        return new;
2575
0
    }
2576
2577
4
    if (errors != NULL) {
2578
0
        PyErr_SetString(PyExc_TypeError,
2579
0
                        PyUnicode_Check(x) ?
2580
0
                        "string argument without an encoding" :
2581
0
                        "errors without a string argument");
2582
0
        return NULL;
2583
0
    }
2584
2585
    /* We'd like to call PyObject_Bytes here, but we need to check for an
2586
       integer argument before deferring to PyBytes_FromObject, something
2587
       PyObject_Bytes doesn't do. */
2588
4
    func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2589
4
    if (func != NULL) {
2590
0
        new = _PyObject_CallNoArg(func);
2591
0
        Py_DECREF(func);
2592
0
        if (new == NULL)
2593
0
            return NULL;
2594
0
        if (!PyBytes_Check(new)) {
2595
0
            PyErr_Format(PyExc_TypeError,
2596
0
                         "__bytes__ returned non-bytes (type %.200s)",
2597
0
                         Py_TYPE(new)->tp_name);
2598
0
            Py_DECREF(new);
2599
0
            return NULL;
2600
0
        }
2601
0
        return new;
2602
0
    }
2603
4
    else if (PyErr_Occurred())
2604
0
        return NULL;
2605
2606
4
    if (PyUnicode_Check(x)) {
2607
0
        PyErr_SetString(PyExc_TypeError,
2608
0
                        "string argument without an encoding");
2609
0
        return NULL;
2610
0
    }
2611
    /* Is it an integer? */
2612
4
    if (PyIndex_Check(x)) {
2613
0
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2614
0
        if (size == -1 && PyErr_Occurred()) {
2615
0
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
2616
0
                return NULL;
2617
0
            PyErr_Clear();  /* fall through */
2618
0
        }
2619
0
        else {
2620
0
            if (size < 0) {
2621
0
                PyErr_SetString(PyExc_ValueError, "negative count");
2622
0
                return NULL;
2623
0
            }
2624
0
            new = _PyBytes_FromSize(size, 1);
2625
0
            if (new == NULL)
2626
0
                return NULL;
2627
0
            return new;
2628
0
        }
2629
0
    }
2630
2631
4
    return PyBytes_FromObject(x);
2632
4
}
2633
2634
/* Build a new bytes object holding a C-contiguous copy of the buffer
   exported by `x`.  Returns NULL with an exception set on failure. */
static PyObject*
_PyBytes_FromBuffer(PyObject *x)
{
    Py_buffer view;
    PyObject *copy;

    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) {
        return NULL;
    }

    copy = PyBytes_FromStringAndSize(NULL, view.len);
    if (copy != NULL &&
        PyBuffer_ToContiguous(((PyBytesObject *)copy)->ob_sval,
                              &view, view.len, 'C') < 0)
    {
        /* Copy failed: discard the partly filled object. */
        Py_DECREF(copy);
        copy = NULL;
    }

    PyBuffer_Release(&view);
    return copy;
}
2657
2658
/* Build a bytes object from the list `x`; each item is converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256). */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    _PyBytesWriter writer;
    Py_ssize_t capacity = PyList_GET_SIZE(x);
    Py_ssize_t idx = 0;
    char *out;

    _PyBytesWriter_Init(&writer);
    out = _PyBytesWriter_Alloc(&writer, capacity);
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    /* The list length is re-read on every pass instead of being cached. */
    while (idx < PyList_GET_SIZE(x)) {
        PyObject *item = PyList_GET_ITEM(x, idx);
        Py_ssize_t value;

        /* Hold our own reference to the item across the conversion. */
        Py_INCREF(item);
        value = PyNumber_AsSsize_t(item, NULL);
        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred()) {
            goto fail;
        }

        if (!(0 <= value && value < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        if (idx >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                /* NOTE(review): no explicit dealloc here; presumably the
                   writer cleans up after a failed resize -- confirm. */
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) value;
        idx++;
    }
    return _PyBytesWriter_Finish(&writer, out);

  fail:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2702
2703
/* Build a bytes object from the tuple `x`; each item is converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256). */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    Py_ssize_t count = PyTuple_GET_SIZE(x);
    Py_ssize_t idx;
    PyObject *result;
    char *out;

    /* The result is allocated at its final size up front. */
    result = PyBytes_FromStringAndSize(NULL, count);
    if (result == NULL) {
        return NULL;
    }
    out = ((PyBytesObject *)result)->ob_sval;

    for (idx = 0; idx < count; idx++) {
        Py_ssize_t value = PyNumber_AsSsize_t(PyTuple_GET_ITEM(x, idx), NULL);

        if (value == -1 && PyErr_Occurred()) {
            goto fail;
        }
        if (!(0 <= value && value < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }
        out[idx] = (char) value;
    }
    return result;

  fail:
    Py_DECREF(result);
    return NULL;
}
2736
2737
/* Build a bytes object by exhausting the iterator `it`.  `x` is only used
   to obtain a length hint for the initial allocation.  Each item is
   converted with PyNumber_AsSsize_t() and must lie in range(0, 256). */
static PyObject *
_PyBytes_FromIterator(PyObject *it, PyObject *x)
{
    _PyBytesWriter writer;
    Py_ssize_t capacity;
    Py_ssize_t written = 0;
    PyObject *item;
    char *out;

    /* Start from the length hint (64 when none is available) and grow
       on demand. */
    capacity = PyObject_LengthHint(x, 64);
    if (capacity == -1 && PyErr_Occurred()) {
        return NULL;
    }

    _PyBytesWriter_Init(&writer);
    out = _PyBytesWriter_Alloc(&writer, capacity);
    if (out == NULL) {
        return NULL;
    }
    writer.overallocate = 1;
    capacity = writer.allocated;

    /* Run the iterator to exhaustion */
    while ((item = PyIter_Next(it)) != NULL) {
        /* Interpret it as an int (__index__) */
        Py_ssize_t value = PyNumber_AsSsize_t(item, NULL);

        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred()) {
            goto fail;
        }

        /* Range check */
        if (!(0 <= value && value < 256)) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto fail;
        }

        /* Append the byte */
        if (written >= capacity) {
            out = _PyBytesWriter_Resize(&writer, out, capacity + 1);
            if (out == NULL) {
                /* NOTE(review): no explicit dealloc here; presumably the
                   writer cleans up after a failed resize -- confirm. */
                return NULL;
            }
            capacity = writer.allocated;
        }
        *out++ = (char) value;
        written++;
    }

    /* PyIter_Next() returned NULL: either exhaustion or an error. */
    if (PyErr_Occurred()) {
        goto fail;
    }
    return _PyBytesWriter_Finish(&writer, out);

  fail:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2798
2799
/* Convert `x` to a bytes object and return a new reference, or NULL with
   an exception set.

   The conversions are tried in this order:
     - an exact bytes object is returned as-is, with a new reference;
     - an object supporting the buffer protocol is copied;
     - an exact list or an exact tuple is converted item by item;
     - any other non-str object is iterated.
   A str, or an object whose iteration attempt failed with TypeError,
   raises TypeError("cannot convert '<type>' object to bytes").  Any other
   exception raised while obtaining the iterator is propagated unchanged.
   Passing NULL is reported through PyErr_BadInternalCall(). */
PyObject *
PyBytes_FromObject(PyObject *x)
{
    PyObject *it, *result;

    if (x == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (PyBytes_CheckExact(x)) {
        Py_INCREF(x);
        return x;
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x))
        return _PyBytes_FromBuffer(x);

    if (PyList_CheckExact(x))
        return _PyBytes_FromList(x);

    if (PyTuple_CheckExact(x))
        return _PyBytes_FromTuple(x);

    if (!PyUnicode_Check(x)) {
        it = PyObject_GetIter(x);
        if (it != NULL) {
            result = _PyBytes_FromIterator(it, x);
            Py_DECREF(it);
            return result;
        }
        /* Only a TypeError from PyObject_GetIter() is replaced by the
           message below. */
        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return NULL;
        }
    }

    /* Py_TYPE() instead of a direct ob_type access, matching the other
       type-name lookups in this file. */
    PyErr_Format(PyExc_TypeError,
                 "cannot convert '%.200s' object to bytes",
                 Py_TYPE(x)->tp_name);
    return NULL;
}
2841
2842
static PyObject *
2843
bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2844
0
{
2845
0
    PyObject *tmp, *pnew;
2846
0
    Py_ssize_t n;
2847
2848
0
    assert(PyType_IsSubtype(type, &PyBytes_Type));
2849
0
    tmp = bytes_new(&PyBytes_Type, args, kwds);
2850
0
    if (tmp == NULL)
2851
0
        return NULL;
2852
0
    assert(PyBytes_Check(tmp));
2853
0
    n = PyBytes_GET_SIZE(tmp);
2854
0
    pnew = type->tp_alloc(type, n);
2855
0
    if (pnew != NULL) {
2856
0
        memcpy(PyBytes_AS_STRING(pnew),
2857
0
                  PyBytes_AS_STRING(tmp), n+1);
2858
0
        ((PyBytesObject *)pnew)->ob_shash =
2859
0
            ((PyBytesObject *)tmp)->ob_shash;
2860
0
    }
2861
0
    Py_DECREF(tmp);
2862
0
    return pnew;
2863
0
}
2864
2865
/* Docstring of the bytes type; installed as the tp_doc slot of
   PyBytes_Type.  It lists the accepted constructor call forms. */
PyDoc_STRVAR(bytes_doc,
2866
"bytes(iterable_of_ints) -> bytes\n\
2867
bytes(string, encoding[, errors]) -> bytes\n\
2868
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2869
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2870
bytes() -> empty bytes object\n\
2871
\n\
2872
Construct an immutable array of bytes from:\n\
2873
  - an iterable yielding integers in range(256)\n\
2874
  - a text string encoded using the specified encoding\n\
2875
  - any object implementing the buffer API.\n\
2876
  - an integer");
2877
2878
static PyObject *bytes_iter(PyObject *seq);
2879
2880
PyTypeObject PyBytes_Type = {
2881
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
2882
    "bytes",
2883
    PyBytesObject_SIZE,
2884
    sizeof(char),
2885
    0,                                          /* tp_dealloc */
2886
    0,                                          /* tp_vectorcall_offset */
2887
    0,                                          /* tp_getattr */
2888
    0,                                          /* tp_setattr */
2889
    0,                                          /* tp_as_async */
2890
    (reprfunc)bytes_repr,                       /* tp_repr */
2891
    &bytes_as_number,                           /* tp_as_number */
2892
    &bytes_as_sequence,                         /* tp_as_sequence */
2893
    &bytes_as_mapping,                          /* tp_as_mapping */
2894
    (hashfunc)bytes_hash,                       /* tp_hash */
2895
    0,                                          /* tp_call */
2896
    bytes_str,                                  /* tp_str */
2897
    PyObject_GenericGetAttr,                    /* tp_getattro */
2898
    0,                                          /* tp_setattro */
2899
    &bytes_as_buffer,                           /* tp_as_buffer */
2900
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2901
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
2902
    bytes_doc,                                  /* tp_doc */
2903
    0,                                          /* tp_traverse */
2904
    0,                                          /* tp_clear */
2905
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2906
    0,                                          /* tp_weaklistoffset */
2907
    bytes_iter,                                 /* tp_iter */
2908
    0,                                          /* tp_iternext */
2909
    bytes_methods,                              /* tp_methods */
2910
    0,                                          /* tp_members */
2911
    0,                                          /* tp_getset */
2912
    &PyBaseObject_Type,                         /* tp_base */
2913
    0,                                          /* tp_dict */
2914
    0,                                          /* tp_descr_get */
2915
    0,                                          /* tp_descr_set */
2916
    0,                                          /* tp_dictoffset */
2917
    0,                                          /* tp_init */
2918
    0,                                          /* tp_alloc */
2919
    bytes_new,                                  /* tp_new */
2920
    PyObject_Del,                               /* tp_free */
2921
};
2922
2923
void
2924
PyBytes_Concat(PyObject **pv, PyObject *w)
2925
0
{
2926
0
    assert(pv != NULL);
2927
0
    if (*pv == NULL)
2928
0
        return;
2929
0
    if (w == NULL) {
2930
0
        Py_CLEAR(*pv);
2931
0
        return;
2932
0
    }
2933
2934
0
    if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2935
        /* Only one reference, so we can resize in place */
2936
0
        Py_ssize_t oldsize;
2937
0
        Py_buffer wb;
2938
2939
0
        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2940
0
            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2941
0
                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2942
0
            Py_CLEAR(*pv);
2943
0
            return;
2944
0
        }
2945
2946
0
        oldsize = PyBytes_GET_SIZE(*pv);
2947
0
        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2948
0
            PyErr_NoMemory();
2949
0
            goto error;
2950
0
        }
2951
0
        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2952
0
            goto error;
2953
2954
0
        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2955
0
        PyBuffer_Release(&wb);
2956
0
        return;
2957
2958
0
      error:
2959
0
        PyBuffer_Release(&wb);
2960
0
        Py_CLEAR(*pv);
2961
0
        return;
2962
0
    }
2963
2964
0
    else {
2965
        /* Multiple references, need to create new object */
2966
0
        PyObject *v;
2967
0
        v = bytes_concat(*pv, w);
2968
0
        Py_SETREF(*pv, v);
2969
0
    }
2970
0
}
2971
2972
void
2973
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2974
0
{
2975
0
    PyBytes_Concat(pv, w);
2976
0
    Py_XDECREF(w);
2977
0
}
2978
2979
2980
/* The following function breaks the notion that bytes are immutable:
2981
   it changes the size of a bytes object.  We get away with this only if there
2982
   is only one module referencing the object.  You can also think of it
2983
   as creating a new bytes object and destroying the old one, only
2984
   more efficiently.  In any case, don't use this if the bytes object may
2985
   already be known to some other part of the code...
2986
   Note that if there's not enough memory to resize the bytes object, the
2987
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2988
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
2989
   returned, and the value in *pv may or may not be the same as on input.
2990
   As always, an extra byte is allocated for a trailing \0 byte (newsize
2991
   does *not* include that), and a trailing \0 byte is stored.
2992
*/
2993
2994
int
2995
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2996
301
{
2997
301
    PyObject *v;
2998
301
    PyBytesObject *sv;
2999
301
    v = *pv;
3000
301
    if (!PyBytes_Check(v) || newsize < 0) {
3001
0
        goto error;
3002
0
    }
3003
301
    if (Py_SIZE(v) == newsize) {
3004
        /* return early if newsize equals to v->ob_size */
3005
0
        return 0;
3006
0
    }
3007
301
    if (Py_SIZE(v) == 0) {
3008
0
        if (newsize == 0) {
3009
0
            return 0;
3010
0
        }
3011
0
        *pv = _PyBytes_FromSize(newsize, 0);
3012
0
        Py_DECREF(v);
3013
0
        return (*pv == NULL) ? -1 : 0;
3014
0
    }
3015
301
    if (Py_REFCNT(v) != 1) {
3016
0
        goto error;
3017
0
    }
3018
301
    if (newsize == 0) {
3019
7
        *pv = _PyBytes_FromSize(0, 0);
3020
7
        Py_DECREF(v);
3021
7
        return (*pv == NULL) ? -1 : 0;
3022
7
    }
3023
    /* XXX UNREF/NEWREF interface should be more symmetrical */
3024
294
    _Py_DEC_REFTOTAL;
3025
294
    _Py_ForgetReference(v);
3026
294
    *pv = (PyObject *)
3027
294
        PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3028
294
    if (*pv == NULL) {
3029
0
        PyObject_Del(v);
3030
0
        PyErr_NoMemory();
3031
0
        return -1;
3032
0
    }
3033
294
    _Py_NewReference(*pv);
3034
294
    sv = (PyBytesObject *) *pv;
3035
294
    Py_SIZE(sv) = newsize;
3036
294
    sv->ob_sval[newsize] = '\0';
3037
294
    sv->ob_shash = -1;          /* invalidate cached hash value */
3038
294
    return 0;
3039
0
error:
3040
0
    *pv = 0;
3041
0
    Py_DECREF(v);
3042
0
    PyErr_BadInternalCall();
3043
0
    return -1;
3044
294
}
3045
3046
void
3047
PyBytes_Fini(void)
3048
0
{
3049
0
    int i;
3050
0
    for (i = 0; i < UCHAR_MAX + 1; i++)
3051
0
        Py_CLEAR(characters[i]);
3052
0
    Py_CLEAR(nullstring);
3053
0
}
3054
3055
/*********************** Bytes Iterator ****************************/
3056
3057
/* State of an iterator over a bytes object. */
typedef struct {
3058
    PyObject_HEAD
3059
    Py_ssize_t it_index;   /* Index of the next byte to return */
3060
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3061
} striterobject;
3062
3063
static void
3064
striter_dealloc(striterobject *it)
3065
15
{
3066
15
    _PyObject_GC_UNTRACK(it);
3067
15
    Py_XDECREF(it->it_seq);
3068
15
    PyObject_GC_Del(it);
3069
15
}
3070
3071
static int
3072
striter_traverse(striterobject *it, visitproc visit, void *arg)
3073
0
{
3074
0
    Py_VISIT(it->it_seq);
3075
0
    return 0;
3076
0
}
3077
3078
static PyObject *
3079
striter_next(striterobject *it)
3080
25
{
3081
25
    PyBytesObject *seq;
3082
25
    PyObject *item;
3083
3084
25
    assert(it != NULL);
3085
25
    seq = it->it_seq;
3086
25
    if (seq == NULL)
3087
0
        return NULL;
3088
25
    assert(PyBytes_Check(seq));
3089
3090
25
    if (it->it_index < PyBytes_GET_SIZE(seq)) {
3091
24
        item = PyLong_FromLong(
3092
24
            (unsigned char)seq->ob_sval[it->it_index]);
3093
24
        if (item != NULL)
3094
24
            ++it->it_index;
3095
24
        return item;
3096
24
    }
3097
3098
1
    it->it_seq = NULL;
3099
1
    Py_DECREF(seq);
3100
1
    return NULL;
3101
25
}
3102
3103
static PyObject *
3104
striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3105
0
{
3106
0
    Py_ssize_t len = 0;
3107
0
    if (it->it_seq)
3108
0
        len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3109
0
    return PyLong_FromSsize_t(len);
3110
0
}
3111
3112
PyDoc_STRVAR(length_hint_doc,
3113
             "Private method returning an estimate of len(list(it)).");
3114
3115
static PyObject *
3116
striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3117
0
{
3118
0
    _Py_IDENTIFIER(iter);
3119
0
    if (it->it_seq != NULL) {
3120
0
        return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3121
0
                             it->it_seq, it->it_index);
3122
0
    } else {
3123
0
        return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3124
0
    }
3125
0
}
3126
3127
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3128
3129
static PyObject *
3130
striter_setstate(striterobject *it, PyObject *state)
3131
0
{
3132
0
    Py_ssize_t index = PyLong_AsSsize_t(state);
3133
0
    if (index == -1 && PyErr_Occurred())
3134
0
        return NULL;
3135
0
    if (it->it_seq != NULL) {
3136
0
        if (index < 0)
3137
0
            index = 0;
3138
0
        else if (index > PyBytes_GET_SIZE(it->it_seq))
3139
0
            index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3140
0
        it->it_index = index;
3141
0
    }
3142
0
    Py_RETURN_NONE;
3143
0
}
3144
3145
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3146
3147
/* Method table of the bytes iterator (used as tp_methods of
   PyBytesIter_Type): length hint plus pickling support. */
static PyMethodDef striter_methods[] = {
3148
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3149
     length_hint_doc},
3150
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
3151
     reduce_doc},
3152
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
3153
     setstate_doc},
3154
    {NULL,              NULL}           /* sentinel */
3155
};
3156
3157
PyTypeObject PyBytesIter_Type = {
3158
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3159
    "bytes_iterator",                           /* tp_name */
3160
    sizeof(striterobject),                      /* tp_basicsize */
3161
    0,                                          /* tp_itemsize */
3162
    /* methods */
3163
    (destructor)striter_dealloc,                /* tp_dealloc */
3164
    0,                                          /* tp_vectorcall_offset */
3165
    0,                                          /* tp_getattr */
3166
    0,                                          /* tp_setattr */
3167
    0,                                          /* tp_as_async */
3168
    0,                                          /* tp_repr */
3169
    0,                                          /* tp_as_number */
3170
    0,                                          /* tp_as_sequence */
3171
    0,                                          /* tp_as_mapping */
3172
    0,                                          /* tp_hash */
3173
    0,                                          /* tp_call */
3174
    0,                                          /* tp_str */
3175
    PyObject_GenericGetAttr,                    /* tp_getattro */
3176
    0,                                          /* tp_setattro */
3177
    0,                                          /* tp_as_buffer */
3178
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3179
    0,                                          /* tp_doc */
3180
    (traverseproc)striter_traverse,     /* tp_traverse */
3181
    0,                                          /* tp_clear */
3182
    0,                                          /* tp_richcompare */
3183
    0,                                          /* tp_weaklistoffset */
3184
    PyObject_SelfIter,                          /* tp_iter */
3185
    (iternextfunc)striter_next,                 /* tp_iternext */
3186
    striter_methods,                            /* tp_methods */
3187
    0,
3188
};
3189
3190
/* tp_iter of bytes: create a new iterator positioned at the first byte
   of `seq`.  The iterator holds a strong reference to `seq`.  Returns
   NULL with an exception set if `seq` is not a bytes object or if the
   iterator cannot be allocated. */
static PyObject *
bytes_iter(PyObject *seq)
{
    striterobject *iter;

    if (!PyBytes_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    iter = PyObject_GC_New(striterobject, &PyBytesIter_Type);
    if (iter == NULL) {
        return NULL;
    }

    Py_INCREF(seq);
    iter->it_seq = (PyBytesObject *)seq;
    iter->it_index = 0;

    /* Start GC tracking only once every field is initialized. */
    _PyObject_GC_TRACK(iter);
    return (PyObject *)iter;
}
3208
3209
3210
/* _PyBytesWriter API */
3211
3212
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() grows a request by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
3213
   /* On Windows, overallocating by 50% (size / 2) works best */
3214
#  define OVERALLOCATE_FACTOR 2
3215
#else
3216
   /* On Linux, overallocating by 25% (size / 4) works best */
3217
0
#  define OVERALLOCATE_FACTOR 4
3218
#endif
3219
3220
void
3221
_PyBytesWriter_Init(_PyBytesWriter *writer)
3222
4
{
3223
    /* Set all attributes before small_buffer to 0 */
3224
4
    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3225
#ifndef NDEBUG
3226
    memset(writer->small_buffer, PYMEM_CLEANBYTE,
3227
           sizeof(writer->small_buffer));
3228
#endif
3229
4
}
3230
3231
void
3232
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3233
0
{
3234
0
    Py_CLEAR(writer->buffer);
3235
0
}
3236
3237
Py_LOCAL_INLINE(char*)
3238
_PyBytesWriter_AsString(_PyBytesWriter *writer)
3239
4
{
3240
4
    if (writer->use_small_buffer) {
3241
4
        assert(writer->buffer == NULL);
3242
4
        return writer->small_buffer;
3243
4
    }
3244
0
    else if (writer->use_bytearray) {
3245
0
        assert(writer->buffer != NULL);
3246
0
        return PyByteArray_AS_STRING(writer->buffer);
3247
0
    }
3248
0
    else {
3249
0
        assert(writer->buffer != NULL);
3250
0
        return PyBytes_AS_STRING(writer->buffer);
3251
0
    }
3252
4
}
3253
3254
Py_LOCAL_INLINE(Py_ssize_t)
3255
_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3256
4
{
3257
4
    char *start = _PyBytesWriter_AsString(writer);
3258
4
    assert(str != NULL);
3259
4
    assert(str >= start);
3260
4
    assert(str - start <= writer->allocated);
3261
4
    return str - start;
3262
4
}
3263
3264
#ifndef NDEBUG
3265
Py_LOCAL_INLINE(int)
3266
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3267
{
3268
    char *start, *end;
3269
3270
    if (writer->use_small_buffer) {
3271
        assert(writer->buffer == NULL);
3272
    }
3273
    else {
3274
        assert(writer->buffer != NULL);
3275
        if (writer->use_bytearray)
3276
            assert(PyByteArray_CheckExact(writer->buffer));
3277
        else
3278
            assert(PyBytes_CheckExact(writer->buffer));
3279
        assert(Py_REFCNT(writer->buffer) == 1);
3280
    }
3281
3282
    if (writer->use_bytearray) {
3283
        /* bytearray has its own overallocation algorithm,
3284
           writer overallocation must be disabled */
3285
        assert(!writer->overallocate);
3286
    }
3287
3288
    assert(0 <= writer->allocated);
3289
    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3290
    /* the last byte must always be null */
3291
    start = _PyBytesWriter_AsString(writer);
3292
    assert(start[writer->allocated] == 0);
3293
3294
    end = start + writer->allocated;
3295
    assert(str != NULL);
3296
    assert(start <= str && str <= end);
3297
    return 1;
3298
}
3299
#endif
3300
3301
void*
3302
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3303
0
{
3304
0
    Py_ssize_t allocated, pos;
3305
3306
0
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3307
0
    assert(writer->allocated < size);
3308
3309
0
    allocated = size;
3310
0
    if (writer->overallocate
3311
0
        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3312
        /* overallocate to limit the number of realloc() */
3313
0
        allocated += allocated / OVERALLOCATE_FACTOR;
3314
0
    }
3315
3316
0
    pos = _PyBytesWriter_GetSize(writer, str);
3317
0
    if (!writer->use_small_buffer) {
3318
0
        if (writer->use_bytearray) {
3319
0
            if (PyByteArray_Resize(writer->buffer, allocated))
3320
0
                goto error;
3321
            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3322
               but we cannot use ob_alloc because bytes may need to be moved
3323
               to use the whole buffer. bytearray uses an internal optimization
3324
               to avoid moving or copying bytes when bytes are removed at the
3325
               beginning (ex: del bytearray[:1]). */
3326
0
        }
3327
0
        else {
3328
0
            if (_PyBytes_Resize(&writer->buffer, allocated))
3329
0
                goto error;
3330
0
        }
3331
0
    }
3332
0
    else {
3333
        /* convert from stack buffer to bytes object buffer */
3334
0
        assert(writer->buffer == NULL);
3335
3336
0
        if (writer->use_bytearray)
3337
0
            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3338
0
        else
3339
0
            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3340
0
        if (writer->buffer == NULL)
3341
0
            goto error;
3342
3343
0
        if (pos != 0) {
3344
0
            char *dest;
3345
0
            if (writer->use_bytearray)
3346
0
                dest = PyByteArray_AS_STRING(writer->buffer);
3347
0
            else
3348
0
                dest = PyBytes_AS_STRING(writer->buffer);
3349
0
            memcpy(dest,
3350
0
                      writer->small_buffer,
3351
0
                      pos);
3352
0
        }
3353
3354
0
        writer->use_small_buffer = 0;
3355
#ifndef NDEBUG
3356
        memset(writer->small_buffer, PYMEM_CLEANBYTE,
3357
               sizeof(writer->small_buffer));
3358
#endif
3359
0
    }
3360
0
    writer->allocated = allocated;
3361
3362
0
    str = _PyBytesWriter_AsString(writer) + pos;
3363
0
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3364
0
    return str;
3365
3366
0
error:
3367
0
    _PyBytesWriter_Dealloc(writer);
3368
0
    return NULL;
3369
0
}
3370
3371
void*
3372
_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3373
4
{
3374
4
    Py_ssize_t new_min_size;
3375
3376
4
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3377
4
    assert(size >= 0);
3378
3379
4
    if (size == 0) {
3380
        /* nothing to do */
3381
0
        return str;
3382
0
    }
3383
3384
4
    if (writer->min_size > PY_SSIZE_T_MAX - size) {
3385
0
        PyErr_NoMemory();
3386
0
        _PyBytesWriter_Dealloc(writer);
3387
0
        return NULL;
3388
0
    }
3389
4
    new_min_size = writer->min_size + size;
3390
3391
4
    if (new_min_size > writer->allocated)
3392
0
        str = _PyBytesWriter_Resize(writer, str, new_min_size);
3393
3394
4
    writer->min_size = new_min_size;
3395
4
    return str;
3396
4
}
3397
3398
/* Allocate the buffer to write size bytes.
3399
   Return the pointer to the beginning of buffer data.
3400
   Raise an exception and return NULL on error. */
3401
void*
3402
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3403
4
{
3404
    /* ensure that _PyBytesWriter_Alloc() is only called once */
3405
4
    assert(writer->min_size == 0 && writer->buffer == NULL);
3406
4
    assert(size >= 0);
3407
3408
4
    writer->use_small_buffer = 1;
3409
#ifndef NDEBUG
3410
    writer->allocated = sizeof(writer->small_buffer) - 1;
3411
    /* In debug mode, don't use the full small buffer because it is less
3412
       efficient than bytes and bytearray objects to detect buffer underflow
3413
       and buffer overflow. Use 10 bytes of the small buffer to test also
3414
       code using the smaller buffer in debug mode.
3415
3416
       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3417
       in debug mode to also be able to detect stack overflow when running
3418
       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3419
       if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3420
       stack overflow. */
3421
    writer->allocated = Py_MIN(writer->allocated, 10);
3422
    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3423
       to detect buffer overflow */
3424
    writer->small_buffer[writer->allocated] = 0;
3425
#else
3426
4
    writer->allocated = sizeof(writer->small_buffer);
3427
4
#endif
3428
4
    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3429
4
}
3430
3431
PyObject *
3432
_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3433
4
{
3434
4
    Py_ssize_t size;
3435
4
    PyObject *result;
3436
3437
4
    assert(_PyBytesWriter_CheckConsistency(writer, str));
3438
3439
4
    size = _PyBytesWriter_GetSize(writer, str);
3440
4
    if (size == 0 && !writer->use_bytearray) {
3441
0
        Py_CLEAR(writer->buffer);
3442
        /* Get the empty byte string singleton */
3443
0
        result = PyBytes_FromStringAndSize(NULL, 0);
3444
0
    }
3445
4
    else if (writer->use_small_buffer) {
3446
4
        if (writer->use_bytearray) {
3447
0
            result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3448
0
        }
3449
4
        else {
3450
4
            result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3451
4
        }
3452
4
    }
3453
0
    else {
3454
0
        result = writer->buffer;
3455
0
        writer->buffer = NULL;
3456
3457
0
        if (size != writer->allocated) {
3458
0
            if (writer->use_bytearray) {
3459
0
                if (PyByteArray_Resize(result, size)) {
3460
0
                    Py_DECREF(result);
3461
0
                    return NULL;
3462
0
                }
3463
0
            }
3464
0
            else {
3465
0
                if (_PyBytes_Resize(&result, size)) {
3466
0
                    assert(result == NULL);
3467
0
                    return NULL;
3468
0
                }
3469
0
            }
3470
0
        }
3471
0
    }
3472
4
    return result;
3473
4
}
3474
3475
void*
3476
_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3477
                          const void *bytes, Py_ssize_t size)
3478
0
{
3479
0
    char *str = (char *)ptr;
3480
3481
0
    str = _PyBytesWriter_Prepare(writer, str, size);
3482
0
    if (str == NULL)
3483
0
        return NULL;
3484
3485
0
    memcpy(str, bytes, size);
3486
0
    str += size;
3487
3488
0
    return str;
3489
0
}