Coverage Report

Created: 2025-08-26 06:57

/src/cpython3/Python/marshal.c
Line
Count
Source (jump to first uncovered line)
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
15
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
16
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
17
18
#include "marshal.h"                 // Py_MARSHAL_VERSION
19
20
#ifdef __APPLE__
21
#  include "TargetConditionals.h"
22
#endif /* __APPLE__ */
23
24
25
/*[clinic input]
26
module marshal
27
[clinic start generated code]*/
28
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
29
30
#include "clinic/marshal.c.h"
31
32
/* MAX_MARSHAL_STACK_DEPTH is the nesting depth at which marshalling gives up
 * and raises an exception instead of recursing further and risking a crash
 * of the interpreter.
 *
 * The limit is lowered on every Windows build (not only debug builds):
 * on Windows PGO builds the r_object function overallocates its stack and
 * can overflow it.  See https://bugs.python.org/issue33720
 */
#if defined(MS_WINDOWS)
#  define MAX_MARSHAL_STACK_DEPTH 1000
#elif defined(__wasi__)
#  define MAX_MARSHAL_STACK_DEPTH 1500
/* TARGET_OS_IPHONE is set for every Apple platform other than macOS;
 * older macOS SDKs do not define it at all, hence the defined() test. */
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#  define MAX_MARSHAL_STACK_DEPTH 1500
#else
#  define MAX_MARSHAL_STACK_DEPTH 2000
#endif
54
55
/* One-byte type tags used in the marshal stream. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_BINARY_FLOAT       'g'  /* version 0 writes TYPE_FLOAT instead */
#define TYPE_BINARY_COMPLEX     'y'  /* version 0 writes TYPE_COMPLEX instead */
#define TYPE_LONG               'l'  /* see also TYPE_INT */
#define TYPE_STRING             's'  /* bytes; the name dates from Python 2 */
#define TYPE_TUPLE              '('  /* see also TYPE_SMALL_TUPLE */
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
/* Introduced in version 2: */
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
/* Introduced in version 5: */
#define TYPE_SLICE              ':'
/* When adding a new type, also bump the version and update the docs. */

/* Unicode string special cases (added in version 4) */
#define TYPE_INTERNED           't'  /* version 1+ */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Compact encodings for small objects */
#define TYPE_INT                'i'  /* all versions; 32-bit encoding */
#define TYPE_SMALL_TUPLE        ')'  /* version 4+ */

/* Kept so that old streams can still be handled */
#define TYPE_COMPLEX            'x'  /* generated for version 0 only */
#define TYPE_FLOAT              'f'  /* generated for version 0 only */
#define TYPE_INT64              'I'  /* no longer generated */

/* Object references (added in version 3) */
#define TYPE_REF                'r'
#define FLAG_REF                '\x80' /* OR-ed into a type tag: add obj to index */
98
99
100
/* Values stored in WFILE.error by the writer. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
#define WFERR_CODE_NOT_ALLOWED 4
106
107
typedef struct {
108
    FILE *fp;
109
    int error;  /* see WFERR_* values */
110
    int depth;
111
    PyObject *str;
112
    char *ptr;
113
    const char *end;
114
    char *buf;
115
    _Py_hashtable_t *hashtable;
116
    int version;
117
    int allow_code;
118
} WFILE;
119
120
0
/* Append the single byte `c` to the output of `p`.  When the buffer is full,
   w_reserve() is asked for one more byte; if that fails the byte is dropped.
   Note that `p` is evaluated more than once. */
#define w_byte(c, p)                                        \
    do {                                                    \
        if ((p)->ptr != (p)->end || w_reserve((p), 1)) {    \
            *(p)->ptr++ = (c);                              \
        }                                                   \
    } while (0)
124
125
static void
126
w_flush(WFILE *p)
127
0
{
128
0
    assert(p->fp != NULL);
129
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
130
0
    p->ptr = p->buf;
131
0
}
132
133
static int
134
w_reserve(WFILE *p, Py_ssize_t needed)
135
0
{
136
0
    Py_ssize_t pos, size, delta;
137
0
    if (p->ptr == NULL)
138
0
        return 0; /* An error already occurred */
139
0
    if (p->fp != NULL) {
140
0
        w_flush(p);
141
0
        return needed <= p->end - p->ptr;
142
0
    }
143
0
    assert(p->str != NULL);
144
0
    pos = p->ptr - p->buf;
145
0
    size = PyBytes_GET_SIZE(p->str);
146
0
    if (size > 16*1024*1024)
147
0
        delta = (size >> 3);            /* 12.5% overallocation */
148
0
    else
149
0
        delta = size + 1024;
150
0
    delta = Py_MAX(delta, needed);
151
0
    if (delta > PY_SSIZE_T_MAX - size) {
152
0
        p->error = WFERR_NOMEMORY;
153
0
        return 0;
154
0
    }
155
0
    size += delta;
156
0
    if (_PyBytes_Resize(&p->str, size) != 0) {
157
0
        p->end = p->ptr = p->buf = NULL;
158
0
        return 0;
159
0
    }
160
0
    else {
161
0
        p->buf = PyBytes_AS_STRING(p->str);
162
0
        p->ptr = p->buf + pos;
163
0
        p->end = p->buf + size;
164
0
        return 1;
165
0
    }
166
0
}
167
168
static void
169
w_string(const void *s, Py_ssize_t n, WFILE *p)
170
0
{
171
0
    Py_ssize_t m;
172
0
    if (!n || p->ptr == NULL)
173
0
        return;
174
0
    m = p->end - p->ptr;
175
0
    if (p->fp != NULL) {
176
0
        if (n <= m) {
177
0
            memcpy(p->ptr, s, n);
178
0
            p->ptr += n;
179
0
        }
180
0
        else {
181
0
            w_flush(p);
182
0
            fwrite(s, 1, n, p->fp);
183
0
        }
184
0
    }
185
0
    else {
186
0
        if (n <= m || w_reserve(p, n - m)) {
187
0
            memcpy(p->ptr, s, n);
188
0
            p->ptr += n;
189
0
        }
190
0
    }
191
0
}
192
193
static void
194
w_short(int x, WFILE *p)
195
0
{
196
0
    w_byte((char)( x      & 0xff), p);
197
0
    w_byte((char)((x>> 8) & 0xff), p);
198
0
}
199
200
static void
201
w_long(long x, WFILE *p)
202
0
{
203
0
    w_byte((char)( x      & 0xff), p);
204
0
    w_byte((char)((x>> 8) & 0xff), p);
205
0
    w_byte((char)((x>>16) & 0xff), p);
206
0
    w_byte((char)((x>>24) & 0xff), p);
207
0
}
208
209
77.2k
/* Largest size that fits the 32-bit size field written by W_SIZE. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` as a 32-bit value.
   Where size_t is wider than 32 bits, a size above SIZE32_MAX marks the
   object as unmarshallable, decrements p->depth and RETURNS from the
   enclosing function, so the macro may only be used in void functions. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)                               \
    do {                                            \
        if ((n) > SIZE32_MAX) {                     \
            (p)->depth--;                           \
            (p)->error = WFERR_UNMARSHALLABLE;      \
            return;                                 \
        }                                           \
        w_long((long)(n), p);                       \
    } while (0)
#else
# define W_SIZE  w_long
#endif
223
224
static void
225
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
226
0
{
227
0
        W_SIZE(n, p);
228
0
        w_string(s, n, p);
229
0
}
230
231
static void
232
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
233
0
{
234
0
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
235
0
    w_string(s, n, p);
236
0
}
237
238
/* Python ints are assumed to be stored internally in a base that is a power
   of 2**15.  To keep the format portable, the marshal stream always uses
   base 2**15 exactly, for both reading and writing. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
245
246
0
/* Write the type tag `t`, OR-ed with the variable `flag` that must be in
   scope at the point of use. */
#define W_TYPE(t, p)                \
    do {                            \
        w_byte((t) | flag, (p));    \
    } while (0)
249
250
static PyObject *
251
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
252
253
#define _r_digits(bitsize)                                                \
254
static void                                                               \
255
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
256
0
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
257
0
{                                                                         \
258
0
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
259
0
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
260
0
    uint ## bitsize ## _t d = digits[n - 1];                              \
261
0
                                                                          \
262
0
    assert(marshal_ratio > 0);                                            \
263
0
    assert(n >= 1);                                                       \
264
0
    assert(d != 0); /* a PyLong is always normalized */                   \
265
0
    do {                                                                  \
266
0
        d >>= PyLong_MARSHAL_SHIFT;                                       \
267
0
        l++;                                                              \
268
0
    } while (d != 0);                                                     \
269
0
    if (l > SIZE32_MAX) {                                                 \
270
0
        p->depth--;                                                       \
271
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
272
0
        return;                                                           \
273
0
    }                                                                     \
274
0
    w_long((long)(negative ? -l : l), p);                                 \
275
0
                                                                          \
276
0
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
277
0
        d = digits[i];                                                    \
278
0
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
279
0
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
280
0
            d >>= PyLong_MARSHAL_SHIFT;                                   \
281
0
        }                                                                 \
282
0
        assert(d == 0);                                                   \
283
0
    }                                                                     \
284
0
    d = digits[n - 1];                                                    \
285
0
    do {                                                                  \
286
0
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
287
0
        d >>= PyLong_MARSHAL_SHIFT;                                       \
288
0
    } while (d != 0);                                                     \
289
0
}
Unexecuted instantiation: marshal.c:_r_digits32
Unexecuted instantiation: marshal.c:_r_digits16
290
_r_digits(16)
291
_r_digits(32)
292
#undef _r_digits
293
294
static void
295
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
296
0
{
297
0
    W_TYPE(TYPE_LONG, p);
298
0
    if (_PyLong_IsZero(ob)) {
299
0
        w_long((long)0, p);
300
0
        return;
301
0
    }
302
303
0
    PyLongExport long_export;
304
305
0
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
306
0
        p->depth--;
307
0
        p->error = WFERR_UNMARSHALLABLE;
308
0
        return;
309
0
    }
310
0
    if (!long_export.digits) {
311
0
        int8_t sign = long_export.value < 0 ? -1 : 1;
312
0
        uint64_t abs_value = Py_ABS(long_export.value);
313
0
        uint64_t d = abs_value;
314
0
        long l = 0;
315
316
        /* set l to number of base PyLong_MARSHAL_BASE digits */
317
0
        do {
318
0
            d >>= PyLong_MARSHAL_SHIFT;
319
0
            l += sign;
320
0
        } while (d);
321
0
        w_long(l, p);
322
323
0
        d = abs_value;
324
0
        do {
325
0
            w_short(d & PyLong_MARSHAL_MASK, p);
326
0
            d >>= PyLong_MARSHAL_SHIFT;
327
0
        } while (d);
328
0
        return;
329
0
    }
330
331
0
    const PyLongLayout *layout = PyLong_GetNativeLayout();
332
0
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
333
334
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
335
0
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
336
0
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
337
338
    /* other assumptions on PyLongObject internals */
339
0
    assert(layout->bits_per_digit <= 32);
340
0
    assert(layout->digits_order == -1);
341
0
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
342
0
    assert(layout->digit_size == 2 || layout->digit_size == 4);
343
344
0
    if (layout->digit_size == 4) {
345
0
        _r_digits32(long_export.digits, long_export.ndigits,
346
0
                    long_export.negative, marshal_ratio, p);
347
0
    }
348
0
    else {
349
0
        _r_digits16(long_export.digits, long_export.ndigits,
350
0
                    long_export.negative, marshal_ratio, p);
351
0
    }
352
0
    PyLong_FreeExport(&long_export);
353
0
}
354
355
static void
356
w_float_bin(double v, WFILE *p)
357
0
{
358
0
    char buf[8];
359
0
    if (PyFloat_Pack8(v, buf, 1) < 0) {
360
0
        p->error = WFERR_UNMARSHALLABLE;
361
0
        return;
362
0
    }
363
0
    w_string(buf, 8, p);
364
0
}
365
366
static void
367
w_float_str(double v, WFILE *p)
368
0
{
369
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
370
0
    if (!buf) {
371
0
        p->error = WFERR_NOMEMORY;
372
0
        return;
373
0
    }
374
0
    w_short_pstring(buf, strlen(buf), p);
375
0
    PyMem_Free(buf);
376
0
}
377
378
static int
379
w_ref(PyObject *v, char *flag, WFILE *p)
380
0
{
381
0
    _Py_hashtable_entry_t *entry;
382
0
    int w;
383
384
0
    if (p->version < 3 || p->hashtable == NULL)
385
0
        return 0; /* not writing object references */
386
387
    /* If it has only one reference, it definitely isn't shared.
388
     * But we use TYPE_REF always for interned string, to PYC file stable
389
     * as possible.
390
     */
391
0
    if (Py_REFCNT(v) == 1 &&
392
0
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
393
0
        return 0;
394
0
    }
395
396
0
    entry = _Py_hashtable_get_entry(p->hashtable, v);
397
0
    if (entry != NULL) {
398
        /* write the reference index to the stream */
399
0
        w = (int)(uintptr_t)entry->value;
400
        /* we don't store "long" indices in the dict */
401
0
        assert(0 <= w && w <= 0x7fffffff);
402
0
        w_byte(TYPE_REF, p);
403
0
        w_long(w, p);
404
0
        return 1;
405
0
    } else {
406
0
        size_t s = p->hashtable->nentries;
407
        /* we don't support long indices */
408
0
        if (s >= 0x7fffffff) {
409
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
410
0
            goto err;
411
0
        }
412
0
        w = (int)s;
413
0
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
414
0
                              (void *)(uintptr_t)w) < 0) {
415
0
            Py_DECREF(v);
416
0
            goto err;
417
0
        }
418
0
        *flag |= FLAG_REF;
419
0
        return 0;
420
0
    }
421
0
err:
422
0
    p->error = WFERR_UNMARSHALLABLE;
423
0
    return 1;
424
0
}
425
426
static void
427
w_complex_object(PyObject *v, char flag, WFILE *p);
428
429
static void
430
w_object(PyObject *v, WFILE *p)
431
0
{
432
0
    char flag = '\0';
433
434
0
    p->depth++;
435
436
0
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
437
0
        p->error = WFERR_NESTEDTOODEEP;
438
0
    }
439
0
    else if (v == NULL) {
440
0
        w_byte(TYPE_NULL, p);
441
0
    }
442
0
    else if (v == Py_None) {
443
0
        w_byte(TYPE_NONE, p);
444
0
    }
445
0
    else if (v == PyExc_StopIteration) {
446
0
        w_byte(TYPE_STOPITER, p);
447
0
    }
448
0
    else if (v == Py_Ellipsis) {
449
0
        w_byte(TYPE_ELLIPSIS, p);
450
0
    }
451
0
    else if (v == Py_False) {
452
0
        w_byte(TYPE_FALSE, p);
453
0
    }
454
0
    else if (v == Py_True) {
455
0
        w_byte(TYPE_TRUE, p);
456
0
    }
457
0
    else if (!w_ref(v, &flag, p))
458
0
        w_complex_object(v, flag, p);
459
460
0
    p->depth--;
461
0
}
462
463
static void
464
w_complex_object(PyObject *v, char flag, WFILE *p)
465
0
{
466
0
    Py_ssize_t i, n;
467
468
0
    if (PyLong_CheckExact(v)) {
469
0
        int overflow;
470
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
471
0
        if (overflow) {
472
0
            w_PyLong((PyLongObject *)v, flag, p);
473
0
        }
474
0
        else {
475
0
#if SIZEOF_LONG > 4
476
0
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
477
0
            if (y && y != -1) {
478
                /* Too large for TYPE_INT */
479
0
                w_PyLong((PyLongObject*)v, flag, p);
480
0
            }
481
0
            else
482
0
#endif
483
0
            {
484
0
                W_TYPE(TYPE_INT, p);
485
0
                w_long(x, p);
486
0
            }
487
0
        }
488
0
    }
489
0
    else if (PyFloat_CheckExact(v)) {
490
0
        if (p->version > 1) {
491
0
            W_TYPE(TYPE_BINARY_FLOAT, p);
492
0
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
493
0
        }
494
0
        else {
495
0
            W_TYPE(TYPE_FLOAT, p);
496
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
497
0
        }
498
0
    }
499
0
    else if (PyComplex_CheckExact(v)) {
500
0
        if (p->version > 1) {
501
0
            W_TYPE(TYPE_BINARY_COMPLEX, p);
502
0
            w_float_bin(PyComplex_RealAsDouble(v), p);
503
0
            w_float_bin(PyComplex_ImagAsDouble(v), p);
504
0
        }
505
0
        else {
506
0
            W_TYPE(TYPE_COMPLEX, p);
507
0
            w_float_str(PyComplex_RealAsDouble(v), p);
508
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
509
0
        }
510
0
    }
511
0
    else if (PyBytes_CheckExact(v)) {
512
0
        W_TYPE(TYPE_STRING, p);
513
0
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
514
0
    }
515
0
    else if (PyUnicode_CheckExact(v)) {
516
0
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
517
0
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
518
0
            if (is_short) {
519
0
                if (PyUnicode_CHECK_INTERNED(v))
520
0
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
521
0
                else
522
0
                    W_TYPE(TYPE_SHORT_ASCII, p);
523
0
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
524
0
                                PyUnicode_GET_LENGTH(v), p);
525
0
            }
526
0
            else {
527
0
                if (PyUnicode_CHECK_INTERNED(v))
528
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
529
0
                else
530
0
                    W_TYPE(TYPE_ASCII, p);
531
0
                w_pstring(PyUnicode_1BYTE_DATA(v),
532
0
                          PyUnicode_GET_LENGTH(v), p);
533
0
            }
534
0
        }
535
0
        else {
536
0
            PyObject *utf8;
537
0
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
538
0
            if (utf8 == NULL) {
539
0
                p->depth--;
540
0
                p->error = WFERR_UNMARSHALLABLE;
541
0
                return;
542
0
            }
543
0
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
544
0
                W_TYPE(TYPE_INTERNED, p);
545
0
            else
546
0
                W_TYPE(TYPE_UNICODE, p);
547
0
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
548
0
            Py_DECREF(utf8);
549
0
        }
550
0
    }
551
0
    else if (PyTuple_CheckExact(v)) {
552
0
        n = PyTuple_GET_SIZE(v);
553
0
        if (p->version >= 4 && n < 256) {
554
0
            W_TYPE(TYPE_SMALL_TUPLE, p);
555
0
            w_byte((unsigned char)n, p);
556
0
        }
557
0
        else {
558
0
            W_TYPE(TYPE_TUPLE, p);
559
0
            W_SIZE(n, p);
560
0
        }
561
0
        for (i = 0; i < n; i++) {
562
0
            w_object(PyTuple_GET_ITEM(v, i), p);
563
0
        }
564
0
    }
565
0
    else if (PyList_CheckExact(v)) {
566
0
        W_TYPE(TYPE_LIST, p);
567
0
        n = PyList_GET_SIZE(v);
568
0
        W_SIZE(n, p);
569
0
        for (i = 0; i < n; i++) {
570
0
            w_object(PyList_GET_ITEM(v, i), p);
571
0
        }
572
0
    }
573
0
    else if (PyDict_CheckExact(v)) {
574
0
        Py_ssize_t pos;
575
0
        PyObject *key, *value;
576
0
        W_TYPE(TYPE_DICT, p);
577
        /* This one is NULL object terminated! */
578
0
        pos = 0;
579
0
        while (PyDict_Next(v, &pos, &key, &value)) {
580
0
            w_object(key, p);
581
0
            w_object(value, p);
582
0
        }
583
0
        w_object((PyObject *)NULL, p);
584
0
    }
585
0
    else if (PyAnySet_CheckExact(v)) {
586
0
        PyObject *value;
587
0
        Py_ssize_t pos = 0;
588
0
        Py_hash_t hash;
589
590
0
        if (PyFrozenSet_CheckExact(v))
591
0
            W_TYPE(TYPE_FROZENSET, p);
592
0
        else
593
0
            W_TYPE(TYPE_SET, p);
594
0
        n = PySet_GET_SIZE(v);
595
0
        W_SIZE(n, p);
596
        // bpo-37596: To support reproducible builds, sets and frozensets need
597
        // to have their elements serialized in a consistent order (even when
598
        // they have been scrambled by hash randomization). To ensure this, we
599
        // use an order equivalent to sorted(v, key=marshal.dumps):
600
0
        PyObject *pairs = PyList_New(n);
601
0
        if (pairs == NULL) {
602
0
            p->error = WFERR_NOMEMORY;
603
0
            return;
604
0
        }
605
0
        Py_ssize_t i = 0;
606
0
        Py_BEGIN_CRITICAL_SECTION(v);
607
0
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
608
0
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
609
0
                                    p->version, p->allow_code);
610
0
            if (dump == NULL) {
611
0
                p->error = WFERR_UNMARSHALLABLE;
612
0
                Py_DECREF(value);
613
0
                break;
614
0
            }
615
0
            PyObject *pair = PyTuple_Pack(2, dump, value);
616
0
            Py_DECREF(dump);
617
0
            Py_DECREF(value);
618
0
            if (pair == NULL) {
619
0
                p->error = WFERR_NOMEMORY;
620
0
                break;
621
0
            }
622
0
            PyList_SET_ITEM(pairs, i++, pair);
623
0
        }
624
0
        Py_END_CRITICAL_SECTION();
625
0
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
626
0
            Py_DECREF(pairs);
627
0
            return;
628
0
        }
629
0
        assert(i == n);
630
0
        if (PyList_Sort(pairs)) {
631
0
            p->error = WFERR_NOMEMORY;
632
0
            Py_DECREF(pairs);
633
0
            return;
634
0
        }
635
0
        for (Py_ssize_t i = 0; i < n; i++) {
636
0
            PyObject *pair = PyList_GET_ITEM(pairs, i);
637
0
            value = PyTuple_GET_ITEM(pair, 1);
638
0
            w_object(value, p);
639
0
        }
640
0
        Py_DECREF(pairs);
641
0
    }
642
0
    else if (PyCode_Check(v)) {
643
0
        if (!p->allow_code) {
644
0
            p->error = WFERR_CODE_NOT_ALLOWED;
645
0
            return;
646
0
        }
647
0
        PyCodeObject *co = (PyCodeObject *)v;
648
0
        PyObject *co_code = _PyCode_GetCode(co);
649
0
        if (co_code == NULL) {
650
0
            p->error = WFERR_NOMEMORY;
651
0
            return;
652
0
        }
653
0
        W_TYPE(TYPE_CODE, p);
654
0
        w_long(co->co_argcount, p);
655
0
        w_long(co->co_posonlyargcount, p);
656
0
        w_long(co->co_kwonlyargcount, p);
657
0
        w_long(co->co_stacksize, p);
658
0
        w_long(co->co_flags, p);
659
0
        w_object(co_code, p);
660
0
        w_object(co->co_consts, p);
661
0
        w_object(co->co_names, p);
662
0
        w_object(co->co_localsplusnames, p);
663
0
        w_object(co->co_localspluskinds, p);
664
0
        w_object(co->co_filename, p);
665
0
        w_object(co->co_name, p);
666
0
        w_object(co->co_qualname, p);
667
0
        w_long(co->co_firstlineno, p);
668
0
        w_object(co->co_linetable, p);
669
0
        w_object(co->co_exceptiontable, p);
670
0
        Py_DECREF(co_code);
671
0
    }
672
0
    else if (PyObject_CheckBuffer(v)) {
673
        /* Write unknown bytes-like objects as a bytes object */
674
0
        Py_buffer view;
675
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
676
0
            w_byte(TYPE_UNKNOWN, p);
677
0
            p->depth--;
678
0
            p->error = WFERR_UNMARSHALLABLE;
679
0
            return;
680
0
        }
681
0
        W_TYPE(TYPE_STRING, p);
682
0
        w_pstring(view.buf, view.len, p);
683
0
        PyBuffer_Release(&view);
684
0
    }
685
0
    else if (PySlice_Check(v)) {
686
0
        if (p->version < 5) {
687
0
            w_byte(TYPE_UNKNOWN, p);
688
0
            p->error = WFERR_UNMARSHALLABLE;
689
0
            return;
690
0
        }
691
0
        PySliceObject *slice = (PySliceObject *)v;
692
0
        W_TYPE(TYPE_SLICE, p);
693
0
        w_object(slice->start, p);
694
0
        w_object(slice->stop, p);
695
0
        w_object(slice->step, p);
696
0
    }
697
0
    else {
698
0
        W_TYPE(TYPE_UNKNOWN, p);
699
0
        p->error = WFERR_UNMARSHALLABLE;
700
0
    }
701
0
}
702
703
static void
704
w_decref_entry(void *key)
705
0
{
706
0
    PyObject *entry_key = (PyObject *)key;
707
0
    Py_XDECREF(entry_key);
708
0
}
709
710
static int
711
w_init_refs(WFILE *wf, int version)
712
0
{
713
0
    if (version >= 3) {
714
0
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
715
0
                                               _Py_hashtable_compare_direct,
716
0
                                               w_decref_entry, NULL, NULL);
717
0
        if (wf->hashtable == NULL) {
718
0
            PyErr_NoMemory();
719
0
            return -1;
720
0
        }
721
0
    }
722
0
    return 0;
723
0
}
724
725
static void
726
w_clear_refs(WFILE *wf)
727
0
{
728
0
    if (wf->hashtable != NULL) {
729
0
        _Py_hashtable_destroy(wf->hashtable);
730
0
    }
731
0
}
732
733
/* version currently has no effect for writing ints. */
734
/* Note that while the documentation states that this function
735
 * can error, currently it never does. Setting an exception in
736
 * this function should be regarded as an API-breaking change.
737
 */
738
void
739
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
740
0
{
741
0
    char buf[4];
742
0
    WFILE wf;
743
0
    memset(&wf, 0, sizeof(wf));
744
0
    wf.fp = fp;
745
0
    wf.ptr = wf.buf = buf;
746
0
    wf.end = wf.ptr + sizeof(buf);
747
0
    wf.error = WFERR_OK;
748
0
    wf.version = version;
749
0
    w_long(x, &wf);
750
0
    w_flush(&wf);
751
0
}
752
753
void
754
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
755
0
{
756
0
    char buf[BUFSIZ];
757
0
    WFILE wf;
758
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
759
0
        return; /* caller must check PyErr_Occurred() */
760
0
    }
761
0
    memset(&wf, 0, sizeof(wf));
762
0
    wf.fp = fp;
763
0
    wf.ptr = wf.buf = buf;
764
0
    wf.end = wf.ptr + sizeof(buf);
765
0
    wf.error = WFERR_OK;
766
0
    wf.version = version;
767
0
    wf.allow_code = 1;
768
0
    if (w_init_refs(&wf, version)) {
769
0
        return; /* caller must check PyErr_Occurred() */
770
0
    }
771
0
    w_object(x, &wf);
772
0
    w_clear_refs(&wf);
773
0
    w_flush(&wf);
774
0
}
775
776
typedef struct {
777
    FILE *fp;
778
    int depth;
779
    PyObject *readable;  /* Stream-like object being read from */
780
    const char *ptr;
781
    const char *end;
782
    char *buf;
783
    Py_ssize_t buf_size;
784
    PyObject *refs;  /* a list */
785
    int allow_code;
786
} RFILE;
787
788
static const char *
789
r_string(Py_ssize_t n, RFILE *p)
790
718k
{
791
718k
    Py_ssize_t read = -1;
792
793
718k
    if (p->ptr != NULL) {
794
        /* Fast path for loads() */
795
718k
        const char *res = p->ptr;
796
718k
        Py_ssize_t left = p->end - p->ptr;
797
718k
        if (left < n) {
798
0
            PyErr_SetString(PyExc_EOFError,
799
0
                            "marshal data too short");
800
0
            return NULL;
801
0
        }
802
718k
        p->ptr += n;
803
718k
        return res;
804
718k
    }
805
0
    if (p->buf == NULL) {
806
0
        p->buf = PyMem_Malloc(n);
807
0
        if (p->buf == NULL) {
808
0
            PyErr_NoMemory();
809
0
            return NULL;
810
0
        }
811
0
        p->buf_size = n;
812
0
    }
813
0
    else if (p->buf_size < n) {
814
0
        char *tmp = PyMem_Realloc(p->buf, n);
815
0
        if (tmp == NULL) {
816
0
            PyErr_NoMemory();
817
0
            return NULL;
818
0
        }
819
0
        p->buf = tmp;
820
0
        p->buf_size = n;
821
0
    }
822
823
0
    if (!p->readable) {
824
0
        assert(p->fp != NULL);
825
0
        read = fread(p->buf, 1, n, p->fp);
826
0
    }
827
0
    else {
828
0
        PyObject *res, *mview;
829
0
        Py_buffer buf;
830
831
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
832
0
            return NULL;
833
0
        mview = PyMemoryView_FromBuffer(&buf);
834
0
        if (mview == NULL)
835
0
            return NULL;
836
837
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
838
0
        if (res != NULL) {
839
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
840
0
            Py_DECREF(res);
841
0
        }
842
0
    }
843
0
    if (read != n) {
844
0
        if (!PyErr_Occurred()) {
845
0
            if (read > n)
846
0
                PyErr_Format(PyExc_ValueError,
847
0
                             "read() returned too much data: "
848
0
                             "%zd bytes requested, %zd returned",
849
0
                             n, read);
850
0
            else
851
0
                PyErr_SetString(PyExc_EOFError,
852
0
                                "EOF read where not expected");
853
0
        }
854
0
        return NULL;
855
0
    }
856
0
    return p->buf;
857
0
}
858
859
static int
860
r_byte(RFILE *p)
861
794k
{
862
794k
    if (p->ptr != NULL) {
863
794k
        if (p->ptr < p->end) {
864
794k
            return (unsigned char) *p->ptr++;
865
794k
        }
866
794k
    }
867
0
    else if (!p->readable) {
868
0
        assert(p->fp);
869
0
        int c = getc(p->fp);
870
0
        if (c != EOF) {
871
0
            return c;
872
0
        }
873
0
    }
874
0
    else {
875
0
        const char *ptr = r_string(1, p);
876
0
        if (ptr != NULL) {
877
0
            return *(const unsigned char *) ptr;
878
0
        }
879
0
        return EOF;
880
0
    }
881
0
    PyErr_SetString(PyExc_EOFError,
882
0
                    "EOF read where not expected");
883
0
    return EOF;
884
794k
}
885
886
static int
887
r_short(RFILE *p)
888
364
{
889
364
    short x = -1;
890
364
    const unsigned char *buffer;
891
892
364
    buffer = (const unsigned char *) r_string(2, p);
893
364
    if (buffer != NULL) {
894
364
        x = buffer[0];
895
364
        x |= buffer[1] << 8;
896
        /* Sign-extension, in case short greater than 16 bits */
897
364
        x |= -(x & 0x8000);
898
364
    }
899
364
    return x;
900
364
}
901
902
static long
903
r_long(RFILE *p)
904
497k
{
905
497k
    long x = -1;
906
497k
    const unsigned char *buffer;
907
908
497k
    buffer = (const unsigned char *) r_string(4, p);
909
497k
    if (buffer != NULL) {
910
497k
        x = buffer[0];
911
497k
        x |= (long)buffer[1] << 8;
912
497k
        x |= (long)buffer[2] << 16;
913
497k
        x |= (long)buffer[3] << 24;
914
497k
#if SIZEOF_LONG > 4
915
        /* Sign extension for 64-bit machines */
916
497k
        x |= -(x & 0x80000000L);
917
497k
#endif
918
497k
    }
919
497k
    return x;
920
497k
}
921
922
/* r_long64 deals with the TYPE_INT64 code. */
923
static PyObject *
924
r_long64(RFILE *p)
925
0
{
926
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
927
0
    if (buffer == NULL) {
928
0
        return NULL;
929
0
    }
930
0
    return _PyLong_FromByteArray(buffer, 8,
931
0
                                 1 /* little endian */,
932
0
                                 1 /* signed */);
933
0
}
934
935
/* Generator for _w_digits16 / _w_digits32.
 *
 * The generated function fills 'digits' (an array of 'size' unsigned
 * integers of the given bit width) from 15-bit marshal digits read with
 * r_short().  Every native digit but the last is assembled from
 * 'marshal_ratio' marshal digits, least significant first; the last one is
 * assembled from 'shorts_in_top_digit' marshal digits.
 *
 * Returns 0 on success.  Returns -1 with an exception set when a read
 * fails, when a marshal digit is outside [0, PyLong_MARSHAL_BASE), or when
 * the most significant marshal digit is zero (unnormalized data).
 */
#define _w_digits(bitsize)                                              \
static int                                                              \
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
                   Py_ssize_t marshal_ratio,                            \
                   int shorts_in_top_digit, RFILE *p)                   \
{                                                                       \
    uint ## bitsize ## _t d;                                            \
                                                                        \
    assert(size >= 1);                                                  \
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
        d = 0;                                                          \
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
            int md = r_short(p);                                        \
            /* a read failure yields -1 and lands here as well */       \
            if (md < 0 || md >= PyLong_MARSHAL_BASE) {                  \
                goto bad_digit;                                         \
            }                                                           \
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
        }                                                               \
        digits[i] = d;                                                  \
    }                                                                   \
                                                                        \
    d = 0;                                                              \
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
        int md = r_short(p);                                            \
        if (md < 0 || md >= PyLong_MARSHAL_BASE) {                      \
            goto bad_digit;                                             \
        }                                                               \
        /* topmost marshal digit should be nonzero, else the resulting  \
           PyLong won't be normalized */                                \
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
            PyErr_SetString(PyExc_ValueError,                           \
                "bad marshal data (unnormalized long data)");           \
            return -1;                                                  \
        }                                                               \
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
    }                                                                   \
    assert(!PyErr_Occurred());                                          \
    digits[size - 1] = d;                                               \
    return 0;                                                           \
                                                                        \
bad_digit:                                                              \
    /* keep the exception from a failed read; otherwise report the      \
       out-of-range digit */                                            \
    if (!PyErr_Occurred()) {                                            \
        PyErr_SetString(PyExc_ValueError,                               \
            "bad marshal data (digit out of range in long)");           \
    }                                                                   \
    return -1;                                                          \
}
marshal.c:_w_digits32
Line
Count
Source
939
110
                   int shorts_in_top_digit, RFILE *p)                   \
940
110
{                                                                       \
941
110
    uint ## bitsize ## _t d;                                            \
942
110
                                                                        \
943
110
    assert(size >= 1);                                                  \
944
232
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
945
122
        d = 0;                                                          \
946
366
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
947
244
            int md = r_short(p);                                        \
948
244
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
949
0
                goto bad_digit;                                         \
950
0
            }                                                           \
951
244
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
952
244
        }                                                               \
953
122
        digits[i] = d;                                                  \
954
122
    }                                                                   \
955
110
                                                                        \
956
110
    d = 0;                                                              \
957
230
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
958
120
        int md = r_short(p);                                            \
959
120
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
960
0
            goto bad_digit;                                             \
961
0
        }                                                               \
962
120
        /* topmost marshal digit should be nonzero */                   \
963
120
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
964
0
            PyErr_SetString(PyExc_ValueError,                           \
965
0
                "bad marshal data (unnormalized long data)");           \
966
0
            return -1;                                                  \
967
0
        }                                                               \
968
120
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
969
120
    }                                                                   \
970
110
    assert(!PyErr_Occurred());                                          \
971
110
    /* top digit should be nonzero, else the resulting PyLong won't be  \
972
110
       normalized */                                                    \
973
110
    digits[size - 1] = d;                                               \
974
110
    return 0;                                                           \
975
110
                                                                        \
976
0
bad_digit:                                                              \
977
0
    if (!PyErr_Occurred()) {                                            \
978
0
        PyErr_SetString(PyExc_ValueError,                               \
979
0
            "bad marshal data (digit out of range in long)");           \
980
0
    }                                                                   \
981
0
    return -1;                                                          \
982
110
}
Unexecuted instantiation: marshal.c:_w_digits16
983
_w_digits(32)
984
_w_digits(16)
985
#undef _w_digits
986
987
static PyObject *
988
r_PyLong(RFILE *p)
989
110
{
990
110
    long n = r_long(p);
991
110
    if (n == -1 && PyErr_Occurred()) {
992
0
        return NULL;
993
0
    }
994
110
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
995
0
        PyErr_SetString(PyExc_ValueError,
996
0
                       "bad marshal data (long size out of range)");
997
0
        return NULL;
998
0
    }
999
1000
110
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1001
110
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1002
1003
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1004
110
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1005
110
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1006
1007
    /* other assumptions on PyLongObject internals */
1008
110
    assert(layout->bits_per_digit <= 32);
1009
110
    assert(layout->digits_order == -1);
1010
110
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1011
110
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1012
1013
110
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1014
1015
110
    assert(size >= 1);
1016
1017
110
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1018
110
    void *digits;
1019
110
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1020
1021
110
    if (writer == NULL) {
1022
0
        return NULL;
1023
0
    }
1024
1025
110
    int ret;
1026
1027
110
    if (layout->digit_size == 4) {
1028
110
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1029
110
    }
1030
0
    else {
1031
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1032
0
    }
1033
110
    if (ret < 0) {
1034
0
        PyLongWriter_Discard(writer);
1035
0
        return NULL;
1036
0
    }
1037
110
    return PyLongWriter_Finish(writer);
1038
110
}
1039
1040
static double
1041
r_float_bin(RFILE *p)
1042
23
{
1043
23
    const char *buf = r_string(8, p);
1044
23
    if (buf == NULL)
1045
0
        return -1;
1046
23
    return PyFloat_Unpack8(buf, 1);
1047
23
}
1048
1049
/* Issue #33720: Disable inlining for reducing the C stack consumption
1050
   on PGO builds. */
1051
Py_NO_INLINE static double
1052
r_float_str(RFILE *p)
1053
0
{
1054
0
    int n;
1055
0
    char buf[256];
1056
0
    const char *ptr;
1057
0
    n = r_byte(p);
1058
0
    if (n == EOF) {
1059
0
        return -1;
1060
0
    }
1061
0
    ptr = r_string(n, p);
1062
0
    if (ptr == NULL) {
1063
0
        return -1;
1064
0
    }
1065
0
    memcpy(buf, ptr, n);
1066
0
    buf[n] = '\0';
1067
0
    return PyOS_string_to_double(buf, NULL, NULL);
1068
0
}
1069
1070
/* allocate the reflist index for a new object. Return -1 on failure */
1071
static Py_ssize_t
1072
r_ref_reserve(int flag, RFILE *p)
1073
25.8k
{
1074
25.8k
    if (flag) { /* currently only FLAG_REF is defined */
1075
553
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1076
553
        if (idx >= 0x7ffffffe) {
1077
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1078
0
            return -1;
1079
0
        }
1080
553
        if (PyList_Append(p->refs, Py_None) < 0)
1081
0
            return -1;
1082
553
        return idx;
1083
553
    } else
1084
25.3k
        return 0;
1085
25.8k
}
1086
1087
/* insert the new object 'o' to the reflist at previously
1088
 * allocated index 'idx'.
1089
 * 'o' can be NULL, in which case nothing is done.
1090
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1091
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1092
 * NULL returned. This simplifies error checking at the call site since
1093
 * a single test for NULL for the function result is enough.
1094
 */
1095
static PyObject *
1096
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1097
25.8k
{
1098
25.8k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1099
553
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1100
553
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1101
553
        Py_DECREF(tmp);
1102
553
    }
1103
25.8k
    return o;
1104
25.8k
}
1105
1106
/* combination of both above, used when an object can be
1107
 * created whenever it is seen in the file, as opposed to
1108
 * after having loaded its sub-objects.
1109
 */
1110
static PyObject *
1111
r_ref(PyObject *o, int flag, RFILE *p)
1112
159k
{
1113
159k
    assert(flag & FLAG_REF);
1114
159k
    if (o == NULL)
1115
0
        return NULL;
1116
159k
    if (PyList_Append(p->refs, o) < 0) {
1117
0
        Py_DECREF(o); /* release the new object */
1118
0
        return NULL;
1119
0
    }
1120
159k
    return o;
1121
159k
}
1122
1123
static PyObject *
1124
r_object(RFILE *p)
1125
593k
{
1126
    /* NULL is a valid return value, it does not necessarily means that
1127
       an exception is set. */
1128
593k
    PyObject *v, *v2;
1129
593k
    Py_ssize_t idx = 0;
1130
593k
    long i, n;
1131
593k
    int type, code = r_byte(p);
1132
593k
    int flag, is_interned = 0;
1133
593k
    PyObject *retval = NULL;
1134
1135
593k
    if (code == EOF) {
1136
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1137
0
            PyErr_SetString(PyExc_EOFError,
1138
0
                            "EOF read where object expected");
1139
0
        }
1140
0
        return NULL;
1141
0
    }
1142
1143
593k
    p->depth++;
1144
1145
593k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1146
0
        p->depth--;
1147
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1148
0
        return NULL;
1149
0
    }
1150
1151
593k
    flag = code & FLAG_REF;
1152
593k
    type = code & ~FLAG_REF;
1153
1154
593k
#define R_REF(O) do{\
1155
287k
    if (flag) \
1156
287k
        O = r_ref(O, flag, p);\
1157
287k
} while (0)
1158
1159
593k
    switch (type) {
1160
1161
0
    case TYPE_NULL:
1162
0
        break;
1163
1164
12.7k
    case TYPE_NONE:
1165
12.7k
        retval = Py_None;
1166
12.7k
        break;
1167
1168
0
    case TYPE_STOPITER:
1169
0
        retval = Py_NewRef(PyExc_StopIteration);
1170
0
        break;
1171
1172
28
    case TYPE_ELLIPSIS:
1173
28
        retval = Py_Ellipsis;
1174
28
        break;
1175
1176
1.93k
    case TYPE_FALSE:
1177
1.93k
        retval = Py_False;
1178
1.93k
        break;
1179
1180
1.91k
    case TYPE_TRUE:
1181
1.91k
        retval = Py_True;
1182
1.91k
        break;
1183
1184
9.08k
    case TYPE_INT:
1185
9.08k
        n = r_long(p);
1186
9.08k
        if (n == -1 && PyErr_Occurred()) {
1187
0
            break;
1188
0
        }
1189
9.08k
        retval = PyLong_FromLong(n);
1190
9.08k
        R_REF(retval);
1191
9.08k
        break;
1192
1193
0
    case TYPE_INT64:
1194
0
        retval = r_long64(p);
1195
0
        R_REF(retval);
1196
0
        break;
1197
1198
110
    case TYPE_LONG:
1199
110
        retval = r_PyLong(p);
1200
110
        R_REF(retval);
1201
110
        break;
1202
1203
0
    case TYPE_FLOAT:
1204
0
        {
1205
0
            double x = r_float_str(p);
1206
0
            if (x == -1.0 && PyErr_Occurred())
1207
0
                break;
1208
0
            retval = PyFloat_FromDouble(x);
1209
0
            R_REF(retval);
1210
0
            break;
1211
0
        }
1212
1213
23
    case TYPE_BINARY_FLOAT:
1214
23
        {
1215
23
            double x = r_float_bin(p);
1216
23
            if (x == -1.0 && PyErr_Occurred())
1217
0
                break;
1218
23
            retval = PyFloat_FromDouble(x);
1219
23
            R_REF(retval);
1220
23
            break;
1221
23
        }
1222
1223
0
    case TYPE_COMPLEX:
1224
0
        {
1225
0
            Py_complex c;
1226
0
            c.real = r_float_str(p);
1227
0
            if (c.real == -1.0 && PyErr_Occurred())
1228
0
                break;
1229
0
            c.imag = r_float_str(p);
1230
0
            if (c.imag == -1.0 && PyErr_Occurred())
1231
0
                break;
1232
0
            retval = PyComplex_FromCComplex(c);
1233
0
            R_REF(retval);
1234
0
            break;
1235
0
        }
1236
1237
0
    case TYPE_BINARY_COMPLEX:
1238
0
        {
1239
0
            Py_complex c;
1240
0
            c.real = r_float_bin(p);
1241
0
            if (c.real == -1.0 && PyErr_Occurred())
1242
0
                break;
1243
0
            c.imag = r_float_bin(p);
1244
0
            if (c.imag == -1.0 && PyErr_Occurred())
1245
0
                break;
1246
0
            retval = PyComplex_FromCComplex(c);
1247
0
            R_REF(retval);
1248
0
            break;
1249
0
        }
1250
1251
74.2k
    case TYPE_STRING:
1252
74.2k
        {
1253
74.2k
            const char *ptr;
1254
74.2k
            n = r_long(p);
1255
74.2k
            if (n < 0 || n > SIZE32_MAX) {
1256
0
                if (!PyErr_Occurred()) {
1257
0
                    PyErr_SetString(PyExc_ValueError,
1258
0
                        "bad marshal data (bytes object size out of range)");
1259
0
                }
1260
0
                break;
1261
0
            }
1262
74.2k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1263
74.2k
            if (v == NULL)
1264
0
                break;
1265
74.2k
            ptr = r_string(n, p);
1266
74.2k
            if (ptr == NULL) {
1267
0
                Py_DECREF(v);
1268
0
                break;
1269
0
            }
1270
74.2k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1271
74.2k
            retval = v;
1272
74.2k
            R_REF(retval);
1273
74.2k
            break;
1274
74.2k
        }
1275
1276
0
    case TYPE_ASCII_INTERNED:
1277
0
        is_interned = 1;
1278
0
        _Py_FALLTHROUGH;
1279
2.28k
    case TYPE_ASCII:
1280
2.28k
        n = r_long(p);
1281
2.28k
        if (n < 0 || n > SIZE32_MAX) {
1282
0
            if (!PyErr_Occurred()) {
1283
0
                PyErr_SetString(PyExc_ValueError,
1284
0
                    "bad marshal data (string size out of range)");
1285
0
            }
1286
0
            break;
1287
0
        }
1288
2.28k
        goto _read_ascii;
1289
1290
122k
    case TYPE_SHORT_ASCII_INTERNED:
1291
122k
        is_interned = 1;
1292
122k
        _Py_FALLTHROUGH;
1293
144k
    case TYPE_SHORT_ASCII:
1294
144k
        n = r_byte(p);
1295
144k
        if (n == EOF) {
1296
0
            break;
1297
0
        }
1298
146k
    _read_ascii:
1299
146k
        {
1300
146k
            const char *ptr;
1301
146k
            ptr = r_string(n, p);
1302
146k
            if (ptr == NULL)
1303
0
                break;
1304
146k
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1305
146k
            if (v == NULL)
1306
0
                break;
1307
146k
            if (is_interned) {
1308
                // marshal is meant to serialize .pyc files with code
1309
                // objects, and code-related strings are currently immortal.
1310
122k
                PyInterpreterState *interp = _PyInterpreterState_GET();
1311
122k
                _PyUnicode_InternImmortal(interp, &v);
1312
122k
            }
1313
146k
            retval = v;
1314
146k
            R_REF(retval);
1315
146k
            break;
1316
146k
        }
1317
1318
0
    case TYPE_INTERNED:
1319
0
        is_interned = 1;
1320
0
        _Py_FALLTHROUGH;
1321
223
    case TYPE_UNICODE:
1322
223
        {
1323
223
        const char *buffer;
1324
1325
223
        n = r_long(p);
1326
223
        if (n < 0 || n > SIZE32_MAX) {
1327
0
            if (!PyErr_Occurred()) {
1328
0
                PyErr_SetString(PyExc_ValueError,
1329
0
                    "bad marshal data (string size out of range)");
1330
0
            }
1331
0
            break;
1332
0
        }
1333
223
        if (n != 0) {
1334
223
            buffer = r_string(n, p);
1335
223
            if (buffer == NULL)
1336
0
                break;
1337
223
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1338
223
        }
1339
0
        else {
1340
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1341
0
        }
1342
223
        if (v == NULL)
1343
0
            break;
1344
223
        if (is_interned) {
1345
            // marshal is meant to serialize .pyc files with code
1346
            // objects, and code-related strings are currently immortal.
1347
0
            PyInterpreterState *interp = _PyInterpreterState_GET();
1348
0
            _PyUnicode_InternImmortal(interp, &v);
1349
0
        }
1350
223
        retval = v;
1351
223
        R_REF(retval);
1352
223
        break;
1353
223
        }
1354
1355
57.0k
    case TYPE_SMALL_TUPLE:
1356
57.0k
        n = r_byte(p);
1357
57.0k
        if (n == EOF) {
1358
0
            break;
1359
0
        }
1360
57.0k
        goto _read_tuple;
1361
57.0k
    case TYPE_TUPLE:
1362
28
        n = r_long(p);
1363
28
        if (n < 0 || n > SIZE32_MAX) {
1364
0
            if (!PyErr_Occurred()) {
1365
0
                PyErr_SetString(PyExc_ValueError,
1366
0
                    "bad marshal data (tuple size out of range)");
1367
0
            }
1368
0
            break;
1369
0
        }
1370
57.0k
    _read_tuple:
1371
57.0k
        v = PyTuple_New(n);
1372
57.0k
        R_REF(v);
1373
57.0k
        if (v == NULL)
1374
0
            break;
1375
1376
400k
        for (i = 0; i < n; i++) {
1377
343k
            v2 = r_object(p);
1378
343k
            if ( v2 == NULL ) {
1379
0
                if (!PyErr_Occurred())
1380
0
                    PyErr_SetString(PyExc_TypeError,
1381
0
                        "NULL object in marshal data for tuple");
1382
0
                Py_SETREF(v, NULL);
1383
0
                break;
1384
0
            }
1385
343k
            PyTuple_SET_ITEM(v, i, v2);
1386
343k
        }
1387
57.0k
        retval = v;
1388
57.0k
        break;
1389
1390
0
    case TYPE_LIST:
1391
0
        n = r_long(p);
1392
0
        if (n < 0 || n > SIZE32_MAX) {
1393
0
            if (!PyErr_Occurred()) {
1394
0
                PyErr_SetString(PyExc_ValueError,
1395
0
                    "bad marshal data (list size out of range)");
1396
0
            }
1397
0
            break;
1398
0
        }
1399
0
        v = PyList_New(n);
1400
0
        R_REF(v);
1401
0
        if (v == NULL)
1402
0
            break;
1403
0
        for (i = 0; i < n; i++) {
1404
0
            v2 = r_object(p);
1405
0
            if ( v2 == NULL ) {
1406
0
                if (!PyErr_Occurred())
1407
0
                    PyErr_SetString(PyExc_TypeError,
1408
0
                        "NULL object in marshal data for list");
1409
0
                Py_SETREF(v, NULL);
1410
0
                break;
1411
0
            }
1412
0
            PyList_SET_ITEM(v, i, v2);
1413
0
        }
1414
0
        retval = v;
1415
0
        break;
1416
1417
0
    case TYPE_DICT:
1418
0
        v = PyDict_New();
1419
0
        R_REF(v);
1420
0
        if (v == NULL)
1421
0
            break;
1422
0
        for (;;) {
1423
0
            PyObject *key, *val;
1424
0
            key = r_object(p);
1425
0
            if (key == NULL)
1426
0
                break;
1427
0
            val = r_object(p);
1428
0
            if (val == NULL) {
1429
0
                Py_DECREF(key);
1430
0
                break;
1431
0
            }
1432
0
            if (PyDict_SetItem(v, key, val) < 0) {
1433
0
                Py_DECREF(key);
1434
0
                Py_DECREF(val);
1435
0
                break;
1436
0
            }
1437
0
            Py_DECREF(key);
1438
0
            Py_DECREF(val);
1439
0
        }
1440
0
        if (PyErr_Occurred()) {
1441
0
            Py_SETREF(v, NULL);
1442
0
        }
1443
0
        retval = v;
1444
0
        break;
1445
1446
0
    case TYPE_SET:
1447
107
    case TYPE_FROZENSET:
1448
107
        n = r_long(p);
1449
107
        if (n < 0 || n > SIZE32_MAX) {
1450
0
            if (!PyErr_Occurred()) {
1451
0
                PyErr_SetString(PyExc_ValueError,
1452
0
                    "bad marshal data (set size out of range)");
1453
0
            }
1454
0
            break;
1455
0
        }
1456
1457
107
        if (n == 0 && type == TYPE_FROZENSET) {
1458
            /* call frozenset() to get the empty frozenset singleton */
1459
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1460
0
            if (v == NULL)
1461
0
                break;
1462
0
            R_REF(v);
1463
0
            retval = v;
1464
0
        }
1465
107
        else {
1466
107
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1467
107
            if (type == TYPE_SET) {
1468
0
                R_REF(v);
1469
107
            } else {
1470
                /* must use delayed registration of frozensets because they must
1471
                 * be init with a refcount of 1
1472
                 */
1473
107
                idx = r_ref_reserve(flag, p);
1474
107
                if (idx < 0)
1475
0
                    Py_CLEAR(v); /* signal error */
1476
107
            }
1477
107
            if (v == NULL)
1478
0
                break;
1479
1480
482
            for (i = 0; i < n; i++) {
1481
375
                v2 = r_object(p);
1482
375
                if ( v2 == NULL ) {
1483
0
                    if (!PyErr_Occurred())
1484
0
                        PyErr_SetString(PyExc_TypeError,
1485
0
                            "NULL object in marshal data for set");
1486
0
                    Py_SETREF(v, NULL);
1487
0
                    break;
1488
0
                }
1489
375
                if (PySet_Add(v, v2) == -1) {
1490
0
                    Py_DECREF(v);
1491
0
                    Py_DECREF(v2);
1492
0
                    v = NULL;
1493
0
                    break;
1494
0
                }
1495
375
                Py_DECREF(v2);
1496
375
            }
1497
107
            if (type != TYPE_SET)
1498
107
                v = r_ref_insert(v, idx, flag, p);
1499
107
            retval = v;
1500
107
        }
1501
107
        break;
1502
1503
24.5k
    case TYPE_CODE:
1504
24.5k
        {
1505
24.5k
            int argcount;
1506
24.5k
            int posonlyargcount;
1507
24.5k
            int kwonlyargcount;
1508
24.5k
            int stacksize;
1509
24.5k
            int flags;
1510
24.5k
            PyObject *code = NULL;
1511
24.5k
            PyObject *consts = NULL;
1512
24.5k
            PyObject *names = NULL;
1513
24.5k
            PyObject *localsplusnames = NULL;
1514
24.5k
            PyObject *localspluskinds = NULL;
1515
24.5k
            PyObject *filename = NULL;
1516
24.5k
            PyObject *name = NULL;
1517
24.5k
            PyObject *qualname = NULL;
1518
24.5k
            int firstlineno;
1519
24.5k
            PyObject* linetable = NULL;
1520
24.5k
            PyObject *exceptiontable = NULL;
1521
1522
24.5k
            if (!p->allow_code) {
1523
0
                PyErr_SetString(PyExc_ValueError,
1524
0
                                "unmarshalling code objects is disallowed");
1525
0
                break;
1526
0
            }
1527
24.5k
            idx = r_ref_reserve(flag, p);
1528
24.5k
            if (idx < 0)
1529
0
                break;
1530
1531
24.5k
            v = NULL;
1532
1533
            /* XXX ignore long->int overflows for now */
1534
24.5k
            argcount = (int)r_long(p);
1535
24.5k
            if (argcount == -1 && PyErr_Occurred())
1536
0
                goto code_error;
1537
24.5k
            posonlyargcount = (int)r_long(p);
1538
24.5k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1539
0
                goto code_error;
1540
0
            }
1541
24.5k
            kwonlyargcount = (int)r_long(p);
1542
24.5k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1543
0
                goto code_error;
1544
24.5k
            stacksize = (int)r_long(p);
1545
24.5k
            if (stacksize == -1 && PyErr_Occurred())
1546
0
                goto code_error;
1547
24.5k
            flags = (int)r_long(p);
1548
24.5k
            if (flags == -1 && PyErr_Occurred())
1549
0
                goto code_error;
1550
24.5k
            code = r_object(p);
1551
24.5k
            if (code == NULL)
1552
0
                goto code_error;
1553
24.5k
            consts = r_object(p);
1554
24.5k
            if (consts == NULL)
1555
0
                goto code_error;
1556
24.5k
            names = r_object(p);
1557
24.5k
            if (names == NULL)
1558
0
                goto code_error;
1559
24.5k
            localsplusnames = r_object(p);
1560
24.5k
            if (localsplusnames == NULL)
1561
0
                goto code_error;
1562
24.5k
            localspluskinds = r_object(p);
1563
24.5k
            if (localspluskinds == NULL)
1564
0
                goto code_error;
1565
24.5k
            filename = r_object(p);
1566
24.5k
            if (filename == NULL)
1567
0
                goto code_error;
1568
24.5k
            name = r_object(p);
1569
24.5k
            if (name == NULL)
1570
0
                goto code_error;
1571
24.5k
            qualname = r_object(p);
1572
24.5k
            if (qualname == NULL)
1573
0
                goto code_error;
1574
24.5k
            firstlineno = (int)r_long(p);
1575
24.5k
            if (firstlineno == -1 && PyErr_Occurred())
1576
0
                break;
1577
24.5k
            linetable = r_object(p);
1578
24.5k
            if (linetable == NULL)
1579
0
                goto code_error;
1580
24.5k
            exceptiontable = r_object(p);
1581
24.5k
            if (exceptiontable == NULL)
1582
0
                goto code_error;
1583
1584
24.5k
            struct _PyCodeConstructor con = {
1585
24.5k
                .filename = filename,
1586
24.5k
                .name = name,
1587
24.5k
                .qualname = qualname,
1588
24.5k
                .flags = flags,
1589
1590
24.5k
                .code = code,
1591
24.5k
                .firstlineno = firstlineno,
1592
24.5k
                .linetable = linetable,
1593
1594
24.5k
                .consts = consts,
1595
24.5k
                .names = names,
1596
1597
24.5k
                .localsplusnames = localsplusnames,
1598
24.5k
                .localspluskinds = localspluskinds,
1599
1600
24.5k
                .argcount = argcount,
1601
24.5k
                .posonlyargcount = posonlyargcount,
1602
24.5k
                .kwonlyargcount = kwonlyargcount,
1603
1604
24.5k
                .stacksize = stacksize,
1605
1606
24.5k
                .exceptiontable = exceptiontable,
1607
24.5k
            };
1608
1609
24.5k
            if (_PyCode_Validate(&con) < 0) {
1610
0
                goto code_error;
1611
0
            }
1612
1613
24.5k
            v = (PyObject *)_PyCode_New(&con);
1614
24.5k
            if (v == NULL) {
1615
0
                goto code_error;
1616
0
            }
1617
1618
24.5k
            v = r_ref_insert(v, idx, flag, p);
1619
1620
24.5k
          code_error:
1621
24.5k
            if (v == NULL && !PyErr_Occurred()) {
1622
0
                PyErr_SetString(PyExc_TypeError,
1623
0
                    "NULL object in marshal data for code object");
1624
0
            }
1625
24.5k
            Py_XDECREF(code);
1626
24.5k
            Py_XDECREF(consts);
1627
24.5k
            Py_XDECREF(names);
1628
24.5k
            Py_XDECREF(localsplusnames);
1629
24.5k
            Py_XDECREF(localspluskinds);
1630
24.5k
            Py_XDECREF(filename);
1631
24.5k
            Py_XDECREF(name);
1632
24.5k
            Py_XDECREF(qualname);
1633
24.5k
            Py_XDECREF(linetable);
1634
24.5k
            Py_XDECREF(exceptiontable);
1635
24.5k
        }
1636
0
        retval = v;
1637
24.5k
        break;
1638
1639
263k
    case TYPE_REF:
1640
263k
        n = r_long(p);
1641
263k
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1642
0
            if (!PyErr_Occurred()) {
1643
0
                PyErr_SetString(PyExc_ValueError,
1644
0
                    "bad marshal data (invalid reference)");
1645
0
            }
1646
0
            break;
1647
0
        }
1648
263k
        v = PyList_GET_ITEM(p->refs, n);
1649
263k
        if (v == Py_None) {
1650
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1651
0
            break;
1652
0
        }
1653
263k
        retval = Py_NewRef(v);
1654
263k
        break;
1655
1656
1.22k
    case TYPE_SLICE:
1657
1.22k
    {
1658
1.22k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1659
1.22k
        if (idx < 0) {
1660
0
            break;
1661
0
        }
1662
1.22k
        PyObject *stop = NULL;
1663
1.22k
        PyObject *step = NULL;
1664
1.22k
        PyObject *start = r_object(p);
1665
1.22k
        if (start == NULL) {
1666
0
            goto cleanup;
1667
0
        }
1668
1.22k
        stop = r_object(p);
1669
1.22k
        if (stop == NULL) {
1670
0
            goto cleanup;
1671
0
        }
1672
1.22k
        step = r_object(p);
1673
1.22k
        if (step == NULL) {
1674
0
            goto cleanup;
1675
0
        }
1676
1.22k
        retval = PySlice_New(start, stop, step);
1677
1.22k
        r_ref_insert(retval, idx, flag, p);
1678
1.22k
    cleanup:
1679
1.22k
        Py_XDECREF(start);
1680
1.22k
        Py_XDECREF(stop);
1681
1.22k
        Py_XDECREF(step);
1682
1.22k
        break;
1683
1.22k
    }
1684
1685
0
    default:
1686
        /* Bogus data got written, which isn't ideal.
1687
           This will let you keep working and recover. */
1688
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1689
0
        break;
1690
1691
593k
    }
1692
593k
    p->depth--;
1693
593k
    return retval;
1694
593k
}
1695
1696
static PyObject *
1697
read_object(RFILE *p)
1698
642
{
1699
642
    PyObject *v;
1700
642
    if (PyErr_Occurred()) {
1701
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1702
0
        return NULL;
1703
0
    }
1704
642
    if (p->ptr && p->end) {
1705
642
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1706
0
            return NULL;
1707
0
        }
1708
642
    } else if (p->fp || p->readable) {
1709
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1710
0
            return NULL;
1711
0
        }
1712
0
    }
1713
642
    v = r_object(p);
1714
642
    if (v == NULL && !PyErr_Occurred())
1715
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1716
642
    return v;
1717
642
}
1718
1719
int
1720
PyMarshal_ReadShortFromFile(FILE *fp)
1721
0
{
1722
0
    RFILE rf;
1723
0
    int res;
1724
0
    assert(fp);
1725
0
    rf.readable = NULL;
1726
0
    rf.fp = fp;
1727
0
    rf.end = rf.ptr = NULL;
1728
0
    rf.buf = NULL;
1729
0
    res = r_short(&rf);
1730
0
    if (rf.buf != NULL)
1731
0
        PyMem_Free(rf.buf);
1732
0
    return res;
1733
0
}
1734
1735
long
1736
PyMarshal_ReadLongFromFile(FILE *fp)
1737
0
{
1738
0
    RFILE rf;
1739
0
    long res;
1740
0
    rf.fp = fp;
1741
0
    rf.readable = NULL;
1742
0
    rf.ptr = rf.end = NULL;
1743
0
    rf.buf = NULL;
1744
0
    res = r_long(&rf);
1745
0
    if (rf.buf != NULL)
1746
0
        PyMem_Free(rf.buf);
1747
0
    return res;
1748
0
}
1749
1750
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1751
static off_t
1752
getfilesize(FILE *fp)
1753
0
{
1754
0
    struct _Py_stat_struct st;
1755
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1756
0
        return -1;
1757
#if SIZEOF_OFF_T == 4
1758
    else if (st.st_size >= INT_MAX)
1759
        return (off_t)INT_MAX;
1760
#endif
1761
0
    else
1762
0
        return (off_t)st.st_size;
1763
0
}
1764
1765
/* If we can get the size of the file up-front, and it's reasonably small,
1766
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1767
 * than reading a byte at a time from file; speeds .pyc imports.
1768
 * CAUTION:  since this may read the entire remainder of the file, don't
1769
 * call it unless you know you're done with the file.
1770
 */
1771
PyObject *
1772
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1773
0
{
1774
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1775
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1776
0
    off_t filesize;
1777
0
    filesize = getfilesize(fp);
1778
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1779
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1780
0
        if (pBuf != NULL) {
1781
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1782
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1783
0
            PyMem_Free(pBuf);
1784
0
            return v;
1785
0
        }
1786
1787
0
    }
1788
    /* We don't have fstat, or we do but the file is larger than
1789
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1790
     */
1791
0
    return PyMarshal_ReadObjectFromFile(fp);
1792
1793
0
#undef REASONABLE_FILE_LIMIT
1794
0
}
1795
1796
PyObject *
1797
PyMarshal_ReadObjectFromFile(FILE *fp)
1798
0
{
1799
0
    RFILE rf;
1800
0
    PyObject *result;
1801
0
    rf.allow_code = 1;
1802
0
    rf.fp = fp;
1803
0
    rf.readable = NULL;
1804
0
    rf.depth = 0;
1805
0
    rf.ptr = rf.end = NULL;
1806
0
    rf.buf = NULL;
1807
0
    rf.refs = PyList_New(0);
1808
0
    if (rf.refs == NULL)
1809
0
        return NULL;
1810
0
    result = read_object(&rf);
1811
0
    Py_DECREF(rf.refs);
1812
0
    if (rf.buf != NULL)
1813
0
        PyMem_Free(rf.buf);
1814
0
    return result;
1815
0
}
1816
1817
PyObject *
1818
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1819
291
{
1820
291
    RFILE rf;
1821
291
    PyObject *result;
1822
291
    rf.allow_code = 1;
1823
291
    rf.fp = NULL;
1824
291
    rf.readable = NULL;
1825
291
    rf.ptr = str;
1826
291
    rf.end = str + len;
1827
291
    rf.buf = NULL;
1828
291
    rf.depth = 0;
1829
291
    rf.refs = PyList_New(0);
1830
291
    if (rf.refs == NULL)
1831
0
        return NULL;
1832
291
    result = read_object(&rf);
1833
291
    Py_DECREF(rf.refs);
1834
291
    if (rf.buf != NULL)
1835
0
        PyMem_Free(rf.buf);
1836
291
    return result;
1837
291
}
1838
1839
/* Marshal `x` into a new bytes object using format `version`.
 *
 * `allow_code` is stored on the writer and controls whether code objects may
 * be written.  The "marshal.dumps" audit event is raised first.
 *
 * Returns a new bytes object, or NULL with an exception set: MemoryError
 * for WFERR_NOMEMORY, otherwise ValueError with a message describing the
 * failure.
 */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    WFILE wf;
    memset(&wf, 0, sizeof(wf));

    /* Start with a small buffer; the final size is fixed up below. */
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    wf.buf = PyBytes_AS_STRING(wf.str);
    wf.ptr = wf.buf;
    wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;

    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }
    w_object(x, &wf);
    w_clear_refs(&wf);

    /* Trim the buffer to the bytes actually written.  wf.str is re-checked
     * because it is not guaranteed to still be set after w_object().
     */
    if (wf.str != NULL) {
        const char *start = PyBytes_AS_STRING(wf.str);
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - start);
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* Translate the writer's error code into a Python exception. */
    Py_XDECREF(wf.str);
    if (wf.error == WFERR_NOMEMORY) {
        PyErr_NoMemory();
        return NULL;
    }

    const char *message;
    if (wf.error == WFERR_NESTEDTOODEEP) {
        message = "object too deeply nested to marshal";
    }
    else if (wf.error == WFERR_CODE_NOT_ALLOWED) {
        message = "marshalling code objects is disallowed";
    }
    else {
        /* WFERR_UNMARSHALLABLE and any unrecognised code. */
        message = "unmarshallable object";
    }
    PyErr_SetString(PyExc_ValueError, message);
    return NULL;
}
1891
1892
/* Public C API: marshal `x` to a bytes object in format `version`.
 * Forwards to _PyMarshal_WriteObjectToString() with code objects allowed.
 */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1897
1898
/* And an interface for Python programs... */
1899
/*[clinic input]
1900
marshal.dump
1901
1902
    value: object
1903
        Must be a supported type.
1904
    file: object
1905
        Must be a writeable binary file.
1906
    version: int(c_default="Py_MARSHAL_VERSION") = version
1907
        Indicates the data format that dump should use.
1908
    /
1909
    *
1910
    allow_code: bool = True
1911
        Allow to write code objects.
1912
1913
Write the value on the open file.
1914
1915
If the value has (or contains an object that has) an unsupported type, a
1916
ValueError exception is raised - but garbage data will also be written
1917
to the file. The object will not be properly read back by load().
1918
[clinic start generated code]*/
1919
1920
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently:
     * the value is marshalled to a bytes object in full, and that object is
     * then handed to file.write() in a single call.
     */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    /* The result of file.write() (or NULL on error) is returned as-is. */
    PyObject *written = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                  payload);
    Py_DECREF(payload);
    return written;
}
1936
1937
/*[clinic input]
1938
marshal.load
1939
1940
    file: object
1941
        Must be readable binary file.
1942
    /
1943
    *
1944
    allow_code: bool = True
1945
        Allow to load code objects.
1946
1947
Read one value from the open file and return it.
1948
1949
If no valid value is read (e.g. because the data has a different Python
1950
version's incompatible marshal format), raise EOFError, ValueError or
1951
TypeError.
1952
1953
Note: If an object containing an unsupported type was marshalled with
1954
dump(), load() will substitute None for the unmarshallable type.
1955
[clinic start generated code]*/
1956
1957
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    /*
     * Probe the object first: call file.read(0) to make sure it has a read
     * method and that the method returns bytes.
     * This can be removed if we guarantee good error handling
     * for r_string()
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }
    if (!PyBytes_Check(probe)) {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
        Py_DECREF(probe);
        return NULL;
    }

    PyObject *result = NULL;
    RFILE rf;
    rf.refs = PyList_New(0);
    if (rf.refs != NULL) {
        /* Reader backed by the Python-level file object only. */
        rf.allow_code = allow_code;
        rf.depth = 0;
        rf.fp = NULL;
        rf.readable = file;
        rf.ptr = NULL;
        rf.end = NULL;
        rf.buf = NULL;

        result = read_object(&rf);

        Py_DECREF(rf.refs);
        if (rf.buf != NULL) {
            PyMem_Free(rf.buf);
        }
    }

    Py_DECREF(probe);
    return result;
}
1998
1999
/*[clinic input]
2000
@permit_long_summary
2001
@permit_long_docstring_body
2002
marshal.dumps
2003
2004
    value: object
2005
        Must be a supported type.
2006
    version: int(c_default="Py_MARSHAL_VERSION") = version
2007
        Indicates the data format that dumps should use.
2008
    /
2009
    *
2010
    allow_code: bool = True
2011
        Allow to write code objects.
2012
2013
Return the bytes object that would be written to a file by dump(value, file).
2014
2015
Raise a ValueError exception if value has (or contains an object that has) an
2016
unsupported type.
2017
[clinic start generated code]*/
2018
2019
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=80cd3f30c1637ade]*/
{
    /* Thin wrapper: all the work happens in the shared string writer. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2026
2027
/*[clinic input]
2028
marshal.loads
2029
2030
    bytes: Py_buffer
2031
    /
2032
    *
2033
    allow_code: bool = True
2034
        Allow to load code objects.
2035
2036
Convert the bytes-like object to a value.
2037
2038
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2039
bytes in the input are ignored.
2040
[clinic start generated code]*/
2041
2042
static PyObject *
2043
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2044
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2045
351
{
2046
351
    RFILE rf;
2047
351
    char *s = bytes->buf;
2048
351
    Py_ssize_t n = bytes->len;
2049
351
    PyObject* result;
2050
351
    rf.allow_code = allow_code;
2051
351
    rf.fp = NULL;
2052
351
    rf.readable = NULL;
2053
351
    rf.ptr = s;
2054
351
    rf.end = s + n;
2055
351
    rf.depth = 0;
2056
351
    if ((rf.refs = PyList_New(0)) == NULL)
2057
0
        return NULL;
2058
351
    result = read_object(&rf);
2059
351
    Py_DECREF(rf.refs);
2060
351
    return result;
2061
351
}
2062
2063
static PyMethodDef marshal_methods[] = {
2064
    MARSHAL_DUMP_METHODDEF
2065
    MARSHAL_LOAD_METHODDEF
2066
    MARSHAL_DUMPS_METHODDEF
2067
    MARSHAL_LOADS_METHODDEF
2068
    {NULL,              NULL}           /* sentinel */
2069
};
2070
2071
2072
PyDoc_STRVAR(module_doc,
2073
"This module contains functions that can read and write Python values in\n\
2074
a binary format. The format is specific to Python, but independent of\n\
2075
machine architecture issues.\n\
2076
\n\
2077
Not all Python object types are supported; in general, only objects\n\
2078
whose value is independent from a particular invocation of Python can be\n\
2079
written and read by this module. The following types are supported:\n\
2080
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2081
tuples, lists, sets, dictionaries, and code objects, where it\n\
2082
should be understood that tuples, lists and dictionaries are only\n\
2083
supported as long as the values contained therein are themselves\n\
2084
supported; and recursive lists and dictionaries should not be written\n\
2085
(they will cause infinite loops).\n\
2086
\n\
2087
Variables:\n\
2088
\n\
2089
version -- indicates the format that the module uses. Version 0 is the\n\
2090
    historical format, version 1 shares interned strings and version 2\n\
2091
    uses a binary format for floating-point numbers.\n\
2092
    Version 3 shares common object references (New in version 3.4).\n\
2093
\n\
2094
Functions:\n\
2095
\n\
2096
dump() -- write value to a file\n\
2097
load() -- read value from a file\n\
2098
dumps() -- marshal value as a bytes object\n\
2099
loads() -- read value from a bytes-like object");
2100
2101
2102
static int
2103
marshal_module_exec(PyObject *mod)
2104
22
{
2105
22
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2106
0
        return -1;
2107
0
    }
2108
22
    return 0;
2109
22
}
2110
2111
static PyModuleDef_Slot marshalmodule_slots[] = {
2112
    {Py_mod_exec, marshal_module_exec},
2113
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2114
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2115
    {0, NULL}
2116
};
2117
2118
static struct PyModuleDef marshalmodule = {
2119
    PyModuleDef_HEAD_INIT,
2120
    .m_name = "marshal",
2121
    .m_doc = module_doc,
2122
    .m_methods = marshal_methods,
2123
    .m_slots = marshalmodule_slots,
2124
};
2125
2126
PyMODINIT_FUNC
2127
PyMarshal_Init(void)
2128
22
{
2129
22
    return PyModuleDef_Init(&marshalmodule);
2130
22
}