Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
18
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
19
20
#include "marshal.h"                 // Py_MARSHAL_VERSION
21
22
#ifdef __APPLE__
23
#  include "TargetConditionals.h"
24
#endif /* __APPLE__ */
25
26
27
/*[clinic input]
28
module marshal
29
[clinic start generated code]*/
30
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
31
32
#include "clinic/marshal.c.h"
33
34
/* High water mark to determine when the marshalled object is dangerously deep
35
 * and risks coring the interpreter.  When the object stack gets this deep,
36
 * raise an exception instead of continuing.
37
 * On Windows builds, reduce this value.
38
 *
39
 * BUG: https://bugs.python.org/issue33720
40
 * On Windows PGO builds, the r_object function overallocates its stack and
41
 * can cause a stack overflow. We reduce the maximum depth for all Windows
42
 * releases to protect against this.
43
 * (The condition was previously: #if defined(MS_WINDOWS) && defined(Py_DEBUG))
44
 */
45
#if defined(MS_WINDOWS)
46
#  define MAX_MARSHAL_STACK_DEPTH 1000
47
#elif defined(__wasi__)
48
#  define MAX_MARSHAL_STACK_DEPTH 1500
49
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
50
// It won't be defined on older macOS SDKs
51
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
52
#  define MAX_MARSHAL_STACK_DEPTH 1500
53
#else
54
4.57M
#  define MAX_MARSHAL_STACK_DEPTH 2000
55
#endif
56
57
/* Supported types */
58
0
#define TYPE_NULL               '0'
59
40.9k
#define TYPE_NONE               'N'
60
1.94k
#define TYPE_FALSE              'F'
61
1.72k
#define TYPE_TRUE               'T'
62
0
#define TYPE_STOPITER           'S'
63
400
#define TYPE_ELLIPSIS           '.'
64
651
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
65
3
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
66
442
#define TYPE_LONG               'l'  // See also TYPE_INT.
67
530k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
68
71
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
69
0
#define TYPE_LIST               '['
70
0
#define TYPE_DICT               '{'
71
0
#define TYPE_FROZENDICT         '}'
72
175k
#define TYPE_CODE               'c'
73
3.58k
#define TYPE_UNICODE            'u'
74
#define TYPE_UNKNOWN            '?'
75
// added in version 2:
76
1.53k
#define TYPE_SET                '<'
77
512
#define TYPE_FROZENSET          '>'
78
// added in version 5:
79
3.32k
#define TYPE_SLICE              ':'
80
// Remember to update the version and documentation when adding new types.
81
82
/* Special cases for unicode strings (added in version 4) */
83
206
#define TYPE_INTERNED           't' // Version 1+
84
27.2k
#define TYPE_ASCII              'a'
85
0
#define TYPE_ASCII_INTERNED     'A'
86
1.31M
#define TYPE_SHORT_ASCII        'z'
87
1.15M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
88
89
/* Special cases for small objects */
90
24.8k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
91
469k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+
92
93
/* Supported for backwards compatibility */
94
0
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
95
0
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
96
0
#define TYPE_INT64              'I'  // Not generated any more.
97
98
/* References (added in version 3) */
99
1.89M
#define TYPE_REF                'r'
100
9.01M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
101
102
103
// Error codes:
104
79.6k
#define WFERR_OK 0
105
24
#define WFERR_UNMARSHALLABLE 1
106
0
#define WFERR_NESTEDTOODEEP 2
107
12
#define WFERR_NOMEMORY 3
108
0
#define WFERR_CODE_NOT_ALLOWED 4
109
110
typedef struct {
111
    FILE *fp;
112
    int error;  /* see WFERR_* values */
113
    int depth;
114
    PyObject *str;
115
    char *ptr;
116
    const char *end;
117
    char *buf;
118
    _Py_hashtable_t *hashtable;
119
    int version;
120
    int allow_code;
121
} WFILE;
122
123
386k
#define w_byte(c, p) do {                               \
124
386k
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
125
386k
            *(p)->ptr++ = (c);                          \
126
386k
    } while(0)
127
128
static void
129
w_flush(WFILE *p)
130
0
{
131
0
    assert(p->fp != NULL);
132
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
133
0
    p->ptr = p->buf;
134
0
}
135
136
static int
137
w_reserve(WFILE *p, Py_ssize_t needed)
138
567
{
139
567
    Py_ssize_t pos, size, delta;
140
567
    if (p->ptr == NULL)
141
0
        return 0; /* An error already occurred */
142
567
    if (p->fp != NULL) {
143
0
        w_flush(p);
144
0
        return needed <= p->end - p->ptr;
145
0
    }
146
567
    assert(p->str != NULL);
147
567
    pos = p->ptr - p->buf;
148
567
    size = PyBytes_GET_SIZE(p->str);
149
567
    if (size > 16*1024*1024)
150
0
        delta = (size >> 3);            /* 12.5% overallocation */
151
567
    else
152
567
        delta = size + 1024;
153
567
    delta = Py_MAX(delta, needed);
154
567
    if (delta > PY_SSIZE_T_MAX - size) {
155
0
        p->error = WFERR_NOMEMORY;
156
0
        return 0;
157
0
    }
158
567
    size += delta;
159
567
    if (_PyBytes_Resize(&p->str, size) != 0) {
160
0
        p->end = p->ptr = p->buf = NULL;
161
0
        return 0;
162
0
    }
163
567
    else {
164
567
        p->buf = PyBytes_AS_STRING(p->str);
165
567
        p->ptr = p->buf + pos;
166
567
        p->end = p->buf + size;
167
567
        return 1;
168
567
    }
169
567
}
170
171
static void
172
w_string(const void *s, Py_ssize_t n, WFILE *p)
173
27.2k
{
174
27.2k
    Py_ssize_t m;
175
27.2k
    if (!n || p->ptr == NULL)
176
213
        return;
177
27.0k
    m = p->end - p->ptr;
178
27.0k
    if (p->fp != NULL) {
179
0
        if (n <= m) {
180
0
            memcpy(p->ptr, s, n);
181
0
            p->ptr += n;
182
0
        }
183
0
        else {
184
0
            w_flush(p);
185
0
            fwrite(s, 1, n, p->fp);
186
0
        }
187
0
    }
188
27.0k
    else {
189
27.0k
        if (n <= m || w_reserve(p, n - m)) {
190
27.0k
            memcpy(p->ptr, s, n);
191
27.0k
            p->ptr += n;
192
27.0k
        }
193
27.0k
    }
194
27.0k
}
195
196
static void
197
w_short(int x, WFILE *p)
198
23
{
199
23
    w_byte((char)( x      & 0xff), p);
200
23
    w_byte((char)((x>> 8) & 0xff), p);
201
23
}
202
203
static void
204
w_long(long x, WFILE *p)
205
70.4k
{
206
70.4k
    w_byte((char)( x      & 0xff), p);
207
70.4k
    w_byte((char)((x>> 8) & 0xff), p);
208
70.4k
    w_byte((char)((x>>16) & 0xff), p);
209
70.4k
    w_byte((char)((x>>24) & 0xff), p);
210
70.4k
}
211
212
573k
#define SIZE32_MAX  0x7FFFFFFF
213
214
#if SIZEOF_SIZE_T > 4
215
10.1k
# define W_SIZE(n, p)  do {                     \
216
10.1k
        if ((n) > SIZE32_MAX) {                 \
217
0
            (p)->depth--;                       \
218
0
            (p)->error = WFERR_UNMARSHALLABLE;  \
219
0
            return;                             \
220
0
        }                                       \
221
10.1k
        w_long((long)(n), p);                   \
222
10.1k
    } while(0)
223
#else
224
# define W_SIZE  w_long
225
#endif
226
227
static void
228
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
229
10.1k
{
230
10.1k
        W_SIZE(n, p);
231
10.1k
        w_string(s, n, p);
232
10.1k
}
233
234
static void
235
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
236
17.0k
{
237
17.0k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
238
17.0k
    w_string(s, n, p);
239
17.0k
}
240
241
/* We assume that Python ints are stored internally in a base that is some
242
   power of 2**15; for the sake of portability we'll always read and write
243
   them in base exactly 2**15. */
244
245
3.87k
#define PyLong_MARSHAL_SHIFT 15
246
1.71k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
247
23
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
248
249
41.1k
#define W_TYPE(t, p) do { \
250
41.1k
    w_byte((t) | flag, (p)); \
251
41.1k
} while(0)
252
253
static PyObject *
254
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
255
256
#define _r_digits(bitsize)                                                \
257
static void                                                               \
258
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
259
3
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
260
3
{                                                                         \
261
3
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
262
3
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
263
3
    uint ## bitsize ## _t d = digits[n - 1];                              \
264
3
                                                                          \
265
3
    assert(marshal_ratio > 0);                                            \
266
3
    assert(n >= 1);                                                       \
267
3
    assert(d != 0); /* a PyLong is always normalized */                   \
268
3
    do {                                                                  \
269
3
        d >>= PyLong_MARSHAL_SHIFT;                                       \
270
3
        l++;                                                              \
271
3
    } while (d != 0);                                                     \
272
3
    if (l > SIZE32_MAX) {                                                 \
273
0
        p->depth--;                                                       \
274
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
275
0
        return;                                                           \
276
0
    }                                                                     \
277
3
    w_long((long)(negative ? -l : l), p);                                 \
278
3
                                                                          \
279
9
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
280
6
        d = digits[i];                                                    \
281
18
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
282
12
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
283
12
            d >>= PyLong_MARSHAL_SHIFT;                                   \
284
12
        }                                                                 \
285
6
        assert(d == 0);                                                   \
286
6
    }                                                                     \
287
3
    d = digits[n - 1];                                                    \
288
3
    do {                                                                  \
289
3
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
290
3
        d >>= PyLong_MARSHAL_SHIFT;                                       \
291
3
    } while (d != 0);                                                     \
292
3
}
293
0
_r_digits(16)
294
3
_r_digits(32)
295
#undef _r_digits
296
297
static void
298
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
299
5
{
300
5
    W_TYPE(TYPE_LONG, p);
301
5
    if (_PyLong_IsZero(ob)) {
302
0
        w_long((long)0, p);
303
0
        return;
304
0
    }
305
306
5
    PyLongExport long_export;
307
308
5
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
309
0
        p->depth--;
310
0
        p->error = WFERR_UNMARSHALLABLE;
311
0
        return;
312
0
    }
313
5
    if (!long_export.digits) {
314
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
315
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
316
2
        uint64_t d = abs_value;
317
2
        long l = 0;
318
319
        /* set l to number of base PyLong_MARSHAL_BASE digits */
320
8
        do {
321
8
            d >>= PyLong_MARSHAL_SHIFT;
322
8
            l += sign;
323
8
        } while (d);
324
2
        w_long(l, p);
325
326
2
        d = abs_value;
327
8
        do {
328
8
            w_short(d & PyLong_MARSHAL_MASK, p);
329
8
            d >>= PyLong_MARSHAL_SHIFT;
330
8
        } while (d);
331
2
        return;
332
2
    }
333
334
3
    const PyLongLayout *layout = PyLong_GetNativeLayout();
335
3
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
336
337
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
338
3
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
339
3
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
340
341
    /* other assumptions on PyLongObject internals */
342
3
    assert(layout->bits_per_digit <= 32);
343
3
    assert(layout->digits_order == -1);
344
3
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
345
3
    assert(layout->digit_size == 2 || layout->digit_size == 4);
346
347
3
    if (layout->digit_size == 4) {
348
3
        _r_digits32(long_export.digits, long_export.ndigits,
349
3
                    long_export.negative, marshal_ratio, p);
350
3
    }
351
0
    else {
352
0
        _r_digits16(long_export.digits, long_export.ndigits,
353
0
                    long_export.negative, marshal_ratio, p);
354
0
    }
355
3
    PyLong_FreeExport(&long_export);
356
3
}
357
358
static void
359
w_float_bin(double v, WFILE *p)
360
16
{
361
16
    char buf[8];
362
16
    if (PyFloat_Pack8(v, buf, 1) < 0) {
363
0
        p->error = WFERR_UNMARSHALLABLE;
364
0
        return;
365
0
    }
366
16
    w_string(buf, 8, p);
367
16
}
368
369
static void
370
w_float_str(double v, WFILE *p)
371
0
{
372
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
373
0
    if (!buf) {
374
0
        p->error = WFERR_NOMEMORY;
375
0
        return;
376
0
    }
377
0
    w_short_pstring(buf, strlen(buf), p);
378
0
    PyMem_Free(buf);
379
0
}
380
381
static int
382
w_ref(PyObject *v, char *flag, WFILE *p)
383
78.5k
{
384
78.5k
    _Py_hashtable_entry_t *entry;
385
386
78.5k
    if (p->version < 3 || p->hashtable == NULL)
387
0
        return 0; /* not writing object references */
388
389
    /* If it has only one reference, it definitely isn't shared.
390
     * But we always use TYPE_REF for interned strings, to keep PYC files as
391
     * stable as possible.
392
     */
393
78.5k
    if (_PyObject_IsUniquelyReferenced(v) &&
394
21.2k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
395
19.5k
        return 0;
396
19.5k
    }
397
398
59.0k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
399
59.0k
    if (entry != NULL) {
400
        /* write the reference index to the stream */
401
37.4k
        uintptr_t w = (uintptr_t)entry->value;
402
37.4k
        if (w & 0x80000000LU) {
403
0
            PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
404
0
            goto err;
405
0
        }
406
        /* we don't store "long" indices in the hashtable */
407
37.4k
        assert(w <= 0x7fffffff);
408
37.4k
        w_byte(TYPE_REF, p);
409
37.4k
        w_long((int)w, p);
410
37.4k
        return 1;
411
37.4k
    } else {
412
21.5k
        size_t w = p->hashtable->nentries;
413
        /* we don't support long indices */
414
21.5k
        if (w >= 0x7fffffff) {
415
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
416
0
            goto err;
417
0
        }
418
        // Corresponding code should call w_complete() after
419
        // writing the object.
420
21.5k
        if (PyCode_Check(v) || PySlice_Check(v) || PyFrozenDict_CheckExact(v)) {
421
184
            w |= 0x80000000LU;
422
184
        }
423
21.5k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
424
21.5k
                              (void *)(uintptr_t)w) < 0) {
425
0
            Py_DECREF(v);
426
0
            goto err;
427
0
        }
428
21.5k
        *flag |= FLAG_REF;
429
21.5k
        return 0;
430
21.5k
    }
431
0
err:
432
0
    p->error = WFERR_UNMARSHALLABLE;
433
0
    return 1;
434
59.0k
}
435
436
static void
437
w_complete(PyObject *v, WFILE *p)
438
3.47k
{
439
3.47k
    if (p->version < 3 || p->hashtable == NULL) {
440
0
        return;
441
0
    }
442
3.47k
    if (_PyObject_IsUniquelyReferenced(v)) {
443
3.29k
        return;
444
3.29k
    }
445
446
184
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
447
184
    if (entry == NULL) {
448
0
        return;
449
0
    }
450
184
    assert(entry != NULL);
451
184
    uintptr_t w = (uintptr_t)entry->value;
452
184
    assert(w & 0x80000000LU);
453
184
    w &= ~0x80000000LU;
454
184
    entry->value = (void *)(uintptr_t)w;
455
184
}
456
457
static void
458
w_complex_object(PyObject *v, char flag, WFILE *p);
459
460
static void
461
w_object(PyObject *v, WFILE *p)
462
79.2k
{
463
79.2k
    char flag = '\0';
464
465
79.2k
    if (p->error != WFERR_OK) {
466
0
        return;
467
0
    }
468
469
79.2k
    p->depth++;
470
471
79.2k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
472
0
        p->error = WFERR_NESTEDTOODEEP;
473
0
    }
474
79.2k
    else if (v == NULL) {
475
0
        w_byte(TYPE_NULL, p);
476
0
    }
477
79.2k
    else if (v == Py_None) {
478
414
        w_byte(TYPE_NONE, p);
479
414
    }
480
78.7k
    else if (v == PyExc_StopIteration) {
481
0
        w_byte(TYPE_STOPITER, p);
482
0
    }
483
78.7k
    else if (v == Py_Ellipsis) {
484
3
        w_byte(TYPE_ELLIPSIS, p);
485
3
    }
486
78.7k
    else if (v == Py_False) {
487
158
        w_byte(TYPE_FALSE, p);
488
158
    }
489
78.6k
    else if (v == Py_True) {
490
53
        w_byte(TYPE_TRUE, p);
491
53
    }
492
78.5k
    else if (!w_ref(v, &flag, p))
493
41.1k
        w_complex_object(v, flag, p);
494
495
79.2k
    p->depth--;
496
79.2k
}
497
498
static void
499
w_complex_object(PyObject *v, char flag, WFILE *p)
500
41.1k
{
501
41.1k
    Py_ssize_t i, n;
502
503
41.1k
    if (PyLong_CheckExact(v)) {
504
2.15k
        int overflow;
505
2.15k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
506
2.15k
        if (overflow) {
507
3
            w_PyLong((PyLongObject *)v, flag, p);
508
3
        }
509
2.14k
        else {
510
2.14k
#if SIZEOF_LONG > 4
511
2.14k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
512
2.14k
            if (y && y != -1) {
513
                /* Too large for TYPE_INT */
514
2
                w_PyLong((PyLongObject*)v, flag, p);
515
2
            }
516
2.14k
            else
517
2.14k
#endif
518
2.14k
            {
519
2.14k
                W_TYPE(TYPE_INT, p);
520
2.14k
                w_long(x, p);
521
2.14k
            }
522
2.14k
        }
523
2.15k
    }
524
38.9k
    else if (PyFloat_CheckExact(v)) {
525
14
        if (p->version > 1) {
526
14
            W_TYPE(TYPE_BINARY_FLOAT, p);
527
14
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
528
14
        }
529
0
        else {
530
0
            W_TYPE(TYPE_FLOAT, p);
531
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
532
0
        }
533
14
    }
534
38.9k
    else if (PyComplex_CheckExact(v)) {
535
1
        if (p->version > 1) {
536
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
537
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
538
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
539
1
        }
540
0
        else {
541
0
            W_TYPE(TYPE_COMPLEX, p);
542
0
            w_float_str(PyComplex_RealAsDouble(v), p);
543
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
544
0
        }
545
1
    }
546
38.9k
    else if (PyBytes_CheckExact(v)) {
547
9.90k
        W_TYPE(TYPE_STRING, p);
548
9.90k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
549
9.90k
    }
550
29.0k
    else if (PyUnicode_CheckExact(v)) {
551
17.3k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
552
17.2k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
553
17.2k
            if (is_short) {
554
17.0k
                if (PyUnicode_CHECK_INTERNED(v))
555
15.2k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
556
1.79k
                else
557
1.79k
                    W_TYPE(TYPE_SHORT_ASCII, p);
558
17.0k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
559
17.0k
                                PyUnicode_GET_LENGTH(v), p);
560
17.0k
            }
561
208
            else {
562
208
                if (PyUnicode_CHECK_INTERNED(v))
563
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
564
208
                else
565
208
                    W_TYPE(TYPE_ASCII, p);
566
208
                w_pstring(PyUnicode_1BYTE_DATA(v),
567
208
                          PyUnicode_GET_LENGTH(v), p);
568
208
            }
569
17.2k
        }
570
69
        else {
571
69
            PyObject *utf8;
572
69
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
573
69
            if (utf8 == NULL) {
574
0
                p->depth--;
575
0
                p->error = WFERR_UNMARSHALLABLE;
576
0
                return;
577
0
            }
578
69
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
579
0
                W_TYPE(TYPE_INTERNED, p);
580
69
            else
581
69
                W_TYPE(TYPE_UNICODE, p);
582
69
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
583
69
            Py_DECREF(utf8);
584
69
        }
585
17.3k
    }
586
11.7k
    else if (PyTuple_CheckExact(v)) {
587
8.26k
        n = PyTuple_GET_SIZE(v);
588
8.26k
        if (p->version >= 4 && n < 256) {
589
8.26k
            W_TYPE(TYPE_SMALL_TUPLE, p);
590
8.26k
            w_byte((unsigned char)n, p);
591
8.26k
        }
592
0
        else {
593
0
            W_TYPE(TYPE_TUPLE, p);
594
0
            W_SIZE(n, p);
595
0
        }
596
52.5k
        for (i = 0; i < n; i++) {
597
44.2k
            w_object(PyTuple_GET_ITEM(v, i), p);
598
44.2k
        }
599
8.26k
    }
600
3.48k
    else if (PyList_CheckExact(v)) {
601
0
        W_TYPE(TYPE_LIST, p);
602
0
        n = PyList_GET_SIZE(v);
603
0
        W_SIZE(n, p);
604
0
        for (i = 0; i < n; i++) {
605
0
            w_object(PyList_GET_ITEM(v, i), p);
606
0
        }
607
0
    }
608
3.48k
    else if (PyAnyDict_CheckExact(v)) {
609
0
        Py_ssize_t pos;
610
0
        PyObject *key, *value;
611
0
        if (PyFrozenDict_CheckExact(v)) {
612
0
            if (p->version < 6) {
613
0
                w_byte(TYPE_UNKNOWN, p);
614
0
                p->error = WFERR_UNMARSHALLABLE;
615
0
                return;
616
0
            }
617
618
0
            W_TYPE(TYPE_FROZENDICT, p);
619
0
        }
620
0
        else {
621
0
            W_TYPE(TYPE_DICT, p);
622
0
        }
623
        /* This one is NULL object terminated! */
624
0
        pos = 0;
625
0
        while (PyDict_Next(v, &pos, &key, &value)) {
626
0
            w_object(key, p);
627
0
            w_object(value, p);
628
0
        }
629
0
        w_object((PyObject *)NULL, p);
630
0
        if (PyFrozenDict_CheckExact(v)) {
631
0
            w_complete(v, p);
632
0
        }
633
0
    }
634
3.48k
    else if (PyAnySet_CheckExact(v)) {
635
12
        PyObject *value;
636
12
        Py_ssize_t pos = 0;
637
12
        Py_hash_t hash;
638
639
12
        if (PyFrozenSet_CheckExact(v))
640
12
            W_TYPE(TYPE_FROZENSET, p);
641
0
        else
642
0
            W_TYPE(TYPE_SET, p);
643
12
        n = PySet_GET_SIZE(v);
644
12
        W_SIZE(n, p);
645
        // bpo-37596: To support reproducible builds, sets and frozensets need
646
        // to have their elements serialized in a consistent order (even when
647
        // they have been scrambled by hash randomization). To ensure this, we
648
        // use an order equivalent to sorted(v, key=marshal.dumps):
649
12
        PyObject *pairs = PyList_New(n);
650
12
        if (pairs == NULL) {
651
0
            p->error = WFERR_NOMEMORY;
652
0
            return;
653
0
        }
654
12
        Py_ssize_t i = 0;
655
12
        Py_BEGIN_CRITICAL_SECTION(v);
656
85
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
657
73
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
658
73
                                    p->version, p->allow_code);
659
73
            if (dump == NULL) {
660
0
                p->error = WFERR_UNMARSHALLABLE;
661
0
                Py_DECREF(value);
662
0
                break;
663
0
            }
664
73
            PyObject *pair = _PyTuple_FromPairSteal(dump, value);
665
73
            if (pair == NULL) {
666
0
                p->error = WFERR_NOMEMORY;
667
0
                break;
668
0
            }
669
73
            PyList_SET_ITEM(pairs, i++, pair);
670
73
        }
671
12
        Py_END_CRITICAL_SECTION();
672
12
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
673
0
            Py_DECREF(pairs);
674
0
            return;
675
0
        }
676
12
        assert(i == n);
677
12
        if (PyList_Sort(pairs)) {
678
0
            p->error = WFERR_NOMEMORY;
679
0
            Py_DECREF(pairs);
680
0
            return;
681
0
        }
682
85
        for (Py_ssize_t i = 0; i < n; i++) {
683
73
            PyObject *pair = PyList_GET_ITEM(pairs, i);
684
73
            value = PyTuple_GET_ITEM(pair, 1);
685
73
            w_object(value, p);
686
73
        }
687
12
        Py_DECREF(pairs);
688
12
    }
689
3.47k
    else if (PyCode_Check(v)) {
690
3.45k
        if (!p->allow_code) {
691
0
            p->error = WFERR_CODE_NOT_ALLOWED;
692
0
            return;
693
0
        }
694
3.45k
        PyCodeObject *co = (PyCodeObject *)v;
695
3.45k
        PyObject *co_code = _PyCode_GetCode(co);
696
3.45k
        if (co_code == NULL) {
697
0
            p->error = WFERR_NOMEMORY;
698
0
            return;
699
0
        }
700
3.45k
        W_TYPE(TYPE_CODE, p);
701
3.45k
        w_long(co->co_argcount, p);
702
3.45k
        w_long(co->co_posonlyargcount, p);
703
3.45k
        w_long(co->co_kwonlyargcount, p);
704
3.45k
        w_long(co->co_stacksize, p);
705
3.45k
        w_long(co->co_flags, p);
706
3.45k
        w_object(co_code, p);
707
3.45k
        w_object(co->co_consts, p);
708
3.45k
        w_object(co->co_names, p);
709
3.45k
        w_object(co->co_localsplusnames, p);
710
3.45k
        w_object(co->co_localspluskinds, p);
711
3.45k
        w_object(co->co_filename, p);
712
3.45k
        w_object(co->co_name, p);
713
3.45k
        w_object(co->co_qualname, p);
714
3.45k
        w_long(co->co_firstlineno, p);
715
3.45k
        w_object(co->co_linetable, p);
716
3.45k
        w_object(co->co_exceptiontable, p);
717
3.45k
        Py_DECREF(co_code);
718
3.45k
        w_complete(v, p);
719
3.45k
    }
720
26
    else if (PyObject_CheckBuffer(v)) {
721
        /* Write unknown bytes-like objects as a bytes object */
722
0
        Py_buffer view;
723
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
724
0
            w_byte(TYPE_UNKNOWN, p);
725
0
            p->depth--;
726
0
            p->error = WFERR_UNMARSHALLABLE;
727
0
            return;
728
0
        }
729
0
        W_TYPE(TYPE_STRING, p);
730
0
        w_pstring(view.buf, view.len, p);
731
0
        PyBuffer_Release(&view);
732
0
    }
733
26
    else if (PySlice_Check(v)) {
734
26
        if (p->version < 5) {
735
0
            w_byte(TYPE_UNKNOWN, p);
736
0
            p->error = WFERR_UNMARSHALLABLE;
737
0
            return;
738
0
        }
739
26
        PySliceObject *slice = (PySliceObject *)v;
740
26
        W_TYPE(TYPE_SLICE, p);
741
26
        w_object(slice->start, p);
742
26
        w_object(slice->stop, p);
743
26
        w_object(slice->step, p);
744
26
        w_complete(v, p);
745
26
    }
746
0
    else {
747
0
        W_TYPE(TYPE_UNKNOWN, p);
748
0
        p->error = WFERR_UNMARSHALLABLE;
749
0
    }
750
41.1k
}
751
752
static void
753
w_decref_entry(void *key)
754
21.5k
{
755
21.5k
    PyObject *entry_key = (PyObject *)key;
756
21.5k
    Py_XDECREF(entry_key);
757
21.5k
}
758
759
static int
760
w_init_refs(WFILE *wf, int version)
761
246
{
762
246
    if (version >= 3) {
763
246
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
764
246
                                               _Py_hashtable_compare_direct,
765
246
                                               w_decref_entry, NULL, NULL);
766
246
        if (wf->hashtable == NULL) {
767
0
            PyErr_NoMemory();
768
0
            return -1;
769
0
        }
770
246
    }
771
246
    return 0;
772
246
}
773
774
static void
775
w_clear_refs(WFILE *wf)
776
246
{
777
246
    if (wf->hashtable != NULL) {
778
246
        _Py_hashtable_destroy(wf->hashtable);
779
246
    }
780
246
}
781
782
/* version currently has no effect for writing ints. */
783
/* Note that while the documentation states that this function
784
 * can error, currently it never does. Setting an exception in
785
 * this function should be regarded as an API-breaking change.
786
 */
787
void
788
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
789
0
{
790
0
    char buf[4];
791
0
    WFILE wf;
792
0
    memset(&wf, 0, sizeof(wf));
793
0
    wf.fp = fp;
794
0
    wf.ptr = wf.buf = buf;
795
0
    wf.end = wf.ptr + sizeof(buf);
796
0
    wf.error = WFERR_OK;
797
0
    wf.version = version;
798
0
    w_long(x, &wf);
799
0
    w_flush(&wf);
800
0
}
801
802
void
803
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
804
0
{
805
0
    char buf[BUFSIZ];
806
0
    WFILE wf;
807
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
808
0
        return; /* caller must check PyErr_Occurred() */
809
0
    }
810
0
    memset(&wf, 0, sizeof(wf));
811
0
    wf.fp = fp;
812
0
    wf.ptr = wf.buf = buf;
813
0
    wf.end = wf.ptr + sizeof(buf);
814
0
    wf.error = WFERR_OK;
815
0
    wf.version = version;
816
0
    wf.allow_code = 1;
817
0
    if (w_init_refs(&wf, version)) {
818
0
        return; /* caller must check PyErr_Occurred() */
819
0
    }
820
0
    w_object(x, &wf);
821
0
    w_clear_refs(&wf);
822
0
    w_flush(&wf);
823
0
}
824
825
typedef struct {
826
    FILE *fp;
827
    int depth;
828
    PyObject *readable;  /* Stream-like object being read from */
829
    const char *ptr;
830
    const char *end;
831
    char *buf;
832
    Py_ssize_t buf_size;
833
    PyObject *refs;  /* a list */
834
    int allow_code;
835
} RFILE;
836
837
static const char *
838
r_string(Py_ssize_t n, RFILE *p)
839
5.42M
{
840
5.42M
    Py_ssize_t read = -1;
841
842
5.42M
    if (p->ptr != NULL) {
843
        /* Fast path for loads() */
844
5.42M
        const char *res = p->ptr;
845
5.42M
        Py_ssize_t left = p->end - p->ptr;
846
5.42M
        if (left < n) {
847
0
            PyErr_SetString(PyExc_EOFError,
848
0
                            "marshal data too short");
849
0
            return NULL;
850
0
        }
851
5.42M
        p->ptr += n;
852
5.42M
        return res;
853
5.42M
    }
854
0
    if (p->buf == NULL) {
855
0
        p->buf = PyMem_Malloc(n);
856
0
        if (p->buf == NULL) {
857
0
            PyErr_NoMemory();
858
0
            return NULL;
859
0
        }
860
0
        p->buf_size = n;
861
0
    }
862
0
    else if (p->buf_size < n) {
863
0
        char *tmp = PyMem_Realloc(p->buf, n);
864
0
        if (tmp == NULL) {
865
0
            PyErr_NoMemory();
866
0
            return NULL;
867
0
        }
868
0
        p->buf = tmp;
869
0
        p->buf_size = n;
870
0
    }
871
872
0
    if (!p->readable) {
873
0
        assert(p->fp != NULL);
874
0
        read = fread(p->buf, 1, n, p->fp);
875
0
    }
876
0
    else {
877
0
        PyObject *res, *mview;
878
0
        Py_buffer buf;
879
880
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
881
0
            return NULL;
882
0
        mview = PyMemoryView_FromBuffer(&buf);
883
0
        if (mview == NULL)
884
0
            return NULL;
885
886
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
887
0
        if (res != NULL) {
888
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
889
0
            Py_DECREF(res);
890
0
        }
891
0
    }
892
0
    if (read != n) {
893
0
        if (!PyErr_Occurred()) {
894
0
            if (read > n)
895
0
                PyErr_Format(PyExc_ValueError,
896
0
                             "read() returned too much data: "
897
0
                             "%zd bytes requested, %zd returned",
898
0
                             n, read);
899
0
            else
900
0
                PyErr_SetString(PyExc_EOFError,
901
0
                                "EOF read where not expected");
902
0
        }
903
0
        return NULL;
904
0
    }
905
0
    return p->buf;
906
0
}
907
908
static int
909
r_byte(RFILE *p)
910
6.28M
{
911
6.28M
    if (p->ptr != NULL) {
912
6.28M
        if (p->ptr < p->end) {
913
6.28M
            return (unsigned char) *p->ptr++;
914
6.28M
        }
915
6.28M
    }
916
0
    else if (!p->readable) {
917
0
        assert(p->fp);
918
0
        int c = getc(p->fp);
919
0
        if (c != EOF) {
920
0
            return c;
921
0
        }
922
0
    }
923
0
    else {
924
0
        const char *ptr = r_string(1, p);
925
0
        if (ptr != NULL) {
926
0
            return *(const unsigned char *) ptr;
927
0
        }
928
0
        return EOF;
929
0
    }
930
0
    PyErr_SetString(PyExc_EOFError,
931
0
                    "EOF read where not expected");
932
0
    return EOF;
933
6.28M
}
934
935
static int
936
r_short(RFILE *p)
937
1.68k
{
938
1.68k
    short x = -1;
939
1.68k
    const unsigned char *buffer;
940
941
1.68k
    buffer = (const unsigned char *) r_string(2, p);
942
1.68k
    if (buffer != NULL) {
943
1.68k
        x = buffer[0];
944
1.68k
        x |= buffer[1] << 8;
945
        /* Sign-extension, in case short greater than 16 bits */
946
1.68k
        x |= -(x & 0x8000);
947
1.68k
    }
948
1.68k
    return x;
949
1.68k
}
950
951
static long
952
r_long(RFILE *p)
953
3.54M
{
954
3.54M
    long x = -1;
955
3.54M
    const unsigned char *buffer;
956
957
3.54M
    buffer = (const unsigned char *) r_string(4, p);
958
3.54M
    if (buffer != NULL) {
959
3.54M
        x = buffer[0];
960
3.54M
        x |= (long)buffer[1] << 8;
961
3.54M
        x |= (long)buffer[2] << 16;
962
3.54M
        x |= (long)buffer[3] << 24;
963
3.54M
#if SIZEOF_LONG > 4
964
        /* Sign extension for 64-bit machines */
965
3.54M
        x |= -(x & 0x80000000L);
966
3.54M
#endif
967
3.54M
    }
968
3.54M
    return x;
969
3.54M
}
970
971
/* r_long64 deals with the TYPE_INT64 code. */
972
static PyObject *
973
r_long64(RFILE *p)
974
0
{
975
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
976
0
    if (buffer == NULL) {
977
0
        return NULL;
978
0
    }
979
0
    return _PyLong_FromByteArray(buffer, 8,
980
0
                                 1 /* little endian */,
981
0
                                 1 /* signed */);
982
0
}
983
984
#define _w_digits(bitsize)                                              \
985
static int                                                              \
986
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
987
                   Py_ssize_t marshal_ratio,                            \
988
442
                   int shorts_in_top_digit, RFILE *p)                   \
989
442
{                                                                       \
990
442
    uint ## bitsize ## _t d;                                            \
991
442
                                                                        \
992
442
    assert(size >= 1);                                                  \
993
1.02k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
994
585
        d = 0;                                                          \
995
1.75k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
996
1.17k
            int md = r_short(p);                                        \
997
1.17k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
998
0
                goto bad_digit;                                         \
999
0
            }                                                           \
1000
1.17k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
1001
1.17k
        }                                                               \
1002
585
        digits[i] = d;                                                  \
1003
585
    }                                                                   \
1004
442
                                                                        \
1005
442
    d = 0;                                                              \
1006
960
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
1007
518
        int md = r_short(p);                                            \
1008
518
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
1009
0
            goto bad_digit;                                             \
1010
0
        }                                                               \
1011
518
        /* topmost marshal digit should be nonzero */                   \
1012
518
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
1013
0
            PyErr_SetString(PyExc_ValueError,                           \
1014
0
                "bad marshal data (unnormalized long data)");           \
1015
0
            return -1;                                                  \
1016
0
        }                                                               \
1017
518
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
1018
518
    }                                                                   \
1019
442
    assert(!PyErr_Occurred());                                          \
1020
442
    /* top digit should be nonzero, else the resulting PyLong won't be  \
1021
442
       normalized */                                                    \
1022
442
    digits[size - 1] = d;                                               \
1023
442
    return 0;                                                           \
1024
442
                                                                        \
1025
0
bad_digit:                                                              \
1026
0
    if (!PyErr_Occurred()) {                                            \
1027
0
        PyErr_SetString(PyExc_ValueError,                               \
1028
0
            "bad marshal data (digit out of range in long)");           \
1029
0
    }                                                                   \
1030
0
    return -1;                                                          \
1031
442
}
1032
442
_w_digits(32)
1033
0
_w_digits(16)
1034
#undef _w_digits
1035
1036
static PyObject *
1037
r_PyLong(RFILE *p)
1038
442
{
1039
442
    long n = r_long(p);
1040
442
    if (n == -1 && PyErr_Occurred()) {
1041
0
        return NULL;
1042
0
    }
1043
442
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1044
0
        PyErr_SetString(PyExc_ValueError,
1045
0
                       "bad marshal data (long size out of range)");
1046
0
        return NULL;
1047
0
    }
1048
1049
442
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1050
442
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1051
1052
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1053
442
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1054
442
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1055
1056
    /* other assumptions on PyLongObject internals */
1057
442
    assert(layout->bits_per_digit <= 32);
1058
442
    assert(layout->digits_order == -1);
1059
442
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1060
442
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1061
1062
442
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1063
1064
442
    assert(size >= 1);
1065
1066
442
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1067
442
    void *digits;
1068
442
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1069
1070
442
    if (writer == NULL) {
1071
0
        return NULL;
1072
0
    }
1073
1074
442
    int ret;
1075
1076
442
    if (layout->digit_size == 4) {
1077
442
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1078
442
    }
1079
0
    else {
1080
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1081
0
    }
1082
442
    if (ret < 0) {
1083
0
        PyLongWriter_Discard(writer);
1084
0
        return NULL;
1085
0
    }
1086
442
    return PyLongWriter_Finish(writer);
1087
442
}
1088
1089
static double
1090
r_float_bin(RFILE *p)
1091
657
{
1092
657
    const char *buf = r_string(8, p);
1093
657
    if (buf == NULL)
1094
0
        return -1;
1095
657
    return PyFloat_Unpack8(buf, 1);
1096
657
}
1097
1098
/* Issue #33720: Disable inlining for reducing the C stack consumption
1099
   on PGO builds. */
1100
Py_NO_INLINE static double
1101
r_float_str(RFILE *p)
1102
0
{
1103
0
    int n;
1104
0
    char buf[256];
1105
0
    const char *ptr;
1106
0
    n = r_byte(p);
1107
0
    if (n == EOF) {
1108
0
        return -1;
1109
0
    }
1110
0
    ptr = r_string(n, p);
1111
0
    if (ptr == NULL) {
1112
0
        return -1;
1113
0
    }
1114
0
    memcpy(buf, ptr, n);
1115
0
    buf[n] = '\0';
1116
0
    return PyOS_string_to_double(buf, NULL, NULL);
1117
0
}
1118
1119
/* allocate the reflist index for a new object. Return -1 on failure */
1120
static Py_ssize_t
1121
r_ref_reserve(int flag, RFILE *p)
1122
179k
{
1123
179k
    if (flag) { /* currently only FLAG_REF is defined */
1124
6.59k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1125
6.59k
        if (idx >= 0x7ffffffe) {
1126
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1127
0
            return -1;
1128
0
        }
1129
6.59k
        if (PyList_Append(p->refs, Py_None) < 0)
1130
0
            return -1;
1131
6.59k
        return idx;
1132
6.59k
    } else
1133
173k
        return 0;
1134
179k
}
1135
1136
/* insert the new object 'o' to the reflist at previously
1137
 * allocated index 'idx'.
1138
 * 'o' can be NULL, in which case nothing is done.
1139
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1140
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1141
 * NULL returned. This simplifies error checking at the call site since
1142
 * a single test for NULL for the function result is enough.
1143
 */
1144
static PyObject *
1145
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1146
179k
{
1147
179k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1148
6.59k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1149
6.59k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1150
6.59k
        Py_DECREF(tmp);
1151
6.59k
    }
1152
179k
    return o;
1153
179k
}
1154
1155
/* combination of both above, used when an object can be
1156
 * created whenever it is seen in the file, as opposed to
1157
 * after having loaded its sub-objects.
1158
 */
1159
static PyObject *
1160
r_ref(PyObject *o, int flag, RFILE *p)
1161
1.43M
{
1162
1.43M
    assert(flag & FLAG_REF);
1163
1.43M
    if (o == NULL)
1164
0
        return NULL;
1165
1.43M
    if (PyList_Append(p->refs, o) < 0) {
1166
0
        Py_DECREF(o); /* release the new object */
1167
0
        return NULL;
1168
0
    }
1169
1.43M
    return o;
1170
1.43M
}
1171
1172
static PyObject *
1173
r_object(RFILE *p)
1174
4.49M
{
1175
    /* NULL is a valid return value, it does not necessarily means that
1176
       an exception is set. */
1177
4.49M
    PyObject *v, *v2;
1178
4.49M
    Py_ssize_t idx = 0;
1179
4.49M
    long i, n;
1180
4.49M
    int type, code = r_byte(p);
1181
4.49M
    int flag, is_interned = 0;
1182
4.49M
    PyObject *retval = NULL;
1183
1184
4.49M
    if (code == EOF) {
1185
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1186
0
            PyErr_SetString(PyExc_EOFError,
1187
0
                            "EOF read where object expected");
1188
0
        }
1189
0
        return NULL;
1190
0
    }
1191
1192
4.49M
    p->depth++;
1193
1194
4.49M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1195
0
        p->depth--;
1196
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1197
0
        return NULL;
1198
0
    }
1199
1200
4.49M
    flag = code & FLAG_REF;
1201
4.49M
    type = code & ~FLAG_REF;
1202
1203
4.49M
#define R_REF(O) do{\
1204
2.37M
    if (flag) \
1205
2.37M
        O = r_ref(O, flag, p);\
1206
2.37M
} while (0)
1207
1208
4.49M
    switch (type) {
1209
1210
0
    case TYPE_NULL:
1211
0
        break;
1212
1213
40.9k
    case TYPE_NONE:
1214
40.9k
        retval = Py_None;
1215
40.9k
        break;
1216
1217
0
    case TYPE_STOPITER:
1218
0
        retval = Py_NewRef(PyExc_StopIteration);
1219
0
        break;
1220
1221
400
    case TYPE_ELLIPSIS:
1222
400
        retval = Py_Ellipsis;
1223
400
        break;
1224
1225
1.94k
    case TYPE_FALSE:
1226
1.94k
        retval = Py_False;
1227
1.94k
        break;
1228
1229
1.72k
    case TYPE_TRUE:
1230
1.72k
        retval = Py_True;
1231
1.72k
        break;
1232
1233
24.8k
    case TYPE_INT:
1234
24.8k
        n = r_long(p);
1235
24.8k
        if (n == -1 && PyErr_Occurred()) {
1236
0
            break;
1237
0
        }
1238
24.8k
        retval = PyLong_FromLong(n);
1239
24.8k
        R_REF(retval);
1240
24.8k
        break;
1241
1242
0
    case TYPE_INT64:
1243
0
        retval = r_long64(p);
1244
0
        R_REF(retval);
1245
0
        break;
1246
1247
442
    case TYPE_LONG:
1248
442
        retval = r_PyLong(p);
1249
442
        R_REF(retval);
1250
442
        break;
1251
1252
0
    case TYPE_FLOAT:
1253
0
        {
1254
0
            double x = r_float_str(p);
1255
0
            if (x == -1.0 && PyErr_Occurred())
1256
0
                break;
1257
0
            retval = PyFloat_FromDouble(x);
1258
0
            R_REF(retval);
1259
0
            break;
1260
0
        }
1261
1262
651
    case TYPE_BINARY_FLOAT:
1263
651
        {
1264
651
            double x = r_float_bin(p);
1265
651
            if (x == -1.0 && PyErr_Occurred())
1266
0
                break;
1267
651
            retval = PyFloat_FromDouble(x);
1268
651
            R_REF(retval);
1269
651
            break;
1270
651
        }
1271
1272
0
    case TYPE_COMPLEX:
1273
0
        {
1274
0
            Py_complex c;
1275
0
            c.real = r_float_str(p);
1276
0
            if (c.real == -1.0 && PyErr_Occurred())
1277
0
                break;
1278
0
            c.imag = r_float_str(p);
1279
0
            if (c.imag == -1.0 && PyErr_Occurred())
1280
0
                break;
1281
0
            retval = PyComplex_FromCComplex(c);
1282
0
            R_REF(retval);
1283
0
            break;
1284
0
        }
1285
1286
3
    case TYPE_BINARY_COMPLEX:
1287
3
        {
1288
3
            Py_complex c;
1289
3
            c.real = r_float_bin(p);
1290
3
            if (c.real == -1.0 && PyErr_Occurred())
1291
0
                break;
1292
3
            c.imag = r_float_bin(p);
1293
3
            if (c.imag == -1.0 && PyErr_Occurred())
1294
0
                break;
1295
3
            retval = PyComplex_FromCComplex(c);
1296
3
            R_REF(retval);
1297
3
            break;
1298
3
        }
1299
1300
530k
    case TYPE_STRING:
1301
530k
        {
1302
530k
            const char *ptr;
1303
530k
            n = r_long(p);
1304
530k
            if (n < 0 || n > SIZE32_MAX) {
1305
0
                if (!PyErr_Occurred()) {
1306
0
                    PyErr_SetString(PyExc_ValueError,
1307
0
                        "bad marshal data (bytes object size out of range)");
1308
0
                }
1309
0
                break;
1310
0
            }
1311
530k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1312
530k
            if (v == NULL)
1313
0
                break;
1314
530k
            ptr = r_string(n, p);
1315
530k
            if (ptr == NULL) {
1316
0
                Py_DECREF(v);
1317
0
                break;
1318
0
            }
1319
530k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1320
530k
            retval = v;
1321
530k
            R_REF(retval);
1322
530k
            break;
1323
530k
        }
1324
1325
0
    case TYPE_ASCII_INTERNED:
1326
0
        is_interned = 1;
1327
0
        _Py_FALLTHROUGH;
1328
27.2k
    case TYPE_ASCII:
1329
27.2k
        n = r_long(p);
1330
27.2k
        if (n < 0 || n > SIZE32_MAX) {
1331
0
            if (!PyErr_Occurred()) {
1332
0
                PyErr_SetString(PyExc_ValueError,
1333
0
                    "bad marshal data (string size out of range)");
1334
0
            }
1335
0
            break;
1336
0
        }
1337
27.2k
        goto _read_ascii;
1338
1339
1.15M
    case TYPE_SHORT_ASCII_INTERNED:
1340
1.15M
        is_interned = 1;
1341
1.15M
        _Py_FALLTHROUGH;
1342
1.31M
    case TYPE_SHORT_ASCII:
1343
1.31M
        n = r_byte(p);
1344
1.31M
        if (n == EOF) {
1345
0
            break;
1346
0
        }
1347
1.34M
    _read_ascii:
1348
1.34M
        {
1349
1.34M
            const char *ptr;
1350
1.34M
            ptr = r_string(n, p);
1351
1.34M
            if (ptr == NULL)
1352
0
                break;
1353
1.34M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1354
1.34M
            if (v == NULL)
1355
0
                break;
1356
1.34M
            if (is_interned) {
1357
                // marshal is meant to serialize .pyc files with code
1358
                // objects, and code-related strings are currently immortal.
1359
1.15M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1360
1.15M
                _PyUnicode_InternImmortal(interp, &v);
1361
1.15M
            }
1362
1.34M
            retval = v;
1363
1.34M
            R_REF(retval);
1364
1.34M
            break;
1365
1.34M
        }
1366
1367
206
    case TYPE_INTERNED:
1368
206
        is_interned = 1;
1369
206
        _Py_FALLTHROUGH;
1370
3.58k
    case TYPE_UNICODE:
1371
3.58k
        {
1372
3.58k
        const char *buffer;
1373
1374
3.58k
        n = r_long(p);
1375
3.58k
        if (n < 0 || n > SIZE32_MAX) {
1376
0
            if (!PyErr_Occurred()) {
1377
0
                PyErr_SetString(PyExc_ValueError,
1378
0
                    "bad marshal data (string size out of range)");
1379
0
            }
1380
0
            break;
1381
0
        }
1382
3.58k
        if (n != 0) {
1383
3.58k
            buffer = r_string(n, p);
1384
3.58k
            if (buffer == NULL)
1385
0
                break;
1386
3.58k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1387
3.58k
        }
1388
0
        else {
1389
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1390
0
        }
1391
3.58k
        if (v == NULL)
1392
0
            break;
1393
3.58k
        if (is_interned) {
1394
            // marshal is meant to serialize .pyc files with code
1395
            // objects, and code-related strings are currently immortal.
1396
206
            PyInterpreterState *interp = _PyInterpreterState_GET();
1397
206
            _PyUnicode_InternImmortal(interp, &v);
1398
206
        }
1399
3.58k
        retval = v;
1400
3.58k
        R_REF(retval);
1401
3.58k
        break;
1402
3.58k
        }
1403
1404
469k
    case TYPE_SMALL_TUPLE:
1405
469k
        n = r_byte(p);
1406
469k
        if (n == EOF) {
1407
0
            break;
1408
0
        }
1409
469k
        goto _read_tuple;
1410
469k
    case TYPE_TUPLE:
1411
71
        n = r_long(p);
1412
71
        if (n < 0 || n > SIZE32_MAX) {
1413
0
            if (!PyErr_Occurred()) {
1414
0
                PyErr_SetString(PyExc_ValueError,
1415
0
                    "bad marshal data (tuple size out of range)");
1416
0
            }
1417
0
            break;
1418
0
        }
1419
469k
    _read_tuple:
1420
469k
        v = PyTuple_New(n);
1421
469k
        R_REF(v);
1422
469k
        if (v == NULL)
1423
0
            break;
1424
1425
3.18M
        for (i = 0; i < n; i++) {
1426
2.71M
            v2 = r_object(p);
1427
2.71M
            if ( v2 == NULL ) {
1428
0
                if (!PyErr_Occurred())
1429
0
                    PyErr_SetString(PyExc_TypeError,
1430
0
                        "NULL object in marshal data for tuple");
1431
0
                Py_SETREF(v, NULL);
1432
0
                break;
1433
0
            }
1434
2.71M
            PyTuple_SET_ITEM(v, i, v2);
1435
2.71M
        }
1436
469k
        retval = v;
1437
469k
        break;
1438
1439
0
    case TYPE_LIST:
1440
0
        n = r_long(p);
1441
0
        if (n < 0 || n > SIZE32_MAX) {
1442
0
            if (!PyErr_Occurred()) {
1443
0
                PyErr_SetString(PyExc_ValueError,
1444
0
                    "bad marshal data (list size out of range)");
1445
0
            }
1446
0
            break;
1447
0
        }
1448
0
        v = PyList_New(n);
1449
0
        R_REF(v);
1450
0
        if (v == NULL)
1451
0
            break;
1452
0
        for (i = 0; i < n; i++) {
1453
0
            v2 = r_object(p);
1454
0
            if ( v2 == NULL ) {
1455
0
                if (!PyErr_Occurred())
1456
0
                    PyErr_SetString(PyExc_TypeError,
1457
0
                        "NULL object in marshal data for list");
1458
0
                Py_SETREF(v, NULL);
1459
0
                break;
1460
0
            }
1461
0
            PyList_SET_ITEM(v, i, v2);
1462
0
        }
1463
0
        retval = v;
1464
0
        break;
1465
1466
0
    case TYPE_DICT:
1467
0
    case TYPE_FROZENDICT:
1468
0
        v = PyDict_New();
1469
0
        if (v == NULL) {
1470
0
            break;
1471
0
        }
1472
0
        if (type == TYPE_DICT) {
1473
0
            R_REF(v);
1474
0
        }
1475
0
        else {
1476
0
            idx = r_ref_reserve(flag, p);
1477
0
            if (idx < 0) {
1478
0
                Py_CLEAR(v);
1479
0
                break;
1480
0
            }
1481
0
        }
1482
0
        for (;;) {
1483
0
            PyObject *key, *val;
1484
0
            key = r_object(p);
1485
0
            if (key == NULL)
1486
0
                break;
1487
0
            val = r_object(p);
1488
0
            if (val == NULL) {
1489
0
                Py_DECREF(key);
1490
0
                break;
1491
0
            }
1492
0
            if (PyDict_SetItem(v, key, val) < 0) {
1493
0
                Py_DECREF(key);
1494
0
                Py_DECREF(val);
1495
0
                break;
1496
0
            }
1497
0
            Py_DECREF(key);
1498
0
            Py_DECREF(val);
1499
0
        }
1500
0
        if (PyErr_Occurred()) {
1501
0
            Py_CLEAR(v);
1502
0
        }
1503
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1504
0
            Py_SETREF(v, PyFrozenDict_New(v));
1505
0
        }
1506
0
        retval = v;
1507
0
        break;
1508
1509
0
    case TYPE_SET:
1510
512
    case TYPE_FROZENSET:
1511
512
        n = r_long(p);
1512
512
        if (n < 0 || n > SIZE32_MAX) {
1513
0
            if (!PyErr_Occurred()) {
1514
0
                PyErr_SetString(PyExc_ValueError,
1515
0
                    "bad marshal data (set size out of range)");
1516
0
            }
1517
0
            break;
1518
0
        }
1519
1520
512
        if (n == 0 && type == TYPE_FROZENSET) {
1521
            /* call frozenset() to get the empty frozenset singleton */
1522
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1523
0
            if (v == NULL)
1524
0
                break;
1525
0
            R_REF(v);
1526
0
            retval = v;
1527
0
        }
1528
512
        else {
1529
512
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1530
512
            if (type == TYPE_SET) {
1531
0
                R_REF(v);
1532
512
            } else {
1533
                /* must use delayed registration of frozensets because they must
1534
                 * be init with a refcount of 1
1535
                 */
1536
512
                idx = r_ref_reserve(flag, p);
1537
512
                if (idx < 0)
1538
0
                    Py_CLEAR(v); /* signal error */
1539
512
            }
1540
512
            if (v == NULL)
1541
0
                break;
1542
1543
2.64k
            for (i = 0; i < n; i++) {
1544
2.13k
                v2 = r_object(p);
1545
2.13k
                if ( v2 == NULL ) {
1546
0
                    if (!PyErr_Occurred())
1547
0
                        PyErr_SetString(PyExc_TypeError,
1548
0
                            "NULL object in marshal data for set");
1549
0
                    Py_SETREF(v, NULL);
1550
0
                    break;
1551
0
                }
1552
2.13k
                if (PySet_Add(v, v2) == -1) {
1553
0
                    Py_DECREF(v);
1554
0
                    Py_DECREF(v2);
1555
0
                    v = NULL;
1556
0
                    break;
1557
0
                }
1558
2.13k
                Py_DECREF(v2);
1559
2.13k
            }
1560
512
            if (type != TYPE_SET)
1561
512
                v = r_ref_insert(v, idx, flag, p);
1562
512
            retval = v;
1563
512
        }
1564
512
        break;
1565
1566
175k
    case TYPE_CODE:
1567
175k
        {
1568
175k
            int argcount;
1569
175k
            int posonlyargcount;
1570
175k
            int kwonlyargcount;
1571
175k
            int stacksize;
1572
175k
            int flags;
1573
175k
            PyObject *code = NULL;
1574
175k
            PyObject *consts = NULL;
1575
175k
            PyObject *names = NULL;
1576
175k
            PyObject *localsplusnames = NULL;
1577
175k
            PyObject *localspluskinds = NULL;
1578
175k
            PyObject *filename = NULL;
1579
175k
            PyObject *name = NULL;
1580
175k
            PyObject *qualname = NULL;
1581
175k
            int firstlineno;
1582
175k
            PyObject* linetable = NULL;
1583
175k
            PyObject *exceptiontable = NULL;
1584
1585
175k
            if (!p->allow_code) {
1586
0
                PyErr_SetString(PyExc_ValueError,
1587
0
                                "unmarshalling code objects is disallowed");
1588
0
                break;
1589
0
            }
1590
175k
            idx = r_ref_reserve(flag, p);
1591
175k
            if (idx < 0)
1592
0
                break;
1593
1594
175k
            v = NULL;
1595
1596
            /* XXX ignore long->int overflows for now */
1597
175k
            argcount = (int)r_long(p);
1598
175k
            if (argcount == -1 && PyErr_Occurred())
1599
0
                goto code_error;
1600
175k
            posonlyargcount = (int)r_long(p);
1601
175k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1602
0
                goto code_error;
1603
0
            }
1604
175k
            kwonlyargcount = (int)r_long(p);
1605
175k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1606
0
                goto code_error;
1607
175k
            stacksize = (int)r_long(p);
1608
175k
            if (stacksize == -1 && PyErr_Occurred())
1609
0
                goto code_error;
1610
175k
            flags = (int)r_long(p);
1611
175k
            if (flags == -1 && PyErr_Occurred())
1612
0
                goto code_error;
1613
175k
            code = r_object(p);
1614
175k
            if (code == NULL)
1615
0
                goto code_error;
1616
175k
            consts = r_object(p);
1617
175k
            if (consts == NULL)
1618
0
                goto code_error;
1619
175k
            names = r_object(p);
1620
175k
            if (names == NULL)
1621
0
                goto code_error;
1622
175k
            localsplusnames = r_object(p);
1623
175k
            if (localsplusnames == NULL)
1624
0
                goto code_error;
1625
175k
            localspluskinds = r_object(p);
1626
175k
            if (localspluskinds == NULL)
1627
0
                goto code_error;
1628
175k
            filename = r_object(p);
1629
175k
            if (filename == NULL)
1630
0
                goto code_error;
1631
175k
            name = r_object(p);
1632
175k
            if (name == NULL)
1633
0
                goto code_error;
1634
175k
            qualname = r_object(p);
1635
175k
            if (qualname == NULL)
1636
0
                goto code_error;
1637
175k
            firstlineno = (int)r_long(p);
1638
175k
            if (firstlineno == -1 && PyErr_Occurred())
1639
0
                goto code_error;
1640
175k
            linetable = r_object(p);
1641
175k
            if (linetable == NULL)
1642
0
                goto code_error;
1643
175k
            exceptiontable = r_object(p);
1644
175k
            if (exceptiontable == NULL)
1645
0
                goto code_error;
1646
1647
175k
            struct _PyCodeConstructor con = {
1648
175k
                .filename = filename,
1649
175k
                .name = name,
1650
175k
                .qualname = qualname,
1651
175k
                .flags = flags,
1652
1653
175k
                .code = code,
1654
175k
                .firstlineno = firstlineno,
1655
175k
                .linetable = linetable,
1656
1657
175k
                .consts = consts,
1658
175k
                .names = names,
1659
1660
175k
                .localsplusnames = localsplusnames,
1661
175k
                .localspluskinds = localspluskinds,
1662
1663
175k
                .argcount = argcount,
1664
175k
                .posonlyargcount = posonlyargcount,
1665
175k
                .kwonlyargcount = kwonlyargcount,
1666
1667
175k
                .stacksize = stacksize,
1668
1669
175k
                .exceptiontable = exceptiontable,
1670
175k
            };
1671
1672
175k
            if (_PyCode_Validate(&con) < 0) {
1673
0
                goto code_error;
1674
0
            }
1675
1676
175k
            v = (PyObject *)_PyCode_New(&con);
1677
175k
            if (v == NULL) {
1678
0
                goto code_error;
1679
0
            }
1680
1681
175k
            v = r_ref_insert(v, idx, flag, p);
1682
1683
175k
          code_error:
1684
175k
            if (v == NULL && !PyErr_Occurred()) {
1685
0
                PyErr_SetString(PyExc_TypeError,
1686
0
                    "NULL object in marshal data for code object");
1687
0
            }
1688
175k
            Py_XDECREF(code);
1689
175k
            Py_XDECREF(consts);
1690
175k
            Py_XDECREF(names);
1691
175k
            Py_XDECREF(localsplusnames);
1692
175k
            Py_XDECREF(localspluskinds);
1693
175k
            Py_XDECREF(filename);
1694
175k
            Py_XDECREF(name);
1695
175k
            Py_XDECREF(qualname);
1696
175k
            Py_XDECREF(linetable);
1697
175k
            Py_XDECREF(exceptiontable);
1698
175k
        }
1699
0
        retval = v;
1700
175k
        break;
1701
1702
1.89M
    case TYPE_REF:
1703
1.89M
        n = r_long(p);
1704
1.89M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1705
0
            if (!PyErr_Occurred()) {
1706
0
                PyErr_SetString(PyExc_ValueError,
1707
0
                    "bad marshal data (invalid reference)");
1708
0
            }
1709
0
            break;
1710
0
        }
1711
1.89M
        v = PyList_GET_ITEM(p->refs, n);
1712
1.89M
        if (v == Py_None) {
1713
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1714
0
            break;
1715
0
        }
1716
1.89M
        retval = Py_NewRef(v);
1717
1.89M
        break;
1718
1719
3.32k
    case TYPE_SLICE:
1720
3.32k
    {
1721
3.32k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1722
3.32k
        if (idx < 0) {
1723
0
            break;
1724
0
        }
1725
3.32k
        PyObject *stop = NULL;
1726
3.32k
        PyObject *step = NULL;
1727
3.32k
        PyObject *start = r_object(p);
1728
3.32k
        if (start == NULL) {
1729
0
            goto cleanup;
1730
0
        }
1731
3.32k
        stop = r_object(p);
1732
3.32k
        if (stop == NULL) {
1733
0
            goto cleanup;
1734
0
        }
1735
3.32k
        step = r_object(p);
1736
3.32k
        if (step == NULL) {
1737
0
            goto cleanup;
1738
0
        }
1739
3.32k
        retval = PySlice_New(start, stop, step);
1740
3.32k
        r_ref_insert(retval, idx, flag, p);
1741
3.32k
    cleanup:
1742
3.32k
        Py_XDECREF(start);
1743
3.32k
        Py_XDECREF(stop);
1744
3.32k
        Py_XDECREF(step);
1745
3.32k
        break;
1746
3.32k
    }
1747
1748
0
    default:
1749
        /* Bogus data got written, which isn't ideal.
1750
           This will let you keep working and recover. */
1751
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1752
0
        break;
1753
1754
4.49M
    }
1755
4.49M
    p->depth--;
1756
4.49M
    return retval;
1757
4.49M
}
1758
1759
static PyObject *
1760
read_object(RFILE *p)
1761
6.83k
{
1762
6.83k
    PyObject *v;
1763
6.83k
    if (PyErr_Occurred()) {
1764
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1765
0
        return NULL;
1766
0
    }
1767
6.83k
    if (p->ptr && p->end) {
1768
6.83k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1769
0
            return NULL;
1770
0
        }
1771
6.83k
    } else if (p->fp || p->readable) {
1772
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1773
0
            return NULL;
1774
0
        }
1775
0
    }
1776
6.83k
    v = r_object(p);
1777
6.83k
    if (v == NULL && !PyErr_Occurred())
1778
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1779
6.83k
    return v;
1780
6.83k
}
1781
1782
int
1783
PyMarshal_ReadShortFromFile(FILE *fp)
1784
0
{
1785
0
    RFILE rf;
1786
0
    int res;
1787
0
    assert(fp);
1788
0
    rf.readable = NULL;
1789
0
    rf.fp = fp;
1790
0
    rf.end = rf.ptr = NULL;
1791
0
    rf.buf = NULL;
1792
0
    res = r_short(&rf);
1793
0
    if (rf.buf != NULL)
1794
0
        PyMem_Free(rf.buf);
1795
0
    return res;
1796
0
}
1797
1798
long
1799
PyMarshal_ReadLongFromFile(FILE *fp)
1800
0
{
1801
0
    RFILE rf;
1802
0
    long res;
1803
0
    rf.fp = fp;
1804
0
    rf.readable = NULL;
1805
0
    rf.ptr = rf.end = NULL;
1806
0
    rf.buf = NULL;
1807
0
    res = r_long(&rf);
1808
0
    if (rf.buf != NULL)
1809
0
        PyMem_Free(rf.buf);
1810
0
    return res;
1811
0
}
1812
1813
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1814
static off_t
1815
getfilesize(FILE *fp)
1816
0
{
1817
0
    struct _Py_stat_struct st;
1818
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1819
0
        return -1;
1820
#if SIZEOF_OFF_T == 4
1821
    else if (st.st_size >= INT_MAX)
1822
        return (off_t)INT_MAX;
1823
#endif
1824
0
    else
1825
0
        return (off_t)st.st_size;
1826
0
}
1827
1828
/* If we can get the size of the file up-front, and it's reasonably small,
1829
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1830
 * than reading a byte at a time from file; speeds .pyc imports.
1831
 * CAUTION:  since this may read the entire remainder of the file, don't
1832
 * call it unless you know you're done with the file.
1833
 */
1834
PyObject *
1835
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1836
0
{
1837
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1838
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1839
0
    off_t filesize;
1840
0
    filesize = getfilesize(fp);
1841
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1842
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1843
0
        if (pBuf != NULL) {
1844
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1845
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1846
0
            PyMem_Free(pBuf);
1847
0
            return v;
1848
0
        }
1849
1850
0
    }
1851
    /* We don't have fstat, or we do but the file is larger than
1852
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1853
     */
1854
0
    return PyMarshal_ReadObjectFromFile(fp);
1855
1856
0
#undef REASONABLE_FILE_LIMIT
1857
0
}
1858
1859
PyObject *
1860
PyMarshal_ReadObjectFromFile(FILE *fp)
1861
0
{
1862
0
    RFILE rf;
1863
0
    PyObject *result;
1864
0
    rf.allow_code = 1;
1865
0
    rf.fp = fp;
1866
0
    rf.readable = NULL;
1867
0
    rf.depth = 0;
1868
0
    rf.ptr = rf.end = NULL;
1869
0
    rf.buf = NULL;
1870
0
    rf.refs = PyList_New(0);
1871
0
    if (rf.refs == NULL)
1872
0
        return NULL;
1873
0
    result = read_object(&rf);
1874
0
    Py_DECREF(rf.refs);
1875
0
    if (rf.buf != NULL)
1876
0
        PyMem_Free(rf.buf);
1877
0
    return result;
1878
0
}
1879
1880
PyObject *
1881
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1882
648
{
1883
648
    RFILE rf;
1884
648
    PyObject *result;
1885
648
    rf.allow_code = 1;
1886
648
    rf.fp = NULL;
1887
648
    rf.readable = NULL;
1888
648
    rf.ptr = str;
1889
648
    rf.end = str + len;
1890
648
    rf.buf = NULL;
1891
648
    rf.depth = 0;
1892
648
    rf.refs = PyList_New(0);
1893
648
    if (rf.refs == NULL)
1894
0
        return NULL;
1895
648
    result = read_object(&rf);
1896
648
    Py_DECREF(rf.refs);
1897
648
    if (rf.buf != NULL)
1898
0
        PyMem_Free(rf.buf);
1899
648
    return result;
1900
648
}
1901
1902
/* Marshal x into a bytes object using the given format version.

   allow_code is stored in the writer state (wf.allow_code) for w_object()
   to consult.  Returns the bytes object on success.  On failure returns
   NULL; when the writer recorded an error code, that code is translated
   into MemoryError or a ValueError with a matching message. */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    WFILE wf;

    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    memset(&wf, 0, sizeof(wf));
    /* Start with a 50-byte output buffer. */
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    wf.buf = PyBytes_AS_STRING(wf.str);
    wf.ptr = wf.buf;
    wf.end = wf.buf + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;

    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }
    w_object(x, &wf);
    w_clear_refs(&wf);

    if (wf.str != NULL) {
        /* Shrink the bytes object to the part actually written.  The base
           pointer is re-read from wf.str rather than taken from wf.buf;
           NOTE(review): presumably the writer may have replaced or dropped
           wf.str while growing the buffer -- confirm against the writer
           helpers. */
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - PyBytes_AS_STRING(wf.str));
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* The writer flagged a problem: discard the partial output and raise. */
    Py_XDECREF(wf.str);

    const char *reason;
    switch (wf.error) {
    case WFERR_NOMEMORY:
        PyErr_NoMemory();
        return NULL;
    case WFERR_NESTEDTOODEEP:
        reason = "object too deeply nested to marshal";
        break;
    case WFERR_CODE_NOT_ALLOWED:
        reason = "marshalling code objects is disallowed";
        break;
    case WFERR_UNMARSHALLABLE:
    default:
        reason = "unmarshallable object";
        break;
    }
    PyErr_SetString(PyExc_ValueError, reason);
    return NULL;
}
1954
1955
/* Public entry point: marshal x to a bytes object in the given format
   version, always passing allow_code=1 to the internal writer. */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;

    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1960
1961
/* And an interface for Python programs... */
1962
/*[clinic input]
1963
marshal.dump
1964
1965
    value: object
1966
        Must be a supported type.
1967
    file: object
1968
        Must be a writeable binary file.
1969
    version: int(c_default="Py_MARSHAL_VERSION") = version
1970
        Indicates the data format that dump should use.
1971
    /
1972
    *
1973
    allow_code: bool = True
1974
        Allow to write code objects.
1975
1976
Write the value on the open file.
1977
1978
If the value has (or contains an object that has) an unsupported type, a
1979
ValueError exception is raised - but garbage data will also be written
1980
to the file. The object will not be properly read back by load().
1981
[clinic start generated code]*/
1982
1983
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently */
    /* Serialize the whole value into a bytes object first, then hand it to
       file.write() in a single call.  Returns whatever write() returns, or
       NULL if either step fails. */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    PyObject *written = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                  payload);
    Py_DECREF(payload);
    return written;
}
1999
2000
/*[clinic input]
2001
marshal.load
2002
2003
    file: object
2004
        Must be readable binary file.
2005
    /
2006
    *
2007
    allow_code: bool = True
2008
        Allow to load code objects.
2009
2010
Read one value from the open file and return it.
2011
2012
If no valid value is read (e.g. because the data has a different Python
2013
version's incompatible marshal format), raise EOFError, ValueError or
2014
TypeError.
2015
2016
Note: If an object containing an unsupported type was marshalled with
2017
dump(), load() will substitute None for the unmarshallable type.
2018
[clinic start generated code]*/
2019
2020
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    /*
     * Probe the file object by asking read() for zero bytes.  This checks
     * up front that a read method exists and that it returns bytes.
     * The probe can go away once r_string() is guaranteed to report such
     * problems properly on its own.
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }

    PyObject *result = NULL;
    if (PyBytes_Check(probe)) {
        /* Reader backed by the Python file object: no FILE, no memory span. */
        RFILE rf;
        rf.allow_code = allow_code;
        rf.depth = 0;
        rf.fp = NULL;
        rf.readable = file;
        rf.ptr = NULL;
        rf.end = NULL;
        rf.buf = NULL;
        rf.refs = PyList_New(0);
        if (rf.refs != NULL) {
            result = read_object(&rf);
            Py_DECREF(rf.refs);
            if (rf.buf != NULL) {
                PyMem_Free(rf.buf);
            }
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
    }

    /* The probe result is kept alive until the very end. */
    Py_DECREF(probe);
    return result;
}
2061
2062
/*[clinic input]
2063
@permit_long_summary
2064
marshal.dumps
2065
2066
    value: object
2067
        Must be a supported type.
2068
    version: int(c_default="Py_MARSHAL_VERSION") = version
2069
        Indicates the data format that dumps should use.
2070
    /
2071
    *
2072
    allow_code: bool = True
2073
        Allow to write code objects.
2074
2075
Return the bytes object that would be written to a file by dump(value, file).
2076
2077
Raise a ValueError exception if value has (or contains an object that
2078
has) an unsupported type.
2079
[clinic start generated code]*/
2080
2081
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=dc1edcafd43124c5]*/
{
    /* Thin wrapper: the shared writer does all the work, including raising
       on failure. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2088
2089
/*[clinic input]
2090
marshal.loads
2091
2092
    bytes: Py_buffer
2093
    /
2094
    *
2095
    allow_code: bool = True
2096
        Allow to load code objects.
2097
2098
Convert the bytes-like object to a value.
2099
2100
If no valid value is found, raise EOFError, ValueError or TypeError.
2101
Extra bytes in the input are ignored.
2102
[clinic start generated code]*/
2103
2104
static PyObject *
2105
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2106
/*[clinic end generated code: output=62c0c538d3edc31f input=286f1dbd6811d2ad]*/
2107
6.18k
{
2108
6.18k
    RFILE rf;
2109
6.18k
    char *s = bytes->buf;
2110
6.18k
    Py_ssize_t n = bytes->len;
2111
6.18k
    PyObject* result;
2112
6.18k
    rf.allow_code = allow_code;
2113
6.18k
    rf.fp = NULL;
2114
6.18k
    rf.readable = NULL;
2115
6.18k
    rf.ptr = s;
2116
6.18k
    rf.end = s + n;
2117
6.18k
    rf.depth = 0;
2118
6.18k
    if ((rf.refs = PyList_New(0)) == NULL)
2119
0
        return NULL;
2120
6.18k
    result = read_object(&rf);
2121
6.18k
    Py_DECREF(rf.refs);
2122
6.18k
    return result;
2123
6.18k
}
2124
2125
static PyMethodDef marshal_methods[] = {
2126
    MARSHAL_DUMP_METHODDEF
2127
    MARSHAL_LOAD_METHODDEF
2128
    MARSHAL_DUMPS_METHODDEF
2129
    MARSHAL_LOADS_METHODDEF
2130
    {NULL,              NULL}           /* sentinel */
2131
};
2132
2133
2134
PyDoc_STRVAR(module_doc,
2135
"This module contains functions that can read and write Python values in\n\
2136
a binary format. The format is specific to Python, but independent of\n\
2137
machine architecture issues.\n\
2138
\n\
2139
Not all Python object types are supported; in general, only objects\n\
2140
whose value is independent from a particular invocation of Python can be\n\
2141
written and read by this module. The following types are supported:\n\
2142
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2143
tuples, lists, sets, dictionaries, and code objects, where it\n\
2144
should be understood that tuples, lists and dictionaries are only\n\
2145
supported as long as the values contained therein are themselves\n\
2146
supported; and recursive lists and dictionaries should not be written\n\
2147
(they will cause infinite loops).\n\
2148
\n\
2149
Variables:\n\
2150
\n\
2151
version -- indicates the format that the module uses. Version 0 is the\n\
2152
    historical format, version 1 shares interned strings and version 2\n\
2153
    uses a binary format for floating-point numbers.\n\
2154
    Version 3 shares common object references (New in version 3.4).\n\
2155
\n\
2156
Functions:\n\
2157
\n\
2158
dump() -- write value to a file\n\
2159
load() -- read value from a file\n\
2160
dumps() -- marshal value as a bytes object\n\
2161
loads() -- read value from a bytes-like object");
2162
2163
2164
static int
2165
marshal_module_exec(PyObject *mod)
2166
37
{
2167
37
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2168
0
        return -1;
2169
0
    }
2170
37
    return 0;
2171
37
}
2172
2173
static PyModuleDef_Slot marshalmodule_slots[] = {
2174
     _Py_ABI_SLOT,
2175
    {Py_mod_exec, marshal_module_exec},
2176
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2177
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2178
    {0, NULL}
2179
};
2180
2181
static struct PyModuleDef marshalmodule = {
2182
    PyModuleDef_HEAD_INIT,
2183
    .m_name = "marshal",
2184
    .m_doc = module_doc,
2185
    .m_methods = marshal_methods,
2186
    .m_slots = marshalmodule_slots,
2187
};
2188
2189
PyMODINIT_FUNC
2190
PyMarshal_Init(void)
2191
37
{
2192
37
    return PyModuleDef_Init(&marshalmodule);
2193
37
}