Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
18
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
19
20
#include "marshal.h"                 // Py_MARSHAL_VERSION
21
22
#ifdef __APPLE__
23
#  include "TargetConditionals.h"
24
#endif /* __APPLE__ */
25
26
27
/*[clinic input]
28
module marshal
29
[clinic start generated code]*/
30
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
31
32
#include "clinic/marshal.c.h"
33
34
/* High water mark to determine when the marshalled object is dangerously deep
35
 * and risks coring the interpreter.  When the object stack gets this deep,
36
 * raise an exception instead of continuing.
37
 * On Windows debug builds, reduce this value.
38
 *
39
 * BUG: https://bugs.python.org/issue33720
40
 * On Windows PGO builds, the r_object function overallocates its stack and
41
 * can cause a stack overflow. We reduce the maximum depth for all Windows
42
 * releases to protect against this.
43
 * (The guard used to be: #if defined(MS_WINDOWS) && defined(Py_DEBUG))
44
 */
45
#if defined(MS_WINDOWS)
46
#  define MAX_MARSHAL_STACK_DEPTH 1000
47
#elif defined(__wasi__)
48
#  define MAX_MARSHAL_STACK_DEPTH 1500
49
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
50
// It won't be defined on older macOS SDKs
51
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
52
#  define MAX_MARSHAL_STACK_DEPTH 1500
53
#else
54
4.54M
#  define MAX_MARSHAL_STACK_DEPTH 2000
55
#endif
56
57
/* Supported types */
58
0
#define TYPE_NULL               '0'
59
95.1k
#define TYPE_NONE               'N'
60
12.3k
#define TYPE_FALSE              'F'
61
11.5k
#define TYPE_TRUE               'T'
62
0
#define TYPE_STOPITER           'S'
63
412
#define TYPE_ELLIPSIS           '.'
64
631
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
65
3
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
66
435
#define TYPE_LONG               'l'  // See also TYPE_INT.
67
514k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
68
74
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
69
0
#define TYPE_LIST               '['
70
0
#define TYPE_DICT               '{'
71
0
#define TYPE_FROZENDICT         '}'
72
170k
#define TYPE_CODE               'c'
73
3.55k
#define TYPE_UNICODE            'u'
74
#define TYPE_UNKNOWN            '?'
75
// added in version 2:
76
1.51k
#define TYPE_SET                '<'
77
505
#define TYPE_FROZENSET          '>'
78
// added in version 5:
79
3.34k
#define TYPE_SLICE              ':'
80
// Remember to update the version and documentation when adding new types.
81
82
/* Special cases for unicode strings (added in version 4) */
83
206
#define TYPE_INTERNED           't' // Version 1+
84
25.8k
#define TYPE_ASCII              'a'
85
0
#define TYPE_ASCII_INTERNED     'A'
86
1.25M
#define TYPE_SHORT_ASCII        'z'
87
1.09M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
88
89
/* Special cases for small objects */
90
24.8k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
91
458k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+
92
93
/* Supported for backwards compatibility */
94
0
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
95
0
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
96
0
#define TYPE_INT64              'I'  // Not generated any more.
97
98
/* References (added in version 3) */
99
1.88M
#define TYPE_REF                'r'
100
8.94M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
101
102
103
// Error codes:
104
79.5k
#define WFERR_OK 0
105
22
#define WFERR_UNMARSHALLABLE 1
106
0
#define WFERR_NESTEDTOODEEP 2
107
11
#define WFERR_NOMEMORY 3
108
0
#define WFERR_CODE_NOT_ALLOWED 4
109
110
/* State of one marshal write operation.  Output goes either to a FILE
 * (fp != NULL, buffered through buf) or into a growing bytes object (str). */
typedef struct {
111
    FILE *fp;                   /* output stream; non-NULL selects file mode */
112
    int error;  /* see WFERR_* values */
113
    int depth;                  /* current w_object() nesting, checked against MAX_MARSHAL_STACK_DEPTH */
114
    PyObject *str;              /* bytes object being filled when fp is NULL (resized by w_reserve) */
115
    char *ptr;                  /* next byte to write; NULL after a failed resize */
116
    const char *end;            /* one past the last usable byte of buf */
117
    char *buf;                  /* start of the current output buffer */
118
    _Py_hashtable_t *hashtable; /* object -> reference index used by w_ref(); NULL when refs are off */
119
    int version;                /* marshal format version being written */
120
    int allow_code;             /* zero: code objects fail with WFERR_CODE_NOT_ALLOWED */
121
} WFILE;
122
123
378k
/* Append the single byte `c` to the output of WFILE `p`.
 * If the buffer is full, w_reserve() is asked for one more byte; when that
 * fails (returns 0) the byte is dropped. */
#define w_byte(c, p) do {                                   \
        if ((p)->ptr == (p)->end && !w_reserve((p), 1)) {   \
            /* no room and no way to get more */            \
        }                                                   \
        else {                                              \
            *(p)->ptr++ = (c);                              \
        }                                                   \
    } while(0)
127
128
static void
129
w_flush(WFILE *p)
130
0
{
131
0
    assert(p->fp != NULL);
132
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
133
0
    p->ptr = p->buf;
134
0
}
135
136
static int
137
w_reserve(WFILE *p, Py_ssize_t needed)
138
543
{
139
543
    Py_ssize_t pos, size, delta;
140
543
    if (p->ptr == NULL)
141
0
        return 0; /* An error already occurred */
142
543
    if (p->fp != NULL) {
143
0
        w_flush(p);
144
0
        return needed <= p->end - p->ptr;
145
0
    }
146
543
    assert(p->str != NULL);
147
543
    pos = p->ptr - p->buf;
148
543
    size = PyBytes_GET_SIZE(p->str);
149
543
    if (size > 16*1024*1024)
150
0
        delta = (size >> 3);            /* 12.5% overallocation */
151
543
    else
152
543
        delta = size + 1024;
153
543
    delta = Py_MAX(delta, needed);
154
543
    if (delta > PY_SSIZE_T_MAX - size) {
155
0
        p->error = WFERR_NOMEMORY;
156
0
        return 0;
157
0
    }
158
543
    size += delta;
159
543
    if (_PyBytes_Resize(&p->str, size) != 0) {
160
0
        p->end = p->ptr = p->buf = NULL;
161
0
        return 0;
162
0
    }
163
543
    else {
164
543
        p->buf = PyBytes_AS_STRING(p->str);
165
543
        p->ptr = p->buf + pos;
166
543
        p->end = p->buf + size;
167
543
        return 1;
168
543
    }
169
543
}
170
171
static void
172
w_string(const void *s, Py_ssize_t n, WFILE *p)
173
26.2k
{
174
26.2k
    Py_ssize_t m;
175
26.2k
    if (!n || p->ptr == NULL)
176
210
        return;
177
26.0k
    m = p->end - p->ptr;
178
26.0k
    if (p->fp != NULL) {
179
0
        if (n <= m) {
180
0
            memcpy(p->ptr, s, n);
181
0
            p->ptr += n;
182
0
        }
183
0
        else {
184
0
            w_flush(p);
185
0
            fwrite(s, 1, n, p->fp);
186
0
        }
187
0
    }
188
26.0k
    else {
189
26.0k
        if (n <= m || w_reserve(p, n - m)) {
190
26.0k
            memcpy(p->ptr, s, n);
191
26.0k
            p->ptr += n;
192
26.0k
        }
193
26.0k
    }
194
26.0k
}
195
196
static void
197
w_short(int x, WFILE *p)
198
18
{
199
18
    w_byte((char)( x      & 0xff), p);
200
18
    w_byte((char)((x>> 8) & 0xff), p);
201
18
}
202
203
static void
204
w_long(long x, WFILE *p)
205
68.7k
{
206
68.7k
    w_byte((char)( x      & 0xff), p);
207
68.7k
    w_byte((char)((x>> 8) & 0xff), p);
208
68.7k
    w_byte((char)((x>>16) & 0xff), p);
209
68.7k
    w_byte((char)((x>>24) & 0xff), p);
210
68.7k
}
211
212
555k
#define SIZE32_MAX  0x7FFFFFFF
213
214
/* W_SIZE(n, p): write the size `n` as a 32-bit value.
 * Where size_t is wider than 4 bytes, a size above SIZE32_MAX cannot be
 * encoded: the macro then decrements p->depth, sets WFERR_UNMARSHALLABLE and
 * RETURNS from the enclosing (void) function. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) <= SIZE32_MAX) {                \
            w_long((long)(n), p);               \
        }                                       \
        else {                                  \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
    } while(0)
#else
# define W_SIZE  w_long
#endif
226
227
static void
228
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
229
9.90k
{
230
9.90k
        W_SIZE(n, p);
231
9.90k
        w_string(s, n, p);
232
9.90k
}
233
234
static void
235
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
236
16.3k
{
237
16.3k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
238
16.3k
    w_string(s, n, p);
239
16.3k
}
240
241
/* We assume that Python ints are stored internally in base some power of
242
   2**15; for the sake of portability we'll always read and write them in base
243
   exactly 2**15. */
244
245
3.82k
#define PyLong_MARSHAL_SHIFT 15
246
1.68k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
247
18
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
248
249
39.6k
/* Write type code `t` combined with the caller's local variable `flag`
 * (the macro expects a `flag` to be in scope at the point of use). */
#define W_TYPE(t, p) \
    w_byte((t) | flag, (p))
252
253
static PyObject *
254
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
255
256
#define _r_digits(bitsize)                                                \
257
static void                                                               \
258
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
259
2
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
260
2
{                                                                         \
261
2
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
262
2
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
263
2
    uint ## bitsize ## _t d = digits[n - 1];                              \
264
2
                                                                          \
265
2
    assert(marshal_ratio > 0);                                            \
266
2
    assert(n >= 1);                                                       \
267
2
    assert(d != 0); /* a PyLong is always normalized */                   \
268
2
    do {                                                                  \
269
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
270
2
        l++;                                                              \
271
2
    } while (d != 0);                                                     \
272
2
    if (l > SIZE32_MAX) {                                                 \
273
0
        p->depth--;                                                       \
274
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
275
0
        return;                                                           \
276
0
    }                                                                     \
277
2
    w_long((long)(negative ? -l : l), p);                                 \
278
2
                                                                          \
279
6
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
280
4
        d = digits[i];                                                    \
281
12
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
282
8
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
283
8
            d >>= PyLong_MARSHAL_SHIFT;                                   \
284
8
        }                                                                 \
285
4
        assert(d == 0);                                                   \
286
4
    }                                                                     \
287
2
    d = digits[n - 1];                                                    \
288
2
    do {                                                                  \
289
2
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
290
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
291
2
    } while (d != 0);                                                     \
292
2
}
293
0
_r_digits(16)
294
2
_r_digits(32)
295
#undef _r_digits
296
297
static void
298
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
299
4
{
300
4
    W_TYPE(TYPE_LONG, p);
301
4
    if (_PyLong_IsZero(ob)) {
302
0
        w_long((long)0, p);
303
0
        return;
304
0
    }
305
306
4
    PyLongExport long_export;
307
308
4
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
309
0
        p->depth--;
310
0
        p->error = WFERR_UNMARSHALLABLE;
311
0
        return;
312
0
    }
313
4
    if (!long_export.digits) {
314
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
315
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
316
2
        uint64_t d = abs_value;
317
2
        long l = 0;
318
319
        /* set l to number of base PyLong_MARSHAL_BASE digits */
320
8
        do {
321
8
            d >>= PyLong_MARSHAL_SHIFT;
322
8
            l += sign;
323
8
        } while (d);
324
2
        w_long(l, p);
325
326
2
        d = abs_value;
327
8
        do {
328
8
            w_short(d & PyLong_MARSHAL_MASK, p);
329
8
            d >>= PyLong_MARSHAL_SHIFT;
330
8
        } while (d);
331
2
        return;
332
2
    }
333
334
2
    const PyLongLayout *layout = PyLong_GetNativeLayout();
335
2
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
336
337
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
338
2
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
339
2
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
340
341
    /* other assumptions on PyLongObject internals */
342
2
    assert(layout->bits_per_digit <= 32);
343
2
    assert(layout->digits_order == -1);
344
2
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
345
2
    assert(layout->digit_size == 2 || layout->digit_size == 4);
346
347
2
    if (layout->digit_size == 4) {
348
2
        _r_digits32(long_export.digits, long_export.ndigits,
349
2
                    long_export.negative, marshal_ratio, p);
350
2
    }
351
0
    else {
352
0
        _r_digits16(long_export.digits, long_export.ndigits,
353
0
                    long_export.negative, marshal_ratio, p);
354
0
    }
355
2
    PyLong_FreeExport(&long_export);
356
2
}
357
358
static void
359
w_float_bin(double v, WFILE *p)
360
15
{
361
15
    char buf[8];
362
15
    if (PyFloat_Pack8(v, buf, 1) < 0) {
363
0
        p->error = WFERR_UNMARSHALLABLE;
364
0
        return;
365
0
    }
366
15
    w_string(buf, 8, p);
367
15
}
368
369
static void
370
w_float_str(double v, WFILE *p)
371
0
{
372
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
373
0
    if (!buf) {
374
0
        p->error = WFERR_NOMEMORY;
375
0
        return;
376
0
    }
377
0
    w_short_pstring(buf, strlen(buf), p);
378
0
    PyMem_Free(buf);
379
0
}
380
381
static int
382
w_ref(PyObject *v, char *flag, WFILE *p)
383
76.4k
{
384
76.4k
    _Py_hashtable_entry_t *entry;
385
76.4k
    int w;
386
387
76.4k
    if (p->version < 3 || p->hashtable == NULL)
388
0
        return 0; /* not writing object references */
389
390
    /* If it has only one reference, it definitely isn't shared.
391
     * But we use TYPE_REF always for interned string, to PYC file stable
392
     * as possible.
393
     */
394
76.4k
    if (_PyObject_IsUniquelyReferenced(v) &&
395
20.4k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
396
18.8k
        return 0;
397
18.8k
    }
398
399
57.6k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
400
57.6k
    if (entry != NULL) {
401
        /* write the reference index to the stream */
402
36.8k
        w = (int)(uintptr_t)entry->value;
403
        /* we don't store "long" indices in the dict */
404
36.8k
        assert(0 <= w && w <= 0x7fffffff);
405
36.8k
        w_byte(TYPE_REF, p);
406
36.8k
        w_long(w, p);
407
36.8k
        return 1;
408
36.8k
    } else {
409
20.8k
        size_t s = p->hashtable->nentries;
410
        /* we don't support long indices */
411
20.8k
        if (s >= 0x7fffffff) {
412
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
413
0
            goto err;
414
0
        }
415
20.8k
        w = (int)s;
416
20.8k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
417
20.8k
                              (void *)(uintptr_t)w) < 0) {
418
0
            Py_DECREF(v);
419
0
            goto err;
420
0
        }
421
20.8k
        *flag |= FLAG_REF;
422
20.8k
        return 0;
423
20.8k
    }
424
0
err:
425
0
    p->error = WFERR_UNMARSHALLABLE;
426
0
    return 1;
427
57.6k
}
428
429
static void
430
w_complex_object(PyObject *v, char flag, WFILE *p);
431
432
static void
433
w_object(PyObject *v, WFILE *p)
434
79.0k
{
435
79.0k
    char flag = '\0';
436
437
79.0k
    if (p->error != WFERR_OK) {
438
0
        return;
439
0
    }
440
441
79.0k
    p->depth++;
442
443
79.0k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
444
0
        p->error = WFERR_NESTEDTOODEEP;
445
0
    }
446
79.0k
    else if (v == NULL) {
447
0
        w_byte(TYPE_NULL, p);
448
0
    }
449
79.0k
    else if (v == Py_None) {
450
2.05k
        w_byte(TYPE_NONE, p);
451
2.05k
    }
452
77.0k
    else if (v == PyExc_StopIteration) {
453
0
        w_byte(TYPE_STOPITER, p);
454
0
    }
455
77.0k
    else if (v == Py_Ellipsis) {
456
3
        w_byte(TYPE_ELLIPSIS, p);
457
3
    }
458
77.0k
    else if (v == Py_False) {
459
347
        w_byte(TYPE_FALSE, p);
460
347
    }
461
76.6k
    else if (v == Py_True) {
462
223
        w_byte(TYPE_TRUE, p);
463
223
    }
464
76.4k
    else if (!w_ref(v, &flag, p))
465
39.6k
        w_complex_object(v, flag, p);
466
467
79.0k
    p->depth--;
468
79.0k
}
469
470
static void
471
w_complex_object(PyObject *v, char flag, WFILE *p)
472
39.6k
{
473
39.6k
    Py_ssize_t i, n;
474
475
39.6k
    if (PyLong_CheckExact(v)) {
476
1.88k
        int overflow;
477
1.88k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
478
1.88k
        if (overflow) {
479
2
            w_PyLong((PyLongObject *)v, flag, p);
480
2
        }
481
1.88k
        else {
482
1.88k
#if SIZEOF_LONG > 4
483
1.88k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
484
1.88k
            if (y && y != -1) {
485
                /* Too large for TYPE_INT */
486
2
                w_PyLong((PyLongObject*)v, flag, p);
487
2
            }
488
1.88k
            else
489
1.88k
#endif
490
1.88k
            {
491
1.88k
                W_TYPE(TYPE_INT, p);
492
1.88k
                w_long(x, p);
493
1.88k
            }
494
1.88k
        }
495
1.88k
    }
496
37.7k
    else if (PyFloat_CheckExact(v)) {
497
13
        if (p->version > 1) {
498
13
            W_TYPE(TYPE_BINARY_FLOAT, p);
499
13
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
500
13
        }
501
0
        else {
502
0
            W_TYPE(TYPE_FLOAT, p);
503
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
504
0
        }
505
13
    }
506
37.7k
    else if (PyComplex_CheckExact(v)) {
507
1
        if (p->version > 1) {
508
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
509
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
510
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
511
1
        }
512
0
        else {
513
0
            W_TYPE(TYPE_COMPLEX, p);
514
0
            w_float_str(PyComplex_RealAsDouble(v), p);
515
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
516
0
        }
517
1
    }
518
37.7k
    else if (PyBytes_CheckExact(v)) {
519
9.63k
        W_TYPE(TYPE_STRING, p);
520
9.63k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
521
9.63k
    }
522
28.1k
    else if (PyUnicode_CheckExact(v)) {
523
16.6k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
524
16.5k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
525
16.5k
            if (is_short) {
526
16.3k
                if (PyUnicode_CHECK_INTERNED(v))
527
14.6k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
528
1.73k
                else
529
1.73k
                    W_TYPE(TYPE_SHORT_ASCII, p);
530
16.3k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
531
16.3k
                                PyUnicode_GET_LENGTH(v), p);
532
16.3k
            }
533
203
            else {
534
203
                if (PyUnicode_CHECK_INTERNED(v))
535
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
536
203
                else
537
203
                    W_TYPE(TYPE_ASCII, p);
538
203
                w_pstring(PyUnicode_1BYTE_DATA(v),
539
203
                          PyUnicode_GET_LENGTH(v), p);
540
203
            }
541
16.5k
        }
542
70
        else {
543
70
            PyObject *utf8;
544
70
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
545
70
            if (utf8 == NULL) {
546
0
                p->depth--;
547
0
                p->error = WFERR_UNMARSHALLABLE;
548
0
                return;
549
0
            }
550
70
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
551
0
                W_TYPE(TYPE_INTERNED, p);
552
70
            else
553
70
                W_TYPE(TYPE_UNICODE, p);
554
70
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
555
70
            Py_DECREF(utf8);
556
70
        }
557
16.6k
    }
558
11.4k
    else if (PyTuple_CheckExact(v)) {
559
8.09k
        n = PyTuple_GET_SIZE(v);
560
8.09k
        if (p->version >= 4 && n < 256) {
561
8.09k
            W_TYPE(TYPE_SMALL_TUPLE, p);
562
8.09k
            w_byte((unsigned char)n, p);
563
8.09k
        }
564
0
        else {
565
0
            W_TYPE(TYPE_TUPLE, p);
566
0
            W_SIZE(n, p);
567
0
        }
568
53.2k
        for (i = 0; i < n; i++) {
569
45.1k
            w_object(PyTuple_GET_ITEM(v, i), p);
570
45.1k
        }
571
8.09k
    }
572
3.39k
    else if (PyList_CheckExact(v)) {
573
0
        W_TYPE(TYPE_LIST, p);
574
0
        n = PyList_GET_SIZE(v);
575
0
        W_SIZE(n, p);
576
0
        for (i = 0; i < n; i++) {
577
0
            w_object(PyList_GET_ITEM(v, i), p);
578
0
        }
579
0
    }
580
3.39k
    else if (PyAnyDict_CheckExact(v)) {
581
0
        Py_ssize_t pos;
582
0
        PyObject *key, *value;
583
0
        if (PyFrozenDict_CheckExact(v)) {
584
0
            if (p->version < 6) {
585
0
                w_byte(TYPE_UNKNOWN, p);
586
0
                p->error = WFERR_UNMARSHALLABLE;
587
0
                return;
588
0
            }
589
590
0
            W_TYPE(TYPE_FROZENDICT, p);
591
0
        }
592
0
        else {
593
0
            W_TYPE(TYPE_DICT, p);
594
0
        }
595
        /* This one is NULL object terminated! */
596
0
        pos = 0;
597
0
        while (PyDict_Next(v, &pos, &key, &value)) {
598
0
            w_object(key, p);
599
0
            w_object(value, p);
600
0
        }
601
0
        w_object((PyObject *)NULL, p);
602
0
    }
603
3.39k
    else if (PyAnySet_CheckExact(v)) {
604
11
        PyObject *value;
605
11
        Py_ssize_t pos = 0;
606
11
        Py_hash_t hash;
607
608
11
        if (PyFrozenSet_CheckExact(v))
609
11
            W_TYPE(TYPE_FROZENSET, p);
610
0
        else
611
0
            W_TYPE(TYPE_SET, p);
612
11
        n = PySet_GET_SIZE(v);
613
11
        W_SIZE(n, p);
614
        // bpo-37596: To support reproducible builds, sets and frozensets need
615
        // to have their elements serialized in a consistent order (even when
616
        // they have been scrambled by hash randomization). To ensure this, we
617
        // use an order equivalent to sorted(v, key=marshal.dumps):
618
11
        PyObject *pairs = PyList_New(n);
619
11
        if (pairs == NULL) {
620
0
            p->error = WFERR_NOMEMORY;
621
0
            return;
622
0
        }
623
11
        Py_ssize_t i = 0;
624
11
        Py_BEGIN_CRITICAL_SECTION(v);
625
76
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
626
65
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
627
65
                                    p->version, p->allow_code);
628
65
            if (dump == NULL) {
629
0
                p->error = WFERR_UNMARSHALLABLE;
630
0
                Py_DECREF(value);
631
0
                break;
632
0
            }
633
65
            PyObject *pair = _PyTuple_FromPairSteal(dump, value);
634
65
            if (pair == NULL) {
635
0
                p->error = WFERR_NOMEMORY;
636
0
                break;
637
0
            }
638
65
            PyList_SET_ITEM(pairs, i++, pair);
639
65
        }
640
11
        Py_END_CRITICAL_SECTION();
641
11
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
642
0
            Py_DECREF(pairs);
643
0
            return;
644
0
        }
645
11
        assert(i == n);
646
11
        if (PyList_Sort(pairs)) {
647
0
            p->error = WFERR_NOMEMORY;
648
0
            Py_DECREF(pairs);
649
0
            return;
650
0
        }
651
76
        for (Py_ssize_t i = 0; i < n; i++) {
652
65
            PyObject *pair = PyList_GET_ITEM(pairs, i);
653
65
            value = PyTuple_GET_ITEM(pair, 1);
654
65
            w_object(value, p);
655
65
        }
656
11
        Py_DECREF(pairs);
657
11
    }
658
3.38k
    else if (PyCode_Check(v)) {
659
3.36k
        if (!p->allow_code) {
660
0
            p->error = WFERR_CODE_NOT_ALLOWED;
661
0
            return;
662
0
        }
663
3.36k
        PyCodeObject *co = (PyCodeObject *)v;
664
3.36k
        PyObject *co_code = _PyCode_GetCode(co);
665
3.36k
        if (co_code == NULL) {
666
0
            p->error = WFERR_NOMEMORY;
667
0
            return;
668
0
        }
669
3.36k
        W_TYPE(TYPE_CODE, p);
670
3.36k
        w_long(co->co_argcount, p);
671
3.36k
        w_long(co->co_posonlyargcount, p);
672
3.36k
        w_long(co->co_kwonlyargcount, p);
673
3.36k
        w_long(co->co_stacksize, p);
674
3.36k
        w_long(co->co_flags, p);
675
3.36k
        w_object(co_code, p);
676
3.36k
        w_object(co->co_consts, p);
677
3.36k
        w_object(co->co_names, p);
678
3.36k
        w_object(co->co_localsplusnames, p);
679
3.36k
        w_object(co->co_localspluskinds, p);
680
3.36k
        w_object(co->co_filename, p);
681
3.36k
        w_object(co->co_name, p);
682
3.36k
        w_object(co->co_qualname, p);
683
3.36k
        w_long(co->co_firstlineno, p);
684
3.36k
        w_object(co->co_linetable, p);
685
3.36k
        w_object(co->co_exceptiontable, p);
686
3.36k
        Py_DECREF(co_code);
687
3.36k
    }
688
25
    else if (PyObject_CheckBuffer(v)) {
689
        /* Write unknown bytes-like objects as a bytes object */
690
0
        Py_buffer view;
691
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
692
0
            w_byte(TYPE_UNKNOWN, p);
693
0
            p->depth--;
694
0
            p->error = WFERR_UNMARSHALLABLE;
695
0
            return;
696
0
        }
697
0
        W_TYPE(TYPE_STRING, p);
698
0
        w_pstring(view.buf, view.len, p);
699
0
        PyBuffer_Release(&view);
700
0
    }
701
25
    else if (PySlice_Check(v)) {
702
25
        if (p->version < 5) {
703
0
            w_byte(TYPE_UNKNOWN, p);
704
0
            p->error = WFERR_UNMARSHALLABLE;
705
0
            return;
706
0
        }
707
25
        PySliceObject *slice = (PySliceObject *)v;
708
25
        W_TYPE(TYPE_SLICE, p);
709
25
        w_object(slice->start, p);
710
25
        w_object(slice->stop, p);
711
25
        w_object(slice->step, p);
712
25
    }
713
0
    else {
714
0
        W_TYPE(TYPE_UNKNOWN, p);
715
0
        p->error = WFERR_UNMARSHALLABLE;
716
0
    }
717
39.6k
}
718
719
static void
720
w_decref_entry(void *key)
721
20.8k
{
722
20.8k
    PyObject *entry_key = (PyObject *)key;
723
20.8k
    Py_XDECREF(entry_key);
724
20.8k
}
725
726
static int
727
w_init_refs(WFILE *wf, int version)
728
235
{
729
235
    if (version >= 3) {
730
235
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
731
235
                                               _Py_hashtable_compare_direct,
732
235
                                               w_decref_entry, NULL, NULL);
733
235
        if (wf->hashtable == NULL) {
734
0
            PyErr_NoMemory();
735
0
            return -1;
736
0
        }
737
235
    }
738
235
    return 0;
739
235
}
740
741
static void
742
w_clear_refs(WFILE *wf)
743
235
{
744
235
    if (wf->hashtable != NULL) {
745
235
        _Py_hashtable_destroy(wf->hashtable);
746
235
    }
747
235
}
748
749
/* version currently has no effect for writing ints. */
750
/* Note that while the documentation states that this function
751
 * can error, currently it never does. Setting an exception in
752
 * this function should be regarded as an API-breaking change.
753
 */
754
void
755
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
756
0
{
757
0
    char buf[4];
758
0
    WFILE wf;
759
0
    memset(&wf, 0, sizeof(wf));
760
0
    wf.fp = fp;
761
0
    wf.ptr = wf.buf = buf;
762
0
    wf.end = wf.ptr + sizeof(buf);
763
0
    wf.error = WFERR_OK;
764
0
    wf.version = version;
765
0
    w_long(x, &wf);
766
0
    w_flush(&wf);
767
0
}
768
769
void
770
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
771
0
{
772
0
    char buf[BUFSIZ];
773
0
    WFILE wf;
774
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
775
0
        return; /* caller must check PyErr_Occurred() */
776
0
    }
777
0
    memset(&wf, 0, sizeof(wf));
778
0
    wf.fp = fp;
779
0
    wf.ptr = wf.buf = buf;
780
0
    wf.end = wf.ptr + sizeof(buf);
781
0
    wf.error = WFERR_OK;
782
0
    wf.version = version;
783
0
    wf.allow_code = 1;
784
0
    if (w_init_refs(&wf, version)) {
785
0
        return; /* caller must check PyErr_Occurred() */
786
0
    }
787
0
    w_object(x, &wf);
788
0
    w_clear_refs(&wf);
789
0
    w_flush(&wf);
790
0
}
791
792
/* State of one marshal read operation.  Input comes from memory (ptr/end),
 * from a FILE (fp) or from a stream-like Python object (readable). */
typedef struct {
793
    FILE *fp;            /* read with fread()/getc() when ptr and readable are NULL */
794
    int depth;           /* NOTE(review): presumably the read nesting depth; not used in the visible code */
795
    PyObject *readable;  /* Stream-like object being read from */
796
    const char *ptr;     /* current position in in-memory input, or NULL if not reading from memory */
797
    const char *end;     /* one past the last byte of in-memory input */
798
    char *buf;           /* scratch buffer allocated by r_string() for fp/readable input */
799
    Py_ssize_t buf_size; /* allocated size of buf */
800
    PyObject *refs;  /* a list */
801
    int allow_code;      /* NOTE(review): presumably mirrors WFILE.allow_code; not used in the visible code */
802
} RFILE;
803
804
static const char *
805
r_string(Py_ssize_t n, RFILE *p)
806
5.28M
{
807
5.28M
    Py_ssize_t read = -1;
808
809
5.28M
    if (p->ptr != NULL) {
810
        /* Fast path for loads() */
811
5.28M
        const char *res = p->ptr;
812
5.28M
        Py_ssize_t left = p->end - p->ptr;
813
5.28M
        if (left < n) {
814
0
            PyErr_SetString(PyExc_EOFError,
815
0
                            "marshal data too short");
816
0
            return NULL;
817
0
        }
818
5.28M
        p->ptr += n;
819
5.28M
        return res;
820
5.28M
    }
821
0
    if (p->buf == NULL) {
822
0
        p->buf = PyMem_Malloc(n);
823
0
        if (p->buf == NULL) {
824
0
            PyErr_NoMemory();
825
0
            return NULL;
826
0
        }
827
0
        p->buf_size = n;
828
0
    }
829
0
    else if (p->buf_size < n) {
830
0
        char *tmp = PyMem_Realloc(p->buf, n);
831
0
        if (tmp == NULL) {
832
0
            PyErr_NoMemory();
833
0
            return NULL;
834
0
        }
835
0
        p->buf = tmp;
836
0
        p->buf_size = n;
837
0
    }
838
839
0
    if (!p->readable) {
840
0
        assert(p->fp != NULL);
841
0
        read = fread(p->buf, 1, n, p->fp);
842
0
    }
843
0
    else {
844
0
        PyObject *res, *mview;
845
0
        Py_buffer buf;
846
847
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
848
0
            return NULL;
849
0
        mview = PyMemoryView_FromBuffer(&buf);
850
0
        if (mview == NULL)
851
0
            return NULL;
852
853
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
854
0
        if (res != NULL) {
855
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
856
0
            Py_DECREF(res);
857
0
        }
858
0
    }
859
0
    if (read != n) {
860
0
        if (!PyErr_Occurred()) {
861
0
            if (read > n)
862
0
                PyErr_Format(PyExc_ValueError,
863
0
                             "read() returned too much data: "
864
0
                             "%zd bytes requested, %zd returned",
865
0
                             n, read);
866
0
            else
867
0
                PyErr_SetString(PyExc_EOFError,
868
0
                                "EOF read where not expected");
869
0
        }
870
0
        return NULL;
871
0
    }
872
0
    return p->buf;
873
0
}
874
875
static int
876
r_byte(RFILE *p)
877
6.18M
{
878
6.18M
    if (p->ptr != NULL) {
879
6.18M
        if (p->ptr < p->end) {
880
6.18M
            return (unsigned char) *p->ptr++;
881
6.18M
        }
882
6.18M
    }
883
0
    else if (!p->readable) {
884
0
        assert(p->fp);
885
0
        int c = getc(p->fp);
886
0
        if (c != EOF) {
887
0
            return c;
888
0
        }
889
0
    }
890
0
    else {
891
0
        const char *ptr = r_string(1, p);
892
0
        if (ptr != NULL) {
893
0
            return *(const unsigned char *) ptr;
894
0
        }
895
0
        return EOF;
896
0
    }
897
0
    PyErr_SetString(PyExc_EOFError,
898
0
                    "EOF read where not expected");
899
0
    return EOF;
900
6.18M
}
901
902
static int
903
r_short(RFILE *p)
904
1.67k
{
905
1.67k
    short x = -1;
906
1.67k
    const unsigned char *buffer;
907
908
1.67k
    buffer = (const unsigned char *) r_string(2, p);
909
1.67k
    if (buffer != NULL) {
910
1.67k
        x = buffer[0];
911
1.67k
        x |= buffer[1] << 8;
912
        /* Sign-extension, in case short greater than 16 bits */
913
1.67k
        x |= -(x & 0x8000);
914
1.67k
    }
915
1.67k
    return x;
916
1.67k
}
917
918
static long
919
r_long(RFILE *p)
920
3.47M
{
921
3.47M
    long x = -1;
922
3.47M
    const unsigned char *buffer;
923
924
3.47M
    buffer = (const unsigned char *) r_string(4, p);
925
3.47M
    if (buffer != NULL) {
926
3.47M
        x = buffer[0];
927
3.47M
        x |= (long)buffer[1] << 8;
928
3.47M
        x |= (long)buffer[2] << 16;
929
3.47M
        x |= (long)buffer[3] << 24;
930
3.47M
#if SIZEOF_LONG > 4
931
        /* Sign extension for 64-bit machines */
932
3.47M
        x |= -(x & 0x80000000L);
933
3.47M
#endif
934
3.47M
    }
935
3.47M
    return x;
936
3.47M
}
937
938
/* r_long64 deals with the TYPE_INT64 code. */
939
static PyObject *
940
r_long64(RFILE *p)
941
0
{
942
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
943
0
    if (buffer == NULL) {
944
0
        return NULL;
945
0
    }
946
0
    return _PyLong_FromByteArray(buffer, 8,
947
0
                                 1 /* little endian */,
948
0
                                 1 /* signed */);
949
0
}
950
951
#define _w_digits(bitsize)                                              \
952
static int                                                              \
953
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
954
                   Py_ssize_t marshal_ratio,                            \
955
435
                   int shorts_in_top_digit, RFILE *p)                   \
956
435
{                                                                       \
957
435
    uint ## bitsize ## _t d;                                            \
958
435
                                                                        \
959
435
    assert(size >= 1);                                                  \
960
1.01k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
961
580
        d = 0;                                                          \
962
1.74k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
963
1.16k
            int md = r_short(p);                                        \
964
1.16k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
965
0
                goto bad_digit;                                         \
966
0
            }                                                           \
967
1.16k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
968
1.16k
        }                                                               \
969
580
        digits[i] = d;                                                  \
970
580
    }                                                                   \
971
435
                                                                        \
972
435
    d = 0;                                                              \
973
946
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
974
511
        int md = r_short(p);                                            \
975
511
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
976
0
            goto bad_digit;                                             \
977
0
        }                                                               \
978
511
        /* topmost marshal digit should be nonzero */                   \
979
511
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
980
0
            PyErr_SetString(PyExc_ValueError,                           \
981
0
                "bad marshal data (unnormalized long data)");           \
982
0
            return -1;                                                  \
983
0
        }                                                               \
984
511
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
985
511
    }                                                                   \
986
435
    assert(!PyErr_Occurred());                                          \
987
435
    /* top digit should be nonzero, else the resulting PyLong won't be  \
988
435
       normalized */                                                    \
989
435
    digits[size - 1] = d;                                               \
990
435
    return 0;                                                           \
991
435
                                                                        \
992
0
bad_digit:                                                              \
993
0
    if (!PyErr_Occurred()) {                                            \
994
0
        PyErr_SetString(PyExc_ValueError,                               \
995
0
            "bad marshal data (digit out of range in long)");           \
996
0
    }                                                                   \
997
0
    return -1;                                                          \
998
435
}
999
435
_w_digits(32)
1000
0
_w_digits(16)
1001
#undef _w_digits
1002
1003
static PyObject *
1004
r_PyLong(RFILE *p)
1005
435
{
1006
435
    long n = r_long(p);
1007
435
    if (n == -1 && PyErr_Occurred()) {
1008
0
        return NULL;
1009
0
    }
1010
435
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1011
0
        PyErr_SetString(PyExc_ValueError,
1012
0
                       "bad marshal data (long size out of range)");
1013
0
        return NULL;
1014
0
    }
1015
1016
435
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1017
435
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1018
1019
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1020
435
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1021
435
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1022
1023
    /* other assumptions on PyLongObject internals */
1024
435
    assert(layout->bits_per_digit <= 32);
1025
435
    assert(layout->digits_order == -1);
1026
435
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1027
435
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1028
1029
435
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1030
1031
435
    assert(size >= 1);
1032
1033
435
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1034
435
    void *digits;
1035
435
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1036
1037
435
    if (writer == NULL) {
1038
0
        return NULL;
1039
0
    }
1040
1041
435
    int ret;
1042
1043
435
    if (layout->digit_size == 4) {
1044
435
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1045
435
    }
1046
0
    else {
1047
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1048
0
    }
1049
435
    if (ret < 0) {
1050
0
        PyLongWriter_Discard(writer);
1051
0
        return NULL;
1052
0
    }
1053
435
    return PyLongWriter_Finish(writer);
1054
435
}
1055
1056
static double
1057
r_float_bin(RFILE *p)
1058
637
{
1059
637
    const char *buf = r_string(8, p);
1060
637
    if (buf == NULL)
1061
0
        return -1;
1062
637
    return PyFloat_Unpack8(buf, 1);
1063
637
}
1064
1065
/* Issue #33720: Disable inlining for reducing the C stack consumption
1066
   on PGO builds. */
1067
Py_NO_INLINE static double
1068
r_float_str(RFILE *p)
1069
0
{
1070
0
    int n;
1071
0
    char buf[256];
1072
0
    const char *ptr;
1073
0
    n = r_byte(p);
1074
0
    if (n == EOF) {
1075
0
        return -1;
1076
0
    }
1077
0
    ptr = r_string(n, p);
1078
0
    if (ptr == NULL) {
1079
0
        return -1;
1080
0
    }
1081
0
    memcpy(buf, ptr, n);
1082
0
    buf[n] = '\0';
1083
0
    return PyOS_string_to_double(buf, NULL, NULL);
1084
0
}
1085
1086
/* allocate the reflist index for a new object. Return -1 on failure */
1087
static Py_ssize_t
1088
r_ref_reserve(int flag, RFILE *p)
1089
174k
{
1090
174k
    if (flag) { /* currently only FLAG_REF is defined */
1091
6.40k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1092
6.40k
        if (idx >= 0x7ffffffe) {
1093
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1094
0
            return -1;
1095
0
        }
1096
6.40k
        if (PyList_Append(p->refs, Py_None) < 0)
1097
0
            return -1;
1098
6.40k
        return idx;
1099
6.40k
    } else
1100
168k
        return 0;
1101
174k
}
1102
1103
/* insert the new object 'o' to the reflist at previously
1104
 * allocated index 'idx'.
1105
 * 'o' can be NULL, in which case nothing is done.
1106
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1107
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1108
 * NULL returned. This simplifies error checking at the call site since
1109
 * a single test for NULL for the function result is enough.
1110
 */
1111
static PyObject *
1112
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1113
174k
{
1114
174k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1115
6.40k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1116
6.40k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1117
6.40k
        Py_DECREF(tmp);
1118
6.40k
    }
1119
174k
    return o;
1120
174k
}
1121
1122
/* combination of both above, used when an object can be
1123
 * created whenever it is seen in the file, as opposed to
1124
 * after having loaded its sub-objects.
1125
 */
1126
static PyObject *
1127
r_ref(PyObject *o, int flag, RFILE *p)
1128
1.37M
{
1129
1.37M
    assert(flag & FLAG_REF);
1130
1.37M
    if (o == NULL)
1131
0
        return NULL;
1132
1.37M
    if (PyList_Append(p->refs, o) < 0) {
1133
0
        Py_DECREF(o); /* release the new object */
1134
0
        return NULL;
1135
0
    }
1136
1.37M
    return o;
1137
1.37M
}
1138
1139
static PyObject *
1140
r_object(RFILE *p)
1141
4.46M
{
1142
    /* NULL is a valid return value, it does not necessarily means that
1143
       an exception is set. */
1144
4.46M
    PyObject *v, *v2;
1145
4.46M
    Py_ssize_t idx = 0;
1146
4.46M
    long i, n;
1147
4.46M
    int type, code = r_byte(p);
1148
4.46M
    int flag, is_interned = 0;
1149
4.46M
    PyObject *retval = NULL;
1150
1151
4.46M
    if (code == EOF) {
1152
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1153
0
            PyErr_SetString(PyExc_EOFError,
1154
0
                            "EOF read where object expected");
1155
0
        }
1156
0
        return NULL;
1157
0
    }
1158
1159
4.46M
    p->depth++;
1160
1161
4.46M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1162
0
        p->depth--;
1163
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1164
0
        return NULL;
1165
0
    }
1166
1167
4.46M
    flag = code & FLAG_REF;
1168
4.46M
    type = code & ~FLAG_REF;
1169
1170
4.46M
#define R_REF(O) do{\
1171
2.28M
    if (flag) \
1172
2.28M
        O = r_ref(O, flag, p);\
1173
2.28M
} while (0)
1174
1175
4.46M
    switch (type) {
1176
1177
0
    case TYPE_NULL:
1178
0
        break;
1179
1180
95.1k
    case TYPE_NONE:
1181
95.1k
        retval = Py_None;
1182
95.1k
        break;
1183
1184
0
    case TYPE_STOPITER:
1185
0
        retval = Py_NewRef(PyExc_StopIteration);
1186
0
        break;
1187
1188
412
    case TYPE_ELLIPSIS:
1189
412
        retval = Py_Ellipsis;
1190
412
        break;
1191
1192
12.3k
    case TYPE_FALSE:
1193
12.3k
        retval = Py_False;
1194
12.3k
        break;
1195
1196
11.5k
    case TYPE_TRUE:
1197
11.5k
        retval = Py_True;
1198
11.5k
        break;
1199
1200
24.8k
    case TYPE_INT:
1201
24.8k
        n = r_long(p);
1202
24.8k
        if (n == -1 && PyErr_Occurred()) {
1203
0
            break;
1204
0
        }
1205
24.8k
        retval = PyLong_FromLong(n);
1206
24.8k
        R_REF(retval);
1207
24.8k
        break;
1208
1209
0
    case TYPE_INT64:
1210
0
        retval = r_long64(p);
1211
0
        R_REF(retval);
1212
0
        break;
1213
1214
435
    case TYPE_LONG:
1215
435
        retval = r_PyLong(p);
1216
435
        R_REF(retval);
1217
435
        break;
1218
1219
0
    case TYPE_FLOAT:
1220
0
        {
1221
0
            double x = r_float_str(p);
1222
0
            if (x == -1.0 && PyErr_Occurred())
1223
0
                break;
1224
0
            retval = PyFloat_FromDouble(x);
1225
0
            R_REF(retval);
1226
0
            break;
1227
0
        }
1228
1229
631
    case TYPE_BINARY_FLOAT:
1230
631
        {
1231
631
            double x = r_float_bin(p);
1232
631
            if (x == -1.0 && PyErr_Occurred())
1233
0
                break;
1234
631
            retval = PyFloat_FromDouble(x);
1235
631
            R_REF(retval);
1236
631
            break;
1237
631
        }
1238
1239
0
    case TYPE_COMPLEX:
1240
0
        {
1241
0
            Py_complex c;
1242
0
            c.real = r_float_str(p);
1243
0
            if (c.real == -1.0 && PyErr_Occurred())
1244
0
                break;
1245
0
            c.imag = r_float_str(p);
1246
0
            if (c.imag == -1.0 && PyErr_Occurred())
1247
0
                break;
1248
0
            retval = PyComplex_FromCComplex(c);
1249
0
            R_REF(retval);
1250
0
            break;
1251
0
        }
1252
1253
3
    case TYPE_BINARY_COMPLEX:
1254
3
        {
1255
3
            Py_complex c;
1256
3
            c.real = r_float_bin(p);
1257
3
            if (c.real == -1.0 && PyErr_Occurred())
1258
0
                break;
1259
3
            c.imag = r_float_bin(p);
1260
3
            if (c.imag == -1.0 && PyErr_Occurred())
1261
0
                break;
1262
3
            retval = PyComplex_FromCComplex(c);
1263
3
            R_REF(retval);
1264
3
            break;
1265
3
        }
1266
1267
514k
    case TYPE_STRING:
1268
514k
        {
1269
514k
            const char *ptr;
1270
514k
            n = r_long(p);
1271
514k
            if (n < 0 || n > SIZE32_MAX) {
1272
0
                if (!PyErr_Occurred()) {
1273
0
                    PyErr_SetString(PyExc_ValueError,
1274
0
                        "bad marshal data (bytes object size out of range)");
1275
0
                }
1276
0
                break;
1277
0
            }
1278
514k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1279
514k
            if (v == NULL)
1280
0
                break;
1281
514k
            ptr = r_string(n, p);
1282
514k
            if (ptr == NULL) {
1283
0
                Py_DECREF(v);
1284
0
                break;
1285
0
            }
1286
514k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1287
514k
            retval = v;
1288
514k
            R_REF(retval);
1289
514k
            break;
1290
514k
        }
1291
1292
0
    case TYPE_ASCII_INTERNED:
1293
0
        is_interned = 1;
1294
0
        _Py_FALLTHROUGH;
1295
25.8k
    case TYPE_ASCII:
1296
25.8k
        n = r_long(p);
1297
25.8k
        if (n < 0 || n > SIZE32_MAX) {
1298
0
            if (!PyErr_Occurred()) {
1299
0
                PyErr_SetString(PyExc_ValueError,
1300
0
                    "bad marshal data (string size out of range)");
1301
0
            }
1302
0
            break;
1303
0
        }
1304
25.8k
        goto _read_ascii;
1305
1306
1.09M
    case TYPE_SHORT_ASCII_INTERNED:
1307
1.09M
        is_interned = 1;
1308
1.09M
        _Py_FALLTHROUGH;
1309
1.25M
    case TYPE_SHORT_ASCII:
1310
1.25M
        n = r_byte(p);
1311
1.25M
        if (n == EOF) {
1312
0
            break;
1313
0
        }
1314
1.28M
    _read_ascii:
1315
1.28M
        {
1316
1.28M
            const char *ptr;
1317
1.28M
            ptr = r_string(n, p);
1318
1.28M
            if (ptr == NULL)
1319
0
                break;
1320
1.28M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1321
1.28M
            if (v == NULL)
1322
0
                break;
1323
1.28M
            if (is_interned) {
1324
                // marshal is meant to serialize .pyc files with code
1325
                // objects, and code-related strings are currently immortal.
1326
1.09M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1327
1.09M
                _PyUnicode_InternImmortal(interp, &v);
1328
1.09M
            }
1329
1.28M
            retval = v;
1330
1.28M
            R_REF(retval);
1331
1.28M
            break;
1332
1.28M
        }
1333
1334
206
    case TYPE_INTERNED:
1335
206
        is_interned = 1;
1336
206
        _Py_FALLTHROUGH;
1337
3.55k
    case TYPE_UNICODE:
1338
3.55k
        {
1339
3.55k
        const char *buffer;
1340
1341
3.55k
        n = r_long(p);
1342
3.55k
        if (n < 0 || n > SIZE32_MAX) {
1343
0
            if (!PyErr_Occurred()) {
1344
0
                PyErr_SetString(PyExc_ValueError,
1345
0
                    "bad marshal data (string size out of range)");
1346
0
            }
1347
0
            break;
1348
0
        }
1349
3.55k
        if (n != 0) {
1350
3.55k
            buffer = r_string(n, p);
1351
3.55k
            if (buffer == NULL)
1352
0
                break;
1353
3.55k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1354
3.55k
        }
1355
0
        else {
1356
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1357
0
        }
1358
3.55k
        if (v == NULL)
1359
0
            break;
1360
3.55k
        if (is_interned) {
1361
            // marshal is meant to serialize .pyc files with code
1362
            // objects, and code-related strings are currently immortal.
1363
206
            PyInterpreterState *interp = _PyInterpreterState_GET();
1364
206
            _PyUnicode_InternImmortal(interp, &v);
1365
206
        }
1366
3.55k
        retval = v;
1367
3.55k
        R_REF(retval);
1368
3.55k
        break;
1369
3.55k
        }
1370
1371
458k
    case TYPE_SMALL_TUPLE:
1372
458k
        n = r_byte(p);
1373
458k
        if (n == EOF) {
1374
0
            break;
1375
0
        }
1376
458k
        goto _read_tuple;
1377
458k
    case TYPE_TUPLE:
1378
74
        n = r_long(p);
1379
74
        if (n < 0 || n > SIZE32_MAX) {
1380
0
            if (!PyErr_Occurred()) {
1381
0
                PyErr_SetString(PyExc_ValueError,
1382
0
                    "bad marshal data (tuple size out of range)");
1383
0
            }
1384
0
            break;
1385
0
        }
1386
459k
    _read_tuple:
1387
459k
        v = PyTuple_New(n);
1388
459k
        R_REF(v);
1389
459k
        if (v == NULL)
1390
0
            break;
1391
1392
3.19M
        for (i = 0; i < n; i++) {
1393
2.73M
            v2 = r_object(p);
1394
2.73M
            if ( v2 == NULL ) {
1395
0
                if (!PyErr_Occurred())
1396
0
                    PyErr_SetString(PyExc_TypeError,
1397
0
                        "NULL object in marshal data for tuple");
1398
0
                Py_SETREF(v, NULL);
1399
0
                break;
1400
0
            }
1401
2.73M
            PyTuple_SET_ITEM(v, i, v2);
1402
2.73M
        }
1403
459k
        retval = v;
1404
459k
        break;
1405
1406
0
    case TYPE_LIST:
1407
0
        n = r_long(p);
1408
0
        if (n < 0 || n > SIZE32_MAX) {
1409
0
            if (!PyErr_Occurred()) {
1410
0
                PyErr_SetString(PyExc_ValueError,
1411
0
                    "bad marshal data (list size out of range)");
1412
0
            }
1413
0
            break;
1414
0
        }
1415
0
        v = PyList_New(n);
1416
0
        R_REF(v);
1417
0
        if (v == NULL)
1418
0
            break;
1419
0
        for (i = 0; i < n; i++) {
1420
0
            v2 = r_object(p);
1421
0
            if ( v2 == NULL ) {
1422
0
                if (!PyErr_Occurred())
1423
0
                    PyErr_SetString(PyExc_TypeError,
1424
0
                        "NULL object in marshal data for list");
1425
0
                Py_SETREF(v, NULL);
1426
0
                break;
1427
0
            }
1428
0
            PyList_SET_ITEM(v, i, v2);
1429
0
        }
1430
0
        retval = v;
1431
0
        break;
1432
1433
0
    case TYPE_DICT:
1434
0
    case TYPE_FROZENDICT:
1435
0
        v = PyDict_New();
1436
0
        R_REF(v);
1437
0
        if (v == NULL)
1438
0
            break;
1439
0
        for (;;) {
1440
0
            PyObject *key, *val;
1441
0
            key = r_object(p);
1442
0
            if (key == NULL)
1443
0
                break;
1444
0
            val = r_object(p);
1445
0
            if (val == NULL) {
1446
0
                Py_DECREF(key);
1447
0
                break;
1448
0
            }
1449
0
            if (PyDict_SetItem(v, key, val) < 0) {
1450
0
                Py_DECREF(key);
1451
0
                Py_DECREF(val);
1452
0
                break;
1453
0
            }
1454
0
            Py_DECREF(key);
1455
0
            Py_DECREF(val);
1456
0
        }
1457
0
        if (PyErr_Occurred()) {
1458
0
            Py_CLEAR(v);
1459
0
        }
1460
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1461
0
            PyObject *frozendict = PyFrozenDict_New(v);
1462
0
            if (frozendict != NULL) {
1463
0
                Py_SETREF(v, frozendict);
1464
0
            }
1465
0
            else {
1466
0
                Py_CLEAR(v);
1467
0
            }
1468
0
        }
1469
0
        retval = v;
1470
0
        break;
1471
1472
0
    case TYPE_SET:
1473
505
    case TYPE_FROZENSET:
1474
505
        n = r_long(p);
1475
505
        if (n < 0 || n > SIZE32_MAX) {
1476
0
            if (!PyErr_Occurred()) {
1477
0
                PyErr_SetString(PyExc_ValueError,
1478
0
                    "bad marshal data (set size out of range)");
1479
0
            }
1480
0
            break;
1481
0
        }
1482
1483
505
        if (n == 0 && type == TYPE_FROZENSET) {
1484
            /* call frozenset() to get the empty frozenset singleton */
1485
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1486
0
            if (v == NULL)
1487
0
                break;
1488
0
            R_REF(v);
1489
0
            retval = v;
1490
0
        }
1491
505
        else {
1492
505
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1493
505
            if (type == TYPE_SET) {
1494
0
                R_REF(v);
1495
505
            } else {
1496
                /* must use delayed registration of frozensets because they must
1497
                 * be init with a refcount of 1
1498
                 */
1499
505
                idx = r_ref_reserve(flag, p);
1500
505
                if (idx < 0)
1501
0
                    Py_CLEAR(v); /* signal error */
1502
505
            }
1503
505
            if (v == NULL)
1504
0
                break;
1505
1506
2.61k
            for (i = 0; i < n; i++) {
1507
2.11k
                v2 = r_object(p);
1508
2.11k
                if ( v2 == NULL ) {
1509
0
                    if (!PyErr_Occurred())
1510
0
                        PyErr_SetString(PyExc_TypeError,
1511
0
                            "NULL object in marshal data for set");
1512
0
                    Py_SETREF(v, NULL);
1513
0
                    break;
1514
0
                }
1515
2.11k
                if (PySet_Add(v, v2) == -1) {
1516
0
                    Py_DECREF(v);
1517
0
                    Py_DECREF(v2);
1518
0
                    v = NULL;
1519
0
                    break;
1520
0
                }
1521
2.11k
                Py_DECREF(v2);
1522
2.11k
            }
1523
505
            if (type != TYPE_SET)
1524
505
                v = r_ref_insert(v, idx, flag, p);
1525
505
            retval = v;
1526
505
        }
1527
505
        break;
1528
1529
170k
    case TYPE_CODE:
1530
170k
        {
1531
170k
            int argcount;
1532
170k
            int posonlyargcount;
1533
170k
            int kwonlyargcount;
1534
170k
            int stacksize;
1535
170k
            int flags;
1536
170k
            PyObject *code = NULL;
1537
170k
            PyObject *consts = NULL;
1538
170k
            PyObject *names = NULL;
1539
170k
            PyObject *localsplusnames = NULL;
1540
170k
            PyObject *localspluskinds = NULL;
1541
170k
            PyObject *filename = NULL;
1542
170k
            PyObject *name = NULL;
1543
170k
            PyObject *qualname = NULL;
1544
170k
            int firstlineno;
1545
170k
            PyObject* linetable = NULL;
1546
170k
            PyObject *exceptiontable = NULL;
1547
1548
170k
            if (!p->allow_code) {
1549
0
                PyErr_SetString(PyExc_ValueError,
1550
0
                                "unmarshalling code objects is disallowed");
1551
0
                break;
1552
0
            }
1553
170k
            idx = r_ref_reserve(flag, p);
1554
170k
            if (idx < 0)
1555
0
                break;
1556
1557
170k
            v = NULL;
1558
1559
            /* XXX ignore long->int overflows for now */
1560
170k
            argcount = (int)r_long(p);
1561
170k
            if (argcount == -1 && PyErr_Occurred())
1562
0
                goto code_error;
1563
170k
            posonlyargcount = (int)r_long(p);
1564
170k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1565
0
                goto code_error;
1566
0
            }
1567
170k
            kwonlyargcount = (int)r_long(p);
1568
170k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1569
0
                goto code_error;
1570
170k
            stacksize = (int)r_long(p);
1571
170k
            if (stacksize == -1 && PyErr_Occurred())
1572
0
                goto code_error;
1573
170k
            flags = (int)r_long(p);
1574
170k
            if (flags == -1 && PyErr_Occurred())
1575
0
                goto code_error;
1576
170k
            code = r_object(p);
1577
170k
            if (code == NULL)
1578
0
                goto code_error;
1579
170k
            consts = r_object(p);
1580
170k
            if (consts == NULL)
1581
0
                goto code_error;
1582
170k
            names = r_object(p);
1583
170k
            if (names == NULL)
1584
0
                goto code_error;
1585
170k
            localsplusnames = r_object(p);
1586
170k
            if (localsplusnames == NULL)
1587
0
                goto code_error;
1588
170k
            localspluskinds = r_object(p);
1589
170k
            if (localspluskinds == NULL)
1590
0
                goto code_error;
1591
170k
            filename = r_object(p);
1592
170k
            if (filename == NULL)
1593
0
                goto code_error;
1594
170k
            name = r_object(p);
1595
170k
            if (name == NULL)
1596
0
                goto code_error;
1597
170k
            qualname = r_object(p);
1598
170k
            if (qualname == NULL)
1599
0
                goto code_error;
1600
170k
            firstlineno = (int)r_long(p);
1601
170k
            if (firstlineno == -1 && PyErr_Occurred())
1602
0
                break;
1603
170k
            linetable = r_object(p);
1604
170k
            if (linetable == NULL)
1605
0
                goto code_error;
1606
170k
            exceptiontable = r_object(p);
1607
170k
            if (exceptiontable == NULL)
1608
0
                goto code_error;
1609
1610
170k
            struct _PyCodeConstructor con = {
1611
170k
                .filename = filename,
1612
170k
                .name = name,
1613
170k
                .qualname = qualname,
1614
170k
                .flags = flags,
1615
1616
170k
                .code = code,
1617
170k
                .firstlineno = firstlineno,
1618
170k
                .linetable = linetable,
1619
1620
170k
                .consts = consts,
1621
170k
                .names = names,
1622
1623
170k
                .localsplusnames = localsplusnames,
1624
170k
                .localspluskinds = localspluskinds,
1625
1626
170k
                .argcount = argcount,
1627
170k
                .posonlyargcount = posonlyargcount,
1628
170k
                .kwonlyargcount = kwonlyargcount,
1629
1630
170k
                .stacksize = stacksize,
1631
1632
170k
                .exceptiontable = exceptiontable,
1633
170k
            };
1634
1635
170k
            if (_PyCode_Validate(&con) < 0) {
1636
0
                goto code_error;
1637
0
            }
1638
1639
170k
            v = (PyObject *)_PyCode_New(&con);
1640
170k
            if (v == NULL) {
1641
0
                goto code_error;
1642
0
            }
1643
1644
170k
            v = r_ref_insert(v, idx, flag, p);
1645
1646
170k
          code_error:
1647
170k
            if (v == NULL && !PyErr_Occurred()) {
1648
0
                PyErr_SetString(PyExc_TypeError,
1649
0
                    "NULL object in marshal data for code object");
1650
0
            }
1651
170k
            Py_XDECREF(code);
1652
170k
            Py_XDECREF(consts);
1653
170k
            Py_XDECREF(names);
1654
170k
            Py_XDECREF(localsplusnames);
1655
170k
            Py_XDECREF(localspluskinds);
1656
170k
            Py_XDECREF(filename);
1657
170k
            Py_XDECREF(name);
1658
170k
            Py_XDECREF(qualname);
1659
170k
            Py_XDECREF(linetable);
1660
170k
            Py_XDECREF(exceptiontable);
1661
170k
        }
1662
0
        retval = v;
1663
170k
        break;
1664
1665
1.88M
    case TYPE_REF:
1666
1.88M
        n = r_long(p);
1667
1.88M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1668
0
            if (!PyErr_Occurred()) {
1669
0
                PyErr_SetString(PyExc_ValueError,
1670
0
                    "bad marshal data (invalid reference)");
1671
0
            }
1672
0
            break;
1673
0
        }
1674
1.88M
        v = PyList_GET_ITEM(p->refs, n);
1675
1.88M
        if (v == Py_None) {
1676
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1677
0
            break;
1678
0
        }
1679
1.88M
        retval = Py_NewRef(v);
1680
1.88M
        break;
1681
1682
3.34k
    case TYPE_SLICE:
1683
3.34k
    {
1684
3.34k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1685
3.34k
        if (idx < 0) {
1686
0
            break;
1687
0
        }
1688
3.34k
        PyObject *stop = NULL;
1689
3.34k
        PyObject *step = NULL;
1690
3.34k
        PyObject *start = r_object(p);
1691
3.34k
        if (start == NULL) {
1692
0
            goto cleanup;
1693
0
        }
1694
3.34k
        stop = r_object(p);
1695
3.34k
        if (stop == NULL) {
1696
0
            goto cleanup;
1697
0
        }
1698
3.34k
        step = r_object(p);
1699
3.34k
        if (step == NULL) {
1700
0
            goto cleanup;
1701
0
        }
1702
3.34k
        retval = PySlice_New(start, stop, step);
1703
3.34k
        r_ref_insert(retval, idx, flag, p);
1704
3.34k
    cleanup:
1705
3.34k
        Py_XDECREF(start);
1706
3.34k
        Py_XDECREF(stop);
1707
3.34k
        Py_XDECREF(step);
1708
3.34k
        break;
1709
3.34k
    }
1710
1711
0
    default:
1712
        /* Bogus data got written, which isn't ideal.
1713
           This will let you keep working and recover. */
1714
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1715
0
        break;
1716
1717
4.46M
    }
1718
4.46M
    p->depth--;
1719
4.46M
    return retval;
1720
4.46M
}
1721
1722
static PyObject *
1723
read_object(RFILE *p)
1724
6.52k
{
1725
6.52k
    PyObject *v;
1726
6.52k
    if (PyErr_Occurred()) {
1727
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1728
0
        return NULL;
1729
0
    }
1730
6.52k
    if (p->ptr && p->end) {
1731
6.52k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1732
0
            return NULL;
1733
0
        }
1734
6.52k
    } else if (p->fp || p->readable) {
1735
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1736
0
            return NULL;
1737
0
        }
1738
0
    }
1739
6.52k
    v = r_object(p);
1740
6.52k
    if (v == NULL && !PyErr_Occurred())
1741
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1742
6.52k
    return v;
1743
6.52k
}
1744
1745
int
1746
PyMarshal_ReadShortFromFile(FILE *fp)
1747
0
{
1748
0
    RFILE rf;
1749
0
    int res;
1750
0
    assert(fp);
1751
0
    rf.readable = NULL;
1752
0
    rf.fp = fp;
1753
0
    rf.end = rf.ptr = NULL;
1754
0
    rf.buf = NULL;
1755
0
    res = r_short(&rf);
1756
0
    if (rf.buf != NULL)
1757
0
        PyMem_Free(rf.buf);
1758
0
    return res;
1759
0
}
1760
1761
long
1762
PyMarshal_ReadLongFromFile(FILE *fp)
1763
0
{
1764
0
    RFILE rf;
1765
0
    long res;
1766
0
    rf.fp = fp;
1767
0
    rf.readable = NULL;
1768
0
    rf.ptr = rf.end = NULL;
1769
0
    rf.buf = NULL;
1770
0
    res = r_long(&rf);
1771
0
    if (rf.buf != NULL)
1772
0
        PyMem_Free(rf.buf);
1773
0
    return res;
1774
0
}
1775
1776
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1777
static off_t
1778
getfilesize(FILE *fp)
1779
0
{
1780
0
    struct _Py_stat_struct st;
1781
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1782
0
        return -1;
1783
#if SIZEOF_OFF_T == 4
1784
    else if (st.st_size >= INT_MAX)
1785
        return (off_t)INT_MAX;
1786
#endif
1787
0
    else
1788
0
        return (off_t)st.st_size;
1789
0
}
1790
1791
/* If we can get the size of the file up-front, and it's reasonably small,
1792
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1793
 * than reading a byte at a time from file; speeds .pyc imports.
1794
 * CAUTION:  since this may read the entire remainder of the file, don't
1795
 * call it unless you know you're done with the file.
1796
 */
1797
PyObject *
1798
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1799
0
{
1800
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1801
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1802
0
    off_t filesize;
1803
0
    filesize = getfilesize(fp);
1804
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1805
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1806
0
        if (pBuf != NULL) {
1807
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1808
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1809
0
            PyMem_Free(pBuf);
1810
0
            return v;
1811
0
        }
1812
1813
0
    }
1814
    /* We don't have fstat, or we do but the file is larger than
1815
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1816
     */
1817
0
    return PyMarshal_ReadObjectFromFile(fp);
1818
1819
0
#undef REASONABLE_FILE_LIMIT
1820
0
}
1821
1822
PyObject *
1823
PyMarshal_ReadObjectFromFile(FILE *fp)
1824
0
{
1825
0
    RFILE rf;
1826
0
    PyObject *result;
1827
0
    rf.allow_code = 1;
1828
0
    rf.fp = fp;
1829
0
    rf.readable = NULL;
1830
0
    rf.depth = 0;
1831
0
    rf.ptr = rf.end = NULL;
1832
0
    rf.buf = NULL;
1833
0
    rf.refs = PyList_New(0);
1834
0
    if (rf.refs == NULL)
1835
0
        return NULL;
1836
0
    result = read_object(&rf);
1837
0
    Py_DECREF(rf.refs);
1838
0
    if (rf.buf != NULL)
1839
0
        PyMem_Free(rf.buf);
1840
0
    return result;
1841
0
}
1842
1843
PyObject *
1844
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1845
520
{
1846
520
    RFILE rf;
1847
520
    PyObject *result;
1848
520
    rf.allow_code = 1;
1849
520
    rf.fp = NULL;
1850
520
    rf.readable = NULL;
1851
520
    rf.ptr = str;
1852
520
    rf.end = str + len;
1853
520
    rf.buf = NULL;
1854
520
    rf.depth = 0;
1855
520
    rf.refs = PyList_New(0);
1856
520
    if (rf.refs == NULL)
1857
0
        return NULL;
1858
520
    result = read_object(&rf);
1859
520
    Py_DECREF(rf.refs);
1860
520
    if (rf.buf != NULL)
1861
0
        PyMem_Free(rf.buf);
1862
520
    return result;
1863
520
}
1864
1865
/* Marshal `x` into a new bytes object using format `version`.
 *
 * `allow_code` is stored in the writer state for w_object() to consult.
 * Raises the "marshal.dumps" audit event before doing any work.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    WFILE wf;
    memset(&wf, 0, sizeof(wf));

    /* Start with a 50-byte output buffer. */
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    char *start = PyBytes_AS_STRING(wf.str);
    wf.buf = start;
    wf.ptr = start;
    wf.end = start + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;
    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }

    w_object(x, &wf);
    w_clear_refs(&wf);

    /* wf.str is checked for NULL here because it may have been cleared
     * while writing.  Otherwise trim it to the bytes actually written.
     */
    if (wf.str != NULL) {
        const char *base = PyBytes_AS_STRING(wf.str);
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - base);
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* Translate the writer's error code into a Python exception. */
    Py_XDECREF(wf.str);
    if (wf.error == WFERR_NOMEMORY) {
        PyErr_NoMemory();
    }
    else if (wf.error == WFERR_NESTEDTOODEEP) {
        PyErr_SetString(PyExc_ValueError,
                        "object too deeply nested to marshal");
    }
    else if (wf.error == WFERR_CODE_NOT_ALLOWED) {
        PyErr_SetString(PyExc_ValueError,
                        "marshalling code objects is disallowed");
    }
    else {
        /* WFERR_UNMARSHALLABLE, and any other error code. */
        PyErr_SetString(PyExc_ValueError,
                        "unmarshallable object");
    }
    return NULL;
}
1917
1918
/* Public C API: marshal `x` to a new bytes object in format `version`.
 * Delegates to the internal writer with code objects allowed.
 */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1923
1924
/* And an interface for Python programs... */
1925
/*[clinic input]
1926
marshal.dump
1927
1928
    value: object
1929
        Must be a supported type.
1930
    file: object
1931
        Must be a writeable binary file.
1932
    version: int(c_default="Py_MARSHAL_VERSION") = version
1933
        Indicates the data format that dump should use.
1934
    /
1935
    *
1936
    allow_code: bool = True
1937
        Allow to write code objects.
1938
1939
Write the value on the open file.
1940
1941
If the value has (or contains an object that has) an unsupported type, a
1942
ValueError exception is raised - but garbage data will also be written
1943
to the file. The object will not be properly read back by load().
1944
[clinic start generated code]*/
1945
1946
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently */

    /* Marshal fully into memory, then hand the bytes to file.write()
     * in a single call.
     */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    /* The return value of write() (or NULL on error) is passed through. */
    PyObject *write_result = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                       payload);
    Py_DECREF(payload);
    return write_result;
}
1962
1963
/*[clinic input]
1964
marshal.load
1965
1966
    file: object
1967
        Must be readable binary file.
1968
    /
1969
    *
1970
    allow_code: bool = True
1971
        Allow to load code objects.
1972
1973
Read one value from the open file and return it.
1974
1975
If no valid value is read (e.g. because the data has a different Python
1976
version's incompatible marshal format), raise EOFError, ValueError or
1977
TypeError.
1978
1979
Note: If an object containing an unsupported type was marshalled with
1980
dump(), load() will substitute None for the unmarshallable type.
1981
[clinic start generated code]*/
1982
1983
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    /*
     * Probe the file object with a zero-byte read() to confirm that it
     * has a read method and that the method returns bytes.
     * The probe can go away once r_string() is guaranteed to report
     * such errors properly on its own.
     */
    PyObject *data = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (data == NULL) {
        return NULL;
    }
    if (!PyBytes_Check(data)) {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(data)->tp_name);
        Py_DECREF(data);
        return NULL;
    }

    PyObject *result = NULL;
    RFILE rf;

    /* Source is a Python file-like object: no FILE*, no memory buffer. */
    rf.fp = NULL;
    rf.readable = file;
    rf.ptr = NULL;
    rf.end = NULL;
    rf.buf = NULL;
    rf.depth = 0;
    rf.allow_code = allow_code;

    rf.refs = PyList_New(0);
    if (rf.refs != NULL) {
        result = read_object(&rf);
        Py_DECREF(rf.refs);
        if (rf.buf != NULL) {
            PyMem_Free(rf.buf);
        }
    }

    /* The probe result is held until the load has finished. */
    Py_DECREF(data);
    return result;
}
2024
2025
/*[clinic input]
2026
@permit_long_summary
2027
@permit_long_docstring_body
2028
marshal.dumps
2029
2030
    value: object
2031
        Must be a supported type.
2032
    version: int(c_default="Py_MARSHAL_VERSION") = version
2033
        Indicates the data format that dumps should use.
2034
    /
2035
    *
2036
    allow_code: bool = True
2037
        Allow to write code objects.
2038
2039
Return the bytes object that would be written to a file by dump(value, file).
2040
2041
Raise a ValueError exception if value has (or contains an object that has) an
2042
unsupported type.
2043
[clinic start generated code]*/
2044
2045
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=80cd3f30c1637ade]*/
{
    /* dumps() is a direct front end to the in-memory writer. */
    PyObject *marshalled = _PyMarshal_WriteObjectToString(value, version,
                                                          allow_code);
    return marshalled;
}
2052
2053
/*[clinic input]
2054
marshal.loads
2055
2056
    bytes: Py_buffer
2057
    /
2058
    *
2059
    allow_code: bool = True
2060
        Allow to load code objects.
2061
2062
Convert the bytes-like object to a value.
2063
2064
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2065
bytes in the input are ignored.
2066
[clinic start generated code]*/
2067
2068
static PyObject *
2069
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2070
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2071
6.00k
{
2072
6.00k
    RFILE rf;
2073
6.00k
    char *s = bytes->buf;
2074
6.00k
    Py_ssize_t n = bytes->len;
2075
6.00k
    PyObject* result;
2076
6.00k
    rf.allow_code = allow_code;
2077
6.00k
    rf.fp = NULL;
2078
6.00k
    rf.readable = NULL;
2079
6.00k
    rf.ptr = s;
2080
6.00k
    rf.end = s + n;
2081
6.00k
    rf.depth = 0;
2082
6.00k
    if ((rf.refs = PyList_New(0)) == NULL)
2083
0
        return NULL;
2084
6.00k
    result = read_object(&rf);
2085
6.00k
    Py_DECREF(rf.refs);
2086
6.00k
    return result;
2087
6.00k
}
2088
2089
static PyMethodDef marshal_methods[] = {
2090
    MARSHAL_DUMP_METHODDEF
2091
    MARSHAL_LOAD_METHODDEF
2092
    MARSHAL_DUMPS_METHODDEF
2093
    MARSHAL_LOADS_METHODDEF
2094
    {NULL,              NULL}           /* sentinel */
2095
};
2096
2097
2098
PyDoc_STRVAR(module_doc,
2099
"This module contains functions that can read and write Python values in\n\
2100
a binary format. The format is specific to Python, but independent of\n\
2101
machine architecture issues.\n\
2102
\n\
2103
Not all Python object types are supported; in general, only objects\n\
2104
whose value is independent from a particular invocation of Python can be\n\
2105
written and read by this module. The following types are supported:\n\
2106
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2107
tuples, lists, sets, dictionaries, and code objects, where it\n\
2108
should be understood that tuples, lists and dictionaries are only\n\
2109
supported as long as the values contained therein are themselves\n\
2110
supported; and recursive lists and dictionaries should not be written\n\
2111
(they will cause infinite loops).\n\
2112
\n\
2113
Variables:\n\
2114
\n\
2115
version -- indicates the format that the module uses. Version 0 is the\n\
2116
    historical format, version 1 shares interned strings and version 2\n\
2117
    uses a binary format for floating-point numbers.\n\
2118
    Version 3 shares common object references (New in version 3.4).\n\
2119
\n\
2120
Functions:\n\
2121
\n\
2122
dump() -- write value to a file\n\
2123
load() -- read value from a file\n\
2124
dumps() -- marshal value as a bytes object\n\
2125
loads() -- read value from a bytes-like object");
2126
2127
2128
static int
2129
marshal_module_exec(PyObject *mod)
2130
36
{
2131
36
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2132
0
        return -1;
2133
0
    }
2134
36
    return 0;
2135
36
}
2136
2137
static PyModuleDef_Slot marshalmodule_slots[] = {
2138
     _Py_ABI_SLOT,
2139
    {Py_mod_exec, marshal_module_exec},
2140
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2141
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2142
    {0, NULL}
2143
};
2144
2145
static struct PyModuleDef marshalmodule = {
2146
    PyModuleDef_HEAD_INIT,
2147
    .m_name = "marshal",
2148
    .m_doc = module_doc,
2149
    .m_methods = marshal_methods,
2150
    .m_slots = marshalmodule_slots,
2151
};
2152
2153
PyMODINIT_FUNC
2154
PyMarshal_Init(void)
2155
36
{
2156
36
    return PyModuleDef_Init(&marshalmodule);
2157
36
}