Coverage Report

Created: 2026-03-08 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
18
19
#include "marshal.h"                 // Py_MARSHAL_VERSION
20
21
#ifdef __APPLE__
22
#  include "TargetConditionals.h"
23
#endif /* __APPLE__ */
24
25
26
/*[clinic input]
27
module marshal
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
30
31
#include "clinic/marshal.c.h"
32
33
/* High water mark to determine when the marshalled object is dangerously deep
34
 * and risks coring the interpreter.  When the object stack gets this deep,
35
 * raise an exception instead of continuing.
36
 * On Windows debug builds, reduce this value.
37
 *
38
 * BUG: https://bugs.python.org/issue33720
39
 * On Windows PGO builds, the r_object function overallocates its stack and
40
 * can cause a stack overflow. We reduce the maximum depth for all Windows
41
 * releases to protect against this.
42
 * #if defined(MS_WINDOWS) && defined(Py_DEBUG)
43
 */
44
#if defined(MS_WINDOWS)
45
#  define MAX_MARSHAL_STACK_DEPTH 1000
46
#elif defined(__wasi__)
47
#  define MAX_MARSHAL_STACK_DEPTH 1500
48
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
49
// It won't be defined on older macOS SDKs
50
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
51
#  define MAX_MARSHAL_STACK_DEPTH 1500
52
#else
53
5.15M
#  define MAX_MARSHAL_STACK_DEPTH 2000
54
#endif
55
56
/* Supported types */
57
0
#define TYPE_NULL               '0'
58
108k
#define TYPE_NONE               'N'
59
13.6k
#define TYPE_FALSE              'F'
60
12.8k
#define TYPE_TRUE               'T'
61
0
#define TYPE_STOPITER           'S'
62
399
#define TYPE_ELLIPSIS           '.'
63
695
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
64
3
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
65
432
#define TYPE_LONG               'l'  // See also TYPE_INT.
66
577k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
67
66
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
68
0
#define TYPE_LIST               '['
69
0
#define TYPE_DICT               '{'
70
0
#define TYPE_FROZENDICT         '}'
71
191k
#define TYPE_CODE               'c'
72
3.54k
#define TYPE_UNICODE            'u'
73
#define TYPE_UNKNOWN            '?'
74
// added in version 2:
75
1.60k
#define TYPE_SET                '<'
76
534
#define TYPE_FROZENSET          '>'
77
// added in version 5:
78
3.30k
#define TYPE_SLICE              ':'
79
// Remember to update the version and documentation when adding new types.
80
81
/* Special cases for unicode strings (added in version 4) */
82
202
#define TYPE_INTERNED           't' // Version 1+
83
30.9k
#define TYPE_ASCII              'a'
84
0
#define TYPE_ASCII_INTERNED     'A'
85
1.46M
#define TYPE_SHORT_ASCII        'z'
86
1.28M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
87
88
/* Special cases for small objects */
89
25.2k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
90
524k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+
91
92
/* Supported for backwards compatibility */
93
0
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
94
0
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
95
0
#define TYPE_INT64              'I'  // Not generated any more.
96
97
/* References (added in version 3) */
98
2.11M
#define TYPE_REF                'r'
99
10.1M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
100
101
102
// Error codes:
103
77.7k
#define WFERR_OK 0
104
22
#define WFERR_UNMARSHALLABLE 1
105
0
#define WFERR_NESTEDTOODEEP 2
106
11
#define WFERR_NOMEMORY 3
107
0
#define WFERR_CODE_NOT_ALLOWED 4
108
109
/* State of one marshal write operation.  Output goes either to a FILE
 * (fp != NULL, with buf as a flush buffer) or into a growing bytes object
 * (fp == NULL, str != NULL); see w_reserve(). */
typedef struct {
110
    FILE *fp;                   /* destination stream, or NULL for in-memory output */
111
    int error;  /* see WFERR_* values */
112
    int depth;                  /* current w_object() nesting level */
113
    PyObject *str;              /* bytes object resized by w_reserve() when fp is NULL */
114
    char *ptr;                  /* next write position; NULL after a failed resize */
115
    const char *end;            /* one past the last usable byte of buf */
116
    char *buf;                  /* start of the current output buffer */
117
    _Py_hashtable_t *hashtable; /* object -> reference index, created for version >= 3 */
118
    int version;                /* marshal format version being written */
119
    int allow_code;             /* zero makes w_complex_object() reject code objects */
120
} WFILE;
121
122
369k
/* Append the single byte `c` to writer `p`.
 * When the buffer is full, w_reserve() is asked for one more byte; if that
 * fails the byte is silently dropped.  `p` is evaluated more than once, so
 * it must not have side effects. */
#define w_byte(c, p) do {                               \
123
369k
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
124
369k
            *(p)->ptr++ = (c);                          \
125
369k
    } while(0)
126
127
static void
128
w_flush(WFILE *p)
129
0
{
130
0
    assert(p->fp != NULL);
131
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
132
0
    p->ptr = p->buf;
133
0
}
134
135
static int
136
w_reserve(WFILE *p, Py_ssize_t needed)
137
522
{
138
522
    Py_ssize_t pos, size, delta;
139
522
    if (p->ptr == NULL)
140
0
        return 0; /* An error already occurred */
141
522
    if (p->fp != NULL) {
142
0
        w_flush(p);
143
0
        return needed <= p->end - p->ptr;
144
0
    }
145
522
    assert(p->str != NULL);
146
522
    pos = p->ptr - p->buf;
147
522
    size = PyBytes_GET_SIZE(p->str);
148
522
    if (size > 16*1024*1024)
149
0
        delta = (size >> 3);            /* 12.5% overallocation */
150
522
    else
151
522
        delta = size + 1024;
152
522
    delta = Py_MAX(delta, needed);
153
522
    if (delta > PY_SSIZE_T_MAX - size) {
154
0
        p->error = WFERR_NOMEMORY;
155
0
        return 0;
156
0
    }
157
522
    size += delta;
158
522
    if (_PyBytes_Resize(&p->str, size) != 0) {
159
0
        p->end = p->ptr = p->buf = NULL;
160
0
        return 0;
161
0
    }
162
522
    else {
163
522
        p->buf = PyBytes_AS_STRING(p->str);
164
522
        p->ptr = p->buf + pos;
165
522
        p->end = p->buf + size;
166
522
        return 1;
167
522
    }
168
522
}
169
170
static void
171
w_string(const void *s, Py_ssize_t n, WFILE *p)
172
25.8k
{
173
25.8k
    Py_ssize_t m;
174
25.8k
    if (!n || p->ptr == NULL)
175
204
        return;
176
25.6k
    m = p->end - p->ptr;
177
25.6k
    if (p->fp != NULL) {
178
0
        if (n <= m) {
179
0
            memcpy(p->ptr, s, n);
180
0
            p->ptr += n;
181
0
        }
182
0
        else {
183
0
            w_flush(p);
184
0
            fwrite(s, 1, n, p->fp);
185
0
        }
186
0
    }
187
25.6k
    else {
188
25.6k
        if (n <= m || w_reserve(p, n - m)) {
189
25.6k
            memcpy(p->ptr, s, n);
190
25.6k
            p->ptr += n;
191
25.6k
        }
192
25.6k
    }
193
25.6k
}
194
195
static void
196
w_short(int x, WFILE *p)
197
18
{
198
18
    w_byte((char)( x      & 0xff), p);
199
18
    w_byte((char)((x>> 8) & 0xff), p);
200
18
}
201
202
static void
203
w_long(long x, WFILE *p)
204
67.0k
{
205
67.0k
    w_byte((char)( x      & 0xff), p);
206
67.0k
    w_byte((char)((x>> 8) & 0xff), p);
207
67.0k
    w_byte((char)((x>>16) & 0xff), p);
208
67.0k
    w_byte((char)((x>>24) & 0xff), p);
209
67.0k
}
210
211
623k
#define SIZE32_MAX  0x7FFFFFFF
212
213
/* W_SIZE(n, p): write the size `n` as a 32-bit value via w_long().
 *
 * Where size_t is wider than 4 bytes, a size above SIZE32_MAX cannot be
 * encoded: the macro then decrements p->depth, records
 * WFERR_UNMARSHALLABLE and executes a bare `return;` in the *calling*
 * function.  It can therefore only be used inside functions returning
 * void.  On narrower platforms it is simply an alias for w_long. */
#if SIZEOF_SIZE_T > 4
214
9.70k
# define W_SIZE(n, p)  do {                     \
215
9.70k
        if ((n) > SIZE32_MAX) {                 \
216
0
            (p)->depth--;                       \
217
0
            (p)->error = WFERR_UNMARSHALLABLE;  \
218
0
            return;                             \
219
0
        }                                       \
220
9.70k
        w_long((long)(n), p);                   \
221
9.70k
    } while(0)
222
#else
223
# define W_SIZE  w_long
224
#endif
225
226
static void
227
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
228
9.69k
{
229
9.69k
        W_SIZE(n, p);
230
9.69k
        w_string(s, n, p);
231
9.69k
}
232
233
static void
234
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
235
16.1k
{
236
16.1k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
237
16.1k
    w_string(s, n, p);
238
16.1k
}
239
240
/* We assume that Python ints are stored internally in base some power of
241
   2**15; for the sake of portability we'll always read and write them in base
242
   exactly 2**15. */
243
244
3.80k
#define PyLong_MARSHAL_SHIFT 15
245
1.68k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
246
18
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
247
248
38.9k
/* W_TYPE(t, p): write type code `t` to writer `p`, OR-ed with `flag`.
 * `flag` is not a macro parameter: it is picked up from a variable of that
 * name in the calling function's scope (w_ref() may set FLAG_REF in it). */
#define W_TYPE(t, p) do { \
249
38.9k
    w_byte((t) | flag, (p)); \
250
38.9k
} while(0)
251
252
static PyObject *
253
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
254
255
#define _r_digits(bitsize)                                                \
256
static void                                                               \
257
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
258
2
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
259
2
{                                                                         \
260
2
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
261
2
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
262
2
    uint ## bitsize ## _t d = digits[n - 1];                              \
263
2
                                                                          \
264
2
    assert(marshal_ratio > 0);                                            \
265
2
    assert(n >= 1);                                                       \
266
2
    assert(d != 0); /* a PyLong is always normalized */                   \
267
2
    do {                                                                  \
268
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
269
2
        l++;                                                              \
270
2
    } while (d != 0);                                                     \
271
2
    if (l > SIZE32_MAX) {                                                 \
272
0
        p->depth--;                                                       \
273
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
274
0
        return;                                                           \
275
0
    }                                                                     \
276
2
    w_long((long)(negative ? -l : l), p);                                 \
277
2
                                                                          \
278
6
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
279
4
        d = digits[i];                                                    \
280
12
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
281
8
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
282
8
            d >>= PyLong_MARSHAL_SHIFT;                                   \
283
8
        }                                                                 \
284
4
        assert(d == 0);                                                   \
285
4
    }                                                                     \
286
2
    d = digits[n - 1];                                                    \
287
2
    do {                                                                  \
288
2
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
289
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
290
2
    } while (d != 0);                                                     \
291
2
}
292
0
_r_digits(16)
293
2
_r_digits(32)
294
#undef _r_digits
295
296
static void
297
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
298
4
{
299
4
    W_TYPE(TYPE_LONG, p);
300
4
    if (_PyLong_IsZero(ob)) {
301
0
        w_long((long)0, p);
302
0
        return;
303
0
    }
304
305
4
    PyLongExport long_export;
306
307
4
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
308
0
        p->depth--;
309
0
        p->error = WFERR_UNMARSHALLABLE;
310
0
        return;
311
0
    }
312
4
    if (!long_export.digits) {
313
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
314
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
315
2
        uint64_t d = abs_value;
316
2
        long l = 0;
317
318
        /* set l to number of base PyLong_MARSHAL_BASE digits */
319
8
        do {
320
8
            d >>= PyLong_MARSHAL_SHIFT;
321
8
            l += sign;
322
8
        } while (d);
323
2
        w_long(l, p);
324
325
2
        d = abs_value;
326
8
        do {
327
8
            w_short(d & PyLong_MARSHAL_MASK, p);
328
8
            d >>= PyLong_MARSHAL_SHIFT;
329
8
        } while (d);
330
2
        return;
331
2
    }
332
333
2
    const PyLongLayout *layout = PyLong_GetNativeLayout();
334
2
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
335
336
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
337
2
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
338
2
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
339
340
    /* other assumptions on PyLongObject internals */
341
2
    assert(layout->bits_per_digit <= 32);
342
2
    assert(layout->digits_order == -1);
343
2
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
344
2
    assert(layout->digit_size == 2 || layout->digit_size == 4);
345
346
2
    if (layout->digit_size == 4) {
347
2
        _r_digits32(long_export.digits, long_export.ndigits,
348
2
                    long_export.negative, marshal_ratio, p);
349
2
    }
350
0
    else {
351
0
        _r_digits16(long_export.digits, long_export.ndigits,
352
0
                    long_export.negative, marshal_ratio, p);
353
0
    }
354
2
    PyLong_FreeExport(&long_export);
355
2
}
356
357
static void
358
w_float_bin(double v, WFILE *p)
359
15
{
360
15
    char buf[8];
361
15
    if (PyFloat_Pack8(v, buf, 1) < 0) {
362
0
        p->error = WFERR_UNMARSHALLABLE;
363
0
        return;
364
0
    }
365
15
    w_string(buf, 8, p);
366
15
}
367
368
static void
369
w_float_str(double v, WFILE *p)
370
0
{
371
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
372
0
    if (!buf) {
373
0
        p->error = WFERR_NOMEMORY;
374
0
        return;
375
0
    }
376
0
    w_short_pstring(buf, strlen(buf), p);
377
0
    PyMem_Free(buf);
378
0
}
379
380
static int
381
w_ref(PyObject *v, char *flag, WFILE *p)
382
74.7k
{
383
74.7k
    _Py_hashtable_entry_t *entry;
384
74.7k
    int w;
385
386
74.7k
    if (p->version < 3 || p->hashtable == NULL)
387
0
        return 0; /* not writing object references */
388
389
    /* If it has only one reference, it definitely isn't shared.
390
     * But we use TYPE_REF always for interned string, to PYC file stable
391
     * as possible.
392
     */
393
74.7k
    if (_PyObject_IsUniquelyReferenced(v) &&
394
20.1k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
395
18.5k
        return 0;
396
18.5k
    }
397
398
56.1k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
399
56.1k
    if (entry != NULL) {
400
        /* write the reference index to the stream */
401
35.8k
        w = (int)(uintptr_t)entry->value;
402
        /* we don't store "long" indices in the dict */
403
35.8k
        assert(0 <= w && w <= 0x7fffffff);
404
35.8k
        w_byte(TYPE_REF, p);
405
35.8k
        w_long(w, p);
406
35.8k
        return 1;
407
35.8k
    } else {
408
20.3k
        size_t s = p->hashtable->nentries;
409
        /* we don't support long indices */
410
20.3k
        if (s >= 0x7fffffff) {
411
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
412
0
            goto err;
413
0
        }
414
20.3k
        w = (int)s;
415
20.3k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
416
20.3k
                              (void *)(uintptr_t)w) < 0) {
417
0
            Py_DECREF(v);
418
0
            goto err;
419
0
        }
420
20.3k
        *flag |= FLAG_REF;
421
20.3k
        return 0;
422
20.3k
    }
423
0
err:
424
0
    p->error = WFERR_UNMARSHALLABLE;
425
0
    return 1;
426
56.1k
}
427
428
static void
429
w_complex_object(PyObject *v, char flag, WFILE *p);
430
431
static void
432
w_object(PyObject *v, WFILE *p)
433
77.3k
{
434
77.3k
    char flag = '\0';
435
436
77.3k
    if (p->error != WFERR_OK) {
437
0
        return;
438
0
    }
439
440
77.3k
    p->depth++;
441
442
77.3k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
443
0
        p->error = WFERR_NESTEDTOODEEP;
444
0
    }
445
77.3k
    else if (v == NULL) {
446
0
        w_byte(TYPE_NULL, p);
447
0
    }
448
77.3k
    else if (v == Py_None) {
449
2.00k
        w_byte(TYPE_NONE, p);
450
2.00k
    }
451
75.3k
    else if (v == PyExc_StopIteration) {
452
0
        w_byte(TYPE_STOPITER, p);
453
0
    }
454
75.3k
    else if (v == Py_Ellipsis) {
455
3
        w_byte(TYPE_ELLIPSIS, p);
456
3
    }
457
75.2k
    else if (v == Py_False) {
458
343
        w_byte(TYPE_FALSE, p);
459
343
    }
460
74.9k
    else if (v == Py_True) {
461
227
        w_byte(TYPE_TRUE, p);
462
227
    }
463
74.7k
    else if (!w_ref(v, &flag, p))
464
38.9k
        w_complex_object(v, flag, p);
465
466
77.3k
    p->depth--;
467
77.3k
}
468
469
static void
470
w_complex_object(PyObject *v, char flag, WFILE *p)
471
38.9k
{
472
38.9k
    Py_ssize_t i, n;
473
474
38.9k
    if (PyLong_CheckExact(v)) {
475
1.88k
        int overflow;
476
1.88k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
477
1.88k
        if (overflow) {
478
2
            w_PyLong((PyLongObject *)v, flag, p);
479
2
        }
480
1.88k
        else {
481
1.88k
#if SIZEOF_LONG > 4
482
1.88k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
483
1.88k
            if (y && y != -1) {
484
                /* Too large for TYPE_INT */
485
2
                w_PyLong((PyLongObject*)v, flag, p);
486
2
            }
487
1.87k
            else
488
1.87k
#endif
489
1.87k
            {
490
1.87k
                W_TYPE(TYPE_INT, p);
491
1.87k
                w_long(x, p);
492
1.87k
            }
493
1.88k
        }
494
1.88k
    }
495
37.0k
    else if (PyFloat_CheckExact(v)) {
496
13
        if (p->version > 1) {
497
13
            W_TYPE(TYPE_BINARY_FLOAT, p);
498
13
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
499
13
        }
500
0
        else {
501
0
            W_TYPE(TYPE_FLOAT, p);
502
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
503
0
        }
504
13
    }
505
37.0k
    else if (PyComplex_CheckExact(v)) {
506
1
        if (p->version > 1) {
507
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
508
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
509
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
510
1
        }
511
0
        else {
512
0
            W_TYPE(TYPE_COMPLEX, p);
513
0
            w_float_str(PyComplex_RealAsDouble(v), p);
514
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
515
0
        }
516
1
    }
517
37.0k
    else if (PyBytes_CheckExact(v)) {
518
9.42k
        W_TYPE(TYPE_STRING, p);
519
9.42k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
520
9.42k
    }
521
27.6k
    else if (PyUnicode_CheckExact(v)) {
522
16.3k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
523
16.3k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
524
16.3k
            if (is_short) {
525
16.1k
                if (PyUnicode_CHECK_INTERNED(v))
526
14.3k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
527
1.73k
                else
528
1.73k
                    W_TYPE(TYPE_SHORT_ASCII, p);
529
16.1k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
530
16.1k
                                PyUnicode_GET_LENGTH(v), p);
531
16.1k
            }
532
202
            else {
533
202
                if (PyUnicode_CHECK_INTERNED(v))
534
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
535
202
                else
536
202
                    W_TYPE(TYPE_ASCII, p);
537
202
                w_pstring(PyUnicode_1BYTE_DATA(v),
538
202
                          PyUnicode_GET_LENGTH(v), p);
539
202
            }
540
16.3k
        }
541
66
        else {
542
66
            PyObject *utf8;
543
66
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
544
66
            if (utf8 == NULL) {
545
0
                p->depth--;
546
0
                p->error = WFERR_UNMARSHALLABLE;
547
0
                return;
548
0
            }
549
66
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
550
0
                W_TYPE(TYPE_INTERNED, p);
551
66
            else
552
66
                W_TYPE(TYPE_UNICODE, p);
553
66
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
554
66
            Py_DECREF(utf8);
555
66
        }
556
16.3k
    }
557
11.2k
    else if (PyTuple_CheckExact(v)) {
558
7.90k
        n = PyTuple_GET_SIZE(v);
559
7.90k
        if (p->version >= 4 && n < 256) {
560
7.90k
            W_TYPE(TYPE_SMALL_TUPLE, p);
561
7.90k
            w_byte((unsigned char)n, p);
562
7.90k
        }
563
0
        else {
564
0
            W_TYPE(TYPE_TUPLE, p);
565
0
            W_SIZE(n, p);
566
0
        }
567
52.0k
        for (i = 0; i < n; i++) {
568
44.1k
            w_object(PyTuple_GET_ITEM(v, i), p);
569
44.1k
        }
570
7.90k
    }
571
3.31k
    else if (PyList_CheckExact(v)) {
572
0
        W_TYPE(TYPE_LIST, p);
573
0
        n = PyList_GET_SIZE(v);
574
0
        W_SIZE(n, p);
575
0
        for (i = 0; i < n; i++) {
576
0
            w_object(PyList_GET_ITEM(v, i), p);
577
0
        }
578
0
    }
579
3.31k
    else if (PyAnyDict_CheckExact(v)) {
580
0
        Py_ssize_t pos;
581
0
        PyObject *key, *value;
582
0
        if (PyFrozenDict_CheckExact(v)) {
583
0
            if (p->version < 6) {
584
0
                w_byte(TYPE_UNKNOWN, p);
585
0
                p->error = WFERR_UNMARSHALLABLE;
586
0
                return;
587
0
            }
588
589
0
            W_TYPE(TYPE_FROZENDICT, p);
590
0
        }
591
0
        else {
592
0
            W_TYPE(TYPE_DICT, p);
593
0
        }
594
        /* This one is NULL object terminated! */
595
0
        pos = 0;
596
0
        while (PyDict_Next(v, &pos, &key, &value)) {
597
0
            w_object(key, p);
598
0
            w_object(value, p);
599
0
        }
600
0
        w_object((PyObject *)NULL, p);
601
0
    }
602
3.31k
    else if (PyAnySet_CheckExact(v)) {
603
11
        PyObject *value;
604
11
        Py_ssize_t pos = 0;
605
11
        Py_hash_t hash;
606
607
11
        if (PyFrozenSet_CheckExact(v))
608
11
            W_TYPE(TYPE_FROZENSET, p);
609
0
        else
610
0
            W_TYPE(TYPE_SET, p);
611
11
        n = PySet_GET_SIZE(v);
612
11
        W_SIZE(n, p);
613
        // bpo-37596: To support reproducible builds, sets and frozensets need
614
        // to have their elements serialized in a consistent order (even when
615
        // they have been scrambled by hash randomization). To ensure this, we
616
        // use an order equivalent to sorted(v, key=marshal.dumps):
617
11
        PyObject *pairs = PyList_New(n);
618
11
        if (pairs == NULL) {
619
0
            p->error = WFERR_NOMEMORY;
620
0
            return;
621
0
        }
622
11
        Py_ssize_t i = 0;
623
11
        Py_BEGIN_CRITICAL_SECTION(v);
624
76
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
625
65
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
626
65
                                    p->version, p->allow_code);
627
65
            if (dump == NULL) {
628
0
                p->error = WFERR_UNMARSHALLABLE;
629
0
                Py_DECREF(value);
630
0
                break;
631
0
            }
632
65
            PyObject *pair = PyTuple_Pack(2, dump, value);
633
65
            Py_DECREF(dump);
634
65
            Py_DECREF(value);
635
65
            if (pair == NULL) {
636
0
                p->error = WFERR_NOMEMORY;
637
0
                break;
638
0
            }
639
65
            PyList_SET_ITEM(pairs, i++, pair);
640
65
        }
641
11
        Py_END_CRITICAL_SECTION();
642
11
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
643
0
            Py_DECREF(pairs);
644
0
            return;
645
0
        }
646
11
        assert(i == n);
647
11
        if (PyList_Sort(pairs)) {
648
0
            p->error = WFERR_NOMEMORY;
649
0
            Py_DECREF(pairs);
650
0
            return;
651
0
        }
652
76
        for (Py_ssize_t i = 0; i < n; i++) {
653
65
            PyObject *pair = PyList_GET_ITEM(pairs, i);
654
65
            value = PyTuple_GET_ITEM(pair, 1);
655
65
            w_object(value, p);
656
65
        }
657
11
        Py_DECREF(pairs);
658
11
    }
659
3.30k
    else if (PyCode_Check(v)) {
660
3.27k
        if (!p->allow_code) {
661
0
            p->error = WFERR_CODE_NOT_ALLOWED;
662
0
            return;
663
0
        }
664
3.27k
        PyCodeObject *co = (PyCodeObject *)v;
665
3.27k
        PyObject *co_code = _PyCode_GetCode(co);
666
3.27k
        if (co_code == NULL) {
667
0
            p->error = WFERR_NOMEMORY;
668
0
            return;
669
0
        }
670
3.27k
        W_TYPE(TYPE_CODE, p);
671
3.27k
        w_long(co->co_argcount, p);
672
3.27k
        w_long(co->co_posonlyargcount, p);
673
3.27k
        w_long(co->co_kwonlyargcount, p);
674
3.27k
        w_long(co->co_stacksize, p);
675
3.27k
        w_long(co->co_flags, p);
676
3.27k
        w_object(co_code, p);
677
3.27k
        w_object(co->co_consts, p);
678
3.27k
        w_object(co->co_names, p);
679
3.27k
        w_object(co->co_localsplusnames, p);
680
3.27k
        w_object(co->co_localspluskinds, p);
681
3.27k
        w_object(co->co_filename, p);
682
3.27k
        w_object(co->co_name, p);
683
3.27k
        w_object(co->co_qualname, p);
684
3.27k
        w_long(co->co_firstlineno, p);
685
3.27k
        w_object(co->co_linetable, p);
686
3.27k
        w_object(co->co_exceptiontable, p);
687
3.27k
        Py_DECREF(co_code);
688
3.27k
    }
689
25
    else if (PyObject_CheckBuffer(v)) {
690
        /* Write unknown bytes-like objects as a bytes object */
691
0
        Py_buffer view;
692
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
693
0
            w_byte(TYPE_UNKNOWN, p);
694
0
            p->depth--;
695
0
            p->error = WFERR_UNMARSHALLABLE;
696
0
            return;
697
0
        }
698
0
        W_TYPE(TYPE_STRING, p);
699
0
        w_pstring(view.buf, view.len, p);
700
0
        PyBuffer_Release(&view);
701
0
    }
702
25
    else if (PySlice_Check(v)) {
703
25
        if (p->version < 5) {
704
0
            w_byte(TYPE_UNKNOWN, p);
705
0
            p->error = WFERR_UNMARSHALLABLE;
706
0
            return;
707
0
        }
708
25
        PySliceObject *slice = (PySliceObject *)v;
709
25
        W_TYPE(TYPE_SLICE, p);
710
25
        w_object(slice->start, p);
711
25
        w_object(slice->stop, p);
712
25
        w_object(slice->step, p);
713
25
    }
714
0
    else {
715
0
        W_TYPE(TYPE_UNKNOWN, p);
716
0
        p->error = WFERR_UNMARSHALLABLE;
717
0
    }
718
38.9k
}
719
720
static void
721
w_decref_entry(void *key)
722
20.3k
{
723
20.3k
    PyObject *entry_key = (PyObject *)key;
724
20.3k
    Py_XDECREF(entry_key);
725
20.3k
}
726
727
static int
728
w_init_refs(WFILE *wf, int version)
729
229
{
730
229
    if (version >= 3) {
731
229
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
732
229
                                               _Py_hashtable_compare_direct,
733
229
                                               w_decref_entry, NULL, NULL);
734
229
        if (wf->hashtable == NULL) {
735
0
            PyErr_NoMemory();
736
0
            return -1;
737
0
        }
738
229
    }
739
229
    return 0;
740
229
}
741
742
static void
743
w_clear_refs(WFILE *wf)
744
229
{
745
229
    if (wf->hashtable != NULL) {
746
229
        _Py_hashtable_destroy(wf->hashtable);
747
229
    }
748
229
}
749
750
/* version currently has no effect for writing ints. */
751
/* Note that while the documentation states that this function
752
 * can error, currently it never does. Setting an exception in
753
 * this function should be regarded as an API-breaking change.
754
 */
755
void
756
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
757
0
{
758
0
    char buf[4];
759
0
    WFILE wf;
760
0
    memset(&wf, 0, sizeof(wf));
761
0
    wf.fp = fp;
762
0
    wf.ptr = wf.buf = buf;
763
0
    wf.end = wf.ptr + sizeof(buf);
764
0
    wf.error = WFERR_OK;
765
0
    wf.version = version;
766
0
    w_long(x, &wf);
767
0
    w_flush(&wf);
768
0
}
769
770
void
771
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
772
0
{
773
0
    char buf[BUFSIZ];
774
0
    WFILE wf;
775
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
776
0
        return; /* caller must check PyErr_Occurred() */
777
0
    }
778
0
    memset(&wf, 0, sizeof(wf));
779
0
    wf.fp = fp;
780
0
    wf.ptr = wf.buf = buf;
781
0
    wf.end = wf.ptr + sizeof(buf);
782
0
    wf.error = WFERR_OK;
783
0
    wf.version = version;
784
0
    wf.allow_code = 1;
785
0
    if (w_init_refs(&wf, version)) {
786
0
        return; /* caller must check PyErr_Occurred() */
787
0
    }
788
0
    w_object(x, &wf);
789
0
    w_clear_refs(&wf);
790
0
    w_flush(&wf);
791
0
}
792
793
/* State of one marshal read operation.  Input comes from memory
 * (ptr != NULL), from a stream-like object (readable != NULL), or
 * otherwise from a FILE (fp); see r_string() and r_byte(). */
typedef struct {
794
    FILE *fp;            /* used with fread()/getc() when ptr and readable are NULL */
795
    int depth;
796
    PyObject *readable;  /* Stream-like object being read from */
797
    const char *ptr;     /* next unread byte of in-memory input, or NULL */
798
    const char *end;     /* one past the last byte of in-memory input */
799
    char *buf;           /* scratch buffer allocated by r_string() for non-memory input */
800
    Py_ssize_t buf_size; /* size recorded by r_string() when it allocates or grows buf */
801
    PyObject *refs;  /* a list */
802
    int allow_code;
803
} RFILE;
804
805
static const char *
806
r_string(Py_ssize_t n, RFILE *p)
807
5.98M
{
808
5.98M
    Py_ssize_t read = -1;
809
810
5.98M
    if (p->ptr != NULL) {
811
        /* Fast path for loads() */
812
5.98M
        const char *res = p->ptr;
813
5.98M
        Py_ssize_t left = p->end - p->ptr;
814
5.98M
        if (left < n) {
815
0
            PyErr_SetString(PyExc_EOFError,
816
0
                            "marshal data too short");
817
0
            return NULL;
818
0
        }
819
5.98M
        p->ptr += n;
820
5.98M
        return res;
821
5.98M
    }
822
0
    if (p->buf == NULL) {
823
0
        p->buf = PyMem_Malloc(n);
824
0
        if (p->buf == NULL) {
825
0
            PyErr_NoMemory();
826
0
            return NULL;
827
0
        }
828
0
        p->buf_size = n;
829
0
    }
830
0
    else if (p->buf_size < n) {
831
0
        char *tmp = PyMem_Realloc(p->buf, n);
832
0
        if (tmp == NULL) {
833
0
            PyErr_NoMemory();
834
0
            return NULL;
835
0
        }
836
0
        p->buf = tmp;
837
0
        p->buf_size = n;
838
0
    }
839
840
0
    if (!p->readable) {
841
0
        assert(p->fp != NULL);
842
0
        read = fread(p->buf, 1, n, p->fp);
843
0
    }
844
0
    else {
845
0
        PyObject *res, *mview;
846
0
        Py_buffer buf;
847
848
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
849
0
            return NULL;
850
0
        mview = PyMemoryView_FromBuffer(&buf);
851
0
        if (mview == NULL)
852
0
            return NULL;
853
854
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
855
0
        if (res != NULL) {
856
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
857
0
            Py_DECREF(res);
858
0
        }
859
0
    }
860
0
    if (read != n) {
861
0
        if (!PyErr_Occurred()) {
862
0
            if (read > n)
863
0
                PyErr_Format(PyExc_ValueError,
864
0
                             "read() returned too much data: "
865
0
                             "%zd bytes requested, %zd returned",
866
0
                             n, read);
867
0
            else
868
0
                PyErr_SetString(PyExc_EOFError,
869
0
                                "EOF read where not expected");
870
0
        }
871
0
        return NULL;
872
0
    }
873
0
    return p->buf;
874
0
}
875
876
static int
877
r_byte(RFILE *p)
878
7.06M
{
879
7.06M
    if (p->ptr != NULL) {
880
7.06M
        if (p->ptr < p->end) {
881
7.06M
            return (unsigned char) *p->ptr++;
882
7.06M
        }
883
7.06M
    }
884
0
    else if (!p->readable) {
885
0
        assert(p->fp);
886
0
        int c = getc(p->fp);
887
0
        if (c != EOF) {
888
0
            return c;
889
0
        }
890
0
    }
891
0
    else {
892
0
        const char *ptr = r_string(1, p);
893
0
        if (ptr != NULL) {
894
0
            return *(const unsigned char *) ptr;
895
0
        }
896
0
        return EOF;
897
0
    }
898
0
    PyErr_SetString(PyExc_EOFError,
899
0
                    "EOF read where not expected");
900
0
    return EOF;
901
7.06M
}
902
903
static int
904
r_short(RFILE *p)
905
1.66k
{
906
1.66k
    short x = -1;
907
1.66k
    const unsigned char *buffer;
908
909
1.66k
    buffer = (const unsigned char *) r_string(2, p);
910
1.66k
    if (buffer != NULL) {
911
1.66k
        x = buffer[0];
912
1.66k
        x |= buffer[1] << 8;
913
        /* Sign-extension, in case short greater than 16 bits */
914
1.66k
        x |= -(x & 0x8000);
915
1.66k
    }
916
1.66k
    return x;
917
1.66k
}
918
919
static long
920
r_long(RFILE *p)
921
3.90M
{
922
3.90M
    long x = -1;
923
3.90M
    const unsigned char *buffer;
924
925
3.90M
    buffer = (const unsigned char *) r_string(4, p);
926
3.90M
    if (buffer != NULL) {
927
3.90M
        x = buffer[0];
928
3.90M
        x |= (long)buffer[1] << 8;
929
3.90M
        x |= (long)buffer[2] << 16;
930
3.90M
        x |= (long)buffer[3] << 24;
931
3.90M
#if SIZEOF_LONG > 4
932
        /* Sign extension for 64-bit machines */
933
3.90M
        x |= -(x & 0x80000000L);
934
3.90M
#endif
935
3.90M
    }
936
3.90M
    return x;
937
3.90M
}
938
939
/* r_long64 deals with the TYPE_INT64 code. */
940
static PyObject *
941
r_long64(RFILE *p)
942
0
{
943
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
944
0
    if (buffer == NULL) {
945
0
        return NULL;
946
0
    }
947
0
    return _PyLong_FromByteArray(buffer, 8,
948
0
                                 1 /* little endian */,
949
0
                                 1 /* signed */);
950
0
}
951
952
#define _w_digits(bitsize)                                              \
953
static int                                                              \
954
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
955
                   Py_ssize_t marshal_ratio,                            \
956
432
                   int shorts_in_top_digit, RFILE *p)                   \
957
432
{                                                                       \
958
432
    uint ## bitsize ## _t d;                                            \
959
432
                                                                        \
960
432
    assert(size >= 1);                                                  \
961
1.00k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
962
577
        d = 0;                                                          \
963
1.73k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
964
1.15k
            int md = r_short(p);                                        \
965
1.15k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
966
0
                goto bad_digit;                                         \
967
0
            }                                                           \
968
1.15k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
969
1.15k
        }                                                               \
970
577
        digits[i] = d;                                                  \
971
577
    }                                                                   \
972
432
                                                                        \
973
432
    d = 0;                                                              \
974
940
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
975
508
        int md = r_short(p);                                            \
976
508
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
977
0
            goto bad_digit;                                             \
978
0
        }                                                               \
979
508
        /* topmost marshal digit should be nonzero */                   \
980
508
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
981
0
            PyErr_SetString(PyExc_ValueError,                           \
982
0
                "bad marshal data (unnormalized long data)");           \
983
0
            return -1;                                                  \
984
0
        }                                                               \
985
508
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
986
508
    }                                                                   \
987
432
    assert(!PyErr_Occurred());                                          \
988
432
    /* top digit should be nonzero, else the resulting PyLong won't be  \
989
432
       normalized */                                                    \
990
432
    digits[size - 1] = d;                                               \
991
432
    return 0;                                                           \
992
432
                                                                        \
993
0
bad_digit:                                                              \
994
0
    if (!PyErr_Occurred()) {                                            \
995
0
        PyErr_SetString(PyExc_ValueError,                               \
996
0
            "bad marshal data (digit out of range in long)");           \
997
0
    }                                                                   \
998
0
    return -1;                                                          \
999
432
}
1000
432
_w_digits(32)
1001
0
_w_digits(16)
1002
#undef _w_digits
1003
1004
static PyObject *
1005
r_PyLong(RFILE *p)
1006
432
{
1007
432
    long n = r_long(p);
1008
432
    if (n == -1 && PyErr_Occurred()) {
1009
0
        return NULL;
1010
0
    }
1011
432
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1012
0
        PyErr_SetString(PyExc_ValueError,
1013
0
                       "bad marshal data (long size out of range)");
1014
0
        return NULL;
1015
0
    }
1016
1017
432
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1018
432
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1019
1020
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1021
432
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1022
432
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1023
1024
    /* other assumptions on PyLongObject internals */
1025
432
    assert(layout->bits_per_digit <= 32);
1026
432
    assert(layout->digits_order == -1);
1027
432
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1028
432
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1029
1030
432
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1031
1032
432
    assert(size >= 1);
1033
1034
432
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1035
432
    void *digits;
1036
432
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1037
1038
432
    if (writer == NULL) {
1039
0
        return NULL;
1040
0
    }
1041
1042
432
    int ret;
1043
1044
432
    if (layout->digit_size == 4) {
1045
432
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1046
432
    }
1047
0
    else {
1048
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1049
0
    }
1050
432
    if (ret < 0) {
1051
0
        PyLongWriter_Discard(writer);
1052
0
        return NULL;
1053
0
    }
1054
432
    return PyLongWriter_Finish(writer);
1055
432
}
1056
1057
static double
1058
r_float_bin(RFILE *p)
1059
701
{
1060
701
    const char *buf = r_string(8, p);
1061
701
    if (buf == NULL)
1062
0
        return -1;
1063
701
    return PyFloat_Unpack8(buf, 1);
1064
701
}
1065
1066
/* Issue #33720: Disable inlining for reducing the C stack consumption
1067
   on PGO builds. */
1068
Py_NO_INLINE static double
1069
r_float_str(RFILE *p)
1070
0
{
1071
0
    int n;
1072
0
    char buf[256];
1073
0
    const char *ptr;
1074
0
    n = r_byte(p);
1075
0
    if (n == EOF) {
1076
0
        return -1;
1077
0
    }
1078
0
    ptr = r_string(n, p);
1079
0
    if (ptr == NULL) {
1080
0
        return -1;
1081
0
    }
1082
0
    memcpy(buf, ptr, n);
1083
0
    buf[n] = '\0';
1084
0
    return PyOS_string_to_double(buf, NULL, NULL);
1085
0
}
1086
1087
/* allocate the reflist index for a new object. Return -1 on failure */
1088
static Py_ssize_t
1089
r_ref_reserve(int flag, RFILE *p)
1090
195k
{
1091
195k
    if (flag) { /* currently only FLAG_REF is defined */
1092
7.71k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1093
7.71k
        if (idx >= 0x7ffffffe) {
1094
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1095
0
            return -1;
1096
0
        }
1097
7.71k
        if (PyList_Append(p->refs, Py_None) < 0)
1098
0
            return -1;
1099
7.71k
        return idx;
1100
7.71k
    } else
1101
187k
        return 0;
1102
195k
}
1103
1104
/* insert the new object 'o' to the reflist at previously
1105
 * allocated index 'idx'.
1106
 * 'o' can be NULL, in which case nothing is done.
1107
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1108
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1109
 * NULL returned. This simplifies error checking at the call site since
1110
 * a single test for NULL for the function result is enough.
1111
 */
1112
static PyObject *
1113
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1114
195k
{
1115
195k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1116
7.71k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1117
7.71k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1118
7.71k
        Py_DECREF(tmp);
1119
7.71k
    }
1120
195k
    return o;
1121
195k
}
1122
1123
/* combination of both above, used when an object can be
1124
 * created whenever it is seen in the file, as opposed to
1125
 * after having loaded its sub-objects.
1126
 */
1127
static PyObject *
1128
r_ref(PyObject *o, int flag, RFILE *p)
1129
1.59M
{
1130
1.59M
    assert(flag & FLAG_REF);
1131
1.59M
    if (o == NULL)
1132
0
        return NULL;
1133
1.59M
    if (PyList_Append(p->refs, o) < 0) {
1134
0
        Py_DECREF(o); /* release the new object */
1135
0
        return NULL;
1136
0
    }
1137
1.59M
    return o;
1138
1.59M
}
1139
1140
static PyObject *
1141
r_object(RFILE *p)
1142
5.07M
{
1143
    /* NULL is a valid return value, it does not necessarily means that
1144
       an exception is set. */
1145
5.07M
    PyObject *v, *v2;
1146
5.07M
    Py_ssize_t idx = 0;
1147
5.07M
    long i, n;
1148
5.07M
    int type, code = r_byte(p);
1149
5.07M
    int flag, is_interned = 0;
1150
5.07M
    PyObject *retval = NULL;
1151
1152
5.07M
    if (code == EOF) {
1153
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1154
0
            PyErr_SetString(PyExc_EOFError,
1155
0
                            "EOF read where object expected");
1156
0
        }
1157
0
        return NULL;
1158
0
    }
1159
1160
5.07M
    p->depth++;
1161
1162
5.07M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1163
0
        p->depth--;
1164
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1165
0
        return NULL;
1166
0
    }
1167
1168
5.07M
    flag = code & FLAG_REF;
1169
5.07M
    type = code & ~FLAG_REF;
1170
1171
5.07M
#define R_REF(O) do{\
1172
2.63M
    if (flag) \
1173
2.63M
        O = r_ref(O, flag, p);\
1174
2.63M
} while (0)
1175
1176
5.07M
    switch (type) {
1177
1178
0
    case TYPE_NULL:
1179
0
        break;
1180
1181
108k
    case TYPE_NONE:
1182
108k
        retval = Py_None;
1183
108k
        break;
1184
1185
0
    case TYPE_STOPITER:
1186
0
        retval = Py_NewRef(PyExc_StopIteration);
1187
0
        break;
1188
1189
399
    case TYPE_ELLIPSIS:
1190
399
        retval = Py_Ellipsis;
1191
399
        break;
1192
1193
13.6k
    case TYPE_FALSE:
1194
13.6k
        retval = Py_False;
1195
13.6k
        break;
1196
1197
12.8k
    case TYPE_TRUE:
1198
12.8k
        retval = Py_True;
1199
12.8k
        break;
1200
1201
25.2k
    case TYPE_INT:
1202
25.2k
        n = r_long(p);
1203
25.2k
        if (n == -1 && PyErr_Occurred()) {
1204
0
            break;
1205
0
        }
1206
25.2k
        retval = PyLong_FromLong(n);
1207
25.2k
        R_REF(retval);
1208
25.2k
        break;
1209
1210
0
    case TYPE_INT64:
1211
0
        retval = r_long64(p);
1212
0
        R_REF(retval);
1213
0
        break;
1214
1215
432
    case TYPE_LONG:
1216
432
        retval = r_PyLong(p);
1217
432
        R_REF(retval);
1218
432
        break;
1219
1220
0
    case TYPE_FLOAT:
1221
0
        {
1222
0
            double x = r_float_str(p);
1223
0
            if (x == -1.0 && PyErr_Occurred())
1224
0
                break;
1225
0
            retval = PyFloat_FromDouble(x);
1226
0
            R_REF(retval);
1227
0
            break;
1228
0
        }
1229
1230
695
    case TYPE_BINARY_FLOAT:
1231
695
        {
1232
695
            double x = r_float_bin(p);
1233
695
            if (x == -1.0 && PyErr_Occurred())
1234
0
                break;
1235
695
            retval = PyFloat_FromDouble(x);
1236
695
            R_REF(retval);
1237
695
            break;
1238
695
        }
1239
1240
0
    case TYPE_COMPLEX:
1241
0
        {
1242
0
            Py_complex c;
1243
0
            c.real = r_float_str(p);
1244
0
            if (c.real == -1.0 && PyErr_Occurred())
1245
0
                break;
1246
0
            c.imag = r_float_str(p);
1247
0
            if (c.imag == -1.0 && PyErr_Occurred())
1248
0
                break;
1249
0
            retval = PyComplex_FromCComplex(c);
1250
0
            R_REF(retval);
1251
0
            break;
1252
0
        }
1253
1254
3
    case TYPE_BINARY_COMPLEX:
1255
3
        {
1256
3
            Py_complex c;
1257
3
            c.real = r_float_bin(p);
1258
3
            if (c.real == -1.0 && PyErr_Occurred())
1259
0
                break;
1260
3
            c.imag = r_float_bin(p);
1261
3
            if (c.imag == -1.0 && PyErr_Occurred())
1262
0
                break;
1263
3
            retval = PyComplex_FromCComplex(c);
1264
3
            R_REF(retval);
1265
3
            break;
1266
3
        }
1267
1268
577k
    case TYPE_STRING:
1269
577k
        {
1270
577k
            const char *ptr;
1271
577k
            n = r_long(p);
1272
577k
            if (n < 0 || n > SIZE32_MAX) {
1273
0
                if (!PyErr_Occurred()) {
1274
0
                    PyErr_SetString(PyExc_ValueError,
1275
0
                        "bad marshal data (bytes object size out of range)");
1276
0
                }
1277
0
                break;
1278
0
            }
1279
577k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1280
577k
            if (v == NULL)
1281
0
                break;
1282
577k
            ptr = r_string(n, p);
1283
577k
            if (ptr == NULL) {
1284
0
                Py_DECREF(v);
1285
0
                break;
1286
0
            }
1287
577k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1288
577k
            retval = v;
1289
577k
            R_REF(retval);
1290
577k
            break;
1291
577k
        }
1292
1293
0
    case TYPE_ASCII_INTERNED:
1294
0
        is_interned = 1;
1295
0
        _Py_FALLTHROUGH;
1296
30.9k
    case TYPE_ASCII:
1297
30.9k
        n = r_long(p);
1298
30.9k
        if (n < 0 || n > SIZE32_MAX) {
1299
0
            if (!PyErr_Occurred()) {
1300
0
                PyErr_SetString(PyExc_ValueError,
1301
0
                    "bad marshal data (string size out of range)");
1302
0
            }
1303
0
            break;
1304
0
        }
1305
30.9k
        goto _read_ascii;
1306
1307
1.28M
    case TYPE_SHORT_ASCII_INTERNED:
1308
1.28M
        is_interned = 1;
1309
1.28M
        _Py_FALLTHROUGH;
1310
1.46M
    case TYPE_SHORT_ASCII:
1311
1.46M
        n = r_byte(p);
1312
1.46M
        if (n == EOF) {
1313
0
            break;
1314
0
        }
1315
1.50M
    _read_ascii:
1316
1.50M
        {
1317
1.50M
            const char *ptr;
1318
1.50M
            ptr = r_string(n, p);
1319
1.50M
            if (ptr == NULL)
1320
0
                break;
1321
1.50M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1322
1.50M
            if (v == NULL)
1323
0
                break;
1324
1.50M
            if (is_interned) {
1325
                // marshal is meant to serialize .pyc files with code
1326
                // objects, and code-related strings are currently immortal.
1327
1.28M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1328
1.28M
                _PyUnicode_InternImmortal(interp, &v);
1329
1.28M
            }
1330
1.50M
            retval = v;
1331
1.50M
            R_REF(retval);
1332
1.50M
            break;
1333
1.50M
        }
1334
1335
202
    case TYPE_INTERNED:
1336
202
        is_interned = 1;
1337
202
        _Py_FALLTHROUGH;
1338
3.54k
    case TYPE_UNICODE:
1339
3.54k
        {
1340
3.54k
        const char *buffer;
1341
1342
3.54k
        n = r_long(p);
1343
3.54k
        if (n < 0 || n > SIZE32_MAX) {
1344
0
            if (!PyErr_Occurred()) {
1345
0
                PyErr_SetString(PyExc_ValueError,
1346
0
                    "bad marshal data (string size out of range)");
1347
0
            }
1348
0
            break;
1349
0
        }
1350
3.54k
        if (n != 0) {
1351
3.54k
            buffer = r_string(n, p);
1352
3.54k
            if (buffer == NULL)
1353
0
                break;
1354
3.54k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1355
3.54k
        }
1356
0
        else {
1357
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1358
0
        }
1359
3.54k
        if (v == NULL)
1360
0
            break;
1361
3.54k
        if (is_interned) {
1362
            // marshal is meant to serialize .pyc files with code
1363
            // objects, and code-related strings are currently immortal.
1364
202
            PyInterpreterState *interp = _PyInterpreterState_GET();
1365
202
            _PyUnicode_InternImmortal(interp, &v);
1366
202
        }
1367
3.54k
        retval = v;
1368
3.54k
        R_REF(retval);
1369
3.54k
        break;
1370
3.54k
        }
1371
1372
524k
    case TYPE_SMALL_TUPLE:
1373
524k
        n = r_byte(p);
1374
524k
        if (n == EOF) {
1375
0
            break;
1376
0
        }
1377
524k
        goto _read_tuple;
1378
524k
    case TYPE_TUPLE:
1379
66
        n = r_long(p);
1380
66
        if (n < 0 || n > SIZE32_MAX) {
1381
0
            if (!PyErr_Occurred()) {
1382
0
                PyErr_SetString(PyExc_ValueError,
1383
0
                    "bad marshal data (tuple size out of range)");
1384
0
            }
1385
0
            break;
1386
0
        }
1387
524k
    _read_tuple:
1388
524k
        v = PyTuple_New(n);
1389
524k
        R_REF(v);
1390
524k
        if (v == NULL)
1391
0
            break;
1392
1393
3.66M
        for (i = 0; i < n; i++) {
1394
3.13M
            v2 = r_object(p);
1395
3.13M
            if ( v2 == NULL ) {
1396
0
                if (!PyErr_Occurred())
1397
0
                    PyErr_SetString(PyExc_TypeError,
1398
0
                        "NULL object in marshal data for tuple");
1399
0
                Py_SETREF(v, NULL);
1400
0
                break;
1401
0
            }
1402
3.13M
            PyTuple_SET_ITEM(v, i, v2);
1403
3.13M
        }
1404
524k
        retval = v;
1405
524k
        break;
1406
1407
0
    case TYPE_LIST:
1408
0
        n = r_long(p);
1409
0
        if (n < 0 || n > SIZE32_MAX) {
1410
0
            if (!PyErr_Occurred()) {
1411
0
                PyErr_SetString(PyExc_ValueError,
1412
0
                    "bad marshal data (list size out of range)");
1413
0
            }
1414
0
            break;
1415
0
        }
1416
0
        v = PyList_New(n);
1417
0
        R_REF(v);
1418
0
        if (v == NULL)
1419
0
            break;
1420
0
        for (i = 0; i < n; i++) {
1421
0
            v2 = r_object(p);
1422
0
            if ( v2 == NULL ) {
1423
0
                if (!PyErr_Occurred())
1424
0
                    PyErr_SetString(PyExc_TypeError,
1425
0
                        "NULL object in marshal data for list");
1426
0
                Py_SETREF(v, NULL);
1427
0
                break;
1428
0
            }
1429
0
            PyList_SET_ITEM(v, i, v2);
1430
0
        }
1431
0
        retval = v;
1432
0
        break;
1433
1434
0
    case TYPE_DICT:
1435
0
    case TYPE_FROZENDICT:
1436
0
        v = PyDict_New();
1437
0
        R_REF(v);
1438
0
        if (v == NULL)
1439
0
            break;
1440
0
        for (;;) {
1441
0
            PyObject *key, *val;
1442
0
            key = r_object(p);
1443
0
            if (key == NULL)
1444
0
                break;
1445
0
            val = r_object(p);
1446
0
            if (val == NULL) {
1447
0
                Py_DECREF(key);
1448
0
                break;
1449
0
            }
1450
0
            if (PyDict_SetItem(v, key, val) < 0) {
1451
0
                Py_DECREF(key);
1452
0
                Py_DECREF(val);
1453
0
                break;
1454
0
            }
1455
0
            Py_DECREF(key);
1456
0
            Py_DECREF(val);
1457
0
        }
1458
0
        if (PyErr_Occurred()) {
1459
0
            Py_CLEAR(v);
1460
0
        }
1461
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1462
0
            PyObject *frozendict = PyFrozenDict_New(v);
1463
0
            if (frozendict != NULL) {
1464
0
                Py_SETREF(v, frozendict);
1465
0
            }
1466
0
            else {
1467
0
                Py_CLEAR(v);
1468
0
            }
1469
0
        }
1470
0
        retval = v;
1471
0
        break;
1472
1473
0
    case TYPE_SET:
1474
534
    case TYPE_FROZENSET:
1475
534
        n = r_long(p);
1476
534
        if (n < 0 || n > SIZE32_MAX) {
1477
0
            if (!PyErr_Occurred()) {
1478
0
                PyErr_SetString(PyExc_ValueError,
1479
0
                    "bad marshal data (set size out of range)");
1480
0
            }
1481
0
            break;
1482
0
        }
1483
1484
534
        if (n == 0 && type == TYPE_FROZENSET) {
1485
            /* call frozenset() to get the empty frozenset singleton */
1486
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1487
0
            if (v == NULL)
1488
0
                break;
1489
0
            R_REF(v);
1490
0
            retval = v;
1491
0
        }
1492
534
        else {
1493
534
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1494
534
            if (type == TYPE_SET) {
1495
0
                R_REF(v);
1496
534
            } else {
1497
                /* must use delayed registration of frozensets because they must
1498
                 * be init with a refcount of 1
1499
                 */
1500
534
                idx = r_ref_reserve(flag, p);
1501
534
                if (idx < 0)
1502
0
                    Py_CLEAR(v); /* signal error */
1503
534
            }
1504
534
            if (v == NULL)
1505
0
                break;
1506
1507
2.76k
            for (i = 0; i < n; i++) {
1508
2.23k
                v2 = r_object(p);
1509
2.23k
                if ( v2 == NULL ) {
1510
0
                    if (!PyErr_Occurred())
1511
0
                        PyErr_SetString(PyExc_TypeError,
1512
0
                            "NULL object in marshal data for set");
1513
0
                    Py_SETREF(v, NULL);
1514
0
                    break;
1515
0
                }
1516
2.23k
                if (PySet_Add(v, v2) == -1) {
1517
0
                    Py_DECREF(v);
1518
0
                    Py_DECREF(v2);
1519
0
                    v = NULL;
1520
0
                    break;
1521
0
                }
1522
2.23k
                Py_DECREF(v2);
1523
2.23k
            }
1524
534
            if (type != TYPE_SET)
1525
534
                v = r_ref_insert(v, idx, flag, p);
1526
534
            retval = v;
1527
534
        }
1528
534
        break;
1529
1530
191k
    case TYPE_CODE:
1531
191k
        {
1532
191k
            int argcount;
1533
191k
            int posonlyargcount;
1534
191k
            int kwonlyargcount;
1535
191k
            int stacksize;
1536
191k
            int flags;
1537
191k
            PyObject *code = NULL;
1538
191k
            PyObject *consts = NULL;
1539
191k
            PyObject *names = NULL;
1540
191k
            PyObject *localsplusnames = NULL;
1541
191k
            PyObject *localspluskinds = NULL;
1542
191k
            PyObject *filename = NULL;
1543
191k
            PyObject *name = NULL;
1544
191k
            PyObject *qualname = NULL;
1545
191k
            int firstlineno;
1546
191k
            PyObject* linetable = NULL;
1547
191k
            PyObject *exceptiontable = NULL;
1548
1549
191k
            if (!p->allow_code) {
1550
0
                PyErr_SetString(PyExc_ValueError,
1551
0
                                "unmarshalling code objects is disallowed");
1552
0
                break;
1553
0
            }
1554
191k
            idx = r_ref_reserve(flag, p);
1555
191k
            if (idx < 0)
1556
0
                break;
1557
1558
191k
            v = NULL;
1559
1560
            /* XXX ignore long->int overflows for now */
1561
191k
            argcount = (int)r_long(p);
1562
191k
            if (argcount == -1 && PyErr_Occurred())
1563
0
                goto code_error;
1564
191k
            posonlyargcount = (int)r_long(p);
1565
191k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1566
0
                goto code_error;
1567
0
            }
1568
191k
            kwonlyargcount = (int)r_long(p);
1569
191k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1570
0
                goto code_error;
1571
191k
            stacksize = (int)r_long(p);
1572
191k
            if (stacksize == -1 && PyErr_Occurred())
1573
0
                goto code_error;
1574
191k
            flags = (int)r_long(p);
1575
191k
            if (flags == -1 && PyErr_Occurred())
1576
0
                goto code_error;
1577
191k
            code = r_object(p);
1578
191k
            if (code == NULL)
1579
0
                goto code_error;
1580
191k
            consts = r_object(p);
1581
191k
            if (consts == NULL)
1582
0
                goto code_error;
1583
191k
            names = r_object(p);
1584
191k
            if (names == NULL)
1585
0
                goto code_error;
1586
191k
            localsplusnames = r_object(p);
1587
191k
            if (localsplusnames == NULL)
1588
0
                goto code_error;
1589
191k
            localspluskinds = r_object(p);
1590
191k
            if (localspluskinds == NULL)
1591
0
                goto code_error;
1592
191k
            filename = r_object(p);
1593
191k
            if (filename == NULL)
1594
0
                goto code_error;
1595
191k
            name = r_object(p);
1596
191k
            if (name == NULL)
1597
0
                goto code_error;
1598
191k
            qualname = r_object(p);
1599
191k
            if (qualname == NULL)
1600
0
                goto code_error;
1601
191k
            firstlineno = (int)r_long(p);
1602
191k
            if (firstlineno == -1 && PyErr_Occurred())
1603
0
                break;
1604
191k
            linetable = r_object(p);
1605
191k
            if (linetable == NULL)
1606
0
                goto code_error;
1607
191k
            exceptiontable = r_object(p);
1608
191k
            if (exceptiontable == NULL)
1609
0
                goto code_error;
1610
1611
191k
            struct _PyCodeConstructor con = {
1612
191k
                .filename = filename,
1613
191k
                .name = name,
1614
191k
                .qualname = qualname,
1615
191k
                .flags = flags,
1616
1617
191k
                .code = code,
1618
191k
                .firstlineno = firstlineno,
1619
191k
                .linetable = linetable,
1620
1621
191k
                .consts = consts,
1622
191k
                .names = names,
1623
1624
191k
                .localsplusnames = localsplusnames,
1625
191k
                .localspluskinds = localspluskinds,
1626
1627
191k
                .argcount = argcount,
1628
191k
                .posonlyargcount = posonlyargcount,
1629
191k
                .kwonlyargcount = kwonlyargcount,
1630
1631
191k
                .stacksize = stacksize,
1632
1633
191k
                .exceptiontable = exceptiontable,
1634
191k
            };
1635
1636
191k
            if (_PyCode_Validate(&con) < 0) {
1637
0
                goto code_error;
1638
0
            }
1639
1640
191k
            v = (PyObject *)_PyCode_New(&con);
1641
191k
            if (v == NULL) {
1642
0
                goto code_error;
1643
0
            }
1644
1645
191k
            v = r_ref_insert(v, idx, flag, p);
1646
1647
191k
          code_error:
1648
191k
            if (v == NULL && !PyErr_Occurred()) {
1649
0
                PyErr_SetString(PyExc_TypeError,
1650
0
                    "NULL object in marshal data for code object");
1651
0
            }
1652
191k
            Py_XDECREF(code);
1653
191k
            Py_XDECREF(consts);
1654
191k
            Py_XDECREF(names);
1655
191k
            Py_XDECREF(localsplusnames);
1656
191k
            Py_XDECREF(localspluskinds);
1657
191k
            Py_XDECREF(filename);
1658
191k
            Py_XDECREF(name);
1659
191k
            Py_XDECREF(qualname);
1660
191k
            Py_XDECREF(linetable);
1661
191k
            Py_XDECREF(exceptiontable);
1662
191k
        }
1663
0
        retval = v;
1664
191k
        break;
1665
1666
2.11M
    case TYPE_REF:
1667
2.11M
        n = r_long(p);
1668
2.11M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1669
0
            if (!PyErr_Occurred()) {
1670
0
                PyErr_SetString(PyExc_ValueError,
1671
0
                    "bad marshal data (invalid reference)");
1672
0
            }
1673
0
            break;
1674
0
        }
1675
2.11M
        v = PyList_GET_ITEM(p->refs, n);
1676
2.11M
        if (v == Py_None) {
1677
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1678
0
            break;
1679
0
        }
1680
2.11M
        retval = Py_NewRef(v);
1681
2.11M
        break;
1682
1683
3.30k
    case TYPE_SLICE:
1684
3.30k
    {
1685
3.30k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1686
3.30k
        if (idx < 0) {
1687
0
            break;
1688
0
        }
1689
3.30k
        PyObject *stop = NULL;
1690
3.30k
        PyObject *step = NULL;
1691
3.30k
        PyObject *start = r_object(p);
1692
3.30k
        if (start == NULL) {
1693
0
            goto cleanup;
1694
0
        }
1695
3.30k
        stop = r_object(p);
1696
3.30k
        if (stop == NULL) {
1697
0
            goto cleanup;
1698
0
        }
1699
3.30k
        step = r_object(p);
1700
3.30k
        if (step == NULL) {
1701
0
            goto cleanup;
1702
0
        }
1703
3.30k
        retval = PySlice_New(start, stop, step);
1704
3.30k
        r_ref_insert(retval, idx, flag, p);
1705
3.30k
    cleanup:
1706
3.30k
        Py_XDECREF(start);
1707
3.30k
        Py_XDECREF(stop);
1708
3.30k
        Py_XDECREF(step);
1709
3.30k
        break;
1710
3.30k
    }
1711
1712
0
    default:
1713
        /* Bogus data got written, which isn't ideal.
1714
           This will let you keep working and recover. */
1715
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1716
0
        break;
1717
1718
5.07M
    }
1719
5.07M
    p->depth--;
1720
5.07M
    return retval;
1721
5.07M
}
1722
1723
static PyObject *
1724
read_object(RFILE *p)
1725
7.81k
{
1726
7.81k
    PyObject *v;
1727
7.81k
    if (PyErr_Occurred()) {
1728
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1729
0
        return NULL;
1730
0
    }
1731
7.81k
    if (p->ptr && p->end) {
1732
7.81k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1733
0
            return NULL;
1734
0
        }
1735
7.81k
    } else if (p->fp || p->readable) {
1736
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1737
0
            return NULL;
1738
0
        }
1739
0
    }
1740
7.81k
    v = r_object(p);
1741
7.81k
    if (v == NULL && !PyErr_Occurred())
1742
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1743
7.81k
    return v;
1744
7.81k
}
1745
1746
int
1747
PyMarshal_ReadShortFromFile(FILE *fp)
1748
0
{
1749
0
    RFILE rf;
1750
0
    int res;
1751
0
    assert(fp);
1752
0
    rf.readable = NULL;
1753
0
    rf.fp = fp;
1754
0
    rf.end = rf.ptr = NULL;
1755
0
    rf.buf = NULL;
1756
0
    res = r_short(&rf);
1757
0
    if (rf.buf != NULL)
1758
0
        PyMem_Free(rf.buf);
1759
0
    return res;
1760
0
}
1761
1762
long
1763
PyMarshal_ReadLongFromFile(FILE *fp)
1764
0
{
1765
0
    RFILE rf;
1766
0
    long res;
1767
0
    rf.fp = fp;
1768
0
    rf.readable = NULL;
1769
0
    rf.ptr = rf.end = NULL;
1770
0
    rf.buf = NULL;
1771
0
    res = r_long(&rf);
1772
0
    if (rf.buf != NULL)
1773
0
        PyMem_Free(rf.buf);
1774
0
    return res;
1775
0
}
1776
1777
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1778
static off_t
1779
getfilesize(FILE *fp)
1780
0
{
1781
0
    struct _Py_stat_struct st;
1782
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1783
0
        return -1;
1784
#if SIZEOF_OFF_T == 4
1785
    else if (st.st_size >= INT_MAX)
1786
        return (off_t)INT_MAX;
1787
#endif
1788
0
    else
1789
0
        return (off_t)st.st_size;
1790
0
}
1791
1792
/* If we can get the size of the file up-front, and it's reasonably small,
1793
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1794
 * than reading a byte at a time from file; speeds .pyc imports.
1795
 * CAUTION:  since this may read the entire remainder of the file, don't
1796
 * call it unless you know you're done with the file.
1797
 */
1798
PyObject *
1799
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1800
0
{
1801
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1802
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1803
0
    off_t filesize;
1804
0
    filesize = getfilesize(fp);
1805
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1806
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1807
0
        if (pBuf != NULL) {
1808
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1809
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1810
0
            PyMem_Free(pBuf);
1811
0
            return v;
1812
0
        }
1813
1814
0
    }
1815
    /* We don't have fstat, or we do but the file is larger than
1816
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1817
     */
1818
0
    return PyMarshal_ReadObjectFromFile(fp);
1819
1820
0
#undef REASONABLE_FILE_LIMIT
1821
0
}
1822
1823
PyObject *
1824
PyMarshal_ReadObjectFromFile(FILE *fp)
1825
0
{
1826
0
    RFILE rf;
1827
0
    PyObject *result;
1828
0
    rf.allow_code = 1;
1829
0
    rf.fp = fp;
1830
0
    rf.readable = NULL;
1831
0
    rf.depth = 0;
1832
0
    rf.ptr = rf.end = NULL;
1833
0
    rf.buf = NULL;
1834
0
    rf.refs = PyList_New(0);
1835
0
    if (rf.refs == NULL)
1836
0
        return NULL;
1837
0
    result = read_object(&rf);
1838
0
    Py_DECREF(rf.refs);
1839
0
    if (rf.buf != NULL)
1840
0
        PyMem_Free(rf.buf);
1841
0
    return result;
1842
0
}
1843
1844
PyObject *
1845
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1846
489
{
1847
489
    RFILE rf;
1848
489
    PyObject *result;
1849
489
    rf.allow_code = 1;
1850
489
    rf.fp = NULL;
1851
489
    rf.readable = NULL;
1852
489
    rf.ptr = str;
1853
489
    rf.end = str + len;
1854
489
    rf.buf = NULL;
1855
489
    rf.depth = 0;
1856
489
    rf.refs = PyList_New(0);
1857
489
    if (rf.refs == NULL)
1858
0
        return NULL;
1859
489
    result = read_object(&rf);
1860
489
    Py_DECREF(rf.refs);
1861
489
    if (rf.buf != NULL)
1862
0
        PyMem_Free(rf.buf);
1863
489
    return result;
1864
489
}
1865
1866
/* Marshal `x` into a new bytes object using format `version`.
 *
 * `allow_code` is stored on the writer state for w_object() to consult.
 * Fires the "marshal.dumps" audit event first.  The output starts as a
 * 50-byte bytes object and is trimmed to the number of bytes actually
 * written once w_object() is done.
 *
 * Returns a new reference, or NULL with an exception set.  A writer error
 * recorded in wf.error is turned into MemoryError (WFERR_NOMEMORY) or a
 * ValueError with a message specific to the error kind.
 */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    WFILE wf;

    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    memset(&wf, 0, sizeof(wf));
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    wf.buf = PyBytes_AS_STRING(wf.str);
    wf.ptr = wf.buf;
    wf.end = wf.buf + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;

    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }
    w_object(x, &wf);
    w_clear_refs(&wf);

    /* wf.str may be NULL at this point, hence the check before trimming. */
    if (wf.str != NULL) {
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - PyBytes_AS_STRING(wf.str));
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* Writer reported a failure: drop the partial output and raise. */
    Py_XDECREF(wf.str);
    if (wf.error == WFERR_NOMEMORY) {
        PyErr_NoMemory();
    }
    else {
        const char *msg;
        switch (wf.error) {
        case WFERR_NESTEDTOODEEP:
            msg = "object too deeply nested to marshal";
            break;
        case WFERR_CODE_NOT_ALLOWED:
            msg = "marshalling code objects is disallowed";
            break;
        case WFERR_UNMARSHALLABLE:
        default:
            msg = "unmarshallable object";
            break;
        }
        PyErr_SetString(PyExc_ValueError, msg);
    }
    return NULL;
}
1918
1919
/* Public entry point: marshal `x` to a bytes object in format `version`,
 * with code objects allowed.
 */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1924
1925
/* And an interface for Python programs... */
1926
/*[clinic input]
1927
marshal.dump
1928
1929
    value: object
1930
        Must be a supported type.
1931
    file: object
1932
        Must be a writeable binary file.
1933
    version: int(c_default="Py_MARSHAL_VERSION") = version
1934
        Indicates the data format that dump should use.
1935
    /
1936
    *
1937
    allow_code: bool = True
1938
        Allow to write code objects.
1939
1940
Write the value on the open file.
1941
1942
If the value has (or contains an object that has) an unsupported type, a
1943
ValueError exception is raised - but garbage data will also be written
1944
to the file. The object will not be properly read back by load().
1945
[clinic start generated code]*/
1946
1947
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently */

    /* Serialise fully in memory, then pass the bytes to file.write(). */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    /* The result of write() (or NULL on error) is returned as-is. */
    PyObject *write_result = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                       payload);
    Py_DECREF(payload);
    return write_result;
}
1963
1964
/*[clinic input]
1965
marshal.load
1966
1967
    file: object
1968
        Must be readable binary file.
1969
    /
1970
    *
1971
    allow_code: bool = True
1972
        Allow to load code objects.
1973
1974
Read one value from the open file and return it.
1975
1976
If no valid value is read (e.g. because the data has a different Python
1977
version's incompatible marshal format), raise EOFError, ValueError or
1978
TypeError.
1979
1980
Note: If an object containing an unsupported type was marshalled with
1981
dump(), load() will substitute None for the unmarshallable type.
1982
[clinic start generated code]*/
1983
1984
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    RFILE rf;
    PyObject *result = NULL;

    /*
     * Probe the file object with a zero-byte read() to make sure it has
     * a read method and that the method hands back bytes.
     * This can be removed if we guarantee good error handling
     * for r_string()
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }

    if (PyBytes_Check(probe)) {
        /* Reader backed by the Python-level file object. */
        rf.readable = file;
        rf.fp = NULL;
        rf.ptr = NULL;
        rf.end = NULL;
        rf.buf = NULL;
        rf.depth = 0;
        rf.allow_code = allow_code;

        rf.refs = PyList_New(0);
        if (rf.refs != NULL) {
            result = read_object(&rf);
            Py_DECREF(rf.refs);
            if (rf.buf != NULL) {
                PyMem_Free(rf.buf);
            }
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
    }

    Py_DECREF(probe);
    return result;
}
2025
2026
/*[clinic input]
2027
@permit_long_summary
2028
@permit_long_docstring_body
2029
marshal.dumps
2030
2031
    value: object
2032
        Must be a supported type.
2033
    version: int(c_default="Py_MARSHAL_VERSION") = version
2034
        Indicates the data format that dumps should use.
2035
    /
2036
    *
2037
    allow_code: bool = True
2038
        Allow to write code objects.
2039
2040
Return the bytes object that would be written to a file by dump(value, file).
2041
2042
Raise a ValueError exception if value has (or contains an object that has) an
2043
unsupported type.
2044
[clinic start generated code]*/
2045
2046
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=80cd3f30c1637ade]*/
{
    /* Thin wrapper: all the work happens in the shared writer helper. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2053
2054
/*[clinic input]
2055
marshal.loads
2056
2057
    bytes: Py_buffer
2058
    /
2059
    *
2060
    allow_code: bool = True
2061
        Allow to load code objects.
2062
2063
Convert the bytes-like object to a value.
2064
2065
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2066
bytes in the input are ignored.
2067
[clinic start generated code]*/
2068
2069
static PyObject *
2070
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2071
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2072
7.32k
{
2073
7.32k
    RFILE rf;
2074
7.32k
    char *s = bytes->buf;
2075
7.32k
    Py_ssize_t n = bytes->len;
2076
7.32k
    PyObject* result;
2077
7.32k
    rf.allow_code = allow_code;
2078
7.32k
    rf.fp = NULL;
2079
7.32k
    rf.readable = NULL;
2080
7.32k
    rf.ptr = s;
2081
7.32k
    rf.end = s + n;
2082
7.32k
    rf.depth = 0;
2083
7.32k
    if ((rf.refs = PyList_New(0)) == NULL)
2084
0
        return NULL;
2085
7.32k
    result = read_object(&rf);
2086
7.32k
    Py_DECREF(rf.refs);
2087
7.32k
    return result;
2088
7.32k
}
2089
2090
static PyMethodDef marshal_methods[] = {
2091
    MARSHAL_DUMP_METHODDEF
2092
    MARSHAL_LOAD_METHODDEF
2093
    MARSHAL_DUMPS_METHODDEF
2094
    MARSHAL_LOADS_METHODDEF
2095
    {NULL,              NULL}           /* sentinel */
2096
};
2097
2098
2099
PyDoc_STRVAR(module_doc,
2100
"This module contains functions that can read and write Python values in\n\
2101
a binary format. The format is specific to Python, but independent of\n\
2102
machine architecture issues.\n\
2103
\n\
2104
Not all Python object types are supported; in general, only objects\n\
2105
whose value is independent from a particular invocation of Python can be\n\
2106
written and read by this module. The following types are supported:\n\
2107
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2108
tuples, lists, sets, dictionaries, and code objects, where it\n\
2109
should be understood that tuples, lists and dictionaries are only\n\
2110
supported as long as the values contained therein are themselves\n\
2111
supported; and recursive lists and dictionaries should not be written\n\
2112
(they will cause infinite loops).\n\
2113
\n\
2114
Variables:\n\
2115
\n\
2116
version -- indicates the format that the module uses. Version 0 is the\n\
2117
    historical format, version 1 shares interned strings and version 2\n\
2118
    uses a binary format for floating-point numbers.\n\
2119
    Version 3 shares common object references (New in version 3.4).\n\
2120
\n\
2121
Functions:\n\
2122
\n\
2123
dump() -- write value to a file\n\
2124
load() -- read value from a file\n\
2125
dumps() -- marshal value as a bytes object\n\
2126
loads() -- read value from a bytes-like object");
2127
2128
2129
static int
2130
marshal_module_exec(PyObject *mod)
2131
34
{
2132
34
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2133
0
        return -1;
2134
0
    }
2135
34
    return 0;
2136
34
}
2137
2138
static PyModuleDef_Slot marshalmodule_slots[] = {
2139
    {Py_mod_exec, marshal_module_exec},
2140
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2141
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2142
    {0, NULL}
2143
};
2144
2145
static struct PyModuleDef marshalmodule = {
2146
    PyModuleDef_HEAD_INIT,
2147
    .m_name = "marshal",
2148
    .m_doc = module_doc,
2149
    .m_methods = marshal_methods,
2150
    .m_slots = marshalmodule_slots,
2151
};
2152
2153
PyMODINIT_FUNC
2154
PyMarshal_Init(void)
2155
34
{
2156
34
    return PyModuleDef_Init(&marshalmodule);
2157
34
}