Coverage Report

Created: 2026-03-23 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
18
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
19
20
#include "marshal.h"                 // Py_MARSHAL_VERSION
21
22
#ifdef __APPLE__
23
#  include "TargetConditionals.h"
24
#endif /* __APPLE__ */
25
26
27
/*[clinic input]
28
module marshal
29
[clinic start generated code]*/
30
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
31
32
#include "clinic/marshal.c.h"
33
34
/* High water mark to determine when the marshalled object is dangerously deep
35
 * and risks coring the interpreter.  When the object stack gets this deep,
36
 * raise an exception instead of continuing.
37
 * The limit is reduced on Windows, WASI and non-macOS Apple platforms.
38
 *
39
 * BUG: https://bugs.python.org/issue33720
40
 * On Windows PGO builds, the r_object function overallocates its stack and
41
 * can cause a stack overflow. We reduce the maximum depth for all Windows
42
 * releases to protect against this.
43
 * NOTE(review): the guard was presumably once limited to debug builds, i.e.
 * "#if defined(MS_WINDOWS) && defined(Py_DEBUG)" -- confirm against history.
44
 */
45
#if defined(MS_WINDOWS)
46
#  define MAX_MARSHAL_STACK_DEPTH 1000
47
#elif defined(__wasi__)
48
#  define MAX_MARSHAL_STACK_DEPTH 1500
49
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
50
// It won't be defined on older macOS SDKs, hence the defined() test below.
51
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
52
#  define MAX_MARSHAL_STACK_DEPTH 1500
53
#else
54
4.59M
#  define MAX_MARSHAL_STACK_DEPTH 2000
55
#endif
56
57
/* Supported types.
 * Each TYPE_* value is the one-byte tag the writer emits (via w_byte/W_TYPE)
 * in front of an object's payload. */
58
0
#define TYPE_NULL               '0'
59
96.5k
#define TYPE_NONE               'N'
60
12.6k
#define TYPE_FALSE              'F'
61
11.7k
#define TYPE_TRUE               'T'
62
0
#define TYPE_STOPITER           'S'
63
407
#define TYPE_ELLIPSIS           '.'
64
695
#define TYPE_BINARY_FLOAT       'g'  // Versions 0 and 1 use TYPE_FLOAT instead.
65
3
#define TYPE_BINARY_COMPLEX     'y'  // Versions 0 and 1 use TYPE_COMPLEX instead.
66
443
#define TYPE_LONG               'l'  // See also TYPE_INT.
67
522k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
68
70
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
69
0
#define TYPE_LIST               '['
70
0
#define TYPE_DICT               '{'
71
0
#define TYPE_FROZENDICT         '}'  // Only written for version 6+.
72
173k
#define TYPE_CODE               'c'
73
3.54k
#define TYPE_UNICODE            'u'
74
#define TYPE_UNKNOWN            '?'  // Written when an object cannot be marshalled.
75
// added in version 2:
76
1.59k
#define TYPE_SET                '<'
77
533
#define TYPE_FROZENSET          '>'
78
// added in version 5:
79
3.41k
#define TYPE_SLICE              ':'
80
// Remember to update the version and documentation when adding new types.
81
82
/* Special cases for unicode strings (added in version 4) */
83
202
#define TYPE_INTERNED           't' // Version 1+
84
26.2k
#define TYPE_ASCII              'a'
85
0
#define TYPE_ASCII_INTERNED     'A'
86
1.27M
#define TYPE_SHORT_ASCII        'z'  // ASCII string shorter than 256 characters.
87
1.11M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
88
89
/* Special cases for small objects */
90
25.5k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
91
465k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+; fewer than 256 items.
92
93
/* Supported for backwards compatibility */
94
0
#define TYPE_COMPLEX            'x'  // Generated for versions 0 and 1 only.
95
0
#define TYPE_FLOAT              'f'  // Generated for versions 0 and 1 only.
96
0
#define TYPE_INT64              'I'  // Not generated any more.
97
98
/* References (added in version 3) */
99
1.90M
#define TYPE_REF                'r'  /* followed by the index of an earlier object */
100
9.06M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
101
102
103
// Error codes stored in WFILE.error by the writer functions:
104
77.5k
#define WFERR_OK 0  // no error so far; w_object() does nothing once this changes
105
22
#define WFERR_UNMARSHALLABLE 1  // unsupported type, oversized object, or failed conversion
106
0
#define WFERR_NESTEDTOODEEP 2  // nesting depth exceeded MAX_MARSHAL_STACK_DEPTH
107
11
#define WFERR_NOMEMORY 3  // an allocation (or buffer growth) failed
108
0
#define WFERR_CODE_NOT_ALLOWED 4  // code object met while WFILE.allow_code is 0
109
110
/* State of one marshal write operation.  Output goes either to a stdio
 * stream (fp != NULL, with buf as a fixed staging buffer) or to a growable
 * bytes object (fp == NULL, str != NULL). */
typedef struct {
111
    FILE *fp;  /* destination stream, or NULL when writing into `str` */
112
    int error;  /* see WFERR_* values */
113
    int depth;  /* current w_object() nesting depth */
114
    PyObject *str;  /* bytes object being filled when fp is NULL */
115
    char *ptr;  /* next write position; NULL after a failed resize */
116
    const char *end;  /* one past the last usable byte of the buffer */
117
    char *buf;  /* start of the buffer */
118
    _Py_hashtable_t *hashtable;  /* object -> reference index, or NULL (see w_ref) */
119
    int version;  /* marshal format version being written */
120
    int allow_code;  /* zero makes code objects an error */
121
} WFILE;
122
123
368k
/* Append the single byte `c` to WFILE `p`.  When the buffer is full,
 * w_reserve() is asked for one more byte; if that fails the byte is
 * silently dropped.  Note that `p` is evaluated several times. */
#define w_byte(c, p) do {                               \
124
368k
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
125
368k
            *(p)->ptr++ = (c);                          \
126
368k
    } while(0)
127
128
static void
129
w_flush(WFILE *p)
130
0
{
131
0
    assert(p->fp != NULL);
132
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
133
0
    p->ptr = p->buf;
134
0
}
135
136
static int
137
w_reserve(WFILE *p, Py_ssize_t needed)
138
522
{
139
522
    Py_ssize_t pos, size, delta;
140
522
    if (p->ptr == NULL)
141
0
        return 0; /* An error already occurred */
142
522
    if (p->fp != NULL) {
143
0
        w_flush(p);
144
0
        return needed <= p->end - p->ptr;
145
0
    }
146
522
    assert(p->str != NULL);
147
522
    pos = p->ptr - p->buf;
148
522
    size = PyBytes_GET_SIZE(p->str);
149
522
    if (size > 16*1024*1024)
150
0
        delta = (size >> 3);            /* 12.5% overallocation */
151
522
    else
152
522
        delta = size + 1024;
153
522
    delta = Py_MAX(delta, needed);
154
522
    if (delta > PY_SSIZE_T_MAX - size) {
155
0
        p->error = WFERR_NOMEMORY;
156
0
        return 0;
157
0
    }
158
522
    size += delta;
159
522
    if (_PyBytes_Resize(&p->str, size) != 0) {
160
0
        p->end = p->ptr = p->buf = NULL;
161
0
        return 0;
162
0
    }
163
522
    else {
164
522
        p->buf = PyBytes_AS_STRING(p->str);
165
522
        p->ptr = p->buf + pos;
166
522
        p->end = p->buf + size;
167
522
        return 1;
168
522
    }
169
522
}
170
171
static void
172
w_string(const void *s, Py_ssize_t n, WFILE *p)
173
25.7k
{
174
25.7k
    Py_ssize_t m;
175
25.7k
    if (!n || p->ptr == NULL)
176
203
        return;
177
25.5k
    m = p->end - p->ptr;
178
25.5k
    if (p->fp != NULL) {
179
0
        if (n <= m) {
180
0
            memcpy(p->ptr, s, n);
181
0
            p->ptr += n;
182
0
        }
183
0
        else {
184
0
            w_flush(p);
185
0
            fwrite(s, 1, n, p->fp);
186
0
        }
187
0
    }
188
25.5k
    else {
189
25.5k
        if (n <= m || w_reserve(p, n - m)) {
190
25.5k
            memcpy(p->ptr, s, n);
191
25.5k
            p->ptr += n;
192
25.5k
        }
193
25.5k
    }
194
25.5k
}
195
196
static void
197
w_short(int x, WFILE *p)
198
18
{
199
18
    w_byte((char)( x      & 0xff), p);
200
18
    w_byte((char)((x>> 8) & 0xff), p);
201
18
}
202
203
static void
204
w_long(long x, WFILE *p)
205
66.8k
{
206
66.8k
    w_byte((char)( x      & 0xff), p);
207
66.8k
    w_byte((char)((x>> 8) & 0xff), p);
208
66.8k
    w_byte((char)((x>>16) & 0xff), p);
209
66.8k
    w_byte((char)((x>>24) & 0xff), p);
210
66.8k
}
211
212
563k
/* Largest size/count that W_SIZE (and _r_digits) will write with w_long(). */
#define SIZE32_MAX  0x7FFFFFFF
213
214
/* W_SIZE(n, p): write the size `n` with w_long().
 * Where size_t is wider than 4 bytes, a value above SIZE32_MAX is rejected
 * instead: depth is decremented, WFERR_UNMARSHALLABLE is flagged and the
 * macro executes `return;` in the ENCLOSING function, so it may only be
 * used inside functions returning void. */
#if SIZEOF_SIZE_T > 4
215
9.68k
# define W_SIZE(n, p)  do {                     \
216
9.68k
        if ((n) > SIZE32_MAX) {                 \
217
0
            (p)->depth--;                       \
218
0
            (p)->error = WFERR_UNMARSHALLABLE;  \
219
0
            return;                             \
220
0
        }                                       \
221
9.68k
        w_long((long)(n), p);                   \
222
9.68k
    } while(0)
223
#else
224
# define W_SIZE  w_long
225
#endif
226
227
static void
228
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
229
9.67k
{
230
9.67k
        W_SIZE(n, p);
231
9.67k
        w_string(s, n, p);
232
9.67k
}
233
234
static void
235
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
236
16.0k
{
237
16.0k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
238
16.0k
    w_string(s, n, p);
239
16.0k
}
240
241
/* We assume that Python ints are stored internally in a base that is some
242
   power of 2**15; for the sake of portability we always read and write them
243
   in base exactly 2**15. */
244
245
3.88k
#define PyLong_MARSHAL_SHIFT 15  /* bits per marshalled digit */
246
1.71k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)  /* 2**15 */
247
18
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)  /* low 15 bits */
248
249
38.7k
/* Write the type tag `t` combined with `flag`.
 * `flag` is NOT a macro argument: a variable of that name must be in scope
 * at the point of use (it carries FLAG_REF when w_ref() set it). */
#define W_TYPE(t, p) do { \
250
38.7k
    w_byte((t) | flag, (p)); \
251
38.7k
} while(0)
252
253
static PyObject *
254
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
255
256
#define _r_digits(bitsize)                                                \
257
static void                                                               \
258
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
259
2
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
260
2
{                                                                         \
261
2
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
262
2
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
263
2
    uint ## bitsize ## _t d = digits[n - 1];                              \
264
2
                                                                          \
265
2
    assert(marshal_ratio > 0);                                            \
266
2
    assert(n >= 1);                                                       \
267
2
    assert(d != 0); /* a PyLong is always normalized */                   \
268
2
    do {                                                                  \
269
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
270
2
        l++;                                                              \
271
2
    } while (d != 0);                                                     \
272
2
    if (l > SIZE32_MAX) {                                                 \
273
0
        p->depth--;                                                       \
274
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
275
0
        return;                                                           \
276
0
    }                                                                     \
277
2
    w_long((long)(negative ? -l : l), p);                                 \
278
2
                                                                          \
279
6
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
280
4
        d = digits[i];                                                    \
281
12
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
282
8
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
283
8
            d >>= PyLong_MARSHAL_SHIFT;                                   \
284
8
        }                                                                 \
285
4
        assert(d == 0);                                                   \
286
4
    }                                                                     \
287
2
    d = digits[n - 1];                                                    \
288
2
    do {                                                                  \
289
2
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
290
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
291
2
    } while (d != 0);                                                     \
292
2
}
293
0
_r_digits(16)
294
2
_r_digits(32)
295
#undef _r_digits
296
297
static void
298
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
299
4
{
300
4
    W_TYPE(TYPE_LONG, p);
301
4
    if (_PyLong_IsZero(ob)) {
302
0
        w_long((long)0, p);
303
0
        return;
304
0
    }
305
306
4
    PyLongExport long_export;
307
308
4
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
309
0
        p->depth--;
310
0
        p->error = WFERR_UNMARSHALLABLE;
311
0
        return;
312
0
    }
313
4
    if (!long_export.digits) {
314
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
315
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
316
2
        uint64_t d = abs_value;
317
2
        long l = 0;
318
319
        /* set l to number of base PyLong_MARSHAL_BASE digits */
320
8
        do {
321
8
            d >>= PyLong_MARSHAL_SHIFT;
322
8
            l += sign;
323
8
        } while (d);
324
2
        w_long(l, p);
325
326
2
        d = abs_value;
327
8
        do {
328
8
            w_short(d & PyLong_MARSHAL_MASK, p);
329
8
            d >>= PyLong_MARSHAL_SHIFT;
330
8
        } while (d);
331
2
        return;
332
2
    }
333
334
2
    const PyLongLayout *layout = PyLong_GetNativeLayout();
335
2
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
336
337
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
338
2
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
339
2
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
340
341
    /* other assumptions on PyLongObject internals */
342
2
    assert(layout->bits_per_digit <= 32);
343
2
    assert(layout->digits_order == -1);
344
2
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
345
2
    assert(layout->digit_size == 2 || layout->digit_size == 4);
346
347
2
    if (layout->digit_size == 4) {
348
2
        _r_digits32(long_export.digits, long_export.ndigits,
349
2
                    long_export.negative, marshal_ratio, p);
350
2
    }
351
0
    else {
352
0
        _r_digits16(long_export.digits, long_export.ndigits,
353
0
                    long_export.negative, marshal_ratio, p);
354
0
    }
355
2
    PyLong_FreeExport(&long_export);
356
2
}
357
358
static void
359
w_float_bin(double v, WFILE *p)
360
15
{
361
15
    char buf[8];
362
15
    if (PyFloat_Pack8(v, buf, 1) < 0) {
363
0
        p->error = WFERR_UNMARSHALLABLE;
364
0
        return;
365
0
    }
366
15
    w_string(buf, 8, p);
367
15
}
368
369
static void
370
w_float_str(double v, WFILE *p)
371
0
{
372
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
373
0
    if (!buf) {
374
0
        p->error = WFERR_NOMEMORY;
375
0
        return;
376
0
    }
377
0
    w_short_pstring(buf, strlen(buf), p);
378
0
    PyMem_Free(buf);
379
0
}
380
381
static int
382
w_ref(PyObject *v, char *flag, WFILE *p)
383
74.5k
{
384
74.5k
    _Py_hashtable_entry_t *entry;
385
74.5k
    int w;
386
387
74.5k
    if (p->version < 3 || p->hashtable == NULL)
388
0
        return 0; /* not writing object references */
389
390
    /* If it has only one reference, it definitely isn't shared.
391
     * But we use TYPE_REF always for interned string, to PYC file stable
392
     * as possible.
393
     */
394
74.5k
    if (_PyObject_IsUniquelyReferenced(v) &&
395
20.0k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
396
18.4k
        return 0;
397
18.4k
    }
398
399
56.0k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
400
56.0k
    if (entry != NULL) {
401
        /* write the reference index to the stream */
402
35.7k
        w = (int)(uintptr_t)entry->value;
403
        /* we don't store "long" indices in the dict */
404
35.7k
        assert(0 <= w && w <= 0x7fffffff);
405
35.7k
        w_byte(TYPE_REF, p);
406
35.7k
        w_long(w, p);
407
35.7k
        return 1;
408
35.7k
    } else {
409
20.3k
        size_t s = p->hashtable->nentries;
410
        /* we don't support long indices */
411
20.3k
        if (s >= 0x7fffffff) {
412
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
413
0
            goto err;
414
0
        }
415
20.3k
        w = (int)s;
416
20.3k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
417
20.3k
                              (void *)(uintptr_t)w) < 0) {
418
0
            Py_DECREF(v);
419
0
            goto err;
420
0
        }
421
20.3k
        *flag |= FLAG_REF;
422
20.3k
        return 0;
423
20.3k
    }
424
0
err:
425
0
    p->error = WFERR_UNMARSHALLABLE;
426
0
    return 1;
427
56.0k
}
428
429
static void
430
w_complex_object(PyObject *v, char flag, WFILE *p);
431
432
static void
433
w_object(PyObject *v, WFILE *p)
434
77.1k
{
435
77.1k
    char flag = '\0';
436
437
77.1k
    if (p->error != WFERR_OK) {
438
0
        return;
439
0
    }
440
441
77.1k
    p->depth++;
442
443
77.1k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
444
0
        p->error = WFERR_NESTEDTOODEEP;
445
0
    }
446
77.1k
    else if (v == NULL) {
447
0
        w_byte(TYPE_NULL, p);
448
0
    }
449
77.1k
    else if (v == Py_None) {
450
2.00k
        w_byte(TYPE_NONE, p);
451
2.00k
    }
452
75.1k
    else if (v == PyExc_StopIteration) {
453
0
        w_byte(TYPE_STOPITER, p);
454
0
    }
455
75.1k
    else if (v == Py_Ellipsis) {
456
3
        w_byte(TYPE_ELLIPSIS, p);
457
3
    }
458
75.0k
    else if (v == Py_False) {
459
342
        w_byte(TYPE_FALSE, p);
460
342
    }
461
74.7k
    else if (v == Py_True) {
462
227
        w_byte(TYPE_TRUE, p);
463
227
    }
464
74.5k
    else if (!w_ref(v, &flag, p))
465
38.7k
        w_complex_object(v, flag, p);
466
467
77.1k
    p->depth--;
468
77.1k
}
469
470
static void
471
w_complex_object(PyObject *v, char flag, WFILE *p)
472
38.7k
{
473
38.7k
    Py_ssize_t i, n;
474
475
38.7k
    if (PyLong_CheckExact(v)) {
476
1.80k
        int overflow;
477
1.80k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
478
1.80k
        if (overflow) {
479
2
            w_PyLong((PyLongObject *)v, flag, p);
480
2
        }
481
1.80k
        else {
482
1.80k
#if SIZEOF_LONG > 4
483
1.80k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
484
1.80k
            if (y && y != -1) {
485
                /* Too large for TYPE_INT */
486
2
                w_PyLong((PyLongObject*)v, flag, p);
487
2
            }
488
1.80k
            else
489
1.80k
#endif
490
1.80k
            {
491
1.80k
                W_TYPE(TYPE_INT, p);
492
1.80k
                w_long(x, p);
493
1.80k
            }
494
1.80k
        }
495
1.80k
    }
496
36.9k
    else if (PyFloat_CheckExact(v)) {
497
13
        if (p->version > 1) {
498
13
            W_TYPE(TYPE_BINARY_FLOAT, p);
499
13
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
500
13
        }
501
0
        else {
502
0
            W_TYPE(TYPE_FLOAT, p);
503
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
504
0
        }
505
13
    }
506
36.9k
    else if (PyComplex_CheckExact(v)) {
507
1
        if (p->version > 1) {
508
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
509
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
510
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
511
1
        }
512
0
        else {
513
0
            W_TYPE(TYPE_COMPLEX, p);
514
0
            w_float_str(PyComplex_RealAsDouble(v), p);
515
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
516
0
        }
517
1
    }
518
36.9k
    else if (PyBytes_CheckExact(v)) {
519
9.40k
        W_TYPE(TYPE_STRING, p);
520
9.40k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
521
9.40k
    }
522
27.5k
    else if (PyUnicode_CheckExact(v)) {
523
16.3k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
524
16.2k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
525
16.2k
            if (is_short) {
526
16.0k
                if (PyUnicode_CHECK_INTERNED(v))
527
14.3k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
528
1.73k
                else
529
1.73k
                    W_TYPE(TYPE_SHORT_ASCII, p);
530
16.0k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
531
16.0k
                                PyUnicode_GET_LENGTH(v), p);
532
16.0k
            }
533
202
            else {
534
202
                if (PyUnicode_CHECK_INTERNED(v))
535
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
536
202
                else
537
202
                    W_TYPE(TYPE_ASCII, p);
538
202
                w_pstring(PyUnicode_1BYTE_DATA(v),
539
202
                          PyUnicode_GET_LENGTH(v), p);
540
202
            }
541
16.2k
        }
542
65
        else {
543
65
            PyObject *utf8;
544
65
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
545
65
            if (utf8 == NULL) {
546
0
                p->depth--;
547
0
                p->error = WFERR_UNMARSHALLABLE;
548
0
                return;
549
0
            }
550
65
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
551
0
                W_TYPE(TYPE_INTERNED, p);
552
65
            else
553
65
                W_TYPE(TYPE_UNICODE, p);
554
65
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
555
65
            Py_DECREF(utf8);
556
65
        }
557
16.3k
    }
558
11.1k
    else if (PyTuple_CheckExact(v)) {
559
7.89k
        n = PyTuple_GET_SIZE(v);
560
7.89k
        if (p->version >= 4 && n < 256) {
561
7.89k
            W_TYPE(TYPE_SMALL_TUPLE, p);
562
7.89k
            w_byte((unsigned char)n, p);
563
7.89k
        }
564
0
        else {
565
0
            W_TYPE(TYPE_TUPLE, p);
566
0
            W_SIZE(n, p);
567
0
        }
568
51.9k
        for (i = 0; i < n; i++) {
569
44.0k
            w_object(PyTuple_GET_ITEM(v, i), p);
570
44.0k
        }
571
7.89k
    }
572
3.30k
    else if (PyList_CheckExact(v)) {
573
0
        W_TYPE(TYPE_LIST, p);
574
0
        n = PyList_GET_SIZE(v);
575
0
        W_SIZE(n, p);
576
0
        for (i = 0; i < n; i++) {
577
0
            w_object(PyList_GET_ITEM(v, i), p);
578
0
        }
579
0
    }
580
3.30k
    else if (PyAnyDict_CheckExact(v)) {
581
0
        Py_ssize_t pos;
582
0
        PyObject *key, *value;
583
0
        if (PyFrozenDict_CheckExact(v)) {
584
0
            if (p->version < 6) {
585
0
                w_byte(TYPE_UNKNOWN, p);
586
0
                p->error = WFERR_UNMARSHALLABLE;
587
0
                return;
588
0
            }
589
590
0
            W_TYPE(TYPE_FROZENDICT, p);
591
0
        }
592
0
        else {
593
0
            W_TYPE(TYPE_DICT, p);
594
0
        }
595
        /* This one is NULL object terminated! */
596
0
        pos = 0;
597
0
        while (PyDict_Next(v, &pos, &key, &value)) {
598
0
            w_object(key, p);
599
0
            w_object(value, p);
600
0
        }
601
0
        w_object((PyObject *)NULL, p);
602
0
    }
603
3.30k
    else if (PyAnySet_CheckExact(v)) {
604
11
        PyObject *value;
605
11
        Py_ssize_t pos = 0;
606
11
        Py_hash_t hash;
607
608
11
        if (PyFrozenSet_CheckExact(v))
609
11
            W_TYPE(TYPE_FROZENSET, p);
610
0
        else
611
0
            W_TYPE(TYPE_SET, p);
612
11
        n = PySet_GET_SIZE(v);
613
11
        W_SIZE(n, p);
614
        // bpo-37596: To support reproducible builds, sets and frozensets need
615
        // to have their elements serialized in a consistent order (even when
616
        // they have been scrambled by hash randomization). To ensure this, we
617
        // use an order equivalent to sorted(v, key=marshal.dumps):
618
11
        PyObject *pairs = PyList_New(n);
619
11
        if (pairs == NULL) {
620
0
            p->error = WFERR_NOMEMORY;
621
0
            return;
622
0
        }
623
11
        Py_ssize_t i = 0;
624
11
        Py_BEGIN_CRITICAL_SECTION(v);
625
76
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
626
65
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
627
65
                                    p->version, p->allow_code);
628
65
            if (dump == NULL) {
629
0
                p->error = WFERR_UNMARSHALLABLE;
630
0
                Py_DECREF(value);
631
0
                break;
632
0
            }
633
65
            PyObject *pair = _PyTuple_FromPairSteal(dump, value);
634
65
            if (pair == NULL) {
635
0
                p->error = WFERR_NOMEMORY;
636
0
                break;
637
0
            }
638
65
            PyList_SET_ITEM(pairs, i++, pair);
639
65
        }
640
11
        Py_END_CRITICAL_SECTION();
641
11
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
642
0
            Py_DECREF(pairs);
643
0
            return;
644
0
        }
645
11
        assert(i == n);
646
11
        if (PyList_Sort(pairs)) {
647
0
            p->error = WFERR_NOMEMORY;
648
0
            Py_DECREF(pairs);
649
0
            return;
650
0
        }
651
76
        for (Py_ssize_t i = 0; i < n; i++) {
652
65
            PyObject *pair = PyList_GET_ITEM(pairs, i);
653
65
            value = PyTuple_GET_ITEM(pair, 1);
654
65
            w_object(value, p);
655
65
        }
656
11
        Py_DECREF(pairs);
657
11
    }
658
3.29k
    else if (PyCode_Check(v)) {
659
3.26k
        if (!p->allow_code) {
660
0
            p->error = WFERR_CODE_NOT_ALLOWED;
661
0
            return;
662
0
        }
663
3.26k
        PyCodeObject *co = (PyCodeObject *)v;
664
3.26k
        PyObject *co_code = _PyCode_GetCode(co);
665
3.26k
        if (co_code == NULL) {
666
0
            p->error = WFERR_NOMEMORY;
667
0
            return;
668
0
        }
669
3.26k
        W_TYPE(TYPE_CODE, p);
670
3.26k
        w_long(co->co_argcount, p);
671
3.26k
        w_long(co->co_posonlyargcount, p);
672
3.26k
        w_long(co->co_kwonlyargcount, p);
673
3.26k
        w_long(co->co_stacksize, p);
674
3.26k
        w_long(co->co_flags, p);
675
3.26k
        w_object(co_code, p);
676
3.26k
        w_object(co->co_consts, p);
677
3.26k
        w_object(co->co_names, p);
678
3.26k
        w_object(co->co_localsplusnames, p);
679
3.26k
        w_object(co->co_localspluskinds, p);
680
3.26k
        w_object(co->co_filename, p);
681
3.26k
        w_object(co->co_name, p);
682
3.26k
        w_object(co->co_qualname, p);
683
3.26k
        w_long(co->co_firstlineno, p);
684
3.26k
        w_object(co->co_linetable, p);
685
3.26k
        w_object(co->co_exceptiontable, p);
686
3.26k
        Py_DECREF(co_code);
687
3.26k
    }
688
25
    else if (PyObject_CheckBuffer(v)) {
689
        /* Write unknown bytes-like objects as a bytes object */
690
0
        Py_buffer view;
691
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
692
0
            w_byte(TYPE_UNKNOWN, p);
693
0
            p->depth--;
694
0
            p->error = WFERR_UNMARSHALLABLE;
695
0
            return;
696
0
        }
697
0
        W_TYPE(TYPE_STRING, p);
698
0
        w_pstring(view.buf, view.len, p);
699
0
        PyBuffer_Release(&view);
700
0
    }
701
25
    else if (PySlice_Check(v)) {
702
25
        if (p->version < 5) {
703
0
            w_byte(TYPE_UNKNOWN, p);
704
0
            p->error = WFERR_UNMARSHALLABLE;
705
0
            return;
706
0
        }
707
25
        PySliceObject *slice = (PySliceObject *)v;
708
25
        W_TYPE(TYPE_SLICE, p);
709
25
        w_object(slice->start, p);
710
25
        w_object(slice->stop, p);
711
25
        w_object(slice->step, p);
712
25
    }
713
0
    else {
714
0
        W_TYPE(TYPE_UNKNOWN, p);
715
0
        p->error = WFERR_UNMARSHALLABLE;
716
0
    }
717
38.7k
}
718
719
static void
720
w_decref_entry(void *key)
721
20.3k
{
722
20.3k
    PyObject *entry_key = (PyObject *)key;
723
20.3k
    Py_XDECREF(entry_key);
724
20.3k
}
725
726
static int
727
w_init_refs(WFILE *wf, int version)
728
228
{
729
228
    if (version >= 3) {
730
228
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
731
228
                                               _Py_hashtable_compare_direct,
732
228
                                               w_decref_entry, NULL, NULL);
733
228
        if (wf->hashtable == NULL) {
734
0
            PyErr_NoMemory();
735
0
            return -1;
736
0
        }
737
228
    }
738
228
    return 0;
739
228
}
740
741
static void
742
w_clear_refs(WFILE *wf)
743
228
{
744
228
    if (wf->hashtable != NULL) {
745
228
        _Py_hashtable_destroy(wf->hashtable);
746
228
    }
747
228
}
748
749
/* version currently has no effect for writing ints. */
750
/* Note that while the documentation states that this function
751
 * can error, currently it never does. Setting an exception in
752
 * this function should be regarded as an API-breaking change.
753
 */
754
void
755
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
756
0
{
757
0
    char buf[4];
758
0
    WFILE wf;
759
0
    memset(&wf, 0, sizeof(wf));
760
0
    wf.fp = fp;
761
0
    wf.ptr = wf.buf = buf;
762
0
    wf.end = wf.ptr + sizeof(buf);
763
0
    wf.error = WFERR_OK;
764
0
    wf.version = version;
765
0
    w_long(x, &wf);
766
0
    w_flush(&wf);
767
0
}
768
769
void
770
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
771
0
{
772
0
    char buf[BUFSIZ];
773
0
    WFILE wf;
774
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
775
0
        return; /* caller must check PyErr_Occurred() */
776
0
    }
777
0
    memset(&wf, 0, sizeof(wf));
778
0
    wf.fp = fp;
779
0
    wf.ptr = wf.buf = buf;
780
0
    wf.end = wf.ptr + sizeof(buf);
781
0
    wf.error = WFERR_OK;
782
0
    wf.version = version;
783
0
    wf.allow_code = 1;
784
0
    if (w_init_refs(&wf, version)) {
785
0
        return; /* caller must check PyErr_Occurred() */
786
0
    }
787
0
    w_object(x, &wf);
788
0
    w_clear_refs(&wf);
789
0
    w_flush(&wf);
790
0
}
791
792
/* State of one marshal read operation.  Data comes from memory (ptr/end),
 * from a stdio stream (fp) or from a stream-like object (readable). */
typedef struct {
793
    FILE *fp;  /* stdio source, used when ptr and readable are both NULL */
794
    int depth;  /* NOTE(review): presumably the nesting depth while reading -- not used in the code shown here */
795
    PyObject *readable;  /* Stream-like object being read from */
796
    const char *ptr;  /* next unread byte of in-memory data, or NULL */
797
    const char *end;  /* one past the last byte of in-memory data */
798
    char *buf;  /* scratch buffer allocated on demand by r_string() */
799
    Py_ssize_t buf_size;  /* allocated size of buf */
800
    PyObject *refs;  /* a list */
801
    int allow_code;  /* NOTE(review): presumably mirrors WFILE.allow_code -- confirm in the reader */
802
} RFILE;
803
804
static const char *
805
r_string(Py_ssize_t n, RFILE *p)
806
5.35M
{
807
5.35M
    Py_ssize_t read = -1;
808
809
5.35M
    if (p->ptr != NULL) {
810
        /* Fast path for loads() */
811
5.35M
        const char *res = p->ptr;
812
5.35M
        Py_ssize_t left = p->end - p->ptr;
813
5.35M
        if (left < n) {
814
0
            PyErr_SetString(PyExc_EOFError,
815
0
                            "marshal data too short");
816
0
            return NULL;
817
0
        }
818
5.35M
        p->ptr += n;
819
5.35M
        return res;
820
5.35M
    }
821
0
    if (p->buf == NULL) {
822
0
        p->buf = PyMem_Malloc(n);
823
0
        if (p->buf == NULL) {
824
0
            PyErr_NoMemory();
825
0
            return NULL;
826
0
        }
827
0
        p->buf_size = n;
828
0
    }
829
0
    else if (p->buf_size < n) {
830
0
        char *tmp = PyMem_Realloc(p->buf, n);
831
0
        if (tmp == NULL) {
832
0
            PyErr_NoMemory();
833
0
            return NULL;
834
0
        }
835
0
        p->buf = tmp;
836
0
        p->buf_size = n;
837
0
    }
838
839
0
    if (!p->readable) {
840
0
        assert(p->fp != NULL);
841
0
        read = fread(p->buf, 1, n, p->fp);
842
0
    }
843
0
    else {
844
0
        PyObject *res, *mview;
845
0
        Py_buffer buf;
846
847
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
848
0
            return NULL;
849
0
        mview = PyMemoryView_FromBuffer(&buf);
850
0
        if (mview == NULL)
851
0
            return NULL;
852
853
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
854
0
        if (res != NULL) {
855
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
856
0
            Py_DECREF(res);
857
0
        }
858
0
    }
859
0
    if (read != n) {
860
0
        if (!PyErr_Occurred()) {
861
0
            if (read > n)
862
0
                PyErr_Format(PyExc_ValueError,
863
0
                             "read() returned too much data: "
864
0
                             "%zd bytes requested, %zd returned",
865
0
                             n, read);
866
0
            else
867
0
                PyErr_SetString(PyExc_EOFError,
868
0
                                "EOF read where not expected");
869
0
        }
870
0
        return NULL;
871
0
    }
872
0
    return p->buf;
873
0
}
874
875
static int
876
r_byte(RFILE *p)
877
6.26M
{
878
6.26M
    if (p->ptr != NULL) {
879
6.26M
        if (p->ptr < p->end) {
880
6.26M
            return (unsigned char) *p->ptr++;
881
6.26M
        }
882
6.26M
    }
883
0
    else if (!p->readable) {
884
0
        assert(p->fp);
885
0
        int c = getc(p->fp);
886
0
        if (c != EOF) {
887
0
            return c;
888
0
        }
889
0
    }
890
0
    else {
891
0
        const char *ptr = r_string(1, p);
892
0
        if (ptr != NULL) {
893
0
            return *(const unsigned char *) ptr;
894
0
        }
895
0
        return EOF;
896
0
    }
897
0
    PyErr_SetString(PyExc_EOFError,
898
0
                    "EOF read where not expected");
899
0
    return EOF;
900
6.26M
}
901
902
static int
903
r_short(RFILE *p)
904
1.69k
{
905
1.69k
    short x = -1;
906
1.69k
    const unsigned char *buffer;
907
908
1.69k
    buffer = (const unsigned char *) r_string(2, p);
909
1.69k
    if (buffer != NULL) {
910
1.69k
        x = buffer[0];
911
1.69k
        x |= buffer[1] << 8;
912
        /* Sign-extension, in case short greater than 16 bits */
913
1.69k
        x |= -(x & 0x8000);
914
1.69k
    }
915
1.69k
    return x;
916
1.69k
}
917
918
static long
919
r_long(RFILE *p)
920
3.52M
{
921
3.52M
    long x = -1;
922
3.52M
    const unsigned char *buffer;
923
924
3.52M
    buffer = (const unsigned char *) r_string(4, p);
925
3.52M
    if (buffer != NULL) {
926
3.52M
        x = buffer[0];
927
3.52M
        x |= (long)buffer[1] << 8;
928
3.52M
        x |= (long)buffer[2] << 16;
929
3.52M
        x |= (long)buffer[3] << 24;
930
3.52M
#if SIZEOF_LONG > 4
931
        /* Sign extension for 64-bit machines */
932
3.52M
        x |= -(x & 0x80000000L);
933
3.52M
#endif
934
3.52M
    }
935
3.52M
    return x;
936
3.52M
}
937
938
/* r_long64 deals with the TYPE_INT64 code. */
939
static PyObject *
940
r_long64(RFILE *p)
941
0
{
942
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
943
0
    if (buffer == NULL) {
944
0
        return NULL;
945
0
    }
946
0
    return _PyLong_FromByteArray(buffer, 8,
947
0
                                 1 /* little endian */,
948
0
                                 1 /* signed */);
949
0
}
950
951
#define _w_digits(bitsize)                                              \
952
static int                                                              \
953
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
954
                   Py_ssize_t marshal_ratio,                            \
955
443
                   int shorts_in_top_digit, RFILE *p)                   \
956
443
{                                                                       \
957
443
    uint ## bitsize ## _t d;                                            \
958
443
                                                                        \
959
443
    assert(size >= 1);                                                  \
960
1.03k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
961
590
        d = 0;                                                          \
962
1.77k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
963
1.18k
            int md = r_short(p);                                        \
964
1.18k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
965
0
                goto bad_digit;                                         \
966
0
            }                                                           \
967
1.18k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
968
1.18k
        }                                                               \
969
590
        digits[i] = d;                                                  \
970
590
    }                                                                   \
971
443
                                                                        \
972
443
    d = 0;                                                              \
973
962
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
974
519
        int md = r_short(p);                                            \
975
519
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
976
0
            goto bad_digit;                                             \
977
0
        }                                                               \
978
519
        /* topmost marshal digit should be nonzero */                   \
979
519
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
980
0
            PyErr_SetString(PyExc_ValueError,                           \
981
0
                "bad marshal data (unnormalized long data)");           \
982
0
            return -1;                                                  \
983
0
        }                                                               \
984
519
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
985
519
    }                                                                   \
986
443
    assert(!PyErr_Occurred());                                          \
987
443
    /* top digit should be nonzero, else the resulting PyLong won't be  \
988
443
       normalized */                                                    \
989
443
    digits[size - 1] = d;                                               \
990
443
    return 0;                                                           \
991
443
                                                                        \
992
0
bad_digit:                                                              \
993
0
    if (!PyErr_Occurred()) {                                            \
994
0
        PyErr_SetString(PyExc_ValueError,                               \
995
0
            "bad marshal data (digit out of range in long)");           \
996
0
    }                                                                   \
997
0
    return -1;                                                          \
998
443
}
999
443
_w_digits(32)
1000
0
_w_digits(16)
1001
#undef _w_digits
1002
1003
static PyObject *
1004
r_PyLong(RFILE *p)
1005
443
{
1006
443
    long n = r_long(p);
1007
443
    if (n == -1 && PyErr_Occurred()) {
1008
0
        return NULL;
1009
0
    }
1010
443
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1011
0
        PyErr_SetString(PyExc_ValueError,
1012
0
                       "bad marshal data (long size out of range)");
1013
0
        return NULL;
1014
0
    }
1015
1016
443
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1017
443
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1018
1019
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1020
443
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1021
443
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1022
1023
    /* other assumptions on PyLongObject internals */
1024
443
    assert(layout->bits_per_digit <= 32);
1025
443
    assert(layout->digits_order == -1);
1026
443
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1027
443
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1028
1029
443
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1030
1031
443
    assert(size >= 1);
1032
1033
443
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1034
443
    void *digits;
1035
443
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1036
1037
443
    if (writer == NULL) {
1038
0
        return NULL;
1039
0
    }
1040
1041
443
    int ret;
1042
1043
443
    if (layout->digit_size == 4) {
1044
443
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1045
443
    }
1046
0
    else {
1047
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1048
0
    }
1049
443
    if (ret < 0) {
1050
0
        PyLongWriter_Discard(writer);
1051
0
        return NULL;
1052
0
    }
1053
443
    return PyLongWriter_Finish(writer);
1054
443
}
1055
1056
static double
1057
r_float_bin(RFILE *p)
1058
701
{
1059
701
    const char *buf = r_string(8, p);
1060
701
    if (buf == NULL)
1061
0
        return -1;
1062
701
    return PyFloat_Unpack8(buf, 1);
1063
701
}
1064
1065
/* Issue #33720: Disable inlining for reducing the C stack consumption
1066
   on PGO builds. */
1067
Py_NO_INLINE static double
1068
r_float_str(RFILE *p)
1069
0
{
1070
0
    int n;
1071
0
    char buf[256];
1072
0
    const char *ptr;
1073
0
    n = r_byte(p);
1074
0
    if (n == EOF) {
1075
0
        return -1;
1076
0
    }
1077
0
    ptr = r_string(n, p);
1078
0
    if (ptr == NULL) {
1079
0
        return -1;
1080
0
    }
1081
0
    memcpy(buf, ptr, n);
1082
0
    buf[n] = '\0';
1083
0
    return PyOS_string_to_double(buf, NULL, NULL);
1084
0
}
1085
1086
/* allocate the reflist index for a new object. Return -1 on failure */
1087
static Py_ssize_t
1088
r_ref_reserve(int flag, RFILE *p)
1089
176k
{
1090
176k
    if (flag) { /* currently only FLAG_REF is defined */
1091
6.51k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1092
6.51k
        if (idx >= 0x7ffffffe) {
1093
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1094
0
            return -1;
1095
0
        }
1096
6.51k
        if (PyList_Append(p->refs, Py_None) < 0)
1097
0
            return -1;
1098
6.51k
        return idx;
1099
6.51k
    } else
1100
170k
        return 0;
1101
176k
}
1102
1103
/* insert the new object 'o' to the reflist at previously
1104
 * allocated index 'idx'.
1105
 * 'o' can be NULL, in which case nothing is done.
1106
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1107
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1108
 * NULL returned. This simplifies error checking at the call site since
1109
 * a single test for NULL for the function result is enough.
1110
 */
1111
static PyObject *
1112
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1113
176k
{
1114
176k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1115
6.51k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1116
6.51k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1117
6.51k
        Py_DECREF(tmp);
1118
6.51k
    }
1119
176k
    return o;
1120
176k
}
1121
1122
/* combination of both above, used when an object can be
1123
 * created whenever it is seen in the file, as opposed to
1124
 * after having loaded its sub-objects.
1125
 */
1126
static PyObject *
1127
r_ref(PyObject *o, int flag, RFILE *p)
1128
1.39M
{
1129
1.39M
    assert(flag & FLAG_REF);
1130
1.39M
    if (o == NULL)
1131
0
        return NULL;
1132
1.39M
    if (PyList_Append(p->refs, o) < 0) {
1133
0
        Py_DECREF(o); /* release the new object */
1134
0
        return NULL;
1135
0
    }
1136
1.39M
    return o;
1137
1.39M
}
1138
1139
static PyObject *
1140
r_object(RFILE *p)
1141
4.52M
{
1142
    /* NULL is a valid return value, it does not necessarily means that
1143
       an exception is set. */
1144
4.52M
    PyObject *v, *v2;
1145
4.52M
    Py_ssize_t idx = 0;
1146
4.52M
    long i, n;
1147
4.52M
    int type, code = r_byte(p);
1148
4.52M
    int flag, is_interned = 0;
1149
4.52M
    PyObject *retval = NULL;
1150
1151
4.52M
    if (code == EOF) {
1152
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1153
0
            PyErr_SetString(PyExc_EOFError,
1154
0
                            "EOF read where object expected");
1155
0
        }
1156
0
        return NULL;
1157
0
    }
1158
1159
4.52M
    p->depth++;
1160
1161
4.52M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1162
0
        p->depth--;
1163
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1164
0
        return NULL;
1165
0
    }
1166
1167
4.52M
    flag = code & FLAG_REF;
1168
4.52M
    type = code & ~FLAG_REF;
1169
1170
4.52M
#define R_REF(O) do{\
1171
2.31M
    if (flag) \
1172
2.31M
        O = r_ref(O, flag, p);\
1173
2.31M
} while (0)
1174
1175
4.52M
    switch (type) {
1176
1177
0
    case TYPE_NULL:
1178
0
        break;
1179
1180
96.5k
    case TYPE_NONE:
1181
96.5k
        retval = Py_None;
1182
96.5k
        break;
1183
1184
0
    case TYPE_STOPITER:
1185
0
        retval = Py_NewRef(PyExc_StopIteration);
1186
0
        break;
1187
1188
407
    case TYPE_ELLIPSIS:
1189
407
        retval = Py_Ellipsis;
1190
407
        break;
1191
1192
12.6k
    case TYPE_FALSE:
1193
12.6k
        retval = Py_False;
1194
12.6k
        break;
1195
1196
11.7k
    case TYPE_TRUE:
1197
11.7k
        retval = Py_True;
1198
11.7k
        break;
1199
1200
25.5k
    case TYPE_INT:
1201
25.5k
        n = r_long(p);
1202
25.5k
        if (n == -1 && PyErr_Occurred()) {
1203
0
            break;
1204
0
        }
1205
25.5k
        retval = PyLong_FromLong(n);
1206
25.5k
        R_REF(retval);
1207
25.5k
        break;
1208
1209
0
    case TYPE_INT64:
1210
0
        retval = r_long64(p);
1211
0
        R_REF(retval);
1212
0
        break;
1213
1214
443
    case TYPE_LONG:
1215
443
        retval = r_PyLong(p);
1216
443
        R_REF(retval);
1217
443
        break;
1218
1219
0
    case TYPE_FLOAT:
1220
0
        {
1221
0
            double x = r_float_str(p);
1222
0
            if (x == -1.0 && PyErr_Occurred())
1223
0
                break;
1224
0
            retval = PyFloat_FromDouble(x);
1225
0
            R_REF(retval);
1226
0
            break;
1227
0
        }
1228
1229
695
    case TYPE_BINARY_FLOAT:
1230
695
        {
1231
695
            double x = r_float_bin(p);
1232
695
            if (x == -1.0 && PyErr_Occurred())
1233
0
                break;
1234
695
            retval = PyFloat_FromDouble(x);
1235
695
            R_REF(retval);
1236
695
            break;
1237
695
        }
1238
1239
0
    case TYPE_COMPLEX:
1240
0
        {
1241
0
            Py_complex c;
1242
0
            c.real = r_float_str(p);
1243
0
            if (c.real == -1.0 && PyErr_Occurred())
1244
0
                break;
1245
0
            c.imag = r_float_str(p);
1246
0
            if (c.imag == -1.0 && PyErr_Occurred())
1247
0
                break;
1248
0
            retval = PyComplex_FromCComplex(c);
1249
0
            R_REF(retval);
1250
0
            break;
1251
0
        }
1252
1253
3
    case TYPE_BINARY_COMPLEX:
1254
3
        {
1255
3
            Py_complex c;
1256
3
            c.real = r_float_bin(p);
1257
3
            if (c.real == -1.0 && PyErr_Occurred())
1258
0
                break;
1259
3
            c.imag = r_float_bin(p);
1260
3
            if (c.imag == -1.0 && PyErr_Occurred())
1261
0
                break;
1262
3
            retval = PyComplex_FromCComplex(c);
1263
3
            R_REF(retval);
1264
3
            break;
1265
3
        }
1266
1267
522k
    case TYPE_STRING:
1268
522k
        {
1269
522k
            const char *ptr;
1270
522k
            n = r_long(p);
1271
522k
            if (n < 0 || n > SIZE32_MAX) {
1272
0
                if (!PyErr_Occurred()) {
1273
0
                    PyErr_SetString(PyExc_ValueError,
1274
0
                        "bad marshal data (bytes object size out of range)");
1275
0
                }
1276
0
                break;
1277
0
            }
1278
522k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1279
522k
            if (v == NULL)
1280
0
                break;
1281
522k
            ptr = r_string(n, p);
1282
522k
            if (ptr == NULL) {
1283
0
                Py_DECREF(v);
1284
0
                break;
1285
0
            }
1286
522k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1287
522k
            retval = v;
1288
522k
            R_REF(retval);
1289
522k
            break;
1290
522k
        }
1291
1292
0
    case TYPE_ASCII_INTERNED:
1293
0
        is_interned = 1;
1294
0
        _Py_FALLTHROUGH;
1295
26.2k
    case TYPE_ASCII:
1296
26.2k
        n = r_long(p);
1297
26.2k
        if (n < 0 || n > SIZE32_MAX) {
1298
0
            if (!PyErr_Occurred()) {
1299
0
                PyErr_SetString(PyExc_ValueError,
1300
0
                    "bad marshal data (string size out of range)");
1301
0
            }
1302
0
            break;
1303
0
        }
1304
26.2k
        goto _read_ascii;
1305
1306
1.11M
    case TYPE_SHORT_ASCII_INTERNED:
1307
1.11M
        is_interned = 1;
1308
1.11M
        _Py_FALLTHROUGH;
1309
1.27M
    case TYPE_SHORT_ASCII:
1310
1.27M
        n = r_byte(p);
1311
1.27M
        if (n == EOF) {
1312
0
            break;
1313
0
        }
1314
1.29M
    _read_ascii:
1315
1.29M
        {
1316
1.29M
            const char *ptr;
1317
1.29M
            ptr = r_string(n, p);
1318
1.29M
            if (ptr == NULL)
1319
0
                break;
1320
1.29M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1321
1.29M
            if (v == NULL)
1322
0
                break;
1323
1.29M
            if (is_interned) {
1324
                // marshal is meant to serialize .pyc files with code
1325
                // objects, and code-related strings are currently immortal.
1326
1.11M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1327
1.11M
                _PyUnicode_InternImmortal(interp, &v);
1328
1.11M
            }
1329
1.29M
            retval = v;
1330
1.29M
            R_REF(retval);
1331
1.29M
            break;
1332
1.29M
        }
1333
1334
202
    case TYPE_INTERNED:
1335
202
        is_interned = 1;
1336
202
        _Py_FALLTHROUGH;
1337
3.54k
    case TYPE_UNICODE:
1338
3.54k
        {
1339
3.54k
        const char *buffer;
1340
1341
3.54k
        n = r_long(p);
1342
3.54k
        if (n < 0 || n > SIZE32_MAX) {
1343
0
            if (!PyErr_Occurred()) {
1344
0
                PyErr_SetString(PyExc_ValueError,
1345
0
                    "bad marshal data (string size out of range)");
1346
0
            }
1347
0
            break;
1348
0
        }
1349
3.54k
        if (n != 0) {
1350
3.54k
            buffer = r_string(n, p);
1351
3.54k
            if (buffer == NULL)
1352
0
                break;
1353
3.54k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1354
3.54k
        }
1355
0
        else {
1356
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1357
0
        }
1358
3.54k
        if (v == NULL)
1359
0
            break;
1360
3.54k
        if (is_interned) {
1361
            // marshal is meant to serialize .pyc files with code
1362
            // objects, and code-related strings are currently immortal.
1363
202
            PyInterpreterState *interp = _PyInterpreterState_GET();
1364
202
            _PyUnicode_InternImmortal(interp, &v);
1365
202
        }
1366
3.54k
        retval = v;
1367
3.54k
        R_REF(retval);
1368
3.54k
        break;
1369
3.54k
        }
1370
1371
465k
    case TYPE_SMALL_TUPLE:
1372
465k
        n = r_byte(p);
1373
465k
        if (n == EOF) {
1374
0
            break;
1375
0
        }
1376
465k
        goto _read_tuple;
1377
465k
    case TYPE_TUPLE:
1378
70
        n = r_long(p);
1379
70
        if (n < 0 || n > SIZE32_MAX) {
1380
0
            if (!PyErr_Occurred()) {
1381
0
                PyErr_SetString(PyExc_ValueError,
1382
0
                    "bad marshal data (tuple size out of range)");
1383
0
            }
1384
0
            break;
1385
0
        }
1386
465k
    _read_tuple:
1387
465k
        v = PyTuple_New(n);
1388
465k
        R_REF(v);
1389
465k
        if (v == NULL)
1390
0
            break;
1391
1392
3.23M
        for (i = 0; i < n; i++) {
1393
2.77M
            v2 = r_object(p);
1394
2.77M
            if ( v2 == NULL ) {
1395
0
                if (!PyErr_Occurred())
1396
0
                    PyErr_SetString(PyExc_TypeError,
1397
0
                        "NULL object in marshal data for tuple");
1398
0
                Py_SETREF(v, NULL);
1399
0
                break;
1400
0
            }
1401
2.77M
            PyTuple_SET_ITEM(v, i, v2);
1402
2.77M
        }
1403
465k
        retval = v;
1404
465k
        break;
1405
1406
0
    case TYPE_LIST:
1407
0
        n = r_long(p);
1408
0
        if (n < 0 || n > SIZE32_MAX) {
1409
0
            if (!PyErr_Occurred()) {
1410
0
                PyErr_SetString(PyExc_ValueError,
1411
0
                    "bad marshal data (list size out of range)");
1412
0
            }
1413
0
            break;
1414
0
        }
1415
0
        v = PyList_New(n);
1416
0
        R_REF(v);
1417
0
        if (v == NULL)
1418
0
            break;
1419
0
        for (i = 0; i < n; i++) {
1420
0
            v2 = r_object(p);
1421
0
            if ( v2 == NULL ) {
1422
0
                if (!PyErr_Occurred())
1423
0
                    PyErr_SetString(PyExc_TypeError,
1424
0
                        "NULL object in marshal data for list");
1425
0
                Py_SETREF(v, NULL);
1426
0
                break;
1427
0
            }
1428
0
            PyList_SET_ITEM(v, i, v2);
1429
0
        }
1430
0
        retval = v;
1431
0
        break;
1432
1433
0
    case TYPE_DICT:
1434
0
    case TYPE_FROZENDICT:
1435
0
        v = PyDict_New();
1436
0
        R_REF(v);
1437
0
        if (v == NULL)
1438
0
            break;
1439
0
        for (;;) {
1440
0
            PyObject *key, *val;
1441
0
            key = r_object(p);
1442
0
            if (key == NULL)
1443
0
                break;
1444
0
            val = r_object(p);
1445
0
            if (val == NULL) {
1446
0
                Py_DECREF(key);
1447
0
                break;
1448
0
            }
1449
0
            if (PyDict_SetItem(v, key, val) < 0) {
1450
0
                Py_DECREF(key);
1451
0
                Py_DECREF(val);
1452
0
                break;
1453
0
            }
1454
0
            Py_DECREF(key);
1455
0
            Py_DECREF(val);
1456
0
        }
1457
0
        if (PyErr_Occurred()) {
1458
0
            Py_CLEAR(v);
1459
0
        }
1460
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1461
0
            PyObject *frozendict = PyFrozenDict_New(v);
1462
0
            if (frozendict != NULL) {
1463
0
                Py_SETREF(v, frozendict);
1464
0
            }
1465
0
            else {
1466
0
                Py_CLEAR(v);
1467
0
            }
1468
0
        }
1469
0
        retval = v;
1470
0
        break;
1471
1472
0
    case TYPE_SET:
1473
533
    case TYPE_FROZENSET:
1474
533
        n = r_long(p);
1475
533
        if (n < 0 || n > SIZE32_MAX) {
1476
0
            if (!PyErr_Occurred()) {
1477
0
                PyErr_SetString(PyExc_ValueError,
1478
0
                    "bad marshal data (set size out of range)");
1479
0
            }
1480
0
            break;
1481
0
        }
1482
1483
533
        if (n == 0 && type == TYPE_FROZENSET) {
1484
            /* call frozenset() to get the empty frozenset singleton */
1485
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1486
0
            if (v == NULL)
1487
0
                break;
1488
0
            R_REF(v);
1489
0
            retval = v;
1490
0
        }
1491
533
        else {
1492
533
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1493
533
            if (type == TYPE_SET) {
1494
0
                R_REF(v);
1495
533
            } else {
1496
                /* must use delayed registration of frozensets because they must
1497
                 * be init with a refcount of 1
1498
                 */
1499
533
                idx = r_ref_reserve(flag, p);
1500
533
                if (idx < 0)
1501
0
                    Py_CLEAR(v); /* signal error */
1502
533
            }
1503
533
            if (v == NULL)
1504
0
                break;
1505
1506
2.75k
            for (i = 0; i < n; i++) {
1507
2.21k
                v2 = r_object(p);
1508
2.21k
                if ( v2 == NULL ) {
1509
0
                    if (!PyErr_Occurred())
1510
0
                        PyErr_SetString(PyExc_TypeError,
1511
0
                            "NULL object in marshal data for set");
1512
0
                    Py_SETREF(v, NULL);
1513
0
                    break;
1514
0
                }
1515
2.21k
                if (PySet_Add(v, v2) == -1) {
1516
0
                    Py_DECREF(v);
1517
0
                    Py_DECREF(v2);
1518
0
                    v = NULL;
1519
0
                    break;
1520
0
                }
1521
2.21k
                Py_DECREF(v2);
1522
2.21k
            }
1523
533
            if (type != TYPE_SET)
1524
533
                v = r_ref_insert(v, idx, flag, p);
1525
533
            retval = v;
1526
533
        }
1527
533
        break;
1528
1529
173k
    case TYPE_CODE:
1530
173k
        {
1531
173k
            int argcount;
1532
173k
            int posonlyargcount;
1533
173k
            int kwonlyargcount;
1534
173k
            int stacksize;
1535
173k
            int flags;
1536
173k
            PyObject *code = NULL;
1537
173k
            PyObject *consts = NULL;
1538
173k
            PyObject *names = NULL;
1539
173k
            PyObject *localsplusnames = NULL;
1540
173k
            PyObject *localspluskinds = NULL;
1541
173k
            PyObject *filename = NULL;
1542
173k
            PyObject *name = NULL;
1543
173k
            PyObject *qualname = NULL;
1544
173k
            int firstlineno;
1545
173k
            PyObject* linetable = NULL;
1546
173k
            PyObject *exceptiontable = NULL;
1547
1548
173k
            if (!p->allow_code) {
1549
0
                PyErr_SetString(PyExc_ValueError,
1550
0
                                "unmarshalling code objects is disallowed");
1551
0
                break;
1552
0
            }
1553
173k
            idx = r_ref_reserve(flag, p);
1554
173k
            if (idx < 0)
1555
0
                break;
1556
1557
173k
            v = NULL;
1558
1559
            /* XXX ignore long->int overflows for now */
1560
173k
            argcount = (int)r_long(p);
1561
173k
            if (argcount == -1 && PyErr_Occurred())
1562
0
                goto code_error;
1563
173k
            posonlyargcount = (int)r_long(p);
1564
173k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1565
0
                goto code_error;
1566
0
            }
1567
173k
            kwonlyargcount = (int)r_long(p);
1568
173k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1569
0
                goto code_error;
1570
173k
            stacksize = (int)r_long(p);
1571
173k
            if (stacksize == -1 && PyErr_Occurred())
1572
0
                goto code_error;
1573
173k
            flags = (int)r_long(p);
1574
173k
            if (flags == -1 && PyErr_Occurred())
1575
0
                goto code_error;
1576
173k
            code = r_object(p);
1577
173k
            if (code == NULL)
1578
0
                goto code_error;
1579
173k
            consts = r_object(p);
1580
173k
            if (consts == NULL)
1581
0
                goto code_error;
1582
173k
            names = r_object(p);
1583
173k
            if (names == NULL)
1584
0
                goto code_error;
1585
173k
            localsplusnames = r_object(p);
1586
173k
            if (localsplusnames == NULL)
1587
0
                goto code_error;
1588
173k
            localspluskinds = r_object(p);
1589
173k
            if (localspluskinds == NULL)
1590
0
                goto code_error;
1591
173k
            filename = r_object(p);
1592
173k
            if (filename == NULL)
1593
0
                goto code_error;
1594
173k
            name = r_object(p);
1595
173k
            if (name == NULL)
1596
0
                goto code_error;
1597
173k
            qualname = r_object(p);
1598
173k
            if (qualname == NULL)
1599
0
                goto code_error;
1600
173k
            firstlineno = (int)r_long(p);
1601
173k
            if (firstlineno == -1 && PyErr_Occurred())
1602
0
                break;
1603
173k
            linetable = r_object(p);
1604
173k
            if (linetable == NULL)
1605
0
                goto code_error;
1606
173k
            exceptiontable = r_object(p);
1607
173k
            if (exceptiontable == NULL)
1608
0
                goto code_error;
1609
1610
173k
            struct _PyCodeConstructor con = {
1611
173k
                .filename = filename,
1612
173k
                .name = name,
1613
173k
                .qualname = qualname,
1614
173k
                .flags = flags,
1615
1616
173k
                .code = code,
1617
173k
                .firstlineno = firstlineno,
1618
173k
                .linetable = linetable,
1619
1620
173k
                .consts = consts,
1621
173k
                .names = names,
1622
1623
173k
                .localsplusnames = localsplusnames,
1624
173k
                .localspluskinds = localspluskinds,
1625
1626
173k
                .argcount = argcount,
1627
173k
                .posonlyargcount = posonlyargcount,
1628
173k
                .kwonlyargcount = kwonlyargcount,
1629
1630
173k
                .stacksize = stacksize,
1631
1632
173k
                .exceptiontable = exceptiontable,
1633
173k
            };
1634
1635
173k
            if (_PyCode_Validate(&con) < 0) {
1636
0
                goto code_error;
1637
0
            }
1638
1639
173k
            v = (PyObject *)_PyCode_New(&con);
1640
173k
            if (v == NULL) {
1641
0
                goto code_error;
1642
0
            }
1643
1644
173k
            v = r_ref_insert(v, idx, flag, p);
1645
1646
173k
          code_error:
1647
173k
            if (v == NULL && !PyErr_Occurred()) {
1648
0
                PyErr_SetString(PyExc_TypeError,
1649
0
                    "NULL object in marshal data for code object");
1650
0
            }
1651
173k
            Py_XDECREF(code);
1652
173k
            Py_XDECREF(consts);
1653
173k
            Py_XDECREF(names);
1654
173k
            Py_XDECREF(localsplusnames);
1655
173k
            Py_XDECREF(localspluskinds);
1656
173k
            Py_XDECREF(filename);
1657
173k
            Py_XDECREF(name);
1658
173k
            Py_XDECREF(qualname);
1659
173k
            Py_XDECREF(linetable);
1660
173k
            Py_XDECREF(exceptiontable);
1661
173k
        }
1662
0
        retval = v;
1663
173k
        break;
1664
1665
1.90M
    case TYPE_REF:
1666
1.90M
        n = r_long(p);
1667
1.90M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1668
0
            if (!PyErr_Occurred()) {
1669
0
                PyErr_SetString(PyExc_ValueError,
1670
0
                    "bad marshal data (invalid reference)");
1671
0
            }
1672
0
            break;
1673
0
        }
1674
1.90M
        v = PyList_GET_ITEM(p->refs, n);
1675
1.90M
        if (v == Py_None) {
1676
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1677
0
            break;
1678
0
        }
1679
1.90M
        retval = Py_NewRef(v);
1680
1.90M
        break;
1681
1682
3.41k
    case TYPE_SLICE:
1683
3.41k
    {
1684
3.41k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1685
3.41k
        if (idx < 0) {
1686
0
            break;
1687
0
        }
1688
3.41k
        PyObject *stop = NULL;
1689
3.41k
        PyObject *step = NULL;
1690
3.41k
        PyObject *start = r_object(p);
1691
3.41k
        if (start == NULL) {
1692
0
            goto cleanup;
1693
0
        }
1694
3.41k
        stop = r_object(p);
1695
3.41k
        if (stop == NULL) {
1696
0
            goto cleanup;
1697
0
        }
1698
3.41k
        step = r_object(p);
1699
3.41k
        if (step == NULL) {
1700
0
            goto cleanup;
1701
0
        }
1702
3.41k
        retval = PySlice_New(start, stop, step);
1703
3.41k
        r_ref_insert(retval, idx, flag, p);
1704
3.41k
    cleanup:
1705
3.41k
        Py_XDECREF(start);
1706
3.41k
        Py_XDECREF(stop);
1707
3.41k
        Py_XDECREF(step);
1708
3.41k
        break;
1709
3.41k
    }
1710
1711
0
    default:
1712
        /* Bogus data got written, which isn't ideal.
1713
           This will let you keep working and recover. */
1714
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1715
0
        break;
1716
1717
4.52M
    }
1718
4.52M
    p->depth--;
1719
4.52M
    return retval;
1720
4.52M
}
1721
1722
static PyObject *
1723
read_object(RFILE *p)
1724
6.62k
{
1725
6.62k
    PyObject *v;
1726
6.62k
    if (PyErr_Occurred()) {
1727
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1728
0
        return NULL;
1729
0
    }
1730
6.62k
    if (p->ptr && p->end) {
1731
6.62k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1732
0
            return NULL;
1733
0
        }
1734
6.62k
    } else if (p->fp || p->readable) {
1735
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1736
0
            return NULL;
1737
0
        }
1738
0
    }
1739
6.62k
    v = r_object(p);
1740
6.62k
    if (v == NULL && !PyErr_Occurred())
1741
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1742
6.62k
    return v;
1743
6.62k
}
1744
1745
int
1746
PyMarshal_ReadShortFromFile(FILE *fp)
1747
0
{
1748
0
    RFILE rf;
1749
0
    int res;
1750
0
    assert(fp);
1751
0
    rf.readable = NULL;
1752
0
    rf.fp = fp;
1753
0
    rf.end = rf.ptr = NULL;
1754
0
    rf.buf = NULL;
1755
0
    res = r_short(&rf);
1756
0
    if (rf.buf != NULL)
1757
0
        PyMem_Free(rf.buf);
1758
0
    return res;
1759
0
}
1760
1761
long
1762
PyMarshal_ReadLongFromFile(FILE *fp)
1763
0
{
1764
0
    RFILE rf;
1765
0
    long res;
1766
0
    rf.fp = fp;
1767
0
    rf.readable = NULL;
1768
0
    rf.ptr = rf.end = NULL;
1769
0
    rf.buf = NULL;
1770
0
    res = r_long(&rf);
1771
0
    if (rf.buf != NULL)
1772
0
        PyMem_Free(rf.buf);
1773
0
    return res;
1774
0
}
1775
1776
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1777
static off_t
1778
getfilesize(FILE *fp)
1779
0
{
1780
0
    struct _Py_stat_struct st;
1781
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1782
0
        return -1;
1783
#if SIZEOF_OFF_T == 4
1784
    else if (st.st_size >= INT_MAX)
1785
        return (off_t)INT_MAX;
1786
#endif
1787
0
    else
1788
0
        return (off_t)st.st_size;
1789
0
}
1790
1791
/* If we can get the size of the file up-front, and it's reasonably small,
1792
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1793
 * than reading a byte at a time from file; speeds .pyc imports.
1794
 * CAUTION:  since this may read the entire remainder of the file, don't
1795
 * call it unless you know you're done with the file.
1796
 */
1797
PyObject *
1798
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1799
0
{
1800
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1801
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1802
0
    off_t filesize;
1803
0
    filesize = getfilesize(fp);
1804
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1805
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1806
0
        if (pBuf != NULL) {
1807
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1808
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1809
0
            PyMem_Free(pBuf);
1810
0
            return v;
1811
0
        }
1812
1813
0
    }
1814
    /* We don't have fstat, or we do but the file is larger than
1815
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1816
     */
1817
0
    return PyMarshal_ReadObjectFromFile(fp);
1818
1819
0
#undef REASONABLE_FILE_LIMIT
1820
0
}
1821
1822
PyObject *
1823
PyMarshal_ReadObjectFromFile(FILE *fp)
1824
0
{
1825
0
    RFILE rf;
1826
0
    PyObject *result;
1827
0
    rf.allow_code = 1;
1828
0
    rf.fp = fp;
1829
0
    rf.readable = NULL;
1830
0
    rf.depth = 0;
1831
0
    rf.ptr = rf.end = NULL;
1832
0
    rf.buf = NULL;
1833
0
    rf.refs = PyList_New(0);
1834
0
    if (rf.refs == NULL)
1835
0
        return NULL;
1836
0
    result = read_object(&rf);
1837
0
    Py_DECREF(rf.refs);
1838
0
    if (rf.buf != NULL)
1839
0
        PyMem_Free(rf.buf);
1840
0
    return result;
1841
0
}
1842
1843
PyObject *
1844
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1845
517
{
1846
517
    RFILE rf;
1847
517
    PyObject *result;
1848
517
    rf.allow_code = 1;
1849
517
    rf.fp = NULL;
1850
517
    rf.readable = NULL;
1851
517
    rf.ptr = str;
1852
517
    rf.end = str + len;
1853
517
    rf.buf = NULL;
1854
517
    rf.depth = 0;
1855
517
    rf.refs = PyList_New(0);
1856
517
    if (rf.refs == NULL)
1857
0
        return NULL;
1858
517
    result = read_object(&rf);
1859
517
    Py_DECREF(rf.refs);
1860
517
    if (rf.buf != NULL)
1861
0
        PyMem_Free(rf.buf);
1862
517
    return result;
1863
517
}
1864
1865
/* Serialize `x` into a new bytes object using marshal format `version`.

   Raises the "marshal.dumps" audit event first.  `allow_code` is stored
   in the writer state handed to w_object().  Returns the bytes object on
   success; on failure returns NULL, translating the writer's error code
   into MemoryError or ValueError. */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    WFILE wf;
    memset(&wf, 0, sizeof(wf));

    /* Start from a 50-byte bytes object and write into it in place. */
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    wf.buf = PyBytes_AS_STRING(wf.str);
    wf.ptr = wf.buf;
    wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;
    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }

    w_object(x, &wf);
    w_clear_refs(&wf);

    /* Trim the bytes object down to the part that was actually written. */
    if (wf.str != NULL) {
        const char *start = PyBytes_AS_STRING(wf.str);
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - start);
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* The writer reported a problem: discard the partial output and turn
       the error code into a Python exception. */
    Py_XDECREF(wf.str);

    const char *reason;
    switch (wf.error) {
    case WFERR_NOMEMORY:
        PyErr_NoMemory();
        return NULL;
    case WFERR_NESTEDTOODEEP:
        reason = "object too deeply nested to marshal";
        break;
    case WFERR_CODE_NOT_ALLOWED:
        reason = "marshalling code objects is disallowed";
        break;
    case WFERR_UNMARSHALLABLE:
    default:
        reason = "unmarshallable object";
        break;
    }
    PyErr_SetString(PyExc_ValueError, reason);
    return NULL;
}
1917
1918
/* Public wrapper around _PyMarshal_WriteObjectToString() that always
   passes allow_code = 1. */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1923
1924
/* And an interface for Python programs... */
1925
/*[clinic input]
1926
marshal.dump
1927
1928
    value: object
1929
        Must be a supported type.
1930
    file: object
1931
        Must be a writeable binary file.
1932
    version: int(c_default="Py_MARSHAL_VERSION") = version
1933
        Indicates the data format that dump should use.
1934
    /
1935
    *
1936
    allow_code: bool = True
1937
        Allow to write code objects.
1938
1939
Write the value on the open file.
1940
1941
If the value has (or contains an object that has) an unsupported type, a
1942
ValueError exception is raised - but garbage data will also be written
1943
to the file. The object will not be properly read back by load().
1944
[clinic start generated code]*/
1945
1946
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently */
    /* Serialize the whole value into a bytes object first, then pass it
       to file.write() in a single call and return that call's result. */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    PyObject *write_result = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                       payload);
    Py_DECREF(payload);
    return write_result;
}
1962
1963
/*[clinic input]
1964
marshal.load
1965
1966
    file: object
1967
        Must be readable binary file.
1968
    /
1969
    *
1970
    allow_code: bool = True
1971
        Allow to load code objects.
1972
1973
Read one value from the open file and return it.
1974
1975
If no valid value is read (e.g. because the data has a different Python
1976
version's incompatible marshal format), raise EOFError, ValueError or
1977
TypeError.
1978
1979
Note: If an object containing an unsupported type was marshalled with
1980
dump(), load() will substitute None for the unmarshallable type.
1981
[clinic start generated code]*/
1982
1983
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    /*
     * Call the read method asking for zero bytes.
     * This checks up front that the object passed in has a read
     * method and that it returns bytes.
     * The probe can go away once r_string() is guaranteed to have
     * good error handling of its own.
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }

    PyObject *result = NULL;
    if (PyBytes_Check(probe)) {
        RFILE rf;

        rf.fp = NULL;
        rf.readable = file;
        rf.ptr = NULL;
        rf.end = NULL;
        rf.buf = NULL;
        rf.depth = 0;
        rf.allow_code = allow_code;
        rf.refs = PyList_New(0);
        if (rf.refs != NULL) {
            result = read_object(&rf);
            Py_DECREF(rf.refs);
            /* PyMem_Free() does nothing when given NULL. */
            PyMem_Free(rf.buf);
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
    }

    Py_DECREF(probe);
    return result;
}
2024
2025
/*[clinic input]
2026
@permit_long_summary
2027
@permit_long_docstring_body
2028
marshal.dumps
2029
2030
    value: object
2031
        Must be a supported type.
2032
    version: int(c_default="Py_MARSHAL_VERSION") = version
2033
        Indicates the data format that dumps should use.
2034
    /
2035
    *
2036
    allow_code: bool = True
2037
        Allow to write code objects.
2038
2039
Return the bytes object that would be written to a file by dump(value, file).
2040
2041
Raise a ValueError exception if value has (or contains an object that has) an
2042
unsupported type.
2043
[clinic start generated code]*/
2044
2045
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=80cd3f30c1637ade]*/
{
    /* All of the work is done by the shared serializer. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2052
2053
/*[clinic input]
2054
marshal.loads
2055
2056
    bytes: Py_buffer
2057
    /
2058
    *
2059
    allow_code: bool = True
2060
        Allow to load code objects.
2061
2062
Convert the bytes-like object to a value.
2063
2064
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2065
bytes in the input are ignored.
2066
[clinic start generated code]*/
2067
2068
static PyObject *
2069
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2070
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2071
6.11k
{
2072
6.11k
    RFILE rf;
2073
6.11k
    char *s = bytes->buf;
2074
6.11k
    Py_ssize_t n = bytes->len;
2075
6.11k
    PyObject* result;
2076
6.11k
    rf.allow_code = allow_code;
2077
6.11k
    rf.fp = NULL;
2078
6.11k
    rf.readable = NULL;
2079
6.11k
    rf.ptr = s;
2080
6.11k
    rf.end = s + n;
2081
6.11k
    rf.depth = 0;
2082
6.11k
    if ((rf.refs = PyList_New(0)) == NULL)
2083
0
        return NULL;
2084
6.11k
    result = read_object(&rf);
2085
6.11k
    Py_DECREF(rf.refs);
2086
6.11k
    return result;
2087
6.11k
}
2088
2089
static PyMethodDef marshal_methods[] = {
2090
    MARSHAL_DUMP_METHODDEF
2091
    MARSHAL_LOAD_METHODDEF
2092
    MARSHAL_DUMPS_METHODDEF
2093
    MARSHAL_LOADS_METHODDEF
2094
    {NULL,              NULL}           /* sentinel */
2095
};
2096
2097
2098
PyDoc_STRVAR(module_doc,
2099
"This module contains functions that can read and write Python values in\n\
2100
a binary format. The format is specific to Python, but independent of\n\
2101
machine architecture issues.\n\
2102
\n\
2103
Not all Python object types are supported; in general, only objects\n\
2104
whose value is independent from a particular invocation of Python can be\n\
2105
written and read by this module. The following types are supported:\n\
2106
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2107
tuples, lists, sets, dictionaries, and code objects, where it\n\
2108
should be understood that tuples, lists and dictionaries are only\n\
2109
supported as long as the values contained therein are themselves\n\
2110
supported; and recursive lists and dictionaries should not be written\n\
2111
(they will cause infinite loops).\n\
2112
\n\
2113
Variables:\n\
2114
\n\
2115
version -- indicates the format that the module uses. Version 0 is the\n\
2116
    historical format, version 1 shares interned strings and version 2\n\
2117
    uses a binary format for floating-point numbers.\n\
2118
    Version 3 shares common object references (New in version 3.4).\n\
2119
\n\
2120
Functions:\n\
2121
\n\
2122
dump() -- write value to a file\n\
2123
load() -- read value from a file\n\
2124
dumps() -- marshal value as a bytes object\n\
2125
loads() -- read value from a bytes-like object");
2126
2127
2128
static int
2129
marshal_module_exec(PyObject *mod)
2130
36
{
2131
36
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2132
0
        return -1;
2133
0
    }
2134
36
    return 0;
2135
36
}
2136
2137
static PyModuleDef_Slot marshalmodule_slots[] = {
2138
    {Py_mod_exec, marshal_module_exec},
2139
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2140
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2141
    {0, NULL}
2142
};
2143
2144
static struct PyModuleDef marshalmodule = {
2145
    PyModuleDef_HEAD_INIT,
2146
    .m_name = "marshal",
2147
    .m_doc = module_doc,
2148
    .m_methods = marshal_methods,
2149
    .m_slots = marshalmodule_slots,
2150
};
2151
2152
PyMODINIT_FUNC
2153
PyMarshal_Init(void)
2154
36
{
2155
36
    return PyModuleDef_Init(&marshalmodule);
2156
36
}