Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
18
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
19
20
#include "marshal.h"                 // Py_MARSHAL_VERSION
21
22
#ifdef __APPLE__
23
#  include "TargetConditionals.h"
24
#endif /* __APPLE__ */
25
26
27
/*[clinic input]
28
module marshal
29
[clinic start generated code]*/
30
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
31
32
#include "clinic/marshal.c.h"
33
34
/* High water mark to determine when the marshalled object is dangerously deep
35
 * and risks coring the interpreter.  When the object stack gets this deep,
36
 * raise an exception instead of continuing.
37
 * On Windows debug builds, reduce this value.
38
 *
39
 * BUG: https://bugs.python.org/issue33720
40
 * On Windows PGO builds, the r_object function overallocates its stack and
41
 * can cause a stack overflow. We reduce the maximum depth for all Windows
42
 * releases to protect against this.
43
 * #if defined(MS_WINDOWS) && defined(Py_DEBUG)
44
 */
45
#if defined(MS_WINDOWS)
46
#  define MAX_MARSHAL_STACK_DEPTH 1000
47
#elif defined(__wasi__)
48
#  define MAX_MARSHAL_STACK_DEPTH 1500
49
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
50
// It won't be defined on older macOS SDKs
51
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
52
#  define MAX_MARSHAL_STACK_DEPTH 1500
53
#else
54
4.66M
#  define MAX_MARSHAL_STACK_DEPTH 2000
55
#endif
56
57
/* Supported types */
58
0
#define TYPE_NULL               '0'
59
43.4k
#define TYPE_NONE               'N'
60
1.86k
#define TYPE_FALSE              'F'
61
1.59k
#define TYPE_TRUE               'T'
62
0
#define TYPE_STOPITER           'S'
63
392
#define TYPE_ELLIPSIS           '.'
64
617
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
65
3
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
66
437
#define TYPE_LONG               'l'  // See also TYPE_INT.
67
538k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
68
70
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
69
0
#define TYPE_LIST               '['
70
0
#define TYPE_DICT               '{'
71
0
#define TYPE_FROZENDICT         '}'
72
178k
#define TYPE_CODE               'c'
73
3.58k
#define TYPE_UNICODE            'u'
74
#define TYPE_UNKNOWN            '?'
75
// added in version 2:
76
1.51k
#define TYPE_SET                '<'
77
504
#define TYPE_FROZENSET          '>'
78
// added in version 5:
79
3.23k
#define TYPE_SLICE              ':'
80
// Remember to update the version and documentation when adding new types.
81
82
/* Special cases for unicode strings (added in version 4) */
83
206
#define TYPE_INTERNED           't' // Version 1+
84
29.0k
#define TYPE_ASCII              'a'
85
0
#define TYPE_ASCII_INTERNED     'A'
86
1.37M
#define TYPE_SHORT_ASCII        'z'
87
1.20M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
88
89
/* Special cases for small objects */
90
24.2k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
91
482k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+
92
93
/* Supported for backwards compatibility */
94
0
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
95
0
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
96
0
#define TYPE_INT64              'I'  // Not generated any more.
97
98
/* References (added in version 3) */
99
1.93M
#define TYPE_REF                'r'
100
9.24M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
101
102
103
// Error codes:
104
55.0k
#define WFERR_OK 0
105
22
#define WFERR_UNMARSHALLABLE 1
106
0
#define WFERR_NESTEDTOODEEP 2
107
11
#define WFERR_NOMEMORY 3
108
0
#define WFERR_CODE_NOT_ALLOWED 4
109
110
typedef struct {
111
    FILE *fp;
112
    int error;  /* see WFERR_* values */
113
    int depth;
114
    PyObject *str;
115
    char *ptr;
116
    const char *end;
117
    char *buf;
118
    _Py_hashtable_t *hashtable;
119
    int version;
120
    int allow_code;
121
} WFILE;
122
123
268k
#define w_byte(c, p) do {                               \
124
268k
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
125
268k
            *(p)->ptr++ = (c);                          \
126
268k
    } while(0)
127
128
static void
129
w_flush(WFILE *p)
130
0
{
131
0
    assert(p->fp != NULL);
132
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
133
0
    p->ptr = p->buf;
134
0
}
135
136
static int
137
w_reserve(WFILE *p, Py_ssize_t needed)
138
433
{
139
433
    Py_ssize_t pos, size, delta;
140
433
    if (p->ptr == NULL)
141
0
        return 0; /* An error already occurred */
142
433
    if (p->fp != NULL) {
143
0
        w_flush(p);
144
0
        return needed <= p->end - p->ptr;
145
0
    }
146
433
    assert(p->str != NULL);
147
433
    pos = p->ptr - p->buf;
148
433
    size = PyBytes_GET_SIZE(p->str);
149
433
    if (size > 16*1024*1024)
150
0
        delta = (size >> 3);            /* 12.5% overallocation */
151
433
    else
152
433
        delta = size + 1024;
153
433
    delta = Py_MAX(delta, needed);
154
433
    if (delta > PY_SSIZE_T_MAX - size) {
155
0
        p->error = WFERR_NOMEMORY;
156
0
        return 0;
157
0
    }
158
433
    size += delta;
159
433
    if (_PyBytes_Resize(&p->str, size) != 0) {
160
0
        p->end = p->ptr = p->buf = NULL;
161
0
        return 0;
162
0
    }
163
433
    else {
164
433
        p->buf = PyBytes_AS_STRING(p->str);
165
433
        p->ptr = p->buf + pos;
166
433
        p->end = p->buf + size;
167
433
        return 1;
168
433
    }
169
433
}
170
171
static void
172
w_string(const void *s, Py_ssize_t n, WFILE *p)
173
18.0k
{
174
18.0k
    Py_ssize_t m;
175
18.0k
    if (!n || p->ptr == NULL)
176
156
        return;
177
17.9k
    m = p->end - p->ptr;
178
17.9k
    if (p->fp != NULL) {
179
0
        if (n <= m) {
180
0
            memcpy(p->ptr, s, n);
181
0
            p->ptr += n;
182
0
        }
183
0
        else {
184
0
            w_flush(p);
185
0
            fwrite(s, 1, n, p->fp);
186
0
        }
187
0
    }
188
17.9k
    else {
189
17.9k
        if (n <= m || w_reserve(p, n - m)) {
190
17.9k
            memcpy(p->ptr, s, n);
191
17.9k
            p->ptr += n;
192
17.9k
        }
193
17.9k
    }
194
17.9k
}
195
196
static void
197
w_short(int x, WFILE *p)
198
23
{
199
23
    w_byte((char)( x      & 0xff), p);
200
23
    w_byte((char)((x>> 8) & 0xff), p);
201
23
}
202
203
static void
204
w_long(long x, WFILE *p)
205
49.1k
{
206
49.1k
    w_byte((char)( x      & 0xff), p);
207
49.1k
    w_byte((char)((x>> 8) & 0xff), p);
208
49.1k
    w_byte((char)((x>>16) & 0xff), p);
209
49.1k
    w_byte((char)((x>>24) & 0xff), p);
210
49.1k
}
211
212
579k
#define SIZE32_MAX  0x7FFFFFFF
213
214
#if SIZEOF_SIZE_T > 4
215
6.80k
# define W_SIZE(n, p)  do {                     \
216
6.80k
        if ((n) > SIZE32_MAX) {                 \
217
0
            (p)->depth--;                       \
218
0
            (p)->error = WFERR_UNMARSHALLABLE;  \
219
0
            return;                             \
220
0
        }                                       \
221
6.80k
        w_long((long)(n), p);                   \
222
6.80k
    } while(0)
223
#else
224
# define W_SIZE  w_long
225
#endif
226
227
static void
228
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
229
6.79k
{
230
6.79k
        W_SIZE(n, p);
231
6.79k
        w_string(s, n, p);
232
6.79k
}
233
234
static void
235
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
236
11.2k
{
237
11.2k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
238
11.2k
    w_string(s, n, p);
239
11.2k
}
240
241
/* We assume that Python ints are stored internally in base some power of
242
   2**15; for the sake of portability we'll always read and write them in base
243
   exactly 2**15. */
244
245
3.83k
#define PyLong_MARSHAL_SHIFT 15
246
1.69k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
247
23
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
248
249
28.2k
#define W_TYPE(t, p) do { \
250
28.2k
    w_byte((t) | flag, (p)); \
251
28.2k
} while(0)
252
253
static PyObject *
254
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
255
256
#define _r_digits(bitsize)                                                \
257
static void                                                               \
258
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
259
3
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
260
3
{                                                                         \
261
3
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
262
3
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
263
3
    uint ## bitsize ## _t d = digits[n - 1];                              \
264
3
                                                                          \
265
3
    assert(marshal_ratio > 0);                                            \
266
3
    assert(n >= 1);                                                       \
267
3
    assert(d != 0); /* a PyLong is always normalized */                   \
268
3
    do {                                                                  \
269
3
        d >>= PyLong_MARSHAL_SHIFT;                                       \
270
3
        l++;                                                              \
271
3
    } while (d != 0);                                                     \
272
3
    if (l > SIZE32_MAX) {                                                 \
273
0
        p->depth--;                                                       \
274
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
275
0
        return;                                                           \
276
0
    }                                                                     \
277
3
    w_long((long)(negative ? -l : l), p);                                 \
278
3
                                                                          \
279
9
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
280
6
        d = digits[i];                                                    \
281
18
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
282
12
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
283
12
            d >>= PyLong_MARSHAL_SHIFT;                                   \
284
12
        }                                                                 \
285
6
        assert(d == 0);                                                   \
286
6
    }                                                                     \
287
3
    d = digits[n - 1];                                                    \
288
3
    do {                                                                  \
289
3
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
290
3
        d >>= PyLong_MARSHAL_SHIFT;                                       \
291
3
    } while (d != 0);                                                     \
292
3
}
293
0
_r_digits(16)
294
3
_r_digits(32)
295
#undef _r_digits
296
297
static void
298
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
299
5
{
300
5
    W_TYPE(TYPE_LONG, p);
301
5
    if (_PyLong_IsZero(ob)) {
302
0
        w_long((long)0, p);
303
0
        return;
304
0
    }
305
306
5
    PyLongExport long_export;
307
308
5
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
309
0
        p->depth--;
310
0
        p->error = WFERR_UNMARSHALLABLE;
311
0
        return;
312
0
    }
313
5
    if (!long_export.digits) {
314
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
315
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
316
2
        uint64_t d = abs_value;
317
2
        long l = 0;
318
319
        /* set l to number of base PyLong_MARSHAL_BASE digits */
320
8
        do {
321
8
            d >>= PyLong_MARSHAL_SHIFT;
322
8
            l += sign;
323
8
        } while (d);
324
2
        w_long(l, p);
325
326
2
        d = abs_value;
327
8
        do {
328
8
            w_short(d & PyLong_MARSHAL_MASK, p);
329
8
            d >>= PyLong_MARSHAL_SHIFT;
330
8
        } while (d);
331
2
        return;
332
2
    }
333
334
3
    const PyLongLayout *layout = PyLong_GetNativeLayout();
335
3
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
336
337
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
338
3
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
339
3
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
340
341
    /* other assumptions on PyLongObject internals */
342
3
    assert(layout->bits_per_digit <= 32);
343
3
    assert(layout->digits_order == -1);
344
3
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
345
3
    assert(layout->digit_size == 2 || layout->digit_size == 4);
346
347
3
    if (layout->digit_size == 4) {
348
3
        _r_digits32(long_export.digits, long_export.ndigits,
349
3
                    long_export.negative, marshal_ratio, p);
350
3
    }
351
0
    else {
352
0
        _r_digits16(long_export.digits, long_export.ndigits,
353
0
                    long_export.negative, marshal_ratio, p);
354
0
    }
355
3
    PyLong_FreeExport(&long_export);
356
3
}
357
358
static void
359
w_float_bin(double v, WFILE *p)
360
10
{
361
10
    char buf[8];
362
10
    if (PyFloat_Pack8(v, buf, 1) < 0) {
363
0
        p->error = WFERR_UNMARSHALLABLE;
364
0
        return;
365
0
    }
366
10
    w_string(buf, 8, p);
367
10
}
368
369
static void
370
w_float_str(double v, WFILE *p)
371
0
{
372
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
373
0
    if (!buf) {
374
0
        p->error = WFERR_NOMEMORY;
375
0
        return;
376
0
    }
377
0
    w_short_pstring(buf, strlen(buf), p);
378
0
    PyMem_Free(buf);
379
0
}
380
381
static int
382
w_ref(PyObject *v, char *flag, WFILE *p)
383
54.2k
{
384
54.2k
    _Py_hashtable_entry_t *entry;
385
386
54.2k
    if (p->version < 3 || p->hashtable == NULL)
387
0
        return 0; /* not writing object references */
388
389
    /* If it has only one reference, it definitely isn't shared.
390
     * But we use TYPE_REF always for interned string, to PYC file stable
391
     * as possible.
392
     */
393
54.2k
    if (_PyObject_IsUniquelyReferenced(v) &&
394
14.2k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
395
13.3k
        return 0;
396
13.3k
    }
397
398
40.8k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
399
40.8k
    if (entry != NULL) {
400
        /* write the reference index to the stream */
401
25.9k
        uintptr_t w = (uintptr_t)entry->value;
402
25.9k
        if (w & 0x80000000LU) {
403
0
            PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
404
0
            goto err;
405
0
        }
406
        /* we don't store "long" indices in the dict */
407
25.9k
        assert(w <= 0x7fffffff);
408
25.9k
        w_byte(TYPE_REF, p);
409
25.9k
        w_long((int)w, p);
410
25.9k
        return 1;
411
25.9k
    } else {
412
14.9k
        size_t w = p->hashtable->nentries;
413
        /* we don't support long indices */
414
14.9k
        if (w >= 0x7fffffff) {
415
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
416
0
            goto err;
417
0
        }
418
        // Corresponding code should call w_complete() after
419
        // writing the object.
420
14.9k
        if (PyCode_Check(v) || PySlice_Check(v) || PyFrozenDict_CheckExact(v)) {
421
148
            w |= 0x80000000LU;
422
148
        }
423
14.9k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
424
14.9k
                              (void *)(uintptr_t)w) < 0) {
425
0
            Py_DECREF(v);
426
0
            goto err;
427
0
        }
428
14.9k
        *flag |= FLAG_REF;
429
14.9k
        return 0;
430
14.9k
    }
431
0
err:
432
0
    p->error = WFERR_UNMARSHALLABLE;
433
0
    return 1;
434
40.8k
}
435
436
static void
437
w_complete(PyObject *v, WFILE *p)
438
2.40k
{
439
2.40k
    if (p->version < 3 || p->hashtable == NULL) {
440
0
        return;
441
0
    }
442
2.40k
    if (_PyObject_IsUniquelyReferenced(v)) {
443
2.25k
        return;
444
2.25k
    }
445
446
148
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
447
148
    if (entry == NULL) {
448
0
        return;
449
0
    }
450
148
    assert(entry != NULL);
451
148
    uintptr_t w = (uintptr_t)entry->value;
452
148
    assert(w & 0x80000000LU);
453
148
    w &= ~0x80000000LU;
454
148
    entry->value = (void *)(uintptr_t)w;
455
148
}
456
457
static void
458
w_complex_object(PyObject *v, char flag, WFILE *p);
459
460
static void
461
w_object(PyObject *v, WFILE *p)
462
54.6k
{
463
54.6k
    char flag = '\0';
464
465
54.6k
    if (p->error != WFERR_OK) {
466
0
        return;
467
0
    }
468
469
54.6k
    p->depth++;
470
471
54.6k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
472
0
        p->error = WFERR_NESTEDTOODEEP;
473
0
    }
474
54.6k
    else if (v == NULL) {
475
0
        w_byte(TYPE_NULL, p);
476
0
    }
477
54.6k
    else if (v == Py_None) {
478
279
        w_byte(TYPE_NONE, p);
479
279
    }
480
54.3k
    else if (v == PyExc_StopIteration) {
481
0
        w_byte(TYPE_STOPITER, p);
482
0
    }
483
54.3k
    else if (v == Py_Ellipsis) {
484
1
        w_byte(TYPE_ELLIPSIS, p);
485
1
    }
486
54.3k
    else if (v == Py_False) {
487
136
        w_byte(TYPE_FALSE, p);
488
136
    }
489
54.2k
    else if (v == Py_True) {
490
33
        w_byte(TYPE_TRUE, p);
491
33
    }
492
54.2k
    else if (!w_ref(v, &flag, p))
493
28.2k
        w_complex_object(v, flag, p);
494
495
54.6k
    p->depth--;
496
54.6k
}
497
498
static void
499
w_complex_object(PyObject *v, char flag, WFILE *p)
500
28.2k
{
501
28.2k
    Py_ssize_t i, n;
502
503
28.2k
    if (PyLong_CheckExact(v)) {
504
2.09k
        int overflow;
505
2.09k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
506
2.09k
        if (overflow) {
507
3
            w_PyLong((PyLongObject *)v, flag, p);
508
3
        }
509
2.09k
        else {
510
2.09k
#if SIZEOF_LONG > 4
511
2.09k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
512
2.09k
            if (y && y != -1) {
513
                /* Too large for TYPE_INT */
514
2
                w_PyLong((PyLongObject*)v, flag, p);
515
2
            }
516
2.08k
            else
517
2.08k
#endif
518
2.08k
            {
519
2.08k
                W_TYPE(TYPE_INT, p);
520
2.08k
                w_long(x, p);
521
2.08k
            }
522
2.09k
        }
523
2.09k
    }
524
26.2k
    else if (PyFloat_CheckExact(v)) {
525
8
        if (p->version > 1) {
526
8
            W_TYPE(TYPE_BINARY_FLOAT, p);
527
8
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
528
8
        }
529
0
        else {
530
0
            W_TYPE(TYPE_FLOAT, p);
531
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
532
0
        }
533
8
    }
534
26.1k
    else if (PyComplex_CheckExact(v)) {
535
1
        if (p->version > 1) {
536
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
537
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
538
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
539
1
        }
540
0
        else {
541
0
            W_TYPE(TYPE_COMPLEX, p);
542
0
            w_float_str(PyComplex_RealAsDouble(v), p);
543
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
544
0
        }
545
1
    }
546
26.1k
    else if (PyBytes_CheckExact(v)) {
547
6.60k
        W_TYPE(TYPE_STRING, p);
548
6.60k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
549
6.60k
    }
550
19.5k
    else if (PyUnicode_CheckExact(v)) {
551
11.4k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
552
11.3k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
553
11.3k
            if (is_short) {
554
11.2k
                if (PyUnicode_CHECK_INTERNED(v))
555
10.2k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
556
1.04k
                else
557
1.04k
                    W_TYPE(TYPE_SHORT_ASCII, p);
558
11.2k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
559
11.2k
                                PyUnicode_GET_LENGTH(v), p);
560
11.2k
            }
561
120
            else {
562
120
                if (PyUnicode_CHECK_INTERNED(v))
563
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
564
120
                else
565
120
                    W_TYPE(TYPE_ASCII, p);
566
120
                w_pstring(PyUnicode_1BYTE_DATA(v),
567
120
                          PyUnicode_GET_LENGTH(v), p);
568
120
            }
569
11.3k
        }
570
69
        else {
571
69
            PyObject *utf8;
572
69
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
573
69
            if (utf8 == NULL) {
574
0
                p->depth--;
575
0
                p->error = WFERR_UNMARSHALLABLE;
576
0
                return;
577
0
            }
578
69
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
579
0
                W_TYPE(TYPE_INTERNED, p);
580
69
            else
581
69
                W_TYPE(TYPE_UNICODE, p);
582
69
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
583
69
            Py_DECREF(utf8);
584
69
        }
585
11.4k
    }
586
8.13k
    else if (PyTuple_CheckExact(v)) {
587
5.72k
        n = PyTuple_GET_SIZE(v);
588
5.72k
        if (p->version >= 4 && n < 256) {
589
5.72k
            W_TYPE(TYPE_SMALL_TUPLE, p);
590
5.72k
            w_byte((unsigned char)n, p);
591
5.72k
        }
592
0
        else {
593
0
            W_TYPE(TYPE_TUPLE, p);
594
0
            W_SIZE(n, p);
595
0
        }
596
36.2k
        for (i = 0; i < n; i++) {
597
30.5k
            w_object(PyTuple_GET_ITEM(v, i), p);
598
30.5k
        }
599
5.72k
    }
600
2.41k
    else if (PyList_CheckExact(v)) {
601
0
        W_TYPE(TYPE_LIST, p);
602
0
        n = PyList_GET_SIZE(v);
603
0
        W_SIZE(n, p);
604
0
        for (i = 0; i < n; i++) {
605
0
            w_object(PyList_GET_ITEM(v, i), p);
606
0
        }
607
0
    }
608
2.41k
    else if (PyAnyDict_CheckExact(v)) {
609
0
        Py_ssize_t pos;
610
0
        PyObject *key, *value;
611
0
        if (PyFrozenDict_CheckExact(v)) {
612
0
            if (p->version < 6) {
613
0
                w_byte(TYPE_UNKNOWN, p);
614
0
                p->error = WFERR_UNMARSHALLABLE;
615
0
                return;
616
0
            }
617
618
0
            W_TYPE(TYPE_FROZENDICT, p);
619
0
        }
620
0
        else {
621
0
            W_TYPE(TYPE_DICT, p);
622
0
        }
623
        /* This one is NULL object terminated! */
624
0
        pos = 0;
625
0
        while (PyDict_Next(v, &pos, &key, &value)) {
626
0
            w_object(key, p);
627
0
            w_object(value, p);
628
0
        }
629
0
        w_object((PyObject *)NULL, p);
630
0
        if (PyFrozenDict_CheckExact(v)) {
631
0
            w_complete(v, p);
632
0
        }
633
0
    }
634
2.41k
    else if (PyAnySet_CheckExact(v)) {
635
11
        PyObject *value;
636
11
        Py_ssize_t pos = 0;
637
11
        Py_hash_t hash;
638
639
11
        if (PyFrozenSet_CheckExact(v))
640
11
            W_TYPE(TYPE_FROZENSET, p);
641
0
        else
642
0
            W_TYPE(TYPE_SET, p);
643
11
        n = PySet_GET_SIZE(v);
644
11
        W_SIZE(n, p);
645
        // bpo-37596: To support reproducible builds, sets and frozensets need
646
        // to have their elements serialized in a consistent order (even when
647
        // they have been scrambled by hash randomization). To ensure this, we
648
        // use an order equivalent to sorted(v, key=marshal.dumps):
649
11
        PyObject *pairs = PyList_New(n);
650
11
        if (pairs == NULL) {
651
0
            p->error = WFERR_NOMEMORY;
652
0
            return;
653
0
        }
654
11
        Py_ssize_t i = 0;
655
11
        Py_BEGIN_CRITICAL_SECTION(v);
656
82
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
657
71
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
658
71
                                    p->version, p->allow_code);
659
71
            if (dump == NULL) {
660
0
                p->error = WFERR_UNMARSHALLABLE;
661
0
                Py_DECREF(value);
662
0
                break;
663
0
            }
664
71
            PyObject *pair = _PyTuple_FromPairSteal(dump, value);
665
71
            if (pair == NULL) {
666
0
                p->error = WFERR_NOMEMORY;
667
0
                break;
668
0
            }
669
71
            PyList_SET_ITEM(pairs, i++, pair);
670
71
        }
671
11
        Py_END_CRITICAL_SECTION();
672
11
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
673
0
            Py_DECREF(pairs);
674
0
            return;
675
0
        }
676
11
        assert(i == n);
677
11
        if (PyList_Sort(pairs)) {
678
0
            p->error = WFERR_NOMEMORY;
679
0
            Py_DECREF(pairs);
680
0
            return;
681
0
        }
682
82
        for (Py_ssize_t i = 0; i < n; i++) {
683
71
            PyObject *pair = PyList_GET_ITEM(pairs, i);
684
71
            value = PyTuple_GET_ITEM(pair, 1);
685
71
            w_object(value, p);
686
71
        }
687
11
        Py_DECREF(pairs);
688
11
    }
689
2.40k
    else if (PyCode_Check(v)) {
690
2.38k
        if (!p->allow_code) {
691
0
            p->error = WFERR_CODE_NOT_ALLOWED;
692
0
            return;
693
0
        }
694
2.38k
        PyCodeObject *co = (PyCodeObject *)v;
695
2.38k
        PyObject *co_code = _PyCode_GetCode(co);
696
2.38k
        if (co_code == NULL) {
697
0
            p->error = WFERR_NOMEMORY;
698
0
            return;
699
0
        }
700
2.38k
        W_TYPE(TYPE_CODE, p);
701
2.38k
        w_long(co->co_argcount, p);
702
2.38k
        w_long(co->co_posonlyargcount, p);
703
2.38k
        w_long(co->co_kwonlyargcount, p);
704
2.38k
        w_long(co->co_stacksize, p);
705
2.38k
        w_long(co->co_flags, p);
706
2.38k
        w_object(co_code, p);
707
2.38k
        w_object(co->co_consts, p);
708
2.38k
        w_object(co->co_names, p);
709
2.38k
        w_object(co->co_localsplusnames, p);
710
2.38k
        w_object(co->co_localspluskinds, p);
711
2.38k
        w_object(co->co_filename, p);
712
2.38k
        w_object(co->co_name, p);
713
2.38k
        w_object(co->co_qualname, p);
714
2.38k
        w_long(co->co_firstlineno, p);
715
2.38k
        w_object(co->co_linetable, p);
716
2.38k
        w_object(co->co_exceptiontable, p);
717
2.38k
        Py_DECREF(co_code);
718
2.38k
        w_complete(v, p);
719
2.38k
    }
720
22
    else if (PyObject_CheckBuffer(v)) {
721
        /* Write unknown bytes-like objects as a bytes object */
722
0
        Py_buffer view;
723
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
724
0
            w_byte(TYPE_UNKNOWN, p);
725
0
            p->depth--;
726
0
            p->error = WFERR_UNMARSHALLABLE;
727
0
            return;
728
0
        }
729
0
        W_TYPE(TYPE_STRING, p);
730
0
        w_pstring(view.buf, view.len, p);
731
0
        PyBuffer_Release(&view);
732
0
    }
733
22
    else if (PySlice_Check(v)) {
734
22
        if (p->version < 5) {
735
0
            w_byte(TYPE_UNKNOWN, p);
736
0
            p->error = WFERR_UNMARSHALLABLE;
737
0
            return;
738
0
        }
739
22
        PySliceObject *slice = (PySliceObject *)v;
740
22
        W_TYPE(TYPE_SLICE, p);
741
22
        w_object(slice->start, p);
742
22
        w_object(slice->stop, p);
743
22
        w_object(slice->step, p);
744
22
        w_complete(v, p);
745
22
    }
746
0
    else {
747
0
        W_TYPE(TYPE_UNKNOWN, p);
748
0
        p->error = WFERR_UNMARSHALLABLE;
749
0
    }
750
28.2k
}
751
752
static void
753
w_decref_entry(void *key)
754
14.9k
{
755
14.9k
    PyObject *entry_key = (PyObject *)key;
756
14.9k
    Py_XDECREF(entry_key);
757
14.9k
}
758
759
static int
760
w_init_refs(WFILE *wf, int version)
761
209
{
762
209
    if (version >= 3) {
763
209
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
764
209
                                               _Py_hashtable_compare_direct,
765
209
                                               w_decref_entry, NULL, NULL);
766
209
        if (wf->hashtable == NULL) {
767
0
            PyErr_NoMemory();
768
0
            return -1;
769
0
        }
770
209
    }
771
209
    return 0;
772
209
}
773
774
static void
775
w_clear_refs(WFILE *wf)
776
209
{
777
209
    if (wf->hashtable != NULL) {
778
209
        _Py_hashtable_destroy(wf->hashtable);
779
209
    }
780
209
}
781
782
/* version currently has no effect for writing ints. */
783
/* Note that while the documentation states that this function
784
 * can error, currently it never does. Setting an exception in
785
 * this function should be regarded as an API-breaking change.
786
 */
787
void
788
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
789
0
{
790
0
    char buf[4];
791
0
    WFILE wf;
792
0
    memset(&wf, 0, sizeof(wf));
793
0
    wf.fp = fp;
794
0
    wf.ptr = wf.buf = buf;
795
0
    wf.end = wf.ptr + sizeof(buf);
796
0
    wf.error = WFERR_OK;
797
0
    wf.version = version;
798
0
    w_long(x, &wf);
799
0
    w_flush(&wf);
800
0
}
801
802
void
803
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
804
0
{
805
0
    char buf[BUFSIZ];
806
0
    WFILE wf;
807
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
808
0
        return; /* caller must check PyErr_Occurred() */
809
0
    }
810
0
    memset(&wf, 0, sizeof(wf));
811
0
    wf.fp = fp;
812
0
    wf.ptr = wf.buf = buf;
813
0
    wf.end = wf.ptr + sizeof(buf);
814
0
    wf.error = WFERR_OK;
815
0
    wf.version = version;
816
0
    wf.allow_code = 1;
817
0
    if (w_init_refs(&wf, version)) {
818
0
        return; /* caller must check PyErr_Occurred() */
819
0
    }
820
0
    w_object(x, &wf);
821
0
    w_clear_refs(&wf);
822
0
    w_flush(&wf);
823
0
}
824
825
typedef struct {
826
    FILE *fp;
827
    int depth;
828
    PyObject *readable;  /* Stream-like object being read from */
829
    const char *ptr;
830
    const char *end;
831
    char *buf;
832
    Py_ssize_t buf_size;
833
    PyObject *refs;  /* a list */
834
    int allow_code;
835
} RFILE;
836
837
static const char *
838
r_string(Py_ssize_t n, RFILE *p)
839
5.54M
{
840
5.54M
    Py_ssize_t read = -1;
841
842
5.54M
    if (p->ptr != NULL) {
843
        /* Fast path for loads() */
844
5.54M
        const char *res = p->ptr;
845
5.54M
        Py_ssize_t left = p->end - p->ptr;
846
5.54M
        if (left < n) {
847
0
            PyErr_SetString(PyExc_EOFError,
848
0
                            "marshal data too short");
849
0
            return NULL;
850
0
        }
851
5.54M
        p->ptr += n;
852
5.54M
        return res;
853
5.54M
    }
854
0
    if (p->buf == NULL) {
855
0
        p->buf = PyMem_Malloc(n);
856
0
        if (p->buf == NULL) {
857
0
            PyErr_NoMemory();
858
0
            return NULL;
859
0
        }
860
0
        p->buf_size = n;
861
0
    }
862
0
    else if (p->buf_size < n) {
863
0
        char *tmp = PyMem_Realloc(p->buf, n);
864
0
        if (tmp == NULL) {
865
0
            PyErr_NoMemory();
866
0
            return NULL;
867
0
        }
868
0
        p->buf = tmp;
869
0
        p->buf_size = n;
870
0
    }
871
872
0
    if (!p->readable) {
873
0
        assert(p->fp != NULL);
874
0
        read = fread(p->buf, 1, n, p->fp);
875
0
    }
876
0
    else {
877
0
        PyObject *res, *mview;
878
0
        Py_buffer buf;
879
880
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
881
0
            return NULL;
882
0
        mview = PyMemoryView_FromBuffer(&buf);
883
0
        if (mview == NULL)
884
0
            return NULL;
885
886
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
887
0
        if (res != NULL) {
888
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
889
0
            Py_DECREF(res);
890
0
        }
891
0
    }
892
0
    if (read != n) {
893
0
        if (!PyErr_Occurred()) {
894
0
            if (read > n)
895
0
                PyErr_Format(PyExc_ValueError,
896
0
                             "read() returned too much data: "
897
0
                             "%zd bytes requested, %zd returned",
898
0
                             n, read);
899
0
            else
900
0
                PyErr_SetString(PyExc_EOFError,
901
0
                                "EOF read where not expected");
902
0
        }
903
0
        return NULL;
904
0
    }
905
0
    return p->buf;
906
0
}
907
908
static int
909
r_byte(RFILE *p)
910
6.46M
{
911
6.46M
    if (p->ptr != NULL) {
912
6.46M
        if (p->ptr < p->end) {
913
6.46M
            return (unsigned char) *p->ptr++;
914
6.46M
        }
915
6.46M
    }
916
0
    else if (!p->readable) {
917
0
        assert(p->fp);
918
0
        int c = getc(p->fp);
919
0
        if (c != EOF) {
920
0
            return c;
921
0
        }
922
0
    }
923
0
    else {
924
0
        const char *ptr = r_string(1, p);
925
0
        if (ptr != NULL) {
926
0
            return *(const unsigned char *) ptr;
927
0
        }
928
0
        return EOF;
929
0
    }
930
0
    PyErr_SetString(PyExc_EOFError,
931
0
                    "EOF read where not expected");
932
0
    return EOF;
933
6.46M
}
934
935
static int
936
r_short(RFILE *p)
937
1.67k
{
938
1.67k
    short x = -1;
939
1.67k
    const unsigned char *buffer;
940
941
1.67k
    buffer = (const unsigned char *) r_string(2, p);
942
1.67k
    if (buffer != NULL) {
943
1.67k
        x = buffer[0];
944
1.67k
        x |= buffer[1] << 8;
945
        /* Sign-extension, in case short greater than 16 bits */
946
1.67k
        x |= -(x & 0x8000);
947
1.67k
    }
948
1.67k
    return x;
949
1.67k
}
950
951
static long
952
r_long(RFILE *p)
953
3.59M
{
954
3.59M
    long x = -1;
955
3.59M
    const unsigned char *buffer;
956
957
3.59M
    buffer = (const unsigned char *) r_string(4, p);
958
3.59M
    if (buffer != NULL) {
959
3.59M
        x = buffer[0];
960
3.59M
        x |= (long)buffer[1] << 8;
961
3.59M
        x |= (long)buffer[2] << 16;
962
3.59M
        x |= (long)buffer[3] << 24;
963
3.59M
#if SIZEOF_LONG > 4
964
        /* Sign extension for 64-bit machines */
965
3.59M
        x |= -(x & 0x80000000L);
966
3.59M
#endif
967
3.59M
    }
968
3.59M
    return x;
969
3.59M
}
970
971
/* r_long64 deals with the TYPE_INT64 code. */
972
static PyObject *
973
r_long64(RFILE *p)
974
0
{
975
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
976
0
    if (buffer == NULL) {
977
0
        return NULL;
978
0
    }
979
0
    return _PyLong_FromByteArray(buffer, 8,
980
0
                                 1 /* little endian */,
981
0
                                 1 /* signed */);
982
0
}
983
984
#define _w_digits(bitsize)                                              \
985
static int                                                              \
986
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
987
                   Py_ssize_t marshal_ratio,                            \
988
437
                   int shorts_in_top_digit, RFILE *p)                   \
989
437
{                                                                       \
990
437
    uint ## bitsize ## _t d;                                            \
991
437
                                                                        \
992
437
    assert(size >= 1);                                                  \
993
1.01k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
994
579
        d = 0;                                                          \
995
1.73k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
996
1.15k
            int md = r_short(p);                                        \
997
1.15k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
998
0
                goto bad_digit;                                         \
999
0
            }                                                           \
1000
1.15k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
1001
1.15k
        }                                                               \
1002
579
        digits[i] = d;                                                  \
1003
579
    }                                                                   \
1004
437
                                                                        \
1005
437
    d = 0;                                                              \
1006
950
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
1007
513
        int md = r_short(p);                                            \
1008
513
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
1009
0
            goto bad_digit;                                             \
1010
0
        }                                                               \
1011
513
        /* topmost marshal digit should be nonzero */                   \
1012
513
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
1013
0
            PyErr_SetString(PyExc_ValueError,                           \
1014
0
                "bad marshal data (unnormalized long data)");           \
1015
0
            return -1;                                                  \
1016
0
        }                                                               \
1017
513
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
1018
513
    }                                                                   \
1019
437
    assert(!PyErr_Occurred());                                          \
1020
437
    /* top digit should be nonzero, else the resulting PyLong won't be  \
1021
437
       normalized */                                                    \
1022
437
    digits[size - 1] = d;                                               \
1023
437
    return 0;                                                           \
1024
437
                                                                        \
1025
0
bad_digit:                                                              \
1026
0
    if (!PyErr_Occurred()) {                                            \
1027
0
        PyErr_SetString(PyExc_ValueError,                               \
1028
0
            "bad marshal data (digit out of range in long)");           \
1029
0
    }                                                                   \
1030
0
    return -1;                                                          \
1031
437
}
1032
437
_w_digits(32)
1033
0
_w_digits(16)
1034
#undef _w_digits
1035
1036
static PyObject *
1037
r_PyLong(RFILE *p)
1038
437
{
1039
437
    long n = r_long(p);
1040
437
    if (n == -1 && PyErr_Occurred()) {
1041
0
        return NULL;
1042
0
    }
1043
437
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1044
0
        PyErr_SetString(PyExc_ValueError,
1045
0
                       "bad marshal data (long size out of range)");
1046
0
        return NULL;
1047
0
    }
1048
1049
437
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1050
437
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1051
1052
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1053
437
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1054
437
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1055
1056
    /* other assumptions on PyLongObject internals */
1057
437
    assert(layout->bits_per_digit <= 32);
1058
437
    assert(layout->digits_order == -1);
1059
437
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1060
437
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1061
1062
437
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1063
1064
437
    assert(size >= 1);
1065
1066
437
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1067
437
    void *digits;
1068
437
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1069
1070
437
    if (writer == NULL) {
1071
0
        return NULL;
1072
0
    }
1073
1074
437
    int ret;
1075
1076
437
    if (layout->digit_size == 4) {
1077
437
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1078
437
    }
1079
0
    else {
1080
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1081
0
    }
1082
437
    if (ret < 0) {
1083
0
        PyLongWriter_Discard(writer);
1084
0
        return NULL;
1085
0
    }
1086
437
    return PyLongWriter_Finish(writer);
1087
437
}
1088
1089
static double
1090
r_float_bin(RFILE *p)
1091
623
{
1092
623
    const char *buf = r_string(8, p);
1093
623
    if (buf == NULL)
1094
0
        return -1;
1095
623
    return PyFloat_Unpack8(buf, 1);
1096
623
}
1097
1098
/* Issue #33720: Disable inlining for reducing the C stack consumption
1099
   on PGO builds. */
1100
Py_NO_INLINE static double
1101
r_float_str(RFILE *p)
1102
0
{
1103
0
    int n;
1104
0
    char buf[256];
1105
0
    const char *ptr;
1106
0
    n = r_byte(p);
1107
0
    if (n == EOF) {
1108
0
        return -1;
1109
0
    }
1110
0
    ptr = r_string(n, p);
1111
0
    if (ptr == NULL) {
1112
0
        return -1;
1113
0
    }
1114
0
    memcpy(buf, ptr, n);
1115
0
    buf[n] = '\0';
1116
0
    return PyOS_string_to_double(buf, NULL, NULL);
1117
0
}
1118
1119
/* allocate the reflist index for a new object. Return -1 on failure */
1120
static Py_ssize_t
1121
r_ref_reserve(int flag, RFILE *p)
1122
182k
{
1123
182k
    if (flag) { /* currently only FLAG_REF is defined */
1124
6.95k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1125
6.95k
        if (idx >= 0x7ffffffe) {
1126
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1127
0
            return -1;
1128
0
        }
1129
6.95k
        if (PyList_Append(p->refs, Py_None) < 0)
1130
0
            return -1;
1131
6.95k
        return idx;
1132
6.95k
    } else
1133
175k
        return 0;
1134
182k
}
1135
1136
/* insert the new object 'o' to the reflist at previously
1137
 * allocated index 'idx'.
1138
 * 'o' can be NULL, in which case nothing is done.
1139
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1140
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1141
 * NULL returned. This simplifies error checking at the call site since
1142
 * a single test for NULL for the function result is enough.
1143
 */
1144
static PyObject *
1145
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1146
182k
{
1147
182k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1148
6.95k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1149
6.95k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1150
6.95k
        Py_DECREF(tmp);
1151
6.95k
    }
1152
182k
    return o;
1153
182k
}
1154
1155
/* combination of both above, used when an object can be
1156
 * created whenever it is seen in the file, as opposed to
1157
 * after having loaded its sub-objects.
1158
 */
1159
static PyObject *
1160
r_ref(PyObject *o, int flag, RFILE *p)
1161
1.49M
{
1162
1.49M
    assert(flag & FLAG_REF);
1163
1.49M
    if (o == NULL)
1164
0
        return NULL;
1165
1.49M
    if (PyList_Append(p->refs, o) < 0) {
1166
0
        Py_DECREF(o); /* release the new object */
1167
0
        return NULL;
1168
0
    }
1169
1.49M
    return o;
1170
1.49M
}
1171
1172
static PyObject *
1173
r_object(RFILE *p)
1174
4.61M
{
1175
    /* NULL is a valid return value, it does not necessarily means that
1176
       an exception is set. */
1177
4.61M
    PyObject *v, *v2;
1178
4.61M
    Py_ssize_t idx = 0;
1179
4.61M
    long i, n;
1180
4.61M
    int type, code = r_byte(p);
1181
4.61M
    int flag, is_interned = 0;
1182
4.61M
    PyObject *retval = NULL;
1183
1184
4.61M
    if (code == EOF) {
1185
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1186
0
            PyErr_SetString(PyExc_EOFError,
1187
0
                            "EOF read where object expected");
1188
0
        }
1189
0
        return NULL;
1190
0
    }
1191
1192
4.61M
    p->depth++;
1193
1194
4.61M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1195
0
        p->depth--;
1196
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1197
0
        return NULL;
1198
0
    }
1199
1200
4.61M
    flag = code & FLAG_REF;
1201
4.61M
    type = code & ~FLAG_REF;
1202
1203
4.61M
#define R_REF(O) do{\
1204
2.45M
    if (flag) \
1205
2.45M
        O = r_ref(O, flag, p);\
1206
2.45M
} while (0)
1207
1208
4.61M
    switch (type) {
1209
1210
0
    case TYPE_NULL:
1211
0
        break;
1212
1213
43.4k
    case TYPE_NONE:
1214
43.4k
        retval = Py_None;
1215
43.4k
        break;
1216
1217
0
    case TYPE_STOPITER:
1218
0
        retval = Py_NewRef(PyExc_StopIteration);
1219
0
        break;
1220
1221
392
    case TYPE_ELLIPSIS:
1222
392
        retval = Py_Ellipsis;
1223
392
        break;
1224
1225
1.86k
    case TYPE_FALSE:
1226
1.86k
        retval = Py_False;
1227
1.86k
        break;
1228
1229
1.59k
    case TYPE_TRUE:
1230
1.59k
        retval = Py_True;
1231
1.59k
        break;
1232
1233
24.2k
    case TYPE_INT:
1234
24.2k
        n = r_long(p);
1235
24.2k
        if (n == -1 && PyErr_Occurred()) {
1236
0
            break;
1237
0
        }
1238
24.2k
        retval = PyLong_FromLong(n);
1239
24.2k
        R_REF(retval);
1240
24.2k
        break;
1241
1242
0
    case TYPE_INT64:
1243
0
        retval = r_long64(p);
1244
0
        R_REF(retval);
1245
0
        break;
1246
1247
437
    case TYPE_LONG:
1248
437
        retval = r_PyLong(p);
1249
437
        R_REF(retval);
1250
437
        break;
1251
1252
0
    case TYPE_FLOAT:
1253
0
        {
1254
0
            double x = r_float_str(p);
1255
0
            if (x == -1.0 && PyErr_Occurred())
1256
0
                break;
1257
0
            retval = PyFloat_FromDouble(x);
1258
0
            R_REF(retval);
1259
0
            break;
1260
0
        }
1261
1262
617
    case TYPE_BINARY_FLOAT:
1263
617
        {
1264
617
            double x = r_float_bin(p);
1265
617
            if (x == -1.0 && PyErr_Occurred())
1266
0
                break;
1267
617
            retval = PyFloat_FromDouble(x);
1268
617
            R_REF(retval);
1269
617
            break;
1270
617
        }
1271
1272
0
    case TYPE_COMPLEX:
1273
0
        {
1274
0
            Py_complex c;
1275
0
            c.real = r_float_str(p);
1276
0
            if (c.real == -1.0 && PyErr_Occurred())
1277
0
                break;
1278
0
            c.imag = r_float_str(p);
1279
0
            if (c.imag == -1.0 && PyErr_Occurred())
1280
0
                break;
1281
0
            retval = PyComplex_FromCComplex(c);
1282
0
            R_REF(retval);
1283
0
            break;
1284
0
        }
1285
1286
3
    case TYPE_BINARY_COMPLEX:
1287
3
        {
1288
3
            Py_complex c;
1289
3
            c.real = r_float_bin(p);
1290
3
            if (c.real == -1.0 && PyErr_Occurred())
1291
0
                break;
1292
3
            c.imag = r_float_bin(p);
1293
3
            if (c.imag == -1.0 && PyErr_Occurred())
1294
0
                break;
1295
3
            retval = PyComplex_FromCComplex(c);
1296
3
            R_REF(retval);
1297
3
            break;
1298
3
        }
1299
1300
538k
    case TYPE_STRING:
1301
538k
        {
1302
538k
            const char *ptr;
1303
538k
            n = r_long(p);
1304
538k
            if (n < 0 || n > SIZE32_MAX) {
1305
0
                if (!PyErr_Occurred()) {
1306
0
                    PyErr_SetString(PyExc_ValueError,
1307
0
                        "bad marshal data (bytes object size out of range)");
1308
0
                }
1309
0
                break;
1310
0
            }
1311
538k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1312
538k
            if (v == NULL)
1313
0
                break;
1314
538k
            ptr = r_string(n, p);
1315
538k
            if (ptr == NULL) {
1316
0
                Py_DECREF(v);
1317
0
                break;
1318
0
            }
1319
538k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1320
538k
            retval = v;
1321
538k
            R_REF(retval);
1322
538k
            break;
1323
538k
        }
1324
1325
0
    case TYPE_ASCII_INTERNED:
1326
0
        is_interned = 1;
1327
0
        _Py_FALLTHROUGH;
1328
29.0k
    case TYPE_ASCII:
1329
29.0k
        n = r_long(p);
1330
29.0k
        if (n < 0 || n > SIZE32_MAX) {
1331
0
            if (!PyErr_Occurred()) {
1332
0
                PyErr_SetString(PyExc_ValueError,
1333
0
                    "bad marshal data (string size out of range)");
1334
0
            }
1335
0
            break;
1336
0
        }
1337
29.0k
        goto _read_ascii;
1338
1339
1.20M
    case TYPE_SHORT_ASCII_INTERNED:
1340
1.20M
        is_interned = 1;
1341
1.20M
        _Py_FALLTHROUGH;
1342
1.37M
    case TYPE_SHORT_ASCII:
1343
1.37M
        n = r_byte(p);
1344
1.37M
        if (n == EOF) {
1345
0
            break;
1346
0
        }
1347
1.40M
    _read_ascii:
1348
1.40M
        {
1349
1.40M
            const char *ptr;
1350
1.40M
            ptr = r_string(n, p);
1351
1.40M
            if (ptr == NULL)
1352
0
                break;
1353
1.40M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1354
1.40M
            if (v == NULL)
1355
0
                break;
1356
1.40M
            if (is_interned) {
1357
                // marshal is meant to serialize .pyc files with code
1358
                // objects, and code-related strings are currently immortal.
1359
1.20M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1360
1.20M
                _PyUnicode_InternImmortal(interp, &v);
1361
1.20M
            }
1362
1.40M
            retval = v;
1363
1.40M
            R_REF(retval);
1364
1.40M
            break;
1365
1.40M
        }
1366
1367
206
    case TYPE_INTERNED:
1368
206
        is_interned = 1;
1369
206
        _Py_FALLTHROUGH;
1370
3.58k
    case TYPE_UNICODE:
1371
3.58k
        {
1372
3.58k
        const char *buffer;
1373
1374
3.58k
        n = r_long(p);
1375
3.58k
        if (n < 0 || n > SIZE32_MAX) {
1376
0
            if (!PyErr_Occurred()) {
1377
0
                PyErr_SetString(PyExc_ValueError,
1378
0
                    "bad marshal data (string size out of range)");
1379
0
            }
1380
0
            break;
1381
0
        }
1382
3.58k
        if (n != 0) {
1383
3.58k
            buffer = r_string(n, p);
1384
3.58k
            if (buffer == NULL)
1385
0
                break;
1386
3.58k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1387
3.58k
        }
1388
0
        else {
1389
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1390
0
        }
1391
3.58k
        if (v == NULL)
1392
0
            break;
1393
3.58k
        if (is_interned) {
1394
            // marshal is meant to serialize .pyc files with code
1395
            // objects, and code-related strings are currently immortal.
1396
206
            PyInterpreterState *interp = _PyInterpreterState_GET();
1397
206
            _PyUnicode_InternImmortal(interp, &v);
1398
206
        }
1399
3.58k
        retval = v;
1400
3.58k
        R_REF(retval);
1401
3.58k
        break;
1402
3.58k
        }
1403
1404
482k
    case TYPE_SMALL_TUPLE:
1405
482k
        n = r_byte(p);
1406
482k
        if (n == EOF) {
1407
0
            break;
1408
0
        }
1409
482k
        goto _read_tuple;
1410
482k
    case TYPE_TUPLE:
1411
70
        n = r_long(p);
1412
70
        if (n < 0 || n > SIZE32_MAX) {
1413
0
            if (!PyErr_Occurred()) {
1414
0
                PyErr_SetString(PyExc_ValueError,
1415
0
                    "bad marshal data (tuple size out of range)");
1416
0
            }
1417
0
            break;
1418
0
        }
1419
482k
    _read_tuple:
1420
482k
        v = PyTuple_New(n);
1421
482k
        R_REF(v);
1422
482k
        if (v == NULL)
1423
0
            break;
1424
1425
3.28M
        for (i = 0; i < n; i++) {
1426
2.80M
            v2 = r_object(p);
1427
2.80M
            if ( v2 == NULL ) {
1428
0
                if (!PyErr_Occurred())
1429
0
                    PyErr_SetString(PyExc_TypeError,
1430
0
                        "NULL object in marshal data for tuple");
1431
0
                Py_SETREF(v, NULL);
1432
0
                break;
1433
0
            }
1434
2.80M
            PyTuple_SET_ITEM(v, i, v2);
1435
2.80M
        }
1436
482k
        retval = v;
1437
482k
        break;
1438
1439
0
    case TYPE_LIST:
1440
0
        n = r_long(p);
1441
0
        if (n < 0 || n > SIZE32_MAX) {
1442
0
            if (!PyErr_Occurred()) {
1443
0
                PyErr_SetString(PyExc_ValueError,
1444
0
                    "bad marshal data (list size out of range)");
1445
0
            }
1446
0
            break;
1447
0
        }
1448
0
        v = PyList_New(n);
1449
0
        R_REF(v);
1450
0
        if (v == NULL)
1451
0
            break;
1452
0
        for (i = 0; i < n; i++) {
1453
0
            v2 = r_object(p);
1454
0
            if ( v2 == NULL ) {
1455
0
                if (!PyErr_Occurred())
1456
0
                    PyErr_SetString(PyExc_TypeError,
1457
0
                        "NULL object in marshal data for list");
1458
0
                Py_SETREF(v, NULL);
1459
0
                break;
1460
0
            }
1461
0
            PyList_SET_ITEM(v, i, v2);
1462
0
        }
1463
0
        retval = v;
1464
0
        break;
1465
1466
0
    case TYPE_DICT:
1467
0
    case TYPE_FROZENDICT:
1468
0
        v = PyDict_New();
1469
0
        if (v == NULL) {
1470
0
            break;
1471
0
        }
1472
0
        if (type == TYPE_DICT) {
1473
0
            R_REF(v);
1474
0
        }
1475
0
        else {
1476
0
            idx = r_ref_reserve(flag, p);
1477
0
            if (idx < 0) {
1478
0
                Py_CLEAR(v);
1479
0
                break;
1480
0
            }
1481
0
        }
1482
0
        for (;;) {
1483
0
            PyObject *key, *val;
1484
0
            key = r_object(p);
1485
0
            if (key == NULL)
1486
0
                break;
1487
0
            val = r_object(p);
1488
0
            if (val == NULL) {
1489
0
                Py_DECREF(key);
1490
0
                break;
1491
0
            }
1492
0
            if (PyDict_SetItem(v, key, val) < 0) {
1493
0
                Py_DECREF(key);
1494
0
                Py_DECREF(val);
1495
0
                break;
1496
0
            }
1497
0
            Py_DECREF(key);
1498
0
            Py_DECREF(val);
1499
0
        }
1500
0
        if (PyErr_Occurred()) {
1501
0
            Py_CLEAR(v);
1502
0
        }
1503
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1504
0
            Py_SETREF(v, PyFrozenDict_New(v));
1505
0
        }
1506
0
        retval = v;
1507
0
        break;
1508
1509
0
    case TYPE_SET:
1510
504
    case TYPE_FROZENSET:
1511
504
        n = r_long(p);
1512
504
        if (n < 0 || n > SIZE32_MAX) {
1513
0
            if (!PyErr_Occurred()) {
1514
0
                PyErr_SetString(PyExc_ValueError,
1515
0
                    "bad marshal data (set size out of range)");
1516
0
            }
1517
0
            break;
1518
0
        }
1519
1520
504
        if (n == 0 && type == TYPE_FROZENSET) {
1521
            /* call frozenset() to get the empty frozenset singleton */
1522
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1523
0
            if (v == NULL)
1524
0
                break;
1525
0
            R_REF(v);
1526
0
            retval = v;
1527
0
        }
1528
504
        else {
1529
504
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1530
504
            if (type == TYPE_SET) {
1531
0
                R_REF(v);
1532
504
            } else {
1533
                /* must use delayed registration of frozensets because they must
1534
                 * be init with a refcount of 1
1535
                 */
1536
504
                idx = r_ref_reserve(flag, p);
1537
504
                if (idx < 0)
1538
0
                    Py_CLEAR(v); /* signal error */
1539
504
            }
1540
504
            if (v == NULL)
1541
0
                break;
1542
1543
2.61k
            for (i = 0; i < n; i++) {
1544
2.10k
                v2 = r_object(p);
1545
2.10k
                if ( v2 == NULL ) {
1546
0
                    if (!PyErr_Occurred())
1547
0
                        PyErr_SetString(PyExc_TypeError,
1548
0
                            "NULL object in marshal data for set");
1549
0
                    Py_SETREF(v, NULL);
1550
0
                    break;
1551
0
                }
1552
2.10k
                if (PySet_Add(v, v2) == -1) {
1553
0
                    Py_DECREF(v);
1554
0
                    Py_DECREF(v2);
1555
0
                    v = NULL;
1556
0
                    break;
1557
0
                }
1558
2.10k
                Py_DECREF(v2);
1559
2.10k
            }
1560
504
            if (type != TYPE_SET)
1561
504
                v = r_ref_insert(v, idx, flag, p);
1562
504
            retval = v;
1563
504
        }
1564
504
        break;
1565
1566
178k
    case TYPE_CODE:
1567
178k
        {
1568
178k
            int argcount;
1569
178k
            int posonlyargcount;
1570
178k
            int kwonlyargcount;
1571
178k
            int stacksize;
1572
178k
            int flags;
1573
178k
            PyObject *code = NULL;
1574
178k
            PyObject *consts = NULL;
1575
178k
            PyObject *names = NULL;
1576
178k
            PyObject *localsplusnames = NULL;
1577
178k
            PyObject *localspluskinds = NULL;
1578
178k
            PyObject *filename = NULL;
1579
178k
            PyObject *name = NULL;
1580
178k
            PyObject *qualname = NULL;
1581
178k
            int firstlineno;
1582
178k
            PyObject* linetable = NULL;
1583
178k
            PyObject *exceptiontable = NULL;
1584
1585
178k
            if (!p->allow_code) {
1586
0
                PyErr_SetString(PyExc_ValueError,
1587
0
                                "unmarshalling code objects is disallowed");
1588
0
                break;
1589
0
            }
1590
178k
            idx = r_ref_reserve(flag, p);
1591
178k
            if (idx < 0)
1592
0
                break;
1593
1594
178k
            v = NULL;
1595
1596
            /* XXX ignore long->int overflows for now */
1597
178k
            argcount = (int)r_long(p);
1598
178k
            if (argcount == -1 && PyErr_Occurred())
1599
0
                goto code_error;
1600
178k
            posonlyargcount = (int)r_long(p);
1601
178k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1602
0
                goto code_error;
1603
0
            }
1604
178k
            kwonlyargcount = (int)r_long(p);
1605
178k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1606
0
                goto code_error;
1607
178k
            stacksize = (int)r_long(p);
1608
178k
            if (stacksize == -1 && PyErr_Occurred())
1609
0
                goto code_error;
1610
178k
            flags = (int)r_long(p);
1611
178k
            if (flags == -1 && PyErr_Occurred())
1612
0
                goto code_error;
1613
178k
            code = r_object(p);
1614
178k
            if (code == NULL)
1615
0
                goto code_error;
1616
178k
            consts = r_object(p);
1617
178k
            if (consts == NULL)
1618
0
                goto code_error;
1619
178k
            names = r_object(p);
1620
178k
            if (names == NULL)
1621
0
                goto code_error;
1622
178k
            localsplusnames = r_object(p);
1623
178k
            if (localsplusnames == NULL)
1624
0
                goto code_error;
1625
178k
            localspluskinds = r_object(p);
1626
178k
            if (localspluskinds == NULL)
1627
0
                goto code_error;
1628
178k
            filename = r_object(p);
1629
178k
            if (filename == NULL)
1630
0
                goto code_error;
1631
178k
            name = r_object(p);
1632
178k
            if (name == NULL)
1633
0
                goto code_error;
1634
178k
            qualname = r_object(p);
1635
178k
            if (qualname == NULL)
1636
0
                goto code_error;
1637
178k
            firstlineno = (int)r_long(p);
1638
178k
            if (firstlineno == -1 && PyErr_Occurred())
1639
0
                goto code_error;
1640
178k
            linetable = r_object(p);
1641
178k
            if (linetable == NULL)
1642
0
                goto code_error;
1643
178k
            exceptiontable = r_object(p);
1644
178k
            if (exceptiontable == NULL)
1645
0
                goto code_error;
1646
1647
178k
            struct _PyCodeConstructor con = {
1648
178k
                .filename = filename,
1649
178k
                .name = name,
1650
178k
                .qualname = qualname,
1651
178k
                .flags = flags,
1652
1653
178k
                .code = code,
1654
178k
                .firstlineno = firstlineno,
1655
178k
                .linetable = linetable,
1656
1657
178k
                .consts = consts,
1658
178k
                .names = names,
1659
1660
178k
                .localsplusnames = localsplusnames,
1661
178k
                .localspluskinds = localspluskinds,
1662
1663
178k
                .argcount = argcount,
1664
178k
                .posonlyargcount = posonlyargcount,
1665
178k
                .kwonlyargcount = kwonlyargcount,
1666
1667
178k
                .stacksize = stacksize,
1668
1669
178k
                .exceptiontable = exceptiontable,
1670
178k
            };
1671
1672
178k
            if (_PyCode_Validate(&con) < 0) {
1673
0
                goto code_error;
1674
0
            }
1675
1676
178k
            v = (PyObject *)_PyCode_New(&con);
1677
178k
            if (v == NULL) {
1678
0
                goto code_error;
1679
0
            }
1680
1681
178k
            v = r_ref_insert(v, idx, flag, p);
1682
1683
178k
          code_error:
1684
178k
            if (v == NULL && !PyErr_Occurred()) {
1685
0
                PyErr_SetString(PyExc_TypeError,
1686
0
                    "NULL object in marshal data for code object");
1687
0
            }
1688
178k
            Py_XDECREF(code);
1689
178k
            Py_XDECREF(consts);
1690
178k
            Py_XDECREF(names);
1691
178k
            Py_XDECREF(localsplusnames);
1692
178k
            Py_XDECREF(localspluskinds);
1693
178k
            Py_XDECREF(filename);
1694
178k
            Py_XDECREF(name);
1695
178k
            Py_XDECREF(qualname);
1696
178k
            Py_XDECREF(linetable);
1697
178k
            Py_XDECREF(exceptiontable);
1698
178k
        }
1699
0
        retval = v;
1700
178k
        break;
1701
1702
1.93M
    case TYPE_REF:
1703
1.93M
        n = r_long(p);
1704
1.93M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1705
0
            if (!PyErr_Occurred()) {
1706
0
                PyErr_SetString(PyExc_ValueError,
1707
0
                    "bad marshal data (invalid reference)");
1708
0
            }
1709
0
            break;
1710
0
        }
1711
1.93M
        v = PyList_GET_ITEM(p->refs, n);
1712
1.93M
        if (v == Py_None) {
1713
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1714
0
            break;
1715
0
        }
1716
1.93M
        retval = Py_NewRef(v);
1717
1.93M
        break;
1718
1719
3.23k
    case TYPE_SLICE:
1720
3.23k
    {
1721
3.23k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1722
3.23k
        if (idx < 0) {
1723
0
            break;
1724
0
        }
1725
3.23k
        PyObject *stop = NULL;
1726
3.23k
        PyObject *step = NULL;
1727
3.23k
        PyObject *start = r_object(p);
1728
3.23k
        if (start == NULL) {
1729
0
            goto cleanup;
1730
0
        }
1731
3.23k
        stop = r_object(p);
1732
3.23k
        if (stop == NULL) {
1733
0
            goto cleanup;
1734
0
        }
1735
3.23k
        step = r_object(p);
1736
3.23k
        if (step == NULL) {
1737
0
            goto cleanup;
1738
0
        }
1739
3.23k
        retval = PySlice_New(start, stop, step);
1740
3.23k
        r_ref_insert(retval, idx, flag, p);
1741
3.23k
    cleanup:
1742
3.23k
        Py_XDECREF(start);
1743
3.23k
        Py_XDECREF(stop);
1744
3.23k
        Py_XDECREF(step);
1745
3.23k
        break;
1746
3.23k
    }
1747
1748
0
    default:
1749
        /* Bogus data got written, which isn't ideal.
1750
           This will let you keep working and recover. */
1751
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1752
0
        break;
1753
1754
4.61M
    }
1755
4.61M
    p->depth--;
1756
4.61M
    return retval;
1757
4.61M
}
1758
1759
static PyObject *
1760
read_object(RFILE *p)
1761
7.20k
{
1762
7.20k
    PyObject *v;
1763
7.20k
    if (PyErr_Occurred()) {
1764
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1765
0
        return NULL;
1766
0
    }
1767
7.20k
    if (p->ptr && p->end) {
1768
7.20k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1769
0
            return NULL;
1770
0
        }
1771
7.20k
    } else if (p->fp || p->readable) {
1772
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1773
0
            return NULL;
1774
0
        }
1775
0
    }
1776
7.20k
    v = r_object(p);
1777
7.20k
    if (v == NULL && !PyErr_Occurred())
1778
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1779
7.20k
    return v;
1780
7.20k
}
1781
1782
int
1783
PyMarshal_ReadShortFromFile(FILE *fp)
1784
0
{
1785
0
    RFILE rf;
1786
0
    int res;
1787
0
    assert(fp);
1788
0
    rf.readable = NULL;
1789
0
    rf.fp = fp;
1790
0
    rf.end = rf.ptr = NULL;
1791
0
    rf.buf = NULL;
1792
0
    res = r_short(&rf);
1793
0
    if (rf.buf != NULL)
1794
0
        PyMem_Free(rf.buf);
1795
0
    return res;
1796
0
}
1797
1798
long
1799
PyMarshal_ReadLongFromFile(FILE *fp)
1800
0
{
1801
0
    RFILE rf;
1802
0
    long res;
1803
0
    rf.fp = fp;
1804
0
    rf.readable = NULL;
1805
0
    rf.ptr = rf.end = NULL;
1806
0
    rf.buf = NULL;
1807
0
    res = r_long(&rf);
1808
0
    if (rf.buf != NULL)
1809
0
        PyMem_Free(rf.buf);
1810
0
    return res;
1811
0
}
1812
1813
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1814
static off_t
1815
getfilesize(FILE *fp)
1816
0
{
1817
0
    struct _Py_stat_struct st;
1818
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1819
0
        return -1;
1820
#if SIZEOF_OFF_T == 4
1821
    else if (st.st_size >= INT_MAX)
1822
        return (off_t)INT_MAX;
1823
#endif
1824
0
    else
1825
0
        return (off_t)st.st_size;
1826
0
}
1827
1828
/* If we can get the size of the file up-front, and it's reasonably small,
1829
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1830
 * than reading a byte at a time from file; speeds .pyc imports.
1831
 * CAUTION:  since this may read the entire remainder of the file, don't
1832
 * call it unless you know you're done with the file.
1833
 */
1834
PyObject *
1835
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1836
0
{
1837
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1838
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1839
0
    off_t filesize;
1840
0
    filesize = getfilesize(fp);
1841
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1842
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1843
0
        if (pBuf != NULL) {
1844
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1845
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1846
0
            PyMem_Free(pBuf);
1847
0
            return v;
1848
0
        }
1849
1850
0
    }
1851
    /* We don't have fstat, or we do but the file is larger than
1852
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1853
     */
1854
0
    return PyMarshal_ReadObjectFromFile(fp);
1855
1856
0
#undef REASONABLE_FILE_LIMIT
1857
0
}
1858
1859
PyObject *
1860
PyMarshal_ReadObjectFromFile(FILE *fp)
1861
0
{
1862
0
    RFILE rf;
1863
0
    PyObject *result;
1864
0
    rf.allow_code = 1;
1865
0
    rf.fp = fp;
1866
0
    rf.readable = NULL;
1867
0
    rf.depth = 0;
1868
0
    rf.ptr = rf.end = NULL;
1869
0
    rf.buf = NULL;
1870
0
    rf.refs = PyList_New(0);
1871
0
    if (rf.refs == NULL)
1872
0
        return NULL;
1873
0
    result = read_object(&rf);
1874
0
    Py_DECREF(rf.refs);
1875
0
    if (rf.buf != NULL)
1876
0
        PyMem_Free(rf.buf);
1877
0
    return result;
1878
0
}
1879
1880
PyObject *
1881
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1882
629
{
1883
629
    RFILE rf;
1884
629
    PyObject *result;
1885
629
    rf.allow_code = 1;
1886
629
    rf.fp = NULL;
1887
629
    rf.readable = NULL;
1888
629
    rf.ptr = str;
1889
629
    rf.end = str + len;
1890
629
    rf.buf = NULL;
1891
629
    rf.depth = 0;
1892
629
    rf.refs = PyList_New(0);
1893
629
    if (rf.refs == NULL)
1894
0
        return NULL;
1895
629
    result = read_object(&rf);
1896
629
    Py_DECREF(rf.refs);
1897
629
    if (rf.buf != NULL)
1898
0
        PyMem_Free(rf.buf);
1899
629
    return result;
1900
629
}
1901
1902
static PyObject *
1903
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
1904
209
{
1905
209
    WFILE wf;
1906
1907
209
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1908
0
        return NULL;
1909
0
    }
1910
209
    memset(&wf, 0, sizeof(wf));
1911
209
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1912
209
    if (wf.str == NULL)
1913
0
        return NULL;
1914
209
    wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1915
209
    wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1916
209
    wf.error = WFERR_OK;
1917
209
    wf.version = version;
1918
209
    wf.allow_code = allow_code;
1919
209
    if (w_init_refs(&wf, version)) {
1920
0
        Py_DECREF(wf.str);
1921
0
        return NULL;
1922
0
    }
1923
209
    w_object(x, &wf);
1924
209
    w_clear_refs(&wf);
1925
209
    if (wf.str != NULL) {
1926
209
        const char *base = PyBytes_AS_STRING(wf.str);
1927
209
        if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1928
0
            return NULL;
1929
209
    }
1930
209
    if (wf.error != WFERR_OK) {
1931
0
        Py_XDECREF(wf.str);
1932
0
        switch (wf.error) {
1933
0
        case WFERR_NOMEMORY:
1934
0
            PyErr_NoMemory();
1935
0
            break;
1936
0
        case WFERR_NESTEDTOODEEP:
1937
0
            PyErr_SetString(PyExc_ValueError,
1938
0
                            "object too deeply nested to marshal");
1939
0
            break;
1940
0
        case WFERR_CODE_NOT_ALLOWED:
1941
0
            PyErr_SetString(PyExc_ValueError,
1942
0
                            "marshalling code objects is disallowed");
1943
0
            break;
1944
0
        default:
1945
0
        case WFERR_UNMARSHALLABLE:
1946
0
            PyErr_SetString(PyExc_ValueError,
1947
0
                            "unmarshallable object");
1948
0
            break;
1949
0
        }
1950
0
        return NULL;
1951
0
    }
1952
209
    return wf.str;
1953
209
}
1954
1955
PyObject *
1956
PyMarshal_WriteObjectToString(PyObject *x, int version)
1957
0
{
1958
0
    return _PyMarshal_WriteObjectToString(x, version, 1);
1959
0
}
1960
1961
/* And an interface for Python programs... */
1962
/*[clinic input]
1963
marshal.dump
1964
1965
    value: object
1966
        Must be a supported type.
1967
    file: object
1968
        Must be a writeable binary file.
1969
    version: int(c_default="Py_MARSHAL_VERSION") = version
1970
        Indicates the data format that dump should use.
1971
    /
1972
    *
1973
    allow_code: bool = True
1974
        Allow to write code objects.
1975
1976
Write the value on the open file.
1977
1978
If the value has (or contains an object that has) an unsupported type, a
1979
ValueError exception is raised - but garbage data will also be written
1980
to the file. The object will not be properly read back by load().
1981
[clinic start generated code]*/
1982
1983
static PyObject *
1984
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1985
                  int version, int allow_code)
1986
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
1987
0
{
1988
    /* XXX Quick hack -- need to do this differently */
1989
0
    PyObject *s;
1990
0
    PyObject *res;
1991
1992
0
    s = _PyMarshal_WriteObjectToString(value, version, allow_code);
1993
0
    if (s == NULL)
1994
0
        return NULL;
1995
0
    res = PyObject_CallMethodOneArg(file, &_Py_ID(write), s);
1996
0
    Py_DECREF(s);
1997
0
    return res;
1998
0
}
1999
2000
/*[clinic input]
2001
marshal.load
2002
2003
    file: object
2004
        Must be readable binary file.
2005
    /
2006
    *
2007
    allow_code: bool = True
2008
        Allow to load code objects.
2009
2010
Read one value from the open file and return it.
2011
2012
If no valid value is read (e.g. because the data has a different Python
2013
version's incompatible marshal format), raise EOFError, ValueError or
2014
TypeError.
2015
2016
Note: If an object containing an unsupported type was marshalled with
2017
dump(), load() will substitute None for the unmarshallable type.
2018
[clinic start generated code]*/
2019
2020
static PyObject *
2021
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
2022
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
2023
0
{
2024
0
    PyObject *data, *result;
2025
0
    RFILE rf;
2026
2027
    /*
2028
     * Make a call to the read method, but read zero bytes.
2029
     * This is to ensure that the object passed in at least
2030
     * has a read method which returns bytes.
2031
     * This can be removed if we guarantee good error handling
2032
     * for r_string()
2033
     */
2034
0
    data = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
2035
0
    if (data == NULL)
2036
0
        return NULL;
2037
0
    if (!PyBytes_Check(data)) {
2038
0
        PyErr_Format(PyExc_TypeError,
2039
0
                     "file.read() returned not bytes but %.100s",
2040
0
                     Py_TYPE(data)->tp_name);
2041
0
        result = NULL;
2042
0
    }
2043
0
    else {
2044
0
        rf.allow_code = allow_code;
2045
0
        rf.depth = 0;
2046
0
        rf.fp = NULL;
2047
0
        rf.readable = file;
2048
0
        rf.ptr = rf.end = NULL;
2049
0
        rf.buf = NULL;
2050
0
        if ((rf.refs = PyList_New(0)) != NULL) {
2051
0
            result = read_object(&rf);
2052
0
            Py_DECREF(rf.refs);
2053
0
            if (rf.buf != NULL)
2054
0
                PyMem_Free(rf.buf);
2055
0
        } else
2056
0
            result = NULL;
2057
0
    }
2058
0
    Py_DECREF(data);
2059
0
    return result;
2060
0
}
2061
2062
/*[clinic input]
2063
@permit_long_summary
2064
marshal.dumps
2065
2066
    value: object
2067
        Must be a supported type.
2068
    version: int(c_default="Py_MARSHAL_VERSION") = version
2069
        Indicates the data format that dumps should use.
2070
    /
2071
    *
2072
    allow_code: bool = True
2073
        Allow to write code objects.
2074
2075
Return the bytes object that would be written to a file by dump(value, file).
2076
2077
Raise a ValueError exception if value has (or contains an object that
2078
has) an unsupported type.
2079
[clinic start generated code]*/
2080
2081
static PyObject *
2082
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
2083
                   int allow_code)
2084
/*[clinic end generated code: output=115f90da518d1d49 input=dc1edcafd43124c5]*/
2085
138
{
2086
138
    return _PyMarshal_WriteObjectToString(value, version, allow_code);
2087
138
}
2088
2089
/*[clinic input]
2090
marshal.loads
2091
2092
    bytes: Py_buffer
2093
    /
2094
    *
2095
    allow_code: bool = True
2096
        Allow to load code objects.
2097
2098
Convert the bytes-like object to a value.
2099
2100
If no valid value is found, raise EOFError, ValueError or TypeError.
2101
Extra bytes in the input are ignored.
2102
[clinic start generated code]*/
2103
2104
static PyObject *
2105
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2106
/*[clinic end generated code: output=62c0c538d3edc31f input=286f1dbd6811d2ad]*/
2107
6.57k
{
2108
6.57k
    RFILE rf;
2109
6.57k
    char *s = bytes->buf;
2110
6.57k
    Py_ssize_t n = bytes->len;
2111
6.57k
    PyObject* result;
2112
6.57k
    rf.allow_code = allow_code;
2113
6.57k
    rf.fp = NULL;
2114
6.57k
    rf.readable = NULL;
2115
6.57k
    rf.ptr = s;
2116
6.57k
    rf.end = s + n;
2117
6.57k
    rf.depth = 0;
2118
6.57k
    if ((rf.refs = PyList_New(0)) == NULL)
2119
0
        return NULL;
2120
6.57k
    result = read_object(&rf);
2121
6.57k
    Py_DECREF(rf.refs);
2122
6.57k
    return result;
2123
6.57k
}
2124
2125
static PyMethodDef marshal_methods[] = {
2126
    MARSHAL_DUMP_METHODDEF
2127
    MARSHAL_LOAD_METHODDEF
2128
    MARSHAL_DUMPS_METHODDEF
2129
    MARSHAL_LOADS_METHODDEF
2130
    {NULL,              NULL}           /* sentinel */
2131
};
2132
2133
2134
PyDoc_STRVAR(module_doc,
2135
"This module contains functions that can read and write Python values in\n\
2136
a binary format. The format is specific to Python, but independent of\n\
2137
machine architecture issues.\n\
2138
\n\
2139
Not all Python object types are supported; in general, only objects\n\
2140
whose value is independent from a particular invocation of Python can be\n\
2141
written and read by this module. The following types are supported:\n\
2142
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2143
tuples, lists, sets, dictionaries, and code objects, where it\n\
2144
should be understood that tuples, lists and dictionaries are only\n\
2145
supported as long as the values contained therein are themselves\n\
2146
supported; and recursive lists and dictionaries should not be written\n\
2147
(they will cause infinite loops).\n\
2148
\n\
2149
Variables:\n\
2150
\n\
2151
version -- indicates the format that the module uses. Version 0 is the\n\
2152
    historical format, version 1 shares interned strings and version 2\n\
2153
    uses a binary format for floating-point numbers.\n\
2154
    Version 3 shares common object references (New in version 3.4).\n\
2155
\n\
2156
Functions:\n\
2157
\n\
2158
dump() -- write value to a file\n\
2159
load() -- read value from a file\n\
2160
dumps() -- marshal value as a bytes object\n\
2161
loads() -- read value from a bytes-like object");
2162
2163
2164
static int
2165
marshal_module_exec(PyObject *mod)
2166
36
{
2167
36
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2168
0
        return -1;
2169
0
    }
2170
36
    return 0;
2171
36
}
2172
2173
static PyModuleDef_Slot marshalmodule_slots[] = {
2174
     _Py_ABI_SLOT,
2175
    {Py_mod_exec, marshal_module_exec},
2176
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2177
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2178
    {0, NULL}
2179
};
2180
2181
static struct PyModuleDef marshalmodule = {
2182
    PyModuleDef_HEAD_INIT,
2183
    .m_name = "marshal",
2184
    .m_doc = module_doc,
2185
    .m_methods = marshal_methods,
2186
    .m_slots = marshalmodule_slots,
2187
};
2188
2189
PyMODINIT_FUNC
2190
PyMarshal_Init(void)
2191
36
{
2192
36
    return PyModuleDef_Init(&marshalmodule);
2193
36
}