Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
18
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
19
20
#include "marshal.h"                 // Py_MARSHAL_VERSION
21
22
#ifdef __APPLE__
23
#  include "TargetConditionals.h"
24
#endif /* __APPLE__ */
25
26
27
/*[clinic input]
28
module marshal
29
[clinic start generated code]*/
30
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
31
32
#include "clinic/marshal.c.h"
33
34
/* High water mark to determine when the marshalled object is dangerously deep
35
 * and risks coring the interpreter.  When the object stack gets this deep,
36
 * raise an exception instead of continuing.
37
 * On Windows debug builds, reduce this value.
38
 *
39
 * BUG: https://bugs.python.org/issue33720
40
 * On Windows PGO builds, the r_object function overallocates its stack and
41
 * can cause a stack overflow. We reduce the maximum depth for all Windows
42
 * releases to protect against this.
43
 * #if defined(MS_WINDOWS) && defined(Py_DEBUG)
44
 */
45
#if defined(MS_WINDOWS)
46
#  define MAX_MARSHAL_STACK_DEPTH 1000
47
#elif defined(__wasi__)
48
#  define MAX_MARSHAL_STACK_DEPTH 1500
49
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
50
// It won't be defined on older macOS SDKs
51
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
52
#  define MAX_MARSHAL_STACK_DEPTH 1500
53
#else
54
4.56M
#  define MAX_MARSHAL_STACK_DEPTH 2000
55
#endif
56
57
/* Supported types */
58
0
#define TYPE_NULL               '0'
59
40.6k
#define TYPE_NONE               'N'
60
1.94k
#define TYPE_FALSE              'F'
61
1.72k
#define TYPE_TRUE               'T'
62
0
#define TYPE_STOPITER           'S'
63
396
#define TYPE_ELLIPSIS           '.'
64
631
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
65
3
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
66
437
#define TYPE_LONG               'l'  // See also TYPE_INT.
67
526k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
68
71
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
69
0
#define TYPE_LIST               '['
70
0
#define TYPE_DICT               '{'
71
0
#define TYPE_FROZENDICT         '}'
72
174k
#define TYPE_CODE               'c'
73
3.58k
#define TYPE_UNICODE            'u'
74
#define TYPE_UNKNOWN            '?'
75
// added in version 2:
76
1.52k
#define TYPE_SET                '<'
77
507
#define TYPE_FROZENSET          '>'
78
// added in version 5:
79
3.32k
#define TYPE_SLICE              ':'
80
// Remember to update the version and documentation when adding new types.
81
82
/* Special cases for unicode strings (added in version 4) */
83
206
#define TYPE_INTERNED           't' // Version 1+
84
27.0k
#define TYPE_ASCII              'a'
85
0
#define TYPE_ASCII_INTERNED     'A'
86
1.30M
#define TYPE_SHORT_ASCII        'z'
87
1.14M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
88
89
/* Special cases for small objects */
90
24.4k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
91
465k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+
92
93
/* Supported for backwards compatibility */
94
0
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
95
0
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
96
0
#define TYPE_INT64              'I'  // Not generated any more.
97
98
/* References (added in version 3) */
99
1.90M
#define TYPE_REF                'r'
100
8.99M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
101
102
103
// Error codes:
104
79.9k
#define WFERR_OK 0
105
22
#define WFERR_UNMARSHALLABLE 1
106
0
#define WFERR_NESTEDTOODEEP 2
107
11
#define WFERR_NOMEMORY 3
108
0
#define WFERR_CODE_NOT_ALLOWED 4
109
110
/* State of one marshal write operation.
 *
 * Output is staged in the byte range [buf, end) with ptr as the write
 * cursor.  When fp is set, staged bytes are pushed to that stream by
 * w_flush(); when fp is NULL, buf points into the bytes object str, which
 * w_reserve() enlarges on demand.  w_reserve() resets ptr/end/buf to NULL if
 * enlarging str fails, and the writers treat a NULL ptr as "an error already
 * occurred".
 *
 * depth counts nested w_object() calls and is checked against
 * MAX_MARSHAL_STACK_DEPTH.  hashtable maps objects already written to their
 * reference index (only created for version >= 3).  allow_code, when zero,
 * makes w_complex_object() reject code objects with WFERR_CODE_NOT_ALLOWED.
 */
typedef struct {
111
    FILE *fp;
112
    int error;  /* see WFERR_* values */
113
    int depth;
114
    PyObject *str;
115
    char *ptr;
116
    const char *end;
117
    char *buf;
118
    _Py_hashtable_t *hashtable;
119
    int version;
120
    int allow_code;
121
} WFILE;
122
123
388k
/* Append the single byte c to the output of WFILE *p.
 * If the buffer is full (ptr == end), w_reserve() is asked for one more
 * byte first; when that fails the byte is silently dropped.
 * Note: p is evaluated several times, so it must be free of side effects. */
#define w_byte(c, p) do {                               \
124
388k
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
125
388k
            *(p)->ptr++ = (c);                          \
126
388k
    } while(0)
127
128
static void
129
w_flush(WFILE *p)
130
0
{
131
0
    assert(p->fp != NULL);
132
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
133
0
    p->ptr = p->buf;
134
0
}
135
136
static int
137
w_reserve(WFILE *p, Py_ssize_t needed)
138
552
{
139
552
    Py_ssize_t pos, size, delta;
140
552
    if (p->ptr == NULL)
141
0
        return 0; /* An error already occurred */
142
552
    if (p->fp != NULL) {
143
0
        w_flush(p);
144
0
        return needed <= p->end - p->ptr;
145
0
    }
146
552
    assert(p->str != NULL);
147
552
    pos = p->ptr - p->buf;
148
552
    size = PyBytes_GET_SIZE(p->str);
149
552
    if (size > 16*1024*1024)
150
0
        delta = (size >> 3);            /* 12.5% overallocation */
151
552
    else
152
552
        delta = size + 1024;
153
552
    delta = Py_MAX(delta, needed);
154
552
    if (delta > PY_SSIZE_T_MAX - size) {
155
0
        p->error = WFERR_NOMEMORY;
156
0
        return 0;
157
0
    }
158
552
    size += delta;
159
552
    if (_PyBytes_Resize(&p->str, size) != 0) {
160
0
        p->end = p->ptr = p->buf = NULL;
161
0
        return 0;
162
0
    }
163
552
    else {
164
552
        p->buf = PyBytes_AS_STRING(p->str);
165
552
        p->ptr = p->buf + pos;
166
552
        p->end = p->buf + size;
167
552
        return 1;
168
552
    }
169
552
}
170
171
static void
172
w_string(const void *s, Py_ssize_t n, WFILE *p)
173
27.1k
{
174
27.1k
    Py_ssize_t m;
175
27.1k
    if (!n || p->ptr == NULL)
176
213
        return;
177
26.9k
    m = p->end - p->ptr;
178
26.9k
    if (p->fp != NULL) {
179
0
        if (n <= m) {
180
0
            memcpy(p->ptr, s, n);
181
0
            p->ptr += n;
182
0
        }
183
0
        else {
184
0
            w_flush(p);
185
0
            fwrite(s, 1, n, p->fp);
186
0
        }
187
0
    }
188
26.9k
    else {
189
26.9k
        if (n <= m || w_reserve(p, n - m)) {
190
26.9k
            memcpy(p->ptr, s, n);
191
26.9k
            p->ptr += n;
192
26.9k
        }
193
26.9k
    }
194
26.9k
}
195
196
static void
197
w_short(int x, WFILE *p)
198
23
{
199
23
    w_byte((char)( x      & 0xff), p);
200
23
    w_byte((char)((x>> 8) & 0xff), p);
201
23
}
202
203
static void
204
w_long(long x, WFILE *p)
205
70.8k
{
206
70.8k
    w_byte((char)( x      & 0xff), p);
207
70.8k
    w_byte((char)((x>> 8) & 0xff), p);
208
70.8k
    w_byte((char)((x>>16) & 0xff), p);
209
70.8k
    w_byte((char)((x>>24) & 0xff), p);
210
70.8k
}
211
212
569k
#define SIZE32_MAX  0x7FFFFFFF
213
214
/* W_SIZE(n, p): write the size n as a 32-bit value via w_long().
 * Where size_t is wider than 4 bytes, a size above SIZE32_MAX cannot be
 * encoded: the macro then decrements p->depth, sets WFERR_UNMARSHALLABLE and
 * executes `return;` in the *calling* function, so it may only be used
 * inside functions returning void.  On other platforms it is just w_long. */
#if SIZEOF_SIZE_T > 4
215
10.1k
# define W_SIZE(n, p)  do {                     \
216
10.1k
        if ((n) > SIZE32_MAX) {                 \
217
0
            (p)->depth--;                       \
218
0
            (p)->error = WFERR_UNMARSHALLABLE;  \
219
0
            return;                             \
220
0
        }                                       \
221
10.1k
        w_long((long)(n), p);                   \
222
10.1k
    } while(0)
223
#else
224
# define W_SIZE  w_long
225
#endif
226
227
static void
228
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
229
10.1k
{
230
10.1k
        W_SIZE(n, p);
231
10.1k
        w_string(s, n, p);
232
10.1k
}
233
234
static void
235
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
236
16.9k
{
237
16.9k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
238
16.9k
    w_string(s, n, p);
239
16.9k
}
240
241
/* We assume that Python ints are stored internally in base some power of
242
   2**15; for the sake of portability we'll always read and write them in base
243
   exactly 2**15. */
244
245
3.84k
#define PyLong_MARSHAL_SHIFT 15
246
1.69k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
247
23
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
248
249
40.7k
/* Write the type code t for the object being marshalled.
 * The code is OR'ed with a variable named `flag`, which is NOT a macro
 * argument: it must be in scope wherever the macro is expanded (w_ref() sets
 * FLAG_REF in it when the object is being added to the reference table). */
#define W_TYPE(t, p) do { \
250
40.7k
    w_byte((t) | flag, (p)); \
251
40.7k
} while(0)
252
253
static PyObject *
254
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
255
256
#define _r_digits(bitsize)                                                \
257
static void                                                               \
258
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
259
3
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
260
3
{                                                                         \
261
3
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
262
3
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
263
3
    uint ## bitsize ## _t d = digits[n - 1];                              \
264
3
                                                                          \
265
3
    assert(marshal_ratio > 0);                                            \
266
3
    assert(n >= 1);                                                       \
267
3
    assert(d != 0); /* a PyLong is always normalized */                   \
268
3
    do {                                                                  \
269
3
        d >>= PyLong_MARSHAL_SHIFT;                                       \
270
3
        l++;                                                              \
271
3
    } while (d != 0);                                                     \
272
3
    if (l > SIZE32_MAX) {                                                 \
273
0
        p->depth--;                                                       \
274
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
275
0
        return;                                                           \
276
0
    }                                                                     \
277
3
    w_long((long)(negative ? -l : l), p);                                 \
278
3
                                                                          \
279
9
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
280
6
        d = digits[i];                                                    \
281
18
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
282
12
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
283
12
            d >>= PyLong_MARSHAL_SHIFT;                                   \
284
12
        }                                                                 \
285
6
        assert(d == 0);                                                   \
286
6
    }                                                                     \
287
3
    d = digits[n - 1];                                                    \
288
3
    do {                                                                  \
289
3
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
290
3
        d >>= PyLong_MARSHAL_SHIFT;                                       \
291
3
    } while (d != 0);                                                     \
292
3
}
293
0
_r_digits(16)
294
3
_r_digits(32)
295
#undef _r_digits
296
297
static void
298
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
299
5
{
300
5
    W_TYPE(TYPE_LONG, p);
301
5
    if (_PyLong_IsZero(ob)) {
302
0
        w_long((long)0, p);
303
0
        return;
304
0
    }
305
306
5
    PyLongExport long_export;
307
308
5
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
309
0
        p->depth--;
310
0
        p->error = WFERR_UNMARSHALLABLE;
311
0
        return;
312
0
    }
313
5
    if (!long_export.digits) {
314
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
315
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
316
2
        uint64_t d = abs_value;
317
2
        long l = 0;
318
319
        /* set l to number of base PyLong_MARSHAL_BASE digits */
320
8
        do {
321
8
            d >>= PyLong_MARSHAL_SHIFT;
322
8
            l += sign;
323
8
        } while (d);
324
2
        w_long(l, p);
325
326
2
        d = abs_value;
327
8
        do {
328
8
            w_short(d & PyLong_MARSHAL_MASK, p);
329
8
            d >>= PyLong_MARSHAL_SHIFT;
330
8
        } while (d);
331
2
        return;
332
2
    }
333
334
3
    const PyLongLayout *layout = PyLong_GetNativeLayout();
335
3
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
336
337
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
338
3
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
339
3
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
340
341
    /* other assumptions on PyLongObject internals */
342
3
    assert(layout->bits_per_digit <= 32);
343
3
    assert(layout->digits_order == -1);
344
3
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
345
3
    assert(layout->digit_size == 2 || layout->digit_size == 4);
346
347
3
    if (layout->digit_size == 4) {
348
3
        _r_digits32(long_export.digits, long_export.ndigits,
349
3
                    long_export.negative, marshal_ratio, p);
350
3
    }
351
0
    else {
352
0
        _r_digits16(long_export.digits, long_export.ndigits,
353
0
                    long_export.negative, marshal_ratio, p);
354
0
    }
355
3
    PyLong_FreeExport(&long_export);
356
3
}
357
358
static void
359
w_float_bin(double v, WFILE *p)
360
16
{
361
16
    char buf[8];
362
16
    if (PyFloat_Pack8(v, buf, 1) < 0) {
363
0
        p->error = WFERR_UNMARSHALLABLE;
364
0
        return;
365
0
    }
366
16
    w_string(buf, 8, p);
367
16
}
368
369
static void
370
w_float_str(double v, WFILE *p)
371
0
{
372
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
373
0
    if (!buf) {
374
0
        p->error = WFERR_NOMEMORY;
375
0
        return;
376
0
    }
377
0
    w_short_pstring(buf, strlen(buf), p);
378
0
    PyMem_Free(buf);
379
0
}
380
381
static int
382
w_ref(PyObject *v, char *flag, WFILE *p)
383
78.8k
{
384
78.8k
    _Py_hashtable_entry_t *entry;
385
386
78.8k
    if (p->version < 3 || p->hashtable == NULL)
387
0
        return 0; /* not writing object references */
388
389
    /* If it has only one reference, it definitely isn't shared.
390
     * But we use TYPE_REF always for interned string, to PYC file stable
391
     * as possible.
392
     */
393
78.8k
    if (_PyObject_IsUniquelyReferenced(v) &&
394
20.9k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
395
19.3k
        return 0;
396
19.3k
    }
397
398
59.5k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
399
59.5k
    if (entry != NULL) {
400
        /* write the reference index to the stream */
401
38.0k
        uintptr_t w = (uintptr_t)entry->value;
402
38.0k
        if (w & 0x80000000LU) {
403
0
            PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
404
0
            goto err;
405
0
        }
406
        /* we don't store "long" indices in the dict */
407
38.0k
        assert(w <= 0x7fffffff);
408
38.0k
        w_byte(TYPE_REF, p);
409
38.0k
        w_long((int)w, p);
410
38.0k
        return 1;
411
38.0k
    } else {
412
21.4k
        size_t w = p->hashtable->nentries;
413
        /* we don't support long indices */
414
21.4k
        if (w >= 0x7fffffff) {
415
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
416
0
            goto err;
417
0
        }
418
        // Corresponding code should call w_complete() after
419
        // writing the object.
420
21.4k
        if (PyCode_Check(v) || PySlice_Check(v) || PyFrozenDict_CheckExact(v)) {
421
184
            w |= 0x80000000LU;
422
184
        }
423
21.4k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
424
21.4k
                              (void *)(uintptr_t)w) < 0) {
425
0
            Py_DECREF(v);
426
0
            goto err;
427
0
        }
428
21.4k
        *flag |= FLAG_REF;
429
21.4k
        return 0;
430
21.4k
    }
431
0
err:
432
0
    p->error = WFERR_UNMARSHALLABLE;
433
0
    return 1;
434
59.5k
}
435
436
static void
437
w_complete(PyObject *v, WFILE *p)
438
3.47k
{
439
3.47k
    if (p->version < 3 || p->hashtable == NULL) {
440
0
        return;
441
0
    }
442
3.47k
    if (_PyObject_IsUniquelyReferenced(v)) {
443
3.29k
        return;
444
3.29k
    }
445
446
184
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
447
184
    if (entry == NULL) {
448
0
        return;
449
0
    }
450
184
    assert(entry != NULL);
451
184
    uintptr_t w = (uintptr_t)entry->value;
452
184
    assert(w & 0x80000000LU);
453
184
    w &= ~0x80000000LU;
454
184
    entry->value = (void *)(uintptr_t)w;
455
184
}
456
457
static void
458
w_complex_object(PyObject *v, char flag, WFILE *p);
459
460
static void
461
w_object(PyObject *v, WFILE *p)
462
79.4k
{
463
79.4k
    char flag = '\0';
464
465
79.4k
    if (p->error != WFERR_OK) {
466
0
        return;
467
0
    }
468
469
79.4k
    p->depth++;
470
471
79.4k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
472
0
        p->error = WFERR_NESTEDTOODEEP;
473
0
    }
474
79.4k
    else if (v == NULL) {
475
0
        w_byte(TYPE_NULL, p);
476
0
    }
477
79.4k
    else if (v == Py_None) {
478
414
        w_byte(TYPE_NONE, p);
479
414
    }
480
79.0k
    else if (v == PyExc_StopIteration) {
481
0
        w_byte(TYPE_STOPITER, p);
482
0
    }
483
79.0k
    else if (v == Py_Ellipsis) {
484
3
        w_byte(TYPE_ELLIPSIS, p);
485
3
    }
486
79.0k
    else if (v == Py_False) {
487
159
        w_byte(TYPE_FALSE, p);
488
159
    }
489
78.8k
    else if (v == Py_True) {
490
54
        w_byte(TYPE_TRUE, p);
491
54
    }
492
78.8k
    else if (!w_ref(v, &flag, p))
493
40.7k
        w_complex_object(v, flag, p);
494
495
79.4k
    p->depth--;
496
79.4k
}
497
498
static void
499
w_complex_object(PyObject *v, char flag, WFILE *p)
500
40.7k
{
501
40.7k
    Py_ssize_t i, n;
502
503
40.7k
    if (PyLong_CheckExact(v)) {
504
1.87k
        int overflow;
505
1.87k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
506
1.87k
        if (overflow) {
507
3
            w_PyLong((PyLongObject *)v, flag, p);
508
3
        }
509
1.87k
        else {
510
1.87k
#if SIZEOF_LONG > 4
511
1.87k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
512
1.87k
            if (y && y != -1) {
513
                /* Too large for TYPE_INT */
514
2
                w_PyLong((PyLongObject*)v, flag, p);
515
2
            }
516
1.87k
            else
517
1.87k
#endif
518
1.87k
            {
519
1.87k
                W_TYPE(TYPE_INT, p);
520
1.87k
                w_long(x, p);
521
1.87k
            }
522
1.87k
        }
523
1.87k
    }
524
38.8k
    else if (PyFloat_CheckExact(v)) {
525
14
        if (p->version > 1) {
526
14
            W_TYPE(TYPE_BINARY_FLOAT, p);
527
14
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
528
14
        }
529
0
        else {
530
0
            W_TYPE(TYPE_FLOAT, p);
531
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
532
0
        }
533
14
    }
534
38.8k
    else if (PyComplex_CheckExact(v)) {
535
1
        if (p->version > 1) {
536
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
537
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
538
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
539
1
        }
540
0
        else {
541
0
            W_TYPE(TYPE_COMPLEX, p);
542
0
            w_float_str(PyComplex_RealAsDouble(v), p);
543
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
544
0
        }
545
1
    }
546
38.8k
    else if (PyBytes_CheckExact(v)) {
547
9.90k
        W_TYPE(TYPE_STRING, p);
548
9.90k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
549
9.90k
    }
550
28.9k
    else if (PyUnicode_CheckExact(v)) {
551
17.2k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
552
17.1k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
553
17.1k
            if (is_short) {
554
16.9k
                if (PyUnicode_CHECK_INTERNED(v))
555
15.1k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
556
1.78k
                else
557
1.78k
                    W_TYPE(TYPE_SHORT_ASCII, p);
558
16.9k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
559
16.9k
                                PyUnicode_GET_LENGTH(v), p);
560
16.9k
            }
561
207
            else {
562
207
                if (PyUnicode_CHECK_INTERNED(v))
563
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
564
207
                else
565
207
                    W_TYPE(TYPE_ASCII, p);
566
207
                w_pstring(PyUnicode_1BYTE_DATA(v),
567
207
                          PyUnicode_GET_LENGTH(v), p);
568
207
            }
569
17.1k
        }
570
70
        else {
571
70
            PyObject *utf8;
572
70
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
573
70
            if (utf8 == NULL) {
574
0
                p->depth--;
575
0
                p->error = WFERR_UNMARSHALLABLE;
576
0
                return;
577
0
            }
578
70
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
579
0
                W_TYPE(TYPE_INTERNED, p);
580
70
            else
581
70
                W_TYPE(TYPE_UNICODE, p);
582
70
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
583
70
            Py_DECREF(utf8);
584
70
        }
585
17.2k
    }
586
11.7k
    else if (PyTuple_CheckExact(v)) {
587
8.26k
        n = PyTuple_GET_SIZE(v);
588
8.26k
        if (p->version >= 4 && n < 256) {
589
8.26k
            W_TYPE(TYPE_SMALL_TUPLE, p);
590
8.26k
            w_byte((unsigned char)n, p);
591
8.26k
        }
592
0
        else {
593
0
            W_TYPE(TYPE_TUPLE, p);
594
0
            W_SIZE(n, p);
595
0
        }
596
52.8k
        for (i = 0; i < n; i++) {
597
44.6k
            w_object(PyTuple_GET_ITEM(v, i), p);
598
44.6k
        }
599
8.26k
    }
600
3.48k
    else if (PyList_CheckExact(v)) {
601
0
        W_TYPE(TYPE_LIST, p);
602
0
        n = PyList_GET_SIZE(v);
603
0
        W_SIZE(n, p);
604
0
        for (i = 0; i < n; i++) {
605
0
            w_object(PyList_GET_ITEM(v, i), p);
606
0
        }
607
0
    }
608
3.48k
    else if (PyAnyDict_CheckExact(v)) {
609
0
        Py_ssize_t pos;
610
0
        PyObject *key, *value;
611
0
        if (PyFrozenDict_CheckExact(v)) {
612
0
            if (p->version < 6) {
613
0
                w_byte(TYPE_UNKNOWN, p);
614
0
                p->error = WFERR_UNMARSHALLABLE;
615
0
                return;
616
0
            }
617
618
0
            W_TYPE(TYPE_FROZENDICT, p);
619
0
        }
620
0
        else {
621
0
            W_TYPE(TYPE_DICT, p);
622
0
        }
623
        /* This one is NULL object terminated! */
624
0
        pos = 0;
625
0
        while (PyDict_Next(v, &pos, &key, &value)) {
626
0
            w_object(key, p);
627
0
            w_object(value, p);
628
0
        }
629
0
        w_object((PyObject *)NULL, p);
630
0
        if (PyFrozenDict_CheckExact(v)) {
631
0
            w_complete(v, p);
632
0
        }
633
0
    }
634
3.48k
    else if (PyAnySet_CheckExact(v)) {
635
11
        PyObject *value;
636
11
        Py_ssize_t pos = 0;
637
11
        Py_hash_t hash;
638
639
11
        if (PyFrozenSet_CheckExact(v))
640
11
            W_TYPE(TYPE_FROZENSET, p);
641
0
        else
642
0
            W_TYPE(TYPE_SET, p);
643
11
        n = PySet_GET_SIZE(v);
644
11
        W_SIZE(n, p);
645
        // bpo-37596: To support reproducible builds, sets and frozensets need
646
        // to have their elements serialized in a consistent order (even when
647
        // they have been scrambled by hash randomization). To ensure this, we
648
        // use an order equivalent to sorted(v, key=marshal.dumps):
649
11
        PyObject *pairs = PyList_New(n);
650
11
        if (pairs == NULL) {
651
0
            p->error = WFERR_NOMEMORY;
652
0
            return;
653
0
        }
654
11
        Py_ssize_t i = 0;
655
11
        Py_BEGIN_CRITICAL_SECTION(v);
656
76
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
657
65
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
658
65
                                    p->version, p->allow_code);
659
65
            if (dump == NULL) {
660
0
                p->error = WFERR_UNMARSHALLABLE;
661
0
                Py_DECREF(value);
662
0
                break;
663
0
            }
664
65
            PyObject *pair = _PyTuple_FromPairSteal(dump, value);
665
65
            if (pair == NULL) {
666
0
                p->error = WFERR_NOMEMORY;
667
0
                break;
668
0
            }
669
65
            PyList_SET_ITEM(pairs, i++, pair);
670
65
        }
671
11
        Py_END_CRITICAL_SECTION();
672
11
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
673
0
            Py_DECREF(pairs);
674
0
            return;
675
0
        }
676
11
        assert(i == n);
677
11
        if (PyList_Sort(pairs)) {
678
0
            p->error = WFERR_NOMEMORY;
679
0
            Py_DECREF(pairs);
680
0
            return;
681
0
        }
682
76
        for (Py_ssize_t i = 0; i < n; i++) {
683
65
            PyObject *pair = PyList_GET_ITEM(pairs, i);
684
65
            value = PyTuple_GET_ITEM(pair, 1);
685
65
            w_object(value, p);
686
65
        }
687
11
        Py_DECREF(pairs);
688
11
    }
689
3.47k
    else if (PyCode_Check(v)) {
690
3.44k
        if (!p->allow_code) {
691
0
            p->error = WFERR_CODE_NOT_ALLOWED;
692
0
            return;
693
0
        }
694
3.44k
        PyCodeObject *co = (PyCodeObject *)v;
695
3.44k
        PyObject *co_code = _PyCode_GetCode(co);
696
3.44k
        if (co_code == NULL) {
697
0
            p->error = WFERR_NOMEMORY;
698
0
            return;
699
0
        }
700
3.44k
        W_TYPE(TYPE_CODE, p);
701
3.44k
        w_long(co->co_argcount, p);
702
3.44k
        w_long(co->co_posonlyargcount, p);
703
3.44k
        w_long(co->co_kwonlyargcount, p);
704
3.44k
        w_long(co->co_stacksize, p);
705
3.44k
        w_long(co->co_flags, p);
706
3.44k
        w_object(co_code, p);
707
3.44k
        w_object(co->co_consts, p);
708
3.44k
        w_object(co->co_names, p);
709
3.44k
        w_object(co->co_localsplusnames, p);
710
3.44k
        w_object(co->co_localspluskinds, p);
711
3.44k
        w_object(co->co_filename, p);
712
3.44k
        w_object(co->co_name, p);
713
3.44k
        w_object(co->co_qualname, p);
714
3.44k
        w_long(co->co_firstlineno, p);
715
3.44k
        w_object(co->co_linetable, p);
716
3.44k
        w_object(co->co_exceptiontable, p);
717
3.44k
        Py_DECREF(co_code);
718
3.44k
        w_complete(v, p);
719
3.44k
    }
720
25
    else if (PyObject_CheckBuffer(v)) {
721
        /* Write unknown bytes-like objects as a bytes object */
722
0
        Py_buffer view;
723
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
724
0
            w_byte(TYPE_UNKNOWN, p);
725
0
            p->depth--;
726
0
            p->error = WFERR_UNMARSHALLABLE;
727
0
            return;
728
0
        }
729
0
        W_TYPE(TYPE_STRING, p);
730
0
        w_pstring(view.buf, view.len, p);
731
0
        PyBuffer_Release(&view);
732
0
    }
733
25
    else if (PySlice_Check(v)) {
734
25
        if (p->version < 5) {
735
0
            w_byte(TYPE_UNKNOWN, p);
736
0
            p->error = WFERR_UNMARSHALLABLE;
737
0
            return;
738
0
        }
739
25
        PySliceObject *slice = (PySliceObject *)v;
740
25
        W_TYPE(TYPE_SLICE, p);
741
25
        w_object(slice->start, p);
742
25
        w_object(slice->stop, p);
743
25
        w_object(slice->step, p);
744
25
        w_complete(v, p);
745
25
    }
746
0
    else {
747
0
        W_TYPE(TYPE_UNKNOWN, p);
748
0
        p->error = WFERR_UNMARSHALLABLE;
749
0
    }
750
40.7k
}
751
752
static void
753
w_decref_entry(void *key)
754
21.4k
{
755
21.4k
    PyObject *entry_key = (PyObject *)key;
756
21.4k
    Py_XDECREF(entry_key);
757
21.4k
}
758
759
static int
760
w_init_refs(WFILE *wf, int version)
761
238
{
762
238
    if (version >= 3) {
763
238
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
764
238
                                               _Py_hashtable_compare_direct,
765
238
                                               w_decref_entry, NULL, NULL);
766
238
        if (wf->hashtable == NULL) {
767
0
            PyErr_NoMemory();
768
0
            return -1;
769
0
        }
770
238
    }
771
238
    return 0;
772
238
}
773
774
static void
775
w_clear_refs(WFILE *wf)
776
238
{
777
238
    if (wf->hashtable != NULL) {
778
238
        _Py_hashtable_destroy(wf->hashtable);
779
238
    }
780
238
}
781
782
/* version currently has no effect for writing ints. */
783
/* Note that while the documentation states that this function
784
 * can error, currently it never does. Setting an exception in
785
 * this function should be regarded as an API-breaking change.
786
 */
787
void
788
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
789
0
{
790
0
    char buf[4];
791
0
    WFILE wf;
792
0
    memset(&wf, 0, sizeof(wf));
793
0
    wf.fp = fp;
794
0
    wf.ptr = wf.buf = buf;
795
0
    wf.end = wf.ptr + sizeof(buf);
796
0
    wf.error = WFERR_OK;
797
0
    wf.version = version;
798
0
    w_long(x, &wf);
799
0
    w_flush(&wf);
800
0
}
801
802
void
803
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
804
0
{
805
0
    char buf[BUFSIZ];
806
0
    WFILE wf;
807
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
808
0
        return; /* caller must check PyErr_Occurred() */
809
0
    }
810
0
    memset(&wf, 0, sizeof(wf));
811
0
    wf.fp = fp;
812
0
    wf.ptr = wf.buf = buf;
813
0
    wf.end = wf.ptr + sizeof(buf);
814
0
    wf.error = WFERR_OK;
815
0
    wf.version = version;
816
0
    wf.allow_code = 1;
817
0
    if (w_init_refs(&wf, version)) {
818
0
        return; /* caller must check PyErr_Occurred() */
819
0
    }
820
0
    w_object(x, &wf);
821
0
    w_clear_refs(&wf);
822
0
    w_flush(&wf);
823
0
}
824
825
/* State of one marshal read operation.
 *
 * Three input sources are visible in r_string():
 *  - ptr != NULL: in-memory data; bytes are handed out directly from
 *    [ptr, end) (the "fast path for loads()").
 *  - readable != NULL: a stream-like object whose readinto() fills buf.
 *  - otherwise: the C stream fp, read with fread() into buf.
 * buf is a PyMem-allocated scratch buffer of buf_size bytes, grown on demand.
 * NOTE(review): depth, refs and allow_code are presumably the read-side
 * counterparts of the WFILE members of the same purpose -- their use is not
 * visible in this part of the file.
 */
typedef struct {
826
    FILE *fp;
827
    int depth;
828
    PyObject *readable;  /* Stream-like object being read from */
829
    const char *ptr;
830
    const char *end;
831
    char *buf;
832
    Py_ssize_t buf_size;
833
    PyObject *refs;  /* a list */
834
    int allow_code;
835
} RFILE;
836
837
static const char *
838
r_string(Py_ssize_t n, RFILE *p)
839
5.40M
{
840
5.40M
    Py_ssize_t read = -1;
841
842
5.40M
    if (p->ptr != NULL) {
843
        /* Fast path for loads() */
844
5.40M
        const char *res = p->ptr;
845
5.40M
        Py_ssize_t left = p->end - p->ptr;
846
5.40M
        if (left < n) {
847
0
            PyErr_SetString(PyExc_EOFError,
848
0
                            "marshal data too short");
849
0
            return NULL;
850
0
        }
851
5.40M
        p->ptr += n;
852
5.40M
        return res;
853
5.40M
    }
854
0
    if (p->buf == NULL) {
855
0
        p->buf = PyMem_Malloc(n);
856
0
        if (p->buf == NULL) {
857
0
            PyErr_NoMemory();
858
0
            return NULL;
859
0
        }
860
0
        p->buf_size = n;
861
0
    }
862
0
    else if (p->buf_size < n) {
863
0
        char *tmp = PyMem_Realloc(p->buf, n);
864
0
        if (tmp == NULL) {
865
0
            PyErr_NoMemory();
866
0
            return NULL;
867
0
        }
868
0
        p->buf = tmp;
869
0
        p->buf_size = n;
870
0
    }
871
872
0
    if (!p->readable) {
873
0
        assert(p->fp != NULL);
874
0
        read = fread(p->buf, 1, n, p->fp);
875
0
    }
876
0
    else {
877
0
        PyObject *res, *mview;
878
0
        Py_buffer buf;
879
880
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
881
0
            return NULL;
882
0
        mview = PyMemoryView_FromBuffer(&buf);
883
0
        if (mview == NULL)
884
0
            return NULL;
885
886
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
887
0
        if (res != NULL) {
888
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
889
0
            Py_DECREF(res);
890
0
        }
891
0
    }
892
0
    if (read != n) {
893
0
        if (!PyErr_Occurred()) {
894
0
            if (read > n)
895
0
                PyErr_Format(PyExc_ValueError,
896
0
                             "read() returned too much data: "
897
0
                             "%zd bytes requested, %zd returned",
898
0
                             n, read);
899
0
            else
900
0
                PyErr_SetString(PyExc_EOFError,
901
0
                                "EOF read where not expected");
902
0
        }
903
0
        return NULL;
904
0
    }
905
0
    return p->buf;
906
0
}
907
908
static int
909
r_byte(RFILE *p)
910
6.25M
{
911
6.25M
    if (p->ptr != NULL) {
912
6.25M
        if (p->ptr < p->end) {
913
6.25M
            return (unsigned char) *p->ptr++;
914
6.25M
        }
915
6.25M
    }
916
0
    else if (!p->readable) {
917
0
        assert(p->fp);
918
0
        int c = getc(p->fp);
919
0
        if (c != EOF) {
920
0
            return c;
921
0
        }
922
0
    }
923
0
    else {
924
0
        const char *ptr = r_string(1, p);
925
0
        if (ptr != NULL) {
926
0
            return *(const unsigned char *) ptr;
927
0
        }
928
0
        return EOF;
929
0
    }
930
0
    PyErr_SetString(PyExc_EOFError,
931
0
                    "EOF read where not expected");
932
0
    return EOF;
933
6.25M
}
934
935
static int
936
r_short(RFILE *p)
937
1.67k
{
938
1.67k
    short x = -1;
939
1.67k
    const unsigned char *buffer;
940
941
1.67k
    buffer = (const unsigned char *) r_string(2, p);
942
1.67k
    if (buffer != NULL) {
943
1.67k
        x = buffer[0];
944
1.67k
        x |= buffer[1] << 8;
945
        /* Sign-extension, in case short greater than 16 bits */
946
1.67k
        x |= -(x & 0x8000);
947
1.67k
    }
948
1.67k
    return x;
949
1.67k
}
950
951
static long
952
r_long(RFILE *p)
953
3.53M
{
954
3.53M
    long x = -1;
955
3.53M
    const unsigned char *buffer;
956
957
3.53M
    buffer = (const unsigned char *) r_string(4, p);
958
3.53M
    if (buffer != NULL) {
959
3.53M
        x = buffer[0];
960
3.53M
        x |= (long)buffer[1] << 8;
961
3.53M
        x |= (long)buffer[2] << 16;
962
3.53M
        x |= (long)buffer[3] << 24;
963
3.53M
#if SIZEOF_LONG > 4
964
        /* Sign extension for 64-bit machines */
965
3.53M
        x |= -(x & 0x80000000L);
966
3.53M
#endif
967
3.53M
    }
968
3.53M
    return x;
969
3.53M
}
970
971
/* r_long64 deals with the TYPE_INT64 code. */
972
static PyObject *
973
r_long64(RFILE *p)
974
0
{
975
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
976
0
    if (buffer == NULL) {
977
0
        return NULL;
978
0
    }
979
0
    return _PyLong_FromByteArray(buffer, 8,
980
0
                                 1 /* little endian */,
981
0
                                 1 /* signed */);
982
0
}
983
984
#define _w_digits(bitsize)                                              \
985
static int                                                              \
986
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
987
                   Py_ssize_t marshal_ratio,                            \
988
437
                   int shorts_in_top_digit, RFILE *p)                   \
989
437
{                                                                       \
990
437
    uint ## bitsize ## _t d;                                            \
991
437
                                                                        \
992
437
    assert(size >= 1);                                                  \
993
1.01k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
994
580
        d = 0;                                                          \
995
1.74k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
996
1.16k
            int md = r_short(p);                                        \
997
1.16k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
998
0
                goto bad_digit;                                         \
999
0
            }                                                           \
1000
1.16k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
1001
1.16k
        }                                                               \
1002
580
        digits[i] = d;                                                  \
1003
580
    }                                                                   \
1004
437
                                                                        \
1005
437
    d = 0;                                                              \
1006
950
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
1007
513
        int md = r_short(p);                                            \
1008
513
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
1009
0
            goto bad_digit;                                             \
1010
0
        }                                                               \
1011
513
        /* topmost marshal digit should be nonzero */                   \
1012
513
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
1013
0
            PyErr_SetString(PyExc_ValueError,                           \
1014
0
                "bad marshal data (unnormalized long data)");           \
1015
0
            return -1;                                                  \
1016
0
        }                                                               \
1017
513
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
1018
513
    }                                                                   \
1019
437
    assert(!PyErr_Occurred());                                          \
1020
437
    /* top digit should be nonzero, else the resulting PyLong won't be  \
1021
437
       normalized */                                                    \
1022
437
    digits[size - 1] = d;                                               \
1023
437
    return 0;                                                           \
1024
437
                                                                        \
1025
0
bad_digit:                                                              \
1026
0
    if (!PyErr_Occurred()) {                                            \
1027
0
        PyErr_SetString(PyExc_ValueError,                               \
1028
0
            "bad marshal data (digit out of range in long)");           \
1029
0
    }                                                                   \
1030
0
    return -1;                                                          \
1031
437
}
1032
437
_w_digits(32)
1033
0
_w_digits(16)
1034
#undef _w_digits
1035
1036
static PyObject *
1037
r_PyLong(RFILE *p)
1038
437
{
1039
437
    long n = r_long(p);
1040
437
    if (n == -1 && PyErr_Occurred()) {
1041
0
        return NULL;
1042
0
    }
1043
437
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1044
0
        PyErr_SetString(PyExc_ValueError,
1045
0
                       "bad marshal data (long size out of range)");
1046
0
        return NULL;
1047
0
    }
1048
1049
437
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1050
437
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1051
1052
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1053
437
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1054
437
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1055
1056
    /* other assumptions on PyLongObject internals */
1057
437
    assert(layout->bits_per_digit <= 32);
1058
437
    assert(layout->digits_order == -1);
1059
437
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1060
437
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1061
1062
437
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1063
1064
437
    assert(size >= 1);
1065
1066
437
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1067
437
    void *digits;
1068
437
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1069
1070
437
    if (writer == NULL) {
1071
0
        return NULL;
1072
0
    }
1073
1074
437
    int ret;
1075
1076
437
    if (layout->digit_size == 4) {
1077
437
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1078
437
    }
1079
0
    else {
1080
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1081
0
    }
1082
437
    if (ret < 0) {
1083
0
        PyLongWriter_Discard(writer);
1084
0
        return NULL;
1085
0
    }
1086
437
    return PyLongWriter_Finish(writer);
1087
437
}
1088
1089
static double
1090
r_float_bin(RFILE *p)
1091
637
{
1092
637
    const char *buf = r_string(8, p);
1093
637
    if (buf == NULL)
1094
0
        return -1;
1095
637
    return PyFloat_Unpack8(buf, 1);
1096
637
}
1097
1098
/* Issue #33720: Disable inlining for reducing the C stack consumption
1099
   on PGO builds. */
1100
Py_NO_INLINE static double
1101
r_float_str(RFILE *p)
1102
0
{
1103
0
    int n;
1104
0
    char buf[256];
1105
0
    const char *ptr;
1106
0
    n = r_byte(p);
1107
0
    if (n == EOF) {
1108
0
        return -1;
1109
0
    }
1110
0
    ptr = r_string(n, p);
1111
0
    if (ptr == NULL) {
1112
0
        return -1;
1113
0
    }
1114
0
    memcpy(buf, ptr, n);
1115
0
    buf[n] = '\0';
1116
0
    return PyOS_string_to_double(buf, NULL, NULL);
1117
0
}
1118
1119
/* allocate the reflist index for a new object. Return -1 on failure */
1120
static Py_ssize_t
1121
r_ref_reserve(int flag, RFILE *p)
1122
178k
{
1123
178k
    if (flag) { /* currently only FLAG_REF is defined */
1124
6.53k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1125
6.53k
        if (idx >= 0x7ffffffe) {
1126
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1127
0
            return -1;
1128
0
        }
1129
6.53k
        if (PyList_Append(p->refs, Py_None) < 0)
1130
0
            return -1;
1131
6.53k
        return idx;
1132
6.53k
    } else
1133
171k
        return 0;
1134
178k
}
1135
1136
/* insert the new object 'o' to the reflist at previously
1137
 * allocated index 'idx'.
1138
 * 'o' can be NULL, in which case nothing is done.
1139
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1140
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1141
 * NULL returned. This simplifies error checking at the call site since
1142
 * a single test for NULL for the function result is enough.
1143
 */
1144
static PyObject *
1145
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1146
178k
{
1147
178k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1148
6.53k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1149
6.53k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1150
6.53k
        Py_DECREF(tmp);
1151
6.53k
    }
1152
178k
    return o;
1153
178k
}
1154
1155
/* combination of both above, used when an object can be
1156
 * created whenever it is seen in the file, as opposed to
1157
 * after having loaded its sub-objects.
1158
 */
1159
static PyObject *
1160
r_ref(PyObject *o, int flag, RFILE *p)
1161
1.42M
{
1162
1.42M
    assert(flag & FLAG_REF);
1163
1.42M
    if (o == NULL)
1164
0
        return NULL;
1165
1.42M
    if (PyList_Append(p->refs, o) < 0) {
1166
0
        Py_DECREF(o); /* release the new object */
1167
0
        return NULL;
1168
0
    }
1169
1.42M
    return o;
1170
1.42M
}
1171
1172
static PyObject *
1173
r_object(RFILE *p)
1174
4.48M
{
1175
    /* NULL is a valid return value, it does not necessarily means that
1176
       an exception is set. */
1177
4.48M
    PyObject *v, *v2;
1178
4.48M
    Py_ssize_t idx = 0;
1179
4.48M
    long i, n;
1180
4.48M
    int type, code = r_byte(p);
1181
4.48M
    int flag, is_interned = 0;
1182
4.48M
    PyObject *retval = NULL;
1183
1184
4.48M
    if (code == EOF) {
1185
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1186
0
            PyErr_SetString(PyExc_EOFError,
1187
0
                            "EOF read where object expected");
1188
0
        }
1189
0
        return NULL;
1190
0
    }
1191
1192
4.48M
    p->depth++;
1193
1194
4.48M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1195
0
        p->depth--;
1196
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1197
0
        return NULL;
1198
0
    }
1199
1200
4.48M
    flag = code & FLAG_REF;
1201
4.48M
    type = code & ~FLAG_REF;
1202
1203
4.48M
#define R_REF(O) do{\
1204
2.35M
    if (flag) \
1205
2.35M
        O = r_ref(O, flag, p);\
1206
2.35M
} while (0)
1207
1208
4.48M
    switch (type) {
1209
1210
0
    case TYPE_NULL:
1211
0
        break;
1212
1213
40.6k
    case TYPE_NONE:
1214
40.6k
        retval = Py_None;
1215
40.6k
        break;
1216
1217
0
    case TYPE_STOPITER:
1218
0
        retval = Py_NewRef(PyExc_StopIteration);
1219
0
        break;
1220
1221
396
    case TYPE_ELLIPSIS:
1222
396
        retval = Py_Ellipsis;
1223
396
        break;
1224
1225
1.94k
    case TYPE_FALSE:
1226
1.94k
        retval = Py_False;
1227
1.94k
        break;
1228
1229
1.72k
    case TYPE_TRUE:
1230
1.72k
        retval = Py_True;
1231
1.72k
        break;
1232
1233
24.4k
    case TYPE_INT:
1234
24.4k
        n = r_long(p);
1235
24.4k
        if (n == -1 && PyErr_Occurred()) {
1236
0
            break;
1237
0
        }
1238
24.4k
        retval = PyLong_FromLong(n);
1239
24.4k
        R_REF(retval);
1240
24.4k
        break;
1241
1242
0
    case TYPE_INT64:
1243
0
        retval = r_long64(p);
1244
0
        R_REF(retval);
1245
0
        break;
1246
1247
437
    case TYPE_LONG:
1248
437
        retval = r_PyLong(p);
1249
437
        R_REF(retval);
1250
437
        break;
1251
1252
0
    case TYPE_FLOAT:
1253
0
        {
1254
0
            double x = r_float_str(p);
1255
0
            if (x == -1.0 && PyErr_Occurred())
1256
0
                break;
1257
0
            retval = PyFloat_FromDouble(x);
1258
0
            R_REF(retval);
1259
0
            break;
1260
0
        }
1261
1262
631
    case TYPE_BINARY_FLOAT:
1263
631
        {
1264
631
            double x = r_float_bin(p);
1265
631
            if (x == -1.0 && PyErr_Occurred())
1266
0
                break;
1267
631
            retval = PyFloat_FromDouble(x);
1268
631
            R_REF(retval);
1269
631
            break;
1270
631
        }
1271
1272
0
    case TYPE_COMPLEX:
1273
0
        {
1274
0
            Py_complex c;
1275
0
            c.real = r_float_str(p);
1276
0
            if (c.real == -1.0 && PyErr_Occurred())
1277
0
                break;
1278
0
            c.imag = r_float_str(p);
1279
0
            if (c.imag == -1.0 && PyErr_Occurred())
1280
0
                break;
1281
0
            retval = PyComplex_FromCComplex(c);
1282
0
            R_REF(retval);
1283
0
            break;
1284
0
        }
1285
1286
3
    case TYPE_BINARY_COMPLEX:
1287
3
        {
1288
3
            Py_complex c;
1289
3
            c.real = r_float_bin(p);
1290
3
            if (c.real == -1.0 && PyErr_Occurred())
1291
0
                break;
1292
3
            c.imag = r_float_bin(p);
1293
3
            if (c.imag == -1.0 && PyErr_Occurred())
1294
0
                break;
1295
3
            retval = PyComplex_FromCComplex(c);
1296
3
            R_REF(retval);
1297
3
            break;
1298
3
        }
1299
1300
526k
    case TYPE_STRING:
1301
526k
        {
1302
526k
            const char *ptr;
1303
526k
            n = r_long(p);
1304
526k
            if (n < 0 || n > SIZE32_MAX) {
1305
0
                if (!PyErr_Occurred()) {
1306
0
                    PyErr_SetString(PyExc_ValueError,
1307
0
                        "bad marshal data (bytes object size out of range)");
1308
0
                }
1309
0
                break;
1310
0
            }
1311
526k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1312
526k
            if (v == NULL)
1313
0
                break;
1314
526k
            ptr = r_string(n, p);
1315
526k
            if (ptr == NULL) {
1316
0
                Py_DECREF(v);
1317
0
                break;
1318
0
            }
1319
526k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1320
526k
            retval = v;
1321
526k
            R_REF(retval);
1322
526k
            break;
1323
526k
        }
1324
1325
0
    case TYPE_ASCII_INTERNED:
1326
0
        is_interned = 1;
1327
0
        _Py_FALLTHROUGH;
1328
27.0k
    case TYPE_ASCII:
1329
27.0k
        n = r_long(p);
1330
27.0k
        if (n < 0 || n > SIZE32_MAX) {
1331
0
            if (!PyErr_Occurred()) {
1332
0
                PyErr_SetString(PyExc_ValueError,
1333
0
                    "bad marshal data (string size out of range)");
1334
0
            }
1335
0
            break;
1336
0
        }
1337
27.0k
        goto _read_ascii;
1338
1339
1.14M
    case TYPE_SHORT_ASCII_INTERNED:
1340
1.14M
        is_interned = 1;
1341
1.14M
        _Py_FALLTHROUGH;
1342
1.30M
    case TYPE_SHORT_ASCII:
1343
1.30M
        n = r_byte(p);
1344
1.30M
        if (n == EOF) {
1345
0
            break;
1346
0
        }
1347
1.33M
    _read_ascii:
1348
1.33M
        {
1349
1.33M
            const char *ptr;
1350
1.33M
            ptr = r_string(n, p);
1351
1.33M
            if (ptr == NULL)
1352
0
                break;
1353
1.33M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1354
1.33M
            if (v == NULL)
1355
0
                break;
1356
1.33M
            if (is_interned) {
1357
                // marshal is meant to serialize .pyc files with code
1358
                // objects, and code-related strings are currently immortal.
1359
1.14M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1360
1.14M
                _PyUnicode_InternImmortal(interp, &v);
1361
1.14M
            }
1362
1.33M
            retval = v;
1363
1.33M
            R_REF(retval);
1364
1.33M
            break;
1365
1.33M
        }
1366
1367
206
    case TYPE_INTERNED:
1368
206
        is_interned = 1;
1369
206
        _Py_FALLTHROUGH;
1370
3.58k
    case TYPE_UNICODE:
1371
3.58k
        {
1372
3.58k
        const char *buffer;
1373
1374
3.58k
        n = r_long(p);
1375
3.58k
        if (n < 0 || n > SIZE32_MAX) {
1376
0
            if (!PyErr_Occurred()) {
1377
0
                PyErr_SetString(PyExc_ValueError,
1378
0
                    "bad marshal data (string size out of range)");
1379
0
            }
1380
0
            break;
1381
0
        }
1382
3.58k
        if (n != 0) {
1383
3.58k
            buffer = r_string(n, p);
1384
3.58k
            if (buffer == NULL)
1385
0
                break;
1386
3.58k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1387
3.58k
        }
1388
0
        else {
1389
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1390
0
        }
1391
3.58k
        if (v == NULL)
1392
0
            break;
1393
3.58k
        if (is_interned) {
1394
            // marshal is meant to serialize .pyc files with code
1395
            // objects, and code-related strings are currently immortal.
1396
206
            PyInterpreterState *interp = _PyInterpreterState_GET();
1397
206
            _PyUnicode_InternImmortal(interp, &v);
1398
206
        }
1399
3.58k
        retval = v;
1400
3.58k
        R_REF(retval);
1401
3.58k
        break;
1402
3.58k
        }
1403
1404
465k
    case TYPE_SMALL_TUPLE:
1405
465k
        n = r_byte(p);
1406
465k
        if (n == EOF) {
1407
0
            break;
1408
0
        }
1409
465k
        goto _read_tuple;
1410
465k
    case TYPE_TUPLE:
1411
71
        n = r_long(p);
1412
71
        if (n < 0 || n > SIZE32_MAX) {
1413
0
            if (!PyErr_Occurred()) {
1414
0
                PyErr_SetString(PyExc_ValueError,
1415
0
                    "bad marshal data (tuple size out of range)");
1416
0
            }
1417
0
            break;
1418
0
        }
1419
465k
    _read_tuple:
1420
465k
        v = PyTuple_New(n);
1421
465k
        R_REF(v);
1422
465k
        if (v == NULL)
1423
0
            break;
1424
1425
3.18M
        for (i = 0; i < n; i++) {
1426
2.72M
            v2 = r_object(p);
1427
2.72M
            if ( v2 == NULL ) {
1428
0
                if (!PyErr_Occurred())
1429
0
                    PyErr_SetString(PyExc_TypeError,
1430
0
                        "NULL object in marshal data for tuple");
1431
0
                Py_SETREF(v, NULL);
1432
0
                break;
1433
0
            }
1434
2.72M
            PyTuple_SET_ITEM(v, i, v2);
1435
2.72M
        }
1436
465k
        retval = v;
1437
465k
        break;
1438
1439
0
    case TYPE_LIST:
1440
0
        n = r_long(p);
1441
0
        if (n < 0 || n > SIZE32_MAX) {
1442
0
            if (!PyErr_Occurred()) {
1443
0
                PyErr_SetString(PyExc_ValueError,
1444
0
                    "bad marshal data (list size out of range)");
1445
0
            }
1446
0
            break;
1447
0
        }
1448
0
        v = PyList_New(n);
1449
0
        R_REF(v);
1450
0
        if (v == NULL)
1451
0
            break;
1452
0
        for (i = 0; i < n; i++) {
1453
0
            v2 = r_object(p);
1454
0
            if ( v2 == NULL ) {
1455
0
                if (!PyErr_Occurred())
1456
0
                    PyErr_SetString(PyExc_TypeError,
1457
0
                        "NULL object in marshal data for list");
1458
0
                Py_SETREF(v, NULL);
1459
0
                break;
1460
0
            }
1461
0
            PyList_SET_ITEM(v, i, v2);
1462
0
        }
1463
0
        retval = v;
1464
0
        break;
1465
1466
0
    case TYPE_DICT:
1467
0
    case TYPE_FROZENDICT:
1468
0
        v = PyDict_New();
1469
0
        if (v == NULL) {
1470
0
            break;
1471
0
        }
1472
0
        if (type == TYPE_DICT) {
1473
0
            R_REF(v);
1474
0
        }
1475
0
        else {
1476
0
            idx = r_ref_reserve(flag, p);
1477
0
            if (idx < 0) {
1478
0
                Py_CLEAR(v);
1479
0
                break;
1480
0
            }
1481
0
        }
1482
0
        for (;;) {
1483
0
            PyObject *key, *val;
1484
0
            key = r_object(p);
1485
0
            if (key == NULL)
1486
0
                break;
1487
0
            val = r_object(p);
1488
0
            if (val == NULL) {
1489
0
                Py_DECREF(key);
1490
0
                break;
1491
0
            }
1492
0
            if (PyDict_SetItem(v, key, val) < 0) {
1493
0
                Py_DECREF(key);
1494
0
                Py_DECREF(val);
1495
0
                break;
1496
0
            }
1497
0
            Py_DECREF(key);
1498
0
            Py_DECREF(val);
1499
0
        }
1500
0
        if (PyErr_Occurred()) {
1501
0
            Py_CLEAR(v);
1502
0
        }
1503
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1504
0
            Py_SETREF(v, PyFrozenDict_New(v));
1505
0
        }
1506
0
        retval = v;
1507
0
        break;
1508
1509
0
    case TYPE_SET:
1510
507
    case TYPE_FROZENSET:
1511
507
        n = r_long(p);
1512
507
        if (n < 0 || n > SIZE32_MAX) {
1513
0
            if (!PyErr_Occurred()) {
1514
0
                PyErr_SetString(PyExc_ValueError,
1515
0
                    "bad marshal data (set size out of range)");
1516
0
            }
1517
0
            break;
1518
0
        }
1519
1520
507
        if (n == 0 && type == TYPE_FROZENSET) {
1521
            /* call frozenset() to get the empty frozenset singleton */
1522
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1523
0
            if (v == NULL)
1524
0
                break;
1525
0
            R_REF(v);
1526
0
            retval = v;
1527
0
        }
1528
507
        else {
1529
507
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1530
507
            if (type == TYPE_SET) {
1531
0
                R_REF(v);
1532
507
            } else {
1533
                /* must use delayed registration of frozensets because they must
1534
                 * be init with a refcount of 1
1535
                 */
1536
507
                idx = r_ref_reserve(flag, p);
1537
507
                if (idx < 0)
1538
0
                    Py_CLEAR(v); /* signal error */
1539
507
            }
1540
507
            if (v == NULL)
1541
0
                break;
1542
1543
2.62k
            for (i = 0; i < n; i++) {
1544
2.11k
                v2 = r_object(p);
1545
2.11k
                if ( v2 == NULL ) {
1546
0
                    if (!PyErr_Occurred())
1547
0
                        PyErr_SetString(PyExc_TypeError,
1548
0
                            "NULL object in marshal data for set");
1549
0
                    Py_SETREF(v, NULL);
1550
0
                    break;
1551
0
                }
1552
2.11k
                if (PySet_Add(v, v2) == -1) {
1553
0
                    Py_DECREF(v);
1554
0
                    Py_DECREF(v2);
1555
0
                    v = NULL;
1556
0
                    break;
1557
0
                }
1558
2.11k
                Py_DECREF(v2);
1559
2.11k
            }
1560
507
            if (type != TYPE_SET)
1561
507
                v = r_ref_insert(v, idx, flag, p);
1562
507
            retval = v;
1563
507
        }
1564
507
        break;
1565
1566
174k
    case TYPE_CODE:
1567
174k
        {
1568
174k
            int argcount;
1569
174k
            int posonlyargcount;
1570
174k
            int kwonlyargcount;
1571
174k
            int stacksize;
1572
174k
            int flags;
1573
174k
            PyObject *code = NULL;
1574
174k
            PyObject *consts = NULL;
1575
174k
            PyObject *names = NULL;
1576
174k
            PyObject *localsplusnames = NULL;
1577
174k
            PyObject *localspluskinds = NULL;
1578
174k
            PyObject *filename = NULL;
1579
174k
            PyObject *name = NULL;
1580
174k
            PyObject *qualname = NULL;
1581
174k
            int firstlineno;
1582
174k
            PyObject* linetable = NULL;
1583
174k
            PyObject *exceptiontable = NULL;
1584
1585
174k
            if (!p->allow_code) {
1586
0
                PyErr_SetString(PyExc_ValueError,
1587
0
                                "unmarshalling code objects is disallowed");
1588
0
                break;
1589
0
            }
1590
174k
            idx = r_ref_reserve(flag, p);
1591
174k
            if (idx < 0)
1592
0
                break;
1593
1594
174k
            v = NULL;
1595
1596
            /* XXX ignore long->int overflows for now */
1597
174k
            argcount = (int)r_long(p);
1598
174k
            if (argcount == -1 && PyErr_Occurred())
1599
0
                goto code_error;
1600
174k
            posonlyargcount = (int)r_long(p);
1601
174k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1602
0
                goto code_error;
1603
0
            }
1604
174k
            kwonlyargcount = (int)r_long(p);
1605
174k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1606
0
                goto code_error;
1607
174k
            stacksize = (int)r_long(p);
1608
174k
            if (stacksize == -1 && PyErr_Occurred())
1609
0
                goto code_error;
1610
174k
            flags = (int)r_long(p);
1611
174k
            if (flags == -1 && PyErr_Occurred())
1612
0
                goto code_error;
1613
174k
            code = r_object(p);
1614
174k
            if (code == NULL)
1615
0
                goto code_error;
1616
174k
            consts = r_object(p);
1617
174k
            if (consts == NULL)
1618
0
                goto code_error;
1619
174k
            names = r_object(p);
1620
174k
            if (names == NULL)
1621
0
                goto code_error;
1622
174k
            localsplusnames = r_object(p);
1623
174k
            if (localsplusnames == NULL)
1624
0
                goto code_error;
1625
174k
            localspluskinds = r_object(p);
1626
174k
            if (localspluskinds == NULL)
1627
0
                goto code_error;
1628
174k
            filename = r_object(p);
1629
174k
            if (filename == NULL)
1630
0
                goto code_error;
1631
174k
            name = r_object(p);
1632
174k
            if (name == NULL)
1633
0
                goto code_error;
1634
174k
            qualname = r_object(p);
1635
174k
            if (qualname == NULL)
1636
0
                goto code_error;
1637
174k
            firstlineno = (int)r_long(p);
1638
174k
            if (firstlineno == -1 && PyErr_Occurred())
1639
0
                goto code_error;
1640
174k
            linetable = r_object(p);
1641
174k
            if (linetable == NULL)
1642
0
                goto code_error;
1643
174k
            exceptiontable = r_object(p);
1644
174k
            if (exceptiontable == NULL)
1645
0
                goto code_error;
1646
1647
174k
            struct _PyCodeConstructor con = {
1648
174k
                .filename = filename,
1649
174k
                .name = name,
1650
174k
                .qualname = qualname,
1651
174k
                .flags = flags,
1652
1653
174k
                .code = code,
1654
174k
                .firstlineno = firstlineno,
1655
174k
                .linetable = linetable,
1656
1657
174k
                .consts = consts,
1658
174k
                .names = names,
1659
1660
174k
                .localsplusnames = localsplusnames,
1661
174k
                .localspluskinds = localspluskinds,
1662
1663
174k
                .argcount = argcount,
1664
174k
                .posonlyargcount = posonlyargcount,
1665
174k
                .kwonlyargcount = kwonlyargcount,
1666
1667
174k
                .stacksize = stacksize,
1668
1669
174k
                .exceptiontable = exceptiontable,
1670
174k
            };
1671
1672
174k
            if (_PyCode_Validate(&con) < 0) {
1673
0
                goto code_error;
1674
0
            }
1675
1676
174k
            v = (PyObject *)_PyCode_New(&con);
1677
174k
            if (v == NULL) {
1678
0
                goto code_error;
1679
0
            }
1680
1681
174k
            v = r_ref_insert(v, idx, flag, p);
1682
1683
174k
          code_error:
1684
174k
            if (v == NULL && !PyErr_Occurred()) {
1685
0
                PyErr_SetString(PyExc_TypeError,
1686
0
                    "NULL object in marshal data for code object");
1687
0
            }
1688
174k
            Py_XDECREF(code);
1689
174k
            Py_XDECREF(consts);
1690
174k
            Py_XDECREF(names);
1691
174k
            Py_XDECREF(localsplusnames);
1692
174k
            Py_XDECREF(localspluskinds);
1693
174k
            Py_XDECREF(filename);
1694
174k
            Py_XDECREF(name);
1695
174k
            Py_XDECREF(qualname);
1696
174k
            Py_XDECREF(linetable);
1697
174k
            Py_XDECREF(exceptiontable);
1698
174k
        }
1699
0
        retval = v;
1700
174k
        break;
1701
1702
1.90M
    case TYPE_REF:
1703
1.90M
        n = r_long(p);
1704
1.90M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1705
0
            if (!PyErr_Occurred()) {
1706
0
                PyErr_SetString(PyExc_ValueError,
1707
0
                    "bad marshal data (invalid reference)");
1708
0
            }
1709
0
            break;
1710
0
        }
1711
1.90M
        v = PyList_GET_ITEM(p->refs, n);
1712
1.90M
        if (v == Py_None) {
1713
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1714
0
            break;
1715
0
        }
1716
1.90M
        retval = Py_NewRef(v);
1717
1.90M
        break;
1718
1719
3.32k
    case TYPE_SLICE:
1720
3.32k
    {
1721
3.32k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1722
3.32k
        if (idx < 0) {
1723
0
            break;
1724
0
        }
1725
3.32k
        PyObject *stop = NULL;
1726
3.32k
        PyObject *step = NULL;
1727
3.32k
        PyObject *start = r_object(p);
1728
3.32k
        if (start == NULL) {
1729
0
            goto cleanup;
1730
0
        }
1731
3.32k
        stop = r_object(p);
1732
3.32k
        if (stop == NULL) {
1733
0
            goto cleanup;
1734
0
        }
1735
3.32k
        step = r_object(p);
1736
3.32k
        if (step == NULL) {
1737
0
            goto cleanup;
1738
0
        }
1739
3.32k
        retval = PySlice_New(start, stop, step);
1740
3.32k
        r_ref_insert(retval, idx, flag, p);
1741
3.32k
    cleanup:
1742
3.32k
        Py_XDECREF(start);
1743
3.32k
        Py_XDECREF(stop);
1744
3.32k
        Py_XDECREF(step);
1745
3.32k
        break;
1746
3.32k
    }
1747
1748
0
    default:
1749
        /* Bogus data got written, which isn't ideal.
1750
           This will let you keep working and recover. */
1751
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1752
0
        break;
1753
1754
4.48M
    }
1755
4.48M
    p->depth--;
1756
4.48M
    return retval;
1757
4.48M
}
1758
1759
static PyObject *
1760
read_object(RFILE *p)
1761
6.77k
{
1762
6.77k
    PyObject *v;
1763
6.77k
    if (PyErr_Occurred()) {
1764
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1765
0
        return NULL;
1766
0
    }
1767
6.77k
    if (p->ptr && p->end) {
1768
6.77k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1769
0
            return NULL;
1770
0
        }
1771
6.77k
    } else if (p->fp || p->readable) {
1772
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1773
0
            return NULL;
1774
0
        }
1775
0
    }
1776
6.77k
    v = r_object(p);
1777
6.77k
    if (v == NULL && !PyErr_Occurred())
1778
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1779
6.77k
    return v;
1780
6.77k
}
1781
1782
int
1783
PyMarshal_ReadShortFromFile(FILE *fp)
1784
0
{
1785
0
    RFILE rf;
1786
0
    int res;
1787
0
    assert(fp);
1788
0
    rf.readable = NULL;
1789
0
    rf.fp = fp;
1790
0
    rf.end = rf.ptr = NULL;
1791
0
    rf.buf = NULL;
1792
0
    res = r_short(&rf);
1793
0
    if (rf.buf != NULL)
1794
0
        PyMem_Free(rf.buf);
1795
0
    return res;
1796
0
}
1797
1798
long
1799
PyMarshal_ReadLongFromFile(FILE *fp)
1800
0
{
1801
0
    RFILE rf;
1802
0
    long res;
1803
0
    rf.fp = fp;
1804
0
    rf.readable = NULL;
1805
0
    rf.ptr = rf.end = NULL;
1806
0
    rf.buf = NULL;
1807
0
    res = r_long(&rf);
1808
0
    if (rf.buf != NULL)
1809
0
        PyMem_Free(rf.buf);
1810
0
    return res;
1811
0
}
1812
1813
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1814
static off_t
1815
getfilesize(FILE *fp)
1816
0
{
1817
0
    struct _Py_stat_struct st;
1818
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1819
0
        return -1;
1820
#if SIZEOF_OFF_T == 4
1821
    else if (st.st_size >= INT_MAX)
1822
        return (off_t)INT_MAX;
1823
#endif
1824
0
    else
1825
0
        return (off_t)st.st_size;
1826
0
}
1827
1828
/* If we can get the size of the file up-front, and it's reasonably small,
1829
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1830
 * than reading a byte at a time from file; speeds .pyc imports.
1831
 * CAUTION:  since this may read the entire remainder of the file, don't
1832
 * call it unless you know you're done with the file.
1833
 */
1834
PyObject *
1835
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1836
0
{
1837
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1838
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1839
0
    off_t filesize;
1840
0
    filesize = getfilesize(fp);
1841
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1842
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1843
0
        if (pBuf != NULL) {
1844
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1845
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1846
0
            PyMem_Free(pBuf);
1847
0
            return v;
1848
0
        }
1849
1850
0
    }
1851
    /* We don't have fstat, or we do but the file is larger than
1852
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1853
     */
1854
0
    return PyMarshal_ReadObjectFromFile(fp);
1855
1856
0
#undef REASONABLE_FILE_LIMIT
1857
0
}
1858
1859
PyObject *
1860
PyMarshal_ReadObjectFromFile(FILE *fp)
1861
0
{
1862
0
    RFILE rf;
1863
0
    PyObject *result;
1864
0
    rf.allow_code = 1;
1865
0
    rf.fp = fp;
1866
0
    rf.readable = NULL;
1867
0
    rf.depth = 0;
1868
0
    rf.ptr = rf.end = NULL;
1869
0
    rf.buf = NULL;
1870
0
    rf.refs = PyList_New(0);
1871
0
    if (rf.refs == NULL)
1872
0
        return NULL;
1873
0
    result = read_object(&rf);
1874
0
    Py_DECREF(rf.refs);
1875
0
    if (rf.buf != NULL)
1876
0
        PyMem_Free(rf.buf);
1877
0
    return result;
1878
0
}
1879
1880
PyObject *
1881
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1882
648
{
1883
648
    RFILE rf;
1884
648
    PyObject *result;
1885
648
    rf.allow_code = 1;
1886
648
    rf.fp = NULL;
1887
648
    rf.readable = NULL;
1888
648
    rf.ptr = str;
1889
648
    rf.end = str + len;
1890
648
    rf.buf = NULL;
1891
648
    rf.depth = 0;
1892
648
    rf.refs = PyList_New(0);
1893
648
    if (rf.refs == NULL)
1894
0
        return NULL;
1895
648
    result = read_object(&rf);
1896
648
    Py_DECREF(rf.refs);
1897
648
    if (rf.buf != NULL)
1898
0
        PyMem_Free(rf.buf);
1899
648
    return result;
1900
648
}
1901
1902
/* Marshal x into a new bytes object.
 *
 * x          -- object to serialize
 * version    -- marshal format version, stored in the WFILE and passed to
 *               w_init_refs()
 * allow_code -- stored in the WFILE for w_object() to consult
 *
 * Raises the "marshal.dumps" audit event first.  The output starts as a
 * 50-byte bytes object that is trimmed to the number of bytes actually
 * written once w_object() returns.  Returns the bytes object, or NULL with
 * an exception set: MemoryError for WFERR_NOMEMORY, ValueError for every
 * other writer error code.
 */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    WFILE wf;
    memset(&wf, 0, sizeof(wf));
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    wf.buf = PyBytes_AS_STRING(wf.str);
    wf.ptr = wf.buf;
    wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;
    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }

    w_object(x, &wf);
    w_clear_refs(&wf);

    /* Trim the output to the bytes written.  The NULL check is kept from
       the original code (presumably the writer can drop wf.str on
       failure -- confirm against w_object()). */
    if (wf.str != NULL) {
        const char *start = PyBytes_AS_STRING(wf.str);
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - start);
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* Failure: drop the partial output and translate the error code. */
    Py_XDECREF(wf.str);
    if (wf.error == WFERR_NOMEMORY) {
        PyErr_NoMemory();
    }
    else {
        const char *message;
        if (wf.error == WFERR_NESTEDTOODEEP) {
            message = "object too deeply nested to marshal";
        }
        else if (wf.error == WFERR_CODE_NOT_ALLOWED) {
            message = "marshalling code objects is disallowed";
        }
        else {
            /* WFERR_UNMARSHALLABLE and any unrecognized code. */
            message = "unmarshallable object";
        }
        PyErr_SetString(PyExc_ValueError, message);
    }
    return NULL;
}
1954
1955
/* Public wrapper: marshal x with the given format version, always
 * permitting code objects.  Returns what
 * _PyMarshal_WriteObjectToString() returns.
 */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1960
1961
/* And an interface for Python programs... */
1962
/*[clinic input]
1963
marshal.dump
1964
1965
    value: object
1966
        Must be a supported type.
1967
    file: object
1968
        Must be a writeable binary file.
1969
    version: int(c_default="Py_MARSHAL_VERSION") = version
1970
        Indicates the data format that dump should use.
1971
    /
1972
    *
1973
    allow_code: bool = True
1974
        Allow to write code objects.
1975
1976
Write the value on the open file.
1977
1978
If the value has (or contains an object that has) an unsupported type, a
1979
ValueError exception is raised - but garbage data will also be written
1980
to the file. The object will not be properly read back by load().
1981
[clinic start generated code]*/
1982
1983
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently */

    /* Serialize fully into a bytes object first, then pass it to
       file.write() in a single call.  The result of write() is returned
       to the caller. */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    PyObject *write_result = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                       payload);
    Py_DECREF(payload);
    return write_result;
}
1999
2000
/*[clinic input]
2001
marshal.load
2002
2003
    file: object
2004
        Must be readable binary file.
2005
    /
2006
    *
2007
    allow_code: bool = True
2008
        Allow to load code objects.
2009
2010
Read one value from the open file and return it.
2011
2012
If no valid value is read (e.g. because the data has a different Python
2013
version's incompatible marshal format), raise EOFError, ValueError or
2014
TypeError.
2015
2016
Note: If an object containing an unsupported type was marshalled with
2017
dump(), load() will substitute None for the unmarshallable type.
2018
[clinic start generated code]*/
2019
2020
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    /*
     * Probe the file object with a zero-byte read before doing any real
     * work.  This checks that it has a read method and that the method
     * returns bytes.  The probe can go away once r_string() is guaranteed
     * to report such errors properly itself.
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }
    if (!PyBytes_Check(probe)) {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
        Py_DECREF(probe);
        return NULL;
    }

    /* Read through the Python-level file object: no C stream and no
       in-memory window. */
    PyObject *result = NULL;
    RFILE reader;
    reader.allow_code = allow_code;
    reader.depth = 0;
    reader.fp = NULL;
    reader.readable = file;
    reader.ptr = NULL;
    reader.end = NULL;
    reader.buf = NULL;
    reader.refs = PyList_New(0);
    if (reader.refs != NULL) {
        result = read_object(&reader);
        Py_DECREF(reader.refs);
        if (reader.buf != NULL) {
            PyMem_Free(reader.buf);
        }
    }

    Py_DECREF(probe);
    return result;
}
2061
2062
/*[clinic input]
2063
@permit_long_summary
2064
@permit_long_docstring_body
2065
marshal.dumps
2066
2067
    value: object
2068
        Must be a supported type.
2069
    version: int(c_default="Py_MARSHAL_VERSION") = version
2070
        Indicates the data format that dumps should use.
2071
    /
2072
    *
2073
    allow_code: bool = True
2074
        Allow to write code objects.
2075
2076
Return the bytes object that would be written to a file by dump(value, file).
2077
2078
Raise a ValueError exception if value has (or contains an object that has) an
2079
unsupported type.
2080
[clinic start generated code]*/
2081
2082
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=80cd3f30c1637ade]*/
{
    /* All the work happens in the shared writer; its result (the bytes
       object, or NULL on error) is returned as is. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2089
2090
/*[clinic input]
2091
marshal.loads
2092
2093
    bytes: Py_buffer
2094
    /
2095
    *
2096
    allow_code: bool = True
2097
        Allow to load code objects.
2098
2099
Convert the bytes-like object to a value.
2100
2101
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2102
bytes in the input are ignored.
2103
[clinic start generated code]*/
2104
2105
static PyObject *
2106
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2107
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2108
6.12k
{
2109
6.12k
    RFILE rf;
2110
6.12k
    char *s = bytes->buf;
2111
6.12k
    Py_ssize_t n = bytes->len;
2112
6.12k
    PyObject* result;
2113
6.12k
    rf.allow_code = allow_code;
2114
6.12k
    rf.fp = NULL;
2115
6.12k
    rf.readable = NULL;
2116
6.12k
    rf.ptr = s;
2117
6.12k
    rf.end = s + n;
2118
6.12k
    rf.depth = 0;
2119
6.12k
    if ((rf.refs = PyList_New(0)) == NULL)
2120
0
        return NULL;
2121
6.12k
    result = read_object(&rf);
2122
6.12k
    Py_DECREF(rf.refs);
2123
6.12k
    return result;
2124
6.12k
}
2125
2126
/* Method table for the module; referenced by marshalmodule.m_methods.
   Each entry is a *_METHODDEF macro (presumably supplied by the included
   clinic/marshal.c.h -- confirm), followed by the terminating sentinel. */
static PyMethodDef marshal_methods[] = {
2127
    MARSHAL_DUMP_METHODDEF
2128
    MARSHAL_LOAD_METHODDEF
2129
    MARSHAL_DUMPS_METHODDEF
2130
    MARSHAL_LOADS_METHODDEF
2131
    {NULL,              NULL}           /* sentinel */
2132
};
2133
2134
2135
/* Module docstring; installed through marshalmodule.m_doc.
   NOTE(review): the text below describes format versions 0-3 only and
   warns that recursive lists and dictionaries cause infinite loops, while
   the comment at the top of this file says version 3 supports circular
   links.  Confirm whether the wording is out of date. */
PyDoc_STRVAR(module_doc,
2136
"This module contains functions that can read and write Python values in\n\
2137
a binary format. The format is specific to Python, but independent of\n\
2138
machine architecture issues.\n\
2139
\n\
2140
Not all Python object types are supported; in general, only objects\n\
2141
whose value is independent from a particular invocation of Python can be\n\
2142
written and read by this module. The following types are supported:\n\
2143
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2144
tuples, lists, sets, dictionaries, and code objects, where it\n\
2145
should be understood that tuples, lists and dictionaries are only\n\
2146
supported as long as the values contained therein are themselves\n\
2147
supported; and recursive lists and dictionaries should not be written\n\
2148
(they will cause infinite loops).\n\
2149
\n\
2150
Variables:\n\
2151
\n\
2152
version -- indicates the format that the module uses. Version 0 is the\n\
2153
    historical format, version 1 shares interned strings and version 2\n\
2154
    uses a binary format for floating-point numbers.\n\
2155
    Version 3 shares common object references (New in version 3.4).\n\
2156
\n\
2157
Functions:\n\
2158
\n\
2159
dump() -- write value to a file\n\
2160
load() -- read value from a file\n\
2161
dumps() -- marshal value as a bytes object\n\
2162
loads() -- read value from a bytes-like object");
2163
2164
2165
static int
2166
marshal_module_exec(PyObject *mod)
2167
37
{
2168
37
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2169
0
        return -1;
2170
0
    }
2171
37
    return 0;
2172
37
}
2173
2174
/* Slot table referenced by marshalmodule.m_slots.  It registers
   marshal_module_exec as the Py_mod_exec hook, sets
   Py_mod_multiple_interpreters to Py_MOD_PER_INTERPRETER_GIL_SUPPORTED and
   Py_mod_gil to Py_MOD_GIL_NOT_USED, and ends with the {0, NULL}
   terminator. */
static PyModuleDef_Slot marshalmodule_slots[] = {
2175
     _Py_ABI_SLOT,
2176
    {Py_mod_exec, marshal_module_exec},
2177
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2178
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2179
    {0, NULL}
2180
};
2181
2182
/* Definition of the "marshal" module: ties together the docstring, the
   method table and the slot table defined above.  PyMarshal_Init() hands
   this structure to PyModuleDef_Init(). */
static struct PyModuleDef marshalmodule = {
2183
    PyModuleDef_HEAD_INIT,
2184
    .m_name = "marshal",
2185
    .m_doc = module_doc,
2186
    .m_methods = marshal_methods,
2187
    .m_slots = marshalmodule_slots,
2188
};
2189
2190
PyMODINIT_FUNC
2191
PyMarshal_Init(void)
2192
37
{
2193
37
    return PyModuleDef_Init(&marshalmodule);
2194
37
}