Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/marshal.c
Line
Count
Source
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_object.h"           // _PyObject_IsUniquelyReferenced
15
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
16
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
17
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
18
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
19
20
#include "marshal.h"                 // Py_MARSHAL_VERSION
21
22
#ifdef __APPLE__
23
#  include "TargetConditionals.h"
24
#endif /* __APPLE__ */
25
26
27
/*[clinic input]
28
module marshal
29
[clinic start generated code]*/
30
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
31
32
#include "clinic/marshal.c.h"
33
34
/* High water mark to determine when the marshalled object is dangerously deep
35
 * and risks coring the interpreter.  When the object stack gets this deep,
36
 * raise an exception instead of continuing.
37
 * On Windows debug builds, reduce this value.
38
 *
39
 * BUG: https://bugs.python.org/issue33720
40
 * On Windows PGO builds, the r_object function overallocates its stack and
41
 * can cause a stack overflow. We reduce the maximum depth for all Windows
42
 * releases to protect against this.
43
 * (The guard used to be: #if defined(MS_WINDOWS) && defined(Py_DEBUG))
44
 */
45
#if defined(MS_WINDOWS)
46
#  define MAX_MARSHAL_STACK_DEPTH 1000
47
#elif defined(__wasi__)
48
#  define MAX_MARSHAL_STACK_DEPTH 1500
49
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
50
// It won't be defined on older macOS SDKs
51
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
52
#  define MAX_MARSHAL_STACK_DEPTH 1500
53
#else
54
4.53M
#  define MAX_MARSHAL_STACK_DEPTH 2000
55
#endif
56
57
/* Supported types */
58
0
#define TYPE_NULL               '0'
59
94.9k
#define TYPE_NONE               'N'
60
12.3k
#define TYPE_FALSE              'F'
61
11.5k
#define TYPE_TRUE               'T'
62
0
#define TYPE_STOPITER           'S'
63
412
#define TYPE_ELLIPSIS           '.'
64
631
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
65
3
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
66
434
#define TYPE_LONG               'l'  // See also TYPE_INT.
67
514k
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
68
74
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
69
0
#define TYPE_LIST               '['
70
0
#define TYPE_DICT               '{'
71
0
#define TYPE_FROZENDICT         '}'
72
170k
#define TYPE_CODE               'c'
73
3.56k
#define TYPE_UNICODE            'u'
74
#define TYPE_UNKNOWN            '?'
75
// added in version 2:
76
1.51k
#define TYPE_SET                '<'
77
505
#define TYPE_FROZENSET          '>'
78
// added in version 5:
79
3.33k
#define TYPE_SLICE              ':'
80
// Remember to update the version and documentation when adding new types.
81
82
/* Special cases for unicode strings (added in version 4) */
83
206
#define TYPE_INTERNED           't' // Version 1+
84
25.8k
#define TYPE_ASCII              'a'
85
0
#define TYPE_ASCII_INTERNED     'A'
86
1.26M
#define TYPE_SHORT_ASCII        'z'
87
1.10M
#define TYPE_SHORT_ASCII_INTERNED 'Z'
88
89
/* Special cases for small objects */
90
24.9k
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
91
458k
#define TYPE_SMALL_TUPLE        ')'  // Version 4+
92
93
/* Supported for backwards compatibility */
94
0
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
95
0
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
96
0
#define TYPE_INT64              'I'  // Not generated any more.
97
98
/* References (added in version 3) */
99
1.87M
#define TYPE_REF                'r'
100
8.93M
#define FLAG_REF                '\x80' /* with a type, add obj to index */
101
102
103
// Error codes:
104
78.6k
#define WFERR_OK 0
105
22
#define WFERR_UNMARSHALLABLE 1
106
0
#define WFERR_NESTEDTOODEEP 2
107
11
#define WFERR_NOMEMORY 3
108
0
#define WFERR_CODE_NOT_ALLOWED 4
109
110
typedef struct {
111
    FILE *fp;
112
    int error;  /* see WFERR_* values */
113
    int depth;
114
    PyObject *str;
115
    char *ptr;
116
    const char *end;
117
    char *buf;
118
    _Py_hashtable_t *hashtable;
119
    int version;
120
    int allow_code;
121
} WFILE;
122
123
373k
#define w_byte(c, p) do {                               \
124
373k
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
125
373k
            *(p)->ptr++ = (c);                          \
126
373k
    } while(0)
127
128
static void
129
w_flush(WFILE *p)
130
0
{
131
0
    assert(p->fp != NULL);
132
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
133
0
    p->ptr = p->buf;
134
0
}
135
136
static int
137
w_reserve(WFILE *p, Py_ssize_t needed)
138
533
{
139
533
    Py_ssize_t pos, size, delta;
140
533
    if (p->ptr == NULL)
141
0
        return 0; /* An error already occurred */
142
533
    if (p->fp != NULL) {
143
0
        w_flush(p);
144
0
        return needed <= p->end - p->ptr;
145
0
    }
146
533
    assert(p->str != NULL);
147
533
    pos = p->ptr - p->buf;
148
533
    size = PyBytes_GET_SIZE(p->str);
149
533
    if (size > 16*1024*1024)
150
0
        delta = (size >> 3);            /* 12.5% overallocation */
151
533
    else
152
533
        delta = size + 1024;
153
533
    delta = Py_MAX(delta, needed);
154
533
    if (delta > PY_SSIZE_T_MAX - size) {
155
0
        p->error = WFERR_NOMEMORY;
156
0
        return 0;
157
0
    }
158
533
    size += delta;
159
533
    if (_PyBytes_Resize(&p->str, size) != 0) {
160
0
        p->end = p->ptr = p->buf = NULL;
161
0
        return 0;
162
0
    }
163
533
    else {
164
533
        p->buf = PyBytes_AS_STRING(p->str);
165
533
        p->ptr = p->buf + pos;
166
533
        p->end = p->buf + size;
167
533
        return 1;
168
533
    }
169
533
}
170
171
static void
172
w_string(const void *s, Py_ssize_t n, WFILE *p)
173
26.0k
{
174
26.0k
    Py_ssize_t m;
175
26.0k
    if (!n || p->ptr == NULL)
176
207
        return;
177
25.8k
    m = p->end - p->ptr;
178
25.8k
    if (p->fp != NULL) {
179
0
        if (n <= m) {
180
0
            memcpy(p->ptr, s, n);
181
0
            p->ptr += n;
182
0
        }
183
0
        else {
184
0
            w_flush(p);
185
0
            fwrite(s, 1, n, p->fp);
186
0
        }
187
0
    }
188
25.8k
    else {
189
25.8k
        if (n <= m || w_reserve(p, n - m)) {
190
25.8k
            memcpy(p->ptr, s, n);
191
25.8k
            p->ptr += n;
192
25.8k
        }
193
25.8k
    }
194
25.8k
}
195
196
static void
197
w_short(int x, WFILE *p)
198
18
{
199
18
    w_byte((char)( x      & 0xff), p);
200
18
    w_byte((char)((x>> 8) & 0xff), p);
201
18
}
202
203
static void
204
w_long(long x, WFILE *p)
205
67.8k
{
206
67.8k
    w_byte((char)( x      & 0xff), p);
207
67.8k
    w_byte((char)((x>> 8) & 0xff), p);
208
67.8k
    w_byte((char)((x>>16) & 0xff), p);
209
67.8k
    w_byte((char)((x>>24) & 0xff), p);
210
67.8k
}
211
212
555k
#define SIZE32_MAX  0x7FFFFFFF
213
214
#if SIZEOF_SIZE_T > 4
215
9.80k
# define W_SIZE(n, p)  do {                     \
216
9.80k
        if ((n) > SIZE32_MAX) {                 \
217
0
            (p)->depth--;                       \
218
0
            (p)->error = WFERR_UNMARSHALLABLE;  \
219
0
            return;                             \
220
0
        }                                       \
221
9.80k
        w_long((long)(n), p);                   \
222
9.80k
    } while(0)
223
#else
224
# define W_SIZE  w_long
225
#endif
226
227
static void
228
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
229
9.79k
{
230
9.79k
        W_SIZE(n, p);
231
9.79k
        w_string(s, n, p);
232
9.79k
}
233
234
static void
235
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
236
16.2k
{
237
16.2k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
238
16.2k
    w_string(s, n, p);
239
16.2k
}
240
241
/* We assume that Python ints are stored internally in a base that is some
242
   power of 2**15; for the sake of portability we always read and write them
243
   in base exactly 2**15. */
244
245
3.81k
#define PyLong_MARSHAL_SHIFT 15
246
1.68k
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
247
18
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
248
249
39.1k
#define W_TYPE(t, p) do { \
250
39.1k
    w_byte((t) | flag, (p)); \
251
39.1k
} while(0)
252
253
static PyObject *
254
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
255
256
#define _r_digits(bitsize)                                                \
257
static void                                                               \
258
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
259
2
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
260
2
{                                                                         \
261
2
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
262
2
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
263
2
    uint ## bitsize ## _t d = digits[n - 1];                              \
264
2
                                                                          \
265
2
    assert(marshal_ratio > 0);                                            \
266
2
    assert(n >= 1);                                                       \
267
2
    assert(d != 0); /* a PyLong is always normalized */                   \
268
2
    do {                                                                  \
269
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
270
2
        l++;                                                              \
271
2
    } while (d != 0);                                                     \
272
2
    if (l > SIZE32_MAX) {                                                 \
273
0
        p->depth--;                                                       \
274
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
275
0
        return;                                                           \
276
0
    }                                                                     \
277
2
    w_long((long)(negative ? -l : l), p);                                 \
278
2
                                                                          \
279
6
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
280
4
        d = digits[i];                                                    \
281
12
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
282
8
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
283
8
            d >>= PyLong_MARSHAL_SHIFT;                                   \
284
8
        }                                                                 \
285
4
        assert(d == 0);                                                   \
286
4
    }                                                                     \
287
2
    d = digits[n - 1];                                                    \
288
2
    do {                                                                  \
289
2
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
290
2
        d >>= PyLong_MARSHAL_SHIFT;                                       \
291
2
    } while (d != 0);                                                     \
292
2
}
293
0
_r_digits(16)
294
2
_r_digits(32)
295
#undef _r_digits
296
297
static void
298
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
299
4
{
300
4
    W_TYPE(TYPE_LONG, p);
301
4
    if (_PyLong_IsZero(ob)) {
302
0
        w_long((long)0, p);
303
0
        return;
304
0
    }
305
306
4
    PyLongExport long_export;
307
308
4
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
309
0
        p->depth--;
310
0
        p->error = WFERR_UNMARSHALLABLE;
311
0
        return;
312
0
    }
313
4
    if (!long_export.digits) {
314
2
        int8_t sign = long_export.value < 0 ? -1 : 1;
315
2
        uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value);
316
2
        uint64_t d = abs_value;
317
2
        long l = 0;
318
319
        /* set l to number of base PyLong_MARSHAL_BASE digits */
320
8
        do {
321
8
            d >>= PyLong_MARSHAL_SHIFT;
322
8
            l += sign;
323
8
        } while (d);
324
2
        w_long(l, p);
325
326
2
        d = abs_value;
327
8
        do {
328
8
            w_short(d & PyLong_MARSHAL_MASK, p);
329
8
            d >>= PyLong_MARSHAL_SHIFT;
330
8
        } while (d);
331
2
        return;
332
2
    }
333
334
2
    const PyLongLayout *layout = PyLong_GetNativeLayout();
335
2
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
336
337
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
338
2
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
339
2
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
340
341
    /* other assumptions on PyLongObject internals */
342
2
    assert(layout->bits_per_digit <= 32);
343
2
    assert(layout->digits_order == -1);
344
2
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
345
2
    assert(layout->digit_size == 2 || layout->digit_size == 4);
346
347
2
    if (layout->digit_size == 4) {
348
2
        _r_digits32(long_export.digits, long_export.ndigits,
349
2
                    long_export.negative, marshal_ratio, p);
350
2
    }
351
0
    else {
352
0
        _r_digits16(long_export.digits, long_export.ndigits,
353
0
                    long_export.negative, marshal_ratio, p);
354
0
    }
355
2
    PyLong_FreeExport(&long_export);
356
2
}
357
358
static void
359
w_float_bin(double v, WFILE *p)
360
15
{
361
15
    char buf[8];
362
15
    if (PyFloat_Pack8(v, buf, 1) < 0) {
363
0
        p->error = WFERR_UNMARSHALLABLE;
364
0
        return;
365
0
    }
366
15
    w_string(buf, 8, p);
367
15
}
368
369
static void
370
w_float_str(double v, WFILE *p)
371
0
{
372
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
373
0
    if (!buf) {
374
0
        p->error = WFERR_NOMEMORY;
375
0
        return;
376
0
    }
377
0
    w_short_pstring(buf, strlen(buf), p);
378
0
    PyMem_Free(buf);
379
0
}
380
381
static int
382
w_ref(PyObject *v, char *flag, WFILE *p)
383
75.5k
{
384
75.5k
    _Py_hashtable_entry_t *entry;
385
386
75.5k
    if (p->version < 3 || p->hashtable == NULL)
387
0
        return 0; /* not writing object references */
388
389
    /* If it has only one reference, it definitely isn't shared.
390
     * But we always use TYPE_REF for interned strings, to keep PYC files as
391
     * stable as possible.
392
     */
393
75.5k
    if (_PyObject_IsUniquelyReferenced(v) &&
394
20.1k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
395
18.5k
        return 0;
396
18.5k
    }
397
398
56.9k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
399
56.9k
    if (entry != NULL) {
400
        /* write the reference index to the stream */
401
36.3k
        uintptr_t w = (uintptr_t)entry->value;
402
36.3k
        if (w & 0x80000000LU) {
403
0
            PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
404
0
            goto err;
405
0
        }
406
        /* we don't store "long" indices in the dict */
407
36.3k
        assert(w <= 0x7fffffff);
408
36.3k
        w_byte(TYPE_REF, p);
409
36.3k
        w_long((int)w, p);
410
36.3k
        return 1;
411
36.3k
    } else {
412
20.5k
        size_t w = p->hashtable->nentries;
413
        /* we don't support long indices */
414
20.5k
        if (w >= 0x7fffffff) {
415
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
416
0
            goto err;
417
0
        }
418
        // Corresponding code should call w_complete() after
419
        // writing the object.
420
20.5k
        if (PyCode_Check(v) || PySlice_Check(v) || PyFrozenDict_CheckExact(v)) {
421
179
            w |= 0x80000000LU;
422
179
        }
423
20.5k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
424
20.5k
                              (void *)(uintptr_t)w) < 0) {
425
0
            Py_DECREF(v);
426
0
            goto err;
427
0
        }
428
20.5k
        *flag |= FLAG_REF;
429
20.5k
        return 0;
430
20.5k
    }
431
0
err:
432
0
    p->error = WFERR_UNMARSHALLABLE;
433
0
    return 1;
434
56.9k
}
435
436
static void
437
w_complete(PyObject *v, WFILE *p)
438
3.34k
{
439
3.34k
    if (p->version < 3 || p->hashtable == NULL) {
440
0
        return;
441
0
    }
442
3.34k
    if (_PyObject_IsUniquelyReferenced(v)) {
443
3.16k
        return;
444
3.16k
    }
445
446
179
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
447
179
    if (entry == NULL) {
448
0
        return;
449
0
    }
450
179
    assert(entry != NULL);
451
179
    uintptr_t w = (uintptr_t)entry->value;
452
179
    assert(w & 0x80000000LU);
453
179
    w &= ~0x80000000LU;
454
179
    entry->value = (void *)(uintptr_t)w;
455
179
}
456
457
static void
458
w_complex_object(PyObject *v, char flag, WFILE *p);
459
460
static void
461
w_object(PyObject *v, WFILE *p)
462
78.1k
{
463
78.1k
    char flag = '\0';
464
465
78.1k
    if (p->error != WFERR_OK) {
466
0
        return;
467
0
    }
468
469
78.1k
    p->depth++;
470
471
78.1k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
472
0
        p->error = WFERR_NESTEDTOODEEP;
473
0
    }
474
78.1k
    else if (v == NULL) {
475
0
        w_byte(TYPE_NULL, p);
476
0
    }
477
78.1k
    else if (v == Py_None) {
478
2.03k
        w_byte(TYPE_NONE, p);
479
2.03k
    }
480
76.1k
    else if (v == PyExc_StopIteration) {
481
0
        w_byte(TYPE_STOPITER, p);
482
0
    }
483
76.1k
    else if (v == Py_Ellipsis) {
484
3
        w_byte(TYPE_ELLIPSIS, p);
485
3
    }
486
76.1k
    else if (v == Py_False) {
487
343
        w_byte(TYPE_FALSE, p);
488
343
    }
489
75.7k
    else if (v == Py_True) {
490
222
        w_byte(TYPE_TRUE, p);
491
222
    }
492
75.5k
    else if (!w_ref(v, &flag, p))
493
39.1k
        w_complex_object(v, flag, p);
494
495
78.1k
    p->depth--;
496
78.1k
}
497
498
static void
499
w_complex_object(PyObject *v, char flag, WFILE *p)
500
39.1k
{
501
39.1k
    Py_ssize_t i, n;
502
503
39.1k
    if (PyLong_CheckExact(v)) {
504
1.80k
        int overflow;
505
1.80k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
506
1.80k
        if (overflow) {
507
2
            w_PyLong((PyLongObject *)v, flag, p);
508
2
        }
509
1.80k
        else {
510
1.80k
#if SIZEOF_LONG > 4
511
1.80k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
512
1.80k
            if (y && y != -1) {
513
                /* Too large for TYPE_INT */
514
2
                w_PyLong((PyLongObject*)v, flag, p);
515
2
            }
516
1.80k
            else
517
1.80k
#endif
518
1.80k
            {
519
1.80k
                W_TYPE(TYPE_INT, p);
520
1.80k
                w_long(x, p);
521
1.80k
            }
522
1.80k
        }
523
1.80k
    }
524
37.3k
    else if (PyFloat_CheckExact(v)) {
525
13
        if (p->version > 1) {
526
13
            W_TYPE(TYPE_BINARY_FLOAT, p);
527
13
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
528
13
        }
529
0
        else {
530
0
            W_TYPE(TYPE_FLOAT, p);
531
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
532
0
        }
533
13
    }
534
37.3k
    else if (PyComplex_CheckExact(v)) {
535
1
        if (p->version > 1) {
536
1
            W_TYPE(TYPE_BINARY_COMPLEX, p);
537
1
            w_float_bin(PyComplex_RealAsDouble(v), p);
538
1
            w_float_bin(PyComplex_ImagAsDouble(v), p);
539
1
        }
540
0
        else {
541
0
            W_TYPE(TYPE_COMPLEX, p);
542
0
            w_float_str(PyComplex_RealAsDouble(v), p);
543
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
544
0
        }
545
1
    }
546
37.3k
    else if (PyBytes_CheckExact(v)) {
547
9.52k
        W_TYPE(TYPE_STRING, p);
548
9.52k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
549
9.52k
    }
550
27.8k
    else if (PyUnicode_CheckExact(v)) {
551
16.4k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
552
16.4k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
553
16.4k
            if (is_short) {
554
16.2k
                if (PyUnicode_CHECK_INTERNED(v))
555
14.4k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
556
1.72k
                else
557
1.72k
                    W_TYPE(TYPE_SHORT_ASCII, p);
558
16.2k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
559
16.2k
                                PyUnicode_GET_LENGTH(v), p);
560
16.2k
            }
561
203
            else {
562
203
                if (PyUnicode_CHECK_INTERNED(v))
563
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
564
203
                else
565
203
                    W_TYPE(TYPE_ASCII, p);
566
203
                w_pstring(PyUnicode_1BYTE_DATA(v),
567
203
                          PyUnicode_GET_LENGTH(v), p);
568
203
            }
569
16.4k
        }
570
68
        else {
571
68
            PyObject *utf8;
572
68
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
573
68
            if (utf8 == NULL) {
574
0
                p->depth--;
575
0
                p->error = WFERR_UNMARSHALLABLE;
576
0
                return;
577
0
            }
578
68
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
579
0
                W_TYPE(TYPE_INTERNED, p);
580
68
            else
581
68
                W_TYPE(TYPE_UNICODE, p);
582
68
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
583
68
            Py_DECREF(utf8);
584
68
        }
585
16.4k
    }
586
11.3k
    else if (PyTuple_CheckExact(v)) {
587
7.99k
        n = PyTuple_GET_SIZE(v);
588
7.99k
        if (p->version >= 4 && n < 256) {
589
7.99k
            W_TYPE(TYPE_SMALL_TUPLE, p);
590
7.99k
            w_byte((unsigned char)n, p);
591
7.99k
        }
592
0
        else {
593
0
            W_TYPE(TYPE_TUPLE, p);
594
0
            W_SIZE(n, p);
595
0
        }
596
52.5k
        for (i = 0; i < n; i++) {
597
44.5k
            w_object(PyTuple_GET_ITEM(v, i), p);
598
44.5k
        }
599
7.99k
    }
600
3.35k
    else if (PyList_CheckExact(v)) {
601
0
        W_TYPE(TYPE_LIST, p);
602
0
        n = PyList_GET_SIZE(v);
603
0
        W_SIZE(n, p);
604
0
        for (i = 0; i < n; i++) {
605
0
            w_object(PyList_GET_ITEM(v, i), p);
606
0
        }
607
0
    }
608
3.35k
    else if (PyAnyDict_CheckExact(v)) {
609
0
        Py_ssize_t pos;
610
0
        PyObject *key, *value;
611
0
        if (PyFrozenDict_CheckExact(v)) {
612
0
            if (p->version < 6) {
613
0
                w_byte(TYPE_UNKNOWN, p);
614
0
                p->error = WFERR_UNMARSHALLABLE;
615
0
                return;
616
0
            }
617
618
0
            W_TYPE(TYPE_FROZENDICT, p);
619
0
        }
620
0
        else {
621
0
            W_TYPE(TYPE_DICT, p);
622
0
        }
623
        /* This one is NULL object terminated! */
624
0
        pos = 0;
625
0
        while (PyDict_Next(v, &pos, &key, &value)) {
626
0
            w_object(key, p);
627
0
            w_object(value, p);
628
0
        }
629
0
        w_object((PyObject *)NULL, p);
630
0
        if (PyFrozenDict_CheckExact(v)) {
631
0
            w_complete(v, p);
632
0
        }
633
0
    }
634
3.35k
    else if (PyAnySet_CheckExact(v)) {
635
11
        PyObject *value;
636
11
        Py_ssize_t pos = 0;
637
11
        Py_hash_t hash;
638
639
11
        if (PyFrozenSet_CheckExact(v))
640
11
            W_TYPE(TYPE_FROZENSET, p);
641
0
        else
642
0
            W_TYPE(TYPE_SET, p);
643
11
        n = PySet_GET_SIZE(v);
644
11
        W_SIZE(n, p);
645
        // bpo-37596: To support reproducible builds, sets and frozensets need
646
        // to have their elements serialized in a consistent order (even when
647
        // they have been scrambled by hash randomization). To ensure this, we
648
        // use an order equivalent to sorted(v, key=marshal.dumps):
649
11
        PyObject *pairs = PyList_New(n);
650
11
        if (pairs == NULL) {
651
0
            p->error = WFERR_NOMEMORY;
652
0
            return;
653
0
        }
654
11
        Py_ssize_t i = 0;
655
11
        Py_BEGIN_CRITICAL_SECTION(v);
656
76
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
657
65
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
658
65
                                    p->version, p->allow_code);
659
65
            if (dump == NULL) {
660
0
                p->error = WFERR_UNMARSHALLABLE;
661
0
                Py_DECREF(value);
662
0
                break;
663
0
            }
664
65
            PyObject *pair = _PyTuple_FromPairSteal(dump, value);
665
65
            if (pair == NULL) {
666
0
                p->error = WFERR_NOMEMORY;
667
0
                break;
668
0
            }
669
65
            PyList_SET_ITEM(pairs, i++, pair);
670
65
        }
671
11
        Py_END_CRITICAL_SECTION();
672
11
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
673
0
            Py_DECREF(pairs);
674
0
            return;
675
0
        }
676
11
        assert(i == n);
677
11
        if (PyList_Sort(pairs)) {
678
0
            p->error = WFERR_NOMEMORY;
679
0
            Py_DECREF(pairs);
680
0
            return;
681
0
        }
682
76
        for (Py_ssize_t i = 0; i < n; i++) {
683
65
            PyObject *pair = PyList_GET_ITEM(pairs, i);
684
65
            value = PyTuple_GET_ITEM(pair, 1);
685
65
            w_object(value, p);
686
65
        }
687
11
        Py_DECREF(pairs);
688
11
    }
689
3.34k
    else if (PyCode_Check(v)) {
690
3.31k
        if (!p->allow_code) {
691
0
            p->error = WFERR_CODE_NOT_ALLOWED;
692
0
            return;
693
0
        }
694
3.31k
        PyCodeObject *co = (PyCodeObject *)v;
695
3.31k
        PyObject *co_code = _PyCode_GetCode(co);
696
3.31k
        if (co_code == NULL) {
697
0
            p->error = WFERR_NOMEMORY;
698
0
            return;
699
0
        }
700
3.31k
        W_TYPE(TYPE_CODE, p);
701
3.31k
        w_long(co->co_argcount, p);
702
3.31k
        w_long(co->co_posonlyargcount, p);
703
3.31k
        w_long(co->co_kwonlyargcount, p);
704
3.31k
        w_long(co->co_stacksize, p);
705
3.31k
        w_long(co->co_flags, p);
706
3.31k
        w_object(co_code, p);
707
3.31k
        w_object(co->co_consts, p);
708
3.31k
        w_object(co->co_names, p);
709
3.31k
        w_object(co->co_localsplusnames, p);
710
3.31k
        w_object(co->co_localspluskinds, p);
711
3.31k
        w_object(co->co_filename, p);
712
3.31k
        w_object(co->co_name, p);
713
3.31k
        w_object(co->co_qualname, p);
714
3.31k
        w_long(co->co_firstlineno, p);
715
3.31k
        w_object(co->co_linetable, p);
716
3.31k
        w_object(co->co_exceptiontable, p);
717
3.31k
        Py_DECREF(co_code);
718
3.31k
        w_complete(v, p);
719
3.31k
    }
720
25
    else if (PyObject_CheckBuffer(v)) {
721
        /* Write unknown bytes-like objects as a bytes object */
722
0
        Py_buffer view;
723
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
724
0
            w_byte(TYPE_UNKNOWN, p);
725
0
            p->depth--;
726
0
            p->error = WFERR_UNMARSHALLABLE;
727
0
            return;
728
0
        }
729
0
        W_TYPE(TYPE_STRING, p);
730
0
        w_pstring(view.buf, view.len, p);
731
0
        PyBuffer_Release(&view);
732
0
    }
733
25
    else if (PySlice_Check(v)) {
734
25
        if (p->version < 5) {
735
0
            w_byte(TYPE_UNKNOWN, p);
736
0
            p->error = WFERR_UNMARSHALLABLE;
737
0
            return;
738
0
        }
739
25
        PySliceObject *slice = (PySliceObject *)v;
740
25
        W_TYPE(TYPE_SLICE, p);
741
25
        w_object(slice->start, p);
742
25
        w_object(slice->stop, p);
743
25
        w_object(slice->step, p);
744
25
        w_complete(v, p);
745
25
    }
746
0
    else {
747
0
        W_TYPE(TYPE_UNKNOWN, p);
748
0
        p->error = WFERR_UNMARSHALLABLE;
749
0
    }
750
39.1k
}
751
752
static void
753
w_decref_entry(void *key)
754
20.5k
{
755
20.5k
    PyObject *entry_key = (PyObject *)key;
756
20.5k
    Py_XDECREF(entry_key);
757
20.5k
}
758
759
static int
760
w_init_refs(WFILE *wf, int version)
761
232
{
762
232
    if (version >= 3) {
763
232
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
764
232
                                               _Py_hashtable_compare_direct,
765
232
                                               w_decref_entry, NULL, NULL);
766
232
        if (wf->hashtable == NULL) {
767
0
            PyErr_NoMemory();
768
0
            return -1;
769
0
        }
770
232
    }
771
232
    return 0;
772
232
}
773
774
static void
775
w_clear_refs(WFILE *wf)
776
232
{
777
232
    if (wf->hashtable != NULL) {
778
232
        _Py_hashtable_destroy(wf->hashtable);
779
232
    }
780
232
}
781
782
/* version currently has no effect for writing ints. */
783
/* Note that while the documentation states that this function
784
 * can error, currently it never does. Setting an exception in
785
 * this function should be regarded as an API-breaking change.
786
 */
787
void
788
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
789
0
{
790
0
    char buf[4];
791
0
    WFILE wf;
792
0
    memset(&wf, 0, sizeof(wf));
793
0
    wf.fp = fp;
794
0
    wf.ptr = wf.buf = buf;
795
0
    wf.end = wf.ptr + sizeof(buf);
796
0
    wf.error = WFERR_OK;
797
0
    wf.version = version;
798
0
    w_long(x, &wf);
799
0
    w_flush(&wf);
800
0
}
801
802
void
803
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
804
0
{
805
0
    char buf[BUFSIZ];
806
0
    WFILE wf;
807
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
808
0
        return; /* caller must check PyErr_Occurred() */
809
0
    }
810
0
    memset(&wf, 0, sizeof(wf));
811
0
    wf.fp = fp;
812
0
    wf.ptr = wf.buf = buf;
813
0
    wf.end = wf.ptr + sizeof(buf);
814
0
    wf.error = WFERR_OK;
815
0
    wf.version = version;
816
0
    wf.allow_code = 1;
817
0
    if (w_init_refs(&wf, version)) {
818
0
        return; /* caller must check PyErr_Occurred() */
819
0
    }
820
0
    w_object(x, &wf);
821
0
    w_clear_refs(&wf);
822
0
    w_flush(&wf);
823
0
}
824
825
typedef struct {
826
    FILE *fp;
827
    int depth;
828
    PyObject *readable;  /* Stream-like object being read from */
829
    const char *ptr;
830
    const char *end;
831
    char *buf;
832
    Py_ssize_t buf_size;
833
    PyObject *refs;  /* a list */
834
    int allow_code;
835
} RFILE;
836
837
static const char *
838
r_string(Py_ssize_t n, RFILE *p)
839
5.27M
{
840
5.27M
    Py_ssize_t read = -1;
841
842
5.27M
    if (p->ptr != NULL) {
843
        /* Fast path for loads() */
844
5.27M
        const char *res = p->ptr;
845
5.27M
        Py_ssize_t left = p->end - p->ptr;
846
5.27M
        if (left < n) {
847
0
            PyErr_SetString(PyExc_EOFError,
848
0
                            "marshal data too short");
849
0
            return NULL;
850
0
        }
851
5.27M
        p->ptr += n;
852
5.27M
        return res;
853
5.27M
    }
854
0
    if (p->buf == NULL) {
855
0
        p->buf = PyMem_Malloc(n);
856
0
        if (p->buf == NULL) {
857
0
            PyErr_NoMemory();
858
0
            return NULL;
859
0
        }
860
0
        p->buf_size = n;
861
0
    }
862
0
    else if (p->buf_size < n) {
863
0
        char *tmp = PyMem_Realloc(p->buf, n);
864
0
        if (tmp == NULL) {
865
0
            PyErr_NoMemory();
866
0
            return NULL;
867
0
        }
868
0
        p->buf = tmp;
869
0
        p->buf_size = n;
870
0
    }
871
872
0
    if (!p->readable) {
873
0
        assert(p->fp != NULL);
874
0
        read = fread(p->buf, 1, n, p->fp);
875
0
    }
876
0
    else {
877
0
        PyObject *res, *mview;
878
0
        Py_buffer buf;
879
880
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
881
0
            return NULL;
882
0
        mview = PyMemoryView_FromBuffer(&buf);
883
0
        if (mview == NULL)
884
0
            return NULL;
885
886
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
887
0
        if (res != NULL) {
888
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
889
0
            Py_DECREF(res);
890
0
        }
891
0
    }
892
0
    if (read != n) {
893
0
        if (!PyErr_Occurred()) {
894
0
            if (read > n)
895
0
                PyErr_Format(PyExc_ValueError,
896
0
                             "read() returned too much data: "
897
0
                             "%zd bytes requested, %zd returned",
898
0
                             n, read);
899
0
            else
900
0
                PyErr_SetString(PyExc_EOFError,
901
0
                                "EOF read where not expected");
902
0
        }
903
0
        return NULL;
904
0
    }
905
0
    return p->buf;
906
0
}
907
908
static int
909
r_byte(RFILE *p)
910
6.17M
{
911
6.17M
    if (p->ptr != NULL) {
912
6.17M
        if (p->ptr < p->end) {
913
6.17M
            return (unsigned char) *p->ptr++;
914
6.17M
        }
915
6.17M
    }
916
0
    else if (!p->readable) {
917
0
        assert(p->fp);
918
0
        int c = getc(p->fp);
919
0
        if (c != EOF) {
920
0
            return c;
921
0
        }
922
0
    }
923
0
    else {
924
0
        const char *ptr = r_string(1, p);
925
0
        if (ptr != NULL) {
926
0
            return *(const unsigned char *) ptr;
927
0
        }
928
0
        return EOF;
929
0
    }
930
0
    PyErr_SetString(PyExc_EOFError,
931
0
                    "EOF read where not expected");
932
0
    return EOF;
933
6.17M
}
934
935
static int
936
r_short(RFILE *p)
937
1.66k
{
938
1.66k
    short x = -1;
939
1.66k
    const unsigned char *buffer;
940
941
1.66k
    buffer = (const unsigned char *) r_string(2, p);
942
1.66k
    if (buffer != NULL) {
943
1.66k
        x = buffer[0];
944
1.66k
        x |= buffer[1] << 8;
945
        /* Sign-extension, in case short greater than 16 bits */
946
1.66k
        x |= -(x & 0x8000);
947
1.66k
    }
948
1.66k
    return x;
949
1.66k
}
950
951
static long
952
r_long(RFILE *p)
953
3.46M
{
954
3.46M
    long x = -1;
955
3.46M
    const unsigned char *buffer;
956
957
3.46M
    buffer = (const unsigned char *) r_string(4, p);
958
3.46M
    if (buffer != NULL) {
959
3.46M
        x = buffer[0];
960
3.46M
        x |= (long)buffer[1] << 8;
961
3.46M
        x |= (long)buffer[2] << 16;
962
3.46M
        x |= (long)buffer[3] << 24;
963
3.46M
#if SIZEOF_LONG > 4
964
        /* Sign extension for 64-bit machines */
965
3.46M
        x |= -(x & 0x80000000L);
966
3.46M
#endif
967
3.46M
    }
968
3.46M
    return x;
969
3.46M
}
970
971
/* r_long64 deals with the TYPE_INT64 code. */
972
static PyObject *
973
r_long64(RFILE *p)
974
0
{
975
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
976
0
    if (buffer == NULL) {
977
0
        return NULL;
978
0
    }
979
0
    return _PyLong_FromByteArray(buffer, 8,
980
0
                                 1 /* little endian */,
981
0
                                 1 /* signed */);
982
0
}
983
984
#define _w_digits(bitsize)                                              \
985
static int                                                              \
986
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
987
                   Py_ssize_t marshal_ratio,                            \
988
434
                   int shorts_in_top_digit, RFILE *p)                   \
989
434
{                                                                       \
990
434
    uint ## bitsize ## _t d;                                            \
991
434
                                                                        \
992
434
    assert(size >= 1);                                                  \
993
1.01k
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
994
578
        d = 0;                                                          \
995
1.73k
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
996
1.15k
            int md = r_short(p);                                        \
997
1.15k
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
998
0
                goto bad_digit;                                         \
999
0
            }                                                           \
1000
1.15k
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
1001
1.15k
        }                                                               \
1002
578
        digits[i] = d;                                                  \
1003
578
    }                                                                   \
1004
434
                                                                        \
1005
434
    d = 0;                                                              \
1006
944
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
1007
510
        int md = r_short(p);                                            \
1008
510
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
1009
0
            goto bad_digit;                                             \
1010
0
        }                                                               \
1011
510
        /* topmost marshal digit should be nonzero */                   \
1012
510
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
1013
0
            PyErr_SetString(PyExc_ValueError,                           \
1014
0
                "bad marshal data (unnormalized long data)");           \
1015
0
            return -1;                                                  \
1016
0
        }                                                               \
1017
510
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
1018
510
    }                                                                   \
1019
434
    assert(!PyErr_Occurred());                                          \
1020
434
    /* top digit should be nonzero, else the resulting PyLong won't be  \
1021
434
       normalized */                                                    \
1022
434
    digits[size - 1] = d;                                               \
1023
434
    return 0;                                                           \
1024
434
                                                                        \
1025
0
bad_digit:                                                              \
1026
0
    if (!PyErr_Occurred()) {                                            \
1027
0
        PyErr_SetString(PyExc_ValueError,                               \
1028
0
            "bad marshal data (digit out of range in long)");           \
1029
0
    }                                                                   \
1030
0
    return -1;                                                          \
1031
434
}
1032
434
_w_digits(32)
1033
0
_w_digits(16)
1034
#undef _w_digits
1035
1036
static PyObject *
1037
r_PyLong(RFILE *p)
1038
434
{
1039
434
    long n = r_long(p);
1040
434
    if (n == -1 && PyErr_Occurred()) {
1041
0
        return NULL;
1042
0
    }
1043
434
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
1044
0
        PyErr_SetString(PyExc_ValueError,
1045
0
                       "bad marshal data (long size out of range)");
1046
0
        return NULL;
1047
0
    }
1048
1049
434
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1050
434
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1051
1052
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1053
434
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1054
434
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1055
1056
    /* other assumptions on PyLongObject internals */
1057
434
    assert(layout->bits_per_digit <= 32);
1058
434
    assert(layout->digits_order == -1);
1059
434
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1060
434
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1061
1062
434
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1063
1064
434
    assert(size >= 1);
1065
1066
434
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1067
434
    void *digits;
1068
434
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1069
1070
434
    if (writer == NULL) {
1071
0
        return NULL;
1072
0
    }
1073
1074
434
    int ret;
1075
1076
434
    if (layout->digit_size == 4) {
1077
434
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1078
434
    }
1079
0
    else {
1080
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1081
0
    }
1082
434
    if (ret < 0) {
1083
0
        PyLongWriter_Discard(writer);
1084
0
        return NULL;
1085
0
    }
1086
434
    return PyLongWriter_Finish(writer);
1087
434
}
1088
1089
static double
1090
r_float_bin(RFILE *p)
1091
637
{
1092
637
    const char *buf = r_string(8, p);
1093
637
    if (buf == NULL)
1094
0
        return -1;
1095
637
    return PyFloat_Unpack8(buf, 1);
1096
637
}
1097
1098
/* Issue #33720: Disable inlining for reducing the C stack consumption
1099
   on PGO builds. */
1100
Py_NO_INLINE static double
1101
r_float_str(RFILE *p)
1102
0
{
1103
0
    int n;
1104
0
    char buf[256];
1105
0
    const char *ptr;
1106
0
    n = r_byte(p);
1107
0
    if (n == EOF) {
1108
0
        return -1;
1109
0
    }
1110
0
    ptr = r_string(n, p);
1111
0
    if (ptr == NULL) {
1112
0
        return -1;
1113
0
    }
1114
0
    memcpy(buf, ptr, n);
1115
0
    buf[n] = '\0';
1116
0
    return PyOS_string_to_double(buf, NULL, NULL);
1117
0
}
1118
1119
/* allocate the reflist index for a new object. Return -1 on failure */
1120
static Py_ssize_t
1121
r_ref_reserve(int flag, RFILE *p)
1122
174k
{
1123
174k
    if (flag) { /* currently only FLAG_REF is defined */
1124
6.26k
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1125
6.26k
        if (idx >= 0x7ffffffe) {
1126
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1127
0
            return -1;
1128
0
        }
1129
6.26k
        if (PyList_Append(p->refs, Py_None) < 0)
1130
0
            return -1;
1131
6.26k
        return idx;
1132
6.26k
    } else
1133
167k
        return 0;
1134
174k
}
1135
1136
/* insert the new object 'o' to the reflist at previously
1137
 * allocated index 'idx'.
1138
 * 'o' can be NULL, in which case nothing is done.
1139
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1140
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1141
 * NULL returned. This simplifies error checking at the call site since
1142
 * a single test for NULL for the function result is enough.
1143
 */
1144
static PyObject *
1145
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1146
174k
{
1147
174k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1148
6.26k
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1149
6.26k
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1150
6.26k
        Py_DECREF(tmp);
1151
6.26k
    }
1152
174k
    return o;
1153
174k
}
1154
1155
/* combination of both above, used when an object can be
1156
 * created whenever it is seen in the file, as opposed to
1157
 * after having loaded its sub-objects.
1158
 */
1159
static PyObject *
1160
r_ref(PyObject *o, int flag, RFILE *p)
1161
1.37M
{
1162
1.37M
    assert(flag & FLAG_REF);
1163
1.37M
    if (o == NULL)
1164
0
        return NULL;
1165
1.37M
    if (PyList_Append(p->refs, o) < 0) {
1166
0
        Py_DECREF(o); /* release the new object */
1167
0
        return NULL;
1168
0
    }
1169
1.37M
    return o;
1170
1.37M
}
1171
1172
static PyObject *
1173
r_object(RFILE *p)
1174
4.45M
{
1175
    /* NULL is a valid return value, it does not necessarily means that
1176
       an exception is set. */
1177
4.45M
    PyObject *v, *v2;
1178
4.45M
    Py_ssize_t idx = 0;
1179
4.45M
    long i, n;
1180
4.45M
    int type, code = r_byte(p);
1181
4.45M
    int flag, is_interned = 0;
1182
4.45M
    PyObject *retval = NULL;
1183
1184
4.45M
    if (code == EOF) {
1185
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1186
0
            PyErr_SetString(PyExc_EOFError,
1187
0
                            "EOF read where object expected");
1188
0
        }
1189
0
        return NULL;
1190
0
    }
1191
1192
4.45M
    p->depth++;
1193
1194
4.45M
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1195
0
        p->depth--;
1196
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1197
0
        return NULL;
1198
0
    }
1199
1200
4.45M
    flag = code & FLAG_REF;
1201
4.45M
    type = code & ~FLAG_REF;
1202
1203
4.45M
#define R_REF(O) do{\
1204
2.28M
    if (flag) \
1205
2.28M
        O = r_ref(O, flag, p);\
1206
2.28M
} while (0)
1207
1208
4.45M
    switch (type) {
1209
1210
0
    case TYPE_NULL:
1211
0
        break;
1212
1213
94.9k
    case TYPE_NONE:
1214
94.9k
        retval = Py_None;
1215
94.9k
        break;
1216
1217
0
    case TYPE_STOPITER:
1218
0
        retval = Py_NewRef(PyExc_StopIteration);
1219
0
        break;
1220
1221
412
    case TYPE_ELLIPSIS:
1222
412
        retval = Py_Ellipsis;
1223
412
        break;
1224
1225
12.3k
    case TYPE_FALSE:
1226
12.3k
        retval = Py_False;
1227
12.3k
        break;
1228
1229
11.5k
    case TYPE_TRUE:
1230
11.5k
        retval = Py_True;
1231
11.5k
        break;
1232
1233
24.9k
    case TYPE_INT:
1234
24.9k
        n = r_long(p);
1235
24.9k
        if (n == -1 && PyErr_Occurred()) {
1236
0
            break;
1237
0
        }
1238
24.9k
        retval = PyLong_FromLong(n);
1239
24.9k
        R_REF(retval);
1240
24.9k
        break;
1241
1242
0
    case TYPE_INT64:
1243
0
        retval = r_long64(p);
1244
0
        R_REF(retval);
1245
0
        break;
1246
1247
434
    case TYPE_LONG:
1248
434
        retval = r_PyLong(p);
1249
434
        R_REF(retval);
1250
434
        break;
1251
1252
0
    case TYPE_FLOAT:
1253
0
        {
1254
0
            double x = r_float_str(p);
1255
0
            if (x == -1.0 && PyErr_Occurred())
1256
0
                break;
1257
0
            retval = PyFloat_FromDouble(x);
1258
0
            R_REF(retval);
1259
0
            break;
1260
0
        }
1261
1262
631
    case TYPE_BINARY_FLOAT:
1263
631
        {
1264
631
            double x = r_float_bin(p);
1265
631
            if (x == -1.0 && PyErr_Occurred())
1266
0
                break;
1267
631
            retval = PyFloat_FromDouble(x);
1268
631
            R_REF(retval);
1269
631
            break;
1270
631
        }
1271
1272
0
    case TYPE_COMPLEX:
1273
0
        {
1274
0
            Py_complex c;
1275
0
            c.real = r_float_str(p);
1276
0
            if (c.real == -1.0 && PyErr_Occurred())
1277
0
                break;
1278
0
            c.imag = r_float_str(p);
1279
0
            if (c.imag == -1.0 && PyErr_Occurred())
1280
0
                break;
1281
0
            retval = PyComplex_FromCComplex(c);
1282
0
            R_REF(retval);
1283
0
            break;
1284
0
        }
1285
1286
3
    case TYPE_BINARY_COMPLEX:
1287
3
        {
1288
3
            Py_complex c;
1289
3
            c.real = r_float_bin(p);
1290
3
            if (c.real == -1.0 && PyErr_Occurred())
1291
0
                break;
1292
3
            c.imag = r_float_bin(p);
1293
3
            if (c.imag == -1.0 && PyErr_Occurred())
1294
0
                break;
1295
3
            retval = PyComplex_FromCComplex(c);
1296
3
            R_REF(retval);
1297
3
            break;
1298
3
        }
1299
1300
514k
    case TYPE_STRING:
1301
514k
        {
1302
514k
            const char *ptr;
1303
514k
            n = r_long(p);
1304
514k
            if (n < 0 || n > SIZE32_MAX) {
1305
0
                if (!PyErr_Occurred()) {
1306
0
                    PyErr_SetString(PyExc_ValueError,
1307
0
                        "bad marshal data (bytes object size out of range)");
1308
0
                }
1309
0
                break;
1310
0
            }
1311
514k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1312
514k
            if (v == NULL)
1313
0
                break;
1314
514k
            ptr = r_string(n, p);
1315
514k
            if (ptr == NULL) {
1316
0
                Py_DECREF(v);
1317
0
                break;
1318
0
            }
1319
514k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1320
514k
            retval = v;
1321
514k
            R_REF(retval);
1322
514k
            break;
1323
514k
        }
1324
1325
0
    case TYPE_ASCII_INTERNED:
1326
0
        is_interned = 1;
1327
0
        _Py_FALLTHROUGH;
1328
25.8k
    case TYPE_ASCII:
1329
25.8k
        n = r_long(p);
1330
25.8k
        if (n < 0 || n > SIZE32_MAX) {
1331
0
            if (!PyErr_Occurred()) {
1332
0
                PyErr_SetString(PyExc_ValueError,
1333
0
                    "bad marshal data (string size out of range)");
1334
0
            }
1335
0
            break;
1336
0
        }
1337
25.8k
        goto _read_ascii;
1338
1339
1.10M
    case TYPE_SHORT_ASCII_INTERNED:
1340
1.10M
        is_interned = 1;
1341
1.10M
        _Py_FALLTHROUGH;
1342
1.26M
    case TYPE_SHORT_ASCII:
1343
1.26M
        n = r_byte(p);
1344
1.26M
        if (n == EOF) {
1345
0
            break;
1346
0
        }
1347
1.28M
    _read_ascii:
1348
1.28M
        {
1349
1.28M
            const char *ptr;
1350
1.28M
            ptr = r_string(n, p);
1351
1.28M
            if (ptr == NULL)
1352
0
                break;
1353
1.28M
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1354
1.28M
            if (v == NULL)
1355
0
                break;
1356
1.28M
            if (is_interned) {
1357
                // marshal is meant to serialize .pyc files with code
1358
                // objects, and code-related strings are currently immortal.
1359
1.10M
                PyInterpreterState *interp = _PyInterpreterState_GET();
1360
1.10M
                _PyUnicode_InternImmortal(interp, &v);
1361
1.10M
            }
1362
1.28M
            retval = v;
1363
1.28M
            R_REF(retval);
1364
1.28M
            break;
1365
1.28M
        }
1366
1367
206
    case TYPE_INTERNED:
1368
206
        is_interned = 1;
1369
206
        _Py_FALLTHROUGH;
1370
3.56k
    case TYPE_UNICODE:
1371
3.56k
        {
1372
3.56k
        const char *buffer;
1373
1374
3.56k
        n = r_long(p);
1375
3.56k
        if (n < 0 || n > SIZE32_MAX) {
1376
0
            if (!PyErr_Occurred()) {
1377
0
                PyErr_SetString(PyExc_ValueError,
1378
0
                    "bad marshal data (string size out of range)");
1379
0
            }
1380
0
            break;
1381
0
        }
1382
3.56k
        if (n != 0) {
1383
3.56k
            buffer = r_string(n, p);
1384
3.56k
            if (buffer == NULL)
1385
0
                break;
1386
3.56k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1387
3.56k
        }
1388
0
        else {
1389
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1390
0
        }
1391
3.56k
        if (v == NULL)
1392
0
            break;
1393
3.56k
        if (is_interned) {
1394
            // marshal is meant to serialize .pyc files with code
1395
            // objects, and code-related strings are currently immortal.
1396
206
            PyInterpreterState *interp = _PyInterpreterState_GET();
1397
206
            _PyUnicode_InternImmortal(interp, &v);
1398
206
        }
1399
3.56k
        retval = v;
1400
3.56k
        R_REF(retval);
1401
3.56k
        break;
1402
3.56k
        }
1403
1404
458k
    case TYPE_SMALL_TUPLE:
1405
458k
        n = r_byte(p);
1406
458k
        if (n == EOF) {
1407
0
            break;
1408
0
        }
1409
458k
        goto _read_tuple;
1410
458k
    case TYPE_TUPLE:
1411
74
        n = r_long(p);
1412
74
        if (n < 0 || n > SIZE32_MAX) {
1413
0
            if (!PyErr_Occurred()) {
1414
0
                PyErr_SetString(PyExc_ValueError,
1415
0
                    "bad marshal data (tuple size out of range)");
1416
0
            }
1417
0
            break;
1418
0
        }
1419
458k
    _read_tuple:
1420
458k
        v = PyTuple_New(n);
1421
458k
        R_REF(v);
1422
458k
        if (v == NULL)
1423
0
            break;
1424
1425
3.19M
        for (i = 0; i < n; i++) {
1426
2.73M
            v2 = r_object(p);
1427
2.73M
            if ( v2 == NULL ) {
1428
0
                if (!PyErr_Occurred())
1429
0
                    PyErr_SetString(PyExc_TypeError,
1430
0
                        "NULL object in marshal data for tuple");
1431
0
                Py_SETREF(v, NULL);
1432
0
                break;
1433
0
            }
1434
2.73M
            PyTuple_SET_ITEM(v, i, v2);
1435
2.73M
        }
1436
458k
        retval = v;
1437
458k
        break;
1438
1439
0
    case TYPE_LIST:
1440
0
        n = r_long(p);
1441
0
        if (n < 0 || n > SIZE32_MAX) {
1442
0
            if (!PyErr_Occurred()) {
1443
0
                PyErr_SetString(PyExc_ValueError,
1444
0
                    "bad marshal data (list size out of range)");
1445
0
            }
1446
0
            break;
1447
0
        }
1448
0
        v = PyList_New(n);
1449
0
        R_REF(v);
1450
0
        if (v == NULL)
1451
0
            break;
1452
0
        for (i = 0; i < n; i++) {
1453
0
            v2 = r_object(p);
1454
0
            if ( v2 == NULL ) {
1455
0
                if (!PyErr_Occurred())
1456
0
                    PyErr_SetString(PyExc_TypeError,
1457
0
                        "NULL object in marshal data for list");
1458
0
                Py_SETREF(v, NULL);
1459
0
                break;
1460
0
            }
1461
0
            PyList_SET_ITEM(v, i, v2);
1462
0
        }
1463
0
        retval = v;
1464
0
        break;
1465
1466
0
    case TYPE_DICT:
1467
0
    case TYPE_FROZENDICT:
1468
0
        v = PyDict_New();
1469
0
        if (v == NULL) {
1470
0
            break;
1471
0
        }
1472
0
        if (type == TYPE_DICT) {
1473
0
            R_REF(v);
1474
0
        }
1475
0
        else {
1476
0
            idx = r_ref_reserve(flag, p);
1477
0
            if (idx < 0) {
1478
0
                Py_CLEAR(v);
1479
0
                break;
1480
0
            }
1481
0
        }
1482
0
        for (;;) {
1483
0
            PyObject *key, *val;
1484
0
            key = r_object(p);
1485
0
            if (key == NULL)
1486
0
                break;
1487
0
            val = r_object(p);
1488
0
            if (val == NULL) {
1489
0
                Py_DECREF(key);
1490
0
                break;
1491
0
            }
1492
0
            if (PyDict_SetItem(v, key, val) < 0) {
1493
0
                Py_DECREF(key);
1494
0
                Py_DECREF(val);
1495
0
                break;
1496
0
            }
1497
0
            Py_DECREF(key);
1498
0
            Py_DECREF(val);
1499
0
        }
1500
0
        if (PyErr_Occurred()) {
1501
0
            Py_CLEAR(v);
1502
0
        }
1503
0
        if (type == TYPE_FROZENDICT && v != NULL) {
1504
0
            Py_SETREF(v, PyFrozenDict_New(v));
1505
0
        }
1506
0
        retval = v;
1507
0
        break;
1508
1509
0
    case TYPE_SET:
1510
505
    case TYPE_FROZENSET:
1511
505
        n = r_long(p);
1512
505
        if (n < 0 || n > SIZE32_MAX) {
1513
0
            if (!PyErr_Occurred()) {
1514
0
                PyErr_SetString(PyExc_ValueError,
1515
0
                    "bad marshal data (set size out of range)");
1516
0
            }
1517
0
            break;
1518
0
        }
1519
1520
505
        if (n == 0 && type == TYPE_FROZENSET) {
1521
            /* call frozenset() to get the empty frozenset singleton */
1522
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1523
0
            if (v == NULL)
1524
0
                break;
1525
0
            R_REF(v);
1526
0
            retval = v;
1527
0
        }
1528
505
        else {
1529
505
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1530
505
            if (type == TYPE_SET) {
1531
0
                R_REF(v);
1532
505
            } else {
1533
                /* must use delayed registration of frozensets because they must
1534
                 * be init with a refcount of 1
1535
                 */
1536
505
                idx = r_ref_reserve(flag, p);
1537
505
                if (idx < 0)
1538
0
                    Py_CLEAR(v); /* signal error */
1539
505
            }
1540
505
            if (v == NULL)
1541
0
                break;
1542
1543
2.61k
            for (i = 0; i < n; i++) {
1544
2.11k
                v2 = r_object(p);
1545
2.11k
                if ( v2 == NULL ) {
1546
0
                    if (!PyErr_Occurred())
1547
0
                        PyErr_SetString(PyExc_TypeError,
1548
0
                            "NULL object in marshal data for set");
1549
0
                    Py_SETREF(v, NULL);
1550
0
                    break;
1551
0
                }
1552
2.11k
                if (PySet_Add(v, v2) == -1) {
1553
0
                    Py_DECREF(v);
1554
0
                    Py_DECREF(v2);
1555
0
                    v = NULL;
1556
0
                    break;
1557
0
                }
1558
2.11k
                Py_DECREF(v2);
1559
2.11k
            }
1560
505
            if (type != TYPE_SET)
1561
505
                v = r_ref_insert(v, idx, flag, p);
1562
505
            retval = v;
1563
505
        }
1564
505
        break;
1565
1566
170k
    case TYPE_CODE:
1567
170k
        {
1568
170k
            int argcount;
1569
170k
            int posonlyargcount;
1570
170k
            int kwonlyargcount;
1571
170k
            int stacksize;
1572
170k
            int flags;
1573
170k
            PyObject *code = NULL;
1574
170k
            PyObject *consts = NULL;
1575
170k
            PyObject *names = NULL;
1576
170k
            PyObject *localsplusnames = NULL;
1577
170k
            PyObject *localspluskinds = NULL;
1578
170k
            PyObject *filename = NULL;
1579
170k
            PyObject *name = NULL;
1580
170k
            PyObject *qualname = NULL;
1581
170k
            int firstlineno;
1582
170k
            PyObject* linetable = NULL;
1583
170k
            PyObject *exceptiontable = NULL;
1584
1585
170k
            if (!p->allow_code) {
1586
0
                PyErr_SetString(PyExc_ValueError,
1587
0
                                "unmarshalling code objects is disallowed");
1588
0
                break;
1589
0
            }
1590
170k
            idx = r_ref_reserve(flag, p);
1591
170k
            if (idx < 0)
1592
0
                break;
1593
1594
170k
            v = NULL;
1595
1596
            /* XXX ignore long->int overflows for now */
1597
170k
            argcount = (int)r_long(p);
1598
170k
            if (argcount == -1 && PyErr_Occurred())
1599
0
                goto code_error;
1600
170k
            posonlyargcount = (int)r_long(p);
1601
170k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1602
0
                goto code_error;
1603
0
            }
1604
170k
            kwonlyargcount = (int)r_long(p);
1605
170k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1606
0
                goto code_error;
1607
170k
            stacksize = (int)r_long(p);
1608
170k
            if (stacksize == -1 && PyErr_Occurred())
1609
0
                goto code_error;
1610
170k
            flags = (int)r_long(p);
1611
170k
            if (flags == -1 && PyErr_Occurred())
1612
0
                goto code_error;
1613
170k
            code = r_object(p);
1614
170k
            if (code == NULL)
1615
0
                goto code_error;
1616
170k
            consts = r_object(p);
1617
170k
            if (consts == NULL)
1618
0
                goto code_error;
1619
170k
            names = r_object(p);
1620
170k
            if (names == NULL)
1621
0
                goto code_error;
1622
170k
            localsplusnames = r_object(p);
1623
170k
            if (localsplusnames == NULL)
1624
0
                goto code_error;
1625
170k
            localspluskinds = r_object(p);
1626
170k
            if (localspluskinds == NULL)
1627
0
                goto code_error;
1628
170k
            filename = r_object(p);
1629
170k
            if (filename == NULL)
1630
0
                goto code_error;
1631
170k
            name = r_object(p);
1632
170k
            if (name == NULL)
1633
0
                goto code_error;
1634
170k
            qualname = r_object(p);
1635
170k
            if (qualname == NULL)
1636
0
                goto code_error;
1637
170k
            firstlineno = (int)r_long(p);
1638
170k
            if (firstlineno == -1 && PyErr_Occurred())
1639
0
                break;
1640
170k
            linetable = r_object(p);
1641
170k
            if (linetable == NULL)
1642
0
                goto code_error;
1643
170k
            exceptiontable = r_object(p);
1644
170k
            if (exceptiontable == NULL)
1645
0
                goto code_error;
1646
1647
170k
            struct _PyCodeConstructor con = {
1648
170k
                .filename = filename,
1649
170k
                .name = name,
1650
170k
                .qualname = qualname,
1651
170k
                .flags = flags,
1652
1653
170k
                .code = code,
1654
170k
                .firstlineno = firstlineno,
1655
170k
                .linetable = linetable,
1656
1657
170k
                .consts = consts,
1658
170k
                .names = names,
1659
1660
170k
                .localsplusnames = localsplusnames,
1661
170k
                .localspluskinds = localspluskinds,
1662
1663
170k
                .argcount = argcount,
1664
170k
                .posonlyargcount = posonlyargcount,
1665
170k
                .kwonlyargcount = kwonlyargcount,
1666
1667
170k
                .stacksize = stacksize,
1668
1669
170k
                .exceptiontable = exceptiontable,
1670
170k
            };
1671
1672
170k
            if (_PyCode_Validate(&con) < 0) {
1673
0
                goto code_error;
1674
0
            }
1675
1676
170k
            v = (PyObject *)_PyCode_New(&con);
1677
170k
            if (v == NULL) {
1678
0
                goto code_error;
1679
0
            }
1680
1681
170k
            v = r_ref_insert(v, idx, flag, p);
1682
1683
170k
          code_error:
1684
170k
            if (v == NULL && !PyErr_Occurred()) {
1685
0
                PyErr_SetString(PyExc_TypeError,
1686
0
                    "NULL object in marshal data for code object");
1687
0
            }
1688
170k
            Py_XDECREF(code);
1689
170k
            Py_XDECREF(consts);
1690
170k
            Py_XDECREF(names);
1691
170k
            Py_XDECREF(localsplusnames);
1692
170k
            Py_XDECREF(localspluskinds);
1693
170k
            Py_XDECREF(filename);
1694
170k
            Py_XDECREF(name);
1695
170k
            Py_XDECREF(qualname);
1696
170k
            Py_XDECREF(linetable);
1697
170k
            Py_XDECREF(exceptiontable);
1698
170k
        }
1699
0
        retval = v;
1700
170k
        break;
1701
1702
1.87M
    case TYPE_REF:
1703
1.87M
        n = r_long(p);
1704
1.87M
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1705
0
            if (!PyErr_Occurred()) {
1706
0
                PyErr_SetString(PyExc_ValueError,
1707
0
                    "bad marshal data (invalid reference)");
1708
0
            }
1709
0
            break;
1710
0
        }
1711
1.87M
        v = PyList_GET_ITEM(p->refs, n);
1712
1.87M
        if (v == Py_None) {
1713
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1714
0
            break;
1715
0
        }
1716
1.87M
        retval = Py_NewRef(v);
1717
1.87M
        break;
1718
1719
3.33k
    case TYPE_SLICE:
1720
3.33k
    {
1721
3.33k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1722
3.33k
        if (idx < 0) {
1723
0
            break;
1724
0
        }
1725
3.33k
        PyObject *stop = NULL;
1726
3.33k
        PyObject *step = NULL;
1727
3.33k
        PyObject *start = r_object(p);
1728
3.33k
        if (start == NULL) {
1729
0
            goto cleanup;
1730
0
        }
1731
3.33k
        stop = r_object(p);
1732
3.33k
        if (stop == NULL) {
1733
0
            goto cleanup;
1734
0
        }
1735
3.33k
        step = r_object(p);
1736
3.33k
        if (step == NULL) {
1737
0
            goto cleanup;
1738
0
        }
1739
3.33k
        retval = PySlice_New(start, stop, step);
1740
3.33k
        r_ref_insert(retval, idx, flag, p);
1741
3.33k
    cleanup:
1742
3.33k
        Py_XDECREF(start);
1743
3.33k
        Py_XDECREF(stop);
1744
3.33k
        Py_XDECREF(step);
1745
3.33k
        break;
1746
3.33k
    }
1747
1748
0
    default:
1749
        /* Bogus data got written, which isn't ideal.
1750
           This will let you keep working and recover. */
1751
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1752
0
        break;
1753
1754
4.45M
    }
1755
4.45M
    p->depth--;
1756
4.45M
    return retval;
1757
4.45M
}
1758
1759
static PyObject *
1760
read_object(RFILE *p)
1761
6.50k
{
1762
6.50k
    PyObject *v;
1763
6.50k
    if (PyErr_Occurred()) {
1764
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1765
0
        return NULL;
1766
0
    }
1767
6.50k
    if (p->ptr && p->end) {
1768
6.50k
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1769
0
            return NULL;
1770
0
        }
1771
6.50k
    } else if (p->fp || p->readable) {
1772
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1773
0
            return NULL;
1774
0
        }
1775
0
    }
1776
6.50k
    v = r_object(p);
1777
6.50k
    if (v == NULL && !PyErr_Occurred())
1778
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1779
6.50k
    return v;
1780
6.50k
}
1781
1782
int
1783
PyMarshal_ReadShortFromFile(FILE *fp)
1784
0
{
1785
0
    RFILE rf;
1786
0
    int res;
1787
0
    assert(fp);
1788
0
    rf.readable = NULL;
1789
0
    rf.fp = fp;
1790
0
    rf.end = rf.ptr = NULL;
1791
0
    rf.buf = NULL;
1792
0
    res = r_short(&rf);
1793
0
    if (rf.buf != NULL)
1794
0
        PyMem_Free(rf.buf);
1795
0
    return res;
1796
0
}
1797
1798
long
1799
PyMarshal_ReadLongFromFile(FILE *fp)
1800
0
{
1801
0
    RFILE rf;
1802
0
    long res;
1803
0
    rf.fp = fp;
1804
0
    rf.readable = NULL;
1805
0
    rf.ptr = rf.end = NULL;
1806
0
    rf.buf = NULL;
1807
0
    res = r_long(&rf);
1808
0
    if (rf.buf != NULL)
1809
0
        PyMem_Free(rf.buf);
1810
0
    return res;
1811
0
}
1812
1813
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1814
static off_t
1815
getfilesize(FILE *fp)
1816
0
{
1817
0
    struct _Py_stat_struct st;
1818
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1819
0
        return -1;
1820
#if SIZEOF_OFF_T == 4
1821
    else if (st.st_size >= INT_MAX)
1822
        return (off_t)INT_MAX;
1823
#endif
1824
0
    else
1825
0
        return (off_t)st.st_size;
1826
0
}
1827
1828
/* If we can get the size of the file up-front, and it's reasonably small,
1829
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1830
 * than reading a byte at a time from file; speeds .pyc imports.
1831
 * CAUTION:  since this may read the entire remainder of the file, don't
1832
 * call it unless you know you're done with the file.
1833
 */
1834
PyObject *
1835
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1836
0
{
1837
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1838
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1839
0
    off_t filesize;
1840
0
    filesize = getfilesize(fp);
1841
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1842
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1843
0
        if (pBuf != NULL) {
1844
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1845
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1846
0
            PyMem_Free(pBuf);
1847
0
            return v;
1848
0
        }
1849
1850
0
    }
1851
    /* We don't have fstat, or we do but the file is larger than
1852
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1853
     */
1854
0
    return PyMarshal_ReadObjectFromFile(fp);
1855
1856
0
#undef REASONABLE_FILE_LIMIT
1857
0
}
1858
1859
PyObject *
1860
PyMarshal_ReadObjectFromFile(FILE *fp)
1861
0
{
1862
0
    RFILE rf;
1863
0
    PyObject *result;
1864
0
    rf.allow_code = 1;
1865
0
    rf.fp = fp;
1866
0
    rf.readable = NULL;
1867
0
    rf.depth = 0;
1868
0
    rf.ptr = rf.end = NULL;
1869
0
    rf.buf = NULL;
1870
0
    rf.refs = PyList_New(0);
1871
0
    if (rf.refs == NULL)
1872
0
        return NULL;
1873
0
    result = read_object(&rf);
1874
0
    Py_DECREF(rf.refs);
1875
0
    if (rf.buf != NULL)
1876
0
        PyMem_Free(rf.buf);
1877
0
    return result;
1878
0
}
1879
1880
PyObject *
1881
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1882
638
{
1883
638
    RFILE rf;
1884
638
    PyObject *result;
1885
638
    rf.allow_code = 1;
1886
638
    rf.fp = NULL;
1887
638
    rf.readable = NULL;
1888
638
    rf.ptr = str;
1889
638
    rf.end = str + len;
1890
638
    rf.buf = NULL;
1891
638
    rf.depth = 0;
1892
638
    rf.refs = PyList_New(0);
1893
638
    if (rf.refs == NULL)
1894
0
        return NULL;
1895
638
    result = read_object(&rf);
1896
638
    Py_DECREF(rf.refs);
1897
638
    if (rf.buf != NULL)
1898
0
        PyMem_Free(rf.buf);
1899
638
    return result;
1900
638
}
1901
1902
/* Marshal x into a new bytes object using the given format version.
 *
 * Raises the "marshal.dumps" audit event first.  allow_code is stored in the
 * writer state together with version.  Returns a new reference on success.
 * On failure returns NULL with an exception set: MemoryError for
 * WFERR_NOMEMORY, otherwise ValueError with a message describing the writer
 * error.
 */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    WFILE w;

    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    memset(&w, 0, sizeof(w));

    /* Start with a small bytes buffer as the output target. */
    const Py_ssize_t initial_size = 50;
    w.str = PyBytes_FromStringAndSize((char *)NULL, initial_size);
    if (w.str == NULL) {
        return NULL;
    }
    w.buf = PyBytes_AS_STRING(w.str);
    w.ptr = w.buf;
    w.end = w.ptr + PyBytes_GET_SIZE(w.str);
    w.error = WFERR_OK;
    w.version = version;
    w.allow_code = allow_code;

    if (w_init_refs(&w, version)) {
        Py_DECREF(w.str);
        return NULL;
    }
    w_object(x, &w);
    w_clear_refs(&w);

    /* Trim the bytes object down to what was actually written. */
    if (w.str != NULL) {
        const char *start = PyBytes_AS_STRING(w.str);
        Py_ssize_t used = (Py_ssize_t)(w.ptr - start);
        if (_PyBytes_Resize(&w.str, used) < 0) {
            return NULL;
        }
    }

    if (w.error == WFERR_OK) {
        return w.str;
    }

    /* The writer recorded an error: drop the output and raise. */
    Py_XDECREF(w.str);
    if (w.error == WFERR_NOMEMORY) {
        PyErr_NoMemory();
        return NULL;
    }

    const char *message;
    switch (w.error) {
    case WFERR_NESTEDTOODEEP:
        message = "object too deeply nested to marshal";
        break;
    case WFERR_CODE_NOT_ALLOWED:
        message = "marshalling code objects is disallowed";
        break;
    case WFERR_UNMARSHALLABLE:
    default:
        message = "unmarshallable object";
        break;
    }
    PyErr_SetString(PyExc_ValueError, message);
    return NULL;
}
1954
1955
/* Public entry point: marshal x to a new bytes object in the given format
 * version, with code objects allowed.  Returns a new reference, or NULL
 * with an exception set.
 */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    return _PyMarshal_WriteObjectToString(x, version, allow_code);
}
1960
1961
/* And an interface for Python programs... */
1962
/*[clinic input]
1963
marshal.dump
1964
1965
    value: object
1966
        Must be a supported type.
1967
    file: object
1968
        Must be a writeable binary file.
1969
    version: int(c_default="Py_MARSHAL_VERSION") = version
1970
        Indicates the data format that dump should use.
1971
    /
1972
    *
1973
    allow_code: bool = True
1974
        Allow to write code objects.
1975
1976
Write the value on the open file.
1977
1978
If the value has (or contains an object that has) an unsupported type, a
1979
ValueError exception is raised - but garbage data will also be written
1980
to the file. The object will not be properly read back by load().
1981
[clinic start generated code]*/
1982
1983
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently */

    /* Serialise to an in-memory bytes object first; file.write() is only
       called when that succeeded. */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    /* Hand the bytes to file.write() and return whatever it returns. */
    PyObject *written = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                  payload);
    Py_DECREF(payload);
    return written;
}
1999
2000
/*[clinic input]
2001
marshal.load
2002
2003
    file: object
2004
        Must be readable binary file.
2005
    /
2006
    *
2007
    allow_code: bool = True
2008
        Allow to load code objects.
2009
2010
Read one value from the open file and return it.
2011
2012
If no valid value is read (e.g. because the data has a different Python
2013
version's incompatible marshal format), raise EOFError, ValueError or
2014
TypeError.
2015
2016
Note: If an object containing an unsupported type was marshalled with
2017
dump(), load() will substitute None for the unmarshallable type.
2018
[clinic start generated code]*/
2019
2020
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    RFILE reader;
    PyObject *result = NULL;

    /*
     * Make a call to the read method, but read zero bytes.
     * This is to ensure that the object passed in at least
     * has a read method which returns bytes.
     * This can be removed if we guarantee good error handling
     * for r_string()
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }
    if (!PyBytes_Check(probe)) {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
        goto done;
    }

    /* Source: the file-like object; no FILE* and no in-memory window. */
    reader.readable = file;
    reader.fp = NULL;
    reader.ptr = NULL;
    reader.end = NULL;
    reader.buf = NULL;
    reader.allow_code = allow_code;
    reader.depth = 0;

    reader.refs = PyList_New(0);
    if (reader.refs == NULL) {
        goto done;
    }

    result = read_object(&reader);
    Py_DECREF(reader.refs);
    if (reader.buf != NULL) {
        PyMem_Free(reader.buf);
    }

done:
    Py_DECREF(probe);
    return result;
}
2061
2062
/*[clinic input]
2063
@permit_long_summary
2064
@permit_long_docstring_body
2065
marshal.dumps
2066
2067
    value: object
2068
        Must be a supported type.
2069
    version: int(c_default="Py_MARSHAL_VERSION") = version
2070
        Indicates the data format that dumps should use.
2071
    /
2072
    *
2073
    allow_code: bool = True
2074
        Allow to write code objects.
2075
2076
Return the bytes object that would be written to a file by dump(value, file).
2077
2078
Raise a ValueError exception if value has (or contains an object that has) an
2079
unsupported type.
2080
[clinic start generated code]*/
2081
2082
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=80cd3f30c1637ade]*/
{
    /* All the work is done by the shared serialiser; its result (new bytes
       object, or NULL with an exception set) is returned unchanged. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2089
2090
/*[clinic input]
2091
marshal.loads
2092
2093
    bytes: Py_buffer
2094
    /
2095
    *
2096
    allow_code: bool = True
2097
        Allow to load code objects.
2098
2099
Convert the bytes-like object to a value.
2100
2101
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2102
bytes in the input are ignored.
2103
[clinic start generated code]*/
2104
2105
static PyObject *
2106
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2107
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2108
5.86k
{
2109
5.86k
    RFILE rf;
2110
5.86k
    char *s = bytes->buf;
2111
5.86k
    Py_ssize_t n = bytes->len;
2112
5.86k
    PyObject* result;
2113
5.86k
    rf.allow_code = allow_code;
2114
5.86k
    rf.fp = NULL;
2115
5.86k
    rf.readable = NULL;
2116
5.86k
    rf.ptr = s;
2117
5.86k
    rf.end = s + n;
2118
5.86k
    rf.depth = 0;
2119
5.86k
    if ((rf.refs = PyList_New(0)) == NULL)
2120
0
        return NULL;
2121
5.86k
    result = read_object(&rf);
2122
5.86k
    Py_DECREF(rf.refs);
2123
5.86k
    return result;
2124
5.86k
}
2125
2126
static PyMethodDef marshal_methods[] = {
2127
    MARSHAL_DUMP_METHODDEF
2128
    MARSHAL_LOAD_METHODDEF
2129
    MARSHAL_DUMPS_METHODDEF
2130
    MARSHAL_LOADS_METHODDEF
2131
    {NULL,              NULL}           /* sentinel */
2132
};
2133
2134
2135
PyDoc_STRVAR(module_doc,
2136
"This module contains functions that can read and write Python values in\n\
2137
a binary format. The format is specific to Python, but independent of\n\
2138
machine architecture issues.\n\
2139
\n\
2140
Not all Python object types are supported; in general, only objects\n\
2141
whose value is independent from a particular invocation of Python can be\n\
2142
written and read by this module. The following types are supported:\n\
2143
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2144
tuples, lists, sets, dictionaries, and code objects, where it\n\
2145
should be understood that tuples, lists and dictionaries are only\n\
2146
supported as long as the values contained therein are themselves\n\
2147
supported; and recursive lists and dictionaries should not be written\n\
2148
(they will cause infinite loops).\n\
2149
\n\
2150
Variables:\n\
2151
\n\
2152
version -- indicates the format that the module uses. Version 0 is the\n\
2153
    historical format, version 1 shares interned strings and version 2\n\
2154
    uses a binary format for floating-point numbers.\n\
2155
    Version 3 shares common object references (New in version 3.4).\n\
2156
\n\
2157
Functions:\n\
2158
\n\
2159
dump() -- write value to a file\n\
2160
load() -- read value from a file\n\
2161
dumps() -- marshal value as a bytes object\n\
2162
loads() -- read value from a bytes-like object");
2163
2164
2165
static int
2166
marshal_module_exec(PyObject *mod)
2167
36
{
2168
36
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2169
0
        return -1;
2170
0
    }
2171
36
    return 0;
2172
36
}
2173
2174
static PyModuleDef_Slot marshalmodule_slots[] = {
2175
     _Py_ABI_SLOT,
2176
    {Py_mod_exec, marshal_module_exec},
2177
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2178
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2179
    {0, NULL}
2180
};
2181
2182
static struct PyModuleDef marshalmodule = {
2183
    PyModuleDef_HEAD_INIT,
2184
    .m_name = "marshal",
2185
    .m_doc = module_doc,
2186
    .m_methods = marshal_methods,
2187
    .m_slots = marshalmodule_slots,
2188
};
2189
2190
PyMODINIT_FUNC
2191
PyMarshal_Init(void)
2192
36
{
2193
36
    return PyModuleDef_Init(&marshalmodule);
2194
36
}