Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/marshal.c
Line
Count
Source (jump to first uncovered line)
1
2
/* Write Python objects to files and read them back.
3
   This is primarily intended for writing and reading compiled Python code,
4
   even though dicts, lists, sets and frozensets, not commonly seen in
5
   code objects, are supported.
6
   Version 3 of this protocol properly supports circular links
7
   and sharing. */
8
9
#include "Python.h"
10
#include "pycore_call.h"             // _PyObject_CallNoArgs()
11
#include "pycore_code.h"             // _PyCode_New()
12
#include "pycore_hashtable.h"        // _Py_hashtable_t
13
#include "pycore_long.h"             // _PyLong_IsZero()
14
#include "pycore_pystate.h"          // _PyInterpreterState_GET()
15
#include "pycore_setobject.h"        // _PySet_NextEntryRef()
16
#include "pycore_unicodeobject.h"    // _PyUnicode_InternImmortal()
17
18
#include "marshal.h"                 // Py_MARSHAL_VERSION
19
20
#ifdef __APPLE__
21
#  include "TargetConditionals.h"
22
#endif /* __APPLE__ */
23
24
25
/*[clinic input]
26
module marshal
27
[clinic start generated code]*/
28
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
29
30
#include "clinic/marshal.c.h"
31
32
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow. We reduce the maximum depth for all Windows
 * releases to protect against this.  (The reduction used to be limited to
 * debug builds: #if defined(MS_WINDOWS) && defined(Py_DEBUG).)
 */
#if defined(MS_WINDOWS)
#  define MAX_MARSHAL_STACK_DEPTH 1000
#elif defined(__wasi__)
#  define MAX_MARSHAL_STACK_DEPTH 1500
// TARGET_OS_IPHONE covers any non-macOS Apple platform.
// It won't be defined on older macOS SDKs
#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#  define MAX_MARSHAL_STACK_DEPTH 1500
#else
#  define MAX_MARSHAL_STACK_DEPTH 2000
#endif
54
55
/* Supported types.  Each object in a marshal stream starts with one of these
   one-byte type codes. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_BINARY_FLOAT       'g'  // Version 0 uses TYPE_FLOAT instead.
#define TYPE_BINARY_COMPLEX     'y'  // Version 0 uses TYPE_COMPLEX instead.
#define TYPE_LONG               'l'  // See also TYPE_INT.
#define TYPE_STRING             's'  // Bytes. (Name comes from Python 2.)
#define TYPE_TUPLE              '('  // See also TYPE_SMALL_TUPLE.
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
// added in version 2:
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
// added in version 5:
#define TYPE_SLICE              ':'
// Remember to update the version and documentation when adding new types.

/* Special cases for unicode strings (added in version 4) */
#define TYPE_INTERNED           't' // Version 1+
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Special cases for small objects */
#define TYPE_INT                'i'  // All versions. 32-bit encoding.
#define TYPE_SMALL_TUPLE        ')'  // Version 4+

/* Supported for backwards compatibility */
#define TYPE_COMPLEX            'x'  // Generated for version 0 only.
#define TYPE_FLOAT              'f'  // Generated for version 0 only.
#define TYPE_INT64              'I'  // Not generated any more.

/* References (added in version 3) */
#define TYPE_REF                'r'
#define FLAG_REF                '\x80' /* with a type, add obj to index */
98
99
100
// Error codes stored in WFILE.error by the writer functions:
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
#define WFERR_CODE_NOT_ALLOWED 4
106
107
typedef struct {
108
    FILE *fp;
109
    int error;  /* see WFERR_* values */
110
    int depth;
111
    PyObject *str;
112
    char *ptr;
113
    const char *end;
114
    char *buf;
115
    _Py_hashtable_t *hashtable;
116
    int version;
117
    int allow_code;
118
} WFILE;
119
120
655k
/* Append the single byte `c` to WFILE `p`.  When the buffer is full,
   w_reserve() is asked for one more byte; if that fails the byte is
   silently dropped. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
124
125
static void
126
w_flush(WFILE *p)
127
0
{
128
0
    assert(p->fp != NULL);
129
0
    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
130
0
    p->ptr = p->buf;
131
0
}
132
133
static int
134
w_reserve(WFILE *p, Py_ssize_t needed)
135
762
{
136
762
    Py_ssize_t pos, size, delta;
137
762
    if (p->ptr == NULL)
138
0
        return 0; /* An error already occurred */
139
762
    if (p->fp != NULL) {
140
0
        w_flush(p);
141
0
        return needed <= p->end - p->ptr;
142
0
    }
143
762
    assert(p->str != NULL);
144
762
    pos = p->ptr - p->buf;
145
762
    size = PyBytes_GET_SIZE(p->str);
146
762
    if (size > 16*1024*1024)
147
0
        delta = (size >> 3);            /* 12.5% overallocation */
148
762
    else
149
762
        delta = size + 1024;
150
762
    delta = Py_MAX(delta, needed);
151
762
    if (delta > PY_SSIZE_T_MAX - size) {
152
0
        p->error = WFERR_NOMEMORY;
153
0
        return 0;
154
0
    }
155
762
    size += delta;
156
762
    if (_PyBytes_Resize(&p->str, size) != 0) {
157
0
        p->end = p->ptr = p->buf = NULL;
158
0
        return 0;
159
0
    }
160
762
    else {
161
762
        p->buf = PyBytes_AS_STRING(p->str);
162
762
        p->ptr = p->buf + pos;
163
762
        p->end = p->buf + size;
164
762
        return 1;
165
762
    }
166
762
}
167
168
static void
169
w_string(const void *s, Py_ssize_t n, WFILE *p)
170
51.4k
{
171
51.4k
    Py_ssize_t m;
172
51.4k
    if (!n || p->ptr == NULL)
173
284
        return;
174
51.1k
    m = p->end - p->ptr;
175
51.1k
    if (p->fp != NULL) {
176
0
        if (n <= m) {
177
0
            memcpy(p->ptr, s, n);
178
0
            p->ptr += n;
179
0
        }
180
0
        else {
181
0
            w_flush(p);
182
0
            fwrite(s, 1, n, p->fp);
183
0
        }
184
0
    }
185
51.1k
    else {
186
51.1k
        if (n <= m || w_reserve(p, n - m)) {
187
51.1k
            memcpy(p->ptr, s, n);
188
51.1k
            p->ptr += n;
189
51.1k
        }
190
51.1k
    }
191
51.1k
}
192
193
static void
194
w_short(int x, WFILE *p)
195
35
{
196
35
    w_byte((char)( x      & 0xff), p);
197
35
    w_byte((char)((x>> 8) & 0xff), p);
198
35
}
199
200
static void
201
w_long(long x, WFILE *p)
202
117k
{
203
117k
    w_byte((char)( x      & 0xff), p);
204
117k
    w_byte((char)((x>> 8) & 0xff), p);
205
117k
    w_byte((char)((x>>16) & 0xff), p);
206
117k
    w_byte((char)((x>>24) & 0xff), p);
207
117k
}
208
209
99.2k
/* Largest size that can be written with w_long(). */
#define SIZE32_MAX  0x7FFFFFFF

/* Write the size `n` as a 32-bit value.  Where size_t is wider than 32 bits,
   a size above SIZE32_MAX marks the writer as failed and RETURNS FROM THE
   ENCLOSING (void) FUNCTION. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
223
224
static void
225
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
226
18.6k
{
227
18.6k
        W_SIZE(n, p);
228
18.6k
        w_string(s, n, p);
229
18.6k
}
230
231
static void
232
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
233
32.7k
{
234
32.7k
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
235
32.7k
    w_string(s, n, p);
236
32.7k
}
237
238
/* We assume that Python ints are stored internally in a base that is some
   power of 2**15; for the sake of portability we'll always read and write
   them in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
245
246
74.2k
/* Write the type code `t`, OR-ed with the variable `flag` that must be in
   scope at the point of use (it carries FLAG_REF when set by w_ref()). */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
249
250
/* Forward declaration: used by w_complex_object() to serialize set elements
   before the definition appears. */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
252
253
#define _r_digits(bitsize)                                                \
254
static void                                                               \
255
_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n,     \
256
4
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
257
4
{                                                                         \
258
4
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
259
4
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
260
4
    uint ## bitsize ## _t d = digits[n - 1];                              \
261
4
                                                                          \
262
4
    assert(marshal_ratio > 0);                                            \
263
4
    assert(n >= 1);                                                       \
264
4
    assert(d != 0); /* a PyLong is always normalized */                   \
265
4
    do {                                                                  \
266
4
        d >>= PyLong_MARSHAL_SHIFT;                                       \
267
4
        l++;                                                              \
268
4
    } while (d != 0);                                                     \
269
4
    if (l > SIZE32_MAX) {                                                 \
270
0
        p->depth--;                                                       \
271
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
272
0
        return;                                                           \
273
0
    }                                                                     \
274
4
    w_long((long)(negative ? -l : l), p);                                 \
275
4
                                                                          \
276
12
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
277
8
        d = digits[i];                                                    \
278
24
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
279
16
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
280
16
            d >>= PyLong_MARSHAL_SHIFT;                                   \
281
16
        }                                                                 \
282
8
        assert(d == 0);                                                   \
283
8
    }                                                                     \
284
4
    d = digits[n - 1];                                                    \
285
4
    do {                                                                  \
286
4
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
287
4
        d >>= PyLong_MARSHAL_SHIFT;                                       \
288
4
    } while (d != 0);                                                     \
289
4
}
marshal.c:_r_digits32
Line
Count
Source
256
4
                   uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p)  \
257
4
{                                                                         \
258
4
    /* set l to number of base PyLong_MARSHAL_BASE digits */              \
259
4
    Py_ssize_t l = (n - 1)*marshal_ratio;                                 \
260
4
    uint ## bitsize ## _t d = digits[n - 1];                              \
261
4
                                                                          \
262
4
    assert(marshal_ratio > 0);                                            \
263
4
    assert(n >= 1);                                                       \
264
4
    assert(d != 0); /* a PyLong is always normalized */                   \
265
4
    do {                                                                  \
266
4
        d >>= PyLong_MARSHAL_SHIFT;                                       \
267
4
        l++;                                                              \
268
4
    } while (d != 0);                                                     \
269
4
    if (l > SIZE32_MAX) {                                                 \
270
0
        p->depth--;                                                       \
271
0
        p->error = WFERR_UNMARSHALLABLE;                                  \
272
0
        return;                                                           \
273
0
    }                                                                     \
274
4
    w_long((long)(negative ? -l : l), p);                                 \
275
4
                                                                          \
276
12
    for (Py_ssize_t i = 0; i < n - 1; i++) {                              \
277
8
        d = digits[i];                                                    \
278
24
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                  \
279
16
            w_short(d & PyLong_MARSHAL_MASK, p);                          \
280
16
            d >>= PyLong_MARSHAL_SHIFT;                                   \
281
16
        }                                                                 \
282
8
        assert(d == 0);                                                   \
283
8
    }                                                                     \
284
4
    d = digits[n - 1];                                                    \
285
4
    do {                                                                  \
286
4
        w_short(d & PyLong_MARSHAL_MASK, p);                              \
287
4
        d >>= PyLong_MARSHAL_SHIFT;                                       \
288
4
    } while (d != 0);                                                     \
289
4
}
Unexecuted instantiation: marshal.c:_r_digits16
290
_r_digits(16)
291
_r_digits(32)
292
#undef _r_digits
293
294
static void
295
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
296
9
{
297
9
    W_TYPE(TYPE_LONG, p);
298
9
    if (_PyLong_IsZero(ob)) {
299
0
        w_long((long)0, p);
300
0
        return;
301
0
    }
302
303
9
    PyLongExport long_export;
304
305
9
    if (PyLong_Export((PyObject *)ob, &long_export) < 0) {
306
0
        p->depth--;
307
0
        p->error = WFERR_UNMARSHALLABLE;
308
0
        return;
309
0
    }
310
9
    if (!long_export.digits) {
311
5
        int8_t sign = long_export.value < 0 ? -1 : 1;
312
5
        uint64_t abs_value = Py_ABS(long_export.value);
313
5
        uint64_t d = abs_value;
314
5
        long l = 0;
315
316
        /* set l to number of base PyLong_MARSHAL_BASE digits */
317
15
        do {
318
15
            d >>= PyLong_MARSHAL_SHIFT;
319
15
            l += sign;
320
15
        } while (d);
321
5
        w_long(l, p);
322
323
5
        d = abs_value;
324
15
        do {
325
15
            w_short(d & PyLong_MARSHAL_MASK, p);
326
15
            d >>= PyLong_MARSHAL_SHIFT;
327
15
        } while (d);
328
5
        return;
329
5
    }
330
331
4
    const PyLongLayout *layout = PyLong_GetNativeLayout();
332
4
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
333
334
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
335
4
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
336
4
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
337
338
    /* other assumptions on PyLongObject internals */
339
4
    assert(layout->bits_per_digit <= 32);
340
4
    assert(layout->digits_order == -1);
341
4
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
342
4
    assert(layout->digit_size == 2 || layout->digit_size == 4);
343
344
4
    if (layout->digit_size == 4) {
345
4
        _r_digits32(long_export.digits, long_export.ndigits,
346
4
                    long_export.negative, marshal_ratio, p);
347
4
    }
348
0
    else {
349
0
        _r_digits16(long_export.digits, long_export.ndigits,
350
0
                    long_export.negative, marshal_ratio, p);
351
0
    }
352
4
    PyLong_FreeExport(&long_export);
353
4
}
354
355
static void
356
w_float_bin(double v, WFILE *p)
357
41
{
358
41
    char buf[8];
359
41
    if (PyFloat_Pack8(v, buf, 1) < 0) {
360
0
        p->error = WFERR_UNMARSHALLABLE;
361
0
        return;
362
0
    }
363
41
    w_string(buf, 8, p);
364
41
}
365
366
static void
367
w_float_str(double v, WFILE *p)
368
0
{
369
0
    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
370
0
    if (!buf) {
371
0
        p->error = WFERR_NOMEMORY;
372
0
        return;
373
0
    }
374
0
    w_short_pstring(buf, strlen(buf), p);
375
0
    PyMem_Free(buf);
376
0
}
377
378
static int
379
w_ref(PyObject *v, char *flag, WFILE *p)
380
135k
{
381
135k
    _Py_hashtable_entry_t *entry;
382
135k
    int w;
383
384
135k
    if (p->version < 3 || p->hashtable == NULL)
385
0
        return 0; /* not writing object references */
386
387
    /* If it has only one reference, it definitely isn't shared.
388
     * But we use TYPE_REF always for interned string, to PYC file stable
389
     * as possible.
390
     */
391
135k
    if (Py_REFCNT(v) == 1 &&
392
135k
            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
393
38.4k
        return 0;
394
38.4k
    }
395
396
97.5k
    entry = _Py_hashtable_get_entry(p->hashtable, v);
397
97.5k
    if (entry != NULL) {
398
        /* write the reference index to the stream */
399
61.7k
        w = (int)(uintptr_t)entry->value;
400
        /* we don't store "long" indices in the dict */
401
61.7k
        assert(0 <= w && w <= 0x7fffffff);
402
61.7k
        w_byte(TYPE_REF, p);
403
61.7k
        w_long(w, p);
404
61.7k
        return 1;
405
61.7k
    } else {
406
35.7k
        size_t s = p->hashtable->nentries;
407
        /* we don't support long indices */
408
35.7k
        if (s >= 0x7fffffff) {
409
0
            PyErr_SetString(PyExc_ValueError, "too many objects");
410
0
            goto err;
411
0
        }
412
35.7k
        w = (int)s;
413
35.7k
        if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
414
35.7k
                              (void *)(uintptr_t)w) < 0) {
415
0
            Py_DECREF(v);
416
0
            goto err;
417
0
        }
418
35.7k
        *flag |= FLAG_REF;
419
35.7k
        return 0;
420
35.7k
    }
421
0
err:
422
0
    p->error = WFERR_UNMARSHALLABLE;
423
0
    return 1;
424
97.5k
}
425
426
static void
427
w_complex_object(PyObject *v, char flag, WFILE *p);
428
429
static void
430
w_object(PyObject *v, WFILE *p)
431
139k
{
432
139k
    char flag = '\0';
433
434
139k
    p->depth++;
435
436
139k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
437
0
        p->error = WFERR_NESTEDTOODEEP;
438
0
    }
439
139k
    else if (v == NULL) {
440
0
        w_byte(TYPE_NULL, p);
441
0
    }
442
139k
    else if (v == Py_None) {
443
3.09k
        w_byte(TYPE_NONE, p);
444
3.09k
    }
445
136k
    else if (v == PyExc_StopIteration) {
446
0
        w_byte(TYPE_STOPITER, p);
447
0
    }
448
136k
    else if (v == Py_Ellipsis) {
449
2
        w_byte(TYPE_ELLIPSIS, p);
450
2
    }
451
136k
    else if (v == Py_False) {
452
477
        w_byte(TYPE_FALSE, p);
453
477
    }
454
136k
    else if (v == Py_True) {
455
322
        w_byte(TYPE_TRUE, p);
456
322
    }
457
135k
    else if (!w_ref(v, &flag, p))
458
74.2k
        w_complex_object(v, flag, p);
459
460
139k
    p->depth--;
461
139k
}
462
463
static void
464
w_complex_object(PyObject *v, char flag, WFILE *p)
465
74.2k
{
466
74.2k
    Py_ssize_t i, n;
467
468
74.2k
    if (PyLong_CheckExact(v)) {
469
3.52k
        int overflow;
470
3.52k
        long x = PyLong_AsLongAndOverflow(v, &overflow);
471
3.52k
        if (overflow) {
472
4
            w_PyLong((PyLongObject *)v, flag, p);
473
4
        }
474
3.52k
        else {
475
3.52k
#if SIZEOF_LONG > 4
476
3.52k
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
477
3.52k
            if (y && y != -1) {
478
                /* Too large for TYPE_INT */
479
5
                w_PyLong((PyLongObject*)v, flag, p);
480
5
            }
481
3.51k
            else
482
3.51k
#endif
483
3.51k
            {
484
3.51k
                W_TYPE(TYPE_INT, p);
485
3.51k
                w_long(x, p);
486
3.51k
            }
487
3.52k
        }
488
3.52k
    }
489
70.7k
    else if (PyFloat_CheckExact(v)) {
490
41
        if (p->version > 1) {
491
41
            W_TYPE(TYPE_BINARY_FLOAT, p);
492
41
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
493
41
        }
494
0
        else {
495
0
            W_TYPE(TYPE_FLOAT, p);
496
0
            w_float_str(PyFloat_AS_DOUBLE(v), p);
497
0
        }
498
41
    }
499
70.6k
    else if (PyComplex_CheckExact(v)) {
500
0
        if (p->version > 1) {
501
0
            W_TYPE(TYPE_BINARY_COMPLEX, p);
502
0
            w_float_bin(PyComplex_RealAsDouble(v), p);
503
0
            w_float_bin(PyComplex_ImagAsDouble(v), p);
504
0
        }
505
0
        else {
506
0
            W_TYPE(TYPE_COMPLEX, p);
507
0
            w_float_str(PyComplex_RealAsDouble(v), p);
508
0
            w_float_str(PyComplex_ImagAsDouble(v), p);
509
0
        }
510
0
    }
511
70.6k
    else if (PyBytes_CheckExact(v)) {
512
16.4k
        W_TYPE(TYPE_STRING, p);
513
16.4k
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
514
16.4k
    }
515
54.2k
    else if (PyUnicode_CheckExact(v)) {
516
34.9k
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
517
33.2k
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
518
33.2k
            if (is_short) {
519
32.7k
                if (PyUnicode_CHECK_INTERNED(v))
520
26.2k
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
521
6.49k
                else
522
6.49k
                    W_TYPE(TYPE_SHORT_ASCII, p);
523
32.7k
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
524
32.7k
                                PyUnicode_GET_LENGTH(v), p);
525
32.7k
            }
526
512
            else {
527
512
                if (PyUnicode_CHECK_INTERNED(v))
528
0
                    W_TYPE(TYPE_ASCII_INTERNED, p);
529
512
                else
530
512
                    W_TYPE(TYPE_ASCII, p);
531
512
                w_pstring(PyUnicode_1BYTE_DATA(v),
532
512
                          PyUnicode_GET_LENGTH(v), p);
533
512
            }
534
33.2k
        }
535
1.68k
        else {
536
1.68k
            PyObject *utf8;
537
1.68k
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
538
1.68k
            if (utf8 == NULL) {
539
0
                p->depth--;
540
0
                p->error = WFERR_UNMARSHALLABLE;
541
0
                return;
542
0
            }
543
1.68k
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
544
101
                W_TYPE(TYPE_INTERNED, p);
545
1.58k
            else
546
1.58k
                W_TYPE(TYPE_UNICODE, p);
547
1.68k
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
548
1.68k
            Py_DECREF(utf8);
549
1.68k
        }
550
34.9k
    }
551
19.2k
    else if (PyTuple_CheckExact(v)) {
552
13.5k
        n = PyTuple_GET_SIZE(v);
553
13.5k
        if (p->version >= 4 && n < 256) {
554
13.5k
            W_TYPE(TYPE_SMALL_TUPLE, p);
555
13.5k
            w_byte((unsigned char)n, p);
556
13.5k
        }
557
3
        else {
558
3
            W_TYPE(TYPE_TUPLE, p);
559
3
            W_SIZE(n, p);
560
3
        }
561
97.0k
        for (i = 0; i < n; i++) {
562
83.4k
            w_object(PyTuple_GET_ITEM(v, i), p);
563
83.4k
        }
564
13.5k
    }
565
5.73k
    else if (PyList_CheckExact(v)) {
566
0
        W_TYPE(TYPE_LIST, p);
567
0
        n = PyList_GET_SIZE(v);
568
0
        W_SIZE(n, p);
569
0
        for (i = 0; i < n; i++) {
570
0
            w_object(PyList_GET_ITEM(v, i), p);
571
0
        }
572
0
    }
573
5.73k
    else if (PyDict_CheckExact(v)) {
574
0
        Py_ssize_t pos;
575
0
        PyObject *key, *value;
576
0
        W_TYPE(TYPE_DICT, p);
577
        /* This one is NULL object terminated! */
578
0
        pos = 0;
579
0
        while (PyDict_Next(v, &pos, &key, &value)) {
580
0
            w_object(key, p);
581
0
            w_object(value, p);
582
0
        }
583
0
        w_object((PyObject *)NULL, p);
584
0
    }
585
5.73k
    else if (PyAnySet_CheckExact(v)) {
586
26
        PyObject *value;
587
26
        Py_ssize_t pos = 0;
588
26
        Py_hash_t hash;
589
590
26
        if (PyFrozenSet_CheckExact(v))
591
26
            W_TYPE(TYPE_FROZENSET, p);
592
0
        else
593
0
            W_TYPE(TYPE_SET, p);
594
26
        n = PySet_GET_SIZE(v);
595
26
        W_SIZE(n, p);
596
        // bpo-37596: To support reproducible builds, sets and frozensets need
597
        // to have their elements serialized in a consistent order (even when
598
        // they have been scrambled by hash randomization). To ensure this, we
599
        // use an order equivalent to sorted(v, key=marshal.dumps):
600
26
        PyObject *pairs = PyList_New(n);
601
26
        if (pairs == NULL) {
602
0
            p->error = WFERR_NOMEMORY;
603
0
            return;
604
0
        }
605
26
        Py_ssize_t i = 0;
606
26
        Py_BEGIN_CRITICAL_SECTION(v);
607
121
        while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
608
95
            PyObject *dump = _PyMarshal_WriteObjectToString(value,
609
95
                                    p->version, p->allow_code);
610
95
            if (dump == NULL) {
611
0
                p->error = WFERR_UNMARSHALLABLE;
612
0
                Py_DECREF(value);
613
0
                break;
614
0
            }
615
95
            PyObject *pair = PyTuple_Pack(2, dump, value);
616
95
            Py_DECREF(dump);
617
95
            Py_DECREF(value);
618
95
            if (pair == NULL) {
619
0
                p->error = WFERR_NOMEMORY;
620
0
                break;
621
0
            }
622
95
            PyList_SET_ITEM(pairs, i++, pair);
623
95
        }
624
26
        Py_END_CRITICAL_SECTION();
625
26
        if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
626
0
            Py_DECREF(pairs);
627
0
            return;
628
0
        }
629
26
        assert(i == n);
630
26
        if (PyList_Sort(pairs)) {
631
0
            p->error = WFERR_NOMEMORY;
632
0
            Py_DECREF(pairs);
633
0
            return;
634
0
        }
635
121
        for (Py_ssize_t i = 0; i < n; i++) {
636
95
            PyObject *pair = PyList_GET_ITEM(pairs, i);
637
95
            value = PyTuple_GET_ITEM(pair, 1);
638
95
            w_object(value, p);
639
95
        }
640
26
        Py_DECREF(pairs);
641
26
    }
642
5.70k
    else if (PyCode_Check(v)) {
643
5.56k
        if (!p->allow_code) {
644
0
            p->error = WFERR_CODE_NOT_ALLOWED;
645
0
            return;
646
0
        }
647
5.56k
        PyCodeObject *co = (PyCodeObject *)v;
648
5.56k
        PyObject *co_code = _PyCode_GetCode(co);
649
5.56k
        if (co_code == NULL) {
650
0
            p->error = WFERR_NOMEMORY;
651
0
            return;
652
0
        }
653
5.56k
        W_TYPE(TYPE_CODE, p);
654
5.56k
        w_long(co->co_argcount, p);
655
5.56k
        w_long(co->co_posonlyargcount, p);
656
5.56k
        w_long(co->co_kwonlyargcount, p);
657
5.56k
        w_long(co->co_stacksize, p);
658
5.56k
        w_long(co->co_flags, p);
659
5.56k
        w_object(co_code, p);
660
5.56k
        w_object(co->co_consts, p);
661
5.56k
        w_object(co->co_names, p);
662
5.56k
        w_object(co->co_localsplusnames, p);
663
5.56k
        w_object(co->co_localspluskinds, p);
664
5.56k
        w_object(co->co_filename, p);
665
5.56k
        w_object(co->co_name, p);
666
5.56k
        w_object(co->co_qualname, p);
667
5.56k
        w_long(co->co_firstlineno, p);
668
5.56k
        w_object(co->co_linetable, p);
669
5.56k
        w_object(co->co_exceptiontable, p);
670
5.56k
        Py_DECREF(co_code);
671
5.56k
    }
672
143
    else if (PyObject_CheckBuffer(v)) {
673
        /* Write unknown bytes-like objects as a bytes object */
674
0
        Py_buffer view;
675
0
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
676
0
            w_byte(TYPE_UNKNOWN, p);
677
0
            p->depth--;
678
0
            p->error = WFERR_UNMARSHALLABLE;
679
0
            return;
680
0
        }
681
0
        W_TYPE(TYPE_STRING, p);
682
0
        w_pstring(view.buf, view.len, p);
683
0
        PyBuffer_Release(&view);
684
0
    }
685
143
    else if (PySlice_Check(v)) {
686
143
        if (p->version < 5) {
687
0
            w_byte(TYPE_UNKNOWN, p);
688
0
            p->error = WFERR_UNMARSHALLABLE;
689
0
            return;
690
0
        }
691
143
        PySliceObject *slice = (PySliceObject *)v;
692
143
        W_TYPE(TYPE_SLICE, p);
693
143
        w_object(slice->start, p);
694
143
        w_object(slice->stop, p);
695
143
        w_object(slice->step, p);
696
143
    }
697
0
    else {
698
0
        W_TYPE(TYPE_UNKNOWN, p);
699
0
        p->error = WFERR_UNMARSHALLABLE;
700
0
    }
701
74.2k
}
702
703
static void
704
w_decref_entry(void *key)
705
35.7k
{
706
35.7k
    PyObject *entry_key = (PyObject *)key;
707
35.7k
    Py_XDECREF(entry_key);
708
35.7k
}
709
710
static int
711
w_init_refs(WFILE *wf, int version)
712
311
{
713
311
    if (version >= 3) {
714
311
        wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
715
311
                                               _Py_hashtable_compare_direct,
716
311
                                               w_decref_entry, NULL, NULL);
717
311
        if (wf->hashtable == NULL) {
718
0
            PyErr_NoMemory();
719
0
            return -1;
720
0
        }
721
311
    }
722
311
    return 0;
723
311
}
724
725
static void
726
w_clear_refs(WFILE *wf)
727
311
{
728
311
    if (wf->hashtable != NULL) {
729
311
        _Py_hashtable_destroy(wf->hashtable);
730
311
    }
731
311
}
732
733
/* version currently has no effect for writing ints. */
734
/* Note that while the documentation states that this function
735
 * can error, currently it never does. Setting an exception in
736
 * this function should be regarded as an API-breaking change.
737
 */
738
void
739
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
740
0
{
741
0
    char buf[4];
742
0
    WFILE wf;
743
0
    memset(&wf, 0, sizeof(wf));
744
0
    wf.fp = fp;
745
0
    wf.ptr = wf.buf = buf;
746
0
    wf.end = wf.ptr + sizeof(buf);
747
0
    wf.error = WFERR_OK;
748
0
    wf.version = version;
749
0
    w_long(x, &wf);
750
0
    w_flush(&wf);
751
0
}
752
753
void
754
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
755
0
{
756
0
    char buf[BUFSIZ];
757
0
    WFILE wf;
758
0
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
759
0
        return; /* caller must check PyErr_Occurred() */
760
0
    }
761
0
    memset(&wf, 0, sizeof(wf));
762
0
    wf.fp = fp;
763
0
    wf.ptr = wf.buf = buf;
764
0
    wf.end = wf.ptr + sizeof(buf);
765
0
    wf.error = WFERR_OK;
766
0
    wf.version = version;
767
0
    wf.allow_code = 1;
768
0
    if (w_init_refs(&wf, version)) {
769
0
        return; /* caller must check PyErr_Occurred() */
770
0
    }
771
0
    w_object(x, &wf);
772
0
    w_clear_refs(&wf);
773
0
    w_flush(&wf);
774
0
}
775
776
typedef struct {
777
    FILE *fp;
778
    int depth;
779
    PyObject *readable;  /* Stream-like object being read from */
780
    const char *ptr;
781
    const char *end;
782
    char *buf;
783
    Py_ssize_t buf_size;
784
    PyObject *refs;  /* a list */
785
    int allow_code;
786
} RFILE;
787
788
static const char *
789
r_string(Py_ssize_t n, RFILE *p)
790
732k
{
791
732k
    Py_ssize_t read = -1;
792
793
732k
    if (p->ptr != NULL) {
794
        /* Fast path for loads() */
795
732k
        const char *res = p->ptr;
796
732k
        Py_ssize_t left = p->end - p->ptr;
797
732k
        if (left < n) {
798
0
            PyErr_SetString(PyExc_EOFError,
799
0
                            "marshal data too short");
800
0
            return NULL;
801
0
        }
802
732k
        p->ptr += n;
803
732k
        return res;
804
732k
    }
805
0
    if (p->buf == NULL) {
806
0
        p->buf = PyMem_Malloc(n);
807
0
        if (p->buf == NULL) {
808
0
            PyErr_NoMemory();
809
0
            return NULL;
810
0
        }
811
0
        p->buf_size = n;
812
0
    }
813
0
    else if (p->buf_size < n) {
814
0
        char *tmp = PyMem_Realloc(p->buf, n);
815
0
        if (tmp == NULL) {
816
0
            PyErr_NoMemory();
817
0
            return NULL;
818
0
        }
819
0
        p->buf = tmp;
820
0
        p->buf_size = n;
821
0
    }
822
823
0
    if (!p->readable) {
824
0
        assert(p->fp != NULL);
825
0
        read = fread(p->buf, 1, n, p->fp);
826
0
    }
827
0
    else {
828
0
        PyObject *res, *mview;
829
0
        Py_buffer buf;
830
831
0
        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
832
0
            return NULL;
833
0
        mview = PyMemoryView_FromBuffer(&buf);
834
0
        if (mview == NULL)
835
0
            return NULL;
836
837
0
        res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
838
0
        if (res != NULL) {
839
0
            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
840
0
            Py_DECREF(res);
841
0
        }
842
0
    }
843
0
    if (read != n) {
844
0
        if (!PyErr_Occurred()) {
845
0
            if (read > n)
846
0
                PyErr_Format(PyExc_ValueError,
847
0
                             "read() returned too much data: "
848
0
                             "%zd bytes requested, %zd returned",
849
0
                             n, read);
850
0
            else
851
0
                PyErr_SetString(PyExc_EOFError,
852
0
                                "EOF read where not expected");
853
0
        }
854
0
        return NULL;
855
0
    }
856
0
    return p->buf;
857
0
}
858
859
static int
860
r_byte(RFILE *p)
861
805k
{
862
805k
    if (p->ptr != NULL) {
863
805k
        if (p->ptr < p->end) {
864
805k
            return (unsigned char) *p->ptr++;
865
805k
        }
866
805k
    }
867
0
    else if (!p->readable) {
868
0
        assert(p->fp);
869
0
        int c = getc(p->fp);
870
0
        if (c != EOF) {
871
0
            return c;
872
0
        }
873
0
    }
874
0
    else {
875
0
        const char *ptr = r_string(1, p);
876
0
        if (ptr != NULL) {
877
0
            return *(const unsigned char *) ptr;
878
0
        }
879
0
        return EOF;
880
0
    }
881
0
    PyErr_SetString(PyExc_EOFError,
882
0
                    "EOF read where not expected");
883
0
    return EOF;
884
805k
}
885
886
static int
887
r_short(RFILE *p)
888
252
{
889
252
    short x = -1;
890
252
    const unsigned char *buffer;
891
892
252
    buffer = (const unsigned char *) r_string(2, p);
893
252
    if (buffer != NULL) {
894
252
        x = buffer[0];
895
252
        x |= buffer[1] << 8;
896
        /* Sign-extension, in case short greater than 16 bits */
897
252
        x |= -(x & 0x8000);
898
252
    }
899
252
    return x;
900
252
}
901
902
static long
903
r_long(RFILE *p)
904
509k
{
905
509k
    long x = -1;
906
509k
    const unsigned char *buffer;
907
908
509k
    buffer = (const unsigned char *) r_string(4, p);
909
509k
    if (buffer != NULL) {
910
509k
        x = buffer[0];
911
509k
        x |= (long)buffer[1] << 8;
912
509k
        x |= (long)buffer[2] << 16;
913
509k
        x |= (long)buffer[3] << 24;
914
509k
#if SIZEOF_LONG > 4
915
        /* Sign extension for 64-bit machines */
916
509k
        x |= -(x & 0x80000000L);
917
509k
#endif
918
509k
    }
919
509k
    return x;
920
509k
}
921
922
/* r_long64 deals with the TYPE_INT64 code. */
923
static PyObject *
924
r_long64(RFILE *p)
925
0
{
926
0
    const unsigned char *buffer = (const unsigned char *) r_string(8, p);
927
0
    if (buffer == NULL) {
928
0
        return NULL;
929
0
    }
930
0
    return _PyLong_FromByteArray(buffer, 8,
931
0
                                 1 /* little endian */,
932
0
                                 1 /* signed */);
933
0
}
934
935
/* _w_digits(bitsize) defines _w_digits<bitsize>(), which fills
 * digits[0..size-1] (uint<bitsize>_t each) from marshal digits read with
 * r_short().  Every digit but the last is assembled from marshal_ratio
 * shorts; the last (top) one from shorts_in_top_digit shorts, least
 * significant short first.  Returns 0 on success, -1 with an exception set
 * on a read error, an out-of-range marshal digit, or a zero topmost
 * marshal digit.
 */
#define _w_digits(bitsize)                                              \
static int                                                              \
_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size,      \
                   Py_ssize_t marshal_ratio,                            \
                   int shorts_in_top_digit, RFILE *p)                   \
{                                                                       \
    assert(size >= 1);                                                  \
    for (Py_ssize_t i = 0; i < size; i++) {                             \
        const int is_top = (i == size - 1);                             \
        const Py_ssize_t count = is_top ? shorts_in_top_digit           \
                                        : marshal_ratio;                \
        uint ## bitsize ## _t acc = 0;                                  \
        for (Py_ssize_t j = 0; j < count; j++) {                        \
            int md = r_short(p);                                        \
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
                goto bad_digit;                                         \
            }                                                           \
            /* topmost marshal digit should be nonzero, else the     */ \
            /* resulting PyLong won't be normalized                  */ \
            if (is_top && md == 0 && j == count - 1) {                  \
                PyErr_SetString(PyExc_ValueError,                       \
                    "bad marshal data (unnormalized long data)");       \
                return -1;                                              \
            }                                                           \
            acc += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT; \
        }                                                               \
        digits[i] = acc;                                                \
    }                                                                   \
    assert(!PyErr_Occurred());                                          \
    return 0;                                                           \
                                                                        \
bad_digit:                                                              \
    /* a negative md may come from a failed r_short(): keep its error */ \
    if (!PyErr_Occurred()) {                                            \
        PyErr_SetString(PyExc_ValueError,                               \
            "bad marshal data (digit out of range in long)");           \
    }                                                                   \
    return -1;                                                          \
}
marshal.c:_w_digits32
Line
Count
Source
939
78
                   int shorts_in_top_digit, RFILE *p)                   \
940
78
{                                                                       \
941
78
    uint ## bitsize ## _t d;                                            \
942
78
                                                                        \
943
78
    assert(size >= 1);                                                  \
944
165
    for (Py_ssize_t i = 0; i < size - 1; i++) {                         \
945
87
        d = 0;                                                          \
946
261
        for (Py_ssize_t j = 0; j < marshal_ratio; j++) {                \
947
174
            int md = r_short(p);                                        \
948
174
            if (md < 0 || md > PyLong_MARSHAL_BASE) {                   \
949
0
                goto bad_digit;                                         \
950
0
            }                                                           \
951
174
            d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;   \
952
174
        }                                                               \
953
87
        digits[i] = d;                                                  \
954
87
    }                                                                   \
955
78
                                                                        \
956
78
    d = 0;                                                              \
957
156
    for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) {              \
958
78
        int md = r_short(p);                                            \
959
78
        if (md < 0 || md > PyLong_MARSHAL_BASE) {                       \
960
0
            goto bad_digit;                                             \
961
0
        }                                                               \
962
78
        /* topmost marshal digit should be nonzero */                   \
963
78
        if (md == 0 && j == shorts_in_top_digit - 1) {                  \
964
0
            PyErr_SetString(PyExc_ValueError,                           \
965
0
                "bad marshal data (unnormalized long data)");           \
966
0
            return -1;                                                  \
967
0
        }                                                               \
968
78
        d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT;       \
969
78
    }                                                                   \
970
78
    assert(!PyErr_Occurred());                                          \
971
78
    /* top digit should be nonzero, else the resulting PyLong won't be  \
972
78
       normalized */                                                    \
973
78
    digits[size - 1] = d;                                               \
974
78
    return 0;                                                           \
975
78
                                                                        \
976
0
bad_digit:                                                              \
977
0
    if (!PyErr_Occurred()) {                                            \
978
0
        PyErr_SetString(PyExc_ValueError,                               \
979
0
            "bad marshal data (digit out of range in long)");           \
980
0
    }                                                                   \
981
0
    return -1;                                                          \
982
78
}
Unexecuted instantiation: marshal.c:_w_digits16
983
_w_digits(32)
984
_w_digits(16)
985
#undef _w_digits
986
987
static PyObject *
988
r_PyLong(RFILE *p)
989
78
{
990
78
    long n = r_long(p);
991
78
    if (n == -1 && PyErr_Occurred()) {
992
0
        return NULL;
993
0
    }
994
78
    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
995
0
        PyErr_SetString(PyExc_ValueError,
996
0
                       "bad marshal data (long size out of range)");
997
0
        return NULL;
998
0
    }
999
1000
78
    const PyLongLayout *layout = PyLong_GetNativeLayout();
1001
78
    Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT;
1002
1003
    /* must be a multiple of PyLong_MARSHAL_SHIFT */
1004
78
    assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0);
1005
78
    assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT);
1006
1007
    /* other assumptions on PyLongObject internals */
1008
78
    assert(layout->bits_per_digit <= 32);
1009
78
    assert(layout->digits_order == -1);
1010
78
    assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1));
1011
78
    assert(layout->digit_size == 2 || layout->digit_size == 4);
1012
1013
78
    Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio;
1014
1015
78
    assert(size >= 1);
1016
1017
78
    int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio;
1018
78
    void *digits;
1019
78
    PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits);
1020
1021
78
    if (writer == NULL) {
1022
0
        return NULL;
1023
0
    }
1024
1025
78
    int ret;
1026
1027
78
    if (layout->digit_size == 4) {
1028
78
        ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p);
1029
78
    }
1030
0
    else {
1031
0
        ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p);
1032
0
    }
1033
78
    if (ret < 0) {
1034
0
        PyLongWriter_Discard(writer);
1035
0
        return NULL;
1036
0
    }
1037
78
    return PyLongWriter_Finish(writer);
1038
78
}
1039
1040
static double
1041
r_float_bin(RFILE *p)
1042
41
{
1043
41
    const char *buf = r_string(8, p);
1044
41
    if (buf == NULL)
1045
0
        return -1;
1046
41
    return PyFloat_Unpack8(buf, 1);
1047
41
}
1048
1049
/* Issue #33720: Disable inlining for reducing the C stack consumption
1050
   on PGO builds. */
1051
Py_NO_INLINE static double
1052
r_float_str(RFILE *p)
1053
0
{
1054
0
    int n;
1055
0
    char buf[256];
1056
0
    const char *ptr;
1057
0
    n = r_byte(p);
1058
0
    if (n == EOF) {
1059
0
        return -1;
1060
0
    }
1061
0
    ptr = r_string(n, p);
1062
0
    if (ptr == NULL) {
1063
0
        return -1;
1064
0
    }
1065
0
    memcpy(buf, ptr, n);
1066
0
    buf[n] = '\0';
1067
0
    return PyOS_string_to_double(buf, NULL, NULL);
1068
0
}
1069
1070
/* allocate the reflist index for a new object. Return -1 on failure */
1071
static Py_ssize_t
1072
r_ref_reserve(int flag, RFILE *p)
1073
26.3k
{
1074
26.3k
    if (flag) { /* currently only FLAG_REF is defined */
1075
700
        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
1076
700
        if (idx >= 0x7ffffffe) {
1077
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
1078
0
            return -1;
1079
0
        }
1080
700
        if (PyList_Append(p->refs, Py_None) < 0)
1081
0
            return -1;
1082
700
        return idx;
1083
700
    } else
1084
25.6k
        return 0;
1085
26.3k
}
1086
1087
/* insert the new object 'o' to the reflist at previously
1088
 * allocated index 'idx'.
1089
 * 'o' can be NULL, in which case nothing is done.
1090
 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
1091
 * if 'o' was non-NULL, and the function fails, 'o' is released and
1092
 * NULL returned. This simplifies error checking at the call site since
1093
 * a single test for NULL for the function result is enough.
1094
 */
1095
static PyObject *
1096
r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
1097
26.3k
{
1098
26.3k
    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
1099
700
        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
1100
700
        PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
1101
700
        Py_DECREF(tmp);
1102
700
    }
1103
26.3k
    return o;
1104
26.3k
}
1105
1106
/* combination of both above, used when an object can be
1107
 * created whenever it is seen in the file, as opposed to
1108
 * after having loaded its sub-objects.
1109
 */
1110
static PyObject *
1111
r_ref(PyObject *o, int flag, RFILE *p)
1112
159k
{
1113
159k
    assert(flag & FLAG_REF);
1114
159k
    if (o == NULL)
1115
0
        return NULL;
1116
159k
    if (PyList_Append(p->refs, o) < 0) {
1117
0
        Py_DECREF(o); /* release the new object */
1118
0
        return NULL;
1119
0
    }
1120
159k
    return o;
1121
159k
}
1122
1123
static PyObject *
1124
r_object(RFILE *p)
1125
603k
{
1126
    /* NULL is a valid return value, it does not necessarily means that
1127
       an exception is set. */
1128
603k
    PyObject *v, *v2;
1129
603k
    Py_ssize_t idx = 0;
1130
603k
    long i, n;
1131
603k
    int type, code = r_byte(p);
1132
603k
    int flag, is_interned = 0;
1133
603k
    PyObject *retval = NULL;
1134
1135
603k
    if (code == EOF) {
1136
0
        if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1137
0
            PyErr_SetString(PyExc_EOFError,
1138
0
                            "EOF read where object expected");
1139
0
        }
1140
0
        return NULL;
1141
0
    }
1142
1143
603k
    p->depth++;
1144
1145
603k
    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1146
0
        p->depth--;
1147
0
        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1148
0
        return NULL;
1149
0
    }
1150
1151
603k
    flag = code & FLAG_REF;
1152
603k
    type = code & ~FLAG_REF;
1153
1154
603k
#define R_REF(O) do{\
1155
291k
    if (flag) \
1156
291k
        O = r_ref(O, flag, p);\
1157
291k
} while (0)
1158
1159
603k
    switch (type) {
1160
1161
0
    case TYPE_NULL:
1162
0
        break;
1163
1164
13.3k
    case TYPE_NONE:
1165
13.3k
        retval = Py_None;
1166
13.3k
        break;
1167
1168
0
    case TYPE_STOPITER:
1169
0
        retval = Py_NewRef(PyExc_StopIteration);
1170
0
        break;
1171
1172
27
    case TYPE_ELLIPSIS:
1173
27
        retval = Py_Ellipsis;
1174
27
        break;
1175
1176
1.98k
    case TYPE_FALSE:
1177
1.98k
        retval = Py_False;
1178
1.98k
        break;
1179
1180
1.53k
    case TYPE_TRUE:
1181
1.53k
        retval = Py_True;
1182
1.53k
        break;
1183
1184
8.70k
    case TYPE_INT:
1185
8.70k
        n = r_long(p);
1186
8.70k
        if (n == -1 && PyErr_Occurred()) {
1187
0
            break;
1188
0
        }
1189
8.70k
        retval = PyLong_FromLong(n);
1190
8.70k
        R_REF(retval);
1191
8.70k
        break;
1192
1193
0
    case TYPE_INT64:
1194
0
        retval = r_long64(p);
1195
0
        R_REF(retval);
1196
0
        break;
1197
1198
78
    case TYPE_LONG:
1199
78
        retval = r_PyLong(p);
1200
78
        R_REF(retval);
1201
78
        break;
1202
1203
0
    case TYPE_FLOAT:
1204
0
        {
1205
0
            double x = r_float_str(p);
1206
0
            if (x == -1.0 && PyErr_Occurred())
1207
0
                break;
1208
0
            retval = PyFloat_FromDouble(x);
1209
0
            R_REF(retval);
1210
0
            break;
1211
0
        }
1212
1213
41
    case TYPE_BINARY_FLOAT:
1214
41
        {
1215
41
            double x = r_float_bin(p);
1216
41
            if (x == -1.0 && PyErr_Occurred())
1217
0
                break;
1218
41
            retval = PyFloat_FromDouble(x);
1219
41
            R_REF(retval);
1220
41
            break;
1221
41
        }
1222
1223
0
    case TYPE_COMPLEX:
1224
0
        {
1225
0
            Py_complex c;
1226
0
            c.real = r_float_str(p);
1227
0
            if (c.real == -1.0 && PyErr_Occurred())
1228
0
                break;
1229
0
            c.imag = r_float_str(p);
1230
0
            if (c.imag == -1.0 && PyErr_Occurred())
1231
0
                break;
1232
0
            retval = PyComplex_FromCComplex(c);
1233
0
            R_REF(retval);
1234
0
            break;
1235
0
        }
1236
1237
0
    case TYPE_BINARY_COMPLEX:
1238
0
        {
1239
0
            Py_complex c;
1240
0
            c.real = r_float_bin(p);
1241
0
            if (c.real == -1.0 && PyErr_Occurred())
1242
0
                break;
1243
0
            c.imag = r_float_bin(p);
1244
0
            if (c.imag == -1.0 && PyErr_Occurred())
1245
0
                break;
1246
0
            retval = PyComplex_FromCComplex(c);
1247
0
            R_REF(retval);
1248
0
            break;
1249
0
        }
1250
1251
76.2k
    case TYPE_STRING:
1252
76.2k
        {
1253
76.2k
            const char *ptr;
1254
76.2k
            n = r_long(p);
1255
76.2k
            if (n < 0 || n > SIZE32_MAX) {
1256
0
                if (!PyErr_Occurred()) {
1257
0
                    PyErr_SetString(PyExc_ValueError,
1258
0
                        "bad marshal data (bytes object size out of range)");
1259
0
                }
1260
0
                break;
1261
0
            }
1262
76.2k
            v = PyBytes_FromStringAndSize((char *)NULL, n);
1263
76.2k
            if (v == NULL)
1264
0
                break;
1265
76.2k
            ptr = r_string(n, p);
1266
76.2k
            if (ptr == NULL) {
1267
0
                Py_DECREF(v);
1268
0
                break;
1269
0
            }
1270
76.2k
            memcpy(PyBytes_AS_STRING(v), ptr, n);
1271
76.2k
            retval = v;
1272
76.2k
            R_REF(retval);
1273
76.2k
            break;
1274
76.2k
        }
1275
1276
0
    case TYPE_ASCII_INTERNED:
1277
0
        is_interned = 1;
1278
0
        _Py_FALLTHROUGH;
1279
2.21k
    case TYPE_ASCII:
1280
2.21k
        n = r_long(p);
1281
2.21k
        if (n < 0 || n > SIZE32_MAX) {
1282
0
            if (!PyErr_Occurred()) {
1283
0
                PyErr_SetString(PyExc_ValueError,
1284
0
                    "bad marshal data (string size out of range)");
1285
0
            }
1286
0
            break;
1287
0
        }
1288
2.21k
        goto _read_ascii;
1289
1290
120k
    case TYPE_SHORT_ASCII_INTERNED:
1291
120k
        is_interned = 1;
1292
120k
        _Py_FALLTHROUGH;
1293
142k
    case TYPE_SHORT_ASCII:
1294
142k
        n = r_byte(p);
1295
142k
        if (n == EOF) {
1296
0
            break;
1297
0
        }
1298
145k
    _read_ascii:
1299
145k
        {
1300
145k
            const char *ptr;
1301
145k
            ptr = r_string(n, p);
1302
145k
            if (ptr == NULL)
1303
0
                break;
1304
145k
            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1305
145k
            if (v == NULL)
1306
0
                break;
1307
145k
            if (is_interned) {
1308
                // marshal is meant to serialize .pyc files with code
1309
                // objects, and code-related strings are currently immortal.
1310
120k
                PyInterpreterState *interp = _PyInterpreterState_GET();
1311
120k
                _PyUnicode_InternImmortal(interp, &v);
1312
120k
            }
1313
145k
            retval = v;
1314
145k
            R_REF(retval);
1315
145k
            break;
1316
145k
        }
1317
1318
101
    case TYPE_INTERNED:
1319
101
        is_interned = 1;
1320
101
        _Py_FALLTHROUGH;
1321
1.74k
    case TYPE_UNICODE:
1322
1.74k
        {
1323
1.74k
        const char *buffer;
1324
1325
1.74k
        n = r_long(p);
1326
1.74k
        if (n < 0 || n > SIZE32_MAX) {
1327
0
            if (!PyErr_Occurred()) {
1328
0
                PyErr_SetString(PyExc_ValueError,
1329
0
                    "bad marshal data (string size out of range)");
1330
0
            }
1331
0
            break;
1332
0
        }
1333
1.74k
        if (n != 0) {
1334
1.74k
            buffer = r_string(n, p);
1335
1.74k
            if (buffer == NULL)
1336
0
                break;
1337
1.74k
            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1338
1.74k
        }
1339
0
        else {
1340
0
            v = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1341
0
        }
1342
1.74k
        if (v == NULL)
1343
0
            break;
1344
1.74k
        if (is_interned) {
1345
            // marshal is meant to serialize .pyc files with code
1346
            // objects, and code-related strings are currently immortal.
1347
101
            PyInterpreterState *interp = _PyInterpreterState_GET();
1348
101
            _PyUnicode_InternImmortal(interp, &v);
1349
101
        }
1350
1.74k
        retval = v;
1351
1.74k
        R_REF(retval);
1352
1.74k
        break;
1353
1.74k
        }
1354
1355
59.5k
    case TYPE_SMALL_TUPLE:
1356
59.5k
        n = r_byte(p);
1357
59.5k
        if (n == EOF) {
1358
0
            break;
1359
0
        }
1360
59.5k
        goto _read_tuple;
1361
59.5k
    case TYPE_TUPLE:
1362
17
        n = r_long(p);
1363
17
        if (n < 0 || n > SIZE32_MAX) {
1364
0
            if (!PyErr_Occurred()) {
1365
0
                PyErr_SetString(PyExc_ValueError,
1366
0
                    "bad marshal data (tuple size out of range)");
1367
0
            }
1368
0
            break;
1369
0
        }
1370
59.5k
    _read_tuple:
1371
59.5k
        v = PyTuple_New(n);
1372
59.5k
        R_REF(v);
1373
59.5k
        if (v == NULL)
1374
0
            break;
1375
1376
407k
        for (i = 0; i < n; i++) {
1377
347k
            v2 = r_object(p);
1378
347k
            if ( v2 == NULL ) {
1379
0
                if (!PyErr_Occurred())
1380
0
                    PyErr_SetString(PyExc_TypeError,
1381
0
                        "NULL object in marshal data for tuple");
1382
0
                Py_SETREF(v, NULL);
1383
0
                break;
1384
0
            }
1385
347k
            PyTuple_SET_ITEM(v, i, v2);
1386
347k
        }
1387
59.5k
        retval = v;
1388
59.5k
        break;
1389
1390
0
    case TYPE_LIST:
1391
0
        n = r_long(p);
1392
0
        if (n < 0 || n > SIZE32_MAX) {
1393
0
            if (!PyErr_Occurred()) {
1394
0
                PyErr_SetString(PyExc_ValueError,
1395
0
                    "bad marshal data (list size out of range)");
1396
0
            }
1397
0
            break;
1398
0
        }
1399
0
        v = PyList_New(n);
1400
0
        R_REF(v);
1401
0
        if (v == NULL)
1402
0
            break;
1403
0
        for (i = 0; i < n; i++) {
1404
0
            v2 = r_object(p);
1405
0
            if ( v2 == NULL ) {
1406
0
                if (!PyErr_Occurred())
1407
0
                    PyErr_SetString(PyExc_TypeError,
1408
0
                        "NULL object in marshal data for list");
1409
0
                Py_SETREF(v, NULL);
1410
0
                break;
1411
0
            }
1412
0
            PyList_SET_ITEM(v, i, v2);
1413
0
        }
1414
0
        retval = v;
1415
0
        break;
1416
1417
0
    case TYPE_DICT:
1418
0
        v = PyDict_New();
1419
0
        R_REF(v);
1420
0
        if (v == NULL)
1421
0
            break;
1422
0
        for (;;) {
1423
0
            PyObject *key, *val;
1424
0
            key = r_object(p);
1425
0
            if (key == NULL)
1426
0
                break;
1427
0
            val = r_object(p);
1428
0
            if (val == NULL) {
1429
0
                Py_DECREF(key);
1430
0
                break;
1431
0
            }
1432
0
            if (PyDict_SetItem(v, key, val) < 0) {
1433
0
                Py_DECREF(key);
1434
0
                Py_DECREF(val);
1435
0
                break;
1436
0
            }
1437
0
            Py_DECREF(key);
1438
0
            Py_DECREF(val);
1439
0
        }
1440
0
        if (PyErr_Occurred()) {
1441
0
            Py_SETREF(v, NULL);
1442
0
        }
1443
0
        retval = v;
1444
0
        break;
1445
1446
0
    case TYPE_SET:
1447
124
    case TYPE_FROZENSET:
1448
124
        n = r_long(p);
1449
124
        if (n < 0 || n > SIZE32_MAX) {
1450
0
            if (!PyErr_Occurred()) {
1451
0
                PyErr_SetString(PyExc_ValueError,
1452
0
                    "bad marshal data (set size out of range)");
1453
0
            }
1454
0
            break;
1455
0
        }
1456
1457
124
        if (n == 0 && type == TYPE_FROZENSET) {
1458
            /* call frozenset() to get the empty frozenset singleton */
1459
0
            v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1460
0
            if (v == NULL)
1461
0
                break;
1462
0
            R_REF(v);
1463
0
            retval = v;
1464
0
        }
1465
124
        else {
1466
124
            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1467
124
            if (type == TYPE_SET) {
1468
0
                R_REF(v);
1469
124
            } else {
1470
                /* must use delayed registration of frozensets because they must
1471
                 * be init with a refcount of 1
1472
                 */
1473
124
                idx = r_ref_reserve(flag, p);
1474
124
                if (idx < 0)
1475
0
                    Py_CLEAR(v); /* signal error */
1476
124
            }
1477
124
            if (v == NULL)
1478
0
                break;
1479
1480
591
            for (i = 0; i < n; i++) {
1481
467
                v2 = r_object(p);
1482
467
                if ( v2 == NULL ) {
1483
0
                    if (!PyErr_Occurred())
1484
0
                        PyErr_SetString(PyExc_TypeError,
1485
0
                            "NULL object in marshal data for set");
1486
0
                    Py_SETREF(v, NULL);
1487
0
                    break;
1488
0
                }
1489
467
                if (PySet_Add(v, v2) == -1) {
1490
0
                    Py_DECREF(v);
1491
0
                    Py_DECREF(v2);
1492
0
                    v = NULL;
1493
0
                    break;
1494
0
                }
1495
467
                Py_DECREF(v2);
1496
467
            }
1497
124
            if (type != TYPE_SET)
1498
124
                v = r_ref_insert(v, idx, flag, p);
1499
124
            retval = v;
1500
124
        }
1501
124
        break;
1502
1503
25.1k
    case TYPE_CODE:
1504
25.1k
        {
1505
25.1k
            int argcount;
1506
25.1k
            int posonlyargcount;
1507
25.1k
            int kwonlyargcount;
1508
25.1k
            int stacksize;
1509
25.1k
            int flags;
1510
25.1k
            PyObject *code = NULL;
1511
25.1k
            PyObject *consts = NULL;
1512
25.1k
            PyObject *names = NULL;
1513
25.1k
            PyObject *localsplusnames = NULL;
1514
25.1k
            PyObject *localspluskinds = NULL;
1515
25.1k
            PyObject *filename = NULL;
1516
25.1k
            PyObject *name = NULL;
1517
25.1k
            PyObject *qualname = NULL;
1518
25.1k
            int firstlineno;
1519
25.1k
            PyObject* linetable = NULL;
1520
25.1k
            PyObject *exceptiontable = NULL;
1521
1522
25.1k
            if (!p->allow_code) {
1523
0
                PyErr_SetString(PyExc_ValueError,
1524
0
                                "unmarshalling code objects is disallowed");
1525
0
                break;
1526
0
            }
1527
25.1k
            idx = r_ref_reserve(flag, p);
1528
25.1k
            if (idx < 0)
1529
0
                break;
1530
1531
25.1k
            v = NULL;
1532
1533
            /* XXX ignore long->int overflows for now */
1534
25.1k
            argcount = (int)r_long(p);
1535
25.1k
            if (argcount == -1 && PyErr_Occurred())
1536
0
                goto code_error;
1537
25.1k
            posonlyargcount = (int)r_long(p);
1538
25.1k
            if (posonlyargcount == -1 && PyErr_Occurred()) {
1539
0
                goto code_error;
1540
0
            }
1541
25.1k
            kwonlyargcount = (int)r_long(p);
1542
25.1k
            if (kwonlyargcount == -1 && PyErr_Occurred())
1543
0
                goto code_error;
1544
25.1k
            stacksize = (int)r_long(p);
1545
25.1k
            if (stacksize == -1 && PyErr_Occurred())
1546
0
                goto code_error;
1547
25.1k
            flags = (int)r_long(p);
1548
25.1k
            if (flags == -1 && PyErr_Occurred())
1549
0
                goto code_error;
1550
25.1k
            code = r_object(p);
1551
25.1k
            if (code == NULL)
1552
0
                goto code_error;
1553
25.1k
            consts = r_object(p);
1554
25.1k
            if (consts == NULL)
1555
0
                goto code_error;
1556
25.1k
            names = r_object(p);
1557
25.1k
            if (names == NULL)
1558
0
                goto code_error;
1559
25.1k
            localsplusnames = r_object(p);
1560
25.1k
            if (localsplusnames == NULL)
1561
0
                goto code_error;
1562
25.1k
            localspluskinds = r_object(p);
1563
25.1k
            if (localspluskinds == NULL)
1564
0
                goto code_error;
1565
25.1k
            filename = r_object(p);
1566
25.1k
            if (filename == NULL)
1567
0
                goto code_error;
1568
25.1k
            name = r_object(p);
1569
25.1k
            if (name == NULL)
1570
0
                goto code_error;
1571
25.1k
            qualname = r_object(p);
1572
25.1k
            if (qualname == NULL)
1573
0
                goto code_error;
1574
25.1k
            firstlineno = (int)r_long(p);
1575
25.1k
            if (firstlineno == -1 && PyErr_Occurred())
1576
0
                break;
1577
25.1k
            linetable = r_object(p);
1578
25.1k
            if (linetable == NULL)
1579
0
                goto code_error;
1580
25.1k
            exceptiontable = r_object(p);
1581
25.1k
            if (exceptiontable == NULL)
1582
0
                goto code_error;
1583
1584
25.1k
            struct _PyCodeConstructor con = {
1585
25.1k
                .filename = filename,
1586
25.1k
                .name = name,
1587
25.1k
                .qualname = qualname,
1588
25.1k
                .flags = flags,
1589
1590
25.1k
                .code = code,
1591
25.1k
                .firstlineno = firstlineno,
1592
25.1k
                .linetable = linetable,
1593
1594
25.1k
                .consts = consts,
1595
25.1k
                .names = names,
1596
1597
25.1k
                .localsplusnames = localsplusnames,
1598
25.1k
                .localspluskinds = localspluskinds,
1599
1600
25.1k
                .argcount = argcount,
1601
25.1k
                .posonlyargcount = posonlyargcount,
1602
25.1k
                .kwonlyargcount = kwonlyargcount,
1603
1604
25.1k
                .stacksize = stacksize,
1605
1606
25.1k
                .exceptiontable = exceptiontable,
1607
25.1k
            };
1608
1609
25.1k
            if (_PyCode_Validate(&con) < 0) {
1610
0
                goto code_error;
1611
0
            }
1612
1613
25.1k
            v = (PyObject *)_PyCode_New(&con);
1614
25.1k
            if (v == NULL) {
1615
0
                goto code_error;
1616
0
            }
1617
1618
25.1k
            v = r_ref_insert(v, idx, flag, p);
1619
1620
25.1k
          code_error:
1621
25.1k
            if (v == NULL && !PyErr_Occurred()) {
1622
0
                PyErr_SetString(PyExc_TypeError,
1623
0
                    "NULL object in marshal data for code object");
1624
0
            }
1625
25.1k
            Py_XDECREF(code);
1626
25.1k
            Py_XDECREF(consts);
1627
25.1k
            Py_XDECREF(names);
1628
25.1k
            Py_XDECREF(localsplusnames);
1629
25.1k
            Py_XDECREF(localspluskinds);
1630
25.1k
            Py_XDECREF(filename);
1631
25.1k
            Py_XDECREF(name);
1632
25.1k
            Py_XDECREF(qualname);
1633
25.1k
            Py_XDECREF(linetable);
1634
25.1k
            Py_XDECREF(exceptiontable);
1635
25.1k
        }
1636
0
        retval = v;
1637
25.1k
        break;
1638
1639
268k
    case TYPE_REF:
1640
268k
        n = r_long(p);
1641
268k
        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1642
0
            if (!PyErr_Occurred()) {
1643
0
                PyErr_SetString(PyExc_ValueError,
1644
0
                    "bad marshal data (invalid reference)");
1645
0
            }
1646
0
            break;
1647
0
        }
1648
268k
        v = PyList_GET_ITEM(p->refs, n);
1649
268k
        if (v == Py_None) {
1650
0
            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1651
0
            break;
1652
0
        }
1653
268k
        retval = Py_NewRef(v);
1654
268k
        break;
1655
1656
1.06k
    case TYPE_SLICE:
1657
1.06k
    {
1658
1.06k
        Py_ssize_t idx = r_ref_reserve(flag, p);
1659
1.06k
        if (idx < 0) {
1660
0
            break;
1661
0
        }
1662
1.06k
        PyObject *stop = NULL;
1663
1.06k
        PyObject *step = NULL;
1664
1.06k
        PyObject *start = r_object(p);
1665
1.06k
        if (start == NULL) {
1666
0
            goto cleanup;
1667
0
        }
1668
1.06k
        stop = r_object(p);
1669
1.06k
        if (stop == NULL) {
1670
0
            goto cleanup;
1671
0
        }
1672
1.06k
        step = r_object(p);
1673
1.06k
        if (step == NULL) {
1674
0
            goto cleanup;
1675
0
        }
1676
1.06k
        retval = PySlice_New(start, stop, step);
1677
1.06k
        r_ref_insert(retval, idx, flag, p);
1678
1.06k
    cleanup:
1679
1.06k
        Py_XDECREF(start);
1680
1.06k
        Py_XDECREF(stop);
1681
1.06k
        Py_XDECREF(step);
1682
1.06k
        break;
1683
1.06k
    }
1684
1685
0
    default:
1686
        /* Bogus data got written, which isn't ideal.
1687
           This will let you keep working and recover. */
1688
0
        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1689
0
        break;
1690
1691
603k
    }
1692
603k
    p->depth--;
1693
603k
    return retval;
1694
603k
}
1695
1696
static PyObject *
1697
read_object(RFILE *p)
1698
728
{
1699
728
    PyObject *v;
1700
728
    if (PyErr_Occurred()) {
1701
0
        fprintf(stderr, "XXX readobject called with exception set\n");
1702
0
        return NULL;
1703
0
    }
1704
728
    if (p->ptr && p->end) {
1705
728
        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1706
0
            return NULL;
1707
0
        }
1708
728
    } else if (p->fp || p->readable) {
1709
0
        if (PySys_Audit("marshal.load", NULL) < 0) {
1710
0
            return NULL;
1711
0
        }
1712
0
    }
1713
728
    v = r_object(p);
1714
728
    if (v == NULL && !PyErr_Occurred())
1715
0
        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1716
728
    return v;
1717
728
}
1718
1719
int
1720
PyMarshal_ReadShortFromFile(FILE *fp)
1721
0
{
1722
0
    RFILE rf;
1723
0
    int res;
1724
0
    assert(fp);
1725
0
    rf.readable = NULL;
1726
0
    rf.fp = fp;
1727
0
    rf.end = rf.ptr = NULL;
1728
0
    rf.buf = NULL;
1729
0
    res = r_short(&rf);
1730
0
    if (rf.buf != NULL)
1731
0
        PyMem_Free(rf.buf);
1732
0
    return res;
1733
0
}
1734
1735
long
1736
PyMarshal_ReadLongFromFile(FILE *fp)
1737
0
{
1738
0
    RFILE rf;
1739
0
    long res;
1740
0
    rf.fp = fp;
1741
0
    rf.readable = NULL;
1742
0
    rf.ptr = rf.end = NULL;
1743
0
    rf.buf = NULL;
1744
0
    res = r_long(&rf);
1745
0
    if (rf.buf != NULL)
1746
0
        PyMem_Free(rf.buf);
1747
0
    return res;
1748
0
}
1749
1750
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1751
static off_t
1752
getfilesize(FILE *fp)
1753
0
{
1754
0
    struct _Py_stat_struct st;
1755
0
    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1756
0
        return -1;
1757
#if SIZEOF_OFF_T == 4
1758
    else if (st.st_size >= INT_MAX)
1759
        return (off_t)INT_MAX;
1760
#endif
1761
0
    else
1762
0
        return (off_t)st.st_size;
1763
0
}
1764
1765
/* If we can get the size of the file up-front, and it's reasonably small,
1766
 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1767
 * than reading a byte at a time from file; speeds .pyc imports.
1768
 * CAUTION:  since this may read the entire remainder of the file, don't
1769
 * call it unless you know you're done with the file.
1770
 */
1771
PyObject *
1772
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1773
0
{
1774
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1775
0
#define REASONABLE_FILE_LIMIT (1L << 18)
1776
0
    off_t filesize;
1777
0
    filesize = getfilesize(fp);
1778
0
    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1779
0
        char* pBuf = (char *)PyMem_Malloc(filesize);
1780
0
        if (pBuf != NULL) {
1781
0
            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1782
0
            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1783
0
            PyMem_Free(pBuf);
1784
0
            return v;
1785
0
        }
1786
1787
0
    }
1788
    /* We don't have fstat, or we do but the file is larger than
1789
     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1790
     */
1791
0
    return PyMarshal_ReadObjectFromFile(fp);
1792
1793
0
#undef REASONABLE_FILE_LIMIT
1794
0
}
1795
1796
PyObject *
1797
PyMarshal_ReadObjectFromFile(FILE *fp)
1798
0
{
1799
0
    RFILE rf;
1800
0
    PyObject *result;
1801
0
    rf.allow_code = 1;
1802
0
    rf.fp = fp;
1803
0
    rf.readable = NULL;
1804
0
    rf.depth = 0;
1805
0
    rf.ptr = rf.end = NULL;
1806
0
    rf.buf = NULL;
1807
0
    rf.refs = PyList_New(0);
1808
0
    if (rf.refs == NULL)
1809
0
        return NULL;
1810
0
    result = read_object(&rf);
1811
0
    Py_DECREF(rf.refs);
1812
0
    if (rf.buf != NULL)
1813
0
        PyMem_Free(rf.buf);
1814
0
    return result;
1815
0
}
1816
1817
PyObject *
1818
PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1819
218
{
1820
218
    RFILE rf;
1821
218
    PyObject *result;
1822
218
    rf.allow_code = 1;
1823
218
    rf.fp = NULL;
1824
218
    rf.readable = NULL;
1825
218
    rf.ptr = str;
1826
218
    rf.end = str + len;
1827
218
    rf.buf = NULL;
1828
218
    rf.depth = 0;
1829
218
    rf.refs = PyList_New(0);
1830
218
    if (rf.refs == NULL)
1831
0
        return NULL;
1832
218
    result = read_object(&rf);
1833
218
    Py_DECREF(rf.refs);
1834
218
    if (rf.buf != NULL)
1835
0
        PyMem_Free(rf.buf);
1836
218
    return result;
1837
218
}
1838
1839
/* Marshal `x` into a bytes object using format `version`.
 *
 * Raises the "marshal.dumps" audit event, writes the object into a bytes
 * buffer via w_object(), trims that buffer to the bytes actually written and
 * returns it.  `allow_code` is stored on the writer state for w_object() to
 * consult.  On any writer error the buffer is dropped, the exception that
 * corresponds to the recorded error code is raised, and NULL is returned.
 */
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
        return NULL;
    }

    WFILE wf;
    memset(&wf, 0, sizeof(wf));

    /* Start with a 50-byte output buffer. */
    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
    if (wf.str == NULL) {
        return NULL;
    }
    wf.buf = PyBytes_AS_STRING(wf.str);
    wf.ptr = wf.buf;
    wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
    wf.error = WFERR_OK;
    wf.version = version;
    wf.allow_code = allow_code;
    if (w_init_refs(&wf, version)) {
        Py_DECREF(wf.str);
        return NULL;
    }

    w_object(x, &wf);
    w_clear_refs(&wf);

    /* Shrink the buffer to the number of bytes actually produced.  This is
       done before looking at wf.error, exactly as the result is consumed. */
    if (wf.str != NULL) {
        const char *start = PyBytes_AS_STRING(wf.str);
        Py_ssize_t used = (Py_ssize_t)(wf.ptr - start);
        if (_PyBytes_Resize(&wf.str, used) < 0) {
            return NULL;
        }
    }

    if (wf.error == WFERR_OK) {
        return wf.str;
    }

    /* Failure: discard the partial output and translate the error code. */
    Py_XDECREF(wf.str);
    if (wf.error == WFERR_NOMEMORY) {
        PyErr_NoMemory();
    }
    else {
        const char *message;
        if (wf.error == WFERR_NESTEDTOODEEP) {
            message = "object too deeply nested to marshal";
        }
        else if (wf.error == WFERR_CODE_NOT_ALLOWED) {
            message = "marshalling code objects is disallowed";
        }
        else {
            /* WFERR_UNMARSHALLABLE and any unrecognised code. */
            message = "unmarshallable object";
        }
        PyErr_SetString(PyExc_ValueError, message);
    }
    return NULL;
}
1891
1892
/* Public wrapper: marshal `x` with format `version`, passing 1 for the
 * allow_code argument of _PyMarshal_WriteObjectToString().
 */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;
    PyObject *encoded = _PyMarshal_WriteObjectToString(x, version, allow_code);
    return encoded;
}
1897
1898
/* And an interface for Python programs... */
1899
/*[clinic input]
1900
marshal.dump
1901
1902
    value: object
1903
        Must be a supported type.
1904
    file: object
1905
        Must be a writeable binary file.
1906
    version: int(c_default="Py_MARSHAL_VERSION") = version
1907
        Indicates the data format that dump should use.
1908
    /
1909
    *
1910
    allow_code: bool = True
1911
        Allow to write code objects.
1912
1913
Write the value on the open file.
1914
1915
If the value has (or contains an object that has) an unsupported type, a
1916
ValueError exception is raised - but garbage data will also be written
1917
to the file. The object will not be properly read back by load().
1918
[clinic start generated code]*/
1919
1920
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
                  int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
    /* XXX Quick hack -- need to do this differently.
       The value is marshalled completely into a bytes object first, and
       that object is then handed to file.write() in a single call. */
    PyObject *payload = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    if (payload == NULL) {
        return NULL;
    }

    /* The result of file.write() (or NULL on error) is what we return. */
    PyObject *written = PyObject_CallMethodOneArg(file, &_Py_ID(write),
                                                  payload);
    Py_DECREF(payload);
    return written;
}
1936
1937
/*[clinic input]
1938
marshal.load
1939
1940
    file: object
1941
        Must be readable binary file.
1942
    /
1943
    *
1944
    allow_code: bool = True
1945
        Allow to load code objects.
1946
1947
Read one value from the open file and return it.
1948
1949
If no valid value is read (e.g. because the data has a different Python
1950
version's incompatible marshal format), raise EOFError, ValueError or
1951
TypeError.
1952
1953
Note: If an object containing an unsupported type was marshalled with
1954
dump(), load() will substitute None for the unmarshallable type.
1955
[clinic start generated code]*/
1956
1957
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
    RFILE rf;
    PyObject *result = NULL;

    /*
     * Probe the file object by calling read(0).  This verifies up front
     * that it has a read method and that the method returns bytes.
     * The probe can be removed once r_string() is guaranteed to report
     * such errors properly on its own.
     */
    PyObject *probe = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
    if (probe == NULL) {
        return NULL;
    }
    if (!PyBytes_Check(probe)) {
        PyErr_Format(PyExc_TypeError,
                     "file.read() returned not bytes but %.100s",
                     Py_TYPE(probe)->tp_name);
        goto done;
    }

    rf.allow_code = allow_code;
    rf.depth = 0;
    rf.fp = NULL;
    rf.readable = file;
    rf.ptr = NULL;
    rf.end = NULL;
    rf.buf = NULL;
    rf.refs = PyList_New(0);
    if (rf.refs == NULL) {
        goto done;
    }

    result = read_object(&rf);
    Py_DECREF(rf.refs);
    if (rf.buf != NULL) {
        PyMem_Free(rf.buf);
    }

done:
    /* The probe result is only released once reading has finished. */
    Py_DECREF(probe);
    return result;
}
1998
1999
/*[clinic input]
2000
marshal.dumps
2001
2002
    value: object
2003
        Must be a supported type.
2004
    version: int(c_default="Py_MARSHAL_VERSION") = version
2005
        Indicates the data format that dumps should use.
2006
    /
2007
    *
2008
    allow_code: bool = True
2009
        Allow to write code objects.
2010
2011
Return the bytes object that would be written to a file by dump(value, file).
2012
2013
Raise a ValueError exception if value has (or contains an object that has) an
2014
unsupported type.
2015
[clinic start generated code]*/
2016
2017
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
                   int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=167eaecceb63f0a8]*/
{
    /* All of the work, including error reporting, happens in the shared
       writer helper; its result is returned unchanged. */
    PyObject *encoded = _PyMarshal_WriteObjectToString(value, version,
                                                       allow_code);
    return encoded;
}
2024
2025
/*[clinic input]
2026
marshal.loads
2027
2028
    bytes: Py_buffer
2029
    /
2030
    *
2031
    allow_code: bool = True
2032
        Allow to load code objects.
2033
2034
Convert the bytes-like object to a value.
2035
2036
If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
2037
bytes in the input are ignored.
2038
[clinic start generated code]*/
2039
2040
static PyObject *
2041
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
2042
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
2043
510
{
2044
510
    RFILE rf;
2045
510
    char *s = bytes->buf;
2046
510
    Py_ssize_t n = bytes->len;
2047
510
    PyObject* result;
2048
510
    rf.allow_code = allow_code;
2049
510
    rf.fp = NULL;
2050
510
    rf.readable = NULL;
2051
510
    rf.ptr = s;
2052
510
    rf.end = s + n;
2053
510
    rf.depth = 0;
2054
510
    if ((rf.refs = PyList_New(0)) == NULL)
2055
0
        return NULL;
2056
510
    result = read_object(&rf);
2057
510
    Py_DECREF(rf.refs);
2058
510
    return result;
2059
510
}
2060
2061
static PyMethodDef marshal_methods[] = {
2062
    MARSHAL_DUMP_METHODDEF
2063
    MARSHAL_LOAD_METHODDEF
2064
    MARSHAL_DUMPS_METHODDEF
2065
    MARSHAL_LOADS_METHODDEF
2066
    {NULL,              NULL}           /* sentinel */
2067
};
2068
2069
2070
PyDoc_STRVAR(module_doc,
2071
"This module contains functions that can read and write Python values in\n\
2072
a binary format. The format is specific to Python, but independent of\n\
2073
machine architecture issues.\n\
2074
\n\
2075
Not all Python object types are supported; in general, only objects\n\
2076
whose value is independent from a particular invocation of Python can be\n\
2077
written and read by this module. The following types are supported:\n\
2078
None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
2079
tuples, lists, sets, dictionaries, and code objects, where it\n\
2080
should be understood that tuples, lists and dictionaries are only\n\
2081
supported as long as the values contained therein are themselves\n\
2082
supported; and recursive lists and dictionaries should not be written\n\
2083
(they will cause infinite loops).\n\
2084
\n\
2085
Variables:\n\
2086
\n\
2087
version -- indicates the format that the module uses. Version 0 is the\n\
2088
    historical format, version 1 shares interned strings and version 2\n\
2089
    uses a binary format for floating-point numbers.\n\
2090
    Version 3 shares common object references (New in version 3.4).\n\
2091
\n\
2092
Functions:\n\
2093
\n\
2094
dump() -- write value to a file\n\
2095
load() -- read value from a file\n\
2096
dumps() -- marshal value as a bytes object\n\
2097
loads() -- read value from a bytes-like object");
2098
2099
2100
static int
2101
marshal_module_exec(PyObject *mod)
2102
16
{
2103
16
    if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
2104
0
        return -1;
2105
0
    }
2106
16
    return 0;
2107
16
}
2108
2109
static PyModuleDef_Slot marshalmodule_slots[] = {
2110
    {Py_mod_exec, marshal_module_exec},
2111
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2112
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2113
    {0, NULL}
2114
};
2115
2116
static struct PyModuleDef marshalmodule = {
2117
    PyModuleDef_HEAD_INIT,
2118
    .m_name = "marshal",
2119
    .m_doc = module_doc,
2120
    .m_methods = marshal_methods,
2121
    .m_slots = marshalmodule_slots,
2122
};
2123
2124
PyMODINIT_FUNC
2125
PyMarshal_Init(void)
2126
16
{
2127
16
    return PyModuleDef_Init(&marshalmodule);
2128
16
}