Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Parser/parsetok.c
Line | Count | Source
   1 |       |
   2 |       | /* Parser-tokenizer link implementation */
   3 |       |
   4 |       | #include "Python.h"
   5 |       | #include "tokenizer.h"
   6 |       | #include "node.h"
   7 |       | #include "grammar.h"
   8 |       | #include "parser.h"
   9 |       | #include "parsetok.h"
  10 |       | #include "errcode.h"
  11 |       | #include "graminit.h"
  12 |       |
  13 |       |
  14 |       | /* Forward */
  15 |       | static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
  16 |       | static int initerr(perrdetail *err_ret, PyObject * filename);
  17 |       |
  18 |       | typedef struct {
  19 |       |     struct {
  20 |       |         int lineno;
  21 |       |         char *comment;
  22 |       |     } *items;
  23 |       |     size_t size;
  24 |       |     size_t num_items;
  25 |       | } growable_comment_array;
  26 |       |
  27 |       | static int
  28 |    16 | growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
  29 |    16 |     assert(initial_size > 0);
  30 |    16 |     arr->items = malloc(initial_size * sizeof(*arr->items));
  31 |    16 |     arr->size = initial_size;
  32 |    16 |     arr->num_items = 0;
  33 |       |
  34 |    16 |     return arr->items != NULL;
  35 |    16 | }
  36 |       |
  37 |       | static int
  38 |     0 | growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
  39 |     0 |     if (arr->num_items >= arr->size) {
  40 |     0 |         arr->size *= 2;
  41 |     0 |         arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
  42 |     0 |         if (!arr->items) {
  43 |     0 |             return 0;
  44 |     0 |         }
  45 |     0 |     }
  46 |       |
  47 |     0 |     arr->items[arr->num_items].lineno = lineno;
  48 |     0 |     arr->items[arr->num_items].comment = comment;
  49 |     0 |     arr->num_items++;
  50 |     0 |     return 1;
  51 |     0 | }
  52 |       |
  53 |       | static void
  54 |    16 | growable_comment_array_deallocate(growable_comment_array *arr) {
  55 |    16 |     for (unsigned i = 0; i < arr->num_items; i++) {
  56 |     0 |         PyObject_FREE(arr->items[i].comment);
  57 |     0 |     }
  58 |    16 |     free(arr->items);
  59 |    16 | }
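
Note: the three growable_comment_array_* helpers above are the small doubling array that parsetok() uses to buffer "# type: ignore" comments until the parse tree is complete (init shows 16 hits, while add and its grow path show 0, so no type comments were seen in these runs). The standalone sketch below mirrors the same pattern for illustration only; it is not part of parsetok.c. It stores strdup'd strings released with plain free() instead of the tokenizer-owned PyObject_FREE strings, and it grows through a temporary pointer so a failed realloc does not lose the old block.

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Illustrative copy of the growable_comment_array pattern (not the CPython code). */
    typedef struct {
        struct { int lineno; char *comment; } *items;
        size_t size;        /* allocated capacity */
        size_t num_items;   /* slots in use */
    } comment_array;

    static int comment_array_init(comment_array *arr, size_t initial_size) {
        assert(initial_size > 0);
        arr->items = malloc(initial_size * sizeof(*arr->items));
        arr->size = initial_size;
        arr->num_items = 0;
        return arr->items != NULL;
    }

    static int comment_array_add(comment_array *arr, int lineno, char *comment) {
        if (arr->num_items >= arr->size) {
            /* Double the capacity via a temporary so a failed realloc keeps the old block. */
            void *p = realloc(arr->items, 2 * arr->size * sizeof(*arr->items));
            if (p == NULL)
                return 0;
            arr->items = p;
            arr->size *= 2;
        }
        arr->items[arr->num_items].lineno = lineno;
        arr->items[arr->num_items].comment = comment;
        arr->num_items++;
        return 1;
    }

    static void comment_array_deallocate(comment_array *arr) {
        for (size_t i = 0; i < arr->num_items; i++)
            free(arr->items[i].comment);
        free(arr->items);
    }

    int main(void) {
        comment_array arr;
        if (!comment_array_init(&arr, 10))
            return 1;
        comment_array_add(&arr, 3, strdup("# type: ignore"));
        printf("%zu comment(s), first on line %d\n", arr.num_items, arr.items[0].lineno);
        comment_array_deallocate(&arr);
        return 0;
    }
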
  60 |       |
  61 |       | /* Parse input coming from a string.  Return error code, print some errors. */
  62 |       | node *
  63 |       | PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
  64 |     0 | {
  65 |     0 |     return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
  66 |     0 | }
  67 |       |
  68 |       | node *
  69 |       | PyParser_ParseStringFlags(const char *s, grammar *g, int start,
  70 |       |                           perrdetail *err_ret, int flags)
  71 |     0 | {
  72 |     0 |     return PyParser_ParseStringFlagsFilename(s, NULL,
  73 |     0 |                                              g, start, err_ret, flags);
  74 |     0 | }
  75 |       |
  76 |       | node *
  77 |       | PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
  78 |       |                           grammar *g, int start,
  79 |       |                           perrdetail *err_ret, int flags)
  80 |     0 | {
  81 |     0 |     int iflags = flags;
  82 |     0 |     return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
  83 |     0 |                                                err_ret, &iflags);
  84 |     0 | }
  85 |       |
  86 |       | node *
  87 |       | PyParser_ParseStringObject(const char *s, PyObject *filename,
  88 |       |                            grammar *g, int start,
  89 |       |                            perrdetail *err_ret, int *flags)
  90 |    16 | {
  91 |    16 |     struct tok_state *tok;
  92 |    16 |     int exec_input = start == file_input;
  93 |       |
  94 |    16 |     if (initerr(err_ret, filename) < 0)
  95 |     0 |         return NULL;
  96 |       |
  97 |    16 |     if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
  98 |     0 |         err_ret->error = E_ERROR;
  99 |     0 |         return NULL;
 100 |     0 |     }
 101 |       |
 102 |    16 |     if (*flags & PyPARSE_IGNORE_COOKIE)
 103 |     2 |         tok = PyTokenizer_FromUTF8(s, exec_input);
 104 |    14 |     else
 105 |    14 |         tok = PyTokenizer_FromString(s, exec_input);
 106 |    16 |     if (tok == NULL) {
 107 |     0 |         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
 108 |     0 |         return NULL;
 109 |     0 |     }
 110 |    16 |     if (*flags & PyPARSE_TYPE_COMMENTS) {
 111 |     0 |         tok->type_comments = 1;
 112 |     0 |     }
 113 |       |
 114 |    16 |     Py_INCREF(err_ret->filename);
 115 |    16 |     tok->filename = err_ret->filename;
 116 |    16 |     if (*flags & PyPARSE_ASYNC_HACKS)
 117 |     0 |         tok->async_hacks = 1;
 118 |    16 |     return parsetok(tok, g, start, err_ret, flags);
 119 |    16 | }
 120 |       |
 121 |       | node *
 122 |       | PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
 123 |       |                           grammar *g, int start,
 124 |       |                           perrdetail *err_ret, int *flags)
 125 |     0 | {
 126 |     0 |     node *n;
 127 |     0 |     PyObject *filename = NULL;
 128 |     0 |     if (filename_str != NULL) {
 129 |     0 |         filename = PyUnicode_DecodeFSDefault(filename_str);
 130 |     0 |         if (filename == NULL) {
 131 |     0 |             err_ret->error = E_ERROR;
 132 |     0 |             return NULL;
 133 |     0 |         }
 134 |     0 |     }
 135 |     0 |     n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
 136 |     0 |     Py_XDECREF(filename);
 137 |     0 |     return n;
 138 |     0 | }
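
Note: the string entry points above are thin wrappers that all funnel into PyParser_ParseStringObject(); only that function (and the cookie/UTF-8 tokenizer choice inside it) was exercised in these runs. The sketch below shows a minimal caller. It assumes it is built inside the CPython 3.8 source tree with an initialized interpreter, and that the generated grammar table _PyParser_Grammar from Python/graminit.c is linked in; neither assumption comes from this file.

    #include "Python.h"
    #include "grammar.h"
    #include "node.h"
    #include "parsetok.h"
    #include "graminit.h"     /* start symbols such as file_input */

    extern grammar _PyParser_Grammar;    /* generated table from Python/graminit.c */

    /* Sketch: parse a module-level snippet into an old-style CST node. */
    static node *
    parse_snippet(const char *source)
    {
        perrdetail err;
        node *n = PyParser_ParseStringFlagsFilename(source, "<example>",
                                                    &_PyParser_Grammar,
                                                    file_input, &err, 0);
        if (n == NULL)
            fprintf(stderr, "parse failed: error=%d line=%d offset=%d\n",
                    err.error, err.lineno, err.offset);
        return n;    /* caller eventually frees the tree with PyNode_Free() */
    }

CPython's own callers in Python/pythonrun.c release err.filename and err.text once they have reported the error; a sketch of that cleanup follows the initerr() listing at the end of this report.
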
 139 |       |
 140 |       | /* Parse input coming from a file.  Return error code, print some errors. */
 141 |       |
 142 |       | node *
 143 |       | PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
 144 |       |                    const char *ps1, const char *ps2,
 145 |       |                    perrdetail *err_ret)
 146 |     0 | {
 147 |     0 |     return PyParser_ParseFileFlags(fp, filename, NULL,
 148 |     0 |                                    g, start, ps1, ps2, err_ret, 0);
 149 |     0 | }
 150 |       |
 151 |       | node *
 152 |       | PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
 153 |       |                         grammar *g, int start,
 154 |       |                         const char *ps1, const char *ps2,
 155 |       |                         perrdetail *err_ret, int flags)
 156 |     0 | {
 157 |     0 |     int iflags = flags;
 158 |     0 |     return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
 159 |     0 |                                      ps2, err_ret, &iflags);
 160 |     0 | }
 161 |       |
 162 |       | node *
 163 |       | PyParser_ParseFileObject(FILE *fp, PyObject *filename,
 164 |       |                          const char *enc, grammar *g, int start,
 165 |       |                          const char *ps1, const char *ps2,
 166 |       |                          perrdetail *err_ret, int *flags)
 167 |     0 | {
 168 |     0 |     struct tok_state *tok;
 169 |       |
 170 |     0 |     if (initerr(err_ret, filename) < 0)
 171 |     0 |         return NULL;
 172 |       |
 173 |     0 |     if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
 174 |     0 |         return NULL;
 175 |     0 |     }
 176 |       |
 177 |     0 |     if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
 178 |     0 |         err_ret->error = E_NOMEM;
 179 |     0 |         return NULL;
 180 |     0 |     }
 181 |     0 |     if (*flags & PyPARSE_TYPE_COMMENTS) {
 182 |     0 |         tok->type_comments = 1;
 183 |     0 |     }
 184 |     0 |     Py_INCREF(err_ret->filename);
 185 |     0 |     tok->filename = err_ret->filename;
 186 |     0 |     return parsetok(tok, g, start, err_ret, flags);
 187 |     0 | }
 188 |       |
 189 |       | node *
 190 |       | PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
 191 |       |                           const char *enc, grammar *g, int start,
 192 |       |                           const char *ps1, const char *ps2,
 193 |       |                           perrdetail *err_ret, int *flags)
 194 |     0 | {
 195 |     0 |     node *n;
 196 |     0 |     PyObject *fileobj = NULL;
 197 |     0 |     if (filename != NULL) {
 198 |     0 |         fileobj = PyUnicode_DecodeFSDefault(filename);
 199 |     0 |         if (fileobj == NULL) {
 200 |     0 |             err_ret->error = E_ERROR;
 201 |     0 |             return NULL;
 202 |     0 |         }
 203 |     0 |     }
 204 |     0 |     n = PyParser_ParseFileObject(fp, fileobj, enc, g,
 205 |     0 |                                  start, ps1, ps2, err_ret, flags);
 206 |     0 |     Py_XDECREF(fileobj);
 207 |     0 |     return n;
 208 |     0 | }
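
Note: the FILE*-based entry points mirror the string ones: enc optionally overrides the source encoding, and ps1/ps2 are the prompts used when the tokenizer reads interactively. None of them were hit in these runs. A hedged sketch of parsing a whole file, under the same assumptions as the string example above:

    #include <stdio.h>
    #include "Python.h"
    #include "grammar.h"
    #include "node.h"
    #include "parsetok.h"
    #include "graminit.h"

    extern grammar _PyParser_Grammar;    /* generated table from Python/graminit.c */

    /* Sketch: parse a source file with no encoding override and no prompts. */
    static node *
    parse_file(const char *path)
    {
        FILE *fp = fopen(path, "rb");
        if (fp == NULL)
            return NULL;

        perrdetail err;
        node *n = PyParser_ParseFileFlags(fp, path, NULL, &_PyParser_Grammar,
                                          file_input, NULL, NULL, &err, 0);
        fclose(fp);
        return n;    /* NULL on failure; the details are left in err */
    }
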
 209 |       |
 210 |       | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 211 |       | #if 0
 212 |       | static const char with_msg[] =
 213 |       | "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
 214 |       |
 215 |       | static const char as_msg[] =
 216 |       | "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
 217 |       |
 218 |       | static void
 219 |       | warn(const char *msg, const char *filename, int lineno)
 220 |       | {
 221 |       |     if (filename == NULL)
 222 |       |         filename = "<string>";
 223 |       |     PySys_WriteStderr(msg, filename, lineno);
 224 |       | }
 225 |       | #endif
 226 |       | #endif
 227 |       |
 228 |       | /* Parse input coming from the given tokenizer structure.
 229 |       |    Return error code. */
 230 |       |
 231 |       | static node *
 232 |       | parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 233 |       |          int *flags)
 234 |    16 | {
 235 |    16 |     parser_state *ps;
 236 |    16 |     node *n;
 237 |    16 |     int started = 0;
 238 |    16 |     int col_offset, end_col_offset;
 239 |    16 |     growable_comment_array type_ignores;
 240 |       |
 241 |    16 |     if (!growable_comment_array_init(&type_ignores, 10)) {
 242 |     0 |         err_ret->error = E_NOMEM;
 243 |     0 |         PyTokenizer_Free(tok);
 244 |     0 |         return NULL;
 245 |     0 |     }
 246 |       |
 247 |    16 |     if ((ps = PyParser_New(g, start)) == NULL) {
 248 |     0 |         err_ret->error = E_NOMEM;
 249 |     0 |         growable_comment_array_deallocate(&type_ignores);
 250 |     0 |         PyTokenizer_Free(tok);
 251 |     0 |         return NULL;
 252 |     0 |     }
 253 |    16 | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 254 |    16 |     if (*flags & PyPARSE_BARRY_AS_BDFL)
 255 |     0 |         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
 256 |    16 |     if (*flags & PyPARSE_TYPE_COMMENTS)
 257 |     0 |         ps->p_flags |= PyCF_TYPE_COMMENTS;
 258 |    16 | #endif
 259 |       |
 260 | 1.41k |     for (;;) {
 261 | 1.41k |         char *a, *b;
 262 | 1.41k |         int type;
 263 | 1.41k |         size_t len;
 264 | 1.41k |         char *str;
 265 | 1.41k |         col_offset = -1;
 266 | 1.41k |         int lineno;
 267 | 1.41k |         const char *line_start;
 268 |       |
 269 | 1.41k |         type = PyTokenizer_Get(tok, &a, &b);
 270 | 1.41k |         if (type == ERRORTOKEN) {
 271 |     0 |             err_ret->error = tok->done;
 272 |     0 |             break;
 273 |     0 |         }
 274 | 1.41k |         if (type == ENDMARKER && started) {
 275 |    16 |             type = NEWLINE; /* Add an extra newline */
 276 |    16 |             started = 0;
 277 |       |             /* Add the right number of dedent tokens,
 278 |       |                except if a certain flag is given --
 279 |       |                codeop.py uses this. */
 280 |    16 |             if (tok->indent &&
 281 |    16 |                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
 282 |     0 |             {
 283 |     0 |                 tok->pendin = -tok->indent;
 284 |     0 |                 tok->indent = 0;
 285 |     0 |             }
 286 |    16 |         }
 287 | 1.39k |         else
 288 | 1.39k |             started = 1;
 289 | 1.41k |         len = (a != NULL && b != NULL) ? b - a : 0;
 290 | 1.41k |         str = (char *) PyObject_MALLOC(len + 1);
 291 | 1.41k |         if (str == NULL) {
 292 |     0 |             err_ret->error = E_NOMEM;
 293 |     0 |             break;
 294 |     0 |         }
 295 | 1.41k |         if (len > 0)
 296 | 1.10k |             strncpy(str, a, len);
 297 | 1.41k |         str[len] = '\0';
 298 |       |
 299 | 1.41k | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 300 | 1.41k |         if (type == NOTEQUAL) {
 301 |     6 |             if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
 302 |     6 |                             strcmp(str, "!=")) {
 303 |     0 |                 PyObject_FREE(str);
 304 |     0 |                 err_ret->error = E_SYNTAX;
 305 |     0 |                 break;
 306 |     0 |             }
 307 |     6 |             else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
 308 |     6 |                             strcmp(str, "<>")) {
 309 |     0 |                 PyObject_FREE(str);
 310 |     0 |                 err_ret->expected = NOTEQUAL;
 311 |     0 |                 err_ret->error = E_SYNTAX;
 312 |     0 |                 break;
 313 |     0 |             }
 314 |     6 |         }
 315 | 1.41k | #endif
 316 |       |
 317 |       |         /* Nodes of type STRING, especially multi line strings
 318 |       |            must be handled differently in order to get both
 319 |       |            the starting line number and the column offset right.
 320 |       |            (cf. issue 16806) */
 321 | 1.41k |         lineno = type == STRING ? tok->first_lineno : tok->lineno;
 322 | 1.41k |         line_start = type == STRING ? tok->multi_line_start : tok->line_start;
 323 | 1.41k |         if (a != NULL && a >= line_start) {
 324 | 1.26k |             col_offset = Py_SAFE_DOWNCAST(a - line_start,
 325 | 1.26k |                                           intptr_t, int);
 326 | 1.26k |         }
 327 |   148 |         else {
 328 |   148 |             col_offset = -1;
 329 |   148 |         }
 330 |       |
 331 | 1.41k |         if (b != NULL && b >= tok->line_start) {
 332 | 1.26k |             end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
 333 | 1.26k |                                               intptr_t, int);
 334 | 1.26k |         }
 335 |   148 |         else {
 336 |   148 |             end_col_offset = -1;
 337 |   148 |         }
 338 |       |
 339 | 1.41k |         if (type == TYPE_IGNORE) {
 340 |     0 |             if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
 341 |     0 |                 err_ret->error = E_NOMEM;
 342 |     0 |                 break;
 343 |     0 |             }
 344 |     0 |             continue;
 345 |     0 |         }
 346 |       |
 347 | 1.41k |         if ((err_ret->error =
 348 | 1.41k |              PyParser_AddToken(ps, (int)type, str,
 349 | 1.41k |                                lineno, col_offset, tok->lineno, end_col_offset,
 350 | 1.41k |                                &(err_ret->expected))) != E_OK) {
 351 |    16 |             if (err_ret->error != E_DONE) {
 352 |     0 |                 PyObject_FREE(str);
 353 |     0 |                 err_ret->token = type;
 354 |     0 |             }
 355 |    16 |             break;
 356 |    16 |         }
 357 | 1.41k |     }
 358 |       |
 359 |    16 |     if (err_ret->error == E_DONE) {
 360 |    16 |         n = ps->p_tree;
 361 |    16 |         ps->p_tree = NULL;
 362 |       |
 363 |    16 |         if (n->n_type == file_input) {
 364 |       |             /* Put type_ignore nodes in the ENDMARKER of file_input. */
 365 |    16 |             int num;
 366 |    16 |             node *ch;
 367 |    16 |             size_t i;
 368 |       |
 369 |    16 |             num = NCH(n);
 370 |    16 |             ch = CHILD(n, num - 1);
 371 |    16 |             REQ(ch, ENDMARKER);
 372 |       |
 373 |    16 |             for (i = 0; i < type_ignores.num_items; i++) {
 374 |     0 |                 int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
 375 |     0 |                                           type_ignores.items[i].lineno, 0,
 376 |     0 |                                           type_ignores.items[i].lineno, 0);
 377 |     0 |                 if (res != 0) {
 378 |     0 |                     err_ret->error = res;
 379 |     0 |                     PyNode_Free(n);
 380 |     0 |                     n = NULL;
 381 |     0 |                     break;
 382 |     0 |                 }
 383 |     0 |                 type_ignores.items[i].comment = NULL;
 384 |     0 |             }
 385 |    16 |         }
 386 |       |
 387 |       |         /* Check that the source for a single input statement really
 388 |       |            is a single statement by looking at what is left in the
 389 |       |            buffer after parsing.  Trailing whitespace and comments
 390 |       |            are OK.  */
 391 |    16 |         if (err_ret->error == E_DONE && start == single_input) {
 392 |     0 |             char *cur = tok->cur;
 393 |     0 |             char c = *tok->cur;
 394 |       |
 395 |     0 |             for (;;) {
 396 |     0 |                 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
 397 |     0 |                     c = *++cur;
 398 |       |
 399 |     0 |                 if (!c)
 400 |     0 |                     break;
 401 |       |
 402 |     0 |                 if (c != '#') {
 403 |     0 |                     err_ret->error = E_BADSINGLE;
 404 |     0 |                     PyNode_Free(n);
 405 |     0 |                     n = NULL;
 406 |     0 |                     break;
 407 |     0 |                 }
 408 |       |
 409 |       |                 /* Suck up comment. */
 410 |     0 |                 while (c && c != '\n')
 411 |     0 |                     c = *++cur;
 412 |     0 |             }
 413 |     0 |         }
 414 |    16 |     }
 415 |     0 |     else
 416 |     0 |         n = NULL;
 417 |       |
 418 |    16 |     growable_comment_array_deallocate(&type_ignores);
 419 |       |
 420 |    16 | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 421 |    16 |     *flags = ps->p_flags;
 422 |    16 | #endif
 423 |    16 |     PyParser_Delete(ps);
 424 |       |
 425 |    16 |     if (n == NULL) {
 426 |     0 |         if (tok->done == E_EOF)
 427 |     0 |             err_ret->error = E_EOF;
 428 |     0 |         err_ret->lineno = tok->lineno;
 429 |     0 |         if (tok->buf != NULL) {
 430 |     0 |             size_t len;
 431 |     0 |             assert(tok->cur - tok->buf < INT_MAX);
 432 |       |             /* if we've managed to parse a token, point the offset to its start,
 433 |       |              * else use the current reading position of the tokenizer
 434 |       |              */
 435 |     0 |             err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
 436 |     0 |             len = tok->inp - tok->buf;
 437 |     0 |             err_ret->text = (char *) PyObject_MALLOC(len + 1);
 438 |     0 |             if (err_ret->text != NULL) {
 439 |     0 |                 if (len > 0)
 440 |     0 |                     strncpy(err_ret->text, tok->buf, len);
 441 |     0 |                 err_ret->text[len] = '\0';
 442 |     0 |             }
 443 |     0 |         }
 444 |    16 |     } else if (tok->encoding != NULL) {
 445 |       |         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
 446 |       |          * allocated using PyMem_
 447 |       |          */
 448 |     2 |         node* r = PyNode_New(encoding_decl);
 449 |     2 |         if (r)
 450 |     2 |             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
 451 |     2 |         if (!r || !r->n_str) {
 452 |     0 |             err_ret->error = E_NOMEM;
 453 |     0 |             if (r)
 454 |     0 |                 PyObject_FREE(r);
 455 |     0 |             n = NULL;
 456 |     0 |             goto done;
 457 |     0 |         }
 458 |     2 |         strcpy(r->n_str, tok->encoding);
 459 |     2 |         PyMem_FREE(tok->encoding);
 460 |     2 |         tok->encoding = NULL;
 461 |     2 |         r->n_nchildren = 1;
 462 |     2 |         r->n_child = n;
 463 |     2 |         n = r;
 464 |     2 |     }
 465 |       |
 466 |    16 | done:
 467 |    16 |     PyTokenizer_Free(tok);
 468 |       |
 469 |    16 |     if (n != NULL) {
 470 |    16 |         _PyNode_FinalizeEndPos(n);
 471 |    16 |     }
 472 |    16 |     return n;
 473 |    16 | }
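
Note: the heart of parsetok() above is a pump loop: it repeatedly asks the tokenizer for the next token (PyTokenizer_Get), copies the token text, and feeds it to the pushdown automaton (PyParser_AddToken) until the parser reports E_DONE or an error. The condensed restatement below is illustrative only, written as if it lived next to parsetok() and relying on the same internal headers this file already includes; it drops the column/offset bookkeeping, the extra-NEWLINE/DEDENT handling at end of input, the BARRY_AS_BDFL and type-comment handling, and the detailed error reporting of the real function.

    /* Illustrative skeleton of the parsetok() pump loop -- not the real function. */
    static node *
    pump_tokens(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret)
    {
        parser_state *ps = PyParser_New(g, start);
        if (ps == NULL) {
            err_ret->error = E_NOMEM;
            PyTokenizer_Free(tok);
            return NULL;
        }

        for (;;) {
            char *a, *b;
            int type = PyTokenizer_Get(tok, &a, &b);   /* [a, b) is the token text */
            if (type == ERRORTOKEN) {
                err_ret->error = tok->done;
                break;
            }

            size_t len = (a != NULL && b != NULL) ? (size_t)(b - a) : 0;
            char *str = (char *) PyObject_MALLOC(len + 1);
            if (str == NULL) {
                err_ret->error = E_NOMEM;
                break;
            }
            if (len > 0)
                memcpy(str, a, len);
            str[len] = '\0';

            err_ret->error = PyParser_AddToken(ps, type, str, tok->lineno, 0,
                                               tok->lineno, 0, &err_ret->expected);
            if (err_ret->error != E_OK) {
                if (err_ret->error != E_DONE)
                    PyObject_FREE(str);    /* token was not consumed by the parser */
                break;                     /* E_DONE: the tree is complete */
            }
        }

        node *n = NULL;
        if (err_ret->error == E_DONE) {
            n = ps->p_tree;
            ps->p_tree = NULL;     /* detach so PyParser_Delete() does not free it */
        }
        PyParser_Delete(ps);
        PyTokenizer_Free(tok);
        return n;
    }
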
 474 |       |
 475 |       | static int
 476 |       | initerr(perrdetail *err_ret, PyObject *filename)
 477 |    16 | {
 478 |    16 |     err_ret->error = E_OK;
 479 |    16 |     err_ret->lineno = 0;
 480 |    16 |     err_ret->offset = 0;
 481 |    16 |     err_ret->text = NULL;
 482 |    16 |     err_ret->token = -1;
 483 |    16 |     err_ret->expected = -1;
 484 |    16 |     if (filename) {
 485 |    16 |         Py_INCREF(filename);
 486 |    16 |         err_ret->filename = filename;
 487 |    16 |     }
 488 |     0 |     else {
 489 |     0 |         err_ret->filename = PyUnicode_FromString("<string>");
 490 |     0 |         if (err_ret->filename == NULL) {
 491 |     0 |             err_ret->error = E_ERROR;
 492 |     0 |             return -1;
 493 |     0 |         }
 494 |     0 |     }
 495 |    16 |     return 0;
 496 |    16 | }
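
Note: initerr() above seeds the perrdetail structure that every entry point fills in on failure; a failed parse can leave an owned reference in err->filename and a PyObject_MALLOC'd copy of the offending line in err->text (see parsetok() lines 425-443). The helper below is an illustrative sketch of how a caller might report and then release those fields, roughly what CPython's own callers in Python/pythonrun.c do; it is not an API defined in this file.

    #include "Python.h"
    #include "grammar.h"
    #include "node.h"
    #include "parsetok.h"
    #include "errcode.h"

    /* Illustrative helper: report a failed parse and release what perrdetail owns. */
    static void
    report_and_clear(perrdetail *err)
    {
        if (err->error != E_OK && err->error != E_DONE) {
            fprintf(stderr, "parse error %d at line %d, offset %d\n",
                    err->error, err->lineno, err->offset);
            if (err->text != NULL)
                fprintf(stderr, "  %s\n", err->text);
        }
        Py_CLEAR(err->filename);            /* reference taken in initerr() */
        if (err->text != NULL) {
            PyObject_FREE(err->text);       /* allocated in parsetok() on error */
            err->text = NULL;
        }
    }
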