Coverage Report

Created: 2026-06-14 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Python-3.8.3/Parser/parsetok.c
Line
Count
Source
1
2
/* Parser-tokenizer link implementation */
3
4
#include "Python.h"
5
#include "tokenizer.h"
6
#include "node.h"
7
#include "grammar.h"
8
#include "parser.h"
9
#include "parsetok.h"
10
#include "errcode.h"
11
#include "graminit.h"
12
13
14
/* Forward */
15
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16
static int initerr(perrdetail *err_ret, PyObject * filename);
17
18
/* Growable list of (line number, comment text) pairs.
   parsetok() uses it to collect TYPE_IGNORE tokens while parsing. */
typedef struct {
    struct {
        int lineno;       /* line number stored alongside the comment */
        char *comment;    /* comment text; freed by the array's deallocator */
    } *items;             /* heap array with room for `size` entries */
    size_t size;          /* number of entries `items` can hold */
    size_t num_items;     /* number of entries currently stored */
} growable_comment_array;
26
27
static int
28
15
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
29
15
    assert(initial_size > 0);
30
15
    arr->items = malloc(initial_size * sizeof(*arr->items));
31
15
    arr->size = initial_size;
32
15
    arr->num_items = 0;
33
34
15
    return arr->items != NULL;
35
15
}
36
37
static int
38
0
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
39
0
    if (arr->num_items >= arr->size) {
40
0
        arr->size *= 2;
41
0
        arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
42
0
        if (!arr->items) {
43
0
            return 0;
44
0
        }
45
0
    }
46
47
0
    arr->items[arr->num_items].lineno = lineno;
48
0
    arr->items[arr->num_items].comment = comment;
49
0
    arr->num_items++;
50
0
    return 1;
51
0
}
52
53
static void
54
15
growable_comment_array_deallocate(growable_comment_array *arr) {
55
15
    for (unsigned i = 0; i < arr->num_items; i++) {
56
0
        PyObject_FREE(arr->items[i].comment);
57
0
    }
58
15
    free(arr->items);
59
15
}
60
61
/* Parse input coming from a string.  Return the parse tree, or NULL with the error described in *err_ret. */
62
node *
63
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
64
0
{
65
0
    return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
66
0
}
67
68
node *
69
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
70
                          perrdetail *err_ret, int flags)
71
0
{
72
0
    return PyParser_ParseStringFlagsFilename(s, NULL,
73
0
                                             g, start, err_ret, flags);
74
0
}
75
76
node *
77
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
78
                          grammar *g, int start,
79
                          perrdetail *err_ret, int flags)
80
0
{
81
0
    int iflags = flags;
82
0
    return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
83
0
                                               err_ret, &iflags);
84
0
}
85
86
node *
87
PyParser_ParseStringObject(const char *s, PyObject *filename,
88
                           grammar *g, int start,
89
                           perrdetail *err_ret, int *flags)
90
15
{
91
15
    struct tok_state *tok;
92
15
    int exec_input = start == file_input;
93
94
15
    if (initerr(err_ret, filename) < 0)
95
0
        return NULL;
96
97
15
    if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
98
0
        err_ret->error = E_ERROR;
99
0
        return NULL;
100
0
    }
101
102
15
    if (*flags & PyPARSE_IGNORE_COOKIE)
103
2
        tok = PyTokenizer_FromUTF8(s, exec_input);
104
13
    else
105
13
        tok = PyTokenizer_FromString(s, exec_input);
106
15
    if (tok == NULL) {
107
0
        err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
108
0
        return NULL;
109
0
    }
110
15
    if (*flags & PyPARSE_TYPE_COMMENTS) {
111
0
        tok->type_comments = 1;
112
0
    }
113
114
15
    Py_INCREF(err_ret->filename);
115
15
    tok->filename = err_ret->filename;
116
15
    if (*flags & PyPARSE_ASYNC_HACKS)
117
0
        tok->async_hacks = 1;
118
15
    return parsetok(tok, g, start, err_ret, flags);
119
15
}
120
121
node *
122
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
123
                          grammar *g, int start,
124
                          perrdetail *err_ret, int *flags)
125
0
{
126
0
    node *n;
127
0
    PyObject *filename = NULL;
128
0
    if (filename_str != NULL) {
129
0
        filename = PyUnicode_DecodeFSDefault(filename_str);
130
0
        if (filename == NULL) {
131
0
            err_ret->error = E_ERROR;
132
0
            return NULL;
133
0
        }
134
0
    }
135
0
    n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
136
0
    Py_XDECREF(filename);
137
0
    return n;
138
0
}
139
140
/* Parse input coming from a file.  Return the parse tree, or NULL with the error described in *err_ret. */
141
142
node *
143
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
144
                   const char *ps1, const char *ps2,
145
                   perrdetail *err_ret)
146
0
{
147
0
    return PyParser_ParseFileFlags(fp, filename, NULL,
148
0
                                   g, start, ps1, ps2, err_ret, 0);
149
0
}
150
151
node *
152
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
153
                        grammar *g, int start,
154
                        const char *ps1, const char *ps2,
155
                        perrdetail *err_ret, int flags)
156
0
{
157
0
    int iflags = flags;
158
0
    return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
159
0
                                     ps2, err_ret, &iflags);
160
0
}
161
162
node *
163
PyParser_ParseFileObject(FILE *fp, PyObject *filename,
164
                         const char *enc, grammar *g, int start,
165
                         const char *ps1, const char *ps2,
166
                         perrdetail *err_ret, int *flags)
167
0
{
168
0
    struct tok_state *tok;
169
170
0
    if (initerr(err_ret, filename) < 0)
171
0
        return NULL;
172
173
0
    if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
174
0
        return NULL;
175
0
    }
176
177
0
    if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
178
0
        err_ret->error = E_NOMEM;
179
0
        return NULL;
180
0
    }
181
0
    if (*flags & PyPARSE_TYPE_COMMENTS) {
182
0
        tok->type_comments = 1;
183
0
    }
184
0
    Py_INCREF(err_ret->filename);
185
0
    tok->filename = err_ret->filename;
186
0
    return parsetok(tok, g, start, err_ret, flags);
187
0
}
188
189
node *
190
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
191
                          const char *enc, grammar *g, int start,
192
                          const char *ps1, const char *ps2,
193
                          perrdetail *err_ret, int *flags)
194
0
{
195
0
    node *n;
196
0
    PyObject *fileobj = NULL;
197
0
    if (filename != NULL) {
198
0
        fileobj = PyUnicode_DecodeFSDefault(filename);
199
0
        if (fileobj == NULL) {
200
0
            err_ret->error = E_ERROR;
201
0
            return NULL;
202
0
        }
203
0
    }
204
0
    n = PyParser_ParseFileObject(fp, fileobj, enc, g,
205
0
                                 start, ps1, ps2, err_ret, flags);
206
0
    Py_XDECREF(fileobj);
207
0
    return n;
208
0
}
209
210
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
/* Everything inside the "#if 0" below is compiled out. */
#if 0
/* printf-style templates; arguments are (filename, line number). */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write the warning `msg` to stderr for `filename`:`lineno`.
   A NULL filename is reported as "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown_name = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown_name, lineno);
}
#endif
#endif
227
228
/* Parse input coming from the given tokenizer structure.
229
   Return the parse tree, or NULL with the error recorded in err_ret. */
230
231
static node *
232
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
233
         int *flags)
234
15
{
235
15
    parser_state *ps;
236
15
    node *n;
237
15
    int started = 0;
238
15
    int col_offset, end_col_offset;
239
15
    growable_comment_array type_ignores;
240
241
15
    if (!growable_comment_array_init(&type_ignores, 10)) {
242
0
        err_ret->error = E_NOMEM;
243
0
        PyTokenizer_Free(tok);
244
0
        return NULL;
245
0
    }
246
247
15
    if ((ps = PyParser_New(g, start)) == NULL) {
248
0
        err_ret->error = E_NOMEM;
249
0
        growable_comment_array_deallocate(&type_ignores);
250
0
        PyTokenizer_Free(tok);
251
0
        return NULL;
252
0
    }
253
15
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
254
15
    if (*flags & PyPARSE_BARRY_AS_BDFL)
255
0
        ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
256
15
    if (*flags & PyPARSE_TYPE_COMMENTS)
257
0
        ps->p_flags |= PyCF_TYPE_COMMENTS;
258
15
#endif
259
260
1.38k
    for (;;) {
261
1.38k
        char *a, *b;
262
1.38k
        int type;
263
1.38k
        size_t len;
264
1.38k
        char *str;
265
1.38k
        col_offset = -1;
266
1.38k
        int lineno;
267
1.38k
        const char *line_start;
268
269
1.38k
        type = PyTokenizer_Get(tok, &a, &b);
270
1.38k
        if (type == ERRORTOKEN) {
271
0
            err_ret->error = tok->done;
272
0
            break;
273
0
        }
274
1.38k
        if (type == ENDMARKER && started) {
275
15
            type = NEWLINE; /* Add an extra newline */
276
15
            started = 0;
277
            /* Add the right number of dedent tokens,
278
               except if a certain flag is given --
279
               codeop.py uses this. */
280
15
            if (tok->indent &&
281
0
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
282
0
            {
283
0
                tok->pendin = -tok->indent;
284
0
                tok->indent = 0;
285
0
            }
286
15
        }
287
1.37k
        else
288
1.37k
            started = 1;
289
1.38k
        len = (a != NULL && b != NULL) ? b - a : 0;
290
1.38k
        str = (char *) PyObject_MALLOC(len + 1);
291
1.38k
        if (str == NULL) {
292
0
            err_ret->error = E_NOMEM;
293
0
            break;
294
0
        }
295
1.38k
        if (len > 0)
296
1.09k
            strncpy(str, a, len);
297
1.38k
        str[len] = '\0';
298
299
1.38k
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
300
1.38k
        if (type == NOTEQUAL) {
301
6
            if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
302
6
                            strcmp(str, "!=")) {
303
0
                PyObject_FREE(str);
304
0
                err_ret->error = E_SYNTAX;
305
0
                break;
306
0
            }
307
6
            else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
308
0
                            strcmp(str, "<>")) {
309
0
                PyObject_FREE(str);
310
0
                err_ret->expected = NOTEQUAL;
311
0
                err_ret->error = E_SYNTAX;
312
0
                break;
313
0
            }
314
6
        }
315
1.38k
#endif
316
317
        /* Nodes of type STRING, especially multi line strings
318
           must be handled differently in order to get both
319
           the starting line number and the column offset right.
320
           (cf. issue 16806) */
321
1.38k
        lineno = type == STRING ? tok->first_lineno : tok->lineno;
322
1.38k
        line_start = type == STRING ? tok->multi_line_start : tok->line_start;
323
1.38k
        if (a != NULL && a >= line_start) {
324
1.24k
            col_offset = Py_SAFE_DOWNCAST(a - line_start,
325
1.24k
                                          intptr_t, int);
326
1.24k
        }
327
142
        else {
328
142
            col_offset = -1;
329
142
        }
330
331
1.38k
        if (b != NULL && b >= tok->line_start) {
332
1.24k
            end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
333
1.24k
                                              intptr_t, int);
334
1.24k
        }
335
142
        else {
336
142
            end_col_offset = -1;
337
142
        }
338
339
1.38k
        if (type == TYPE_IGNORE) {
340
0
            if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
341
0
                err_ret->error = E_NOMEM;
342
0
                break;
343
0
            }
344
0
            continue;
345
0
        }
346
347
1.38k
        if ((err_ret->error =
348
1.38k
             PyParser_AddToken(ps, (int)type, str,
349
1.38k
                               lineno, col_offset, tok->lineno, end_col_offset,
350
1.38k
                               &(err_ret->expected))) != E_OK) {
351
15
            if (err_ret->error != E_DONE) {
352
0
                PyObject_FREE(str);
353
0
                err_ret->token = type;
354
0
            }
355
15
            break;
356
15
        }
357
1.38k
    }
358
359
15
    if (err_ret->error == E_DONE) {
360
15
        n = ps->p_tree;
361
15
        ps->p_tree = NULL;
362
363
15
        if (n->n_type == file_input) {
364
            /* Put type_ignore nodes in the ENDMARKER of file_input. */
365
15
            int num;
366
15
            node *ch;
367
15
            size_t i;
368
369
15
            num = NCH(n);
370
15
            ch = CHILD(n, num - 1);
371
15
            REQ(ch, ENDMARKER);
372
373
15
            for (i = 0; i < type_ignores.num_items; i++) {
374
0
                int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
375
0
                                          type_ignores.items[i].lineno, 0,
376
0
                                          type_ignores.items[i].lineno, 0);
377
0
                if (res != 0) {
378
0
                    err_ret->error = res;
379
0
                    PyNode_Free(n);
380
0
                    n = NULL;
381
0
                    break;
382
0
                }
383
0
                type_ignores.items[i].comment = NULL;
384
0
            }
385
15
        }
386
387
        /* Check that the source for a single input statement really
388
           is a single statement by looking at what is left in the
389
           buffer after parsing.  Trailing whitespace and comments
390
           are OK.  */
391
15
        if (err_ret->error == E_DONE && start == single_input) {
392
0
            char *cur = tok->cur;
393
0
            char c = *tok->cur;
394
395
0
            for (;;) {
396
0
                while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
397
0
                    c = *++cur;
398
399
0
                if (!c)
400
0
                    break;
401
402
0
                if (c != '#') {
403
0
                    err_ret->error = E_BADSINGLE;
404
0
                    PyNode_Free(n);
405
0
                    n = NULL;
406
0
                    break;
407
0
                }
408
409
                /* Suck up comment. */
410
0
                while (c && c != '\n')
411
0
                    c = *++cur;
412
0
            }
413
0
        }
414
15
    }
415
0
    else
416
0
        n = NULL;
417
418
15
    growable_comment_array_deallocate(&type_ignores);
419
420
15
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
421
15
    *flags = ps->p_flags;
422
15
#endif
423
15
    PyParser_Delete(ps);
424
425
15
    if (n == NULL) {
426
0
        if (tok->done == E_EOF)
427
0
            err_ret->error = E_EOF;
428
0
        err_ret->lineno = tok->lineno;
429
0
        if (tok->buf != NULL) {
430
0
            size_t len;
431
0
            assert(tok->cur - tok->buf < INT_MAX);
432
            /* if we've managed to parse a token, point the offset to its start,
433
             * else use the current reading position of the tokenizer
434
             */
435
0
            err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
436
0
            len = tok->inp - tok->buf;
437
0
            err_ret->text = (char *) PyObject_MALLOC(len + 1);
438
0
            if (err_ret->text != NULL) {
439
0
                if (len > 0)
440
0
                    strncpy(err_ret->text, tok->buf, len);
441
0
                err_ret->text[len] = '\0';
442
0
            }
443
0
        }
444
15
    } else if (tok->encoding != NULL) {
445
        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
446
         * allocated using PyMem_
447
         */
448
2
        node* r = PyNode_New(encoding_decl);
449
2
        if (r)
450
2
            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
451
2
        if (!r || !r->n_str) {
452
0
            err_ret->error = E_NOMEM;
453
0
            if (r)
454
0
                PyObject_FREE(r);
455
0
            n = NULL;
456
0
            goto done;
457
0
        }
458
2
        strcpy(r->n_str, tok->encoding);
459
2
        PyMem_FREE(tok->encoding);
460
2
        tok->encoding = NULL;
461
2
        r->n_nchildren = 1;
462
2
        r->n_child = n;
463
2
        n = r;
464
2
    }
465
466
15
done:
467
15
    PyTokenizer_Free(tok);
468
469
15
    if (n != NULL) {
470
15
        _PyNode_FinalizeEndPos(n);
471
15
    }
472
15
    return n;
473
15
}
474
475
static int
476
initerr(perrdetail *err_ret, PyObject *filename)
477
15
{
478
15
    err_ret->error = E_OK;
479
15
    err_ret->lineno = 0;
480
15
    err_ret->offset = 0;
481
15
    err_ret->text = NULL;
482
15
    err_ret->token = -1;
483
15
    err_ret->expected = -1;
484
15
    if (filename) {
485
15
        Py_INCREF(filename);
486
15
        err_ret->filename = filename;
487
15
    }
488
0
    else {
489
0
        err_ret->filename = PyUnicode_FromString("<string>");
490
0
        if (err_ret->filename == NULL) {
491
0
            err_ret->error = E_ERROR;
492
0
            return -1;
493
0
        }
494
0
    }
495
15
    return 0;
496
15
}