Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Parser/parser.c
Line
Count
Source (jump to first uncovered line)
1
2
/* Parser implementation */
3
4
/* For a description, see the comments at end of this file */
5
6
/* XXX To do: error recovery */
7
8
#include "Python.h"
9
#include "token.h"
10
#include "grammar.h"
11
#include "node.h"
12
#include "parser.h"
13
#include "errcode.h"
14
#include "graminit.h"
15
16
17
#ifdef Py_DEBUG
18
extern int Py_DebugFlag;
19
#define D(x) if (!Py_DebugFlag); else x
20
#else
21
#define D(x)
22
#endif
23
24
25
/* STACK DATA TYPE */
26
27
static void s_reset(stack *);
28
29
static void
30
s_reset(stack *s)
31
16
{
32
16
    s->s_top = &s->s_base[MAXSTACK];
33
16
}
34
35
6.62k
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
37
static int
38
s_push(stack *s, const dfa *d, node *parent)
39
6.62k
{
40
6.62k
    stackentry *top;
41
6.62k
    if (s->s_top == s->s_base) {
42
0
        fprintf(stderr, "s_push: parser stack overflow\n");
43
0
        return E_NOMEM;
44
0
    }
45
6.62k
    top = --s->s_top;
46
6.62k
    top->s_dfa = d;
47
6.62k
    top->s_parent = parent;
48
6.62k
    top->s_state = 0;
49
6.62k
    return 0;
50
6.62k
}
51
52
#ifdef Py_DEBUG
53
54
static void
55
s_pop(stack *s)
56
{
57
    if (s_empty(s))
58
        Py_FatalError("s_pop: parser stack underflow -- FATAL");
59
    s->s_top++;
60
}
61
62
#else /* !Py_DEBUG */
63
64
6.62k
#define s_pop(s) (s)->s_top++
65
66
#endif
67
68
69
/* PARSER CREATION */
70
71
parser_state *
72
PyParser_New(grammar *g, int start)
73
16
{
74
16
    parser_state *ps;
75
76
16
    if (!g->g_accel)
77
14
        PyGrammar_AddAccelerators(g);
78
16
    ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79
16
    if (ps == NULL)
80
0
        return NULL;
81
16
    ps->p_grammar = g;
82
16
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83
16
    ps->p_flags = 0;
84
16
#endif
85
16
    ps->p_tree = PyNode_New(start);
86
16
    if (ps->p_tree == NULL) {
87
0
        PyMem_FREE(ps);
88
0
        return NULL;
89
0
    }
90
16
    s_reset(&ps->p_stack);
91
16
    (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92
16
    return ps;
93
16
}
94
95
void
96
PyParser_Delete(parser_state *ps)
97
16
{
98
    /* NB If you want to save the parse tree,
99
       you must set p_tree to NULL before calling delparser! */
100
16
    PyNode_Free(ps->p_tree);
101
16
    PyMem_FREE(ps);
102
16
}
103
104
105
/* PARSER STACK OPERATIONS */
106
107
static int
108
shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset,
109
      int end_lineno, int end_col_offset)
110
1.41k
{
111
1.41k
    int err;
112
1.41k
    assert(!s_empty(s));
113
1.41k
    err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset,
114
1.41k
                          end_lineno, end_col_offset);
115
1.41k
    if (err)
116
0
        return err;
117
1.41k
    s->s_top->s_state = newstate;
118
1.41k
    return 0;
119
1.41k
}
120
121
static int
122
push(stack *s, int type, const dfa *d, int newstate, int lineno, int col_offset,
123
     int end_lineno, int end_col_offset)
124
6.60k
{
125
6.60k
    int err;
126
6.60k
    node *n;
127
6.60k
    n = s->s_top->s_parent;
128
6.60k
    assert(!s_empty(s));
129
6.60k
    err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset,
130
6.60k
                          end_lineno, end_col_offset);
131
6.60k
    if (err)
132
0
        return err;
133
6.60k
    s->s_top->s_state = newstate;
134
6.60k
    return s_push(s, d, CHILD(n, NCH(n)-1));
135
6.60k
}
136
137
138
/* PARSER PROPER */
139
140
static int
141
classify(parser_state *ps, int type, const char *str)
142
1.41k
{
143
1.41k
    grammar *g = ps->p_grammar;
144
1.41k
    int n = g->g_ll.ll_nlabels;
145
146
1.41k
    if (type == NAME) {
147
524
        const label *l = g->g_ll.ll_label;
148
524
        int i;
149
80.6k
        for (i = n; i > 0; i--, l++) {
150
80.2k
            if (l->lb_type != NAME || l->lb_str == NULL ||
151
80.2k
                l->lb_str[0] != str[0] ||
152
80.2k
                strcmp(l->lb_str, str) != 0)
153
80.1k
                continue;
154
134
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
155
#if 0
156
            /* Leaving this in as an example */
157
            if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
158
                if (str[0] == 'w' && strcmp(str, "with") == 0)
159
                    break; /* not a keyword yet */
160
                else if (str[0] == 'a' && strcmp(str, "as") == 0)
161
                    break; /* not a keyword yet */
162
            }
163
#endif
164
134
#endif
165
134
            D(printf("It's a keyword\n"));
166
134
            return n - i;
167
80.2k
        }
168
524
    }
169
170
1.27k
    {
171
1.27k
        const label *l = g->g_ll.ll_label;
172
1.27k
        int i;
173
66.7k
        for (i = n; i > 0; i--, l++) {
174
66.7k
            if (l->lb_type == type && l->lb_str == NULL) {
175
1.27k
                D(printf("It's a token we know\n"));
176
1.27k
                return n - i;
177
1.27k
            }
178
66.7k
        }
179
1.27k
    }
180
181
0
    D(printf("Illegal token\n"));
182
0
    return -1;
183
1.27k
}
184
185
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
186
#if 0
187
/* Leaving this in as an example */
188
static void
189
future_hack(parser_state *ps)
190
{
191
    node *n = ps->p_stack.s_top->s_parent;
192
    node *ch, *cch;
193
    int i;
194
195
    /* from __future__ import ..., must have at least 4 children */
196
    n = CHILD(n, 0);
197
    if (NCH(n) < 4)
198
        return;
199
    ch = CHILD(n, 0);
200
    if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
201
        return;
202
    ch = CHILD(n, 1);
203
    if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
204
        strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
205
        return;
206
    ch = CHILD(n, 3);
207
    /* ch can be a star, a parenthesis or import_as_names */
208
    if (TYPE(ch) == STAR)
209
        return;
210
    if (TYPE(ch) == LPAR)
211
        ch = CHILD(n, 4);
212
213
    for (i = 0; i < NCH(ch); i += 2) {
214
        cch = CHILD(ch, i);
215
        if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
216
            char *str_ch = STR(CHILD(cch, 0));
217
            if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
218
                ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
219
            } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
220
                ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
221
            } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
222
                ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
223
            }
224
        }
225
    }
226
}
227
#endif
228
#endif /* future keyword */
229
230
int
231
PyParser_AddToken(parser_state *ps, int type, char *str,
232
                  int lineno, int col_offset,
233
                  int end_lineno, int end_col_offset,
234
                  int *expected_ret)
235
1.41k
{
236
1.41k
    int ilabel;
237
1.41k
    int err;
238
239
1.41k
    D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
240
241
    /* Find out which label this token is */
242
1.41k
    ilabel = classify(ps, type, str);
243
1.41k
    if (ilabel < 0)
244
0
        return E_SYNTAX;
245
246
    /* Loop until the token is shifted or an error occurred */
247
13.7k
    for (;;) {
248
        /* Fetch the current dfa and state */
249
13.7k
        const dfa *d = ps->p_stack.s_top->s_dfa;
250
13.7k
        state *s = &d->d_state[ps->p_stack.s_top->s_state];
251
252
13.7k
        D(printf(" DFA '%s', state %d:",
253
13.7k
            d->d_name, ps->p_stack.s_top->s_state));
254
255
        /* Check accelerator */
256
13.7k
        if (s->s_lower <= ilabel && ilabel < s->s_upper) {
257
8.85k
            int x = s->s_accel[ilabel - s->s_lower];
258
8.85k
            if (x != -1) {
259
8.01k
                if (x & (1<<7)) {
260
                    /* Push non-terminal */
261
6.60k
                    int nt = (x >> 8) + NT_OFFSET;
262
6.60k
                    int arrow = x & ((1<<7)-1);
263
6.60k
                    if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
264
                        /* When parsing type comments is not requested,
265
                           we can provide better errors about bad indentation
266
                           by using 'suite' for the body of a funcdef */
267
4
                        D(printf(" [switch func_body_suite to suite]"));
268
4
                        nt = suite;
269
4
                    }
270
6.60k
                    const dfa *d1 = PyGrammar_FindDFA(
271
6.60k
                        ps->p_grammar, nt);
272
6.60k
                    if ((err = push(&ps->p_stack, nt, d1,
273
6.60k
                        arrow, lineno, col_offset,
274
6.60k
                        end_lineno, end_col_offset)) > 0) {
275
0
                        D(printf(" MemError: push\n"));
276
0
                        return err;
277
0
                    }
278
6.60k
                    D(printf(" Push '%s'\n", d1->d_name));
279
6.60k
                    continue;
280
6.60k
                }
281
282
                /* Shift the token */
283
1.41k
                if ((err = shift(&ps->p_stack, type, str,
284
1.41k
                                x, lineno, col_offset,
285
1.41k
                                end_lineno, end_col_offset)) > 0) {
286
0
                    D(printf(" MemError: shift.\n"));
287
0
                    return err;
288
0
                }
289
1.41k
                D(printf(" Shift.\n"));
290
                /* Pop while we are in an accept-only state */
291
2.27k
                while (s = &d->d_state
292
2.27k
                                [ps->p_stack.s_top->s_state],
293
2.27k
                    s->s_accept && s->s_narcs == 1) {
294
883
                    D(printf("  DFA '%s', state %d: "
295
883
                             "Direct pop.\n",
296
883
                             d->d_name,
297
883
                             ps->p_stack.s_top->s_state));
298
883
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
299
#if 0
300
                    if (d->d_name[0] == 'i' &&
301
                        strcmp(d->d_name,
302
                           "import_stmt") == 0)
303
                        future_hack(ps);
304
#endif
305
883
#endif
306
883
                    s_pop(&ps->p_stack);
307
883
                    if (s_empty(&ps->p_stack)) {
308
16
                        D(printf("  ACCEPT.\n"));
309
16
                        return E_DONE;
310
16
                    }
311
867
                    d = ps->p_stack.s_top->s_dfa;
312
867
                }
313
1.39k
                return E_OK;
314
1.41k
            }
315
8.85k
        }
316
317
5.73k
        if (s->s_accept) {
318
5.73k
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
319
#if 0
320
            if (d->d_name[0] == 'i' &&
321
                strcmp(d->d_name, "import_stmt") == 0)
322
                future_hack(ps);
323
#endif
324
5.73k
#endif
325
            /* Pop this dfa and try again */
326
5.73k
            s_pop(&ps->p_stack);
327
5.73k
            D(printf(" Pop ...\n"));
328
5.73k
            if (s_empty(&ps->p_stack)) {
329
0
                D(printf(" Error: bottom of stack.\n"));
330
0
                return E_SYNTAX;
331
0
            }
332
5.73k
            continue;
333
5.73k
        }
334
335
        /* Stuck, report syntax error */
336
0
        D(printf(" Error.\n"));
337
0
        if (expected_ret) {
338
0
            if (s->s_lower == s->s_upper - 1) {
339
                /* Only one possible expected token */
340
0
                *expected_ret = ps->p_grammar->
341
0
                    g_ll.ll_label[s->s_lower].lb_type;
342
0
            }
343
0
            else
344
0
                *expected_ret = -1;
345
0
        }
346
0
        return E_SYNTAX;
347
5.73k
    }
348
1.41k
}
349
350
351
#ifdef Py_DEBUG
352
353
/* DEBUG OUTPUT */
354
355
void
356
dumptree(grammar *g, node *n)
357
{
358
    int i;
359
360
    if (n == NULL)
361
        printf("NIL");
362
    else {
363
        label l;
364
        l.lb_type = TYPE(n);
365
        l.lb_str = STR(n);
366
        printf("%s", PyGrammar_LabelRepr(&l));
367
        if (ISNONTERMINAL(TYPE(n))) {
368
            printf("(");
369
            for (i = 0; i < NCH(n); i++) {
370
                if (i > 0)
371
                    printf(",");
372
                dumptree(g, CHILD(n, i));
373
            }
374
            printf(")");
375
        }
376
    }
377
}
378
379
void
380
showtree(grammar *g, node *n)
381
{
382
    int i;
383
384
    if (n == NULL)
385
        return;
386
    if (ISNONTERMINAL(TYPE(n))) {
387
        for (i = 0; i < NCH(n); i++)
388
            showtree(g, CHILD(n, i));
389
    }
390
    else if (ISTERMINAL(TYPE(n))) {
391
        printf("%s", _PyParser_TokenNames[TYPE(n)]);
392
        if (TYPE(n) == NUMBER || TYPE(n) == NAME)
393
            printf("(%s)", STR(n));
394
        printf(" ");
395
    }
396
    else
397
        printf("? ");
398
}
399
400
void
401
printtree(parser_state *ps)
402
{
403
    if (Py_DebugFlag) {
404
        printf("Parse tree:\n");
405
        dumptree(ps->p_grammar, ps->p_tree);
406
        printf("\n");
407
        printf("Tokens:\n");
408
        showtree(ps->p_grammar, ps->p_tree);
409
        printf("\n");
410
    }
411
    printf("Listing:\n");
412
    PyNode_ListTree(ps->p_tree);
413
    printf("\n");
414
}
415
416
#endif /* Py_DEBUG */
417
418
/*
419
420
Description
421
-----------
422
423
The parser's interface is different than usual: the function addtoken()
424
must be called for each token in the input.  This makes it possible to
425
turn it into an incremental parsing system later.  The parsing system
426
constructs a parse tree as it goes.
427
428
A parsing rule is represented as a Deterministic Finite-state Automaton
429
(DFA).  A node in a DFA represents a state of the parser; an arc represents
430
a transition.  Transitions are either labeled with terminal symbols or
431
with non-terminals.  When the parser decides to follow an arc labeled
432
with a non-terminal, it is invoked recursively with the DFA representing
433
the parsing rule for that as its initial state; when that DFA accepts,
434
the parser that invoked it continues.  The parse tree constructed by the
435
recursively called parser is inserted as a child in the current parse tree.
436
437
The DFA's can be constructed automatically from a more conventional
438
language description.  An extended LL(1) grammar (ELL(1)) is suitable.
439
Certain restrictions make the parser's life easier: rules that can produce
440
the empty string should be outlawed (there are other ways to put loops
441
or optional parts in the language).  To avoid the need to construct
442
FIRST sets, we can require that all but the last alternative of a rule
443
(really: arc going out of a DFA's state) must begin with a terminal
444
symbol.
445
446
As an example, consider this grammar:
447
448
expr:   term (OP term)*
449
term:   CONSTANT | '(' expr ')'
450
451
The DFA corresponding to the rule for expr is:
452
453
------->.---term-->.------->
454
    ^          |
455
    |          |
456
    \----OP----/
457
458
The parse tree generated for the input a+b is:
459
460
(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
461
462
*/