/src/Python-3.8.3/Parser/parser.c
Line  | Count  | Source  | 
1  |  |  | 
2  |  | /* Parser implementation */  | 
3  |  |  | 
4  |  | /* For a description, see the comments at end of this file */  | 
5  |  |  | 
6  |  | /* XXX To do: error recovery */  | 
7  |  |  | 
8  |  | #include "Python.h"  | 
9  |  | #include "token.h"  | 
10  |  | #include "grammar.h"  | 
11  |  | #include "node.h"  | 
12  |  | #include "parser.h"  | 
13  |  | #include "errcode.h"  | 
14  |  | #include "graminit.h"  | 
15  |  |  | 
16  |  |  | 
17  |  | #ifdef Py_DEBUG  | 
18  |  | extern int Py_DebugFlag;  | 
19  |  | #define D(x) if (!Py_DebugFlag); else x  | 
20  |  | #else  | 
21  |  | #define D(x)  | 
22  |  | #endif  | 
23  |  |  | 
24  |  |  | 
25  |  | /* STACK DATA TYPE */  | 
26  |  |  | 
27  |  | static void s_reset(stack *);  | 
28  |  |  | 
29  |  | static void  | 
30  |  | s_reset(stack *s)  | 
31  | 16  | { | 
32  | 16  |     s->s_top = &s->s_base[MAXSTACK];  | 
33  | 16  | }  | 
34  |  |  | 
35  | 6.62k  | #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])  | 
36  |  |  | 
37  |  | static int  | 
38  |  | s_push(stack *s, const dfa *d, node *parent)  | 
39  | 6.62k  | { | 
40  | 6.62k  |     stackentry *top;  | 
41  | 6.62k  |     if (s->s_top == s->s_base) { | 
42  | 0  |         fprintf(stderr, "s_push: parser stack overflow\n");  | 
43  | 0  |         return E_NOMEM;  | 
44  | 0  |     }  | 
45  | 6.62k  |     top = --s->s_top;  | 
46  | 6.62k  |     top->s_dfa = d;  | 
47  | 6.62k  |     top->s_parent = parent;  | 
48  | 6.62k  |     top->s_state = 0;  | 
49  | 6.62k  |     return 0;  | 
50  | 6.62k  | }  | 
51  |  |  | 
52  |  | #ifdef Py_DEBUG  | 
53  |  |  | 
54  |  | static void  | 
55  |  | s_pop(stack *s)  | 
56  |  | { | 
57  |  |     if (s_empty(s))  | 
58  |  |         Py_FatalError("s_pop: parser stack underflow -- FATAL"); | 
59  |  |     s->s_top++;  | 
60  |  | }  | 
61  |  |  | 
62  |  | #else /* !Py_DEBUG */  | 
63  |  |  | 
64  | 6.62k  | #define s_pop(s) (s)->s_top++  | 
65  |  |  | 
66  |  | #endif  | 
67  |  |  | 
68  |  |  | 
69  |  | /* PARSER CREATION */  | 
70  |  |  | 
71  |  | parser_state *  | 
72  |  | PyParser_New(grammar *g, int start)  | 
73  | 16  | { | 
74  | 16  |     parser_state *ps;  | 
75  |  |  | 
76  | 16  |     if (!g->g_accel)  | 
77  | 14  |         PyGrammar_AddAccelerators(g);  | 
78  | 16  |     ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));  | 
79  | 16  |     if (ps == NULL)  | 
80  | 0  |         return NULL;  | 
81  | 16  |     ps->p_grammar = g;  | 
82  | 16  | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD  | 
83  | 16  |     ps->p_flags = 0;  | 
84  | 16  | #endif  | 
85  | 16  |     ps->p_tree = PyNode_New(start);  | 
86  | 16  |     if (ps->p_tree == NULL) { | 
87  | 0  |         PyMem_FREE(ps);  | 
88  | 0  |         return NULL;  | 
89  | 0  |     }  | 
90  | 16  |     s_reset(&ps->p_stack);  | 
91  | 16  |     (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);  | 
92  | 16  |     return ps;  | 
93  | 16  | }  | 
94  |  |  | 
95  |  | void  | 
96  |  | PyParser_Delete(parser_state *ps)  | 
97  | 16  | { | 
98  |  |     /* NB If you want to save the parse tree,  | 
99  |  |        you must set p_tree to NULL before calling delparser! */  | 
100  | 16  |     PyNode_Free(ps->p_tree);  | 
101  | 16  |     PyMem_FREE(ps);  | 
102  | 16  | }  | 
103  |  |  | 
104  |  |  | 
105  |  | /* PARSER STACK OPERATIONS */  | 
106  |  |  | 
107  |  | static int  | 
108  |  | shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset,  | 
109  |  |       int end_lineno, int end_col_offset)  | 
110  | 1.41k  | { | 
111  | 1.41k  |     int err;  | 
112  | 1.41k  |     assert(!s_empty(s));  | 
113  | 1.41k  |     err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset,  | 
114  | 1.41k  |                           end_lineno, end_col_offset);  | 
115  | 1.41k  |     if (err)  | 
116  | 0  |         return err;  | 
117  | 1.41k  |     s->s_top->s_state = newstate;  | 
118  | 1.41k  |     return 0;  | 
119  | 1.41k  | }  | 
120  |  |  | 
121  |  | static int  | 
122  |  | push(stack *s, int type, const dfa *d, int newstate, int lineno, int col_offset,  | 
123  |  |      int end_lineno, int end_col_offset)  | 
124  | 6.60k  | { | 
125  | 6.60k  |     int err;  | 
126  | 6.60k  |     node *n;  | 
127  | 6.60k  |     n = s->s_top->s_parent;  | 
128  | 6.60k  |     assert(!s_empty(s));  | 
129  | 6.60k  |     err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset,  | 
130  | 6.60k  |                           end_lineno, end_col_offset);  | 
131  | 6.60k  |     if (err)  | 
132  | 0  |         return err;  | 
133  | 6.60k  |     s->s_top->s_state = newstate;  | 
134  | 6.60k  |     return s_push(s, d, CHILD(n, NCH(n)-1));  | 
135  | 6.60k  | }  | 
136  |  |  | 
137  |  |  | 
138  |  | /* PARSER PROPER */  | 
139  |  |  | 
140  |  | static int  | 
141  |  | classify(parser_state *ps, int type, const char *str)  | 
142  | 1.41k  | { | 
143  | 1.41k  |     grammar *g = ps->p_grammar;  | 
144  | 1.41k  |     int n = g->g_ll.ll_nlabels;  | 
145  |  |  | 
146  | 1.41k  |     if (type == NAME) { | 
147  | 524  |         const label *l = g->g_ll.ll_label;  | 
148  | 524  |         int i;  | 
149  | 80.6k  |         for (i = n; i > 0; i--, l++) { | 
150  | 80.2k  |             if (l->lb_type != NAME || l->lb_str == NULL ||  | 
151  | 15.5k  |                 l->lb_str[0] != str[0] ||  | 
152  | 824  |                 strcmp(l->lb_str, str) != 0)  | 
153  | 80.1k  |                 continue;  | 
154  | 134  | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD  | 
155  |  | #if 0  | 
156  |  |             /* Leaving this in as an example */  | 
157  |  |             if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { | 
158  |  |                 if (str[0] == 'w' && strcmp(str, "with") == 0)  | 
159  |  |                     break; /* not a keyword yet */  | 
160  |  |                 else if (str[0] == 'a' && strcmp(str, "as") == 0)  | 
161  |  |                     break; /* not a keyword yet */  | 
162  |  |             }  | 
163  |  | #endif  | 
164  | 134  | #endif  | 
165  | 134  |             D(printf("It's a keyword\n")); | 
166  | 134  |             return n - i;  | 
167  | 80.2k  |         }  | 
168  | 524  |     }  | 
169  |  |  | 
170  | 1.27k  |     { | 
171  | 1.27k  |         const label *l = g->g_ll.ll_label;  | 
172  | 1.27k  |         int i;  | 
173  | 66.7k  |         for (i = n; i > 0; i--, l++) { | 
174  | 66.7k  |             if (l->lb_type == type && l->lb_str == NULL) { | 
175  | 1.27k  |                 D(printf("It's a token we know\n")); | 
176  | 1.27k  |                 return n - i;  | 
177  | 1.27k  |             }  | 
178  | 66.7k  |         }  | 
179  | 1.27k  |     }  | 
180  |  |  | 
181  | 0  |     D(printf("Illegal token\n")); | 
182  | 0  |     return -1;  | 
183  | 1.27k  | }  | 
184  |  |  | 
185  |  | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD  | 
186  |  | #if 0  | 
187  |  | /* Leaving this in as an example */  | 
188  |  | static void  | 
189  |  | future_hack(parser_state *ps)  | 
190  |  | { | 
191  |  |     node *n = ps->p_stack.s_top->s_parent;  | 
192  |  |     node *ch, *cch;  | 
193  |  |     int i;  | 
194  |  |  | 
195  |  |     /* from __future__ import ..., must have at least 4 children */  | 
196  |  |     n = CHILD(n, 0);  | 
197  |  |     if (NCH(n) < 4)  | 
198  |  |         return;  | 
199  |  |     ch = CHILD(n, 0);  | 
200  |  |     if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)  | 
201  |  |         return;  | 
202  |  |     ch = CHILD(n, 1);  | 
203  |  |     if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&  | 
204  |  |         strcmp(STR(CHILD(ch, 0)), "__future__") != 0)  | 
205  |  |         return;  | 
206  |  |     ch = CHILD(n, 3);  | 
207  |  |     /* ch can be a star, a parenthesis or import_as_names */  | 
208  |  |     if (TYPE(ch) == STAR)  | 
209  |  |         return;  | 
210  |  |     if (TYPE(ch) == LPAR)  | 
211  |  |         ch = CHILD(n, 4);  | 
212  |  |  | 
213  |  |     for (i = 0; i < NCH(ch); i += 2) { | 
214  |  |         cch = CHILD(ch, i);  | 
215  |  |         if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { | 
216  |  |             char *str_ch = STR(CHILD(cch, 0));  | 
217  |  |             if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { | 
218  |  |                 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;  | 
219  |  |             } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { | 
220  |  |                 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;  | 
221  |  |             } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { | 
222  |  |                 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;  | 
223  |  |             }  | 
224  |  |         }  | 
225  |  |     }  | 
226  |  | }  | 
227  |  | #endif  | 
228  |  | #endif /* future keyword */  | 
229  |  |  | 
230  |  | int  | 
231  |  | PyParser_AddToken(parser_state *ps, int type, char *str,  | 
232  |  |                   int lineno, int col_offset,  | 
233  |  |                   int end_lineno, int end_col_offset,  | 
234  |  |                   int *expected_ret)  | 
235  | 1.41k  | { | 
236  | 1.41k  |     int ilabel;  | 
237  | 1.41k  |     int err;  | 
238  |  |  | 
239  | 1.41k  |     D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); | 
240  |  |  | 
241  |  |     /* Find out which label this token is */  | 
242  | 1.41k  |     ilabel = classify(ps, type, str);  | 
243  | 1.41k  |     if (ilabel < 0)  | 
244  | 0  |         return E_SYNTAX;  | 
245  |  |  | 
246  |  |     /* Loop until the token is shifted or an error occurred */  | 
247  | 13.7k  |     for (;;) { | 
248  |  |         /* Fetch the current dfa and state */  | 
249  | 13.7k  |         const dfa *d = ps->p_stack.s_top->s_dfa;  | 
250  | 13.7k  |         state *s = &d->d_state[ps->p_stack.s_top->s_state];  | 
251  |  |  | 
252  | 13.7k  |         D(printf(" DFA '%s', state %d:", | 
253  | 13.7k  |             d->d_name, ps->p_stack.s_top->s_state));  | 
254  |  |  | 
255  |  |         /* Check accelerator */  | 
256  | 13.7k  |         if (s->s_lower <= ilabel && ilabel < s->s_upper) { | 
257  | 8.85k  |             int x = s->s_accel[ilabel - s->s_lower];  | 
258  | 8.85k  |             if (x != -1) { | 
259  | 8.01k  |                 if (x & (1<<7)) { | 
260  |  |                     /* Push non-terminal */  | 
261  | 6.60k  |                     int nt = (x >> 8) + NT_OFFSET;  | 
262  | 6.60k  |                     int arrow = x & ((1<<7)-1);  | 
263  | 6.60k  |                     if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) { | 
264  |  |                         /* When parsing type comments is not requested,  | 
265  |  |                            we can provide better errors about bad indentation  | 
266  |  |                            by using 'suite' for the body of a funcdef */  | 
267  | 4  |                         D(printf(" [switch func_body_suite to suite]")); | 
268  | 4  |                         nt = suite;  | 
269  | 4  |                     }  | 
270  | 6.60k  |                     const dfa *d1 = PyGrammar_FindDFA(  | 
271  | 6.60k  |                         ps->p_grammar, nt);  | 
272  | 6.60k  |                     if ((err = push(&ps->p_stack, nt, d1,  | 
273  | 6.60k  |                         arrow, lineno, col_offset,  | 
274  | 6.60k  |                         end_lineno, end_col_offset)) > 0) { | 
275  | 0  |                         D(printf(" MemError: push\n")); | 
276  | 0  |                         return err;  | 
277  | 0  |                     }  | 
278  | 6.60k  |                     D(printf(" Push '%s'\n", d1->d_name)); | 
279  | 6.60k  |                     continue;  | 
280  | 6.60k  |                 }  | 
281  |  |  | 
282  |  |                 /* Shift the token */  | 
283  | 1.41k  |                 if ((err = shift(&ps->p_stack, type, str,  | 
284  | 1.41k  |                                 x, lineno, col_offset,  | 
285  | 1.41k  |                                 end_lineno, end_col_offset)) > 0) { | 
286  | 0  |                     D(printf(" MemError: shift.\n")); | 
287  | 0  |                     return err;  | 
288  | 0  |                 }  | 
289  | 1.41k  |                 D(printf(" Shift.\n")); | 
290  |  |                 /* Pop while we are in an accept-only state */  | 
291  | 2.27k  |                 while (s = &d->d_state  | 
292  | 2.27k  |                                 [ps->p_stack.s_top->s_state],  | 
293  | 2.27k  |                     s->s_accept && s->s_narcs == 1) { | 
294  | 883  |                     D(printf("  DFA '%s', state %d: " | 
295  | 883  |                              "Direct pop.\n",  | 
296  | 883  |                              d->d_name,  | 
297  | 883  |                              ps->p_stack.s_top->s_state));  | 
298  | 883  | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD  | 
299  |  | #if 0  | 
300  |  |                     if (d->d_name[0] == 'i' &&  | 
301  |  |                         strcmp(d->d_name,  | 
302  |  |                            "import_stmt") == 0)  | 
303  |  |                         future_hack(ps);  | 
304  |  | #endif  | 
305  | 883  | #endif  | 
306  | 883  |                     s_pop(&ps->p_stack);  | 
307  | 883  |                     if (s_empty(&ps->p_stack)) { | 
308  | 16  |                         D(printf("  ACCEPT.\n")); | 
309  | 16  |                         return E_DONE;  | 
310  | 16  |                     }  | 
311  | 867  |                     d = ps->p_stack.s_top->s_dfa;  | 
312  | 867  |                 }  | 
313  | 1.39k  |                 return E_OK;  | 
314  | 1.41k  |             }  | 
315  | 8.85k  |         }  | 
316  |  |  | 
317  | 5.73k  |         if (s->s_accept) { | 
318  | 5.73k  | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD  | 
319  |  | #if 0  | 
320  |  |             if (d->d_name[0] == 'i' &&  | 
321  |  |                 strcmp(d->d_name, "import_stmt") == 0)  | 
322  |  |                 future_hack(ps);  | 
323  |  | #endif  | 
324  | 5.73k  | #endif  | 
325  |  |             /* Pop this dfa and try again */  | 
326  | 5.73k  |             s_pop(&ps->p_stack);  | 
327  | 5.73k  |             D(printf(" Pop ...\n")); | 
328  | 5.73k  |             if (s_empty(&ps->p_stack)) { | 
329  | 0  |                 D(printf(" Error: bottom of stack.\n")); | 
330  | 0  |                 return E_SYNTAX;  | 
331  | 0  |             }  | 
332  | 5.73k  |             continue;  | 
333  | 5.73k  |         }  | 
334  |  |  | 
335  |  |         /* Stuck, report syntax error */  | 
336  | 0  |         D(printf(" Error.\n")); | 
337  | 0  |         if (expected_ret) { | 
338  | 0  |             if (s->s_lower == s->s_upper - 1) { | 
339  |  |                 /* Only one possible expected token */  | 
340  | 0  |                 *expected_ret = ps->p_grammar->  | 
341  | 0  |                     g_ll.ll_label[s->s_lower].lb_type;  | 
342  | 0  |             }  | 
343  | 0  |             else  | 
344  | 0  |                 *expected_ret = -1;  | 
345  | 0  |         }  | 
346  | 0  |         return E_SYNTAX;  | 
347  | 5.73k  |     }  | 
348  | 1.41k  | }  | 
349  |  |  | 
350  |  |  | 
351  |  | #ifdef Py_DEBUG  | 
352  |  |  | 
353  |  | /* DEBUG OUTPUT */  | 
354  |  |  | 
355  |  | void  | 
356  |  | dumptree(grammar *g, node *n)  | 
357  |  | { | 
358  |  |     int i;  | 
359  |  |  | 
360  |  |     if (n == NULL)  | 
361  |  |         printf("NIL"); | 
362  |  |     else { | 
363  |  |         label l;  | 
364  |  |         l.lb_type = TYPE(n);  | 
365  |  |         l.lb_str = STR(n);  | 
366  |  |         printf("%s", PyGrammar_LabelRepr(&l)); | 
367  |  |         if (ISNONTERMINAL(TYPE(n))) { | 
368  |  |             printf("("); | 
369  |  |             for (i = 0; i < NCH(n); i++) { | 
370  |  |                 if (i > 0)  | 
371  |  |                     printf(","); | 
372  |  |                 dumptree(g, CHILD(n, i));  | 
373  |  |             }  | 
374  |  |             printf(")"); | 
375  |  |         }  | 
376  |  |     }  | 
377  |  | }  | 
378  |  |  | 
379  |  | void  | 
380  |  | showtree(grammar *g, node *n)  | 
381  |  | { | 
382  |  |     int i;  | 
383  |  |  | 
384  |  |     if (n == NULL)  | 
385  |  |         return;  | 
386  |  |     if (ISNONTERMINAL(TYPE(n))) { | 
387  |  |         for (i = 0; i < NCH(n); i++)  | 
388  |  |             showtree(g, CHILD(n, i));  | 
389  |  |     }  | 
390  |  |     else if (ISTERMINAL(TYPE(n))) { | 
391  |  |         printf("%s", _PyParser_TokenNames[TYPE(n)]); | 
392  |  |         if (TYPE(n) == NUMBER || TYPE(n) == NAME)  | 
393  |  |             printf("(%s)", STR(n)); | 
394  |  |         printf(" "); | 
395  |  |     }  | 
396  |  |     else  | 
397  |  |         printf("? "); | 
398  |  | }  | 
399  |  |  | 
400  |  | void  | 
401  |  | printtree(parser_state *ps)  | 
402  |  | { | 
403  |  |     if (Py_DebugFlag) { | 
404  |  |         printf("Parse tree:\n"); | 
405  |  |         dumptree(ps->p_grammar, ps->p_tree);  | 
406  |  |         printf("\n"); | 
407  |  |         printf("Tokens:\n"); | 
408  |  |         showtree(ps->p_grammar, ps->p_tree);  | 
409  |  |         printf("\n"); | 
410  |  |     }  | 
411  |  |     printf("Listing:\n"); | 
412  |  |     PyNode_ListTree(ps->p_tree);  | 
413  |  |     printf("\n"); | 
414  |  | }  | 
415  |  |  | 
416  |  | #endif /* Py_DEBUG */  | 
417  |  |  | 
418  |  | /*  | 
419  |  |  | 
420  |  | Description  | 
421  |  | -----------  | 
422  |  |  | 
423  |  | The parser's interface is different than usual: the function addtoken()  | 
424  |  | must be called for each token in the input.  This makes it possible to  | 
425  |  | turn it into an incremental parsing system later.  The parsing system  | 
426  |  | constructs a parse tree as it goes.  | 
427  |  |  | 
428  |  | A parsing rule is represented as a Deterministic Finite-state Automaton  | 
429  |  | (DFA).  A node in a DFA represents a state of the parser; an arc represents  | 
430  |  | a transition.  Transitions are either labeled with terminal symbols or  | 
431  |  | with non-terminals.  When the parser decides to follow an arc labeled  | 
432  |  | with a non-terminal, it is invoked recursively with the DFA representing  | 
433  |  | the parsing rule for that as its initial state; when that DFA accepts,  | 
434  |  | the parser that invoked it continues.  The parse tree constructed by the  | 
435  |  | recursively called parser is inserted as a child in the current parse tree.  | 
436  |  |  | 
437  |  | The DFA's can be constructed automatically from a more conventional  | 
438  |  | language description.  An extended LL(1) grammar (ELL(1)) is suitable.  | 
439  |  | Certain restrictions make the parser's life easier: rules that can produce  | 
440  |  | the empty string should be outlawed (there are other ways to put loops  | 
441  |  | or optional parts in the language).  To avoid the need to construct  | 
442  |  | FIRST sets, we can require that all but the last alternative of a rule  | 
443  |  | (really: arc going out of a DFA's state) must begin with a terminal  | 
444  |  | symbol.  | 
445  |  |  | 
446  |  | As an example, consider this grammar:  | 
447  |  |  | 
448  |  | expr:   term (OP term)*  | 
449  |  | term:   CONSTANT | '(' expr ')' | 
450  |  |  | 
451  |  | The DFA corresponding to the rule for expr is:  | 
452  |  |  | 
453  |  | ------->.---term-->.------->  | 
454  |  |     ^          |  | 
455  |  |     |          |  | 
456  |  |     \----OP----/  | 
457  |  |  | 
458  |  | The parse tree generated for the input a+b is:  | 
459  |  |  | 
460  |  | (expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))  | 
461  |  |  | 
462  |  | */  |