/src/Python-3.8.3/Parser/parsetok.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | /* Parser-tokenizer link implementation */ |
3 | | |
4 | | #include "Python.h" |
5 | | #include "tokenizer.h" |
6 | | #include "node.h" |
7 | | #include "grammar.h" |
8 | | #include "parser.h" |
9 | | #include "parsetok.h" |
10 | | #include "errcode.h" |
11 | | #include "graminit.h" |
12 | | |
13 | | |
14 | | /* Forward */ |
15 | | static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); |
16 | | static int initerr(perrdetail *err_ret, PyObject * filename); |
17 | | |
/* A growable array of (line number, comment text) pairs.
 *
 * `items` points to storage for `size` entries, of which the first
 * `num_items` are in use.  The entry storage is obtained from
 * malloc()/realloc().
 */
typedef struct {
    struct {
        int lineno;
        char *comment;
    } *items;
    size_t size;
    size_t num_items;
} growable_comment_array;

/* Prepare `arr` to hold up to `initial_size` entries before it has to grow.
 *
 * `initial_size` must be greater than zero.
 * Returns 1 on success and 0 if the storage could not be allocated; in the
 * failure case `arr->items` is NULL and `arr->num_items` is 0.
 */
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    if (initial_size > SIZE_MAX / sizeof(*arr->items)) {
        /* The byte count would wrap around; treat it as out of memory. */
        arr->items = NULL;
    }
    else {
        arr->items = malloc(initial_size * sizeof(*arr->items));
    }
    arr->size = initial_size;
    arr->num_items = 0;

    return arr->items != NULL;
}

/* Append the pair (`lineno`, `comment`) to `arr`, doubling the capacity
 * when the array is full.
 *
 * Only the `comment` pointer is stored; the text is not copied.
 * Returns 1 on success and 0 on failure.  On failure `arr` is left exactly
 * as it was (same storage, same size, same entries) and `comment` has not
 * been stored.
 */
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    if (arr->num_items >= arr->size) {
        size_t new_size;
        void *new_items;

        /* Refuse to grow if doubling would overflow the byte count. */
        if (arr->size > SIZE_MAX / 2 / sizeof(*arr->items)) {
            return 0;
        }
        new_size = arr->size ? arr->size * 2 : 1;

        /* Keep the old pointer until realloc() has succeeded: on failure
           the original storage is still valid and still owned by `arr`. */
        new_items = realloc(arr->items, new_size * sizeof(*arr->items));
        if (new_items == NULL) {
            return 0;
        }
        arr->items = new_items;
        arr->size = new_size;
    }

    arr->items[arr->num_items].lineno = lineno;
    arr->items[arr->num_items].comment = comment;
    arr->num_items++;
    return 1;
}
52 | | |
53 | | static void |
54 | 16 | growable_comment_array_deallocate(growable_comment_array *arr) { |
55 | 16 | for (unsigned i = 0; i < arr->num_items; i++) { |
56 | 0 | PyObject_FREE(arr->items[i].comment); |
57 | 0 | } |
58 | 16 | free(arr->items); |
59 | 16 | } |
60 | | |
61 | | /* Parse input coming from a string. Return error code, print some errors. */ |
62 | | node * |
63 | | PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) |
64 | 0 | { |
65 | 0 | return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); |
66 | 0 | } |
67 | | |
68 | | node * |
69 | | PyParser_ParseStringFlags(const char *s, grammar *g, int start, |
70 | | perrdetail *err_ret, int flags) |
71 | 0 | { |
72 | 0 | return PyParser_ParseStringFlagsFilename(s, NULL, |
73 | 0 | g, start, err_ret, flags); |
74 | 0 | } |
75 | | |
76 | | node * |
77 | | PyParser_ParseStringFlagsFilename(const char *s, const char *filename, |
78 | | grammar *g, int start, |
79 | | perrdetail *err_ret, int flags) |
80 | 0 | { |
81 | 0 | int iflags = flags; |
82 | 0 | return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, |
83 | 0 | err_ret, &iflags); |
84 | 0 | } |
85 | | |
86 | | node * |
87 | | PyParser_ParseStringObject(const char *s, PyObject *filename, |
88 | | grammar *g, int start, |
89 | | perrdetail *err_ret, int *flags) |
90 | 16 | { |
91 | 16 | struct tok_state *tok; |
92 | 16 | int exec_input = start == file_input; |
93 | | |
94 | 16 | if (initerr(err_ret, filename) < 0) |
95 | 0 | return NULL; |
96 | | |
97 | 16 | if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) { |
98 | 0 | err_ret->error = E_ERROR; |
99 | 0 | return NULL; |
100 | 0 | } |
101 | | |
102 | 16 | if (*flags & PyPARSE_IGNORE_COOKIE) |
103 | 2 | tok = PyTokenizer_FromUTF8(s, exec_input); |
104 | 14 | else |
105 | 14 | tok = PyTokenizer_FromString(s, exec_input); |
106 | 16 | if (tok == NULL) { |
107 | 0 | err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; |
108 | 0 | return NULL; |
109 | 0 | } |
110 | 16 | if (*flags & PyPARSE_TYPE_COMMENTS) { |
111 | 0 | tok->type_comments = 1; |
112 | 0 | } |
113 | | |
114 | 16 | Py_INCREF(err_ret->filename); |
115 | 16 | tok->filename = err_ret->filename; |
116 | 16 | if (*flags & PyPARSE_ASYNC_HACKS) |
117 | 0 | tok->async_hacks = 1; |
118 | 16 | return parsetok(tok, g, start, err_ret, flags); |
119 | 16 | } |
120 | | |
121 | | node * |
122 | | PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str, |
123 | | grammar *g, int start, |
124 | | perrdetail *err_ret, int *flags) |
125 | 0 | { |
126 | 0 | node *n; |
127 | 0 | PyObject *filename = NULL; |
128 | 0 | if (filename_str != NULL) { |
129 | 0 | filename = PyUnicode_DecodeFSDefault(filename_str); |
130 | 0 | if (filename == NULL) { |
131 | 0 | err_ret->error = E_ERROR; |
132 | 0 | return NULL; |
133 | 0 | } |
134 | 0 | } |
135 | 0 | n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags); |
136 | 0 | Py_XDECREF(filename); |
137 | 0 | return n; |
138 | 0 | } |
139 | | |
140 | | /* Parse input coming from a file. Return error code, print some errors. */ |
141 | | |
142 | | node * |
143 | | PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, |
144 | | const char *ps1, const char *ps2, |
145 | | perrdetail *err_ret) |
146 | 0 | { |
147 | 0 | return PyParser_ParseFileFlags(fp, filename, NULL, |
148 | 0 | g, start, ps1, ps2, err_ret, 0); |
149 | 0 | } |
150 | | |
151 | | node * |
152 | | PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc, |
153 | | grammar *g, int start, |
154 | | const char *ps1, const char *ps2, |
155 | | perrdetail *err_ret, int flags) |
156 | 0 | { |
157 | 0 | int iflags = flags; |
158 | 0 | return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, |
159 | 0 | ps2, err_ret, &iflags); |
160 | 0 | } |
161 | | |
162 | | node * |
163 | | PyParser_ParseFileObject(FILE *fp, PyObject *filename, |
164 | | const char *enc, grammar *g, int start, |
165 | | const char *ps1, const char *ps2, |
166 | | perrdetail *err_ret, int *flags) |
167 | 0 | { |
168 | 0 | struct tok_state *tok; |
169 | |
|
170 | 0 | if (initerr(err_ret, filename) < 0) |
171 | 0 | return NULL; |
172 | | |
173 | 0 | if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) { |
174 | 0 | return NULL; |
175 | 0 | } |
176 | | |
177 | 0 | if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) { |
178 | 0 | err_ret->error = E_NOMEM; |
179 | 0 | return NULL; |
180 | 0 | } |
181 | 0 | if (*flags & PyPARSE_TYPE_COMMENTS) { |
182 | 0 | tok->type_comments = 1; |
183 | 0 | } |
184 | 0 | Py_INCREF(err_ret->filename); |
185 | 0 | tok->filename = err_ret->filename; |
186 | 0 | return parsetok(tok, g, start, err_ret, flags); |
187 | 0 | } |
188 | | |
189 | | node * |
190 | | PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, |
191 | | const char *enc, grammar *g, int start, |
192 | | const char *ps1, const char *ps2, |
193 | | perrdetail *err_ret, int *flags) |
194 | 0 | { |
195 | 0 | node *n; |
196 | 0 | PyObject *fileobj = NULL; |
197 | 0 | if (filename != NULL) { |
198 | 0 | fileobj = PyUnicode_DecodeFSDefault(filename); |
199 | 0 | if (fileobj == NULL) { |
200 | 0 | err_ret->error = E_ERROR; |
201 | 0 | return NULL; |
202 | 0 | } |
203 | 0 | } |
204 | 0 | n = PyParser_ParseFileObject(fp, fileobj, enc, g, |
205 | 0 | start, ps1, ps2, err_ret, flags); |
206 | 0 | Py_XDECREF(fileobj); |
207 | 0 | return n; |
208 | 0 | } |
209 | | |
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
/* NOTE: everything between `#if 0` and its `#endif` is compiled out.
   It holds two warning format strings (each takes a filename and a line
   number) and a helper that writes such a warning to stderr. */
#if 0
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write the warning `msg` for `filename`:`lineno` to stderr; a NULL
   filename is reported as "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif
227 | | |
228 | | /* Parse input coming from the given tokenizer structure. |
229 | | Return error code. */ |
230 | | |
231 | | static node * |
232 | | parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, |
233 | | int *flags) |
234 | 16 | { |
235 | 16 | parser_state *ps; |
236 | 16 | node *n; |
237 | 16 | int started = 0; |
238 | 16 | int col_offset, end_col_offset; |
239 | 16 | growable_comment_array type_ignores; |
240 | | |
241 | 16 | if (!growable_comment_array_init(&type_ignores, 10)) { |
242 | 0 | err_ret->error = E_NOMEM; |
243 | 0 | PyTokenizer_Free(tok); |
244 | 0 | return NULL; |
245 | 0 | } |
246 | | |
247 | 16 | if ((ps = PyParser_New(g, start)) == NULL) { |
248 | 0 | err_ret->error = E_NOMEM; |
249 | 0 | growable_comment_array_deallocate(&type_ignores); |
250 | 0 | PyTokenizer_Free(tok); |
251 | 0 | return NULL; |
252 | 0 | } |
253 | 16 | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD |
254 | 16 | if (*flags & PyPARSE_BARRY_AS_BDFL) |
255 | 0 | ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL; |
256 | 16 | if (*flags & PyPARSE_TYPE_COMMENTS) |
257 | 0 | ps->p_flags |= PyCF_TYPE_COMMENTS; |
258 | 16 | #endif |
259 | | |
260 | 1.41k | for (;;) { |
261 | 1.41k | char *a, *b; |
262 | 1.41k | int type; |
263 | 1.41k | size_t len; |
264 | 1.41k | char *str; |
265 | 1.41k | col_offset = -1; |
266 | 1.41k | int lineno; |
267 | 1.41k | const char *line_start; |
268 | | |
269 | 1.41k | type = PyTokenizer_Get(tok, &a, &b); |
270 | 1.41k | if (type == ERRORTOKEN) { |
271 | 0 | err_ret->error = tok->done; |
272 | 0 | break; |
273 | 0 | } |
274 | 1.41k | if (type == ENDMARKER && started) { |
275 | 16 | type = NEWLINE; /* Add an extra newline */ |
276 | 16 | started = 0; |
277 | | /* Add the right number of dedent tokens, |
278 | | except if a certain flag is given -- |
279 | | codeop.py uses this. */ |
280 | 16 | if (tok->indent && |
281 | 16 | !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) |
282 | 0 | { |
283 | 0 | tok->pendin = -tok->indent; |
284 | 0 | tok->indent = 0; |
285 | 0 | } |
286 | 16 | } |
287 | 1.39k | else |
288 | 1.39k | started = 1; |
289 | 1.41k | len = (a != NULL && b != NULL) ? b - a : 0; |
290 | 1.41k | str = (char *) PyObject_MALLOC(len + 1); |
291 | 1.41k | if (str == NULL) { |
292 | 0 | err_ret->error = E_NOMEM; |
293 | 0 | break; |
294 | 0 | } |
295 | 1.41k | if (len > 0) |
296 | 1.10k | strncpy(str, a, len); |
297 | 1.41k | str[len] = '\0'; |
298 | | |
299 | 1.41k | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD |
300 | 1.41k | if (type == NOTEQUAL) { |
301 | 6 | if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && |
302 | 6 | strcmp(str, "!=")) { |
303 | 0 | PyObject_FREE(str); |
304 | 0 | err_ret->error = E_SYNTAX; |
305 | 0 | break; |
306 | 0 | } |
307 | 6 | else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && |
308 | 6 | strcmp(str, "<>")) { |
309 | 0 | PyObject_FREE(str); |
310 | 0 | err_ret->expected = NOTEQUAL; |
311 | 0 | err_ret->error = E_SYNTAX; |
312 | 0 | break; |
313 | 0 | } |
314 | 6 | } |
315 | 1.41k | #endif |
316 | | |
317 | | /* Nodes of type STRING, especially multi line strings |
318 | | must be handled differently in order to get both |
319 | | the starting line number and the column offset right. |
320 | | (cf. issue 16806) */ |
321 | 1.41k | lineno = type == STRING ? tok->first_lineno : tok->lineno; |
322 | 1.41k | line_start = type == STRING ? tok->multi_line_start : tok->line_start; |
323 | 1.41k | if (a != NULL && a >= line_start) { |
324 | 1.26k | col_offset = Py_SAFE_DOWNCAST(a - line_start, |
325 | 1.26k | intptr_t, int); |
326 | 1.26k | } |
327 | 148 | else { |
328 | 148 | col_offset = -1; |
329 | 148 | } |
330 | | |
331 | 1.41k | if (b != NULL && b >= tok->line_start) { |
332 | 1.26k | end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start, |
333 | 1.26k | intptr_t, int); |
334 | 1.26k | } |
335 | 148 | else { |
336 | 148 | end_col_offset = -1; |
337 | 148 | } |
338 | | |
339 | 1.41k | if (type == TYPE_IGNORE) { |
340 | 0 | if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) { |
341 | 0 | err_ret->error = E_NOMEM; |
342 | 0 | break; |
343 | 0 | } |
344 | 0 | continue; |
345 | 0 | } |
346 | | |
347 | 1.41k | if ((err_ret->error = |
348 | 1.41k | PyParser_AddToken(ps, (int)type, str, |
349 | 1.41k | lineno, col_offset, tok->lineno, end_col_offset, |
350 | 1.41k | &(err_ret->expected))) != E_OK) { |
351 | 16 | if (err_ret->error != E_DONE) { |
352 | 0 | PyObject_FREE(str); |
353 | 0 | err_ret->token = type; |
354 | 0 | } |
355 | 16 | break; |
356 | 16 | } |
357 | 1.41k | } |
358 | | |
359 | 16 | if (err_ret->error == E_DONE) { |
360 | 16 | n = ps->p_tree; |
361 | 16 | ps->p_tree = NULL; |
362 | | |
363 | 16 | if (n->n_type == file_input) { |
364 | | /* Put type_ignore nodes in the ENDMARKER of file_input. */ |
365 | 16 | int num; |
366 | 16 | node *ch; |
367 | 16 | size_t i; |
368 | | |
369 | 16 | num = NCH(n); |
370 | 16 | ch = CHILD(n, num - 1); |
371 | 16 | REQ(ch, ENDMARKER); |
372 | | |
373 | 16 | for (i = 0; i < type_ignores.num_items; i++) { |
374 | 0 | int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment, |
375 | 0 | type_ignores.items[i].lineno, 0, |
376 | 0 | type_ignores.items[i].lineno, 0); |
377 | 0 | if (res != 0) { |
378 | 0 | err_ret->error = res; |
379 | 0 | PyNode_Free(n); |
380 | 0 | n = NULL; |
381 | 0 | break; |
382 | 0 | } |
383 | 0 | type_ignores.items[i].comment = NULL; |
384 | 0 | } |
385 | 16 | } |
386 | | |
387 | | /* Check that the source for a single input statement really |
388 | | is a single statement by looking at what is left in the |
389 | | buffer after parsing. Trailing whitespace and comments |
390 | | are OK. */ |
391 | 16 | if (err_ret->error == E_DONE && start == single_input) { |
392 | 0 | char *cur = tok->cur; |
393 | 0 | char c = *tok->cur; |
394 | |
|
395 | 0 | for (;;) { |
396 | 0 | while (c == ' ' || c == '\t' || c == '\n' || c == '\014') |
397 | 0 | c = *++cur; |
398 | |
|
399 | 0 | if (!c) |
400 | 0 | break; |
401 | | |
402 | 0 | if (c != '#') { |
403 | 0 | err_ret->error = E_BADSINGLE; |
404 | 0 | PyNode_Free(n); |
405 | 0 | n = NULL; |
406 | 0 | break; |
407 | 0 | } |
408 | | |
409 | | /* Suck up comment. */ |
410 | 0 | while (c && c != '\n') |
411 | 0 | c = *++cur; |
412 | 0 | } |
413 | 0 | } |
414 | 16 | } |
415 | 0 | else |
416 | 0 | n = NULL; |
417 | | |
418 | 16 | growable_comment_array_deallocate(&type_ignores); |
419 | | |
420 | 16 | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD |
421 | 16 | *flags = ps->p_flags; |
422 | 16 | #endif |
423 | 16 | PyParser_Delete(ps); |
424 | | |
425 | 16 | if (n == NULL) { |
426 | 0 | if (tok->done == E_EOF) |
427 | 0 | err_ret->error = E_EOF; |
428 | 0 | err_ret->lineno = tok->lineno; |
429 | 0 | if (tok->buf != NULL) { |
430 | 0 | size_t len; |
431 | 0 | assert(tok->cur - tok->buf < INT_MAX); |
432 | | /* if we've managed to parse a token, point the offset to its start, |
433 | | * else use the current reading position of the tokenizer |
434 | | */ |
435 | 0 | err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf)); |
436 | 0 | len = tok->inp - tok->buf; |
437 | 0 | err_ret->text = (char *) PyObject_MALLOC(len + 1); |
438 | 0 | if (err_ret->text != NULL) { |
439 | 0 | if (len > 0) |
440 | 0 | strncpy(err_ret->text, tok->buf, len); |
441 | 0 | err_ret->text[len] = '\0'; |
442 | 0 | } |
443 | 0 | } |
444 | 16 | } else if (tok->encoding != NULL) { |
445 | | /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was |
446 | | * allocated using PyMem_ |
447 | | */ |
448 | 2 | node* r = PyNode_New(encoding_decl); |
449 | 2 | if (r) |
450 | 2 | r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); |
451 | 2 | if (!r || !r->n_str) { |
452 | 0 | err_ret->error = E_NOMEM; |
453 | 0 | if (r) |
454 | 0 | PyObject_FREE(r); |
455 | 0 | n = NULL; |
456 | 0 | goto done; |
457 | 0 | } |
458 | 2 | strcpy(r->n_str, tok->encoding); |
459 | 2 | PyMem_FREE(tok->encoding); |
460 | 2 | tok->encoding = NULL; |
461 | 2 | r->n_nchildren = 1; |
462 | 2 | r->n_child = n; |
463 | 2 | n = r; |
464 | 2 | } |
465 | | |
466 | 16 | done: |
467 | 16 | PyTokenizer_Free(tok); |
468 | | |
469 | 16 | if (n != NULL) { |
470 | 16 | _PyNode_FinalizeEndPos(n); |
471 | 16 | } |
472 | 16 | return n; |
473 | 16 | } |
474 | | |
475 | | static int |
476 | | initerr(perrdetail *err_ret, PyObject *filename) |
477 | 16 | { |
478 | 16 | err_ret->error = E_OK; |
479 | 16 | err_ret->lineno = 0; |
480 | 16 | err_ret->offset = 0; |
481 | 16 | err_ret->text = NULL; |
482 | 16 | err_ret->token = -1; |
483 | 16 | err_ret->expected = -1; |
484 | 16 | if (filename) { |
485 | 16 | Py_INCREF(filename); |
486 | 16 | err_ret->filename = filename; |
487 | 16 | } |
488 | 0 | else { |
489 | 0 | err_ret->filename = PyUnicode_FromString("<string>"); |
490 | 0 | if (err_ret->filename == NULL) { |
491 | 0 | err_ret->error = E_ERROR; |
492 | 0 | return -1; |
493 | 0 | } |
494 | 0 | } |
495 | 16 | return 0; |
496 | 16 | } |