/src/cpython/Parser/pegen.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include <Python.h> |
2 | | #include "pycore_ast.h" // _PyAST_Validate(), |
3 | | #include "pycore_pystate.h" // _PyThreadState_GET() |
4 | | #include "pycore_parser.h" // _PYPEGEN_NSTATISTICS |
5 | | #include "pycore_pyerrors.h" // PyExc_IncompleteInputError |
6 | | #include "pycore_runtime.h" // _PyRuntime |
7 | | #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal |
8 | | #include "pycore_pyatomic_ft_wrappers.h" |
9 | | #include <errcode.h> |
10 | | |
11 | | #include "lexer/lexer.h" |
12 | | #include "tokenizer/tokenizer.h" |
13 | | #include "pegen.h" |
14 | | |
15 | | // Internal parser functions |
16 | | |
17 | | asdl_stmt_seq* |
18 | | _PyPegen_interactive_exit(Parser *p) |
19 | 0 | { |
20 | 0 | if (p->errcode) { |
21 | 0 | *(p->errcode) = E_EOF; |
22 | 0 | } |
23 | 0 | return NULL; |
24 | 0 | } |
25 | | |
26 | | Py_ssize_t |
27 | | _PyPegen_byte_offset_to_character_offset_line(PyObject *line, Py_ssize_t col_offset, Py_ssize_t end_col_offset) |
28 | 0 | { |
29 | 0 | const unsigned char *data = (const unsigned char*)PyUnicode_AsUTF8(line); |
30 | |
|
31 | 0 | Py_ssize_t len = 0; |
32 | 0 | while (col_offset < end_col_offset) { |
33 | 0 | Py_UCS4 ch = data[col_offset]; |
34 | 0 | if (ch < 0x80) { |
35 | 0 | col_offset += 1; |
36 | 0 | } else if ((ch & 0xe0) == 0xc0) { |
37 | 0 | col_offset += 2; |
38 | 0 | } else if ((ch & 0xf0) == 0xe0) { |
39 | 0 | col_offset += 3; |
40 | 0 | } else if ((ch & 0xf8) == 0xf0) { |
41 | 0 | col_offset += 4; |
42 | 0 | } else { |
43 | 0 | PyErr_SetString(PyExc_ValueError, "Invalid UTF-8 sequence"); |
44 | 0 | return -1; |
45 | 0 | } |
46 | 0 | len++; |
47 | 0 | } |
48 | 0 | return len; |
49 | 0 | } |
50 | | |
51 | | Py_ssize_t |
52 | | _PyPegen_byte_offset_to_character_offset_raw(const char* str, Py_ssize_t col_offset) |
53 | 20.2k | { |
54 | 20.2k | Py_ssize_t len = (Py_ssize_t)strlen(str); |
55 | 20.2k | if (col_offset > len + 1) { |
56 | 12 | col_offset = len + 1; |
57 | 12 | } |
58 | 20.2k | assert(col_offset >= 0); |
59 | 20.2k | PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); |
60 | 20.2k | if (!text) { |
61 | 0 | return -1; |
62 | 0 | } |
63 | 20.2k | Py_ssize_t size = PyUnicode_GET_LENGTH(text); |
64 | 20.2k | Py_DECREF(text); |
65 | 20.2k | return size; |
66 | 20.2k | } |
67 | | |
68 | | Py_ssize_t |
69 | | _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) |
70 | 20.2k | { |
71 | 20.2k | const char *str = PyUnicode_AsUTF8(line); |
72 | 20.2k | if (!str) { |
73 | 0 | return -1; |
74 | 0 | } |
75 | 20.2k | return _PyPegen_byte_offset_to_character_offset_raw(str, col_offset); |
76 | 20.2k | } |
77 | | |
78 | | // Here, mark is the start of the node, while p->mark is the end. |
79 | | // If node==NULL, they should be the same. |
80 | | int |
81 | | _PyPegen_insert_memo(Parser *p, int mark, int type, void *node) |
82 | 11.4M | { |
83 | | // Insert in front |
84 | 11.4M | Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo)); |
85 | 11.4M | if (m == NULL) { |
86 | 0 | return -1; |
87 | 0 | } |
88 | 11.4M | m->type = type; |
89 | 11.4M | m->node = node; |
90 | 11.4M | m->mark = p->mark; |
91 | 11.4M | m->next = p->tokens[mark]->memo; |
92 | 11.4M | p->tokens[mark]->memo = m; |
93 | 11.4M | return 0; |
94 | 11.4M | } |
95 | | |
96 | | // Like _PyPegen_insert_memo(), but updates an existing node if found. |
97 | | int |
98 | | _PyPegen_update_memo(Parser *p, int mark, int type, void *node) |
99 | 8.86M | { |
100 | 43.6M | for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) { |
101 | 38.0M | if (m->type == type) { |
102 | | // Update existing node. |
103 | 3.24M | m->node = node; |
104 | 3.24M | m->mark = p->mark; |
105 | 3.24M | return 0; |
106 | 3.24M | } |
107 | 38.0M | } |
108 | | // Insert new node. |
109 | 5.61M | return _PyPegen_insert_memo(p, mark, type, node); |
110 | 8.86M | } |
111 | | |
112 | | static int |
113 | | init_normalization(Parser *p) |
114 | 56.5k | { |
115 | 56.5k | if (p->normalize) { |
116 | 54.8k | return 1; |
117 | 54.8k | } |
118 | 1.65k | p->normalize = PyImport_ImportModuleAttrString("unicodedata", "normalize"); |
119 | 1.65k | if (!p->normalize) |
120 | 0 | { |
121 | 0 | return 0; |
122 | 0 | } |
123 | 1.65k | return 1; |
124 | 1.65k | } |
125 | | |
126 | | static int |
127 | 21.3k | growable_comment_array_init(growable_comment_array *arr, size_t initial_size) { |
128 | 21.3k | assert(initial_size > 0); |
129 | 21.3k | arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items)); |
130 | 21.3k | arr->size = initial_size; |
131 | 21.3k | arr->num_items = 0; |
132 | | |
133 | 21.3k | return arr->items != NULL; |
134 | 21.3k | } |
135 | | |
136 | | static int |
137 | 0 | growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) { |
138 | 0 | if (arr->num_items >= arr->size) { |
139 | 0 | size_t new_size = arr->size * 2; |
140 | 0 | void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items)); |
141 | 0 | if (!new_items_array) { |
142 | 0 | return 0; |
143 | 0 | } |
144 | 0 | arr->items = new_items_array; |
145 | 0 | arr->size = new_size; |
146 | 0 | } |
147 | | |
148 | 0 | arr->items[arr->num_items].lineno = lineno; |
149 | 0 | arr->items[arr->num_items].comment = comment; // Take ownership |
150 | 0 | arr->num_items++; |
151 | 0 | return 1; |
152 | 0 | } |
153 | | |
154 | | static void |
155 | 21.3k | growable_comment_array_deallocate(growable_comment_array *arr) { |
156 | 21.3k | for (unsigned i = 0; i < arr->num_items; i++) { |
157 | 0 | PyMem_Free(arr->items[i].comment); |
158 | 0 | } |
159 | 21.3k | PyMem_Free(arr->items); |
160 | 21.3k | } |
161 | | |
162 | | static int |
163 | | _get_keyword_or_name_type(Parser *p, struct token *new_token) |
164 | 496k | { |
165 | 496k | Py_ssize_t name_len = new_token->end_col_offset - new_token->col_offset; |
166 | 496k | assert(name_len > 0); |
167 | | |
168 | 496k | if (name_len >= p->n_keyword_lists || |
169 | 496k | p->keywords[name_len] == NULL || |
170 | 496k | p->keywords[name_len]->type == -1) { |
171 | 218k | return NAME; |
172 | 218k | } |
173 | 1.43M | for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { |
174 | 1.27M | if (strncmp(k->str, new_token->start, (size_t)name_len) == 0) { |
175 | 121k | return k->type; |
176 | 121k | } |
177 | 1.27M | } |
178 | 156k | return NAME; |
179 | 277k | } |
180 | | |
// Populate `parser_token` (the parser-side Token in p->tokens) from the
// tokenizer-produced `new_token`: resolve keywords, copy the token text
// into the arena, transfer metadata ownership, and shift column offsets
// for the parser's starting line.  Returns 0 on success, -1 with an
// exception set on failure (including tokenizer ERRORTOKENs).
static int
initialize_token(Parser *p, Token *parser_token, struct token *new_token, int token_type) {
    assert(parser_token != NULL);

    // A NAME may really be a hard keyword; resolve that here.
    parser_token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, new_token) : token_type;
    parser_token->bytes = PyBytes_FromStringAndSize(new_token->start, new_token->end - new_token->start);
    if (parser_token->bytes == NULL) {
        return -1;
    }
    // The arena takes over the reference so the bytes outlive this call.
    if (_PyArena_AddPyObject(p->arena, parser_token->bytes) < 0) {
        Py_DECREF(parser_token->bytes);
        return -1;
    }

    parser_token->metadata = NULL;
    if (new_token->metadata != NULL) {
        if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
            Py_DECREF(new_token->metadata);
            return -1;
        }
        // Ownership moved to the arena; clear the tokenizer's pointer so
        // it is not freed twice.
        parser_token->metadata = new_token->metadata;
        new_token->metadata = NULL;
    }

    parser_token->level = new_token->level;
    parser_token->lineno = new_token->lineno;
    // On the parse's starting line, columns are shifted by the parser's
    // starting column offset (p->starting_col_offset); presumably this
    // supports parses that begin mid-line — confirm against callers.
    parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
                                                                    : new_token->col_offset;
    parser_token->end_lineno = new_token->end_lineno;
    parser_token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->end_col_offset
                                                                        : new_token->end_col_offset;

    p->fill += 1;

    // Decode failures get their own, more specific, error path.
    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
        return _Pypegen_raise_decode_error(p);
    }

    return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
}
221 | | |
222 | | static int |
223 | 80.3k | _resize_tokens_array(Parser *p) { |
224 | 80.3k | int newsize = p->size * 2; |
225 | 80.3k | Token **new_tokens = PyMem_Realloc(p->tokens, (size_t)newsize * sizeof(Token *)); |
226 | 80.3k | if (new_tokens == NULL) { |
227 | 0 | PyErr_NoMemory(); |
228 | 0 | return -1; |
229 | 0 | } |
230 | 80.3k | p->tokens = new_tokens; |
231 | | |
232 | 2.64M | for (int i = p->size; i < newsize; i++) { |
233 | 2.56M | p->tokens[i] = PyMem_Calloc(1, sizeof(Token)); |
234 | 2.56M | if (p->tokens[i] == NULL) { |
235 | 0 | p->size = i; // Needed, in order to cleanup correctly after parser fails |
236 | 0 | PyErr_NoMemory(); |
237 | 0 | return -1; |
238 | 0 | } |
239 | 2.56M | } |
240 | 80.3k | p->size = newsize; |
241 | 80.3k | return 0; |
242 | 80.3k | } |
243 | | |
// Fetch one token from the tokenizer into p->tokens[p->fill].
// Skips (while recording) '# type: ignore' comments, injects the extra
// NEWLINE required at EOF in single-input mode, and grows the token
// array on demand.  Returns 0 on success, -1 with an exception set.
int
_PyPegen_fill_token(Parser *p)
{
    struct token new_token;
    _PyToken_Init(&new_token);
    int type = _PyTokenizer_Get(p->tok, &new_token);

    // Record and skip '# type: ignore' comments
    while (type == TYPE_IGNORE) {
        Py_ssize_t len = new_token.end_col_offset - new_token.col_offset;
        char *tag = PyMem_Malloc((size_t)len + 1);
        if (tag == NULL) {
            PyErr_NoMemory();
            goto error;
        }
        strncpy(tag, new_token.start, (size_t)len);
        tag[len] = '\0';    // strncpy does not terminate; do it explicitly
        // Ownership of tag passes to the growable array
        if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
            PyErr_NoMemory();
            goto error;
        }
        type = _PyTokenizer_Get(p->tok, &new_token);
    }

    // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
        type = NEWLINE; /* Add an extra newline */
        p->parsing_started = 0;

        // Flush any pending indentation levels as DEDENTs, unless the
        // caller explicitly asked us not to imply them.
        if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
            p->tok->pendin = -p->tok->indent;
            p->tok->indent = 0;
        }
    }
    else {
        p->parsing_started = 1;
    }

    // Check if we are at the limit of the token array capacity and resize if needed
    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
        goto error;
    }

    Token *t = p->tokens[p->fill];
    // initialize_token() takes over new_token's contents on success.
    return initialize_token(p, t, &new_token, type);
error:
    _PyToken_Free(&new_token);
    return -1;
}
294 | | |
#if defined(Py_DEBUG)
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS _PYPEGEN_NSTATISTICS
#define memo_statistics _PyRuntime.parser.memo_statistics

// Reset every memoization counter to zero.  Guarded by the runtime
// parser mutex in free-threaded builds.
void
_PyPegen_clear_memo_statistics(void)
{
    FT_MUTEX_LOCK(&_PyRuntime.parser.mutex);
    for (int i = 0; i < NSTATISTICS; i++) {
        memo_statistics[i] = 0;
    }
    FT_MUTEX_UNLOCK(&_PyRuntime.parser.mutex);
}

// Return the memoization counters as a fresh list of ints, or NULL
// with an exception set on allocation failure.
PyObject *
_PyPegen_get_memo_statistics(void)
{
    PyObject *ret = PyList_New(NSTATISTICS);
    if (ret == NULL) {
        return NULL;
    }

    FT_MUTEX_LOCK(&_PyRuntime.parser.mutex);
    for (int i = 0; i < NSTATISTICS; i++) {
        PyObject *value = PyLong_FromLong(memo_statistics[i]);
        if (value == NULL) {
            FT_MUTEX_UNLOCK(&_PyRuntime.parser.mutex);
            Py_DECREF(ret);
            return NULL;
        }
        // PyList_SetItem borrows a reference to value.
        if (PyList_SetItem(ret, i, value) < 0) {
            FT_MUTEX_UNLOCK(&_PyRuntime.parser.mutex);
            Py_DECREF(ret);
            return NULL;
        }
    }
    FT_MUTEX_UNLOCK(&_PyRuntime.parser.mutex);
    return ret;
}
#endif
340 | | |
// Look up a memoized result for rule `type` at the current position.
// On a hit: stores the cached node in *pres, jumps p->mark to the
// cached end position, and returns 1.  Returns 0 on a miss, or -1
// (with p->error_indicator set) if fetching the next token failed.
int // bool
_PyPegen_is_memoized(Parser *p, int type, void *pres)
{
    // Make sure the token at the current mark exists.
    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return -1;
        }
    }

    Token *t = p->tokens[p->mark];

    for (Memo *m = t->memo; m != NULL; m = m->next) {
        if (m->type == type) {
#if defined(Py_DEBUG)
            // Count how many tokens this memo hit let us skip.
            if (0 <= type && type < NSTATISTICS) {
                long count = m->mark - p->mark;
                // A memoized negative result counts for one.
                if (count <= 0) {
                    count = 1;
                }
                FT_MUTEX_LOCK(&_PyRuntime.parser.mutex);
                memo_statistics[type] += count;
                FT_MUTEX_UNLOCK(&_PyRuntime.parser.mutex);
            }
#endif
            p->mark = m->mark;
            *(void **)(pres) = m->node;
            return 1;
        }
    }
    return 0;
}
374 | | |
// LOOKAHEAD1 stamps out a lookahead helper: run `func` at the current
// position, rewind the mark (lookaheads never consume input), and
// report whether the match/non-match agrees with `positive`.
#define LOOKAHEAD1(NAME, RES_TYPE) \
    int \
    NAME (int positive, RES_TYPE (func)(Parser *), Parser *p) \
    { \
        int mark = p->mark; \
        void *res = func(p); \
        p->mark = mark; \
        return (res != NULL) == positive; \
    }

LOOKAHEAD1(_PyPegen_lookahead, void *)
LOOKAHEAD1(_PyPegen_lookahead_for_expr, expr_ty)
LOOKAHEAD1(_PyPegen_lookahead_for_stmt, stmt_ty)
#undef LOOKAHEAD1
389 | | |
// LOOKAHEAD2 is like LOOKAHEAD1 but for rule functions taking one
// extra argument (a token type, or a keyword string).
#define LOOKAHEAD2(NAME, RES_TYPE, T) \
    int \
    NAME (int positive, RES_TYPE (func)(Parser *, T), Parser *p, T arg) \
    { \
        int mark = p->mark; \
        void *res = func(p, arg); \
        p->mark = mark; \
        return (res != NULL) == positive; \
    }

LOOKAHEAD2(_PyPegen_lookahead_with_int, Token *, int)
LOOKAHEAD2(_PyPegen_lookahead_with_string, expr_ty, const char *)
#undef LOOKAHEAD2
403 | | |
404 | | Token * |
405 | | _PyPegen_expect_token(Parser *p, int type) |
406 | 53.1M | { |
407 | 53.1M | if (p->mark == p->fill) { |
408 | 936k | if (_PyPegen_fill_token(p) < 0) { |
409 | 2.72k | p->error_indicator = 1; |
410 | 2.72k | return NULL; |
411 | 2.72k | } |
412 | 936k | } |
413 | 53.1M | Token *t = p->tokens[p->mark]; |
414 | 53.1M | if (t->type != type) { |
415 | 46.6M | return NULL; |
416 | 46.6M | } |
417 | 6.46M | p->mark += 1; |
418 | 6.46M | return t; |
419 | 53.1M | } |
420 | | |
421 | | void* |
422 | 0 | _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) { |
423 | |
|
424 | 0 | if (p->error_indicator == 1) { |
425 | 0 | return NULL; |
426 | 0 | } |
427 | 0 | if (result == NULL) { |
428 | 0 | RAISE_SYNTAX_ERROR("expected (%s)", expected); |
429 | 0 | return NULL; |
430 | 0 | } |
431 | 0 | return result; |
432 | 0 | } |
433 | | |
434 | | Token * |
435 | 20.5k | _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { |
436 | | |
437 | 20.5k | if (p->error_indicator == 1) { |
438 | 0 | return NULL; |
439 | 0 | } |
440 | | |
441 | 20.5k | if (p->mark == p->fill) { |
442 | 6.57k | if (_PyPegen_fill_token(p) < 0) { |
443 | 1 | p->error_indicator = 1; |
444 | 1 | return NULL; |
445 | 1 | } |
446 | 6.57k | } |
447 | 20.5k | Token *t = p->tokens[p->mark]; |
448 | 20.5k | if (t->type != type) { |
449 | 144 | RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected); |
450 | 144 | return NULL; |
451 | 144 | } |
452 | 20.4k | p->mark += 1; |
453 | 20.4k | return t; |
454 | 20.5k | } |
455 | | |
456 | | expr_ty |
457 | | _PyPegen_expect_soft_keyword(Parser *p, const char *keyword) |
458 | 435k | { |
459 | 435k | if (p->mark == p->fill) { |
460 | 6.19k | if (_PyPegen_fill_token(p) < 0) { |
461 | 7 | p->error_indicator = 1; |
462 | 7 | return NULL; |
463 | 7 | } |
464 | 6.19k | } |
465 | 435k | Token *t = p->tokens[p->mark]; |
466 | 435k | if (t->type != NAME) { |
467 | 236k | return NULL; |
468 | 236k | } |
469 | 199k | const char *s = PyBytes_AsString(t->bytes); |
470 | 199k | if (!s) { |
471 | 0 | p->error_indicator = 1; |
472 | 0 | return NULL; |
473 | 0 | } |
474 | 199k | if (strcmp(s, keyword) != 0) { |
475 | 170k | return NULL; |
476 | 170k | } |
477 | 28.2k | return _PyPegen_name_token(p); |
478 | 199k | } |
479 | | |
480 | | Token * |
481 | | _PyPegen_get_last_nonnwhitespace_token(Parser *p) |
482 | 1.68M | { |
483 | 1.68M | assert(p->mark >= 0); |
484 | 1.68M | Token *token = NULL; |
485 | 1.75M | for (int m = p->mark - 1; m >= 0; m--) { |
486 | 1.75M | token = p->tokens[m]; |
487 | 1.75M | if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) { |
488 | 1.68M | break; |
489 | 1.68M | } |
490 | 1.75M | } |
491 | 1.68M | return token; |
492 | 1.68M | } |
493 | | |
// Turn a UTF-8 identifier string into an interned str object owned by
// the parser's arena: NFKC-normalize non-ASCII identifiers, reject
// "None"/"True"/"False", intern immortally.  On any failure sets
// p->error_indicator and returns NULL with an exception set.
PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
    PyObject *id = PyUnicode_DecodeUTF8(n, (Py_ssize_t)strlen(n), NULL);
    if (!id) {
        goto error;
    }
    /* Check whether there are non-ASCII characters in the
       identifier; if so, normalize to NFKC. */
    if (!PyUnicode_IS_ASCII(id))
    {
        // Lazily import unicodedata.normalize into p->normalize.
        if (!init_normalization(p))
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *form = PyUnicode_InternFromString("NFKC");
        if (form == NULL)
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *args[2] = {form, id};
        PyObject *id2 = PyObject_Vectorcall(p->normalize, args, 2, NULL);
        Py_DECREF(id);
        Py_DECREF(form);
        if (!id2) {
            goto error;
        }

        // A user-replaced unicodedata module could return anything;
        // reject non-str results explicitly.
        if (!PyUnicode_Check(id2))
        {
            PyErr_Format(PyExc_TypeError,
                         "unicodedata.normalize() must return a string, not "
                         "%.200s",
                         _PyType_Name(Py_TYPE(id2)));
            Py_DECREF(id2);
            goto error;
        }
        id = id2;
    }
    // These constants are Name-shaped but must never appear as
    // identifier fields in the AST.
    static const char * const forbidden[] = {
        "None",
        "True",
        "False",
        NULL
    };
    for (int i = 0; forbidden[i] != NULL; i++) {
        if (_PyUnicode_EqualToASCIIString(id, forbidden[i])) {
            PyErr_Format(PyExc_ValueError,
                         "identifier field can't represent '%s' constant",
                         forbidden[i]);
            Py_DECREF(id);
            goto error;
        }
    }
    PyInterpreterState *interp = _PyInterpreterState_GET();
    _PyUnicode_InternImmortal(interp, &id);
    // Arena takes ownership of the (immortal) reference.
    if (_PyArena_AddPyObject(p->arena, id) < 0)
    {
        Py_DECREF(id);
        goto error;
    }
    return id;

error:
    p->error_indicator = 1;
    return NULL;
}
563 | | |
564 | | static expr_ty |
565 | | _PyPegen_name_from_token(Parser *p, Token* t) |
566 | 5.53M | { |
567 | 5.53M | if (t == NULL) { |
568 | 3.21M | return NULL; |
569 | 3.21M | } |
570 | 2.32M | const char *s = PyBytes_AsString(t->bytes); |
571 | 2.32M | if (!s) { |
572 | 0 | p->error_indicator = 1; |
573 | 0 | return NULL; |
574 | 0 | } |
575 | 2.32M | PyObject *id = _PyPegen_new_identifier(p, s); |
576 | 2.32M | if (id == NULL) { |
577 | 1 | p->error_indicator = 1; |
578 | 1 | return NULL; |
579 | 1 | } |
580 | 2.32M | return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno, |
581 | 2.32M | t->end_col_offset, p->arena); |
582 | 2.32M | } |
583 | | |
584 | | expr_ty |
585 | | _PyPegen_name_token(Parser *p) |
586 | 5.52M | { |
587 | 5.52M | Token *t = _PyPegen_expect_token(p, NAME); |
588 | 5.52M | return _PyPegen_name_from_token(p, t); |
589 | 5.52M | } |
590 | | |
591 | | void * |
592 | | _PyPegen_string_token(Parser *p) |
593 | 1.36M | { |
594 | 1.36M | return _PyPegen_expect_token(p, STRING); |
595 | 1.36M | } |
596 | | |
597 | 225k | expr_ty _PyPegen_soft_keyword_token(Parser *p) { |
598 | 225k | Token *t = _PyPegen_expect_token(p, NAME); |
599 | 225k | if (t == NULL) { |
600 | 156k | return NULL; |
601 | 156k | } |
602 | 69.7k | char *the_token; |
603 | 69.7k | Py_ssize_t size; |
604 | 69.7k | PyBytes_AsStringAndSize(t->bytes, &the_token, &size); |
605 | 333k | for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { |
606 | 267k | if (strlen(*keyword) == (size_t)size && |
607 | 267k | strncmp(*keyword, the_token, (size_t)size) == 0) { |
608 | 4.60k | return _PyPegen_name_from_token(p, t); |
609 | 4.60k | } |
610 | 267k | } |
611 | 65.1k | return NULL; |
612 | 69.7k | } |
613 | | |
// Parse a numeric literal (with underscores already removed) into an
// int, float, or complex object.  Returns NULL with an exception set
// on failure.
static PyObject *
parsenumber_raw(const char *s)
{
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    errno = 0;
    end = s + strlen(s) - 1;
    // A trailing 'j'/'J' marks an imaginary (complex) literal.
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        // Leading zero: radix-prefixed (0x/0o/0b) or plain zero.
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        if (x < 0 && errno == 0) {
            // Unsigned value overflowed into the sign bit: re-parse as
            // an arbitrary-precision int.
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    if (*end == '\0') {
        // The whole string was consumed as an integer.
        if (errno != 0) {
            // Out of range for long: fall back to a big int.
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    /* XXX Huge floats may silently fail */
    if (imflag) {
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}
657 | | |
658 | | static PyObject * |
659 | | parsenumber(const char *s) |
660 | 257k | { |
661 | 257k | char *dup; |
662 | 257k | char *end; |
663 | 257k | PyObject *res = NULL; |
664 | | |
665 | 257k | assert(s != NULL); |
666 | | |
667 | 257k | if (strchr(s, '_') == NULL) { |
668 | 256k | return parsenumber_raw(s); |
669 | 256k | } |
670 | | /* Create a duplicate without underscores. */ |
671 | 1.10k | dup = PyMem_Malloc(strlen(s) + 1); |
672 | 1.10k | if (dup == NULL) { |
673 | 0 | return PyErr_NoMemory(); |
674 | 0 | } |
675 | 1.10k | end = dup; |
676 | 17.7k | for (; *s; s++) { |
677 | 16.6k | if (*s != '_') { |
678 | 13.4k | *end++ = *s; |
679 | 13.4k | } |
680 | 16.6k | } |
681 | 1.10k | *end = '\0'; |
682 | 1.10k | res = parsenumber_raw(dup); |
683 | 1.10k | PyMem_Free(dup); |
684 | 1.10k | return res; |
685 | 1.10k | } |
686 | | |
// Match a NUMBER token and convert it to a Constant AST node.
// Rejects underscore separators before feature version 3.6, and
// re-raises int-conversion limit errors as SyntaxErrors with the
// token's location.  Returns NULL (error_indicator set) on failure.
expr_ty
_PyPegen_number_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NUMBER);
    if (t == NULL) {
        return NULL;
    }

    const char *num_raw = PyBytes_AsString(t->bytes);
    if (num_raw == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    // PEP 515 underscores are a 3.6+ feature; enforce for ast's
    // feature_version support.
    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
        p->error_indicator = 1;
        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                                  "in Python 3.6 and greater");
    }

    PyObject *c = parsenumber(num_raw);

    if (c == NULL) {
        p->error_indicator = 1;
        PyThreadState *tstate = _PyThreadState_GET();
        // The only way a ValueError should happen in _this_ code is via
        // PyLong_FromString hitting a length limit.
        if (tstate->current_exception != NULL &&
            Py_TYPE(tstate->current_exception) == (PyTypeObject *)PyExc_ValueError
        ) {
            PyObject *exc = PyErr_GetRaisedException();
            /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
             * on the error message. Nobody is going to overlook their huge
             * numeric literal once given the line. */
            RAISE_ERROR_KNOWN_LOCATION(
                p, PyExc_SyntaxError,
                t->lineno, -1 /* col_offset */,
                t->end_lineno, -1 /* end_col_offset */,
                "%S - Consider hexadecimal for huge integer literals "
                "to avoid decimal conversion limits.",
                exc);
            Py_DECREF(exc);
        }
        return NULL;
    }

    // Arena takes ownership of the constant's reference.
    if (_PyArena_AddPyObject(p->arena, c) < 0) {
        Py_DECREF(c);
        p->error_indicator = 1;
        return NULL;
    }

    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
                           t->end_col_offset, p->arena);
}
742 | | |
743 | | /* Check that the source for a single input statement really is a single |
744 | | statement by looking at what is left in the buffer after parsing. |
745 | | Trailing whitespace and comments are OK. */ |
746 | | static int // bool |
747 | | bad_single_statement(Parser *p) |
748 | 0 | { |
749 | 0 | char *cur = p->tok->cur; |
750 | 0 | char c = *cur; |
751 | |
|
752 | 0 | for (;;) { |
753 | 0 | while (c == ' ' || c == '\t' || c == '\n' || c == '\014') { |
754 | 0 | c = *++cur; |
755 | 0 | } |
756 | |
|
757 | 0 | if (!c) { |
758 | 0 | return 0; |
759 | 0 | } |
760 | | |
761 | 0 | if (c != '#') { |
762 | 0 | return 1; |
763 | 0 | } |
764 | | |
765 | | /* Suck up comment. */ |
766 | 0 | while (c && c != '\n') { |
767 | 0 | c = *++cur; |
768 | 0 | } |
769 | 0 | } |
770 | 0 | } |
771 | | |
772 | | static int |
773 | | compute_parser_flags(PyCompilerFlags *flags) |
774 | 21.3k | { |
775 | 21.3k | int parser_flags = 0; |
776 | 21.3k | if (!flags) { |
777 | 48 | return 0; |
778 | 48 | } |
779 | 21.3k | if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) { |
780 | 0 | parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; |
781 | 0 | } |
782 | 21.3k | if (flags->cf_flags & PyCF_IGNORE_COOKIE) { |
783 | 78 | parser_flags |= PyPARSE_IGNORE_COOKIE; |
784 | 78 | } |
785 | 21.3k | if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) { |
786 | 0 | parser_flags |= PyPARSE_BARRY_AS_BDFL; |
787 | 0 | } |
788 | 21.3k | if (flags->cf_flags & PyCF_TYPE_COMMENTS) { |
789 | 0 | parser_flags |= PyPARSE_TYPE_COMMENTS; |
790 | 0 | } |
791 | 21.3k | if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) { |
792 | 0 | parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT; |
793 | 0 | } |
794 | 21.3k | return parser_flags; |
795 | 21.3k | } |
796 | | |
797 | | // Parser API |
798 | | |
799 | | Parser * |
800 | | _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, |
801 | | int feature_version, int *errcode, const char* source, PyArena *arena) |
802 | 21.3k | { |
803 | 21.3k | Parser *p = PyMem_Malloc(sizeof(Parser)); |
804 | 21.3k | if (p == NULL) { |
805 | 0 | return (Parser *) PyErr_NoMemory(); |
806 | 0 | } |
807 | 21.3k | assert(tok != NULL); |
808 | 21.3k | tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0; |
809 | 21.3k | p->tok = tok; |
810 | 21.3k | p->keywords = NULL; |
811 | 21.3k | p->n_keyword_lists = -1; |
812 | 21.3k | p->soft_keywords = NULL; |
813 | 21.3k | p->tokens = PyMem_Malloc(sizeof(Token *)); |
814 | 21.3k | if (!p->tokens) { |
815 | 0 | PyMem_Free(p); |
816 | 0 | return (Parser *) PyErr_NoMemory(); |
817 | 0 | } |
818 | 21.3k | p->tokens[0] = PyMem_Calloc(1, sizeof(Token)); |
819 | 21.3k | if (!p->tokens[0]) { |
820 | 0 | PyMem_Free(p->tokens); |
821 | 0 | PyMem_Free(p); |
822 | 0 | return (Parser *) PyErr_NoMemory(); |
823 | 0 | } |
824 | 21.3k | if (!growable_comment_array_init(&p->type_ignore_comments, 10)) { |
825 | 0 | PyMem_Free(p->tokens[0]); |
826 | 0 | PyMem_Free(p->tokens); |
827 | 0 | PyMem_Free(p); |
828 | 0 | return (Parser *) PyErr_NoMemory(); |
829 | 0 | } |
830 | | |
831 | 21.3k | p->mark = 0; |
832 | 21.3k | p->fill = 0; |
833 | 21.3k | p->size = 1; |
834 | | |
835 | 21.3k | p->errcode = errcode; |
836 | 21.3k | p->arena = arena; |
837 | 21.3k | p->start_rule = start_rule; |
838 | 21.3k | p->parsing_started = 0; |
839 | 21.3k | p->normalize = NULL; |
840 | 21.3k | p->error_indicator = 0; |
841 | | |
842 | 21.3k | p->starting_lineno = 0; |
843 | 21.3k | p->starting_col_offset = 0; |
844 | 21.3k | p->flags = flags; |
845 | 21.3k | p->feature_version = feature_version; |
846 | 21.3k | p->known_err_token = NULL; |
847 | 21.3k | p->level = 0; |
848 | 21.3k | p->call_invalid_rules = 0; |
849 | 21.3k | p->last_stmt_location.lineno = 0; |
850 | 21.3k | p->last_stmt_location.col_offset = 0; |
851 | 21.3k | p->last_stmt_location.end_lineno = 0; |
852 | 21.3k | p->last_stmt_location.end_col_offset = 0; |
853 | | #ifdef Py_DEBUG |
854 | | p->debug = _Py_GetConfig()->parser_debug; |
855 | | #endif |
856 | 21.3k | return p; |
857 | 21.3k | } |
858 | | |
859 | | void |
860 | | _PyPegen_Parser_Free(Parser *p) |
861 | 21.3k | { |
862 | 21.3k | Py_XDECREF(p->normalize); |
863 | 2.60M | for (int i = 0; i < p->size; i++) { |
864 | 2.58M | PyMem_Free(p->tokens[i]); |
865 | 2.58M | } |
866 | 21.3k | PyMem_Free(p->tokens); |
867 | 21.3k | growable_comment_array_deallocate(&p->type_ignore_comments); |
868 | 21.3k | PyMem_Free(p); |
869 | 21.3k | } |
870 | | |
871 | | static void |
872 | | reset_parser_state_for_error_pass(Parser *p) |
873 | 13.6k | { |
874 | 13.6k | p->last_stmt_location.lineno = 0; |
875 | 13.6k | p->last_stmt_location.col_offset = 0; |
876 | 13.6k | p->last_stmt_location.end_lineno = 0; |
877 | 13.6k | p->last_stmt_location.end_col_offset = 0; |
878 | 512k | for (int i = 0; i < p->fill; i++) { |
879 | 498k | p->tokens[i]->memo = NULL; |
880 | 498k | } |
881 | 13.6k | p->mark = 0; |
882 | 13.6k | p->call_invalid_rules = 1; |
883 | | // Don't try to get extra tokens in interactive mode when trying to |
884 | | // raise specialized errors in the second pass. |
885 | 13.6k | p->tok->interactive_underflow = IUNDERFLOW_STOP; |
886 | 13.6k | } |
887 | | |
888 | | static inline int |
889 | 0 | _is_end_of_source(Parser *p) { |
890 | 0 | int err = p->tok->done; |
891 | 0 | return err == E_EOF || err == E_EOFS || err == E_EOLS; |
892 | 0 | } |
893 | | |
// Attach metadata from p->last_stmt_location to the currently-raised
// SyntaxError as a (lineno, col_offset, source) tuple stored in the
// exception's `metadata` slot.  If the raised exception is not a
// SyntaxError (or nothing is raised), the exception state is restored
// unchanged.  Never raises: internal failures are cleared and the
// metadata is simply not attached.
static void
_PyPegen_set_syntax_error_metadata(Parser *p) {
    PyObject *exc = PyErr_GetRaisedException();
    if (!exc || !PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_SyntaxError)) {
        // Not a SyntaxError: put the exception state back and bail out.
        PyErr_SetRaisedException(exc);
        return;
    }
    // Prefer the tokenizer's raw input buffer as the source text; fall
    // back to the accumulated interactive source if available.
    const char *source = NULL;
    if (p->tok->str != NULL) {
        source = p->tok->str;
    }
    if (!source && p->tok->fp_interactive && p->tok->interactive_src_start) {
        source = p->tok->interactive_src_start;
    }
    PyObject* the_source = NULL;
    if (source) {
        if (p->tok->encoding == NULL) {
            the_source = PyUnicode_FromString(source);
        } else {
            the_source = PyUnicode_Decode(source, strlen(source), p->tok->encoding, NULL);
        }
    }
    if (!the_source) {
        // Decoding failed or no source available: use None (owned ref,
        // since "N" below steals a reference).
        PyErr_Clear();
        the_source = Py_None;
        Py_INCREF(the_source);
    }
    PyObject* metadata = Py_BuildValue(
        "(iiN)",
        p->last_stmt_location.lineno,
        p->last_stmt_location.col_offset,
        the_source // N gives ownership to metadata
    );
    if (!metadata) {
        // Py_BuildValue failed before consuming the_source; release it
        // ourselves and restore nothing (exc stays dropped intentionally?
        // NOTE(review): exc is not restored on this path — confirm this
        // is the intended behavior upstream.
        Py_DECREF(the_source);
        PyErr_Clear();
        return;
    }
    PySyntaxErrorObject *syntax_error = (PySyntaxErrorObject *)exc;

    // Replace any previous metadata tuple on the exception.
    Py_XDECREF(syntax_error->metadata);
    syntax_error->metadata = metadata;
    PyErr_SetRaisedException(exc);
}
938 | | |
// Run the PEG parser to completion.  On a parse failure, a second pass is
// made with the expensive "invalid_*" diagnostic rules enabled so that a
// precise SyntaxError can be raised.  Returns the parse result (an AST
// node, owned by p->arena) or NULL with an exception set.
void *
_PyPegen_run_parser(Parser *p)
{
    void *res = _PyPegen_parse(p);
    assert(p->level == 0);
    if (res == NULL) {
        // Incomplete input (e.g. codeop/REPL continuation) is reported as
        // IncompleteInputError rather than a plain SyntaxError.
        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
            PyErr_Clear();
            return _PyPegen_raise_error(p, PyExc_IncompleteInputError, 0, "incomplete input");
        }
        // A non-SyntaxError (e.g. MemoryError) must propagate untouched.
        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            return NULL;
        }
        // Make a second parser pass. In this pass we activate heavier and slower checks
        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
        // rules will be active during parsing.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state_for_error_pass(p);
        _PyPegen_parse(p);

        // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure
        // point.
        _Pypegen_set_syntax_error(p, last_token);

        // Set the metadata in the exception from p->last_stmt_location
        if (PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            _PyPegen_set_syntax_error_metadata(p);
        }
        return NULL;
    }

    // `single` (interactive) input must contain exactly one statement.
    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}
988 | | |
989 | | mod_ty |
990 | | _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob, |
991 | | const char *enc, const char *ps1, const char *ps2, |
992 | | PyCompilerFlags *flags, int *errcode, |
993 | | PyObject **interactive_src, PyArena *arena) |
994 | 0 | { |
995 | 0 | struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2); |
996 | 0 | if (tok == NULL) { |
997 | 0 | if (PyErr_Occurred()) { |
998 | 0 | _PyPegen_raise_tokenizer_init_error(filename_ob); |
999 | 0 | return NULL; |
1000 | 0 | } |
1001 | 0 | return NULL; |
1002 | 0 | } |
1003 | 0 | if (!tok->fp || ps1 != NULL || ps2 != NULL || |
1004 | 0 | PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) { |
1005 | 0 | tok->fp_interactive = 1; |
1006 | 0 | } |
1007 | | // This transfers the ownership to the tokenizer |
1008 | 0 | tok->filename = Py_NewRef(filename_ob); |
1009 | | |
1010 | | // From here on we need to clean up even if there's an error |
1011 | 0 | mod_ty result = NULL; |
1012 | |
|
1013 | 0 | int parser_flags = compute_parser_flags(flags); |
1014 | 0 | Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION, |
1015 | 0 | errcode, NULL, arena); |
1016 | 0 | if (p == NULL) { |
1017 | 0 | goto error; |
1018 | 0 | } |
1019 | | |
1020 | 0 | result = _PyPegen_run_parser(p); |
1021 | 0 | _PyPegen_Parser_Free(p); |
1022 | |
|
1023 | 0 | if (tok->fp_interactive && tok->interactive_src_start && result && interactive_src != NULL) { |
1024 | 0 | *interactive_src = PyUnicode_FromString(tok->interactive_src_start); |
1025 | 0 | if (!interactive_src || _PyArena_AddPyObject(arena, *interactive_src) < 0) { |
1026 | 0 | Py_XDECREF(interactive_src); |
1027 | 0 | result = NULL; |
1028 | 0 | goto error; |
1029 | 0 | } |
1030 | 0 | } |
1031 | | |
1032 | 0 | error: |
1033 | 0 | _PyTokenizer_Free(tok); |
1034 | 0 | return result; |
1035 | 0 | } |
1036 | | |
1037 | | mod_ty |
1038 | | _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob, |
1039 | | PyCompilerFlags *flags, PyArena *arena) |
1040 | 23.1k | { |
1041 | 23.1k | int exec_input = start_rule == Py_file_input; |
1042 | | |
1043 | 23.1k | struct tok_state *tok; |
1044 | 23.1k | if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) { |
1045 | 78 | tok = _PyTokenizer_FromUTF8(str, exec_input, 0); |
1046 | 23.0k | } else { |
1047 | 23.0k | tok = _PyTokenizer_FromString(str, exec_input, 0); |
1048 | 23.0k | } |
1049 | 23.1k | if (tok == NULL) { |
1050 | 1.79k | if (PyErr_Occurred()) { |
1051 | 1.79k | _PyPegen_raise_tokenizer_init_error(filename_ob); |
1052 | 1.79k | } |
1053 | 1.79k | return NULL; |
1054 | 1.79k | } |
1055 | | // This transfers the ownership to the tokenizer |
1056 | 21.3k | tok->filename = Py_NewRef(filename_ob); |
1057 | | |
1058 | | // We need to clear up from here on |
1059 | 21.3k | mod_ty result = NULL; |
1060 | | |
1061 | 21.3k | int parser_flags = compute_parser_flags(flags); |
1062 | 21.3k | int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ? |
1063 | 20.9k | flags->cf_feature_version : PY_MINOR_VERSION; |
1064 | 21.3k | Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version, |
1065 | 21.3k | NULL, str, arena); |
1066 | 21.3k | if (p == NULL) { |
1067 | 0 | goto error; |
1068 | 0 | } |
1069 | | |
1070 | 21.3k | result = _PyPegen_run_parser(p); |
1071 | 21.3k | _PyPegen_Parser_Free(p); |
1072 | | |
1073 | 21.3k | error: |
1074 | 21.3k | _PyTokenizer_Free(tok); |
1075 | 21.3k | return result; |
1076 | 21.3k | } |