/src/cpython/Parser/pegen.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include <Python.h> |
2 | | #include "pycore_ast.h" // _PyAST_Validate(), |
3 | | #include "pycore_pystate.h" // _PyThreadState_GET() |
4 | | #include "pycore_parser.h" // _PYPEGEN_NSTATISTICS |
5 | | #include "pycore_pyerrors.h" // PyExc_IncompleteInputError |
6 | | #include "pycore_runtime.h" // _PyRuntime |
7 | | #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal |
8 | | #include <errcode.h> |
9 | | |
10 | | #include "lexer/lexer.h" |
11 | | #include "tokenizer/tokenizer.h" |
12 | | #include "pegen.h" |
13 | | |
14 | | // Internal parser functions |
15 | | |
16 | | asdl_stmt_seq* |
17 | | _PyPegen_interactive_exit(Parser *p) |
18 | 0 | { |
19 | 0 | if (p->errcode) { |
20 | 0 | *(p->errcode) = E_EOF; |
21 | 0 | } |
22 | 0 | return NULL; |
23 | 0 | } |
24 | | |
25 | | Py_ssize_t |
26 | | _PyPegen_byte_offset_to_character_offset_line(PyObject *line, Py_ssize_t col_offset, Py_ssize_t end_col_offset) |
27 | 0 | { |
28 | 0 | const unsigned char *data = (const unsigned char*)PyUnicode_AsUTF8(line); |
29 | |
|
30 | 0 | Py_ssize_t len = 0; |
31 | 0 | while (col_offset < end_col_offset) { |
32 | 0 | Py_UCS4 ch = data[col_offset]; |
33 | 0 | if (ch < 0x80) { |
34 | 0 | col_offset += 1; |
35 | 0 | } else if ((ch & 0xe0) == 0xc0) { |
36 | 0 | col_offset += 2; |
37 | 0 | } else if ((ch & 0xf0) == 0xe0) { |
38 | 0 | col_offset += 3; |
39 | 0 | } else if ((ch & 0xf8) == 0xf0) { |
40 | 0 | col_offset += 4; |
41 | 0 | } else { |
42 | 0 | PyErr_SetString(PyExc_ValueError, "Invalid UTF-8 sequence"); |
43 | 0 | return -1; |
44 | 0 | } |
45 | 0 | len++; |
46 | 0 | } |
47 | 0 | return len; |
48 | 0 | } |
49 | | |
50 | | Py_ssize_t |
51 | | _PyPegen_byte_offset_to_character_offset_raw(const char* str, Py_ssize_t col_offset) |
52 | 20.4k | { |
53 | 20.4k | Py_ssize_t len = (Py_ssize_t)strlen(str); |
54 | 20.4k | if (col_offset > len + 1) { |
55 | 11 | col_offset = len + 1; |
56 | 11 | } |
57 | 20.4k | assert(col_offset >= 0); |
58 | 20.4k | PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); |
59 | 20.4k | if (!text) { |
60 | 0 | return -1; |
61 | 0 | } |
62 | 20.4k | Py_ssize_t size = PyUnicode_GET_LENGTH(text); |
63 | 20.4k | Py_DECREF(text); |
64 | 20.4k | return size; |
65 | 20.4k | } |
66 | | |
67 | | Py_ssize_t |
68 | | _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) |
69 | 20.4k | { |
70 | 20.4k | const char *str = PyUnicode_AsUTF8(line); |
71 | 20.4k | if (!str) { |
72 | 0 | return -1; |
73 | 0 | } |
74 | 20.4k | return _PyPegen_byte_offset_to_character_offset_raw(str, col_offset); |
75 | 20.4k | } |
76 | | |
77 | | // Here, mark is the start of the node, while p->mark is the end. |
78 | | // If node==NULL, they should be the same. |
79 | | int |
80 | | _PyPegen_insert_memo(Parser *p, int mark, int type, void *node) |
81 | 11.6M | { |
82 | | // Insert in front |
83 | 11.6M | Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo)); |
84 | 11.6M | if (m == NULL) { |
85 | 0 | return -1; |
86 | 0 | } |
87 | 11.6M | m->type = type; |
88 | 11.6M | m->node = node; |
89 | 11.6M | m->mark = p->mark; |
90 | 11.6M | m->next = p->tokens[mark]->memo; |
91 | 11.6M | p->tokens[mark]->memo = m; |
92 | 11.6M | return 0; |
93 | 11.6M | } |
94 | | |
95 | | // Like _PyPegen_insert_memo(), but updates an existing node if found. |
96 | | int |
97 | | _PyPegen_update_memo(Parser *p, int mark, int type, void *node) |
98 | 8.97M | { |
99 | 44.2M | for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) { |
100 | 38.5M | if (m->type == type) { |
101 | | // Update existing node. |
102 | 3.27M | m->node = node; |
103 | 3.27M | m->mark = p->mark; |
104 | 3.27M | return 0; |
105 | 3.27M | } |
106 | 38.5M | } |
107 | | // Insert new node. |
108 | 5.70M | return _PyPegen_insert_memo(p, mark, type, node); |
109 | 8.97M | } |
110 | | |
111 | | static int |
112 | | init_normalization(Parser *p) |
113 | 59.2k | { |
114 | 59.2k | if (p->normalize) { |
115 | 57.6k | return 1; |
116 | 57.6k | } |
117 | 1.61k | p->normalize = PyImport_ImportModuleAttrString("unicodedata", "normalize"); |
118 | 1.61k | if (!p->normalize) |
119 | 0 | { |
120 | 0 | return 0; |
121 | 0 | } |
122 | 1.61k | return 1; |
123 | 1.61k | } |
124 | | |
125 | | static int |
126 | 21.5k | growable_comment_array_init(growable_comment_array *arr, size_t initial_size) { |
127 | 21.5k | assert(initial_size > 0); |
128 | 21.5k | arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items)); |
129 | 21.5k | arr->size = initial_size; |
130 | 21.5k | arr->num_items = 0; |
131 | | |
132 | 21.5k | return arr->items != NULL; |
133 | 21.5k | } |
134 | | |
135 | | static int |
136 | 0 | growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) { |
137 | 0 | if (arr->num_items >= arr->size) { |
138 | 0 | size_t new_size = arr->size * 2; |
139 | 0 | void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items)); |
140 | 0 | if (!new_items_array) { |
141 | 0 | return 0; |
142 | 0 | } |
143 | 0 | arr->items = new_items_array; |
144 | 0 | arr->size = new_size; |
145 | 0 | } |
146 | | |
147 | 0 | arr->items[arr->num_items].lineno = lineno; |
148 | 0 | arr->items[arr->num_items].comment = comment; // Take ownership |
149 | 0 | arr->num_items++; |
150 | 0 | return 1; |
151 | 0 | } |
152 | | |
153 | | static void |
154 | 21.5k | growable_comment_array_deallocate(growable_comment_array *arr) { |
155 | 21.5k | for (unsigned i = 0; i < arr->num_items; i++) { |
156 | 0 | PyMem_Free(arr->items[i].comment); |
157 | 0 | } |
158 | 21.5k | PyMem_Free(arr->items); |
159 | 21.5k | } |
160 | | |
161 | | static int |
162 | | _get_keyword_or_name_type(Parser *p, struct token *new_token) |
163 | 500k | { |
164 | 500k | Py_ssize_t name_len = new_token->end_col_offset - new_token->col_offset; |
165 | 500k | assert(name_len > 0); |
166 | | |
167 | 500k | if (name_len >= p->n_keyword_lists || |
168 | 500k | p->keywords[name_len] == NULL || |
169 | 500k | p->keywords[name_len]->type == -1) { |
170 | 231k | return NAME; |
171 | 231k | } |
172 | 1.39M | for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { |
173 | 1.24M | if (strncmp(k->str, new_token->start, (size_t)name_len) == 0) { |
174 | 117k | return k->type; |
175 | 117k | } |
176 | 1.24M | } |
177 | 151k | return NAME; |
178 | 269k | } |
179 | | |
// Populate `parser_token` (an entry of p->tokens) from the freshly lexed
// raw token `new_token`.  Returns 0 on success, or -1 with an exception set
// (memory errors, decode errors, or tokenizer errors via ERRORTOKEN).
static int
initialize_token(Parser *p, Token *parser_token, struct token *new_token, int token_type) {
    assert(parser_token != NULL);

    // A NAME token may actually be a keyword; resolve that here.
    parser_token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, new_token) : token_type;
    parser_token->bytes = PyBytes_FromStringAndSize(new_token->start, new_token->end - new_token->start);
    if (parser_token->bytes == NULL) {
        return -1;
    }
    // From here the arena owns the bytes object (freed with the parse tree).
    if (_PyArena_AddPyObject(p->arena, parser_token->bytes) < 0) {
        Py_DECREF(parser_token->bytes);
        return -1;
    }

    parser_token->metadata = NULL;
    if (new_token->metadata != NULL) {
        // Transfer metadata ownership from the raw token to the arena;
        // clear new_token->metadata so the caller won't free it again.
        if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
            Py_DECREF(new_token->metadata);
            return -1;
        }
        parser_token->metadata = new_token->metadata;
        new_token->metadata = NULL;
    }

    parser_token->level = new_token->level;
    parser_token->lineno = new_token->lineno;
    // On the parser's starting line, columns are shifted by the starting
    // column offset (used when the parsed text is embedded in other source).
    parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
                                                                    : new_token->col_offset;
    parser_token->end_lineno = new_token->end_lineno;
    parser_token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->end_col_offset
                                                                        : new_token->end_col_offset;

    p->fill += 1;

    // Surface tokenizer failures as Python exceptions.
    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
        return _Pypegen_raise_decode_error(p);
    }

    return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
}
220 | | |
221 | | static int |
222 | 81.1k | _resize_tokens_array(Parser *p) { |
223 | 81.1k | int newsize = p->size * 2; |
224 | 81.1k | Token **new_tokens = PyMem_Realloc(p->tokens, (size_t)newsize * sizeof(Token *)); |
225 | 81.1k | if (new_tokens == NULL) { |
226 | 0 | PyErr_NoMemory(); |
227 | 0 | return -1; |
228 | 0 | } |
229 | 81.1k | p->tokens = new_tokens; |
230 | | |
231 | 2.62M | for (int i = p->size; i < newsize; i++) { |
232 | 2.54M | p->tokens[i] = PyMem_Calloc(1, sizeof(Token)); |
233 | 2.54M | if (p->tokens[i] == NULL) { |
234 | 0 | p->size = i; // Needed, in order to cleanup correctly after parser fails |
235 | 0 | PyErr_NoMemory(); |
236 | 0 | return -1; |
237 | 0 | } |
238 | 2.54M | } |
239 | 81.1k | p->size = newsize; |
240 | 81.1k | return 0; |
241 | 81.1k | } |
242 | | |
243 | | int |
244 | | _PyPegen_fill_token(Parser *p) |
245 | 1.73M | { |
246 | 1.73M | struct token new_token; |
247 | 1.73M | _PyToken_Init(&new_token); |
248 | 1.73M | int type = _PyTokenizer_Get(p->tok, &new_token); |
249 | | |
250 | | // Record and skip '# type: ignore' comments |
251 | 1.73M | while (type == TYPE_IGNORE) { |
252 | 0 | Py_ssize_t len = new_token.end_col_offset - new_token.col_offset; |
253 | 0 | char *tag = PyMem_Malloc((size_t)len + 1); |
254 | 0 | if (tag == NULL) { |
255 | 0 | PyErr_NoMemory(); |
256 | 0 | goto error; |
257 | 0 | } |
258 | 0 | strncpy(tag, new_token.start, (size_t)len); |
259 | 0 | tag[len] = '\0'; |
260 | | // Ownership of tag passes to the growable array |
261 | 0 | if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) { |
262 | 0 | PyErr_NoMemory(); |
263 | 0 | goto error; |
264 | 0 | } |
265 | 0 | type = _PyTokenizer_Get(p->tok, &new_token); |
266 | 0 | } |
267 | | |
268 | | // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing |
269 | 1.73M | if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) { |
270 | 0 | type = NEWLINE; /* Add an extra newline */ |
271 | 0 | p->parsing_started = 0; |
272 | |
|
273 | 0 | if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) { |
274 | 0 | p->tok->pendin = -p->tok->indent; |
275 | 0 | p->tok->indent = 0; |
276 | 0 | } |
277 | 0 | } |
278 | 1.73M | else { |
279 | 1.73M | p->parsing_started = 1; |
280 | 1.73M | } |
281 | | |
282 | | // Check if we are at the limit of the token array capacity and resize if needed |
283 | 1.73M | if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) { |
284 | 0 | goto error; |
285 | 0 | } |
286 | | |
287 | 1.73M | Token *t = p->tokens[p->fill]; |
288 | 1.73M | return initialize_token(p, t, &new_token, type); |
289 | 0 | error: |
290 | 0 | _PyToken_Free(&new_token); |
291 | 0 | return -1; |
292 | 1.73M | } |
293 | | |
#if defined(Py_DEBUG)
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS _PYPEGEN_NSTATISTICS
#define memo_statistics _PyRuntime.parser.memo_statistics

// In free-threaded builds the statistics array is shared between threads,
// so updates are guarded by the runtime's parser mutex; with the GIL the
// lock macros expand to nothing.
#ifdef Py_GIL_DISABLED
#define MUTEX_LOCK() PyMutex_Lock(&_PyRuntime.parser.mutex)
#define MUTEX_UNLOCK() PyMutex_Unlock(&_PyRuntime.parser.mutex)
#else
#define MUTEX_LOCK()
#define MUTEX_UNLOCK()
#endif

// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics(void)
{
    MUTEX_LOCK();
    for (int i = 0; i < NSTATISTICS; i++) {
        memo_statistics[i] = 0;
    }
    MUTEX_UNLOCK();
}

// Return the counters as a new Python list of ints, or NULL on error.
PyObject *
_PyPegen_get_memo_statistics(void)
{
    PyObject *ret = PyList_New(NSTATISTICS);
    if (ret == NULL) {
        return NULL;
    }

    MUTEX_LOCK();
    for (int i = 0; i < NSTATISTICS; i++) {
        PyObject *value = PyLong_FromLong(memo_statistics[i]);
        if (value == NULL) {
            MUTEX_UNLOCK();
            Py_DECREF(ret);
            return NULL;
        }
        // PyList_SetItem borrows a reference to value.
        if (PyList_SetItem(ret, i, value) < 0) {
            MUTEX_UNLOCK();
            Py_DECREF(ret);
            return NULL;
        }
    }
    MUTEX_UNLOCK();
    return ret;
}
#endif
347 | | |
// Look up a memoized result for rule `type` at the current position.
// On a hit, advance p->mark to the memoized end position, store the cached
// node (possibly NULL for a memoized failure) in *pres, and return 1.
// Returns 0 on a miss and -1 on error (error indicator set).
int  // bool
_PyPegen_is_memoized(Parser *p, int type, void *pres)
{
    if (p->mark == p->fill) {
        // At the frontier of the token array: lex one more token first.
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return -1;
        }
    }

    Token *t = p->tokens[p->mark];

    for (Memo *m = t->memo; m != NULL; m = m->next) {
        if (m->type == type) {
#if defined(Py_DEBUG)
            if (0 <= type && type < NSTATISTICS) {
                long count = m->mark - p->mark;
                // A memoized negative result counts for one.
                if (count <= 0) {
                    count = 1;
                }
                MUTEX_LOCK();
                memo_statistics[type] += count;
                MUTEX_UNLOCK();
            }
#endif
            p->mark = m->mark;
            *(void **)(pres) = m->node;
            return 1;
        }
    }
    return 0;
}
381 | | |
382 | | #define LOOKAHEAD1(NAME, RES_TYPE) \ |
383 | | int \ |
384 | | NAME (int positive, RES_TYPE (func)(Parser *), Parser *p) \ |
385 | 2.29M | { \ |
386 | 2.29M | int mark = p->mark; \ |
387 | 2.29M | void *res = func(p); \ |
388 | 2.29M | p->mark = mark; \ |
389 | 2.29M | return (res != NULL) == positive; \ |
390 | 2.29M | } Line | Count | Source | 385 | 2.29M | { \ | 386 | 2.29M | int mark = p->mark; \ | 387 | 2.29M | void *res = func(p); \ | 388 | 2.29M | p->mark = mark; \ | 389 | 2.29M | return (res != NULL) == positive; \ | 390 | 2.29M | } |
_PyPegen_lookahead_for_expr Line | Count | Source | 385 | 1.01k | { \ | 386 | 1.01k | int mark = p->mark; \ | 387 | 1.01k | void *res = func(p); \ | 388 | 1.01k | p->mark = mark; \ | 389 | 1.01k | return (res != NULL) == positive; \ | 390 | 1.01k | } |
Unexecuted instantiation: _PyPegen_lookahead_for_stmt |
391 | | |
392 | | LOOKAHEAD1(_PyPegen_lookahead, void *) |
393 | | LOOKAHEAD1(_PyPegen_lookahead_for_expr, expr_ty) |
394 | | LOOKAHEAD1(_PyPegen_lookahead_for_stmt, stmt_ty) |
395 | | #undef LOOKAHEAD1 |
396 | | |
397 | | #define LOOKAHEAD2(NAME, RES_TYPE, T) \ |
398 | | int \ |
399 | | NAME (int positive, RES_TYPE (func)(Parser *, T), Parser *p, T arg) \ |
400 | 3.51M | { \ |
401 | 3.51M | int mark = p->mark; \ |
402 | 3.51M | void *res = func(p, arg); \ |
403 | 3.51M | p->mark = mark; \ |
404 | 3.51M | return (res != NULL) == positive; \ |
405 | 3.51M | } _PyPegen_lookahead_with_int Line | Count | Source | 400 | 3.29M | { \ | 401 | 3.29M | int mark = p->mark; \ | 402 | 3.29M | void *res = func(p, arg); \ | 403 | 3.29M | p->mark = mark; \ | 404 | 3.29M | return (res != NULL) == positive; \ | 405 | 3.29M | } |
_PyPegen_lookahead_with_string Line | Count | Source | 400 | 217k | { \ | 401 | 217k | int mark = p->mark; \ | 402 | 217k | void *res = func(p, arg); \ | 403 | 217k | p->mark = mark; \ | 404 | 217k | return (res != NULL) == positive; \ | 405 | 217k | } |
|
406 | | |
407 | | LOOKAHEAD2(_PyPegen_lookahead_with_int, Token *, int) |
408 | | LOOKAHEAD2(_PyPegen_lookahead_with_string, expr_ty, const char *) |
409 | | #undef LOOKAHEAD2 |
410 | | |
411 | | Token * |
412 | | _PyPegen_expect_token(Parser *p, int type) |
413 | 54.0M | { |
414 | 54.0M | if (p->mark == p->fill) { |
415 | 921k | if (_PyPegen_fill_token(p) < 0) { |
416 | 2.68k | p->error_indicator = 1; |
417 | 2.68k | return NULL; |
418 | 2.68k | } |
419 | 921k | } |
420 | 54.0M | Token *t = p->tokens[p->mark]; |
421 | 54.0M | if (t->type != type) { |
422 | 47.3M | return NULL; |
423 | 47.3M | } |
424 | 6.68M | p->mark += 1; |
425 | 6.68M | return t; |
426 | 54.0M | } |
427 | | |
428 | | void* |
429 | 0 | _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) { |
430 | |
|
431 | 0 | if (p->error_indicator == 1) { |
432 | 0 | return NULL; |
433 | 0 | } |
434 | 0 | if (result == NULL) { |
435 | 0 | RAISE_SYNTAX_ERROR("expected (%s)", expected); |
436 | 0 | return NULL; |
437 | 0 | } |
438 | 0 | return result; |
439 | 0 | } |
440 | | |
441 | | Token * |
442 | 19.6k | _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { |
443 | | |
444 | 19.6k | if (p->error_indicator == 1) { |
445 | 0 | return NULL; |
446 | 0 | } |
447 | | |
448 | 19.6k | if (p->mark == p->fill) { |
449 | 5.79k | if (_PyPegen_fill_token(p) < 0) { |
450 | 1 | p->error_indicator = 1; |
451 | 1 | return NULL; |
452 | 1 | } |
453 | 5.79k | } |
454 | 19.6k | Token *t = p->tokens[p->mark]; |
455 | 19.6k | if (t->type != type) { |
456 | 139 | RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected); |
457 | 139 | return NULL; |
458 | 139 | } |
459 | 19.5k | p->mark += 1; |
460 | 19.5k | return t; |
461 | 19.6k | } |
462 | | |
463 | | expr_ty |
464 | | _PyPegen_expect_soft_keyword(Parser *p, const char *keyword) |
465 | 438k | { |
466 | 438k | if (p->mark == p->fill) { |
467 | 6.91k | if (_PyPegen_fill_token(p) < 0) { |
468 | 8 | p->error_indicator = 1; |
469 | 8 | return NULL; |
470 | 8 | } |
471 | 6.91k | } |
472 | 438k | Token *t = p->tokens[p->mark]; |
473 | 438k | if (t->type != NAME) { |
474 | 233k | return NULL; |
475 | 233k | } |
476 | 205k | const char *s = PyBytes_AsString(t->bytes); |
477 | 205k | if (!s) { |
478 | 0 | p->error_indicator = 1; |
479 | 0 | return NULL; |
480 | 0 | } |
481 | 205k | if (strcmp(s, keyword) != 0) { |
482 | 177k | return NULL; |
483 | 177k | } |
484 | 28.4k | return _PyPegen_name_token(p); |
485 | 205k | } |
486 | | |
487 | | Token * |
488 | | _PyPegen_get_last_nonnwhitespace_token(Parser *p) |
489 | 1.77M | { |
490 | 1.77M | assert(p->mark >= 0); |
491 | 1.77M | Token *token = NULL; |
492 | 1.84M | for (int m = p->mark - 1; m >= 0; m--) { |
493 | 1.84M | token = p->tokens[m]; |
494 | 1.84M | if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) { |
495 | 1.77M | break; |
496 | 1.77M | } |
497 | 1.84M | } |
498 | 1.77M | return token; |
499 | 1.77M | } |
500 | | |
// Create an interned identifier object from the NUL-terminated UTF-8
// string `n`.  Non-ASCII identifiers are NFKC-normalized via
// unicodedata.normalize (imported lazily).  The result is owned by the
// parser's arena.  Returns NULL with the error indicator set on failure.
PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
    PyObject *id = PyUnicode_DecodeUTF8(n, (Py_ssize_t)strlen(n), NULL);
    if (!id) {
        goto error;
    }
    /* Check whether there are non-ASCII characters in the
       identifier; if so, normalize to NFKC. */
    if (!PyUnicode_IS_ASCII(id))
    {
        if (!init_normalization(p))
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *form = PyUnicode_InternFromString("NFKC");
        if (form == NULL)
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *args[2] = {form, id};
        PyObject *id2 = PyObject_Vectorcall(p->normalize, args, 2, NULL);
        Py_DECREF(id);
        Py_DECREF(form);
        if (!id2) {
            goto error;
        }

        // unicodedata.normalize is Python code; defend against a patched
        // version returning a non-string.
        if (!PyUnicode_Check(id2))
        {
            PyErr_Format(PyExc_TypeError,
                         "unicodedata.normalize() must return a string, not "
                         "%.200s",
                         _PyType_Name(Py_TYPE(id2)));
            Py_DECREF(id2);
            goto error;
        }
        id = id2;
    }
    // These names are constants in the grammar, never identifiers; reject
    // them so downstream AST consumers can rely on that invariant.
    static const char * const forbidden[] = {
        "None",
        "True",
        "False",
        NULL
    };
    for (int i = 0; forbidden[i] != NULL; i++) {
        if (_PyUnicode_EqualToASCIIString(id, forbidden[i])) {
            PyErr_Format(PyExc_ValueError,
                         "identifier field can't represent '%s' constant",
                         forbidden[i]);
            Py_DECREF(id);
            goto error;
        }
    }
    PyInterpreterState *interp = _PyInterpreterState_GET();
    _PyUnicode_InternImmortal(interp, &id);
    // The arena takes ownership of the (interned) identifier.
    if (_PyArena_AddPyObject(p->arena, id) < 0)
    {
        Py_DECREF(id);
        goto error;
    }
    return id;

error:
    p->error_indicator = 1;
    return NULL;
}
570 | | |
571 | | static expr_ty |
572 | | _PyPegen_name_from_token(Parser *p, Token* t) |
573 | 5.66M | { |
574 | 5.66M | if (t == NULL) { |
575 | 3.17M | return NULL; |
576 | 3.17M | } |
577 | 2.48M | const char *s = PyBytes_AsString(t->bytes); |
578 | 2.48M | if (!s) { |
579 | 0 | p->error_indicator = 1; |
580 | 0 | return NULL; |
581 | 0 | } |
582 | 2.48M | PyObject *id = _PyPegen_new_identifier(p, s); |
583 | 2.48M | if (id == NULL) { |
584 | 0 | p->error_indicator = 1; |
585 | 0 | return NULL; |
586 | 0 | } |
587 | 2.48M | return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno, |
588 | 2.48M | t->end_col_offset, p->arena); |
589 | 2.48M | } |
590 | | |
591 | | expr_ty |
592 | | _PyPegen_name_token(Parser *p) |
593 | 5.65M | { |
594 | 5.65M | Token *t = _PyPegen_expect_token(p, NAME); |
595 | 5.65M | return _PyPegen_name_from_token(p, t); |
596 | 5.65M | } |
597 | | |
598 | | void * |
599 | | _PyPegen_string_token(Parser *p) |
600 | 1.32M | { |
601 | 1.32M | return _PyPegen_expect_token(p, STRING); |
602 | 1.32M | } |
603 | | |
604 | 237k | expr_ty _PyPegen_soft_keyword_token(Parser *p) { |
605 | 237k | Token *t = _PyPegen_expect_token(p, NAME); |
606 | 237k | if (t == NULL) { |
607 | 161k | return NULL; |
608 | 161k | } |
609 | 76.2k | char *the_token; |
610 | 76.2k | Py_ssize_t size; |
611 | 76.2k | PyBytes_AsStringAndSize(t->bytes, &the_token, &size); |
612 | 366k | for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { |
613 | 294k | if (strlen(*keyword) == (size_t)size && |
614 | 294k | strncmp(*keyword, the_token, (size_t)size) == 0) { |
615 | 4.48k | return _PyPegen_name_from_token(p, t); |
616 | 4.48k | } |
617 | 294k | } |
618 | 71.8k | return NULL; |
619 | 76.2k | } |
620 | | |
// Convert the text of a numeric literal (already stripped of underscores)
// into a Python int, float, or complex object.
// Returns a new reference, or NULL with an exception set on failure.
static PyObject *
parsenumber_raw(const char *s)
{
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    errno = 0;
    end = s + strlen(s) - 1;
    // A trailing 'j'/'J' marks an imaginary (complex) literal.
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        // Leading zero: binary/octal/hex prefix (or plain zero); parse as
        // unsigned so e.g. 0xffffffff on 32-bit longs doesn't overflow.
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        if (x < 0 && errno == 0) {
            // Fits in unsigned long but not long: use arbitrary precision.
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    if (*end == '\0') {
        // Entire string consumed as an integer.
        if (errno != 0) {
            // C long overflow: fall back to arbitrary precision.
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    /* XXX Huge floats may silently fail */
    if (imflag) {
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}
664 | | |
665 | | static PyObject * |
666 | | parsenumber(const char *s) |
667 | 244k | { |
668 | 244k | char *dup; |
669 | 244k | char *end; |
670 | 244k | PyObject *res = NULL; |
671 | | |
672 | 244k | assert(s != NULL); |
673 | | |
674 | 244k | if (strchr(s, '_') == NULL) { |
675 | 243k | return parsenumber_raw(s); |
676 | 243k | } |
677 | | /* Create a duplicate without underscores. */ |
678 | 1.30k | dup = PyMem_Malloc(strlen(s) + 1); |
679 | 1.30k | if (dup == NULL) { |
680 | 0 | return PyErr_NoMemory(); |
681 | 0 | } |
682 | 1.30k | end = dup; |
683 | 25.8k | for (; *s; s++) { |
684 | 24.4k | if (*s != '_') { |
685 | 20.8k | *end++ = *s; |
686 | 20.8k | } |
687 | 24.4k | } |
688 | 1.30k | *end = '\0'; |
689 | 1.30k | res = parsenumber_raw(dup); |
690 | 1.30k | PyMem_Free(dup); |
691 | 1.30k | return res; |
692 | 1.30k | } |
693 | | |
// Consume a NUMBER token and turn it into a Constant AST node.
// Rejects underscore separators before feature version 3.6, and rewrites
// the ValueError from the int-conversion length limit into a SyntaxError
// that points at the offending line.
expr_ty
_PyPegen_number_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NUMBER);
    if (t == NULL) {
        return NULL;
    }

    const char *num_raw = PyBytes_AsString(t->bytes);
    if (num_raw == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
        p->error_indicator = 1;
        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                                  "in Python 3.6 and greater");
    }

    PyObject *c = parsenumber(num_raw);

    if (c == NULL) {
        p->error_indicator = 1;
        PyThreadState *tstate = _PyThreadState_GET();
        // The only way a ValueError should happen in _this_ code is via
        // PyLong_FromString hitting a length limit.
        if (tstate->current_exception != NULL &&
            Py_TYPE(tstate->current_exception) == (PyTypeObject *)PyExc_ValueError
        ) {
            PyObject *exc = PyErr_GetRaisedException();
            /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
             * on the error message. Nobody is going to overlook their huge
             * numeric literal once given the line. */
            RAISE_ERROR_KNOWN_LOCATION(
                p, PyExc_SyntaxError,
                t->lineno, -1 /* col_offset */,
                t->end_lineno, -1 /* end_col_offset */,
                "%S - Consider hexadecimal for huge integer literals "
                "to avoid decimal conversion limits.",
                exc);
            Py_DECREF(exc);
        }
        return NULL;
    }

    // The arena takes ownership of the constant object.
    if (_PyArena_AddPyObject(p->arena, c) < 0) {
        Py_DECREF(c);
        p->error_indicator = 1;
        return NULL;
    }

    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
                           t->end_col_offset, p->arena);
}
749 | | |
750 | | /* Check that the source for a single input statement really is a single |
751 | | statement by looking at what is left in the buffer after parsing. |
752 | | Trailing whitespace and comments are OK. */ |
753 | | static int // bool |
754 | | bad_single_statement(Parser *p) |
755 | 0 | { |
756 | 0 | char *cur = p->tok->cur; |
757 | 0 | char c = *cur; |
758 | |
|
759 | 0 | for (;;) { |
760 | 0 | while (c == ' ' || c == '\t' || c == '\n' || c == '\014') { |
761 | 0 | c = *++cur; |
762 | 0 | } |
763 | |
|
764 | 0 | if (!c) { |
765 | 0 | return 0; |
766 | 0 | } |
767 | | |
768 | 0 | if (c != '#') { |
769 | 0 | return 1; |
770 | 0 | } |
771 | | |
772 | | /* Suck up comment. */ |
773 | 0 | while (c && c != '\n') { |
774 | 0 | c = *++cur; |
775 | 0 | } |
776 | 0 | } |
777 | 0 | } |
778 | | |
779 | | static int |
780 | | compute_parser_flags(PyCompilerFlags *flags) |
781 | 21.5k | { |
782 | 21.5k | int parser_flags = 0; |
783 | 21.5k | if (!flags) { |
784 | 48 | return 0; |
785 | 48 | } |
786 | 21.5k | if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) { |
787 | 0 | parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; |
788 | 0 | } |
789 | 21.5k | if (flags->cf_flags & PyCF_IGNORE_COOKIE) { |
790 | 43 | parser_flags |= PyPARSE_IGNORE_COOKIE; |
791 | 43 | } |
792 | 21.5k | if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) { |
793 | 0 | parser_flags |= PyPARSE_BARRY_AS_BDFL; |
794 | 0 | } |
795 | 21.5k | if (flags->cf_flags & PyCF_TYPE_COMMENTS) { |
796 | 0 | parser_flags |= PyPARSE_TYPE_COMMENTS; |
797 | 0 | } |
798 | 21.5k | if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) { |
799 | 0 | parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT; |
800 | 0 | } |
801 | 21.5k | return parser_flags; |
802 | 21.5k | } |
803 | | |
804 | | // Parser API |
805 | | |
// Allocate and initialize a Parser driving the given tokenizer state.
// `tok`, `errcode` and `arena` are borrowed from the caller, which keeps
// ownership.  Returns NULL with MemoryError set on allocation failure
// (all partially-allocated pieces are released).
Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
                    int feature_version, int *errcode, const char* source, PyArena *arena)
{
    Parser *p = PyMem_Malloc(sizeof(Parser));
    if (p == NULL) {
        return (Parser *) PyErr_NoMemory();
    }
    assert(tok != NULL);
    tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
    p->tok = tok;
    // keywords/soft_keywords are filled in later by the generated parser.
    p->keywords = NULL;
    p->n_keyword_lists = -1;
    p->soft_keywords = NULL;
    // Token array starts with capacity 1 and grows on demand.
    p->tokens = PyMem_Malloc(sizeof(Token *));
    if (!p->tokens) {
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }
    p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
    if (!p->tokens[0]) {
        PyMem_Free(p->tokens);
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }
    if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
        PyMem_Free(p->tokens[0]);
        PyMem_Free(p->tokens);
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }

    p->mark = 0;
    p->fill = 0;
    p->size = 1;

    p->errcode = errcode;
    p->arena = arena;
    p->start_rule = start_rule;
    p->parsing_started = 0;
    p->normalize = NULL;  // lazily set by init_normalization()
    p->error_indicator = 0;

    p->starting_lineno = 0;
    p->starting_col_offset = 0;
    p->flags = flags;
    p->feature_version = feature_version;
    p->known_err_token = NULL;
    p->level = 0;
    // Invalid rules are disabled on the first pass; enabled only for the
    // error-reporting second pass (see reset_parser_state_for_error_pass).
    p->call_invalid_rules = 0;
    p->last_stmt_location.lineno = 0;
    p->last_stmt_location.col_offset = 0;
    p->last_stmt_location.end_lineno = 0;
    p->last_stmt_location.end_col_offset = 0;
#ifdef Py_DEBUG
    p->debug = _Py_GetConfig()->parser_debug;
#endif
    return p;
}
865 | | |
866 | | void |
867 | | _PyPegen_Parser_Free(Parser *p) |
868 | 21.5k | { |
869 | 21.5k | Py_XDECREF(p->normalize); |
870 | 2.58M | for (int i = 0; i < p->size; i++) { |
871 | 2.56M | PyMem_Free(p->tokens[i]); |
872 | 2.56M | } |
873 | 21.5k | PyMem_Free(p->tokens); |
874 | 21.5k | growable_comment_array_deallocate(&p->type_ignore_comments); |
875 | 21.5k | PyMem_Free(p); |
876 | 21.5k | } |
877 | | |
// Rewind the parser so the grammar can be re-run from the start with
// the slow diagnostic ("invalid_*") rules enabled.
static void
reset_parser_state_for_error_pass(Parser *p)
{
    // Drop the memoization caches of every token read so far; the
    // second pass must re-evaluate all rules.
    int filled = p->fill;
    for (int t = 0; t < filled; t++) {
        p->tokens[t]->memo = NULL;
    }
    p->last_stmt_location.lineno = 0;
    p->last_stmt_location.col_offset = 0;
    p->last_stmt_location.end_lineno = 0;
    p->last_stmt_location.end_col_offset = 0;
    p->mark = 0;
    p->call_invalid_rules = 1;
    // Don't try to get extra tokens in interactive mode when trying to
    // raise specialized errors in the second pass.
    p->tok->interactive_underflow = IUNDERFLOW_STOP;
}
894 | | |
// Report whether the tokenizer stopped because the input ran out
// (plain EOF, or EOF while inside a string / after a line continuation).
static inline int
_is_end_of_source(Parser *p) {
    switch (p->tok->done) {
        case E_EOF:
        case E_EOFS:
        case E_EOLS:
            return 1;
        default:
            return 0;
    }
}
900 | | |
// Attach (lineno, col_offset, source) metadata taken from
// p->last_stmt_location to the currently raised SyntaxError, for later
// error reporting.  If the raised exception is not a SyntaxError the
// exception state is restored untouched.
static void
_PyPegen_set_syntax_error_metadata(Parser *p) {
    PyObject *exc = PyErr_GetRaisedException();
    if (!exc || !PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_SyntaxError)) {
        // Not a SyntaxError: put the exception state back unchanged.
        PyErr_SetRaisedException(exc);
        return;
    }
    // Prefer the buffer the tokenizer read from; fall back to the
    // accumulated interactive source when parsing from a REPL.
    const char *source = NULL;
    if (p->tok->str != NULL) {
        source = p->tok->str;
    }
    if (!source && p->tok->fp_interactive && p->tok->interactive_src_start) {
        source = p->tok->interactive_src_start;
    }
    PyObject* the_source = NULL;
    if (source) {
        if (p->tok->encoding == NULL) {
            the_source = PyUnicode_FromString(source);
        } else {
            the_source = PyUnicode_Decode(source, strlen(source), p->tok->encoding, NULL);
        }
    }
    if (!the_source) {
        // Decoding failed or there was no source text: still build the
        // metadata tuple, with None standing in for the source.
        PyErr_Clear();
        the_source = Py_None;
        Py_INCREF(the_source);
    }
    PyObject* metadata = Py_BuildValue(
        "(iiN)",
        p->last_stmt_location.lineno,
        p->last_stmt_location.col_offset,
        the_source // N gives ownership to metadata
    );
    if (!metadata) {
        // Py_BuildValue failed before consuming the_source, so we still
        // own it.  NOTE(review): `exc` is neither restored nor decref'd
        // on this path — looks like the SyntaxError is dropped; confirm
        // this is intended.
        Py_DECREF(the_source);
        PyErr_Clear();
        return;
    }
    PySyntaxErrorObject *syntax_error = (PySyntaxErrorObject *)exc;

    // Replace any previous metadata, then re-raise the (annotated)
    // exception.
    Py_XDECREF(syntax_error->metadata);
    syntax_error->metadata = metadata;
    PyErr_SetRaisedException(exc);
}
945 | | |
// Run the PEG parser to completion.  On failure the input is parsed a
// second time with the expensive "invalid_*" rules enabled so a precise
// SyntaxError can be produced; NULL is returned with the exception set.
// On success returns the parse result (a mod_ty for the module start
// rules).
void *
_PyPegen_run_parser(Parser *p)
{
    void *res = _PyPegen_parse(p);
    assert(p->level == 0);
    if (res == NULL) {
        // Incomplete-input mode (e.g. codeop): report a distinct
        // exception type instead of a generic SyntaxError.
        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
            PyErr_Clear();
            return _PyPegen_raise_error(p, PyExc_IncompleteInputError, 0, "incomplete input");
        }
        // Non-syntax errors (e.g. MemoryError, KeyboardInterrupt)
        // propagate as-is; a second pass would not improve them.
        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            return NULL;
        }
        // Make a second parser pass. In this pass we activate heavier and slower checks
        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
        // rules will be active during parsing.
        // Capture the last token BEFORE the reset clears parser state.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state_for_error_pass(p);
        _PyPegen_parse(p);

        // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure
        // point.
        _Pypegen_set_syntax_error(p, last_token);

        // Set the metadata in the exception from p->last_stmt_location
        if (PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            _PyPegen_set_syntax_error_metadata(p);
        }
        return NULL;
    }

    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}
995 | | |
// Parse the contents of *fp* with the given start rule.
//
// A new reference to *filename_ob* is stored on the tokenizer, which
// owns it afterwards.  When parsing interactively and *interactive_src*
// is non-NULL, the accumulated interactive source is returned through
// it as a str owned by *arena*.
// Returns the module AST on success, or NULL with an exception set.
mod_ty
_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
                                      const char *enc, const char *ps1, const char *ps2,
                                      PyCompilerFlags *flags, int *errcode,
                                      PyObject **interactive_src, PyArena *arena)
{
    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
            return NULL;
        }
        return NULL;
    }
    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
        tok->fp_interactive = 1;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = Py_NewRef(filename_ob);

    // From here on we need to clean up even if there's an error
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
                                    errcode, NULL, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

    if (tok->fp_interactive && tok->interactive_src_start && result && interactive_src != NULL) {
        *interactive_src = PyUnicode_FromString(tok->interactive_src_start);
        // Bug fix: the old code checked `!interactive_src` (the PyObject**,
        // already known non-NULL from the condition above) instead of the
        // str it points to, and Py_XDECREF'ed the double pointer itself —
        // leaking the str (and possibly passing NULL to
        // _PyArena_AddPyObject) when creation or arena registration failed.
        // On _PyArena_AddPyObject success the arena owns the reference;
        // on failure we still own it and must drop it here.
        if (*interactive_src == NULL || _PyArena_AddPyObject(arena, *interactive_src) < 0) {
            Py_CLEAR(*interactive_src);
            result = NULL;
            goto error;
        }
    }

error:
    _PyTokenizer_Free(tok);
    return result;
}
1043 | | |
// Parse the NUL-terminated buffer *str* with the given start rule.
// A new reference to *filename_ob* is handed to the tokenizer, which
// owns it from then on.  Returns the module AST on success, or NULL
// with an exception set.
mod_ty
_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
                        PyCompilerFlags *flags, PyArena *arena)
{
    int exec_input = (start_rule == Py_file_input);
    int ignore_cookie = flags != NULL && (flags->cf_flags & PyCF_IGNORE_COOKIE);

    // With PyCF_IGNORE_COOKIE the buffer is already known to be UTF-8,
    // so coding-cookie detection is skipped.
    struct tok_state *tok = ignore_cookie
        ? _PyTokenizer_FromUTF8(str, exec_input, 0)
        : _PyTokenizer_FromString(str, exec_input, 0);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = Py_NewRef(filename_ob);

    // From here on, the tokenizer must be freed on every path.
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    int feature_version = PY_MINOR_VERSION;
    if (flags && (flags->cf_flags & PyCF_ONLY_AST)) {
        feature_version = flags->cf_feature_version;
    }
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
                                    NULL, str, arena);
    if (p != NULL) {
        result = _PyPegen_run_parser(p);
        _PyPegen_Parser_Free(p);
    }

    _PyTokenizer_Free(tok);
    return result;
}