Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pycparser/c_parser.py: 55%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# ------------------------------------------------------------------------------
2# pycparser: c_parser.py
3#
4# Recursive-descent parser for the C language.
5#
6# Eli Bendersky [https://eli.thegreenplace.net/]
7# License: BSD
8# ------------------------------------------------------------------------------
9from dataclasses import dataclass
10from typing import (
11 Any,
12 Dict,
13 List,
14 Literal,
15 NoReturn,
16 Optional,
17 Tuple,
18 TypedDict,
19 cast,
20)
22from . import c_ast
23from .c_lexer import CLexer, _Token
24from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
@dataclass
class Coord:
    """Location of a syntactic element.

    Holds the file name, line number, and (optionally) column number.
    Renders as "file:line" or "file:line:column".
    """

    file: str
    line: int
    column: Optional[int] = None

    def __str__(self) -> str:
        parts = [self.file, str(self.line)]
        if self.column:  # column 0 / None is omitted, as before
            parts.append(str(self.column))
        return ":".join(parts)
class ParseError(Exception):
    """Raised when the parser cannot make sense of the input C source."""
50class CParser:
51 """Recursive-descent C parser.
53 Usage:
54 parser = CParser()
55 ast = parser.parse(text, filename)
57 The `lexer` parameter lets you inject a lexer class (defaults to CLexer).
58 The parameters after `lexer` are accepted for backward compatibility with
59 the old PLY-based parser and are otherwise unused.
60 """
62 def __init__(
63 self,
64 lex_optimize: bool = True,
65 lexer: type[CLexer] = CLexer,
66 lextab: str = "pycparser.lextab",
67 yacc_optimize: bool = True,
68 yacctab: str = "pycparser.yacctab",
69 yacc_debug: bool = False,
70 taboutputdir: str = "",
71 ) -> None:
72 self.clex: CLexer = lexer(
73 error_func=self._lex_error_func,
74 on_lbrace_func=self._lex_on_lbrace_func,
75 on_rbrace_func=self._lex_on_rbrace_func,
76 type_lookup_func=self._lex_type_lookup_func,
77 )
79 # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
80 # the current (topmost) scope. Each scope is a dictionary that
81 # specifies whether a name is a type. If _scope_stack[n][name] is
82 # True, 'name' is currently a type in the scope. If it's False,
83 # 'name' is used in the scope but not as a type (for instance, if we
84 # saw: int name;
85 # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
86 # in this scope at all.
87 self._scope_stack: List[Dict[str, bool]] = [dict()]
88 self._tokens: _TokenStream = _TokenStream(self.clex)
90 def parse(
91 self, text: str, filename: str = "", debug: bool = False
92 ) -> c_ast.FileAST:
93 """Parses C code and returns an AST.
95 text:
96 A string containing the C source code
98 filename:
99 Name of the file being parsed (for meaningful
100 error messages)
102 debug:
103 Deprecated debug flag (unused); for backwards compatibility.
104 """
105 self._scope_stack = [dict()]
106 self.clex.input(text, filename)
107 self._tokens = _TokenStream(self.clex)
109 ast = self._parse_translation_unit_or_empty()
110 tok = self._peek()
111 if tok is not None:
112 self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
113 return ast
115 # ------------------------------------------------------------------
116 # Scope and declaration helpers
117 # ------------------------------------------------------------------
118 def _coord(self, lineno: int, column: Optional[int] = None) -> Coord:
119 return Coord(file=self.clex.filename, line=lineno, column=column)
121 def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn:
122 raise ParseError(f"{coord}: {msg}")
124 def _push_scope(self) -> None:
125 self._scope_stack.append(dict())
127 def _pop_scope(self) -> None:
128 assert len(self._scope_stack) > 1
129 self._scope_stack.pop()
131 def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None:
132 """Add a new typedef name (ie a TYPEID) to the current scope"""
133 if not self._scope_stack[-1].get(name, True):
134 self._parse_error(
135 f"Typedef {name!r} previously declared as non-typedef in this scope",
136 coord,
137 )
138 self._scope_stack[-1][name] = True
140 def _add_identifier(self, name: str, coord: Optional[Coord]) -> None:
141 """Add a new object, function, or enum member name (ie an ID) to the
142 current scope
143 """
144 if self._scope_stack[-1].get(name, False):
145 self._parse_error(
146 f"Non-typedef {name!r} previously declared as typedef in this scope",
147 coord,
148 )
149 self._scope_stack[-1][name] = False
151 def _is_type_in_scope(self, name: str) -> bool:
152 """Is *name* a typedef-name in the current scope?"""
153 for scope in reversed(self._scope_stack):
154 # If name is an identifier in this scope it shadows typedefs in
155 # higher scopes.
156 in_scope = scope.get(name)
157 if in_scope is not None:
158 return in_scope
159 return False
161 def _lex_error_func(self, msg: str, line: int, column: int) -> None:
162 self._parse_error(msg, self._coord(line, column))
164 def _lex_on_lbrace_func(self) -> None:
165 self._push_scope()
167 def _lex_on_rbrace_func(self) -> None:
168 self._pop_scope()
170 def _lex_type_lookup_func(self, name: str) -> bool:
171 """Looks up types that were previously defined with
172 typedef.
173 Passed to the lexer for recognizing identifiers that
174 are types.
175 """
176 return self._is_type_in_scope(name)
    # To understand what's going on here, read sections A.8.5 and
    # A.8.6 of K&R2 very carefully.
    #
    # A C type consists of a basic type declaration, with a list
    # of modifiers. For example:
    #
    # int *c[5];
    #
    # The basic declaration here is 'int c', and the pointer and
    # the array are the modifiers.
    #
    # Basic declarations are represented by TypeDecl (from module c_ast) and the
    # modifiers are FuncDecl, PtrDecl and ArrayDecl.
    #
    # The standard states that whenever a new modifier is parsed, it should be
    # added to the end of the list of modifiers. For example:
    #
    # K&R2 A.8.6.2: Array Declarators
    #
    # In a declaration T D where D has the form
    #   D1 [constant-expression-opt]
    # and the type of the identifier in the declaration T D1 is
    # "type-modifier T", the type of the
    # identifier of D is "type-modifier array of T"
    #
    # This is what this method does. The declarator it receives
    # can be a list of declarators ending with TypeDecl. It
    # tacks the modifier to the end of this list, just before
    # the TypeDecl.
    #
    # Additionally, the modifier may be a list itself. This is
    # useful for pointers, that can come as a chain from the rule
    # p_pointer. In this case, the whole modifier list is spliced
    # into the new location.
    def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node:
        """Tacks a type modifier on a declarator, and returns
        the modified declarator.

        Note: the declarator and modifier may be modified (both are
        mutated in place; the return value aliases one of them).
        """
        modifier_head = modifier
        modifier_tail = modifier

        # The modifier may be a nested list. Reach its tail.
        while modifier_tail.type:
            modifier_tail = modifier_tail.type

        # If the decl is a basic type, just tack the modifier onto it.
        if isinstance(decl, c_ast.TypeDecl):
            modifier_tail.type = decl
            return modifier
        else:
            # Otherwise, the decl is a list of modifiers. Reach
            # its tail and splice the modifier onto the tail,
            # pointing to the underlying basic type.
            decl_tail = decl
            while not isinstance(decl_tail.type, c_ast.TypeDecl):
                decl_tail = decl_tail.type

            # Insert the modifier chain between decl's tail and the TypeDecl.
            modifier_tail.type = decl_tail.type
            decl_tail.type = modifier_head
            return decl
    # Due to the order in which declarators are constructed,
    # they have to be fixed in order to look like a normal AST.
    #
    # When a declaration arrives from syntax construction, it has
    # these problems:
    # * The innermost TypeDecl has no type (because the basic
    #   type is only known at the uppermost declaration level)
    # * The declaration has no variable name, since that is saved
    #   in the innermost TypeDecl
    # * The typename of the declaration is a list of type
    #   specifiers, and not a node. Here, basic identifier types
    #   should be separated from more complex types like enums
    #   and structs.
    #
    # This method fixes these problems.
    def _fix_decl_name_type(
        self,
        decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename,
        typename: List[Any],
    ) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename:
        """Fixes a declaration. Modifies decl (and returns it)."""
        # Reach the underlying basic type (the innermost TypeDecl).
        typ = decl
        while not isinstance(typ, c_ast.TypeDecl):
            typ = typ.type

        # Hoist the name out of the TypeDecl and push the declaration's
        # qualifiers down onto it.
        decl.name = typ.declname
        typ.quals = decl.quals[:]

        # The typename is a list of types. If any type in this
        # list isn't an IdentifierType, it must be the only
        # type in the list (it's illegal to declare "int enum ..")
        # If all the types are basic, they're collected in the
        # IdentifierType holder.
        for tn in typename:
            if not isinstance(tn, c_ast.IdentifierType):
                if len(typename) > 1:
                    self._parse_error("Invalid multiple types specified", tn.coord)
                else:
                    typ.type = tn
                    return decl

        if not typename:
            # Functions default to returning int
            if not isinstance(decl.type, c_ast.FuncDecl):
                self._parse_error("Missing type in declaration", decl.coord)
            typ.type = c_ast.IdentifierType(["int"], coord=decl.coord)
        else:
            # At this point, we know that typename is a list of IdentifierType
            # nodes. Concatenate all the names into a single list.
            typ.type = c_ast.IdentifierType(
                [name for id in typename for name in id.names], coord=typename[0].coord
            )
        return decl
296 def _add_declaration_specifier(
297 self,
298 declspec: Optional["_DeclSpec"],
299 newspec: Any,
300 kind: "_DeclSpecKind",
301 append: bool = False,
302 ) -> "_DeclSpec":
303 """See _DeclSpec for the specifier dictionary layout."""
304 if declspec is None:
305 spec: _DeclSpec = dict(
306 qual=[], storage=[], type=[], function=[], alignment=[]
307 )
308 else:
309 spec = declspec
311 if append:
312 spec[kind].append(newspec)
313 else:
314 spec[kind].insert(0, newspec)
316 return spec
    def _build_declarations(
        self,
        spec: "_DeclSpec",
        decls: List["_DeclInfo"],
        typedef_namespace: bool = False,
    ) -> List[c_ast.Node]:
        """Builds a list of declarations all sharing the given specifiers.

        If typedef_namespace is true, each declared name is added
        to the "typedef namespace", which also includes objects,
        functions, and enum constants.

        Note: mutates both spec (may remove the last entry of spec['type'])
        and decls[0] (may fill in a missing declarator).
        """
        is_typedef = "typedef" in spec["storage"]
        declarations = []

        # Bit-fields are allowed to be unnamed.
        if decls[0].get("bitsize") is None:
            # When redeclaring typedef names as identifiers in inner scopes, a
            # problem can occur where the identifier gets grouped into
            # spec['type'], leaving decl as None. This can only occur for the
            # first declarator.
            if decls[0]["decl"] is None:
                if (
                    len(spec["type"]) < 2
                    or len(spec["type"][-1].names) != 1
                    or not self._is_type_in_scope(spec["type"][-1].names[0])
                ):
                    # Not the redeclaration case: report at the first type
                    # specifier that carries a coordinate.
                    coord = "?"
                    for t in spec["type"]:
                        if hasattr(t, "coord"):
                            coord = t.coord
                            break
                    self._parse_error("Invalid declaration", coord)

                # Make this look as if it came from "direct_declarator:ID"
                decls[0]["decl"] = c_ast.TypeDecl(
                    declname=spec["type"][-1].names[0],
                    type=None,
                    quals=None,
                    align=spec["alignment"],
                    coord=spec["type"][-1].coord,
                )
                # Remove the "new" type's name from the end of spec['type']
                del spec["type"][-1]
            # A similar problem can occur where the declaration ends up
            # looking like an abstract declarator. Give it a name if this is
            # the case.
            elif not isinstance(
                decls[0]["decl"],
                (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
            ):
                decls_0_tail = cast(Any, decls[0]["decl"])
                while not isinstance(decls_0_tail, c_ast.TypeDecl):
                    decls_0_tail = decls_0_tail.type
                if decls_0_tail.declname is None:
                    # The name was mistakenly absorbed into spec['type'];
                    # move it back onto the declarator.
                    decls_0_tail.declname = spec["type"][-1].names[0]
                    del spec["type"][-1]

        for decl in decls:
            assert decl["decl"] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec["qual"],
                    storage=spec["storage"],
                    type=decl["decl"],
                    coord=decl["decl"].coord,
                )
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec["qual"],
                    align=spec["alignment"],
                    storage=spec["storage"],
                    funcspec=spec["function"],
                    type=decl["decl"],
                    init=decl.get("init"),
                    bitsize=decl.get("bitsize"),
                    coord=decl["decl"].coord,
                )

            # Bare struct/union/enum/basic types need no name/type fix-up;
            # everything else goes through _fix_decl_name_type.
            if isinstance(
                declaration.type,
                (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
            ):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec["type"])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            fixed_decl = fix_atomic_specifiers(
                cast(c_ast.Decl | c_ast.Typedef, fixed_decl)
            )
            declarations.append(fixed_decl)

        return declarations
421 def _build_function_definition(
422 self,
423 spec: "_DeclSpec",
424 decl: c_ast.Node,
425 param_decls: Optional[List[c_ast.Node]],
426 body: c_ast.Node,
427 ) -> c_ast.Node:
428 """Builds a function definition."""
429 if "typedef" in spec["storage"]:
430 self._parse_error("Invalid typedef", decl.coord)
432 declaration = self._build_declarations(
433 spec=spec,
434 decls=[dict(decl=decl, init=None, bitsize=None)],
435 typedef_namespace=True,
436 )[0]
438 return c_ast.FuncDef(
439 decl=declaration, param_decls=param_decls, body=body, coord=decl.coord
440 )
442 def _select_struct_union_class(self, token: str) -> type:
443 """Given a token (either STRUCT or UNION), selects the
444 appropriate AST class.
445 """
446 if token == "struct":
447 return c_ast.Struct
448 else:
449 return c_ast.Union
451 # ------------------------------------------------------------------
452 # Token helpers
453 # ------------------------------------------------------------------
454 def _peek(self, k: int = 1) -> Optional[_Token]:
455 """Return the k-th next token without consuming it (1-based)."""
456 return self._tokens.peek(k)
458 def _peek_type(self, k: int = 1) -> Optional[str]:
459 """Return the type of the k-th next token, or None if absent (1-based)."""
460 tok = self._peek(k)
461 return tok.type if tok is not None else None
463 def _advance(self) -> _Token:
464 tok = self._tokens.next()
465 if tok is None:
466 self._parse_error("At end of input", self.clex.filename)
467 else:
468 return tok
470 def _accept(self, token_type: str) -> Optional[_Token]:
471 """Conditionally consume next token, only if it's of token_type.
473 If it is of the expected type, consume and return it.
474 Otherwise, leaves the token intact and returns None.
475 """
476 tok = self._peek()
477 if tok is not None and tok.type == token_type:
478 return self._advance()
479 return None
481 def _expect(self, token_type: str) -> _Token:
482 tok = self._advance()
483 if tok.type != token_type:
484 self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
485 return tok
487 def _mark(self) -> int:
488 return self._tokens.mark()
490 def _reset(self, mark: int) -> None:
491 self._tokens.reset(mark)
493 def _tok_coord(self, tok: _Token) -> Coord:
494 return self._coord(tok.lineno, tok.column)
496 def _starts_declaration(self, tok: Optional[_Token] = None) -> bool:
497 tok = tok or self._peek()
498 if tok is None:
499 return False
500 return tok.type in _DECL_START
502 def _starts_expression(self, tok: Optional[_Token] = None) -> bool:
503 tok = tok or self._peek()
504 if tok is None:
505 return False
506 return tok.type in _STARTS_EXPRESSION
508 def _starts_statement(self) -> bool:
509 tok_type = self._peek_type()
510 if tok_type is None:
511 return False
512 if tok_type in _STARTS_STATEMENT:
513 return True
514 return self._starts_expression()
516 def _starts_declarator(self, id_only: bool = False) -> bool:
517 tok_type = self._peek_type()
518 if tok_type is None:
519 return False
520 if tok_type in {"TIMES", "LPAREN"}:
521 return True
522 if id_only:
523 return tok_type == "ID"
524 return tok_type in {"ID", "TYPEID"}
526 def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]:
527 mark = self._mark()
528 tok_type, saw_paren = self._scan_declarator_name_info()
529 self._reset(mark)
530 return tok_type, saw_paren
    def _parse_any_declarator(
        self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False
    ) -> Tuple[Optional[c_ast.Node], bool]:
        """Parse a declarator of any form.

        Returns (decl, named): `named` is True when a concrete (ID/TYPEID)
        declarator was parsed, False when an abstract declarator (or none)
        was parsed. allow_abstract permits the abstract case instead of
        erroring; typeid_paren_as_abstract treats a parenthesized TYPEID as
        an abstract declarator rather than a name.
        """
        # C declarators are ambiguous without lookahead. For example:
        # int foo(int (aa)); -> aa is a name (ID)
        # typedef char TT;
        # int bar(int (TT)); -> TT is a type (TYPEID) in parens
        name_type, saw_paren = self._peek_declarator_name_info()
        if name_type is None or (
            typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren
        ):
            if not allow_abstract:
                tok = self._peek()
                coord = self._tok_coord(tok) if tok is not None else self.clex.filename
                self._parse_error("Invalid declarator", coord)
            decl = self._parse_abstract_declarator_opt()
            return decl, False

        # Named declarator: dispatch on how the name token was classified.
        if name_type == "TYPEID":
            if typeid_paren_as_abstract:
                decl = self._parse_typeid_noparen_declarator()
            else:
                decl = self._parse_typeid_declarator()
        else:
            decl = self._parse_id_declarator()
        return decl, True
    def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]:
        """Scan forward over a declarator prefix and classify its name.

        Consumes tokens (callers that must not consume use
        _peek_declarator_name_info). Returns (name_token_type, saw_paren):
        name_token_type is "ID"/"TYPEID" for the declarator's name, or None
        if no name was found; saw_paren is True when a '(' was traversed on
        the way to the name.
        """
        saw_paren = False
        # Skip any pointer prefix: '*' optionally followed by qualifiers.
        while self._accept("TIMES"):
            while self._peek_type() in _TYPE_QUALIFIER:
                self._advance()

        tok = self._peek()
        if tok is None:
            return None, saw_paren
        if tok.type in {"ID", "TYPEID"}:
            self._advance()
            return tok.type, saw_paren
        if tok.type == "LPAREN":
            saw_paren = True
            self._advance()
            # Recurse into the parenthesized declarator for the name.
            tok_type, nested_paren = self._scan_declarator_name_info()
            if nested_paren:
                saw_paren = True
            # Skip to the matching ')' of the '(' consumed above.
            depth = 1
            while True:
                tok = self._peek()
                if tok is None:
                    return None, saw_paren
                if tok.type == "LPAREN":
                    depth += 1
                elif tok.type == "RPAREN":
                    depth -= 1
                    self._advance()
                    if depth == 0:
                        break
                    continue
                self._advance()
            return tok_type, saw_paren
        return None, saw_paren
594 def _starts_direct_abstract_declarator(self) -> bool:
595 return self._peek_type() in {"LPAREN", "LBRACKET"}
597 def _is_assignment_op(self) -> bool:
598 tok = self._peek()
599 return tok is not None and tok.type in _ASSIGNMENT_OPS
601 def _try_parse_paren_type_name(
602 self,
603 ) -> Optional[Tuple[c_ast.Typename, int, _Token]]:
604 """Parse and return a parenthesized type name if present.
606 Returns (typ, mark, lparen_tok) when the next tokens look like
607 '(' type_name ')', where typ is the parsed type name, mark is the
608 token-stream position before parsing, and lparen_tok is the LPAREN
609 token. Returns None if no parenthesized type name is present.
610 """
611 mark = self._mark()
612 lparen_tok = self._accept("LPAREN")
613 if lparen_tok is None:
614 return None
615 if not self._starts_declaration():
616 self._reset(mark)
617 return None
618 typ = self._parse_type_name()
619 if self._accept("RPAREN") is None:
620 self._reset(mark)
621 return None
622 return typ, mark, lparen_tok
624 # ------------------------------------------------------------------
625 # Top-level
626 # ------------------------------------------------------------------
627 # BNF: translation_unit_or_empty : translation_unit | empty
628 def _parse_translation_unit_or_empty(self) -> c_ast.FileAST:
629 if self._peek() is None:
630 return c_ast.FileAST([])
631 return c_ast.FileAST(self._parse_translation_unit())
633 # BNF: translation_unit : external_declaration+
634 def _parse_translation_unit(self) -> List[c_ast.Node]:
635 ext = []
636 while self._peek() is not None:
637 ext.extend(self._parse_external_declaration())
638 return ext
    # BNF: external_declaration : function_definition
    #                           | declaration
    #                           | pp_directive
    #                           | pppragma_directive
    #                           | static_assert
    #                           | ';'
    def _parse_external_declaration(self) -> List[c_ast.Node]:
        """Parse one top-level construct; returns the resulting nodes.

        Returns an empty list for constructs that produce no AST nodes
        (preprocessor hash directives and stray semicolons).
        """
        tok = self._peek()
        if tok is None:
            return []
        if tok.type == "PPHASH":
            self._parse_pp_directive()
            return []
        if tok.type in {"PPPRAGMA", "_PRAGMA"}:
            return [self._parse_pppragma_directive()]
        if self._accept("SEMI"):
            return []
        if tok.type == "_STATIC_ASSERT":
            return self._parse_static_assert()

        if not self._starts_declaration(tok):
            # Special handling for old-style function definitions that have an
            # implicit return type, e.g.
            #
            # foo() {
            #     return 5;
            # }
            #
            # These get an implicit 'int' return type.
            decl = self._parse_id_declarator()
            param_decls = None
            if self._peek_type() != "LBRACE":
                self._parse_error("Invalid function definition", decl.coord)
            spec: _DeclSpec = dict(
                qual=[],
                alignment=[],
                storage=[],
                type=[c_ast.IdentifierType(["int"], coord=decl.coord)],
                function=[],
            )
            func = self._build_function_definition(
                spec=spec,
                decl=decl,
                param_decls=param_decls,
                body=self._parse_compound_statement(),
            )
            return [func]

        # From here on, parsing a standard declaration/definition.
        spec, saw_type, spec_coord = self._parse_declaration_specifiers(
            allow_no_type=True
        )

        # Only an ID declarator can begin a function definition; anything
        # else is an ordinary declaration.
        name_type, _ = self._peek_declarator_name_info()
        if name_type != "ID":
            decls = self._parse_decl_body_with_spec(spec, saw_type)
            self._expect("SEMI")
            return decls

        decl = self._parse_id_declarator()

        # A following '{' (or K&R parameter declarations, then '{') makes
        # this a function definition rather than a declaration.
        if self._peek_type() == "LBRACE" or self._starts_declaration():
            param_decls = None
            if self._starts_declaration():
                param_decls = self._parse_declaration_list()
            if self._peek_type() != "LBRACE":
                self._parse_error("Invalid function definition", decl.coord)
            if not spec["type"]:
                # No return type given: default to 'int'.
                spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]
            func = self._build_function_definition(
                spec=spec,
                decl=decl,
                param_decls=param_decls,
                body=self._parse_compound_statement(),
            )
            return [func]

        # Ordinary declaration: finish the first init-declarator, then any
        # comma-separated rest.
        decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None)
        if self._accept("EQUALS"):
            decl_dict["init"] = self._parse_initializer()
        decls = self._parse_init_declarator_list(first=decl_dict)
        decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True)
        self._expect("SEMI")
        return decls
725 # ------------------------------------------------------------------
726 # Declarations
727 #
728 # Declarations always come as lists (because they can be several in one
729 # line). When returning parsed declarations, a list is always returned -
730 # even if it contains a single element.
731 # ------------------------------------------------------------------
732 def _parse_declaration(self) -> List[c_ast.Node]:
733 decls = self._parse_decl_body()
734 self._expect("SEMI")
735 return decls
737 # BNF: decl_body : declaration_specifiers decl_body_with_spec
738 def _parse_decl_body(self) -> List[c_ast.Node]:
739 spec, saw_type, _ = self._parse_declaration_specifiers(allow_no_type=True)
740 return self._parse_decl_body_with_spec(spec, saw_type)
    # BNF: decl_body_with_spec : init_declarator_list
    #                          | struct_or_union_or_enum_only
    def _parse_decl_body_with_spec(
        self, spec: "_DeclSpec", saw_type: bool
    ) -> List[c_ast.Node]:
        """Parse the declarator part of a declaration whose specifiers have
        already been consumed into *spec*.

        saw_type tells whether a type specifier was seen; without one, only
        a plain-ID declarator is acceptable (typedef redeclaration case).
        """
        decls = None
        if saw_type:
            if self._starts_declarator():
                decls = self._parse_init_declarator_list()
        else:
            if self._starts_declarator(id_only=True):
                decls = self._parse_init_declarator_list(id_only=True)

        if decls is None:
            # No declarators. A lone struct/union/enum specifier is still a
            # valid declaration (e.g. "struct foo { ... };").
            ty = spec["type"]
            s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
            if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
                decls = [
                    c_ast.Decl(
                        name=None,
                        quals=spec["qual"],
                        align=spec["alignment"],
                        storage=spec["storage"],
                        funcspec=spec["function"],
                        type=ty[0],
                        init=None,
                        bitsize=None,
                        coord=ty[0].coord,
                    )
                ]
            else:
                # Let _build_declarations sort out the decl-less case (it
                # handles typedef names redeclared as identifiers).
                decls = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=None, init=None, bitsize=None)],
                    typedef_namespace=True,
                )
        else:
            decls = self._build_declarations(
                spec=spec, decls=decls, typedef_namespace=True
            )

        return decls
785 # BNF: declaration_list : declaration+
786 def _parse_declaration_list(self) -> List[c_ast.Node]:
787 decls = []
788 while self._starts_declaration():
789 decls.extend(self._parse_declaration())
790 return decls
    # BNF: declaration_specifiers : (storage_class_specifier
    #                              | type_specifier
    #                              | type_qualifier
    #                              | function_specifier
    #                              | alignment_specifier)+
    def _parse_declaration_specifiers(
        self, allow_no_type: bool = False
    ) -> Tuple["_DeclSpec", bool, Optional[Coord]]:
        """Parse declaration-specifier sequence.

        allow_no_type:
            If True, allow a missing type specifier without error.

        Returns:
            (spec, saw_type, first_coord) where spec is a dict with
            qual/storage/type/function/alignment entries, saw_type is True
            if a type specifier was consumed, and first_coord is the coord
            of the first specifier token (used for diagnostics).
        """
        spec = None
        saw_type = False
        first_coord = None

        while True:
            tok = self._peek()
            if tok is None:
                break

            if tok.type == "_ALIGNAS":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_alignment_specifier(), "alignment", append=True
                )
                continue

            # '_Atomic(...)' is a type specifier; a bare '_Atomic' falls
            # through to the type-qualifier branch below.
            if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_atomic_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type in _TYPE_QUALIFIER:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "qual", append=True
                )
                continue

            if tok.type in _STORAGE_CLASS:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "storage", append=True
                )
                continue

            if tok.type in _FUNCTION_SPEC:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "function", append=True
                )
                continue

            if tok.type in _TYPE_SPEC_SIMPLE:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type == "TYPEID":
                # A TYPEID after a type specifier has already been seen is
                # the declared name, not another type: stop here.
                if saw_type:
                    break
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type in {"STRUCT", "UNION"}:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_struct_or_union_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type == "ENUM":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_enum_specifier(), "type", append=True
                )
                saw_type = True
                continue

            # Not a specifier token: end of the specifier sequence.
            break

        if spec is None:
            self._parse_error("Invalid declaration", self.clex.filename)

        if not saw_type and not allow_no_type:
            self._parse_error("Missing type in declaration", first_coord)

        return spec, saw_type, first_coord
    # BNF: specifier_qualifier_list : (type_specifier
    #                                | type_qualifier
    #                                | alignment_specifier)+
    def _parse_specifier_qualifier_list(self) -> "_DeclSpec":
        """Parse a specifier-qualifier list (used in type names and struct
        declarations). Like _parse_declaration_specifiers but without
        storage-class or function specifiers; a type or an alignment
        specifier is required.
        """
        spec = None
        saw_type = False
        saw_alignment = False
        first_coord = None

        while True:
            tok = self._peek()
            if tok is None:
                break

            if tok.type == "_ALIGNAS":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_alignment_specifier(), "alignment", append=True
                )
                saw_alignment = True
                continue

            # '_Atomic(...)' is a type specifier; a bare '_Atomic' falls
            # through to the type-qualifier branch below.
            if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_atomic_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type in _TYPE_QUALIFIER:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "qual", append=True
                )
                continue

            if tok.type in _TYPE_SPEC_SIMPLE:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type == "TYPEID":
                # A TYPEID once a type has been seen belongs to what follows.
                if saw_type:
                    break
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type in {"STRUCT", "UNION"}:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_struct_or_union_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type == "ENUM":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_enum_specifier(), "type", append=True
                )
                saw_type = True
                continue

            break

        if spec is None:
            self._parse_error("Invalid specifier list", self.clex.filename)

        if not saw_type and not saw_alignment:
            self._parse_error("Missing type in declaration", first_coord)

        # Defensive normalization: make sure the storage/function slots are
        # lists (they are unused by this production).
        if spec.get("storage") is None:
            spec["storage"] = []
        if spec.get("function") is None:
            spec["function"] = []

        return spec
1018 # BNF: type_qualifier_list : type_qualifier+
1019 def _parse_type_qualifier_list(self) -> List[str]:
1020 quals = []
1021 while self._peek_type() in _TYPE_QUALIFIER:
1022 quals.append(self._advance().value)
1023 return quals
1025 # BNF: alignment_specifier : _ALIGNAS '(' type_name | constant_expression ')'
1026 def _parse_alignment_specifier(self) -> c_ast.Node:
1027 tok = self._expect("_ALIGNAS")
1028 self._expect("LPAREN")
1030 if self._starts_declaration():
1031 typ = self._parse_type_name()
1032 self._expect("RPAREN")
1033 return c_ast.Alignas(typ, self._tok_coord(tok))
1035 expr = self._parse_constant_expression()
1036 self._expect("RPAREN")
1037 return c_ast.Alignas(expr, self._tok_coord(tok))
1039 # BNF: atomic_specifier : _ATOMIC '(' type_name ')'
1040 def _parse_atomic_specifier(self) -> c_ast.Node:
1041 self._expect("_ATOMIC")
1042 self._expect("LPAREN")
1043 typ = self._parse_type_name()
1044 self._expect("RPAREN")
1045 typ.quals.append("_Atomic")
1046 return typ
1048 # BNF: init_declarator_list : init_declarator (',' init_declarator)*
1049 def _parse_init_declarator_list(
1050 self, first: Optional["_DeclInfo"] = None, id_only: bool = False
1051 ) -> List["_DeclInfo"]:
1052 decls = (
1053 [first]
1054 if first is not None
1055 else [self._parse_init_declarator(id_only=id_only)]
1056 )
1058 while self._accept("COMMA"):
1059 decls.append(self._parse_init_declarator(id_only=id_only))
1060 return decls
1062 # BNF: init_declarator : declarator ('=' initializer)?
1063 def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo":
1064 decl = self._parse_id_declarator() if id_only else self._parse_declarator()
1065 init = None
1066 if self._accept("EQUALS"):
1067 init = self._parse_initializer()
1068 return dict(decl=decl, init=init, bitsize=None)
1070 # ------------------------------------------------------------------
1071 # Structs/unions/enums
1072 # ------------------------------------------------------------------
1073 # BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}'
1074 # | struct_or_union ID
1075 def _parse_struct_or_union_specifier(self) -> c_ast.Node:
1076 tok = self._advance()
1077 klass = self._select_struct_union_class(tok.value)
1079 if self._peek_type() in {"ID", "TYPEID"}:
1080 name_tok = self._advance()
1081 if self._peek_type() == "LBRACE":
1082 self._advance()
1083 if self._accept("RBRACE"):
1084 return klass(
1085 name=name_tok.value, decls=[], coord=self._tok_coord(name_tok)
1086 )
1087 decls = self._parse_struct_declaration_list()
1088 self._expect("RBRACE")
1089 return klass(
1090 name=name_tok.value, decls=decls, coord=self._tok_coord(name_tok)
1091 )
1093 return klass(
1094 name=name_tok.value, decls=None, coord=self._tok_coord(name_tok)
1095 )
1097 if self._peek_type() == "LBRACE":
1098 brace_tok = self._advance()
1099 if self._accept("RBRACE"):
1100 return klass(name=None, decls=[], coord=self._tok_coord(brace_tok))
1101 decls = self._parse_struct_declaration_list()
1102 self._expect("RBRACE")
1103 return klass(name=None, decls=decls, coord=self._tok_coord(brace_tok))
1105 self._parse_error("Invalid struct/union declaration", self._tok_coord(tok))
1107 # BNF: struct_declaration_list : struct_declaration+
1108 def _parse_struct_declaration_list(self) -> List[c_ast.Node]:
1109 decls = []
1110 while self._peek_type() not in {None, "RBRACE"}:
1111 items = self._parse_struct_declaration()
1112 if items is None:
1113 continue
1114 decls.extend(items)
1115 return decls
    # BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';'
    #                         | static_assert
    #                         | pppragma_directive
    def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]:
        """Parse one member declaration inside a struct/union body.

        Returns a list of declaration nodes, or None for a stray ';'
        (which contributes no members).
        """
        if self._peek_type() == "SEMI":
            # Empty declaration: consume the ';' and produce nothing.
            self._advance()
            return None
        if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
            return [self._parse_pppragma_directive()]

        spec = self._parse_specifier_qualifier_list()
        # A specifier-qualifier list cannot carry storage specifiers.
        assert "typedef" not in spec.get("storage", [])

        decls = None
        if self._starts_declarator() or self._peek_type() == "COLON":
            decls = self._parse_struct_declarator_list()
        if decls is not None:
            self._expect("SEMI")
            return self._build_declarations(spec=spec, decls=decls)

        # No declarator followed the specifiers. A single type node is still
        # a valid member (presumably an anonymous struct/union — the lone
        # specifier becomes the member's declarator).
        if len(spec["type"]) == 1:
            node = spec["type"][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                # Plain name list: wrap it so the builder sees an AST node.
                decl_type = c_ast.IdentifierType(node)
            self._expect("SEMI")
            return self._build_declarations(
                spec=spec, decls=[dict(decl=decl_type, init=None, bitsize=None)]
            )

        # Multiple specifiers but no declarator at all: build with decl=None.
        self._expect("SEMI")
        return self._build_declarations(
            spec=spec, decls=[dict(decl=None, init=None, bitsize=None)]
        )
1153 # BNF: struct_declarator_list : struct_declarator (',' struct_declarator)*
1154 def _parse_struct_declarator_list(self) -> List["_DeclInfo"]:
1155 decls = [self._parse_struct_declarator()]
1156 while self._accept("COMMA"):
1157 decls.append(self._parse_struct_declarator())
1158 return decls
1160 # BNF: struct_declarator : declarator? ':' constant_expression
1161 # | declarator (':' constant_expression)?
1162 def _parse_struct_declarator(self) -> "_DeclInfo":
1163 if self._accept("COLON"):
1164 bitsize = self._parse_constant_expression()
1165 return {
1166 "decl": c_ast.TypeDecl(None, None, None, None),
1167 "init": None,
1168 "bitsize": bitsize,
1169 }
1171 decl = self._parse_declarator()
1172 if self._accept("COLON"):
1173 bitsize = self._parse_constant_expression()
1174 return {"decl": decl, "init": None, "bitsize": bitsize}
1176 return {"decl": decl, "init": None, "bitsize": None}
1178 # BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}'
1179 # | ENUM ID
1180 def _parse_enum_specifier(self) -> c_ast.Node:
1181 tok = self._expect("ENUM")
1182 if self._peek_type() in {"ID", "TYPEID"}:
1183 name_tok = self._advance()
1184 if self._peek_type() == "LBRACE":
1185 self._advance()
1186 enums = self._parse_enumerator_list()
1187 self._expect("RBRACE")
1188 return c_ast.Enum(name_tok.value, enums, self._tok_coord(tok))
1189 return c_ast.Enum(name_tok.value, None, self._tok_coord(tok))
1191 self._expect("LBRACE")
1192 enums = self._parse_enumerator_list()
1193 self._expect("RBRACE")
1194 return c_ast.Enum(None, enums, self._tok_coord(tok))
1196 # BNF: enumerator_list : enumerator (',' enumerator)* ','?
1197 def _parse_enumerator_list(self) -> c_ast.Node:
1198 enum = self._parse_enumerator()
1199 enum_list = c_ast.EnumeratorList([enum], enum.coord)
1200 while self._accept("COMMA"):
1201 if self._peek_type() == "RBRACE":
1202 break
1203 enum = self._parse_enumerator()
1204 enum_list.enumerators.append(enum)
1205 return enum_list
1207 # BNF: enumerator : ID ('=' constant_expression)?
1208 def _parse_enumerator(self) -> c_ast.Node:
1209 name_tok = self._expect("ID")
1210 if self._accept("EQUALS"):
1211 value = self._parse_constant_expression()
1212 else:
1213 value = None
1214 enum = c_ast.Enumerator(name_tok.value, value, self._tok_coord(name_tok))
1215 self._add_identifier(enum.name, enum.coord)
1216 return enum
1218 # ------------------------------------------------------------------
1219 # Declarators
1220 # ------------------------------------------------------------------
1221 # BNF: declarator : pointer? direct_declarator
1222 def _parse_declarator(self) -> c_ast.Node:
1223 decl, _ = self._parse_any_declarator(
1224 allow_abstract=False, typeid_paren_as_abstract=False
1225 )
1226 assert decl is not None
1227 return decl
    # BNF: id_declarator : declarator with ID name
    def _parse_id_declarator(self) -> c_ast.Node:
        """Parse a declarator whose name must be a plain identifier."""
        return self._parse_declarator_kind(kind="id", allow_paren=True)
    # BNF: typeid_declarator : declarator with TYPEID name
    def _parse_typeid_declarator(self) -> c_ast.Node:
        """Parse a declarator whose name is a typedef name (TYPEID token)."""
        return self._parse_declarator_kind(kind="typeid", allow_paren=True)
    # BNF: typeid_noparen_declarator : declarator without parenthesized name
    def _parse_typeid_noparen_declarator(self) -> c_ast.Node:
        """Parse a TYPEID declarator, disallowing a parenthesized name."""
        return self._parse_declarator_kind(kind="typeid", allow_paren=False)
1241 # BNF: declarator_kind : pointer? direct_declarator(kind)
1242 def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node:
1243 ptr = None
1244 if self._peek_type() == "TIMES":
1245 ptr = self._parse_pointer()
1246 direct = self._parse_direct_declarator(kind, allow_paren=allow_paren)
1247 if ptr is not None:
1248 return self._type_modify_decl(direct, ptr)
1249 return direct
1251 # BNF: direct_declarator : ID | TYPEID | '(' declarator ')'
1252 # | direct_declarator '[' ... ']'
1253 # | direct_declarator '(' ... ')'
1254 def _parse_direct_declarator(
1255 self, kind: str, allow_paren: bool = True
1256 ) -> c_ast.Node:
1257 if allow_paren and self._accept("LPAREN"):
1258 decl = self._parse_declarator_kind(kind, allow_paren=True)
1259 self._expect("RPAREN")
1260 else:
1261 if kind == "id":
1262 name_tok = self._expect("ID")
1263 else:
1264 name_tok = self._expect("TYPEID")
1265 decl = c_ast.TypeDecl(
1266 declname=name_tok.value,
1267 type=None,
1268 quals=None,
1269 align=None,
1270 coord=self._tok_coord(name_tok),
1271 )
1273 return self._parse_decl_suffixes(decl)
1275 def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node:
1276 """Parse a chain of array/function suffixes and attach them to decl."""
1277 while True:
1278 if self._peek_type() == "LBRACKET":
1279 decl = self._type_modify_decl(decl, self._parse_array_decl(decl))
1280 continue
1281 if self._peek_type() == "LPAREN":
1282 func = self._parse_function_decl(decl)
1283 decl = self._type_modify_decl(decl, func)
1284 continue
1285 break
1286 return decl
    # BNF: array_decl : '[' array_specifiers? assignment_expression? ']'
    def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
        """Parse an array suffix for a direct declarator.

        The ArrayDecl's coordinate is taken from the declarator it modifies.
        """
        return self._parse_array_decl_common(base_type=None, coord=base_decl.coord)
1292 def _parse_array_decl_common(
1293 self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None
1294 ) -> c_ast.Node:
1295 """Parse an array declarator suffix and return an ArrayDecl node.
1297 base_type:
1298 Base declarator node to attach (None for direct-declarator parsing,
1299 TypeDecl for abstract declarators).
1301 coord:
1302 Coordinate to use for the ArrayDecl. If None, uses the '[' token.
1303 """
1304 lbrack_tok = self._expect("LBRACKET")
1305 if coord is None:
1306 coord = self._tok_coord(lbrack_tok)
1308 def make_array_decl(dim, dim_quals):
1309 return c_ast.ArrayDecl(
1310 type=base_type, dim=dim, dim_quals=dim_quals, coord=coord
1311 )
1313 if self._accept("STATIC"):
1314 dim_quals = ["static"] + (self._parse_type_qualifier_list() or [])
1315 dim = self._parse_assignment_expression()
1316 self._expect("RBRACKET")
1317 return make_array_decl(dim, dim_quals)
1319 if self._peek_type() in _TYPE_QUALIFIER:
1320 dim_quals = self._parse_type_qualifier_list() or []
1321 if self._accept("STATIC"):
1322 dim_quals = dim_quals + ["static"]
1323 dim = self._parse_assignment_expression()
1324 self._expect("RBRACKET")
1325 return make_array_decl(dim, dim_quals)
1326 times_tok = self._accept("TIMES")
1327 if times_tok:
1328 self._expect("RBRACKET")
1329 dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
1330 return make_array_decl(dim, dim_quals)
1331 dim = None
1332 if self._starts_expression():
1333 dim = self._parse_assignment_expression()
1334 self._expect("RBRACKET")
1335 return make_array_decl(dim, dim_quals)
1337 times_tok = self._accept("TIMES")
1338 if times_tok:
1339 self._expect("RBRACKET")
1340 dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
1341 return make_array_decl(dim, [])
1343 dim = None
1344 if self._starts_expression():
1345 dim = self._parse_assignment_expression()
1346 self._expect("RBRACKET")
1347 return make_array_decl(dim, [])
    # BNF: function_decl : '(' parameter_type_list_opt | identifier_list_opt ')'
    def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
        """Parse a function-parameter suffix and return a FuncDecl node.

        When the parameter list is immediately followed by '{' (i.e. a
        function definition body comes next), the named parameters are
        registered as identifiers so the body can refer to them.
        """
        self._expect("LPAREN")
        if self._accept("RPAREN"):
            args = None
        else:
            # A declaration start means a prototype-style parameter list;
            # otherwise fall back to an old (K&R) style identifier list.
            args = (
                self._parse_parameter_type_list()
                if self._starts_declaration()
                else self._parse_identifier_list_opt()
            )
            self._expect("RPAREN")

        func = c_ast.FuncDecl(args=args, type=None, coord=base_decl.coord)

        if self._peek_type() == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    if isinstance(param, c_ast.EllipsisParam):
                        break  # '...' terminates the parameter list
                    # Abstract parameters have no usable name to register.
                    name = getattr(param, "name", None)
                    if name:
                        self._add_identifier(name, param.coord)

        return func
1375 # BNF: pointer : '*' type_qualifier_list? pointer?
1376 def _parse_pointer(self) -> Optional[c_ast.Node]:
1377 stars = []
1378 times_tok = self._accept("TIMES")
1379 while times_tok:
1380 quals = self._parse_type_qualifier_list() or []
1381 stars.append((quals, self._tok_coord(times_tok)))
1382 times_tok = self._accept("TIMES")
1384 if not stars:
1385 return None
1387 ptr = None
1388 for quals, coord in stars:
1389 ptr = c_ast.PtrDecl(quals=quals, type=ptr, coord=coord)
1390 return ptr
1392 # BNF: parameter_type_list : parameter_list (',' ELLIPSIS)?
1393 def _parse_parameter_type_list(self) -> c_ast.ParamList:
1394 params = self._parse_parameter_list()
1395 if self._peek_type() == "COMMA" and self._peek_type(2) == "ELLIPSIS":
1396 self._advance()
1397 ell_tok = self._advance()
1398 params.params.append(c_ast.EllipsisParam(self._tok_coord(ell_tok)))
1399 return params
1401 # BNF: parameter_list : parameter_declaration (',' parameter_declaration)*
1402 def _parse_parameter_list(self) -> c_ast.ParamList:
1403 first = self._parse_parameter_declaration()
1404 params = c_ast.ParamList([first], first.coord)
1405 while self._peek_type() == "COMMA" and self._peek_type(2) != "ELLIPSIS":
1406 self._advance()
1407 params.params.append(self._parse_parameter_declaration())
1408 return params
    # BNF: parameter_declaration : declaration_specifiers declarator?
    #                            | declaration_specifiers abstract_declarator_opt
    def _parse_parameter_declaration(self) -> c_ast.Node:
        """Parse one function parameter (named or abstract).

        When the specifiers carry no type at all, 'int' is assumed
        (implicit-int style, as in pre-standard C).
        """
        spec, _, spec_coord = self._parse_declaration_specifiers(allow_no_type=True)

        if not spec["type"]:
            spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]

        if self._starts_declarator():
            decl, is_named = self._parse_any_declarator(
                allow_abstract=True, typeid_paren_as_abstract=True
            )
            if is_named:
                # Named parameter: a full Decl node is built.
                return self._build_declarations(
                    spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
                )[0]
            # Abstract declarator parsed: build an unnamed parameter.
            return self._build_parameter_declaration(spec, decl, spec_coord)

        # No declarator at all: purely abstract parameter like 'int'.
        decl = self._parse_abstract_declarator_opt()
        return self._build_parameter_declaration(spec, decl, spec_coord)
    def _build_parameter_declaration(
        self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord]
    ) -> c_ast.Node:
        """Build the AST node for an unnamed parameter.

        If the last of several type specifiers is a single name that is a
        known type in scope, the parameter is treated as a declaration
        (presumably resolving the 'typedef name used as parameter name'
        ambiguity); otherwise it becomes a plain Typename.
        """
        if (
            len(spec["type"]) > 1
            and len(spec["type"][-1].names) == 1
            and self._is_type_in_scope(spec["type"][-1].names[0])
        ):
            return self._build_declarations(
                spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
            )[0]

        # Anonymous parameter: wrap in a Typename with an empty name, using
        # an empty TypeDecl when no abstract declarator was supplied.
        decl = c_ast.Typename(
            name="",
            quals=spec["qual"],
            align=None,
            type=decl or c_ast.TypeDecl(None, None, None, None),
            coord=spec_coord,
        )
        return self._fix_decl_name_type(decl, spec["type"])
    # BNF: identifier_list_opt : identifier_list | empty
    def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]:
        """Parse a K&R identifier list, or return None when ')' follows."""
        if self._peek_type() == "RPAREN":
            return None
        return self._parse_identifier_list()
1458 # BNF: identifier_list : identifier (',' identifier)*
1459 def _parse_identifier_list(self) -> c_ast.Node:
1460 first = self._parse_identifier()
1461 params = c_ast.ParamList([first], first.coord)
1462 while self._accept("COMMA"):
1463 params.params.append(self._parse_identifier())
1464 return params
1466 # ------------------------------------------------------------------
1467 # Abstract declarators
1468 # ------------------------------------------------------------------
1469 # BNF: type_name : specifier_qualifier_list abstract_declarator_opt
1470 def _parse_type_name(self) -> c_ast.Typename:
1471 spec = self._parse_specifier_qualifier_list()
1472 decl = self._parse_abstract_declarator_opt()
1474 coord = None
1475 if decl is not None:
1476 coord = decl.coord
1477 elif spec["type"]:
1478 coord = spec["type"][0].coord
1480 typename = c_ast.Typename(
1481 name="",
1482 quals=spec["qual"][:],
1483 align=None,
1484 type=decl or c_ast.TypeDecl(None, None, None, None),
1485 coord=coord,
1486 )
1487 return cast(c_ast.Typename, self._fix_decl_name_type(typename, spec["type"]))
1489 # BNF: abstract_declarator_opt : pointer? direct_abstract_declarator?
1490 def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]:
1491 if self._peek_type() == "TIMES":
1492 ptr = self._parse_pointer()
1493 if self._starts_direct_abstract_declarator():
1494 decl = self._parse_direct_abstract_declarator()
1495 else:
1496 decl = c_ast.TypeDecl(None, None, None, None)
1497 assert ptr is not None
1498 return self._type_modify_decl(decl, ptr)
1500 if self._starts_direct_abstract_declarator():
1501 return self._parse_direct_abstract_declarator()
1503 return None
    # BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')'
    #                                 | '(' abstract_declarator ')'
    #                                 | '[' ... ']'
    def _parse_direct_abstract_declarator(self) -> c_ast.Node:
        """Parse a direct abstract declarator (a declarator with no name).

        A '(' opens either a parameter list (function type) or a nested
        abstract declarator; the choice is made by whether a declaration
        (or an immediate ')') follows.
        """
        lparen_tok = self._accept("LPAREN")
        if lparen_tok:
            if self._starts_declaration() or self._peek_type() == "RPAREN":
                # Function type, e.g. 'int (int)' or 'int ()'.
                params = self._parse_parameter_type_list_opt()
                self._expect("RPAREN")
                decl = c_ast.FuncDecl(
                    args=params,
                    type=c_ast.TypeDecl(None, None, None, None),
                    coord=self._tok_coord(lparen_tok),
                )
            else:
                # Parenthesized abstract declarator, e.g. the '(*)' in
                # 'int (*)[4]'.
                decl = self._parse_abstract_declarator_opt()
                self._expect("RPAREN")
                assert decl is not None
        elif self._peek_type() == "LBRACKET":
            decl = self._parse_abstract_array_base()
        else:
            self._parse_error("Invalid abstract declarator", self.clex.filename)

        # Any further '[...]'/'(...)' modifiers chain onto the base.
        return self._parse_decl_suffixes(decl)
    # BNF: parameter_type_list_opt : parameter_type_list | empty
    def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]:
        """Parse a parameter type list, or None when ')' follows directly."""
        if self._peek_type() == "RPAREN":
            return None
        return self._parse_parameter_type_list()
    # BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']'
    def _parse_abstract_array_base(self) -> c_ast.Node:
        """Parse an array abstract declarator anchored on an empty TypeDecl."""
        return self._parse_array_decl_common(
            base_type=c_ast.TypeDecl(None, None, None, None), coord=None
        )
1542 # ------------------------------------------------------------------
1543 # Statements
1544 # ------------------------------------------------------------------
1545 # BNF: statement : labeled_statement | compound_statement
1546 # | selection_statement | iteration_statement
1547 # | jump_statement | expression_statement
1548 # | static_assert | pppragma_directive
1549 def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]:
1550 tok_type = self._peek_type()
1551 match tok_type:
1552 case "CASE" | "DEFAULT":
1553 return self._parse_labeled_statement()
1554 case "ID" if self._peek_type(2) == "COLON":
1555 return self._parse_labeled_statement()
1556 case "LBRACE":
1557 return self._parse_compound_statement()
1558 case "IF" | "SWITCH":
1559 return self._parse_selection_statement()
1560 case "WHILE" | "DO" | "FOR":
1561 return self._parse_iteration_statement()
1562 case "GOTO" | "BREAK" | "CONTINUE" | "RETURN":
1563 return self._parse_jump_statement()
1564 case "PPPRAGMA" | "_PRAGMA":
1565 return self._parse_pppragma_directive()
1566 case "_STATIC_ASSERT":
1567 return self._parse_static_assert()
1568 case _:
1569 return self._parse_expression_statement()
    # BNF: pragmacomp_or_statement : pppragma_directive* statement
    def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]:
        """Parse a statement, wrapping any leading pragmas in a Compound.

        The synthesized Compound keeps the pragmas attached to the statement
        they precede; its coordinate is that of the first pragma.
        """
        if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
            pragmas = self._parse_pppragma_directive_list()
            stmt = self._parse_statement()
            return c_ast.Compound(block_items=pragmas + [stmt], coord=pragmas[0].coord)
        return self._parse_statement()
    # BNF: block_item : declaration | statement
    def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]:
        """Parse one item of a compound-statement body.

        Declarations come back as a list of nodes; statements as one node.
        """
        if self._starts_declaration():
            return self._parse_declaration()
        return self._parse_statement()
    # BNF: block_item_list : block_item+
    def _parse_block_item_list(self) -> List[c_ast.Node]:
        """Collect block items until '}' or end of input.

        Declaration lists are flattened into the result; a [None] item
        (presumably an empty declaration) is dropped entirely.
        """
        items = []
        while self._peek_type() not in {"RBRACE", None}:
            item = self._parse_block_item()
            if isinstance(item, list):
                if item == [None]:
                    continue
                items.extend(item)
            else:
                items.append(item)
        return items
1598 # BNF: compound_statement : '{' block_item_list? '}'
1599 def _parse_compound_statement(self) -> c_ast.Node:
1600 lbrace_tok = self._expect("LBRACE")
1601 if self._accept("RBRACE"):
1602 return c_ast.Compound(block_items=None, coord=self._tok_coord(lbrace_tok))
1603 block_items = self._parse_block_item_list()
1604 self._expect("RBRACE")
1605 return c_ast.Compound(
1606 block_items=block_items, coord=self._tok_coord(lbrace_tok)
1607 )
1609 # BNF: labeled_statement : ID ':' statement
1610 # | CASE constant_expression ':' statement
1611 # | DEFAULT ':' statement
1612 def _parse_labeled_statement(self) -> c_ast.Node:
1613 tok_type = self._peek_type()
1614 match tok_type:
1615 case "ID":
1616 name_tok = self._advance()
1617 self._expect("COLON")
1618 if self._starts_statement():
1619 stmt = self._parse_pragmacomp_or_statement()
1620 else:
1621 stmt = c_ast.EmptyStatement(self._tok_coord(name_tok))
1622 return c_ast.Label(name_tok.value, stmt, self._tok_coord(name_tok))
1623 case "CASE":
1624 case_tok = self._advance()
1625 expr = self._parse_constant_expression()
1626 self._expect("COLON")
1627 if self._starts_statement():
1628 stmt = self._parse_pragmacomp_or_statement()
1629 else:
1630 stmt = c_ast.EmptyStatement(self._tok_coord(case_tok))
1631 return c_ast.Case(expr, [stmt], self._tok_coord(case_tok))
1632 case "DEFAULT":
1633 def_tok = self._advance()
1634 self._expect("COLON")
1635 if self._starts_statement():
1636 stmt = self._parse_pragmacomp_or_statement()
1637 else:
1638 stmt = c_ast.EmptyStatement(self._tok_coord(def_tok))
1639 return c_ast.Default([stmt], self._tok_coord(def_tok))
1640 case _:
1641 self._parse_error("Invalid labeled statement", self.clex.filename)
1643 # BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)?
1644 # | SWITCH '(' expression ')' statement
1645 def _parse_selection_statement(self) -> c_ast.Node:
1646 tok = self._advance()
1647 match tok.type:
1648 case "IF":
1649 self._expect("LPAREN")
1650 cond = self._parse_expression()
1651 self._expect("RPAREN")
1652 then_stmt = self._parse_pragmacomp_or_statement()
1653 if self._accept("ELSE"):
1654 else_stmt = self._parse_pragmacomp_or_statement()
1655 return c_ast.If(cond, then_stmt, else_stmt, self._tok_coord(tok))
1656 return c_ast.If(cond, then_stmt, None, self._tok_coord(tok))
1657 case "SWITCH":
1658 self._expect("LPAREN")
1659 expr = self._parse_expression()
1660 self._expect("RPAREN")
1661 stmt = self._parse_pragmacomp_or_statement()
1662 return fix_switch_cases(c_ast.Switch(expr, stmt, self._tok_coord(tok)))
1663 case _:
1664 self._parse_error("Invalid selection statement", self._tok_coord(tok))
    # BNF: iteration_statement : WHILE '(' expression ')' statement
    #                          | DO statement WHILE '(' expression ')' ';'
    #                          | FOR '(' (declaration | expression_opt) ';'
    #                            expression_opt ';' expression_opt ')' statement
    def _parse_iteration_statement(self) -> c_ast.Node:
        """Parse a while/do-while/for loop.

        The leading keyword token is consumed here and provides the loop's
        coordinate; any other token is a parse error.
        """
        tok = self._advance()
        match tok.type:
            case "WHILE":
                self._expect("LPAREN")
                cond = self._parse_expression()
                self._expect("RPAREN")
                stmt = self._parse_pragmacomp_or_statement()
                return c_ast.While(cond, stmt, self._tok_coord(tok))
            case "DO":
                stmt = self._parse_pragmacomp_or_statement()
                self._expect("WHILE")
                self._expect("LPAREN")
                cond = self._parse_expression()
                self._expect("RPAREN")
                self._expect("SEMI")
                return c_ast.DoWhile(cond, stmt, self._tok_coord(tok))
            case "FOR":
                self._expect("LPAREN")
                if self._starts_declaration():
                    # C99-style 'for (int i = 0; ...)'. No ';' is expected
                    # after the init clause here — presumably
                    # _parse_declaration consumes it itself.
                    decls = self._parse_declaration()
                    init = c_ast.DeclList(decls, self._tok_coord(tok))
                    cond = self._parse_expression_opt()
                    self._expect("SEMI")
                    next_expr = self._parse_expression_opt()
                    self._expect("RPAREN")
                    stmt = self._parse_pragmacomp_or_statement()
                    return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))

                # Classic style: all three clauses are optional expressions.
                init = self._parse_expression_opt()
                self._expect("SEMI")
                cond = self._parse_expression_opt()
                self._expect("SEMI")
                next_expr = self._parse_expression_opt()
                self._expect("RPAREN")
                stmt = self._parse_pragmacomp_or_statement()
                return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))
            case _:
                self._parse_error("Invalid iteration statement", self._tok_coord(tok))
1710 # BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';'
1711 # | RETURN expression? ';'
1712 def _parse_jump_statement(self) -> c_ast.Node:
1713 tok = self._advance()
1714 match tok.type:
1715 case "GOTO":
1716 name_tok = self._expect("ID")
1717 self._expect("SEMI")
1718 return c_ast.Goto(name_tok.value, self._tok_coord(tok))
1719 case "BREAK":
1720 self._expect("SEMI")
1721 return c_ast.Break(self._tok_coord(tok))
1722 case "CONTINUE":
1723 self._expect("SEMI")
1724 return c_ast.Continue(self._tok_coord(tok))
1725 case "RETURN":
1726 if self._accept("SEMI"):
1727 return c_ast.Return(None, self._tok_coord(tok))
1728 expr = self._parse_expression()
1729 self._expect("SEMI")
1730 return c_ast.Return(expr, self._tok_coord(tok))
1731 case _:
1732 self._parse_error("Invalid jump statement", self._tok_coord(tok))
1734 # BNF: expression_statement : expression_opt ';'
1735 def _parse_expression_statement(self) -> c_ast.Node:
1736 expr = self._parse_expression_opt()
1737 semi_tok = self._expect("SEMI")
1738 if expr is None:
1739 return c_ast.EmptyStatement(self._tok_coord(semi_tok))
1740 return expr
    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------
    # BNF: expression_opt : expression | empty
    def _parse_expression_opt(self) -> Optional[c_ast.Node]:
        """Parse an expression if one starts here; otherwise return None."""
        if self._starts_expression():
            return self._parse_expression()
        return None
1751 # BNF: expression : assignment_expression (',' assignment_expression)*
1752 def _parse_expression(self) -> c_ast.Node:
1753 expr = self._parse_assignment_expression()
1754 if not self._accept("COMMA"):
1755 return expr
1756 exprs = [expr, self._parse_assignment_expression()]
1757 while self._accept("COMMA"):
1758 exprs.append(self._parse_assignment_expression())
1759 return c_ast.ExprList(exprs, expr.coord)
    # BNF: assignment_expression : conditional_expression
    #                            | unary_expression assignment_op assignment_expression
    def _parse_assignment_expression(self) -> c_ast.Node:
        """Parse an assignment expression (right-associative).

        Also accepts the GCC statement-expression extension '({ ... })' at
        this level, returning the Compound node directly.
        """
        if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE":
            self._advance()  # consume '('
            comp = self._parse_compound_statement()
            self._expect("RPAREN")
            return comp

        # The grammar wants a unary expression left of an assignment op, but
        # a full conditional expression is parsed here; presumably the
        # distinction is left to later semantic checking.
        expr = self._parse_conditional_expression()
        if self._is_assignment_op():
            op = self._advance().value
            # Recurse for right-associativity: 'a = b = c' is 'a = (b = c)'.
            rhs = self._parse_assignment_expression()
            return c_ast.Assignment(op, expr, rhs, expr.coord)
        return expr
1777 # BNF: conditional_expression : binary_expression
1778 # | binary_expression '?' expression ':' conditional_expression
1779 def _parse_conditional_expression(self) -> c_ast.Node:
1780 expr = self._parse_binary_expression()
1781 if self._accept("CONDOP"):
1782 iftrue = self._parse_expression()
1783 self._expect("COLON")
1784 iffalse = self._parse_conditional_expression()
1785 return c_ast.TernaryOp(expr, iftrue, iffalse, expr.coord)
1786 return expr
    # BNF: binary_expression : cast_expression (binary_op cast_expression)*
    def _parse_binary_expression(
        self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None
    ) -> c_ast.Node:
        """Precedence-climbing parser for binary operators.

        min_prec:
            Only operators with precedence >= min_prec are consumed here;
            lower-precedence ones are left for an outer invocation.
        lhs:
            An already-parsed left operand (supplied by recursive calls).
        """
        if lhs is None:
            lhs = self._parse_cast_expression()

        while True:
            tok = self._peek()
            if tok is None or tok.type not in _BINARY_PRECEDENCE:
                break
            prec = _BINARY_PRECEDENCE[tok.type]
            if prec < min_prec:
                break  # binds looser than this level allows

            op = tok.value
            self._advance()
            rhs = self._parse_cast_expression()

            # Give any strictly higher-precedence operators to rhs first;
            # equal precedence stays at this level (left-associativity).
            while True:
                next_tok = self._peek()
                if next_tok is None or next_tok.type not in _BINARY_PRECEDENCE:
                    break
                next_prec = _BINARY_PRECEDENCE[next_tok.type]
                if next_prec > prec:
                    rhs = self._parse_binary_expression(next_prec, rhs)
                else:
                    break

            lhs = c_ast.BinaryOp(op, lhs, rhs, lhs.coord)

        return lhs
    # BNF: cast_expression : '(' type_name ')' cast_expression
    #                      | unary_expression
    def _parse_cast_expression(self) -> c_ast.Node:
        """Parse a cast, backtracking when '(type)' turns out not to be one."""
        result = self._try_parse_paren_type_name()
        if result is not None:
            typ, mark, lparen_tok = result
            if self._peek_type() == "LBRACE":
                # (type){...} is a compound literal, not a cast. Examples:
                # (int){1} -> compound literal, handled in postfix
                # (int) x -> cast, handled below
                self._reset(mark)  # rewind; postfix parsing re-reads the type
            else:
                expr = self._parse_cast_expression()
                return c_ast.Cast(typ, expr, self._tok_coord(lparen_tok))
        return self._parse_unary_expression()
1837 # BNF: unary_expression : postfix_expression
1838 # | '++' unary_expression
1839 # | '--' unary_expression
1840 # | unary_op cast_expression
1841 # | 'sizeof' unary_expression
1842 # | 'sizeof' '(' type_name ')'
1843 # | '_Alignof' '(' type_name ')'
1844 def _parse_unary_expression(self) -> c_ast.Node:
1845 tok_type = self._peek_type()
1846 if tok_type in {"PLUSPLUS", "MINUSMINUS"}:
1847 tok = self._advance()
1848 expr = self._parse_unary_expression()
1849 return c_ast.UnaryOp(tok.value, expr, expr.coord)
1851 if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}:
1852 tok = self._advance()
1853 expr = self._parse_cast_expression()
1854 return c_ast.UnaryOp(tok.value, expr, expr.coord)
1856 if tok_type == "SIZEOF":
1857 tok = self._advance()
1858 result = self._try_parse_paren_type_name()
1859 if result is not None:
1860 typ, _, _ = result
1861 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
1862 expr = self._parse_unary_expression()
1863 return c_ast.UnaryOp(tok.value, expr, self._tok_coord(tok))
1865 if tok_type == "_ALIGNOF":
1866 tok = self._advance()
1867 self._expect("LPAREN")
1868 typ = self._parse_type_name()
1869 self._expect("RPAREN")
1870 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
1872 return self._parse_postfix_expression()
    # BNF: postfix_expression : primary_expression postfix_suffix*
    #                         | '(' type_name ')' '{' initializer_list ','? '}'
    def _parse_postfix_expression(self) -> c_ast.Node:
        """Parse a postfix expression: a possible compound literal, then a
        chain of indexing, calls, member access, and postfix ++/--.
        """
        result = self._try_parse_paren_type_name()
        if result is not None:
            typ, mark, _ = result
            # Disambiguate between casts and compound literals:
            # (int) x -> cast
            # (int) {1} -> compound literal
            if self._accept("LBRACE"):
                init = self._parse_initializer_list()
                self._accept("COMMA")  # optional trailing comma
                self._expect("RBRACE")
                return c_ast.CompoundLiteral(typ, init)
            else:
                # Not a compound literal: rewind so the cast path re-parses.
                self._reset(mark)

        expr = self._parse_primary_expression()
        while True:
            if self._accept("LBRACKET"):
                # Array subscript: expr[sub]
                sub = self._parse_expression()
                self._expect("RBRACKET")
                expr = c_ast.ArrayRef(expr, sub, expr.coord)
                continue
            if self._accept("LPAREN"):
                # Function call; args stays None for an empty argument list.
                if self._peek_type() == "RPAREN":
                    self._advance()
                    args = None
                else:
                    args = self._parse_argument_expression_list()
                    self._expect("RPAREN")
                expr = c_ast.FuncCall(expr, args, expr.coord)
                continue
            if self._peek_type() in {"PERIOD", "ARROW"}:
                # Member access: expr.field / expr->field (TYPEID accepted
                # as a field name as well).
                op_tok = self._advance()
                name_tok = self._advance()
                if name_tok.type not in {"ID", "TYPEID"}:
                    self._parse_error(
                        "Invalid struct reference", self._tok_coord(name_tok)
                    )
                field = c_ast.ID(name_tok.value, self._tok_coord(name_tok))
                expr = c_ast.StructRef(expr, op_tok.value, field, expr.coord)
                continue
            if self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}:
                # Postfix ++/-- are encoded as UnaryOp 'p++' / 'p--'.
                tok = self._advance()
                expr = c_ast.UnaryOp("p" + tok.value, expr, expr.coord)
                continue
            break
        return expr
    # BNF: primary_expression : ID | constant | string_literal
    #                         | '(' expression ')' | offsetof
    def _parse_primary_expression(self) -> c_ast.Node:
        """Parse a primary expression based on one token of lookahead."""
        tok_type = self._peek_type()
        if tok_type == "ID":
            return self._parse_identifier()
        if (
            tok_type in _INT_CONST
            or tok_type in _FLOAT_CONST
            or tok_type in _CHAR_CONST
        ):
            return self._parse_constant()
        if tok_type in _STRING_LITERAL:
            # 'Unified' string parsing — presumably handles adjacent-literal
            # concatenation in the helper.
            return self._parse_unified_string_literal()
        if tok_type in _WSTR_LITERAL:
            return self._parse_unified_wstring_literal()
        if tok_type == "LPAREN":
            # Parenthesized expression; parentheses leave no AST trace.
            self._advance()
            expr = self._parse_expression()
            self._expect("RPAREN")
            return expr
        if tok_type == "OFFSETOF":
            # offsetof(type, designator) is modeled as a FuncCall whose
            # arguments are the type name and the member designator.
            off_tok = self._advance()
            self._expect("LPAREN")
            typ = self._parse_type_name()
            self._expect("COMMA")
            designator = self._parse_offsetof_member_designator()
            self._expect("RPAREN")
            coord = self._tok_coord(off_tok)
            return c_ast.FuncCall(
                c_ast.ID(off_tok.value, coord),
                c_ast.ExprList([typ, designator], coord),
                coord,
            )

        self._parse_error("Invalid expression", self.clex.filename)
1961 # BNF: offsetof_member_designator : identifier_or_typeid
1962 # ('.' identifier_or_typeid | '[' expression ']')*
1963 def _parse_offsetof_member_designator(self) -> c_ast.Node:
1964 node = self._parse_identifier_or_typeid()
1965 while True:
1966 if self._accept("PERIOD"):
1967 field = self._parse_identifier_or_typeid()
1968 node = c_ast.StructRef(node, ".", field, node.coord)
1969 continue
1970 if self._accept("LBRACKET"):
1971 expr = self._parse_expression()
1972 self._expect("RBRACKET")
1973 node = c_ast.ArrayRef(node, expr, node.coord)
1974 continue
1975 break
1976 return node
1978 # BNF: argument_expression_list : assignment_expression (',' assignment_expression)*
1979 def _parse_argument_expression_list(self) -> c_ast.Node:
1980 expr = self._parse_assignment_expression()
1981 exprs = [expr]
1982 while self._accept("COMMA"):
1983 exprs.append(self._parse_assignment_expression())
1984 return c_ast.ExprList(exprs, expr.coord)
1986 # BNF: constant_expression : conditional_expression
1987 def _parse_constant_expression(self) -> c_ast.Node:
1988 return self._parse_conditional_expression()
1990 # ------------------------------------------------------------------
1991 # Terminals
1992 # ------------------------------------------------------------------
1993 # BNF: identifier : ID
1994 def _parse_identifier(self) -> c_ast.Node:
1995 tok = self._expect("ID")
1996 return c_ast.ID(tok.value, self._tok_coord(tok))
1998 # BNF: identifier_or_typeid : ID | TYPEID
1999 def _parse_identifier_or_typeid(self) -> c_ast.Node:
2000 tok = self._advance()
2001 if tok.type not in {"ID", "TYPEID"}:
2002 self._parse_error("Expected identifier", self._tok_coord(tok))
2003 return c_ast.ID(tok.value, self._tok_coord(tok))
2005 # BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST
2006 def _parse_constant(self) -> c_ast.Node:
2007 tok = self._advance()
2008 if tok.type in _INT_CONST:
2009 u_count = 0
2010 l_count = 0
2011 for ch in tok.value[-3:]:
2012 if ch in ("l", "L"):
2013 l_count += 1
2014 elif ch in ("u", "U"):
2015 u_count += 1
2016 if u_count > 1:
2017 raise ValueError("Constant cannot have more than one u/U suffix.")
2018 if l_count > 2:
2019 raise ValueError("Constant cannot have more than two l/L suffix.")
2020 prefix = "unsigned " * u_count + "long " * l_count
2021 return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok))
2023 if tok.type in _FLOAT_CONST:
2024 if tok.value[-1] in ("f", "F"):
2025 t = "float"
2026 elif tok.value[-1] in ("l", "L"):
2027 t = "long double"
2028 else:
2029 t = "double"
2030 return c_ast.Constant(t, tok.value, self._tok_coord(tok))
2032 if tok.type in _CHAR_CONST:
2033 return c_ast.Constant("char", tok.value, self._tok_coord(tok))
2035 self._parse_error("Invalid constant", self._tok_coord(tok))
2037 # BNF: unified_string_literal : STRING_LITERAL+
2038 def _parse_unified_string_literal(self) -> c_ast.Node:
2039 tok = self._expect("STRING_LITERAL")
2040 node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
2041 while self._peek_type() == "STRING_LITERAL":
2042 tok2 = self._advance()
2043 node.value = node.value[:-1] + tok2.value[1:]
2044 return node
2046 # BNF: unified_wstring_literal : WSTRING_LITERAL+
2047 def _parse_unified_wstring_literal(self) -> c_ast.Node:
2048 tok = self._advance()
2049 if tok.type not in _WSTR_LITERAL:
2050 self._parse_error("Invalid string literal", self._tok_coord(tok))
2051 node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
2052 while self._peek_type() in _WSTR_LITERAL:
2053 tok2 = self._advance()
2054 node.value = node.value.rstrip()[:-1] + tok2.value[2:]
2055 return node
2057 # ------------------------------------------------------------------
2058 # Initializers
2059 # ------------------------------------------------------------------
2060 # BNF: initializer : assignment_expression
2061 # | '{' initializer_list ','? '}'
2062 # | '{' '}'
2063 def _parse_initializer(self) -> c_ast.Node:
2064 lbrace_tok = self._accept("LBRACE")
2065 if lbrace_tok:
2066 if self._accept("RBRACE"):
2067 return c_ast.InitList([], self._tok_coord(lbrace_tok))
2068 init_list = self._parse_initializer_list()
2069 self._accept("COMMA")
2070 self._expect("RBRACE")
2071 return init_list
2073 return self._parse_assignment_expression()
2075 # BNF: initializer_list : initializer_item (',' initializer_item)* ','?
2076 def _parse_initializer_list(self) -> c_ast.Node:
2077 items = [self._parse_initializer_item()]
2078 while self._accept("COMMA"):
2079 if self._peek_type() == "RBRACE":
2080 break
2081 items.append(self._parse_initializer_item())
2082 return c_ast.InitList(items, items[0].coord)
2084 # BNF: initializer_item : designation? initializer
2085 def _parse_initializer_item(self) -> c_ast.Node:
2086 designation = None
2087 if self._peek_type() in {"LBRACKET", "PERIOD"}:
2088 designation = self._parse_designation()
2089 init = self._parse_initializer()
2090 if designation is not None:
2091 return c_ast.NamedInitializer(designation, init)
2092 return init
2094 # BNF: designation : designator_list '='
2095 def _parse_designation(self) -> List[c_ast.Node]:
2096 designators = self._parse_designator_list()
2097 self._expect("EQUALS")
2098 return designators
2100 # BNF: designator_list : designator+
2101 def _parse_designator_list(self) -> List[c_ast.Node]:
2102 designators = []
2103 while self._peek_type() in {"LBRACKET", "PERIOD"}:
2104 designators.append(self._parse_designator())
2105 return designators
2107 # BNF: designator : '[' constant_expression ']'
2108 # | '.' identifier_or_typeid
2109 def _parse_designator(self) -> c_ast.Node:
2110 if self._accept("LBRACKET"):
2111 expr = self._parse_constant_expression()
2112 self._expect("RBRACKET")
2113 return expr
2114 if self._accept("PERIOD"):
2115 return self._parse_identifier_or_typeid()
2116 self._parse_error("Invalid designator", self.clex.filename)
2118 # ------------------------------------------------------------------
2119 # Preprocessor-like directives
2120 # ------------------------------------------------------------------
2121 # BNF: pp_directive : '#' ... (unsupported)
2122 def _parse_pp_directive(self) -> NoReturn:
2123 tok = self._expect("PPHASH")
2124 self._parse_error("Directives not supported yet", self._tok_coord(tok))
2126 # BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR?
2127 # | _PRAGMA '(' string_literal ')'
2128 def _parse_pppragma_directive(self) -> c_ast.Node:
2129 if self._peek_type() == "PPPRAGMA":
2130 tok = self._advance()
2131 if self._peek_type() == "PPPRAGMASTR":
2132 str_tok = self._advance()
2133 return c_ast.Pragma(str_tok.value, self._tok_coord(str_tok))
2134 return c_ast.Pragma("", self._tok_coord(tok))
2136 if self._peek_type() == "_PRAGMA":
2137 tok = self._advance()
2138 lparen = self._expect("LPAREN")
2139 literal = self._parse_unified_string_literal()
2140 self._expect("RPAREN")
2141 return c_ast.Pragma(literal, self._tok_coord(lparen))
2143 self._parse_error("Invalid pragma", self.clex.filename)
2145 # BNF: pppragma_directive_list : pppragma_directive+
2146 def _parse_pppragma_directive_list(self) -> List[c_ast.Node]:
2147 pragmas = []
2148 while self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
2149 pragmas.append(self._parse_pppragma_directive())
2150 return pragmas
2152 # BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')'
2153 def _parse_static_assert(self) -> List[c_ast.Node]:
2154 tok = self._expect("_STATIC_ASSERT")
2155 self._expect("LPAREN")
2156 cond = self._parse_constant_expression()
2157 msg = None
2158 if self._accept("COMMA"):
2159 msg = self._parse_unified_string_literal()
2160 self._expect("RPAREN")
2161 return [c_ast.StaticAssert(cond, msg, self._tok_coord(tok))]
# Token types that act as assignment operators ('=', '^=', '*=', '>>=', ...).
_ASSIGNMENT_OPS = {
    "EQUALS",
    "XOREQUAL",
    "TIMESEQUAL",
    "DIVEQUAL",
    "MODEQUAL",
    "PLUSEQUAL",
    "MINUSEQUAL",
    "LSHIFTEQUAL",
    "RSHIFTEQUAL",
    "ANDEQUAL",
    "OREQUAL",
}
# Precedence of binary operators (lower number = weaker binding).
# If this changes, c_generator.CGenerator.precedence_map needs to change as
# well
_BINARY_PRECEDENCE = {
    "LOR": 0,
    "LAND": 1,
    "OR": 2,
    "XOR": 3,
    "AND": 4,
    "EQ": 5,
    "NE": 5,
    "GT": 6,
    "GE": 6,
    "LT": 6,
    "LE": 6,
    "RSHIFT": 7,
    "LSHIFT": 7,
    "PLUS": 8,
    "MINUS": 8,
    "TIMES": 9,
    "DIVIDE": 9,
    "MOD": 9,
}
# Storage-class specifier keywords.
_STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"}

# Function specifier keywords.
_FUNCTION_SPEC = {"INLINE", "_NORETURN"}

# Type qualifier keywords.
_TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"}

# Built-in type specifier keywords (struct/union/enum/typedef names are
# handled separately).
_TYPE_SPEC_SIMPLE = {
    "VOID",
    "_BOOL",
    "CHAR",
    "SHORT",
    "INT",
    "LONG",
    "FLOAT",
    "DOUBLE",
    "_COMPLEX",
    "SIGNED",
    "UNSIGNED",
    "__INT128",
}

# Token types that can begin a declaration.
_DECL_START = (
    _STORAGE_CLASS
    | _FUNCTION_SPEC
    | _TYPE_QUALIFIER
    | _TYPE_SPEC_SIMPLE
    | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"}
)

# Non-constant, non-literal token types that can begin an expression.
_EXPR_START = {
    "ID",
    "LPAREN",
    "PLUSPLUS",
    "MINUSMINUS",
    "PLUS",
    "MINUS",
    "TIMES",
    "AND",
    "NOT",
    "LNOT",
    "SIZEOF",
    "_ALIGNOF",
    "OFFSETOF",
}

# Integer constant token types.
_INT_CONST = {
    "INT_CONST_DEC",
    "INT_CONST_OCT",
    "INT_CONST_HEX",
    "INT_CONST_BIN",
    "INT_CONST_CHAR",
}

# Floating constant token types.
_FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"}

# Character constant token types.
_CHAR_CONST = {
    "CHAR_CONST",
    "WCHAR_CONST",
    "U8CHAR_CONST",
    "U16CHAR_CONST",
    "U32CHAR_CONST",
}

# Narrow string literal token types.
_STRING_LITERAL = {"STRING_LITERAL"}

# Wide / unicode string literal token types.
_WSTR_LITERAL = {
    "WSTRING_LITERAL",
    "U8STRING_LITERAL",
    "U16STRING_LITERAL",
    "U32STRING_LITERAL",
}

# All token types that can begin an expression.
_STARTS_EXPRESSION = (
    _EXPR_START
    | _INT_CONST
    | _FLOAT_CONST
    | _CHAR_CONST
    | _STRING_LITERAL
    | _WSTR_LITERAL
)

# Token types that can begin a statement (other than an expression or a
# declaration statement).
_STARTS_STATEMENT = {
    "LBRACE",
    "IF",
    "SWITCH",
    "WHILE",
    "DO",
    "FOR",
    "GOTO",
    "BREAK",
    "CONTINUE",
    "RETURN",
    "CASE",
    "DEFAULT",
    "PPPRAGMA",
    "_PRAGMA",
    "_STATIC_ASSERT",
    "SEMI",
}
class _TokenStream:
    """Buffered view over a lexer's token stream.

    Tokens are pulled lazily from the lexer into an internal list, which
    allows arbitrary lookahead (`peek`) and backtracking (`mark`/`reset`).
    The lexer is expected to be initialized with its input string already.
    """

    def __init__(self, lexer: CLexer) -> None:
        self._lexer = lexer
        # Tokens read so far; a trailing None marks end-of-input.
        self._buffer: List[Optional[_Token]] = []
        # Index of the next unconsumed token in _buffer.
        self._index = 0

    def peek(self, k: int = 1) -> Optional[_Token]:
        """Return the k-th upcoming token without consuming anything.

        k=1 is the immediate next token, k=2 the one after it, and so on.
        Returns None for k <= 0 or when the stream ends before that point.
        """
        if k < 1:
            return None
        self._fill(k)
        return self._buffer[self._index + k - 1]

    def next(self) -> Optional[_Token]:
        """Consume and return a single token (None at end of input)."""
        self._fill(1)
        tok = self._buffer[self._index]
        self._index += 1
        return tok

    # 'mark' and 'reset' support speculative parsing with backtracking:
    # record the current position with mark(), try one parse path, and on
    # failure rewind to the recorded position with reset(pos) so another
    # path can re-examine the same tokens.
    def mark(self) -> int:
        """Return the current position, for later use with reset()."""
        return self._index

    def reset(self, mark: int) -> None:
        """Rewind the stream to a position previously returned by mark()."""
        self._index = mark

    def _fill(self, n: int) -> None:
        """Buffer tokens until n are available past the current position,
        or the lexer is exhausted (which appends a final None).
        """
        while len(self._buffer) < self._index + n:
            tok = self._lexer.token()
            self._buffer.append(tok)
            if tok is None:
                break
2352# Declaration specifiers are represented by a dictionary with entries:
2353# - qual: a list of type qualifiers
2354# - storage: a list of storage class specifiers
2355# - type: a list of type specifiers
2356# - function: a list of function specifiers
2357# - alignment: a list of alignment specifiers
class _DeclSpec(TypedDict):
    """Declaration specifiers accumulated while parsing a declaration,
    grouped into type qualifiers, storage classes, type specifiers,
    function specifiers and alignment specifiers.
    """

    qual: List[Any]
    storage: List[Any]
    type: List[Any]
    function: List[Any]
    alignment: List[Any]
# The category keys of _DeclSpec; identifies which list a single
# declaration specifier belongs to.
_DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"]
2369class _DeclInfo(TypedDict):
2370 # Declarator payloads used by declaration/initializer parsing:
2371 # - decl: the declarator node (may be None for abstract/implicit cases)
2372 # - init: optional initializer expression
2373 # - bitsize: optional bit-field width expression (for struct declarators)
2374 decl: Optional[c_ast.Node]
2375 init: Optional[c_ast.Node]
2376 bitsize: Optional[c_ast.Node]