Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pycparser/c_parser.py: 42%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# ------------------------------------------------------------------------------
2# pycparser: c_parser.py
3#
4# Recursive-descent parser for the C language.
5#
6# Eli Bendersky [https://eli.thegreenplace.net/]
7# License: BSD
8# ------------------------------------------------------------------------------
9from dataclasses import dataclass
10from typing import (
11 Any,
12 Dict,
13 List,
14 Literal,
15 NoReturn,
16 Optional,
17 Tuple,
18 TypedDict,
19 cast,
20)
22from . import c_ast
23from .c_lexer import CLexer, Token
24from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
@dataclass
class Coord:
    """Location of a syntactic element: file name, line number and,
    optionally, column number.
    """

    file: str
    line: int
    column: Optional[int] = None

    def __str__(self) -> str:
        # Render as "file:line" or "file:line:column".
        parts = [self.file, str(self.line)]
        if self.column is not None:
            parts.append(str(self.column))
        return ":".join(parts)
class ParseError(Exception):
    """Raised when the parser encounters invalid C source."""
50class CParser:
51 """Recursive-descent C parser.
53 Usage:
54 parser = CParser()
55 ast = parser.parse(text, filename)
57 The `lexer` parameter lets you inject a lexer class (defaults to CLexer).
58 The parameters after `lexer` are accepted for backward compatibility with
59 the old PLY-based parser and are otherwise unused.
60 """
    def __init__(
        self,
        lex_optimize: bool = True,
        lexer: type[CLexer] = CLexer,
        lextab: str = "pycparser.lextab",
        yacc_optimize: bool = True,
        yacctab: str = "pycparser.yacctab",
        yacc_debug: bool = False,
        taboutputdir: str = "",
    ) -> None:
        """Create a new CParser.

        lexer:
            The lexer class to instantiate (defaults to CLexer).

        All other parameters are accepted for backward compatibility with
        the old PLY-based parser and are otherwise unused.
        """
        # The lexer calls back into the parser: for error reporting, for
        # scope tracking on '{' / '}', and to decide whether an identifier
        # is currently a typedef name.
        self.clex: CLexer = lexer(
            error_func=self._lex_error_func,
            on_lbrace_func=self._lex_on_lbrace_func,
            on_rbrace_func=self._lex_on_rbrace_func,
            type_lookup_func=self._lex_type_lookup_func,
        )

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, 'name' is currently a type in the scope. If it's False,
        # 'name' is defined in the scope but not as a type (for instance, if we
        # saw: int name;)
        # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
        # in this scope at all.
        self._scope_stack: List[Dict[str, bool]] = [dict()]
        # Buffered token stream over the lexer; replaced on each parse().
        self._tokens: _TokenStream = _TokenStream(self.clex)
90 def parse(
91 self, text: str, filename: str = "", debug: bool = False
92 ) -> c_ast.FileAST:
93 """Parses C code and returns an AST.
95 text:
96 A string containing the C source code
98 filename:
99 Name of the file being parsed (for meaningful error messages)
101 debug:
102 Deprecated debug flag (unused); for backwards compatibility.
103 """
104 self._scope_stack = [dict()]
105 self.clex.input(text, filename)
106 self._tokens = _TokenStream(self.clex)
108 ast = self._parse_translation_unit_or_empty()
109 tok = self._peek()
110 if tok is not None:
111 self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
112 return ast
114 # ------------------------------------------------------------------
115 # Scope and declaration helpers
116 # ------------------------------------------------------------------
117 def _coord(self, lineno: int, column: Optional[int] = None) -> Coord:
118 return Coord(file=self.clex.filename, line=lineno, column=column)
120 def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn:
121 raise ParseError(f"{coord}: {msg}")
123 def _push_scope(self) -> None:
124 self._scope_stack.append(dict())
126 def _pop_scope(self) -> None:
127 assert len(self._scope_stack) > 1
128 self._scope_stack.pop()
130 def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None:
131 """Add a new typedef name (ie a TYPEID) to the current scope"""
132 if not self._scope_stack[-1].get(name, True):
133 self._parse_error(
134 f"Typedef {name!r} previously declared as non-typedef in this scope",
135 coord,
136 )
137 self._scope_stack[-1][name] = True
139 def _add_identifier(self, name: str, coord: Optional[Coord]) -> None:
140 """Add a new object, function, or enum member name (ie an ID) to the
141 current scope
142 """
143 if self._scope_stack[-1].get(name, False):
144 self._parse_error(
145 f"Non-typedef {name!r} previously declared as typedef in this scope",
146 coord,
147 )
148 self._scope_stack[-1][name] = False
150 def _is_type_in_scope(self, name: str) -> bool:
151 """Is *name* a typedef-name in the current scope?"""
152 for scope in reversed(self._scope_stack):
153 # If name is an identifier in this scope it shadows typedefs in
154 # higher scopes.
155 if name in scope:
156 return scope[name]
157 return False
159 def _lex_error_func(self, msg: str, line: int, column: int) -> None:
160 self._parse_error(msg, self._coord(line, column))
    def _lex_on_lbrace_func(self) -> None:
        """Lexer callback: a '{' opens a new scope."""
        self._push_scope()
    def _lex_on_rbrace_func(self) -> None:
        """Lexer callback: a '}' closes the innermost scope."""
        self._pop_scope()
    def _lex_type_lookup_func(self, name: str) -> bool:
        """Looks up types that were previously defined with typedef.

        Passed to the lexer for recognizing identifiers that are types.
        """
        return self._is_type_in_scope(name)
175 # To understand what's going on here, read sections A.8.5 and
176 # A.8.6 of K&R2 very carefully.
177 #
178 # A C type consists of a basic type declaration, with a list
179 # of modifiers. For example:
180 #
181 # int *c[5];
182 #
183 # The basic declaration here is 'int c', and the pointer and
184 # the array are the modifiers.
185 #
186 # Basic declarations are represented by TypeDecl (from module c_ast) and the
187 # modifiers are FuncDecl, PtrDecl and ArrayDecl.
188 #
189 # The standard states that whenever a new modifier is parsed, it should be
190 # added to the end of the list of modifiers. For example:
191 #
192 # K&R2 A.8.6.2: Array Declarators
193 #
194 # In a declaration T D where D has the form
195 # D1 [constant-expression-opt]
196 # and the type of the identifier in the declaration T D1 is
197 # "type-modifier T", the type of the
198 # identifier of D is "type-modifier array of T"
199 #
200 # This is what this method does. The declarator it receives
201 # can be a list of declarators ending with TypeDecl. It
202 # tacks the modifier to the end of this list, just before
203 # the TypeDecl.
204 #
205 # Additionally, the modifier may be a list itself. This is
206 # useful for pointers, that can come as a chain from the rule
207 # p_pointer. In this case, the whole modifier list is spliced
208 # into the new location.
    def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node:
        """Tacks a type modifier on a declarator, and returns
        the modified declarator.

        Note: the declarator and modifier may be modified
        """
        modifier_head = modifier
        modifier_tail = modifier

        # The modifier may be a nested list. Reach its tail.
        while modifier_tail.type:
            modifier_tail = modifier_tail.type

        # If the decl is a basic type, just tack the modifier onto it.
        if isinstance(decl, c_ast.TypeDecl):
            modifier_tail.type = decl
            return modifier
        else:
            # Otherwise, the decl is a list of modifiers. Reach
            # its tail and splice the modifier onto the tail,
            # pointing to the underlying basic type.
            decl_tail = decl
            while not isinstance(decl_tail.type, c_ast.TypeDecl):
                decl_tail = decl_tail.type

            modifier_tail.type = decl_tail.type
            decl_tail.type = modifier_head
            return decl
238 # Due to the order in which declarators are constructed,
239 # they have to be fixed in order to look like a normal AST.
240 #
241 # When a declaration arrives from syntax construction, it has
242 # these problems:
243 # * The innermost TypeDecl has no type (because the basic
244 # type is only known at the uppermost declaration level)
245 # * The declaration has no variable name, since that is saved
246 # in the innermost TypeDecl
247 # * The typename of the declaration is a list of type
248 # specifiers, and not a node. Here, basic identifier types
249 # should be separated from more complex types like enums
250 # and structs.
251 #
252 # This method fixes these problems.
    def _fix_decl_name_type(
        self,
        decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename,
        typename: List[Any],
    ) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename:
        """Fixes a declaration. Modifies decl."""
        # Reach the underlying basic type
        typ = decl
        while not isinstance(typ, c_ast.TypeDecl):
            typ = typ.type

        # The declared name lives on the innermost TypeDecl; propagate it
        # up to the declaration, and push the qualifiers down.
        decl.name = typ.declname
        typ.quals = decl.quals[:]

        # The typename is a list of types. If any type in this
        # list isn't an IdentifierType, it must be the only
        # type in the list (it's illegal to declare "int enum ..")
        # If all the types are basic, they're collected in the
        # IdentifierType holder.
        for tn in typename:
            if not isinstance(tn, c_ast.IdentifierType):
                if len(typename) > 1:
                    self._parse_error("Invalid multiple types specified", tn.coord)
                else:
                    typ.type = tn
                    return decl

        if not typename:
            # Functions default to returning int
            if not isinstance(decl.type, c_ast.FuncDecl):
                self._parse_error("Missing type in declaration", decl.coord)
            typ.type = c_ast.IdentifierType(["int"], coord=decl.coord)
        else:
            # At this point, we know that typename is a list of IdentifierType
            # nodes. Concatenate all the names into a single list.
            typ.type = c_ast.IdentifierType(
                [name for id in typename for name in id.names], coord=typename[0].coord
            )
        return decl
293 def _add_declaration_specifier(
294 self,
295 declspec: Optional["_DeclSpec"],
296 newspec: Any,
297 kind: "_DeclSpecKind",
298 append: bool = False,
299 ) -> "_DeclSpec":
300 """See _DeclSpec for the specifier dictionary layout."""
301 if declspec is None:
302 spec: _DeclSpec = dict(
303 qual=[], storage=[], type=[], function=[], alignment=[]
304 )
305 else:
306 spec = declspec
308 if append:
309 spec[kind].append(newspec)
310 else:
311 spec[kind].insert(0, newspec)
313 return spec
    def _build_declarations(
        self,
        spec: "_DeclSpec",
        decls: List["_DeclInfo"],
        typedef_namespace: bool = False,
    ) -> List[c_ast.Node]:
        """Builds a list of declarations all sharing the given specifiers.

        If typedef_namespace is true, each declared name is added
        to the "typedef namespace", which also includes objects,
        functions, and enum constants.
        """
        is_typedef = "typedef" in spec["storage"]
        declarations = []

        # Bit-fields are allowed to be unnamed.
        if decls[0].get("bitsize") is None:
            # When redeclaring typedef names as identifiers in inner scopes, a
            # problem can occur where the identifier gets grouped into
            # spec['type'], leaving decl as None. This can only occur for the
            # first declarator.
            if decls[0]["decl"] is None:
                if (
                    len(spec["type"]) < 2
                    or len(spec["type"][-1].names) != 1
                    or not self._is_type_in_scope(spec["type"][-1].names[0])
                ):
                    # Find a coordinate for the error message, if any
                    # specifier carries one.
                    coord = "?"
                    for t in spec["type"]:
                        if hasattr(t, "coord"):
                            coord = t.coord
                            break
                    self._parse_error("Invalid declaration", coord)

                # Make this look as if it came from "direct_declarator:ID"
                decls[0]["decl"] = c_ast.TypeDecl(
                    declname=spec["type"][-1].names[0],
                    type=None,
                    quals=None,
                    align=spec["alignment"],
                    coord=spec["type"][-1].coord,
                )
                # Remove the "new" type's name from the end of spec['type']
                del spec["type"][-1]
            # A similar problem can occur where the declaration ends up
            # looking like an abstract declarator. Give it a name if this is
            # the case.
            elif not isinstance(
                decls[0]["decl"],
                (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
            ):
                decls_0_tail = cast(Any, decls[0]["decl"])
                while not isinstance(decls_0_tail, c_ast.TypeDecl):
                    decls_0_tail = decls_0_tail.type
                if decls_0_tail.declname is None:
                    decls_0_tail.declname = spec["type"][-1].names[0]
                    del spec["type"][-1]

        for decl in decls:
            assert decl["decl"] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec["qual"],
                    storage=spec["storage"],
                    type=decl["decl"],
                    coord=decl["decl"].coord,
                )
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec["qual"],
                    align=spec["alignment"],
                    storage=spec["storage"],
                    funcspec=spec["function"],
                    type=decl["decl"],
                    init=decl.get("init"),
                    bitsize=decl.get("bitsize"),
                    coord=decl["decl"].coord,
                )

            # Struct/union/enum and plain identifier types need no name/type
            # fixup; everything else gets name and type normalized.
            if isinstance(
                declaration.type,
                (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
            ):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec["type"])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            fixed_decl = fix_atomic_specifiers(
                cast(c_ast.Decl | c_ast.Typedef, fixed_decl)
            )
            declarations.append(fixed_decl)

        return declarations
418 def _build_function_definition(
419 self,
420 spec: "_DeclSpec",
421 decl: c_ast.Node,
422 param_decls: Optional[List[c_ast.Node]],
423 body: c_ast.Node,
424 ) -> c_ast.Node:
425 """Builds a function definition."""
426 if "typedef" in spec["storage"]:
427 self._parse_error("Invalid typedef", decl.coord)
429 declaration = self._build_declarations(
430 spec=spec,
431 decls=[dict(decl=decl, init=None, bitsize=None)],
432 typedef_namespace=True,
433 )[0]
435 return c_ast.FuncDef(
436 decl=declaration, param_decls=param_decls, body=body, coord=decl.coord
437 )
439 def _select_struct_union_class(self, token: str) -> type:
440 """Given a token (either STRUCT or UNION), selects the
441 appropriate AST class.
442 """
443 if token == "struct":
444 return c_ast.Struct
445 else:
446 return c_ast.Union
448 # ------------------------------------------------------------------
449 # Token helpers
450 # ------------------------------------------------------------------
    def _peek(self, k: int = 1) -> Optional[Token]:
        """Return the k-th next token without consuming it (1-based)."""
        return self._tokens.peek(k)
455 def _peek_type(self, k: int = 1) -> Optional[str]:
456 """Return the type of the k-th next token, or None if absent (1-based)."""
457 tok = self._peek(k)
458 return tok.type if tok is not None else None
460 def _advance(self) -> Token:
461 tok = self._tokens.next()
462 if tok is None:
463 self._parse_error("At end of input", self.clex.filename)
464 else:
465 return tok
467 def _accept(self, token_type: str) -> Optional[Token]:
468 """Conditionally consume next token, only if it's of token_type.
470 If it is of the expected type, consume and return it.
471 Otherwise, leaves the token intact and returns None.
472 """
473 tok = self._peek()
474 if tok is not None and tok.type == token_type:
475 return self._advance()
476 return None
478 def _expect(self, token_type: str) -> Token:
479 tok = self._advance()
480 if tok.type != token_type:
481 self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
482 return tok
    def _mark(self) -> int:
        """Record the current token-stream position for a later _reset()."""
        return self._tokens.mark()
    def _reset(self, mark: int) -> None:
        """Rewind the token stream to a position returned by _mark()."""
        self._tokens.reset(mark)
    def _tok_coord(self, tok: Token) -> Coord:
        """Build a Coord from a token's line/column position."""
        return self._coord(tok.lineno, tok.column)
493 def _starts_declaration(self, tok: Optional[Token] = None) -> bool:
494 tok = tok or self._peek()
495 if tok is None:
496 return False
497 return tok.type in _DECL_START
499 def _starts_expression(self, tok: Optional[Token] = None) -> bool:
500 tok = tok or self._peek()
501 if tok is None:
502 return False
503 return tok.type in _STARTS_EXPRESSION
505 def _starts_statement(self) -> bool:
506 tok_type = self._peek_type()
507 if tok_type is None:
508 return False
509 if tok_type in _STARTS_STATEMENT:
510 return True
511 return self._starts_expression()
513 def _starts_declarator(self, id_only: bool = False) -> bool:
514 tok_type = self._peek_type()
515 if tok_type is None:
516 return False
517 if tok_type in {"TIMES", "LPAREN"}:
518 return True
519 if id_only:
520 return tok_type == "ID"
521 return tok_type in {"ID", "TYPEID"}
523 def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]:
524 mark = self._mark()
525 tok_type, saw_paren = self._scan_declarator_name_info()
526 self._reset(mark)
527 return tok_type, saw_paren
    def _parse_any_declarator(
        self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False
    ) -> Tuple[Optional[c_ast.Node], bool]:
        """Parse a declarator, possibly abstract.

        Returns (decl, is_concrete): is_concrete is True when a named
        declarator was parsed, False for an abstract one.
        """
        # C declarators are ambiguous without lookahead. For example:
        #     int foo(int (aa));   -> aa is a name (ID)
        #     typedef char TT;
        #     int bar(int (TT));   -> TT is a type (TYPEID) in parens
        name_type, saw_paren = self._peek_declarator_name_info()
        if name_type is None or (
            typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren
        ):
            # No declared name (or a parenthesized TYPEID that must be read
            # as a parameter type): only valid where abstract declarators
            # are allowed.
            if not allow_abstract:
                tok = self._peek()
                coord = self._tok_coord(tok) if tok is not None else self.clex.filename
                self._parse_error("Invalid declarator", coord)
            decl = self._parse_abstract_declarator_opt()
            return decl, False

        if name_type == "TYPEID":
            if typeid_paren_as_abstract:
                decl = self._parse_typeid_noparen_declarator()
            else:
                decl = self._parse_typeid_declarator()
        else:
            decl = self._parse_id_declarator()
        return decl, True
    def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]:
        """Scan ahead over a declarator, consuming tokens.

        Returns (name_token_type, saw_paren): the token type of the declared
        name ("ID"/"TYPEID", or None when no name is found) and whether the
        name was nested inside parentheses.
        """
        saw_paren = False
        # Skip pointer stars and their qualifiers.
        while self._accept("TIMES"):
            while self._peek_type() in _TYPE_QUALIFIER:
                self._advance()

        tok = self._peek()
        if tok is None:
            return None, saw_paren
        if tok.type in {"ID", "TYPEID"}:
            self._advance()
            return tok.type, saw_paren
        if tok.type == "LPAREN":
            saw_paren = True
            self._advance()
            # Recurse into the parenthesized declarator, then skip ahead to
            # the matching close paren.
            tok_type, nested_paren = self._scan_declarator_name_info()
            if nested_paren:
                saw_paren = True
            depth = 1
            while True:
                tok = self._peek()
                if tok is None:
                    return None, saw_paren
                if tok.type == "LPAREN":
                    depth += 1
                elif tok.type == "RPAREN":
                    depth -= 1
                    self._advance()
                    if depth == 0:
                        break
                    continue
                self._advance()
            return tok_type, saw_paren
        return None, saw_paren
591 def _starts_direct_abstract_declarator(self) -> bool:
592 return self._peek_type() in {"LPAREN", "LBRACKET"}
594 def _is_assignment_op(self) -> bool:
595 tok = self._peek()
596 return tok is not None and tok.type in _ASSIGNMENT_OPS
598 def _try_parse_paren_type_name(
599 self,
600 ) -> Optional[Tuple[c_ast.Typename, int, Token]]:
601 """Parse and return a parenthesized type name if present.
603 Returns (typ, mark, lparen_tok) when the next tokens look like
604 '(' type_name ')', where typ is the parsed type name, mark is the
605 token-stream position before parsing, and lparen_tok is the LPAREN
606 token. Returns None if no parenthesized type name is present.
607 """
608 mark = self._mark()
609 lparen_tok = self._accept("LPAREN")
610 if lparen_tok is None:
611 return None
612 if not self._starts_declaration():
613 self._reset(mark)
614 return None
615 typ = self._parse_type_name()
616 if self._accept("RPAREN") is None:
617 self._reset(mark)
618 return None
619 return typ, mark, lparen_tok
621 # ------------------------------------------------------------------
622 # Top-level
623 # ------------------------------------------------------------------
624 # BNF: translation_unit_or_empty : translation_unit | empty
625 def _parse_translation_unit_or_empty(self) -> c_ast.FileAST:
626 if self._peek() is None:
627 return c_ast.FileAST([])
628 return c_ast.FileAST(self._parse_translation_unit())
630 # BNF: translation_unit : external_declaration+
631 def _parse_translation_unit(self) -> List[c_ast.Node]:
632 ext = []
633 while self._peek() is not None:
634 ext.extend(self._parse_external_declaration())
635 return ext
637 # BNF: external_declaration : function_definition
638 # | declaration
639 # | pp_directive
640 # | pppragma_directive
641 # | static_assert
642 # | ';'
    def _parse_external_declaration(self) -> List[c_ast.Node]:
        """BNF: external_declaration : function_definition
                                     | declaration
                                     | pp_directive
                                     | pppragma_directive
                                     | static_assert
                                     | ';'
        """
        tok = self._peek()
        if tok is None:
            return []
        if tok.type == "PPHASH":
            # Preprocessor directives produce no AST nodes.
            self._parse_pp_directive()
            return []
        if tok.type in {"PPPRAGMA", "_PRAGMA"}:
            return [self._parse_pppragma_directive()]
        if self._accept("SEMI"):
            # A stray ';' is a valid, empty external declaration.
            return []
        if tok.type == "_STATIC_ASSERT":
            return self._parse_static_assert()

        if not self._starts_declaration(tok):
            # Special handling for old-style function definitions that have an
            # implicit return type, e.g.
            #
            #   foo() {
            #       return 5;
            #   }
            #
            # These get an implicit 'int' return type.
            decl = self._parse_id_declarator()
            param_decls = None
            if self._peek_type() != "LBRACE":
                self._parse_error("Invalid function definition", decl.coord)
            spec: _DeclSpec = dict(
                qual=[],
                alignment=[],
                storage=[],
                type=[c_ast.IdentifierType(["int"], coord=decl.coord)],
                function=[],
            )
            func = self._build_function_definition(
                spec=spec,
                decl=decl,
                param_decls=param_decls,
                body=self._parse_compound_statement(),
            )
            return [func]

        # From here on, parsing a standard declaration/definition.
        spec, saw_type, spec_coord = self._parse_declaration_specifiers(
            allow_no_type=True
        )

        name_type, _ = self._peek_declarator_name_info()
        if name_type != "ID":
            # No plain identifier follows, so this cannot be a function
            # definition; parse it as a declaration.
            decls = self._parse_decl_body_with_spec(spec, saw_type)
            self._expect("SEMI")
            return decls

        decl = self._parse_id_declarator()

        if self._peek_type() == "LBRACE" or self._starts_declaration():
            # Function definition: either the body follows directly, or
            # K&R-style parameter declarations precede it.
            param_decls = None
            if self._starts_declaration():
                param_decls = self._parse_declaration_list()
                if self._peek_type() != "LBRACE":
                    self._parse_error("Invalid function definition", decl.coord)
            if not spec["type"]:
                # Old-style definition with no return type: default to int.
                spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]
            func = self._build_function_definition(
                spec=spec,
                decl=decl,
                param_decls=param_decls,
                body=self._parse_compound_statement(),
            )
            return [func]

        # Otherwise it's a declaration; the declarator just parsed is the
        # first of a possible comma-separated list.
        decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None)
        if self._accept("EQUALS"):
            decl_dict["init"] = self._parse_initializer()
        decls = self._parse_init_declarator_list(first=decl_dict)
        decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True)
        self._expect("SEMI")
        return decls
722 # ------------------------------------------------------------------
723 # Declarations
724 #
725 # Declarations always come as lists (because they can be several in one
726 # line). When returning parsed declarations, a list is always returned -
727 # even if it contains a single element.
728 # ------------------------------------------------------------------
729 def _parse_declaration(self) -> List[c_ast.Node]:
730 decls = self._parse_decl_body()
731 self._expect("SEMI")
732 return decls
734 # BNF: decl_body : declaration_specifiers decl_body_with_spec
735 def _parse_decl_body(self) -> List[c_ast.Node]:
736 spec, saw_type, _ = self._parse_declaration_specifiers(allow_no_type=True)
737 return self._parse_decl_body_with_spec(spec, saw_type)
739 # BNF: decl_body_with_spec : init_declarator_list
740 # | struct_or_union_or_enum_only
    def _parse_decl_body_with_spec(
        self, spec: "_DeclSpec", saw_type: bool
    ) -> List[c_ast.Node]:
        """BNF: decl_body_with_spec : init_declarator_list
                                    | struct_or_union_or_enum_only

        saw_type is True if the specifiers included an actual type (as
        opposed to only storage/function/qualifiers).
        """
        decls = None
        if saw_type:
            if self._starts_declarator():
                decls = self._parse_init_declarator_list()
        else:
            # Without a type specifier, only a plain identifier can start a
            # declarator (a TYPEID here would itself be the type).
            if self._starts_declarator(id_only=True):
                decls = self._parse_init_declarator_list(id_only=True)

        if decls is None:
            # No declarators: valid for struct/union/enum-only declarations
            # like "struct foo;"; otherwise defer to _build_declarations,
            # which handles the typedef-redeclaration special case.
            ty = spec["type"]
            s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
            if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
                decls = [
                    c_ast.Decl(
                        name=None,
                        quals=spec["qual"],
                        align=spec["alignment"],
                        storage=spec["storage"],
                        funcspec=spec["function"],
                        type=ty[0],
                        init=None,
                        bitsize=None,
                        coord=ty[0].coord,
                    )
                ]
            else:
                decls = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=None, init=None, bitsize=None)],
                    typedef_namespace=True,
                )
        else:
            decls = self._build_declarations(
                spec=spec, decls=decls, typedef_namespace=True
            )

        return decls
784 # BNF: declaration_list : declaration+
785 def _parse_declaration_list(self) -> List[c_ast.Node]:
786 decls = []
787 while self._starts_declaration():
788 decls.extend(self._parse_declaration())
789 return decls
791 # BNF: declaration_specifiers : (storage_class_specifier
792 # | type_specifier
793 # | type_qualifier
794 # | function_specifier
795 # | alignment_specifier)+
    def _parse_declaration_specifiers(
        self, allow_no_type: bool = False
    ) -> Tuple["_DeclSpec", bool, Optional[Coord]]:
        """Parse declaration-specifier sequence.

        BNF: declaration_specifiers : (storage_class_specifier
                                      | type_specifier
                                      | type_qualifier
                                      | function_specifier
                                      | alignment_specifier)+

        allow_no_type:
            If True, allow a missing type specifier without error.

        Returns:
            (spec, saw_type, first_coord) where spec is a dict with
            qual/storage/type/function/alignment entries, saw_type is True
            if a type specifier was consumed, and first_coord is the coord
            of the first specifier token (used for diagnostics).
        """
        spec = None
        saw_type = False
        first_coord = None

        while True:
            tok = self._peek()
            if tok is None:
                break

            if tok.type == "_ALIGNAS":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_alignment_specifier(), "alignment", append=True
                )
                continue

            # '_Atomic(...)' is a type specifier; a bare '_Atomic' falls
            # through to the qualifier branch below.
            if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_atomic_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type in _TYPE_QUALIFIER:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "qual", append=True
                )
                continue

            if tok.type in _STORAGE_CLASS:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "storage", append=True
                )
                continue

            if tok.type in _FUNCTION_SPEC:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "function", append=True
                )
                continue

            if tok.type in _TYPE_SPEC_SIMPLE:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type == "TYPEID":
                # Only the *first* type specifier may be a TYPEID; a second
                # one must belong to the declarator (e.g. "T x;").
                if saw_type:
                    break
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type in {"STRUCT", "UNION"}:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_struct_or_union_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type == "ENUM":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_enum_specifier(), "type", append=True
                )
                saw_type = True
                continue

            break

        if spec is None:
            self._parse_error("Invalid declaration", self.clex.filename)

        if not saw_type and not allow_no_type:
            self._parse_error("Missing type in declaration", first_coord)

        return spec, saw_type, first_coord
916 # BNF: specifier_qualifier_list : (type_specifier
917 # | type_qualifier
918 # | alignment_specifier)+
    def _parse_specifier_qualifier_list(self) -> "_DeclSpec":
        """BNF: specifier_qualifier_list : (type_specifier
                                          | type_qualifier
                                          | alignment_specifier)+
        """
        spec = None
        saw_type = False
        saw_alignment = False
        first_coord = None

        while True:
            tok = self._peek()
            if tok is None:
                break

            if tok.type == "_ALIGNAS":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_alignment_specifier(), "alignment", append=True
                )
                saw_alignment = True
                continue

            # '_Atomic(...)' is a type specifier; a bare '_Atomic' falls
            # through to the qualifier branch below.
            if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_atomic_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type in _TYPE_QUALIFIER:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._advance().value, "qual", append=True
                )
                continue

            if tok.type in _TYPE_SPEC_SIMPLE:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type == "TYPEID":
                # Only the first type specifier may be a TYPEID.
                if saw_type:
                    break
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                tok = self._advance()
                spec = self._add_declaration_specifier(
                    spec,
                    c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                    "type",
                    append=True,
                )
                saw_type = True
                continue

            if tok.type in {"STRUCT", "UNION"}:
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_struct_or_union_specifier(), "type", append=True
                )
                saw_type = True
                continue

            if tok.type == "ENUM":
                if first_coord is None:
                    first_coord = self._tok_coord(tok)
                spec = self._add_declaration_specifier(
                    spec, self._parse_enum_specifier(), "type", append=True
                )
                saw_type = True
                continue

            break

        if spec is None:
            self._parse_error("Invalid specifier list", self.clex.filename)

        if not saw_type and not saw_alignment:
            self._parse_error("Missing type in declaration", first_coord)

        # Defensive defaults kept from the original PLY-based grammar.
        if spec.get("storage") is None:
            spec["storage"] = []
        if spec.get("function") is None:
            spec["function"] = []

        return spec
1017 # BNF: type_qualifier_list : type_qualifier+
1018 def _parse_type_qualifier_list(self) -> List[str]:
1019 quals = []
1020 while self._peek_type() in _TYPE_QUALIFIER:
1021 quals.append(self._advance().value)
1022 return quals
1024 # BNF: alignment_specifier : _ALIGNAS '(' type_name | constant_expression ')'
1025 def _parse_alignment_specifier(self) -> c_ast.Node:
1026 tok = self._expect("_ALIGNAS")
1027 self._expect("LPAREN")
1029 if self._starts_declaration():
1030 typ = self._parse_type_name()
1031 self._expect("RPAREN")
1032 return c_ast.Alignas(typ, self._tok_coord(tok))
1034 expr = self._parse_constant_expression()
1035 self._expect("RPAREN")
1036 return c_ast.Alignas(expr, self._tok_coord(tok))
1038 # BNF: atomic_specifier : _ATOMIC '(' type_name ')'
1039 def _parse_atomic_specifier(self) -> c_ast.Node:
1040 self._expect("_ATOMIC")
1041 self._expect("LPAREN")
1042 typ = self._parse_type_name()
1043 self._expect("RPAREN")
1044 typ.quals.append("_Atomic")
1045 return typ
1047 # BNF: init_declarator_list : init_declarator (',' init_declarator)*
1048 def _parse_init_declarator_list(
1049 self, first: Optional["_DeclInfo"] = None, id_only: bool = False
1050 ) -> List["_DeclInfo"]:
1051 decls = (
1052 [first]
1053 if first is not None
1054 else [self._parse_init_declarator(id_only=id_only)]
1055 )
1057 while self._accept("COMMA"):
1058 decls.append(self._parse_init_declarator(id_only=id_only))
1059 return decls
1061 # BNF: init_declarator : declarator ('=' initializer)?
1062 def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo":
1063 decl = self._parse_id_declarator() if id_only else self._parse_declarator()
1064 init = None
1065 if self._accept("EQUALS"):
1066 init = self._parse_initializer()
1067 return dict(decl=decl, init=init, bitsize=None)
1069 # ------------------------------------------------------------------
1070 # Structs/unions/enums
1071 # ------------------------------------------------------------------
1072 # BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}'
1073 # | struct_or_union ID
1074 def _parse_struct_or_union_specifier(self) -> c_ast.Node:
1075 tok = self._advance()
1076 klass = self._select_struct_union_class(tok.value)
1078 if self._peek_type() in {"ID", "TYPEID"}:
1079 name_tok = self._advance()
1080 if self._peek_type() == "LBRACE":
1081 self._advance()
1082 if self._accept("RBRACE"):
1083 return klass(
1084 name=name_tok.value, decls=[], coord=self._tok_coord(name_tok)
1085 )
1086 decls = self._parse_struct_declaration_list()
1087 self._expect("RBRACE")
1088 return klass(
1089 name=name_tok.value, decls=decls, coord=self._tok_coord(name_tok)
1090 )
1092 return klass(
1093 name=name_tok.value, decls=None, coord=self._tok_coord(name_tok)
1094 )
1096 if self._peek_type() == "LBRACE":
1097 brace_tok = self._advance()
1098 if self._accept("RBRACE"):
1099 return klass(name=None, decls=[], coord=self._tok_coord(brace_tok))
1100 decls = self._parse_struct_declaration_list()
1101 self._expect("RBRACE")
1102 return klass(name=None, decls=decls, coord=self._tok_coord(brace_tok))
1104 self._parse_error("Invalid struct/union declaration", self._tok_coord(tok))
1106 # BNF: struct_declaration_list : struct_declaration+
1107 def _parse_struct_declaration_list(self) -> List[c_ast.Node]:
1108 decls = []
1109 while self._peek_type() not in {None, "RBRACE"}:
1110 items = self._parse_struct_declaration()
1111 if items is None:
1112 continue
1113 decls.extend(items)
1114 return decls
    # BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';'
    #                         | static_assert
    #                         | pppragma_directive
    def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]:
        """Parse one member declaration inside a struct/union body.

        Returns a list of nodes, or None for a bare ';' (which declares
        nothing and is skipped by the caller).
        """
        # A lone semicolon is an empty member declaration.
        if self._peek_type() == "SEMI":
            self._advance()
            return None
        # A #pragma inside the struct body becomes its own node.
        if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
            return [self._parse_pppragma_directive()]

        spec = self._parse_specifier_qualifier_list()
        # specifier_qualifier_list cannot produce storage-class specifiers,
        # so 'typedef' must never appear here.
        assert "typedef" not in spec.get("storage", [])

        decls = None
        # A leading ':' (no declarator) starts an anonymous bit-field.
        if self._starts_declarator() or self._peek_type() == "COLON":
            decls = self._parse_struct_declarator_list()
        if decls is not None:
            self._expect("SEMI")
            return self._build_declarations(spec=spec, decls=decls)

        # No declarator at all: either an anonymous struct/union member
        # (the single spec entry is already a Node) or a bare type name.
        if len(spec["type"]) == 1:
            node = spec["type"][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                # Presumably a list of type-name strings — wrap it.
                decl_type = c_ast.IdentifierType(node)
            self._expect("SEMI")
            return self._build_declarations(
                spec=spec, decls=[dict(decl=decl_type, init=None, bitsize=None)]
            )

        self._expect("SEMI")
        return self._build_declarations(
            spec=spec, decls=[dict(decl=None, init=None, bitsize=None)]
        )
1152 # BNF: struct_declarator_list : struct_declarator (',' struct_declarator)*
1153 def _parse_struct_declarator_list(self) -> List["_DeclInfo"]:
1154 decls = [self._parse_struct_declarator()]
1155 while self._accept("COMMA"):
1156 decls.append(self._parse_struct_declarator())
1157 return decls
1159 # BNF: struct_declarator : declarator? ':' constant_expression
1160 # | declarator (':' constant_expression)?
1161 def _parse_struct_declarator(self) -> "_DeclInfo":
1162 if self._accept("COLON"):
1163 bitsize = self._parse_constant_expression()
1164 return {
1165 "decl": c_ast.TypeDecl(None, None, None, None),
1166 "init": None,
1167 "bitsize": bitsize,
1168 }
1170 decl = self._parse_declarator()
1171 if self._accept("COLON"):
1172 bitsize = self._parse_constant_expression()
1173 return {"decl": decl, "init": None, "bitsize": bitsize}
1175 return {"decl": decl, "init": None, "bitsize": None}
1177 # BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}'
1178 # | ENUM ID
1179 def _parse_enum_specifier(self) -> c_ast.Node:
1180 tok = self._expect("ENUM")
1181 if self._peek_type() in {"ID", "TYPEID"}:
1182 name_tok = self._advance()
1183 if self._peek_type() == "LBRACE":
1184 self._advance()
1185 enums = self._parse_enumerator_list()
1186 self._expect("RBRACE")
1187 return c_ast.Enum(name_tok.value, enums, self._tok_coord(tok))
1188 return c_ast.Enum(name_tok.value, None, self._tok_coord(tok))
1190 self._expect("LBRACE")
1191 enums = self._parse_enumerator_list()
1192 self._expect("RBRACE")
1193 return c_ast.Enum(None, enums, self._tok_coord(tok))
1195 # BNF: enumerator_list : enumerator (',' enumerator)* ','?
1196 def _parse_enumerator_list(self) -> c_ast.Node:
1197 enum = self._parse_enumerator()
1198 enum_list = c_ast.EnumeratorList([enum], enum.coord)
1199 while self._accept("COMMA"):
1200 if self._peek_type() == "RBRACE":
1201 break
1202 enum = self._parse_enumerator()
1203 enum_list.enumerators.append(enum)
1204 return enum_list
1206 # BNF: enumerator : ID ('=' constant_expression)?
1207 def _parse_enumerator(self) -> c_ast.Node:
1208 name_tok = self._expect("ID")
1209 if self._accept("EQUALS"):
1210 value = self._parse_constant_expression()
1211 else:
1212 value = None
1213 enum = c_ast.Enumerator(name_tok.value, value, self._tok_coord(name_tok))
1214 self._add_identifier(enum.name, enum.coord)
1215 return enum
1217 # ------------------------------------------------------------------
1218 # Declarators
1219 # ------------------------------------------------------------------
1220 # BNF: declarator : pointer? direct_declarator
1221 def _parse_declarator(self) -> c_ast.Node:
1222 decl, _ = self._parse_any_declarator(
1223 allow_abstract=False, typeid_paren_as_abstract=False
1224 )
1225 assert decl is not None
1226 return decl
1228 # BNF: id_declarator : declarator with ID name
1229 def _parse_id_declarator(self) -> c_ast.Node:
1230 return self._parse_declarator_kind(kind="id", allow_paren=True)
1232 # BNF: typeid_declarator : declarator with TYPEID name
1233 def _parse_typeid_declarator(self) -> c_ast.Node:
1234 return self._parse_declarator_kind(kind="typeid", allow_paren=True)
1236 # BNF: typeid_noparen_declarator : declarator without parenthesized name
1237 def _parse_typeid_noparen_declarator(self) -> c_ast.Node:
1238 return self._parse_declarator_kind(kind="typeid", allow_paren=False)
1240 # BNF: declarator_kind : pointer? direct_declarator(kind)
1241 def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node:
1242 ptr = None
1243 if self._peek_type() == "TIMES":
1244 ptr = self._parse_pointer()
1245 direct = self._parse_direct_declarator(kind, allow_paren=allow_paren)
1246 if ptr is not None:
1247 return self._type_modify_decl(direct, ptr)
1248 return direct
1250 # BNF: direct_declarator : ID | TYPEID | '(' declarator ')'
1251 # | direct_declarator '[' ... ']'
1252 # | direct_declarator '(' ... ')'
1253 def _parse_direct_declarator(
1254 self, kind: str, allow_paren: bool = True
1255 ) -> c_ast.Node:
1256 if allow_paren and self._accept("LPAREN"):
1257 decl = self._parse_declarator_kind(kind, allow_paren=True)
1258 self._expect("RPAREN")
1259 else:
1260 if kind == "id":
1261 name_tok = self._expect("ID")
1262 else:
1263 name_tok = self._expect("TYPEID")
1264 decl = c_ast.TypeDecl(
1265 declname=name_tok.value,
1266 type=None,
1267 quals=None,
1268 align=None,
1269 coord=self._tok_coord(name_tok),
1270 )
1272 return self._parse_decl_suffixes(decl)
1274 def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node:
1275 """Parse a chain of array/function suffixes and attach them to decl."""
1276 while True:
1277 if self._peek_type() == "LBRACKET":
1278 decl = self._type_modify_decl(decl, self._parse_array_decl(decl))
1279 continue
1280 if self._peek_type() == "LPAREN":
1281 func = self._parse_function_decl(decl)
1282 decl = self._type_modify_decl(decl, func)
1283 continue
1284 break
1285 return decl
1287 # BNF: array_decl : '[' array_specifiers? assignment_expression? ']'
1288 def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
1289 return self._parse_array_decl_common(base_type=None, coord=base_decl.coord)
    def _parse_array_decl_common(
        self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None
    ) -> c_ast.Node:
        """Parse an array declarator suffix and return an ArrayDecl node.

        base_type:
            Base declarator node to attach (None for direct-declarator parsing,
            TypeDecl for abstract declarators).

        coord:
            Coordinate to use for the ArrayDecl. If None, uses the '[' token.
        """
        lbrack_tok = self._expect("LBRACKET")
        if coord is None:
            coord = self._tok_coord(lbrack_tok)

        def make_array_decl(dim, dim_quals):
            # Small factory so every branch below builds the node identically.
            return c_ast.ArrayDecl(
                type=base_type, dim=dim, dim_quals=dim_quals, coord=coord
            )

        # '[' STATIC type_qualifier* assignment_expression ']'
        if self._accept("STATIC"):
            dim_quals = ["static"] + (self._parse_type_qualifier_list() or [])
            dim = self._parse_assignment_expression()
            self._expect("RBRACKET")
            return make_array_decl(dim, dim_quals)

        # '[' type_qualifier+ ... ']' — qualifiers first, then an optional
        # STATIC, '*', or dimension expression.
        if self._peek_type() in _TYPE_QUALIFIER:
            dim_quals = self._parse_type_qualifier_list() or []
            if self._accept("STATIC"):
                dim_quals = dim_quals + ["static"]
                dim = self._parse_assignment_expression()
                self._expect("RBRACKET")
                return make_array_decl(dim, dim_quals)
            times_tok = self._accept("TIMES")
            if times_tok:
                # '[quals *]': VLA of unspecified size.
                self._expect("RBRACKET")
                dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
                return make_array_decl(dim, dim_quals)
            # '[quals expr?]': optional dimension expression.
            dim = None
            if self._starts_expression():
                dim = self._parse_assignment_expression()
            self._expect("RBRACKET")
            return make_array_decl(dim, dim_quals)

        # No qualifiers: '[*]', '[expr]' or '[]'.
        times_tok = self._accept("TIMES")
        if times_tok:
            self._expect("RBRACKET")
            dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
            return make_array_decl(dim, [])

        dim = None
        if self._starts_expression():
            dim = self._parse_assignment_expression()
        self._expect("RBRACKET")
        return make_array_decl(dim, [])
1348 # BNF: function_decl : '(' parameter_type_list_opt | identifier_list_opt ')'
1349 def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
1350 self._expect("LPAREN")
1351 if self._accept("RPAREN"):
1352 args = None
1353 else:
1354 args = (
1355 self._parse_parameter_type_list()
1356 if self._starts_declaration()
1357 else self._parse_identifier_list_opt()
1358 )
1359 self._expect("RPAREN")
1361 func = c_ast.FuncDecl(args=args, type=None, coord=base_decl.coord)
1363 if self._peek_type() == "LBRACE":
1364 if func.args is not None:
1365 for param in func.args.params:
1366 if isinstance(param, c_ast.EllipsisParam):
1367 break
1368 name = getattr(param, "name", None)
1369 if name:
1370 self._add_identifier(name, param.coord)
1372 return func
    # BNF: pointer : '*' type_qualifier_list? pointer?
    def _parse_pointer(self) -> Optional[c_ast.Node]:
        """Parse a chain of '*' (each with optional qualifiers).

        Returns the outermost PtrDecl, or None if no '*' is present.
        """
        stars = []
        times_tok = self._accept("TIMES")
        while times_tok:
            quals = self._parse_type_qualifier_list() or []
            stars.append((quals, self._tok_coord(times_tok)))
            times_tok = self._accept("TIMES")

        if not stars:
            return None

        # Nest in source order, so the *last* '*' ends up outermost; the
        # innermost PtrDecl keeps type=None for _type_modify_decl to fill.
        # NOTE(review): this nesting direction looks deliberate (pycparser
        # documents a reversed pointer ordering) — confirm before changing.
        ptr = None
        for quals, coord in stars:
            ptr = c_ast.PtrDecl(quals=quals, type=ptr, coord=coord)
        return ptr
1391 # BNF: parameter_type_list : parameter_list (',' ELLIPSIS)?
1392 def _parse_parameter_type_list(self) -> c_ast.ParamList:
1393 params = self._parse_parameter_list()
1394 if self._peek_type() == "COMMA" and self._peek_type(2) == "ELLIPSIS":
1395 self._advance()
1396 ell_tok = self._advance()
1397 params.params.append(c_ast.EllipsisParam(self._tok_coord(ell_tok)))
1398 return params
1400 # BNF: parameter_list : parameter_declaration (',' parameter_declaration)*
1401 def _parse_parameter_list(self) -> c_ast.ParamList:
1402 first = self._parse_parameter_declaration()
1403 params = c_ast.ParamList([first], first.coord)
1404 while self._peek_type() == "COMMA" and self._peek_type(2) != "ELLIPSIS":
1405 self._advance()
1406 params.params.append(self._parse_parameter_declaration())
1407 return params
    # BNF: parameter_declaration : declaration_specifiers declarator?
    #                            | declaration_specifiers abstract_declarator_opt
    def _parse_parameter_declaration(self) -> c_ast.Node:
        """Parse a single function parameter (named or abstract)."""
        spec, _, spec_coord = self._parse_declaration_specifiers(allow_no_type=True)

        # No type specifier at all: default the parameter type to 'int'.
        if not spec["type"]:
            spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]

        if self._starts_declarator():
            decl, is_named = self._parse_any_declarator(
                allow_abstract=True, typeid_paren_as_abstract=True
            )
            if is_named:
                # Named parameter -> full Decl node.
                return self._build_declarations(
                    spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
                )[0]
            return self._build_parameter_declaration(spec, decl, spec_coord)

        # No declarator present: abstract parameter (e.g. 'int' alone).
        decl = self._parse_abstract_declarator_opt()
        return self._build_parameter_declaration(spec, decl, spec_coord)
    def _build_parameter_declaration(
        self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord]
    ) -> c_ast.Node:
        """Build the AST node for a parameter without a named declarator.

        Handles the ambiguous case where the last "type" entry is really a
        parameter name shadowing a typedef currently in scope; in that case
        a regular Decl is produced instead of a Typename.
        """
        if (
            len(spec["type"]) > 1
            and len(spec["type"][-1].names) == 1
            and self._is_type_in_scope(spec["type"][-1].names[0])
        ):
            return self._build_declarations(
                spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
            )[0]

        # Genuinely abstract parameter: wrap it in a Typename.
        decl = c_ast.Typename(
            name="",
            quals=spec["qual"],
            align=None,
            type=decl or c_ast.TypeDecl(None, None, None, None),
            coord=spec_coord,
        )
        return self._fix_decl_name_type(decl, spec["type"])
1451 # BNF: identifier_list_opt : identifier_list | empty
1452 def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]:
1453 if self._peek_type() == "RPAREN":
1454 return None
1455 return self._parse_identifier_list()
1457 # BNF: identifier_list : identifier (',' identifier)*
1458 def _parse_identifier_list(self) -> c_ast.Node:
1459 first = self._parse_identifier()
1460 params = c_ast.ParamList([first], first.coord)
1461 while self._accept("COMMA"):
1462 params.params.append(self._parse_identifier())
1463 return params
1465 # ------------------------------------------------------------------
1466 # Abstract declarators
1467 # ------------------------------------------------------------------
1468 # BNF: type_name : specifier_qualifier_list abstract_declarator_opt
1469 def _parse_type_name(self) -> c_ast.Typename:
1470 spec = self._parse_specifier_qualifier_list()
1471 decl = self._parse_abstract_declarator_opt()
1473 coord = None
1474 if decl is not None:
1475 coord = decl.coord
1476 elif spec["type"]:
1477 coord = spec["type"][0].coord
1479 typename = c_ast.Typename(
1480 name="",
1481 quals=spec["qual"][:],
1482 align=None,
1483 type=decl or c_ast.TypeDecl(None, None, None, None),
1484 coord=coord,
1485 )
1486 return cast(c_ast.Typename, self._fix_decl_name_type(typename, spec["type"]))
1488 # BNF: abstract_declarator_opt : pointer? direct_abstract_declarator?
1489 def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]:
1490 if self._peek_type() == "TIMES":
1491 ptr = self._parse_pointer()
1492 if self._starts_direct_abstract_declarator():
1493 decl = self._parse_direct_abstract_declarator()
1494 else:
1495 decl = c_ast.TypeDecl(None, None, None, None)
1496 assert ptr is not None
1497 return self._type_modify_decl(decl, ptr)
1499 if self._starts_direct_abstract_declarator():
1500 return self._parse_direct_abstract_declarator()
1502 return None
    # BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')'
    #                                 | '(' abstract_declarator ')'
    #                                 | '[' ... ']'
    def _parse_direct_abstract_declarator(self) -> c_ast.Node:
        """Parse a direct abstract declarator (declarator with no name)."""
        lparen_tok = self._accept("LPAREN")
        if lparen_tok:
            # A '(' followed by something that starts a declaration (or an
            # immediate ')') is a function declarator; otherwise it merely
            # parenthesizes an inner abstract declarator.
            if self._starts_declaration() or self._peek_type() == "RPAREN":
                params = self._parse_parameter_type_list_opt()
                self._expect("RPAREN")
                decl = c_ast.FuncDecl(
                    args=params,
                    type=c_ast.TypeDecl(None, None, None, None),
                    coord=self._tok_coord(lparen_tok),
                )
            else:
                decl = self._parse_abstract_declarator_opt()
                self._expect("RPAREN")
                assert decl is not None
        elif self._peek_type() == "LBRACKET":
            decl = self._parse_abstract_array_base()
        else:
            self._parse_error("Invalid abstract declarator", self.clex.filename)

        # Attach any trailing array/function suffixes.
        return self._parse_decl_suffixes(decl)
1529 # BNF: parameter_type_list_opt : parameter_type_list | empty
1530 def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]:
1531 if self._peek_type() == "RPAREN":
1532 return None
1533 return self._parse_parameter_type_list()
1535 # BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']'
1536 def _parse_abstract_array_base(self) -> c_ast.Node:
1537 return self._parse_array_decl_common(
1538 base_type=c_ast.TypeDecl(None, None, None, None), coord=None
1539 )
1541 # ------------------------------------------------------------------
1542 # Statements
1543 # ------------------------------------------------------------------
1544 # BNF: statement : labeled_statement | compound_statement
1545 # | selection_statement | iteration_statement
1546 # | jump_statement | expression_statement
1547 # | static_assert | pppragma_directive
1548 def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]:
1549 tok_type = self._peek_type()
1550 match tok_type:
1551 case "CASE" | "DEFAULT":
1552 return self._parse_labeled_statement()
1553 case "ID" if self._peek_type(2) == "COLON":
1554 return self._parse_labeled_statement()
1555 case "LBRACE":
1556 return self._parse_compound_statement()
1557 case "IF" | "SWITCH":
1558 return self._parse_selection_statement()
1559 case "WHILE" | "DO" | "FOR":
1560 return self._parse_iteration_statement()
1561 case "GOTO" | "BREAK" | "CONTINUE" | "RETURN":
1562 return self._parse_jump_statement()
1563 case "PPPRAGMA" | "_PRAGMA":
1564 return self._parse_pppragma_directive()
1565 case "_STATIC_ASSERT":
1566 return self._parse_static_assert()
1567 case _:
1568 return self._parse_expression_statement()
1570 # BNF: pragmacomp_or_statement : pppragma_directive* statement
1571 def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]:
1572 if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
1573 pragmas = self._parse_pppragma_directive_list()
1574 stmt = self._parse_statement()
1575 return c_ast.Compound(block_items=pragmas + [stmt], coord=pragmas[0].coord)
1576 return self._parse_statement()
1578 # BNF: block_item : declaration | statement
1579 def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]:
1580 if self._starts_declaration():
1581 return self._parse_declaration()
1582 return self._parse_statement()
1584 # BNF: block_item_list : block_item+
1585 def _parse_block_item_list(self) -> List[c_ast.Node]:
1586 items = []
1587 while self._peek_type() not in {"RBRACE", None}:
1588 item = self._parse_block_item()
1589 if isinstance(item, list):
1590 if item == [None]:
1591 continue
1592 items.extend(item)
1593 else:
1594 items.append(item)
1595 return items
1597 # BNF: compound_statement : '{' block_item_list? '}'
1598 def _parse_compound_statement(self) -> c_ast.Node:
1599 lbrace_tok = self._expect("LBRACE")
1600 if self._accept("RBRACE"):
1601 return c_ast.Compound(block_items=None, coord=self._tok_coord(lbrace_tok))
1602 block_items = self._parse_block_item_list()
1603 self._expect("RBRACE")
1604 return c_ast.Compound(
1605 block_items=block_items, coord=self._tok_coord(lbrace_tok)
1606 )
1608 # BNF: labeled_statement : ID ':' statement
1609 # | CASE constant_expression ':' statement
1610 # | DEFAULT ':' statement
1611 def _parse_labeled_statement(self) -> c_ast.Node:
1612 tok_type = self._peek_type()
1613 match tok_type:
1614 case "ID":
1615 name_tok = self._advance()
1616 self._expect("COLON")
1617 if self._starts_statement():
1618 stmt = self._parse_pragmacomp_or_statement()
1619 else:
1620 stmt = c_ast.EmptyStatement(self._tok_coord(name_tok))
1621 return c_ast.Label(name_tok.value, stmt, self._tok_coord(name_tok))
1622 case "CASE":
1623 case_tok = self._advance()
1624 expr = self._parse_constant_expression()
1625 self._expect("COLON")
1626 if self._starts_statement():
1627 stmt = self._parse_pragmacomp_or_statement()
1628 else:
1629 stmt = c_ast.EmptyStatement(self._tok_coord(case_tok))
1630 return c_ast.Case(expr, [stmt], self._tok_coord(case_tok))
1631 case "DEFAULT":
1632 def_tok = self._advance()
1633 self._expect("COLON")
1634 if self._starts_statement():
1635 stmt = self._parse_pragmacomp_or_statement()
1636 else:
1637 stmt = c_ast.EmptyStatement(self._tok_coord(def_tok))
1638 return c_ast.Default([stmt], self._tok_coord(def_tok))
1639 case _:
1640 self._parse_error("Invalid labeled statement", self.clex.filename)
1642 # BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)?
1643 # | SWITCH '(' expression ')' statement
1644 def _parse_selection_statement(self) -> c_ast.Node:
1645 tok = self._advance()
1646 match tok.type:
1647 case "IF":
1648 self._expect("LPAREN")
1649 cond = self._parse_expression()
1650 self._expect("RPAREN")
1651 then_stmt = self._parse_pragmacomp_or_statement()
1652 if self._accept("ELSE"):
1653 else_stmt = self._parse_pragmacomp_or_statement()
1654 return c_ast.If(cond, then_stmt, else_stmt, self._tok_coord(tok))
1655 return c_ast.If(cond, then_stmt, None, self._tok_coord(tok))
1656 case "SWITCH":
1657 self._expect("LPAREN")
1658 expr = self._parse_expression()
1659 self._expect("RPAREN")
1660 stmt = self._parse_pragmacomp_or_statement()
1661 return fix_switch_cases(c_ast.Switch(expr, stmt, self._tok_coord(tok)))
1662 case _:
1663 self._parse_error("Invalid selection statement", self._tok_coord(tok))
1665 # BNF: iteration_statement : WHILE '(' expression ')' statement
1666 # | DO statement WHILE '(' expression ')' ';'
1667 # | FOR '(' (declaration | expression_opt) ';'
1668 # expression_opt ';' expression_opt ')' statement
1669 def _parse_iteration_statement(self) -> c_ast.Node:
1670 tok = self._advance()
1671 match tok.type:
1672 case "WHILE":
1673 self._expect("LPAREN")
1674 cond = self._parse_expression()
1675 self._expect("RPAREN")
1676 stmt = self._parse_pragmacomp_or_statement()
1677 return c_ast.While(cond, stmt, self._tok_coord(tok))
1678 case "DO":
1679 stmt = self._parse_pragmacomp_or_statement()
1680 self._expect("WHILE")
1681 self._expect("LPAREN")
1682 cond = self._parse_expression()
1683 self._expect("RPAREN")
1684 self._expect("SEMI")
1685 return c_ast.DoWhile(cond, stmt, self._tok_coord(tok))
1686 case "FOR":
1687 self._expect("LPAREN")
1688 if self._starts_declaration():
1689 decls = self._parse_declaration()
1690 init = c_ast.DeclList(decls, self._tok_coord(tok))
1691 cond = self._parse_expression_opt()
1692 self._expect("SEMI")
1693 next_expr = self._parse_expression_opt()
1694 self._expect("RPAREN")
1695 stmt = self._parse_pragmacomp_or_statement()
1696 return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))
1698 init = self._parse_expression_opt()
1699 self._expect("SEMI")
1700 cond = self._parse_expression_opt()
1701 self._expect("SEMI")
1702 next_expr = self._parse_expression_opt()
1703 self._expect("RPAREN")
1704 stmt = self._parse_pragmacomp_or_statement()
1705 return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))
1706 case _:
1707 self._parse_error("Invalid iteration statement", self._tok_coord(tok))
1709 # BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';'
1710 # | RETURN expression? ';'
1711 def _parse_jump_statement(self) -> c_ast.Node:
1712 tok = self._advance()
1713 match tok.type:
1714 case "GOTO":
1715 name_tok = self._expect("ID")
1716 self._expect("SEMI")
1717 return c_ast.Goto(name_tok.value, self._tok_coord(tok))
1718 case "BREAK":
1719 self._expect("SEMI")
1720 return c_ast.Break(self._tok_coord(tok))
1721 case "CONTINUE":
1722 self._expect("SEMI")
1723 return c_ast.Continue(self._tok_coord(tok))
1724 case "RETURN":
1725 if self._accept("SEMI"):
1726 return c_ast.Return(None, self._tok_coord(tok))
1727 expr = self._parse_expression()
1728 self._expect("SEMI")
1729 return c_ast.Return(expr, self._tok_coord(tok))
1730 case _:
1731 self._parse_error("Invalid jump statement", self._tok_coord(tok))
1733 # BNF: expression_statement : expression_opt ';'
1734 def _parse_expression_statement(self) -> c_ast.Node:
1735 expr = self._parse_expression_opt()
1736 semi_tok = self._expect("SEMI")
1737 if expr is None:
1738 return c_ast.EmptyStatement(self._tok_coord(semi_tok))
1739 return expr
1741 # ------------------------------------------------------------------
1742 # Expressions
1743 # ------------------------------------------------------------------
1744 # BNF: expression_opt : expression | empty
1745 def _parse_expression_opt(self) -> Optional[c_ast.Node]:
1746 if self._starts_expression():
1747 return self._parse_expression()
1748 return None
1750 # BNF: expression : assignment_expression (',' assignment_expression)*
1751 def _parse_expression(self) -> c_ast.Node:
1752 expr = self._parse_assignment_expression()
1753 if not self._accept("COMMA"):
1754 return expr
1755 exprs = [expr, self._parse_assignment_expression()]
1756 while self._accept("COMMA"):
1757 exprs.append(self._parse_assignment_expression())
1758 return c_ast.ExprList(exprs, expr.coord)
1760 # BNF: assignment_expression : conditional_expression
1761 # | unary_expression assignment_op assignment_expression
1762 def _parse_assignment_expression(self) -> c_ast.Node:
1763 if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE":
1764 self._advance()
1765 comp = self._parse_compound_statement()
1766 self._expect("RPAREN")
1767 return comp
1769 expr = self._parse_conditional_expression()
1770 if self._is_assignment_op():
1771 op = self._advance().value
1772 rhs = self._parse_assignment_expression()
1773 return c_ast.Assignment(op, expr, rhs, expr.coord)
1774 return expr
1776 # BNF: conditional_expression : binary_expression
1777 # | binary_expression '?' expression ':' conditional_expression
1778 def _parse_conditional_expression(self) -> c_ast.Node:
1779 expr = self._parse_binary_expression()
1780 if self._accept("CONDOP"):
1781 iftrue = self._parse_expression()
1782 self._expect("COLON")
1783 iffalse = self._parse_conditional_expression()
1784 return c_ast.TernaryOp(expr, iftrue, iffalse, expr.coord)
1785 return expr
    # BNF: binary_expression : cast_expression (binary_op cast_expression)*
    def _parse_binary_expression(
        self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None
    ) -> c_ast.Node:
        """Precedence-climbing parser for binary operators.

        min_prec: only operators with precedence >= min_prec are consumed.
        lhs: an already-parsed left operand (used by recursive calls).
        """
        if lhs is None:
            lhs = self._parse_cast_expression()

        while True:
            tok = self._peek()
            if tok is None or tok.type not in _BINARY_PRECEDENCE:
                break
            prec = _BINARY_PRECEDENCE[tok.type]
            if prec < min_prec:
                break

            op = tok.value
            self._advance()
            rhs = self._parse_cast_expression()

            # Fold any strictly-higher-precedence operators into rhs before
            # combining; equal precedence falls through, giving the
            # left-associativity all these operators share.
            while True:
                next_tok = self._peek()
                if next_tok is None or next_tok.type not in _BINARY_PRECEDENCE:
                    break
                next_prec = _BINARY_PRECEDENCE[next_tok.type]
                if next_prec > prec:
                    rhs = self._parse_binary_expression(next_prec, rhs)
                else:
                    break

            lhs = c_ast.BinaryOp(op, lhs, rhs, lhs.coord)

        return lhs
    # BNF: cast_expression : '(' type_name ')' cast_expression
    #                      | unary_expression
    def _parse_cast_expression(self) -> c_ast.Node:
        """Parse a cast expression, backtracking when '(type)' turns out
        to introduce a compound literal rather than a cast."""
        result = self._try_parse_paren_type_name()
        if result is not None:
            typ, mark, lparen_tok = result
            if self._peek_type() == "LBRACE":
                # (type){...} is a compound literal, not a cast. Examples:
                # (int){1} -> compound literal, handled in postfix
                # (int) x -> cast, handled below
                self._reset(mark)
            else:
                expr = self._parse_cast_expression()
                return c_ast.Cast(typ, expr, self._tok_coord(lparen_tok))
        return self._parse_unary_expression()
1836 # BNF: unary_expression : postfix_expression
1837 # | '++' unary_expression
1838 # | '--' unary_expression
1839 # | unary_op cast_expression
1840 # | 'sizeof' unary_expression
1841 # | 'sizeof' '(' type_name ')'
1842 # | '_Alignof' '(' type_name ')'
1843 def _parse_unary_expression(self) -> c_ast.Node:
1844 tok_type = self._peek_type()
1845 if tok_type in {"PLUSPLUS", "MINUSMINUS"}:
1846 tok = self._advance()
1847 expr = self._parse_unary_expression()
1848 return c_ast.UnaryOp(tok.value, expr, expr.coord)
1850 if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}:
1851 tok = self._advance()
1852 expr = self._parse_cast_expression()
1853 return c_ast.UnaryOp(tok.value, expr, expr.coord)
1855 if tok_type == "SIZEOF":
1856 tok = self._advance()
1857 result = self._try_parse_paren_type_name()
1858 if result is not None:
1859 typ, _, _ = result
1860 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
1861 expr = self._parse_unary_expression()
1862 return c_ast.UnaryOp(tok.value, expr, self._tok_coord(tok))
1864 if tok_type == "_ALIGNOF":
1865 tok = self._advance()
1866 self._expect("LPAREN")
1867 typ = self._parse_type_name()
1868 self._expect("RPAREN")
1869 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
1871 return self._parse_postfix_expression()
    # BNF: postfix_expression : primary_expression postfix_suffix*
    #                         | '(' type_name ')' '{' initializer_list ','? '}'
    def _parse_postfix_expression(self) -> c_ast.Node:
        """Parse a postfix expression: a primary (or compound literal)
        followed by any number of [], (), ./->, ++/-- suffixes."""
        result = self._try_parse_paren_type_name()
        if result is not None:
            typ, mark, _ = result
            # Disambiguate between casts and compound literals:
            # (int) x -> cast
            # (int) {1} -> compound literal
            if self._accept("LBRACE"):
                init = self._parse_initializer_list()
                self._accept("COMMA")  # optional trailing comma
                self._expect("RBRACE")
                return c_ast.CompoundLiteral(typ, init)
            else:
                # Not a compound literal: rewind and reparse as primary.
                self._reset(mark)

        expr = self._parse_primary_expression()
        while True:
            # Array subscript: expr[sub]
            if self._accept("LBRACKET"):
                sub = self._parse_expression()
                self._expect("RBRACKET")
                expr = c_ast.ArrayRef(expr, sub, expr.coord)
                continue
            # Function call: expr(args?)
            if self._accept("LPAREN"):
                if self._peek_type() == "RPAREN":
                    self._advance()
                    args = None
                else:
                    args = self._parse_argument_expression_list()
                    self._expect("RPAREN")
                expr = c_ast.FuncCall(expr, args, expr.coord)
                continue
            # Member access: expr.field or expr->field
            if self._peek_type() in {"PERIOD", "ARROW"}:
                op_tok = self._advance()
                name_tok = self._advance()
                if name_tok.type not in {"ID", "TYPEID"}:
                    self._parse_error(
                        "Invalid struct reference", self._tok_coord(name_tok)
                    )
                field = c_ast.ID(name_tok.value, self._tok_coord(name_tok))
                expr = c_ast.StructRef(expr, op_tok.value, field, expr.coord)
                continue
            # Postfix ++/--, encoded with a 'p' prefix ('p++' / 'p--').
            if self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}:
                tok = self._advance()
                expr = c_ast.UnaryOp("p" + tok.value, expr, expr.coord)
                continue
            break
        return expr
1923 # BNF: primary_expression : ID | constant | string_literal
1924 # | '(' expression ')' | offsetof
1925 def _parse_primary_expression(self) -> c_ast.Node:
1926 tok_type = self._peek_type()
1927 if tok_type == "ID":
1928 return self._parse_identifier()
1929 if (
1930 tok_type in _INT_CONST
1931 or tok_type in _FLOAT_CONST
1932 or tok_type in _CHAR_CONST
1933 ):
1934 return self._parse_constant()
1935 if tok_type in _STRING_LITERAL:
1936 return self._parse_unified_string_literal()
1937 if tok_type in _WSTR_LITERAL:
1938 return self._parse_unified_wstring_literal()
1939 if tok_type == "LPAREN":
1940 self._advance()
1941 expr = self._parse_expression()
1942 self._expect("RPAREN")
1943 return expr
1944 if tok_type == "OFFSETOF":
1945 off_tok = self._advance()
1946 self._expect("LPAREN")
1947 typ = self._parse_type_name()
1948 self._expect("COMMA")
1949 designator = self._parse_offsetof_member_designator()
1950 self._expect("RPAREN")
1951 coord = self._tok_coord(off_tok)
1952 return c_ast.FuncCall(
1953 c_ast.ID(off_tok.value, coord),
1954 c_ast.ExprList([typ, designator], coord),
1955 coord,
1956 )
1958 self._parse_error("Invalid expression", self.clex.filename)
1960 # BNF: offsetof_member_designator : identifier_or_typeid
1961 # ('.' identifier_or_typeid | '[' expression ']')*
1962 def _parse_offsetof_member_designator(self) -> c_ast.Node:
1963 node = self._parse_identifier_or_typeid()
1964 while True:
1965 if self._accept("PERIOD"):
1966 field = self._parse_identifier_or_typeid()
1967 node = c_ast.StructRef(node, ".", field, node.coord)
1968 continue
1969 if self._accept("LBRACKET"):
1970 expr = self._parse_expression()
1971 self._expect("RBRACKET")
1972 node = c_ast.ArrayRef(node, expr, node.coord)
1973 continue
1974 break
1975 return node
1977 # BNF: argument_expression_list : assignment_expression (',' assignment_expression)*
1978 def _parse_argument_expression_list(self) -> c_ast.Node:
1979 expr = self._parse_assignment_expression()
1980 exprs = [expr]
1981 while self._accept("COMMA"):
1982 exprs.append(self._parse_assignment_expression())
1983 return c_ast.ExprList(exprs, expr.coord)
    # BNF: constant_expression : conditional_expression
    def _parse_constant_expression(self) -> c_ast.Node:
        # In the C grammar a constant expression is syntactically just a
        # conditional expression; constant-ness is a semantic property and
        # is not enforced here.
        return self._parse_conditional_expression()
    # ------------------------------------------------------------------
    # Terminals
    # ------------------------------------------------------------------
    # BNF: identifier : ID
    def _parse_identifier(self) -> c_ast.Node:
        # Consume a single ID token and wrap it in an AST node carrying
        # its source coordinate.
        tok = self._expect("ID")
        return c_ast.ID(tok.value, self._tok_coord(tok))
1997 # BNF: identifier_or_typeid : ID | TYPEID
1998 def _parse_identifier_or_typeid(self) -> c_ast.Node:
1999 tok = self._advance()
2000 if tok.type not in {"ID", "TYPEID"}:
2001 self._parse_error("Expected identifier", self._tok_coord(tok))
2002 return c_ast.ID(tok.value, self._tok_coord(tok))
2004 # BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST
2005 def _parse_constant(self) -> c_ast.Node:
2006 tok = self._advance()
2007 if tok.type in _INT_CONST:
2008 u_count = 0
2009 l_count = 0
2010 for ch in tok.value[-3:]:
2011 if ch in ("l", "L"):
2012 l_count += 1
2013 elif ch in ("u", "U"):
2014 u_count += 1
2015 if u_count > 1:
2016 raise ValueError("Constant cannot have more than one u/U suffix.")
2017 if l_count > 2:
2018 raise ValueError("Constant cannot have more than two l/L suffix.")
2019 prefix = "unsigned " * u_count + "long " * l_count
2020 return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok))
2022 if tok.type in _FLOAT_CONST:
2023 if tok.value[-1] in ("f", "F"):
2024 t = "float"
2025 elif tok.value[-1] in ("l", "L"):
2026 t = "long double"
2027 else:
2028 t = "double"
2029 return c_ast.Constant(t, tok.value, self._tok_coord(tok))
2031 if tok.type in _CHAR_CONST:
2032 return c_ast.Constant("char", tok.value, self._tok_coord(tok))
2034 self._parse_error("Invalid constant", self._tok_coord(tok))
2036 # BNF: unified_string_literal : STRING_LITERAL+
2037 def _parse_unified_string_literal(self) -> c_ast.Node:
2038 tok = self._expect("STRING_LITERAL")
2039 node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
2040 while self._peek_type() == "STRING_LITERAL":
2041 tok2 = self._advance()
2042 node.value = node.value[:-1] + tok2.value[1:]
2043 return node
2045 # BNF: unified_wstring_literal : WSTRING_LITERAL+
2046 def _parse_unified_wstring_literal(self) -> c_ast.Node:
2047 tok = self._advance()
2048 if tok.type not in _WSTR_LITERAL:
2049 self._parse_error("Invalid string literal", self._tok_coord(tok))
2050 node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
2051 while self._peek_type() in _WSTR_LITERAL:
2052 tok2 = self._advance()
2053 node.value = node.value.rstrip()[:-1] + tok2.value[2:]
2054 return node
2056 # ------------------------------------------------------------------
2057 # Initializers
2058 # ------------------------------------------------------------------
2059 # BNF: initializer : assignment_expression
2060 # | '{' initializer_list ','? '}'
2061 # | '{' '}'
2062 def _parse_initializer(self) -> c_ast.Node:
2063 lbrace_tok = self._accept("LBRACE")
2064 if lbrace_tok:
2065 if self._accept("RBRACE"):
2066 return c_ast.InitList([], self._tok_coord(lbrace_tok))
2067 init_list = self._parse_initializer_list()
2068 self._accept("COMMA")
2069 self._expect("RBRACE")
2070 return init_list
2072 return self._parse_assignment_expression()
2074 # BNF: initializer_list : initializer_item (',' initializer_item)* ','?
2075 def _parse_initializer_list(self) -> c_ast.Node:
2076 items = [self._parse_initializer_item()]
2077 while self._accept("COMMA"):
2078 if self._peek_type() == "RBRACE":
2079 break
2080 items.append(self._parse_initializer_item())
2081 return c_ast.InitList(items, items[0].coord)
2083 # BNF: initializer_item : designation? initializer
2084 def _parse_initializer_item(self) -> c_ast.Node:
2085 designation = None
2086 if self._peek_type() in {"LBRACKET", "PERIOD"}:
2087 designation = self._parse_designation()
2088 init = self._parse_initializer()
2089 if designation is not None:
2090 return c_ast.NamedInitializer(designation, init)
2091 return init
    # BNF: designation : designator_list '='
    def _parse_designation(self) -> List[c_ast.Node]:
        # One or more designators followed by '='; returns the designator
        # list (the '=' itself carries no AST payload).
        designators = self._parse_designator_list()
        self._expect("EQUALS")
        return designators
2099 # BNF: designator_list : designator+
2100 def _parse_designator_list(self) -> List[c_ast.Node]:
2101 designators = []
2102 while self._peek_type() in {"LBRACKET", "PERIOD"}:
2103 designators.append(self._parse_designator())
2104 return designators
2106 # BNF: designator : '[' constant_expression ']'
2107 # | '.' identifier_or_typeid
2108 def _parse_designator(self) -> c_ast.Node:
2109 if self._accept("LBRACKET"):
2110 expr = self._parse_constant_expression()
2111 self._expect("RBRACKET")
2112 return expr
2113 if self._accept("PERIOD"):
2114 return self._parse_identifier_or_typeid()
2115 self._parse_error("Invalid designator", self.clex.filename)
    # ------------------------------------------------------------------
    # Preprocessor-like directives
    # ------------------------------------------------------------------
    # BNF: pp_directive : '#' ... (unsupported)
    def _parse_pp_directive(self) -> NoReturn:
        # Raw '#' directives must be resolved by a real preprocessor before
        # parsing; encountering one here is always an error.
        tok = self._expect("PPHASH")
        self._parse_error("Directives not supported yet", self._tok_coord(tok))
2125 # BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR?
2126 # | _PRAGMA '(' string_literal ')'
2127 def _parse_pppragma_directive(self) -> c_ast.Node:
2128 if self._peek_type() == "PPPRAGMA":
2129 tok = self._advance()
2130 if self._peek_type() == "PPPRAGMASTR":
2131 str_tok = self._advance()
2132 return c_ast.Pragma(str_tok.value, self._tok_coord(str_tok))
2133 return c_ast.Pragma("", self._tok_coord(tok))
2135 if self._peek_type() == "_PRAGMA":
2136 tok = self._advance()
2137 lparen = self._expect("LPAREN")
2138 literal = self._parse_unified_string_literal()
2139 self._expect("RPAREN")
2140 return c_ast.Pragma(literal, self._tok_coord(lparen))
2142 self._parse_error("Invalid pragma", self.clex.filename)
2144 # BNF: pppragma_directive_list : pppragma_directive+
2145 def _parse_pppragma_directive_list(self) -> List[c_ast.Node]:
2146 pragmas = []
2147 while self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
2148 pragmas.append(self._parse_pppragma_directive())
2149 return pragmas
2151 # BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')'
2152 def _parse_static_assert(self) -> List[c_ast.Node]:
2153 tok = self._expect("_STATIC_ASSERT")
2154 self._expect("LPAREN")
2155 cond = self._parse_constant_expression()
2156 msg = None
2157 if self._accept("COMMA"):
2158 msg = self._parse_unified_string_literal()
2159 self._expect("RPAREN")
2160 return [c_ast.StaticAssert(cond, msg, self._tok_coord(tok))]
# Token types of all assignment operators ('=', '^=', '*=', ...).
_ASSIGNMENT_OPS = {
    "EQUALS",
    "XOREQUAL",
    "TIMESEQUAL",
    "DIVEQUAL",
    "MODEQUAL",
    "PLUSEQUAL",
    "MINUSEQUAL",
    "LSHIFTEQUAL",
    "RSHIFTEQUAL",
    "ANDEQUAL",
    "OREQUAL",
}

# Precedence of operators (lower number = weaker binding)
# If this changes, c_generator.CGenerator.precedence_map needs to change as
# well
_BINARY_PRECEDENCE = {
    "LOR": 0,
    "LAND": 1,
    "OR": 2,
    "XOR": 3,
    "AND": 4,
    "EQ": 5,
    "NE": 5,
    "GT": 6,
    "GE": 6,
    "LT": 6,
    "LE": 6,
    "RSHIFT": 7,
    "LSHIFT": 7,
    "PLUS": 8,
    "MINUS": 8,
    "TIMES": 9,
    "DIVIDE": 9,
    "MOD": 9,
}

_STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"}

_FUNCTION_SPEC = {"INLINE", "_NORETURN"}

_TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"}

# Built-in (single-keyword) type specifiers.
_TYPE_SPEC_SIMPLE = {
    "VOID",
    "_BOOL",
    "CHAR",
    "SHORT",
    "INT",
    "LONG",
    "FLOAT",
    "DOUBLE",
    "_COMPLEX",
    "SIGNED",
    "UNSIGNED",
    "__INT128",
}

# Token types that can begin a declaration.
_DECL_START = (
    _STORAGE_CLASS
    | _FUNCTION_SPEC
    | _TYPE_QUALIFIER
    | _TYPE_SPEC_SIMPLE
    | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"}
)

# Non-literal token types that can begin an expression.
_EXPR_START = {
    "ID",
    "LPAREN",
    "PLUSPLUS",
    "MINUSMINUS",
    "PLUS",
    "MINUS",
    "TIMES",
    "AND",
    "NOT",
    "LNOT",
    "SIZEOF",
    "_ALIGNOF",
    "OFFSETOF",
}

_INT_CONST = {
    "INT_CONST_DEC",
    "INT_CONST_OCT",
    "INT_CONST_HEX",
    "INT_CONST_BIN",
    "INT_CONST_CHAR",
}

_FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"}

_CHAR_CONST = {
    "CHAR_CONST",
    "WCHAR_CONST",
    "U8CHAR_CONST",
    "U16CHAR_CONST",
    "U32CHAR_CONST",
}

_STRING_LITERAL = {"STRING_LITERAL"}

# Wide / unicode string literal token types.
_WSTR_LITERAL = {
    "WSTRING_LITERAL",
    "U8STRING_LITERAL",
    "U16STRING_LITERAL",
    "U32STRING_LITERAL",
}

# Any token type that can begin an expression (operators + all literals).
_STARTS_EXPRESSION = (
    _EXPR_START
    | _INT_CONST
    | _FLOAT_CONST
    | _CHAR_CONST
    | _STRING_LITERAL
    | _WSTR_LITERAL
)

# Token types that can begin a statement.
_STARTS_STATEMENT = {
    "LBRACE",
    "IF",
    "SWITCH",
    "WHILE",
    "DO",
    "FOR",
    "GOTO",
    "BREAK",
    "CONTINUE",
    "RETURN",
    "CASE",
    "DEFAULT",
    "PPPRAGMA",
    "_PRAGMA",
    "_STATIC_ASSERT",
    "SEMI",
}
class _TokenStream:
    """Wraps a lexer to provide convenient, buffered access to the underlying
    token stream. The lexer is expected to be initialized with the input
    string already.

    Once the lexer is exhausted, `peek` and `next` keep returning None for
    any position at or past the end of input.
    """

    def __init__(self, lexer: CLexer) -> None:
        self._lexer = lexer
        self._buffer: List[Optional[Token]] = []
        self._index = 0
        # Set once the lexer has returned None; afterwards the buffer is
        # padded with None instead of calling the lexer again.
        self._exhausted = False

    def peek(self, k: int = 1) -> Optional[Token]:
        """Peek at the k-th next token in the stream, without consuming it.

        Examples:
            k=1 returns the immediate next token.
            k=2 returns the token after that.

        Returns None for non-positive k, and for any position at or past
        the end of input.
        """
        if k <= 0:
            return None
        self._fill(k)
        return self._buffer[self._index + k - 1]

    def next(self) -> Optional[Token]:
        """Consume a single token and return it (None at end of input)."""
        self._fill(1)
        tok = self._buffer[self._index]
        self._index += 1
        return tok

    # The 'mark' and 'reset' methods are useful for speculative parsing with
    # backtracking; when the parser needs to examine a sequence of tokens
    # and potentially decide to try a different path on the same sequence, it
    # can call 'mark' to obtain the current token position, and if the first
    # path fails restore the position with `reset(pos)`.
    def mark(self) -> int:
        return self._index

    def reset(self, mark: int) -> None:
        self._index = mark

    def _fill(self, n: int) -> None:
        """Ensure the buffer holds at least n tokens past the current
        position, padding with None once the lexer is exhausted.

        Bug fix: the previous implementation broke out of the loop on the
        first None from the lexer, so a multi-token peek at/near EOF (e.g.
        peek(2) on an empty stream) left the buffer short and
        `self._buffer[self._index + k - 1]` raised IndexError. Padding with
        None keeps every in-range index valid.
        """
        while len(self._buffer) < self._index + n:
            tok = None if self._exhausted else self._lexer.token()
            if tok is None:
                self._exhausted = True
            self._buffer.append(tok)
# Declaration specifiers are represented by a dictionary with entries:
# - qual: a list of type qualifiers
# - storage: a list of storage class specifiers
# - type: a list of type specifiers
# - function: a list of function specifiers
# - alignment: a list of alignment specifiers
class _DeclSpec(TypedDict):
    qual: List[Any]
    storage: List[Any]
    type: List[Any]
    function: List[Any]
    alignment: List[Any]


# Literal type naming one of the _DeclSpec entries, for addressing a
# specifier category by key.
_DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"]
# Per-declarator record produced while parsing a declaration.
class _DeclInfo(TypedDict):
    # Declarator payloads used by declaration/initializer parsing:
    # - decl: the declarator node (may be None for abstract/implicit cases)
    # - init: optional initializer expression
    # - bitsize: optional bit-field width expression (for struct declarators)
    decl: Optional[c_ast.Node]
    init: Optional[c_ast.Node]
    bitsize: Optional[c_ast.Node]