Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pycparser/c_parser.py: 42%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1310 statements  

1# ------------------------------------------------------------------------------ 

2# pycparser: c_parser.py 

3# 

4# Recursive-descent parser for the C language. 

5# 

6# Eli Bendersky [https://eli.thegreenplace.net/] 

7# License: BSD 

8# ------------------------------------------------------------------------------ 

9from dataclasses import dataclass 

10from typing import ( 

11 Any, 

12 Dict, 

13 List, 

14 Literal, 

15 NoReturn, 

16 Optional, 

17 Tuple, 

18 TypedDict, 

19 cast, 

20) 

21 

22from . import c_ast 

23from .c_lexer import CLexer, Token 

24from .ast_transforms import fix_switch_cases, fix_atomic_specifiers 

25 

26 

27@dataclass 

28class Coord: 

29 """Coordinates of a syntactic element. Consists of: 

30 - File name 

31 - Line number 

32 - Column number 

33 """ 

34 

35 file: str 

36 line: int 

37 column: Optional[int] = None 

38 

39 def __str__(self) -> str: 

40 text = f"{self.file}:{self.line}" 

41 if self.column is not None: 

42 text += f":{self.column}" 

43 return text 

44 

45 

46class ParseError(Exception): 

47 pass 

48 

49 

50class CParser: 

51 """Recursive-descent C parser. 

52 

53 Usage: 

54 parser = CParser() 

55 ast = parser.parse(text, filename) 

56 

57 The `lexer` parameter lets you inject a lexer class (defaults to CLexer). 

58 The parameters after `lexer` are accepted for backward compatibility with 

59 the old PLY-based parser and are otherwise unused. 

60 """ 

61 

62 def __init__( 

63 self, 

64 lex_optimize: bool = True, 

65 lexer: type[CLexer] = CLexer, 

66 lextab: str = "pycparser.lextab", 

67 yacc_optimize: bool = True, 

68 yacctab: str = "pycparser.yacctab", 

69 yacc_debug: bool = False, 

70 taboutputdir: str = "", 

71 ) -> None: 

72 self.clex: CLexer = lexer( 

73 error_func=self._lex_error_func, 

74 on_lbrace_func=self._lex_on_lbrace_func, 

75 on_rbrace_func=self._lex_on_rbrace_func, 

76 type_lookup_func=self._lex_type_lookup_func, 

77 ) 

78 

79 # Stack of scopes for keeping track of symbols. _scope_stack[-1] is 

80 # the current (topmost) scope. Each scope is a dictionary that 

81 # specifies whether a name is a type. If _scope_stack[n][name] is 

82 # True, 'name' is currently a type in the scope. If it's False, 

83 # 'name' is defined in the scope but not as a type (for instance, if we 

84 # saw: int name;) 

85 # If 'name' is not a key in _scope_stack[n] then 'name' was not defined 

86 # in this scope at all. 

87 self._scope_stack: List[Dict[str, bool]] = [dict()] 

88 self._tokens: _TokenStream = _TokenStream(self.clex) 

89 

90 def parse( 

91 self, text: str, filename: str = "", debug: bool = False 

92 ) -> c_ast.FileAST: 

93 """Parses C code and returns an AST. 

94 

95 text: 

96 A string containing the C source code 

97 

98 filename: 

99 Name of the file being parsed (for meaningful error messages) 

100 

101 debug: 

102 Deprecated debug flag (unused); for backwards compatibility. 

103 """ 

104 self._scope_stack = [dict()] 

105 self.clex.input(text, filename) 

106 self._tokens = _TokenStream(self.clex) 

107 

108 ast = self._parse_translation_unit_or_empty() 

109 tok = self._peek() 

110 if tok is not None: 

111 self._parse_error(f"before: {tok.value}", self._tok_coord(tok)) 

112 return ast 

113 

114 # ------------------------------------------------------------------ 

115 # Scope and declaration helpers 

116 # ------------------------------------------------------------------ 

117 def _coord(self, lineno: int, column: Optional[int] = None) -> Coord: 

118 return Coord(file=self.clex.filename, line=lineno, column=column) 

119 

120 def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn: 

121 raise ParseError(f"{coord}: {msg}") 

122 

123 def _push_scope(self) -> None: 

124 self._scope_stack.append(dict()) 

125 

126 def _pop_scope(self) -> None: 

127 assert len(self._scope_stack) > 1 

128 self._scope_stack.pop() 

129 

130 def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None: 

131 """Add a new typedef name (ie a TYPEID) to the current scope""" 

132 if not self._scope_stack[-1].get(name, True): 

133 self._parse_error( 

134 f"Typedef {name!r} previously declared as non-typedef in this scope", 

135 coord, 

136 ) 

137 self._scope_stack[-1][name] = True 

138 

139 def _add_identifier(self, name: str, coord: Optional[Coord]) -> None: 

140 """Add a new object, function, or enum member name (ie an ID) to the 

141 current scope 

142 """ 

143 if self._scope_stack[-1].get(name, False): 

144 self._parse_error( 

145 f"Non-typedef {name!r} previously declared as typedef in this scope", 

146 coord, 

147 ) 

148 self._scope_stack[-1][name] = False 

149 

150 def _is_type_in_scope(self, name: str) -> bool: 

151 """Is *name* a typedef-name in the current scope?""" 

152 for scope in reversed(self._scope_stack): 

153 # If name is an identifier in this scope it shadows typedefs in 

154 # higher scopes. 

155 if name in scope: 

156 return scope[name] 

157 return False 

158 

159 def _lex_error_func(self, msg: str, line: int, column: int) -> None: 

160 self._parse_error(msg, self._coord(line, column)) 

161 

162 def _lex_on_lbrace_func(self) -> None: 

163 self._push_scope() 

164 

165 def _lex_on_rbrace_func(self) -> None: 

166 self._pop_scope() 

167 

168 def _lex_type_lookup_func(self, name: str) -> bool: 

169 """Looks up types that were previously defined with typedef. 

170 

171 Passed to the lexer for recognizing identifiers that are types. 

172 """ 

173 return self._is_type_in_scope(name) 

174 

175 # To understand what's going on here, read sections A.8.5 and 

176 # A.8.6 of K&R2 very carefully. 

177 # 

178 # A C type consists of a basic type declaration, with a list 

179 # of modifiers. For example: 

180 # 

181 # int *c[5]; 

182 # 

183 # The basic declaration here is 'int c', and the pointer and 

184 # the array are the modifiers. 

185 # 

186 # Basic declarations are represented by TypeDecl (from module c_ast) and the 

187 # modifiers are FuncDecl, PtrDecl and ArrayDecl. 

188 # 

189 # The standard states that whenever a new modifier is parsed, it should be 

190 # added to the end of the list of modifiers. For example: 

191 # 

192 # K&R2 A.8.6.2: Array Declarators 

193 # 

194 # In a declaration T D where D has the form 

195 # D1 [constant-expression-opt] 

196 # and the type of the identifier in the declaration T D1 is 

197 # "type-modifier T", the type of the 

198 # identifier of D is "type-modifier array of T" 

199 # 

200 # This is what this method does. The declarator it receives 

201 # can be a list of declarators ending with TypeDecl. It 

202 # tacks the modifier to the end of this list, just before 

203 # the TypeDecl. 

204 # 

205 # Additionally, the modifier may be a list itself. This is 

206 # useful for pointers, that can come as a chain from the rule 

207 # p_pointer. In this case, the whole modifier list is spliced 

208 # into the new location. 

209 def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node: 

210 """Tacks a type modifier on a declarator, and returns 

211 the modified declarator. 

212 

213 Note: the declarator and modifier may be modified 

214 """ 

215 modifier_head = modifier 

216 modifier_tail = modifier 

217 

218 # The modifier may be a nested list. Reach its tail. 

219 while modifier_tail.type: 

220 modifier_tail = modifier_tail.type 

221 

222 # If the decl is a basic type, just tack the modifier onto it. 

223 if isinstance(decl, c_ast.TypeDecl): 

224 modifier_tail.type = decl 

225 return modifier 

226 else: 

227 # Otherwise, the decl is a list of modifiers. Reach 

228 # its tail and splice the modifier onto the tail, 

229 # pointing to the underlying basic type. 

230 decl_tail = decl 

231 while not isinstance(decl_tail.type, c_ast.TypeDecl): 

232 decl_tail = decl_tail.type 

233 

234 modifier_tail.type = decl_tail.type 

235 decl_tail.type = modifier_head 

236 return decl 

237 

238 # Due to the order in which declarators are constructed, 

239 # they have to be fixed in order to look like a normal AST. 

240 # 

241 # When a declaration arrives from syntax construction, it has 

242 # these problems: 

243 # * The innermost TypeDecl has no type (because the basic 

244 # type is only known at the uppermost declaration level) 

245 # * The declaration has no variable name, since that is saved 

246 # in the innermost TypeDecl 

247 # * The typename of the declaration is a list of type 

248 # specifiers, and not a node. Here, basic identifier types 

249 # should be separated from more complex types like enums 

250 # and structs. 

251 # 

252 # This method fixes these problems. 

253 def _fix_decl_name_type( 

254 self, 

255 decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename, 

256 typename: List[Any], 

257 ) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename: 

258 """Fixes a declaration. Modifies decl.""" 

259 # Reach the underlying basic type 

260 typ = decl 

261 while not isinstance(typ, c_ast.TypeDecl): 

262 typ = typ.type 

263 

264 decl.name = typ.declname 

265 typ.quals = decl.quals[:] 

266 

267 # The typename is a list of types. If any type in this 

268 # list isn't an IdentifierType, it must be the only 

269 # type in the list (it's illegal to declare "int enum ..") 

270 # If all the types are basic, they're collected in the 

271 # IdentifierType holder. 

272 for tn in typename: 

273 if not isinstance(tn, c_ast.IdentifierType): 

274 if len(typename) > 1: 

275 self._parse_error("Invalid multiple types specified", tn.coord) 

276 else: 

277 typ.type = tn 

278 return decl 

279 

280 if not typename: 

281 # Functions default to returning int 

282 if not isinstance(decl.type, c_ast.FuncDecl): 

283 self._parse_error("Missing type in declaration", decl.coord) 

284 typ.type = c_ast.IdentifierType(["int"], coord=decl.coord) 

285 else: 

286 # At this point, we know that typename is a list of IdentifierType 

287 # nodes. Concatenate all the names into a single list. 

288 typ.type = c_ast.IdentifierType( 

289 [name for id in typename for name in id.names], coord=typename[0].coord 

290 ) 

291 return decl 

292 

293 def _add_declaration_specifier( 

294 self, 

295 declspec: Optional["_DeclSpec"], 

296 newspec: Any, 

297 kind: "_DeclSpecKind", 

298 append: bool = False, 

299 ) -> "_DeclSpec": 

300 """See _DeclSpec for the specifier dictionary layout.""" 

301 if declspec is None: 

302 spec: _DeclSpec = dict( 

303 qual=[], storage=[], type=[], function=[], alignment=[] 

304 ) 

305 else: 

306 spec = declspec 

307 

308 if append: 

309 spec[kind].append(newspec) 

310 else: 

311 spec[kind].insert(0, newspec) 

312 

313 return spec 

314 

315 def _build_declarations( 

316 self, 

317 spec: "_DeclSpec", 

318 decls: List["_DeclInfo"], 

319 typedef_namespace: bool = False, 

320 ) -> List[c_ast.Node]: 

321 """Builds a list of declarations all sharing the given specifiers. 

322 If typedef_namespace is true, each declared name is added 

323 to the "typedef namespace", which also includes objects, 

324 functions, and enum constants. 

325 """ 

326 is_typedef = "typedef" in spec["storage"] 

327 declarations = [] 

328 

329 # Bit-fields are allowed to be unnamed. 

330 if decls[0].get("bitsize") is None: 

331 # When redeclaring typedef names as identifiers in inner scopes, a 

332 # problem can occur where the identifier gets grouped into 

333 # spec['type'], leaving decl as None. This can only occur for the 

334 # first declarator. 

335 if decls[0]["decl"] is None: 

336 if ( 

337 len(spec["type"]) < 2 

338 or len(spec["type"][-1].names) != 1 

339 or not self._is_type_in_scope(spec["type"][-1].names[0]) 

340 ): 

341 coord = "?" 

342 for t in spec["type"]: 

343 if hasattr(t, "coord"): 

344 coord = t.coord 

345 break 

346 self._parse_error("Invalid declaration", coord) 

347 

348 # Make this look as if it came from "direct_declarator:ID" 

349 decls[0]["decl"] = c_ast.TypeDecl( 

350 declname=spec["type"][-1].names[0], 

351 type=None, 

352 quals=None, 

353 align=spec["alignment"], 

354 coord=spec["type"][-1].coord, 

355 ) 

356 # Remove the "new" type's name from the end of spec['type'] 

357 del spec["type"][-1] 

358 # A similar problem can occur where the declaration ends up 

359 # looking like an abstract declarator. Give it a name if this is 

360 # the case. 

361 elif not isinstance( 

362 decls[0]["decl"], 

363 (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType), 

364 ): 

365 decls_0_tail = cast(Any, decls[0]["decl"]) 

366 while not isinstance(decls_0_tail, c_ast.TypeDecl): 

367 decls_0_tail = decls_0_tail.type 

368 if decls_0_tail.declname is None: 

369 decls_0_tail.declname = spec["type"][-1].names[0] 

370 del spec["type"][-1] 

371 

372 for decl in decls: 

373 assert decl["decl"] is not None 

374 if is_typedef: 

375 declaration = c_ast.Typedef( 

376 name=None, 

377 quals=spec["qual"], 

378 storage=spec["storage"], 

379 type=decl["decl"], 

380 coord=decl["decl"].coord, 

381 ) 

382 else: 

383 declaration = c_ast.Decl( 

384 name=None, 

385 quals=spec["qual"], 

386 align=spec["alignment"], 

387 storage=spec["storage"], 

388 funcspec=spec["function"], 

389 type=decl["decl"], 

390 init=decl.get("init"), 

391 bitsize=decl.get("bitsize"), 

392 coord=decl["decl"].coord, 

393 ) 

394 

395 if isinstance( 

396 declaration.type, 

397 (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType), 

398 ): 

399 fixed_decl = declaration 

400 else: 

401 fixed_decl = self._fix_decl_name_type(declaration, spec["type"]) 

402 

403 # Add the type name defined by typedef to a 

404 # symbol table (for usage in the lexer) 

405 if typedef_namespace: 

406 if is_typedef: 

407 self._add_typedef_name(fixed_decl.name, fixed_decl.coord) 

408 else: 

409 self._add_identifier(fixed_decl.name, fixed_decl.coord) 

410 

411 fixed_decl = fix_atomic_specifiers( 

412 cast(c_ast.Decl | c_ast.Typedef, fixed_decl) 

413 ) 

414 declarations.append(fixed_decl) 

415 

416 return declarations 

417 

418 def _build_function_definition( 

419 self, 

420 spec: "_DeclSpec", 

421 decl: c_ast.Node, 

422 param_decls: Optional[List[c_ast.Node]], 

423 body: c_ast.Node, 

424 ) -> c_ast.Node: 

425 """Builds a function definition.""" 

426 if "typedef" in spec["storage"]: 

427 self._parse_error("Invalid typedef", decl.coord) 

428 

429 declaration = self._build_declarations( 

430 spec=spec, 

431 decls=[dict(decl=decl, init=None, bitsize=None)], 

432 typedef_namespace=True, 

433 )[0] 

434 

435 return c_ast.FuncDef( 

436 decl=declaration, param_decls=param_decls, body=body, coord=decl.coord 

437 ) 

438 

439 def _select_struct_union_class(self, token: str) -> type: 

440 """Given a token (either STRUCT or UNION), selects the 

441 appropriate AST class. 

442 """ 

443 if token == "struct": 

444 return c_ast.Struct 

445 else: 

446 return c_ast.Union 

447 

448 # ------------------------------------------------------------------ 

449 # Token helpers 

450 # ------------------------------------------------------------------ 

451 def _peek(self, k: int = 1) -> Optional[Token]: 

452 """Return the k-th next token without consuming it (1-based).""" 

453 return self._tokens.peek(k) 

454 

455 def _peek_type(self, k: int = 1) -> Optional[str]: 

456 """Return the type of the k-th next token, or None if absent (1-based).""" 

457 tok = self._peek(k) 

458 return tok.type if tok is not None else None 

459 

460 def _advance(self) -> Token: 

461 tok = self._tokens.next() 

462 if tok is None: 

463 self._parse_error("At end of input", self.clex.filename) 

464 else: 

465 return tok 

466 

467 def _accept(self, token_type: str) -> Optional[Token]: 

468 """Conditionally consume next token, only if it's of token_type. 

469 

470 If it is of the expected type, consume and return it. 

471 Otherwise, leaves the token intact and returns None. 

472 """ 

473 tok = self._peek() 

474 if tok is not None and tok.type == token_type: 

475 return self._advance() 

476 return None 

477 

478 def _expect(self, token_type: str) -> Token: 

479 tok = self._advance() 

480 if tok.type != token_type: 

481 self._parse_error(f"before: {tok.value}", self._tok_coord(tok)) 

482 return tok 

483 

484 def _mark(self) -> int: 

485 return self._tokens.mark() 

486 

487 def _reset(self, mark: int) -> None: 

488 self._tokens.reset(mark) 

489 

490 def _tok_coord(self, tok: Token) -> Coord: 

491 return self._coord(tok.lineno, tok.column) 

492 

493 def _starts_declaration(self, tok: Optional[Token] = None) -> bool: 

494 tok = tok or self._peek() 

495 if tok is None: 

496 return False 

497 return tok.type in _DECL_START 

498 

499 def _starts_expression(self, tok: Optional[Token] = None) -> bool: 

500 tok = tok or self._peek() 

501 if tok is None: 

502 return False 

503 return tok.type in _STARTS_EXPRESSION 

504 

505 def _starts_statement(self) -> bool: 

506 tok_type = self._peek_type() 

507 if tok_type is None: 

508 return False 

509 if tok_type in _STARTS_STATEMENT: 

510 return True 

511 return self._starts_expression() 

512 

513 def _starts_declarator(self, id_only: bool = False) -> bool: 

514 tok_type = self._peek_type() 

515 if tok_type is None: 

516 return False 

517 if tok_type in {"TIMES", "LPAREN"}: 

518 return True 

519 if id_only: 

520 return tok_type == "ID" 

521 return tok_type in {"ID", "TYPEID"} 

522 

523 def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]: 

524 mark = self._mark() 

525 tok_type, saw_paren = self._scan_declarator_name_info() 

526 self._reset(mark) 

527 return tok_type, saw_paren 

528 

529 def _parse_any_declarator( 

530 self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False 

531 ) -> Tuple[Optional[c_ast.Node], bool]: 

532 # C declarators are ambiguous without lookahead. For example: 

533 # int foo(int (aa)); -> aa is a name (ID) 

534 # typedef char TT; 

535 # int bar(int (TT)); -> TT is a type (TYPEID) in parens 

536 name_type, saw_paren = self._peek_declarator_name_info() 

537 if name_type is None or ( 

538 typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren 

539 ): 

540 if not allow_abstract: 

541 tok = self._peek() 

542 coord = self._tok_coord(tok) if tok is not None else self.clex.filename 

543 self._parse_error("Invalid declarator", coord) 

544 decl = self._parse_abstract_declarator_opt() 

545 return decl, False 

546 

547 if name_type == "TYPEID": 

548 if typeid_paren_as_abstract: 

549 decl = self._parse_typeid_noparen_declarator() 

550 else: 

551 decl = self._parse_typeid_declarator() 

552 else: 

553 decl = self._parse_id_declarator() 

554 return decl, True 

555 

556 def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]: 

557 saw_paren = False 

558 while self._accept("TIMES"): 

559 while self._peek_type() in _TYPE_QUALIFIER: 

560 self._advance() 

561 

562 tok = self._peek() 

563 if tok is None: 

564 return None, saw_paren 

565 if tok.type in {"ID", "TYPEID"}: 

566 self._advance() 

567 return tok.type, saw_paren 

568 if tok.type == "LPAREN": 

569 saw_paren = True 

570 self._advance() 

571 tok_type, nested_paren = self._scan_declarator_name_info() 

572 if nested_paren: 

573 saw_paren = True 

574 depth = 1 

575 while True: 

576 tok = self._peek() 

577 if tok is None: 

578 return None, saw_paren 

579 if tok.type == "LPAREN": 

580 depth += 1 

581 elif tok.type == "RPAREN": 

582 depth -= 1 

583 self._advance() 

584 if depth == 0: 

585 break 

586 continue 

587 self._advance() 

588 return tok_type, saw_paren 

589 return None, saw_paren 

590 

591 def _starts_direct_abstract_declarator(self) -> bool: 

592 return self._peek_type() in {"LPAREN", "LBRACKET"} 

593 

594 def _is_assignment_op(self) -> bool: 

595 tok = self._peek() 

596 return tok is not None and tok.type in _ASSIGNMENT_OPS 

597 

598 def _try_parse_paren_type_name( 

599 self, 

600 ) -> Optional[Tuple[c_ast.Typename, int, Token]]: 

601 """Parse and return a parenthesized type name if present. 

602 

603 Returns (typ, mark, lparen_tok) when the next tokens look like 

604 '(' type_name ')', where typ is the parsed type name, mark is the 

605 token-stream position before parsing, and lparen_tok is the LPAREN 

606 token. Returns None if no parenthesized type name is present. 

607 """ 

608 mark = self._mark() 

609 lparen_tok = self._accept("LPAREN") 

610 if lparen_tok is None: 

611 return None 

612 if not self._starts_declaration(): 

613 self._reset(mark) 

614 return None 

615 typ = self._parse_type_name() 

616 if self._accept("RPAREN") is None: 

617 self._reset(mark) 

618 return None 

619 return typ, mark, lparen_tok 

620 

621 # ------------------------------------------------------------------ 

622 # Top-level 

623 # ------------------------------------------------------------------ 

624 # BNF: translation_unit_or_empty : translation_unit | empty 

625 def _parse_translation_unit_or_empty(self) -> c_ast.FileAST: 

626 if self._peek() is None: 

627 return c_ast.FileAST([]) 

628 return c_ast.FileAST(self._parse_translation_unit()) 

629 

630 # BNF: translation_unit : external_declaration+ 

631 def _parse_translation_unit(self) -> List[c_ast.Node]: 

632 ext = [] 

633 while self._peek() is not None: 

634 ext.extend(self._parse_external_declaration()) 

635 return ext 

636 

637 # BNF: external_declaration : function_definition 

638 # | declaration 

639 # | pp_directive 

640 # | pppragma_directive 

641 # | static_assert 

642 # | ';' 

643 def _parse_external_declaration(self) -> List[c_ast.Node]: 

644 tok = self._peek() 

645 if tok is None: 

646 return [] 

647 if tok.type == "PPHASH": 

648 self._parse_pp_directive() 

649 return [] 

650 if tok.type in {"PPPRAGMA", "_PRAGMA"}: 

651 return [self._parse_pppragma_directive()] 

652 if self._accept("SEMI"): 

653 return [] 

654 if tok.type == "_STATIC_ASSERT": 

655 return self._parse_static_assert() 

656 

657 if not self._starts_declaration(tok): 

658 # Special handling for old-style function definitions that have an 

659 # implicit return type, e.g. 

660 # 

661 # foo() { 

662 # return 5; 

663 # } 

664 # 

665 # These get an implicit 'int' return type. 

666 decl = self._parse_id_declarator() 

667 param_decls = None 

668 if self._peek_type() != "LBRACE": 

669 self._parse_error("Invalid function definition", decl.coord) 

670 spec: _DeclSpec = dict( 

671 qual=[], 

672 alignment=[], 

673 storage=[], 

674 type=[c_ast.IdentifierType(["int"], coord=decl.coord)], 

675 function=[], 

676 ) 

677 func = self._build_function_definition( 

678 spec=spec, 

679 decl=decl, 

680 param_decls=param_decls, 

681 body=self._parse_compound_statement(), 

682 ) 

683 return [func] 

684 

685 # From here on, parsing a standard declatation/definition. 

686 spec, saw_type, spec_coord = self._parse_declaration_specifiers( 

687 allow_no_type=True 

688 ) 

689 

690 name_type, _ = self._peek_declarator_name_info() 

691 if name_type != "ID": 

692 decls = self._parse_decl_body_with_spec(spec, saw_type) 

693 self._expect("SEMI") 

694 return decls 

695 

696 decl = self._parse_id_declarator() 

697 

698 if self._peek_type() == "LBRACE" or self._starts_declaration(): 

699 param_decls = None 

700 if self._starts_declaration(): 

701 param_decls = self._parse_declaration_list() 

702 if self._peek_type() != "LBRACE": 

703 self._parse_error("Invalid function definition", decl.coord) 

704 if not spec["type"]: 

705 spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)] 

706 func = self._build_function_definition( 

707 spec=spec, 

708 decl=decl, 

709 param_decls=param_decls, 

710 body=self._parse_compound_statement(), 

711 ) 

712 return [func] 

713 

714 decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None) 

715 if self._accept("EQUALS"): 

716 decl_dict["init"] = self._parse_initializer() 

717 decls = self._parse_init_declarator_list(first=decl_dict) 

718 decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True) 

719 self._expect("SEMI") 

720 return decls 

721 

722 # ------------------------------------------------------------------ 

723 # Declarations 

724 # 

725 # Declarations always come as lists (because they can be several in one 

726 # line). When returning parsed declarations, a list is always returned - 

727 # even if it contains a single element. 

728 # ------------------------------------------------------------------ 

729 def _parse_declaration(self) -> List[c_ast.Node]: 

730 decls = self._parse_decl_body() 

731 self._expect("SEMI") 

732 return decls 

733 

734 # BNF: decl_body : declaration_specifiers decl_body_with_spec 

735 def _parse_decl_body(self) -> List[c_ast.Node]: 

736 spec, saw_type, _ = self._parse_declaration_specifiers(allow_no_type=True) 

737 return self._parse_decl_body_with_spec(spec, saw_type) 

738 

739 # BNF: decl_body_with_spec : init_declarator_list 

740 # | struct_or_union_or_enum_only 

741 def _parse_decl_body_with_spec( 

742 self, spec: "_DeclSpec", saw_type: bool 

743 ) -> List[c_ast.Node]: 

744 # saw_type is True if the specifiers included an actual type (as 

745 # opposed to only storage/function/qualifiers). 

746 decls = None 

747 if saw_type: 

748 if self._starts_declarator(): 

749 decls = self._parse_init_declarator_list() 

750 else: 

751 if self._starts_declarator(id_only=True): 

752 decls = self._parse_init_declarator_list(id_only=True) 

753 

754 if decls is None: 

755 ty = spec["type"] 

756 s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum) 

757 if len(ty) == 1 and isinstance(ty[0], s_u_or_e): 

758 decls = [ 

759 c_ast.Decl( 

760 name=None, 

761 quals=spec["qual"], 

762 align=spec["alignment"], 

763 storage=spec["storage"], 

764 funcspec=spec["function"], 

765 type=ty[0], 

766 init=None, 

767 bitsize=None, 

768 coord=ty[0].coord, 

769 ) 

770 ] 

771 else: 

772 decls = self._build_declarations( 

773 spec=spec, 

774 decls=[dict(decl=None, init=None, bitsize=None)], 

775 typedef_namespace=True, 

776 ) 

777 else: 

778 decls = self._build_declarations( 

779 spec=spec, decls=decls, typedef_namespace=True 

780 ) 

781 

782 return decls 

783 

784 # BNF: declaration_list : declaration+ 

785 def _parse_declaration_list(self) -> List[c_ast.Node]: 

786 decls = [] 

787 while self._starts_declaration(): 

788 decls.extend(self._parse_declaration()) 

789 return decls 

790 

791 # BNF: declaration_specifiers : (storage_class_specifier 

792 # | type_specifier 

793 # | type_qualifier 

794 # | function_specifier 

795 # | alignment_specifier)+ 

796 def _parse_declaration_specifiers( 

797 self, allow_no_type: bool = False 

798 ) -> Tuple["_DeclSpec", bool, Optional[Coord]]: 

799 """Parse declaration-specifier sequence. 

800 

801 allow_no_type: 

802 If True, allow a missing type specifier without error. 

803 

804 Returns: 

805 (spec, saw_type, first_coord) where spec is a dict with 

806 qual/storage/type/function/alignment entries, saw_type is True 

807 if a type specifier was consumed, and first_coord is the coord 

808 of the first specifier token (used for diagnostics). 

809 """ 

810 spec = None 

811 saw_type = False 

812 first_coord = None 

813 

814 while True: 

815 tok = self._peek() 

816 if tok is None: 

817 break 

818 

819 if tok.type == "_ALIGNAS": 

820 if first_coord is None: 

821 first_coord = self._tok_coord(tok) 

822 spec = self._add_declaration_specifier( 

823 spec, self._parse_alignment_specifier(), "alignment", append=True 

824 ) 

825 continue 

826 

827 if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN": 

828 if first_coord is None: 

829 first_coord = self._tok_coord(tok) 

830 spec = self._add_declaration_specifier( 

831 spec, self._parse_atomic_specifier(), "type", append=True 

832 ) 

833 saw_type = True 

834 continue 

835 

836 if tok.type in _TYPE_QUALIFIER: 

837 if first_coord is None: 

838 first_coord = self._tok_coord(tok) 

839 spec = self._add_declaration_specifier( 

840 spec, self._advance().value, "qual", append=True 

841 ) 

842 continue 

843 

844 if tok.type in _STORAGE_CLASS: 

845 if first_coord is None: 

846 first_coord = self._tok_coord(tok) 

847 spec = self._add_declaration_specifier( 

848 spec, self._advance().value, "storage", append=True 

849 ) 

850 continue 

851 

852 if tok.type in _FUNCTION_SPEC: 

853 if first_coord is None: 

854 first_coord = self._tok_coord(tok) 

855 spec = self._add_declaration_specifier( 

856 spec, self._advance().value, "function", append=True 

857 ) 

858 continue 

859 

860 if tok.type in _TYPE_SPEC_SIMPLE: 

861 if first_coord is None: 

862 first_coord = self._tok_coord(tok) 

863 tok = self._advance() 

864 spec = self._add_declaration_specifier( 

865 spec, 

866 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

867 "type", 

868 append=True, 

869 ) 

870 saw_type = True 

871 continue 

872 

873 if tok.type == "TYPEID": 

874 if saw_type: 

875 break 

876 if first_coord is None: 

877 first_coord = self._tok_coord(tok) 

878 tok = self._advance() 

879 spec = self._add_declaration_specifier( 

880 spec, 

881 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

882 "type", 

883 append=True, 

884 ) 

885 saw_type = True 

886 continue 

887 

888 if tok.type in {"STRUCT", "UNION"}: 

889 if first_coord is None: 

890 first_coord = self._tok_coord(tok) 

891 spec = self._add_declaration_specifier( 

892 spec, self._parse_struct_or_union_specifier(), "type", append=True 

893 ) 

894 saw_type = True 

895 continue 

896 

897 if tok.type == "ENUM": 

898 if first_coord is None: 

899 first_coord = self._tok_coord(tok) 

900 spec = self._add_declaration_specifier( 

901 spec, self._parse_enum_specifier(), "type", append=True 

902 ) 

903 saw_type = True 

904 continue 

905 

906 break 

907 

908 if spec is None: 

909 self._parse_error("Invalid declaration", self.clex.filename) 

910 

911 if not saw_type and not allow_no_type: 

912 self._parse_error("Missing type in declaration", first_coord) 

913 

914 return spec, saw_type, first_coord 

915 

916 # BNF: specifier_qualifier_list : (type_specifier 

917 # | type_qualifier 

918 # | alignment_specifier)+ 

919 def _parse_specifier_qualifier_list(self) -> "_DeclSpec": 

920 spec = None 

921 saw_type = False 

922 saw_alignment = False 

923 first_coord = None 

924 

925 while True: 

926 tok = self._peek() 

927 if tok is None: 

928 break 

929 

930 if tok.type == "_ALIGNAS": 

931 if first_coord is None: 

932 first_coord = self._tok_coord(tok) 

933 spec = self._add_declaration_specifier( 

934 spec, self._parse_alignment_specifier(), "alignment", append=True 

935 ) 

936 saw_alignment = True 

937 continue 

938 

939 if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN": 

940 if first_coord is None: 

941 first_coord = self._tok_coord(tok) 

942 spec = self._add_declaration_specifier( 

943 spec, self._parse_atomic_specifier(), "type", append=True 

944 ) 

945 saw_type = True 

946 continue 

947 

948 if tok.type in _TYPE_QUALIFIER: 

949 if first_coord is None: 

950 first_coord = self._tok_coord(tok) 

951 spec = self._add_declaration_specifier( 

952 spec, self._advance().value, "qual", append=True 

953 ) 

954 continue 

955 

956 if tok.type in _TYPE_SPEC_SIMPLE: 

957 if first_coord is None: 

958 first_coord = self._tok_coord(tok) 

959 tok = self._advance() 

960 spec = self._add_declaration_specifier( 

961 spec, 

962 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

963 "type", 

964 append=True, 

965 ) 

966 saw_type = True 

967 continue 

968 

969 if tok.type == "TYPEID": 

970 if saw_type: 

971 break 

972 if first_coord is None: 

973 first_coord = self._tok_coord(tok) 

974 tok = self._advance() 

975 spec = self._add_declaration_specifier( 

976 spec, 

977 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

978 "type", 

979 append=True, 

980 ) 

981 saw_type = True 

982 continue 

983 

984 if tok.type in {"STRUCT", "UNION"}: 

985 if first_coord is None: 

986 first_coord = self._tok_coord(tok) 

987 spec = self._add_declaration_specifier( 

988 spec, self._parse_struct_or_union_specifier(), "type", append=True 

989 ) 

990 saw_type = True 

991 continue 

992 

993 if tok.type == "ENUM": 

994 if first_coord is None: 

995 first_coord = self._tok_coord(tok) 

996 spec = self._add_declaration_specifier( 

997 spec, self._parse_enum_specifier(), "type", append=True 

998 ) 

999 saw_type = True 

1000 continue 

1001 

1002 break 

1003 

1004 if spec is None: 

1005 self._parse_error("Invalid specifier list", self.clex.filename) 

1006 

1007 if not saw_type and not saw_alignment: 

1008 self._parse_error("Missing type in declaration", first_coord) 

1009 

1010 if spec.get("storage") is None: 

1011 spec["storage"] = [] 

1012 if spec.get("function") is None: 

1013 spec["function"] = [] 

1014 

1015 return spec 

1016 

1017 # BNF: type_qualifier_list : type_qualifier+ 

1018 def _parse_type_qualifier_list(self) -> List[str]: 

1019 quals = [] 

1020 while self._peek_type() in _TYPE_QUALIFIER: 

1021 quals.append(self._advance().value) 

1022 return quals 

1023 

1024 # BNF: alignment_specifier : _ALIGNAS '(' type_name | constant_expression ')' 

1025 def _parse_alignment_specifier(self) -> c_ast.Node: 

1026 tok = self._expect("_ALIGNAS") 

1027 self._expect("LPAREN") 

1028 

1029 if self._starts_declaration(): 

1030 typ = self._parse_type_name() 

1031 self._expect("RPAREN") 

1032 return c_ast.Alignas(typ, self._tok_coord(tok)) 

1033 

1034 expr = self._parse_constant_expression() 

1035 self._expect("RPAREN") 

1036 return c_ast.Alignas(expr, self._tok_coord(tok)) 

1037 

1038 # BNF: atomic_specifier : _ATOMIC '(' type_name ')' 

1039 def _parse_atomic_specifier(self) -> c_ast.Node: 

1040 self._expect("_ATOMIC") 

1041 self._expect("LPAREN") 

1042 typ = self._parse_type_name() 

1043 self._expect("RPAREN") 

1044 typ.quals.append("_Atomic") 

1045 return typ 

1046 

1047 # BNF: init_declarator_list : init_declarator (',' init_declarator)* 

1048 def _parse_init_declarator_list( 

1049 self, first: Optional["_DeclInfo"] = None, id_only: bool = False 

1050 ) -> List["_DeclInfo"]: 

1051 decls = ( 

1052 [first] 

1053 if first is not None 

1054 else [self._parse_init_declarator(id_only=id_only)] 

1055 ) 

1056 

1057 while self._accept("COMMA"): 

1058 decls.append(self._parse_init_declarator(id_only=id_only)) 

1059 return decls 

1060 

1061 # BNF: init_declarator : declarator ('=' initializer)? 

1062 def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo": 

1063 decl = self._parse_id_declarator() if id_only else self._parse_declarator() 

1064 init = None 

1065 if self._accept("EQUALS"): 

1066 init = self._parse_initializer() 

1067 return dict(decl=decl, init=init, bitsize=None) 

1068 

1069 # ------------------------------------------------------------------ 

1070 # Structs/unions/enums 

1071 # ------------------------------------------------------------------ 

1072 # BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}' 

1073 # | struct_or_union ID 

1074 def _parse_struct_or_union_specifier(self) -> c_ast.Node: 

1075 tok = self._advance() 

1076 klass = self._select_struct_union_class(tok.value) 

1077 

1078 if self._peek_type() in {"ID", "TYPEID"}: 

1079 name_tok = self._advance() 

1080 if self._peek_type() == "LBRACE": 

1081 self._advance() 

1082 if self._accept("RBRACE"): 

1083 return klass( 

1084 name=name_tok.value, decls=[], coord=self._tok_coord(name_tok) 

1085 ) 

1086 decls = self._parse_struct_declaration_list() 

1087 self._expect("RBRACE") 

1088 return klass( 

1089 name=name_tok.value, decls=decls, coord=self._tok_coord(name_tok) 

1090 ) 

1091 

1092 return klass( 

1093 name=name_tok.value, decls=None, coord=self._tok_coord(name_tok) 

1094 ) 

1095 

1096 if self._peek_type() == "LBRACE": 

1097 brace_tok = self._advance() 

1098 if self._accept("RBRACE"): 

1099 return klass(name=None, decls=[], coord=self._tok_coord(brace_tok)) 

1100 decls = self._parse_struct_declaration_list() 

1101 self._expect("RBRACE") 

1102 return klass(name=None, decls=decls, coord=self._tok_coord(brace_tok)) 

1103 

1104 self._parse_error("Invalid struct/union declaration", self._tok_coord(tok)) 

1105 

1106 # BNF: struct_declaration_list : struct_declaration+ 

1107 def _parse_struct_declaration_list(self) -> List[c_ast.Node]: 

1108 decls = [] 

1109 while self._peek_type() not in {None, "RBRACE"}: 

1110 items = self._parse_struct_declaration() 

1111 if items is None: 

1112 continue 

1113 decls.extend(items) 

1114 return decls 

1115 

1116 # BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';' 

1117 # | static_assert 

1118 # | pppragma_directive 

1119 def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]: 

1120 if self._peek_type() == "SEMI": 

1121 self._advance() 

1122 return None 

1123 if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}: 

1124 return [self._parse_pppragma_directive()] 

1125 

1126 spec = self._parse_specifier_qualifier_list() 

1127 assert "typedef" not in spec.get("storage", []) 

1128 

1129 decls = None 

1130 if self._starts_declarator() or self._peek_type() == "COLON": 

1131 decls = self._parse_struct_declarator_list() 

1132 if decls is not None: 

1133 self._expect("SEMI") 

1134 return self._build_declarations(spec=spec, decls=decls) 

1135 

1136 if len(spec["type"]) == 1: 

1137 node = spec["type"][0] 

1138 if isinstance(node, c_ast.Node): 

1139 decl_type = node 

1140 else: 

1141 decl_type = c_ast.IdentifierType(node) 

1142 self._expect("SEMI") 

1143 return self._build_declarations( 

1144 spec=spec, decls=[dict(decl=decl_type, init=None, bitsize=None)] 

1145 ) 

1146 

1147 self._expect("SEMI") 

1148 return self._build_declarations( 

1149 spec=spec, decls=[dict(decl=None, init=None, bitsize=None)] 

1150 ) 

1151 

1152 # BNF: struct_declarator_list : struct_declarator (',' struct_declarator)* 

1153 def _parse_struct_declarator_list(self) -> List["_DeclInfo"]: 

1154 decls = [self._parse_struct_declarator()] 

1155 while self._accept("COMMA"): 

1156 decls.append(self._parse_struct_declarator()) 

1157 return decls 

1158 

1159 # BNF: struct_declarator : declarator? ':' constant_expression 

1160 # | declarator (':' constant_expression)? 

1161 def _parse_struct_declarator(self) -> "_DeclInfo": 

1162 if self._accept("COLON"): 

1163 bitsize = self._parse_constant_expression() 

1164 return { 

1165 "decl": c_ast.TypeDecl(None, None, None, None), 

1166 "init": None, 

1167 "bitsize": bitsize, 

1168 } 

1169 

1170 decl = self._parse_declarator() 

1171 if self._accept("COLON"): 

1172 bitsize = self._parse_constant_expression() 

1173 return {"decl": decl, "init": None, "bitsize": bitsize} 

1174 

1175 return {"decl": decl, "init": None, "bitsize": None} 

1176 

1177 # BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}' 

1178 # | ENUM ID 

1179 def _parse_enum_specifier(self) -> c_ast.Node: 

1180 tok = self._expect("ENUM") 

1181 if self._peek_type() in {"ID", "TYPEID"}: 

1182 name_tok = self._advance() 

1183 if self._peek_type() == "LBRACE": 

1184 self._advance() 

1185 enums = self._parse_enumerator_list() 

1186 self._expect("RBRACE") 

1187 return c_ast.Enum(name_tok.value, enums, self._tok_coord(tok)) 

1188 return c_ast.Enum(name_tok.value, None, self._tok_coord(tok)) 

1189 

1190 self._expect("LBRACE") 

1191 enums = self._parse_enumerator_list() 

1192 self._expect("RBRACE") 

1193 return c_ast.Enum(None, enums, self._tok_coord(tok)) 

1194 

1195 # BNF: enumerator_list : enumerator (',' enumerator)* ','? 

1196 def _parse_enumerator_list(self) -> c_ast.Node: 

1197 enum = self._parse_enumerator() 

1198 enum_list = c_ast.EnumeratorList([enum], enum.coord) 

1199 while self._accept("COMMA"): 

1200 if self._peek_type() == "RBRACE": 

1201 break 

1202 enum = self._parse_enumerator() 

1203 enum_list.enumerators.append(enum) 

1204 return enum_list 

1205 

1206 # BNF: enumerator : ID ('=' constant_expression)? 

1207 def _parse_enumerator(self) -> c_ast.Node: 

1208 name_tok = self._expect("ID") 

1209 if self._accept("EQUALS"): 

1210 value = self._parse_constant_expression() 

1211 else: 

1212 value = None 

1213 enum = c_ast.Enumerator(name_tok.value, value, self._tok_coord(name_tok)) 

1214 self._add_identifier(enum.name, enum.coord) 

1215 return enum 

1216 

1217 # ------------------------------------------------------------------ 

1218 # Declarators 

1219 # ------------------------------------------------------------------ 

1220 # BNF: declarator : pointer? direct_declarator 

1221 def _parse_declarator(self) -> c_ast.Node: 

1222 decl, _ = self._parse_any_declarator( 

1223 allow_abstract=False, typeid_paren_as_abstract=False 

1224 ) 

1225 assert decl is not None 

1226 return decl 

1227 

1228 # BNF: id_declarator : declarator with ID name 

1229 def _parse_id_declarator(self) -> c_ast.Node: 

1230 return self._parse_declarator_kind(kind="id", allow_paren=True) 

1231 

1232 # BNF: typeid_declarator : declarator with TYPEID name 

1233 def _parse_typeid_declarator(self) -> c_ast.Node: 

1234 return self._parse_declarator_kind(kind="typeid", allow_paren=True) 

1235 

1236 # BNF: typeid_noparen_declarator : declarator without parenthesized name 

1237 def _parse_typeid_noparen_declarator(self) -> c_ast.Node: 

1238 return self._parse_declarator_kind(kind="typeid", allow_paren=False) 

1239 

1240 # BNF: declarator_kind : pointer? direct_declarator(kind) 

1241 def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node: 

1242 ptr = None 

1243 if self._peek_type() == "TIMES": 

1244 ptr = self._parse_pointer() 

1245 direct = self._parse_direct_declarator(kind, allow_paren=allow_paren) 

1246 if ptr is not None: 

1247 return self._type_modify_decl(direct, ptr) 

1248 return direct 

1249 

1250 # BNF: direct_declarator : ID | TYPEID | '(' declarator ')' 

1251 # | direct_declarator '[' ... ']' 

1252 # | direct_declarator '(' ... ')' 

1253 def _parse_direct_declarator( 

1254 self, kind: str, allow_paren: bool = True 

1255 ) -> c_ast.Node: 

1256 if allow_paren and self._accept("LPAREN"): 

1257 decl = self._parse_declarator_kind(kind, allow_paren=True) 

1258 self._expect("RPAREN") 

1259 else: 

1260 if kind == "id": 

1261 name_tok = self._expect("ID") 

1262 else: 

1263 name_tok = self._expect("TYPEID") 

1264 decl = c_ast.TypeDecl( 

1265 declname=name_tok.value, 

1266 type=None, 

1267 quals=None, 

1268 align=None, 

1269 coord=self._tok_coord(name_tok), 

1270 ) 

1271 

1272 return self._parse_decl_suffixes(decl) 

1273 

1274 def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node: 

1275 """Parse a chain of array/function suffixes and attach them to decl.""" 

1276 while True: 

1277 if self._peek_type() == "LBRACKET": 

1278 decl = self._type_modify_decl(decl, self._parse_array_decl(decl)) 

1279 continue 

1280 if self._peek_type() == "LPAREN": 

1281 func = self._parse_function_decl(decl) 

1282 decl = self._type_modify_decl(decl, func) 

1283 continue 

1284 break 

1285 return decl 

1286 

1287 # BNF: array_decl : '[' array_specifiers? assignment_expression? ']' 

1288 def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node: 

1289 return self._parse_array_decl_common(base_type=None, coord=base_decl.coord) 

1290 

1291 def _parse_array_decl_common( 

1292 self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None 

1293 ) -> c_ast.Node: 

1294 """Parse an array declarator suffix and return an ArrayDecl node. 

1295 

1296 base_type: 

1297 Base declarator node to attach (None for direct-declarator parsing, 

1298 TypeDecl for abstract declarators). 

1299 

1300 coord: 

1301 Coordinate to use for the ArrayDecl. If None, uses the '[' token. 

1302 """ 

1303 lbrack_tok = self._expect("LBRACKET") 

1304 if coord is None: 

1305 coord = self._tok_coord(lbrack_tok) 

1306 

1307 def make_array_decl(dim, dim_quals): 

1308 return c_ast.ArrayDecl( 

1309 type=base_type, dim=dim, dim_quals=dim_quals, coord=coord 

1310 ) 

1311 

1312 if self._accept("STATIC"): 

1313 dim_quals = ["static"] + (self._parse_type_qualifier_list() or []) 

1314 dim = self._parse_assignment_expression() 

1315 self._expect("RBRACKET") 

1316 return make_array_decl(dim, dim_quals) 

1317 

1318 if self._peek_type() in _TYPE_QUALIFIER: 

1319 dim_quals = self._parse_type_qualifier_list() or [] 

1320 if self._accept("STATIC"): 

1321 dim_quals = dim_quals + ["static"] 

1322 dim = self._parse_assignment_expression() 

1323 self._expect("RBRACKET") 

1324 return make_array_decl(dim, dim_quals) 

1325 times_tok = self._accept("TIMES") 

1326 if times_tok: 

1327 self._expect("RBRACKET") 

1328 dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok)) 

1329 return make_array_decl(dim, dim_quals) 

1330 dim = None 

1331 if self._starts_expression(): 

1332 dim = self._parse_assignment_expression() 

1333 self._expect("RBRACKET") 

1334 return make_array_decl(dim, dim_quals) 

1335 

1336 times_tok = self._accept("TIMES") 

1337 if times_tok: 

1338 self._expect("RBRACKET") 

1339 dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok)) 

1340 return make_array_decl(dim, []) 

1341 

1342 dim = None 

1343 if self._starts_expression(): 

1344 dim = self._parse_assignment_expression() 

1345 self._expect("RBRACKET") 

1346 return make_array_decl(dim, []) 

1347 

1348 # BNF: function_decl : '(' parameter_type_list_opt | identifier_list_opt ')' 

1349 def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node: 

1350 self._expect("LPAREN") 

1351 if self._accept("RPAREN"): 

1352 args = None 

1353 else: 

1354 args = ( 

1355 self._parse_parameter_type_list() 

1356 if self._starts_declaration() 

1357 else self._parse_identifier_list_opt() 

1358 ) 

1359 self._expect("RPAREN") 

1360 

1361 func = c_ast.FuncDecl(args=args, type=None, coord=base_decl.coord) 

1362 

1363 if self._peek_type() == "LBRACE": 

1364 if func.args is not None: 

1365 for param in func.args.params: 

1366 if isinstance(param, c_ast.EllipsisParam): 

1367 break 

1368 name = getattr(param, "name", None) 

1369 if name: 

1370 self._add_identifier(name, param.coord) 

1371 

1372 return func 

1373 

1374 # BNF: pointer : '*' type_qualifier_list? pointer? 

1375 def _parse_pointer(self) -> Optional[c_ast.Node]: 

1376 stars = [] 

1377 times_tok = self._accept("TIMES") 

1378 while times_tok: 

1379 quals = self._parse_type_qualifier_list() or [] 

1380 stars.append((quals, self._tok_coord(times_tok))) 

1381 times_tok = self._accept("TIMES") 

1382 

1383 if not stars: 

1384 return None 

1385 

1386 ptr = None 

1387 for quals, coord in stars: 

1388 ptr = c_ast.PtrDecl(quals=quals, type=ptr, coord=coord) 

1389 return ptr 

1390 

1391 # BNF: parameter_type_list : parameter_list (',' ELLIPSIS)? 

1392 def _parse_parameter_type_list(self) -> c_ast.ParamList: 

1393 params = self._parse_parameter_list() 

1394 if self._peek_type() == "COMMA" and self._peek_type(2) == "ELLIPSIS": 

1395 self._advance() 

1396 ell_tok = self._advance() 

1397 params.params.append(c_ast.EllipsisParam(self._tok_coord(ell_tok))) 

1398 return params 

1399 

1400 # BNF: parameter_list : parameter_declaration (',' parameter_declaration)* 

1401 def _parse_parameter_list(self) -> c_ast.ParamList: 

1402 first = self._parse_parameter_declaration() 

1403 params = c_ast.ParamList([first], first.coord) 

1404 while self._peek_type() == "COMMA" and self._peek_type(2) != "ELLIPSIS": 

1405 self._advance() 

1406 params.params.append(self._parse_parameter_declaration()) 

1407 return params 

1408 

1409 # BNF: parameter_declaration : declaration_specifiers declarator? 

1410 # | declaration_specifiers abstract_declarator_opt 

1411 def _parse_parameter_declaration(self) -> c_ast.Node: 

1412 spec, _, spec_coord = self._parse_declaration_specifiers(allow_no_type=True) 

1413 

1414 if not spec["type"]: 

1415 spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)] 

1416 

1417 if self._starts_declarator(): 

1418 decl, is_named = self._parse_any_declarator( 

1419 allow_abstract=True, typeid_paren_as_abstract=True 

1420 ) 

1421 if is_named: 

1422 return self._build_declarations( 

1423 spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)] 

1424 )[0] 

1425 return self._build_parameter_declaration(spec, decl, spec_coord) 

1426 

1427 decl = self._parse_abstract_declarator_opt() 

1428 return self._build_parameter_declaration(spec, decl, spec_coord) 

1429 

1430 def _build_parameter_declaration( 

1431 self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord] 

1432 ) -> c_ast.Node: 

1433 if ( 

1434 len(spec["type"]) > 1 

1435 and len(spec["type"][-1].names) == 1 

1436 and self._is_type_in_scope(spec["type"][-1].names[0]) 

1437 ): 

1438 return self._build_declarations( 

1439 spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)] 

1440 )[0] 

1441 

1442 decl = c_ast.Typename( 

1443 name="", 

1444 quals=spec["qual"], 

1445 align=None, 

1446 type=decl or c_ast.TypeDecl(None, None, None, None), 

1447 coord=spec_coord, 

1448 ) 

1449 return self._fix_decl_name_type(decl, spec["type"]) 

1450 

1451 # BNF: identifier_list_opt : identifier_list | empty 

1452 def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]: 

1453 if self._peek_type() == "RPAREN": 

1454 return None 

1455 return self._parse_identifier_list() 

1456 

1457 # BNF: identifier_list : identifier (',' identifier)* 

1458 def _parse_identifier_list(self) -> c_ast.Node: 

1459 first = self._parse_identifier() 

1460 params = c_ast.ParamList([first], first.coord) 

1461 while self._accept("COMMA"): 

1462 params.params.append(self._parse_identifier()) 

1463 return params 

1464 

1465 # ------------------------------------------------------------------ 

1466 # Abstract declarators 

1467 # ------------------------------------------------------------------ 

1468 # BNF: type_name : specifier_qualifier_list abstract_declarator_opt 

1469 def _parse_type_name(self) -> c_ast.Typename: 

1470 spec = self._parse_specifier_qualifier_list() 

1471 decl = self._parse_abstract_declarator_opt() 

1472 

1473 coord = None 

1474 if decl is not None: 

1475 coord = decl.coord 

1476 elif spec["type"]: 

1477 coord = spec["type"][0].coord 

1478 

1479 typename = c_ast.Typename( 

1480 name="", 

1481 quals=spec["qual"][:], 

1482 align=None, 

1483 type=decl or c_ast.TypeDecl(None, None, None, None), 

1484 coord=coord, 

1485 ) 

1486 return cast(c_ast.Typename, self._fix_decl_name_type(typename, spec["type"])) 

1487 

1488 # BNF: abstract_declarator_opt : pointer? direct_abstract_declarator? 

1489 def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]: 

1490 if self._peek_type() == "TIMES": 

1491 ptr = self._parse_pointer() 

1492 if self._starts_direct_abstract_declarator(): 

1493 decl = self._parse_direct_abstract_declarator() 

1494 else: 

1495 decl = c_ast.TypeDecl(None, None, None, None) 

1496 assert ptr is not None 

1497 return self._type_modify_decl(decl, ptr) 

1498 

1499 if self._starts_direct_abstract_declarator(): 

1500 return self._parse_direct_abstract_declarator() 

1501 

1502 return None 

1503 

1504 # BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')' 

1505 # | '(' abstract_declarator ')' 

1506 # | '[' ... ']' 

1507 def _parse_direct_abstract_declarator(self) -> c_ast.Node: 

1508 lparen_tok = self._accept("LPAREN") 

1509 if lparen_tok: 

1510 if self._starts_declaration() or self._peek_type() == "RPAREN": 

1511 params = self._parse_parameter_type_list_opt() 

1512 self._expect("RPAREN") 

1513 decl = c_ast.FuncDecl( 

1514 args=params, 

1515 type=c_ast.TypeDecl(None, None, None, None), 

1516 coord=self._tok_coord(lparen_tok), 

1517 ) 

1518 else: 

1519 decl = self._parse_abstract_declarator_opt() 

1520 self._expect("RPAREN") 

1521 assert decl is not None 

1522 elif self._peek_type() == "LBRACKET": 

1523 decl = self._parse_abstract_array_base() 

1524 else: 

1525 self._parse_error("Invalid abstract declarator", self.clex.filename) 

1526 

1527 return self._parse_decl_suffixes(decl) 

1528 

1529 # BNF: parameter_type_list_opt : parameter_type_list | empty 

1530 def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]: 

1531 if self._peek_type() == "RPAREN": 

1532 return None 

1533 return self._parse_parameter_type_list() 

1534 

1535 # BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']' 

1536 def _parse_abstract_array_base(self) -> c_ast.Node: 

1537 return self._parse_array_decl_common( 

1538 base_type=c_ast.TypeDecl(None, None, None, None), coord=None 

1539 ) 

1540 

1541 # ------------------------------------------------------------------ 

1542 # Statements 

1543 # ------------------------------------------------------------------ 

1544 # BNF: statement : labeled_statement | compound_statement 

1545 # | selection_statement | iteration_statement 

1546 # | jump_statement | expression_statement 

1547 # | static_assert | pppragma_directive 

1548 def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]: 

1549 tok_type = self._peek_type() 

1550 match tok_type: 

1551 case "CASE" | "DEFAULT": 

1552 return self._parse_labeled_statement() 

1553 case "ID" if self._peek_type(2) == "COLON": 

1554 return self._parse_labeled_statement() 

1555 case "LBRACE": 

1556 return self._parse_compound_statement() 

1557 case "IF" | "SWITCH": 

1558 return self._parse_selection_statement() 

1559 case "WHILE" | "DO" | "FOR": 

1560 return self._parse_iteration_statement() 

1561 case "GOTO" | "BREAK" | "CONTINUE" | "RETURN": 

1562 return self._parse_jump_statement() 

1563 case "PPPRAGMA" | "_PRAGMA": 

1564 return self._parse_pppragma_directive() 

1565 case "_STATIC_ASSERT": 

1566 return self._parse_static_assert() 

1567 case _: 

1568 return self._parse_expression_statement() 

1569 

1570 # BNF: pragmacomp_or_statement : pppragma_directive* statement 

1571 def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]: 

1572 if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}: 

1573 pragmas = self._parse_pppragma_directive_list() 

1574 stmt = self._parse_statement() 

1575 return c_ast.Compound(block_items=pragmas + [stmt], coord=pragmas[0].coord) 

1576 return self._parse_statement() 

1577 

1578 # BNF: block_item : declaration | statement 

1579 def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]: 

1580 if self._starts_declaration(): 

1581 return self._parse_declaration() 

1582 return self._parse_statement() 

1583 

1584 # BNF: block_item_list : block_item+ 

1585 def _parse_block_item_list(self) -> List[c_ast.Node]: 

1586 items = [] 

1587 while self._peek_type() not in {"RBRACE", None}: 

1588 item = self._parse_block_item() 

1589 if isinstance(item, list): 

1590 if item == [None]: 

1591 continue 

1592 items.extend(item) 

1593 else: 

1594 items.append(item) 

1595 return items 

1596 

1597 # BNF: compound_statement : '{' block_item_list? '}' 

1598 def _parse_compound_statement(self) -> c_ast.Node: 

1599 lbrace_tok = self._expect("LBRACE") 

1600 if self._accept("RBRACE"): 

1601 return c_ast.Compound(block_items=None, coord=self._tok_coord(lbrace_tok)) 

1602 block_items = self._parse_block_item_list() 

1603 self._expect("RBRACE") 

1604 return c_ast.Compound( 

1605 block_items=block_items, coord=self._tok_coord(lbrace_tok) 

1606 ) 

1607 

1608 # BNF: labeled_statement : ID ':' statement 

1609 # | CASE constant_expression ':' statement 

1610 # | DEFAULT ':' statement 

1611 def _parse_labeled_statement(self) -> c_ast.Node: 

1612 tok_type = self._peek_type() 

1613 match tok_type: 

1614 case "ID": 

1615 name_tok = self._advance() 

1616 self._expect("COLON") 

1617 if self._starts_statement(): 

1618 stmt = self._parse_pragmacomp_or_statement() 

1619 else: 

1620 stmt = c_ast.EmptyStatement(self._tok_coord(name_tok)) 

1621 return c_ast.Label(name_tok.value, stmt, self._tok_coord(name_tok)) 

1622 case "CASE": 

1623 case_tok = self._advance() 

1624 expr = self._parse_constant_expression() 

1625 self._expect("COLON") 

1626 if self._starts_statement(): 

1627 stmt = self._parse_pragmacomp_or_statement() 

1628 else: 

1629 stmt = c_ast.EmptyStatement(self._tok_coord(case_tok)) 

1630 return c_ast.Case(expr, [stmt], self._tok_coord(case_tok)) 

1631 case "DEFAULT": 

1632 def_tok = self._advance() 

1633 self._expect("COLON") 

1634 if self._starts_statement(): 

1635 stmt = self._parse_pragmacomp_or_statement() 

1636 else: 

1637 stmt = c_ast.EmptyStatement(self._tok_coord(def_tok)) 

1638 return c_ast.Default([stmt], self._tok_coord(def_tok)) 

1639 case _: 

1640 self._parse_error("Invalid labeled statement", self.clex.filename) 

1641 

1642 # BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)? 

1643 # | SWITCH '(' expression ')' statement 

1644 def _parse_selection_statement(self) -> c_ast.Node: 

1645 tok = self._advance() 

1646 match tok.type: 

1647 case "IF": 

1648 self._expect("LPAREN") 

1649 cond = self._parse_expression() 

1650 self._expect("RPAREN") 

1651 then_stmt = self._parse_pragmacomp_or_statement() 

1652 if self._accept("ELSE"): 

1653 else_stmt = self._parse_pragmacomp_or_statement() 

1654 return c_ast.If(cond, then_stmt, else_stmt, self._tok_coord(tok)) 

1655 return c_ast.If(cond, then_stmt, None, self._tok_coord(tok)) 

1656 case "SWITCH": 

1657 self._expect("LPAREN") 

1658 expr = self._parse_expression() 

1659 self._expect("RPAREN") 

1660 stmt = self._parse_pragmacomp_or_statement() 

1661 return fix_switch_cases(c_ast.Switch(expr, stmt, self._tok_coord(tok))) 

1662 case _: 

1663 self._parse_error("Invalid selection statement", self._tok_coord(tok)) 

1664 

1665 # BNF: iteration_statement : WHILE '(' expression ')' statement 

1666 # | DO statement WHILE '(' expression ')' ';' 

1667 # | FOR '(' (declaration | expression_opt) ';' 

1668 # expression_opt ';' expression_opt ')' statement 

1669 def _parse_iteration_statement(self) -> c_ast.Node: 

1670 tok = self._advance() 

1671 match tok.type: 

1672 case "WHILE": 

1673 self._expect("LPAREN") 

1674 cond = self._parse_expression() 

1675 self._expect("RPAREN") 

1676 stmt = self._parse_pragmacomp_or_statement() 

1677 return c_ast.While(cond, stmt, self._tok_coord(tok)) 

1678 case "DO": 

1679 stmt = self._parse_pragmacomp_or_statement() 

1680 self._expect("WHILE") 

1681 self._expect("LPAREN") 

1682 cond = self._parse_expression() 

1683 self._expect("RPAREN") 

1684 self._expect("SEMI") 

1685 return c_ast.DoWhile(cond, stmt, self._tok_coord(tok)) 

1686 case "FOR": 

1687 self._expect("LPAREN") 

1688 if self._starts_declaration(): 

1689 decls = self._parse_declaration() 

1690 init = c_ast.DeclList(decls, self._tok_coord(tok)) 

1691 cond = self._parse_expression_opt() 

1692 self._expect("SEMI") 

1693 next_expr = self._parse_expression_opt() 

1694 self._expect("RPAREN") 

1695 stmt = self._parse_pragmacomp_or_statement() 

1696 return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok)) 

1697 

1698 init = self._parse_expression_opt() 

1699 self._expect("SEMI") 

1700 cond = self._parse_expression_opt() 

1701 self._expect("SEMI") 

1702 next_expr = self._parse_expression_opt() 

1703 self._expect("RPAREN") 

1704 stmt = self._parse_pragmacomp_or_statement() 

1705 return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok)) 

1706 case _: 

1707 self._parse_error("Invalid iteration statement", self._tok_coord(tok)) 

1708 

1709 # BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';' 

1710 # | RETURN expression? ';' 

1711 def _parse_jump_statement(self) -> c_ast.Node: 

1712 tok = self._advance() 

1713 match tok.type: 

1714 case "GOTO": 

1715 name_tok = self._expect("ID") 

1716 self._expect("SEMI") 

1717 return c_ast.Goto(name_tok.value, self._tok_coord(tok)) 

1718 case "BREAK": 

1719 self._expect("SEMI") 

1720 return c_ast.Break(self._tok_coord(tok)) 

1721 case "CONTINUE": 

1722 self._expect("SEMI") 

1723 return c_ast.Continue(self._tok_coord(tok)) 

1724 case "RETURN": 

1725 if self._accept("SEMI"): 

1726 return c_ast.Return(None, self._tok_coord(tok)) 

1727 expr = self._parse_expression() 

1728 self._expect("SEMI") 

1729 return c_ast.Return(expr, self._tok_coord(tok)) 

1730 case _: 

1731 self._parse_error("Invalid jump statement", self._tok_coord(tok)) 

1732 

1733 # BNF: expression_statement : expression_opt ';' 

1734 def _parse_expression_statement(self) -> c_ast.Node: 

1735 expr = self._parse_expression_opt() 

1736 semi_tok = self._expect("SEMI") 

1737 if expr is None: 

1738 return c_ast.EmptyStatement(self._tok_coord(semi_tok)) 

1739 return expr 

1740 

1741 # ------------------------------------------------------------------ 

1742 # Expressions 

1743 # ------------------------------------------------------------------ 

1744 # BNF: expression_opt : expression | empty 

1745 def _parse_expression_opt(self) -> Optional[c_ast.Node]: 

1746 if self._starts_expression(): 

1747 return self._parse_expression() 

1748 return None 

1749 

1750 # BNF: expression : assignment_expression (',' assignment_expression)* 

1751 def _parse_expression(self) -> c_ast.Node: 

1752 expr = self._parse_assignment_expression() 

1753 if not self._accept("COMMA"): 

1754 return expr 

1755 exprs = [expr, self._parse_assignment_expression()] 

1756 while self._accept("COMMA"): 

1757 exprs.append(self._parse_assignment_expression()) 

1758 return c_ast.ExprList(exprs, expr.coord) 

1759 

1760 # BNF: assignment_expression : conditional_expression 

1761 # | unary_expression assignment_op assignment_expression 

1762 def _parse_assignment_expression(self) -> c_ast.Node: 

1763 if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE": 

1764 self._advance() 

1765 comp = self._parse_compound_statement() 

1766 self._expect("RPAREN") 

1767 return comp 

1768 

1769 expr = self._parse_conditional_expression() 

1770 if self._is_assignment_op(): 

1771 op = self._advance().value 

1772 rhs = self._parse_assignment_expression() 

1773 return c_ast.Assignment(op, expr, rhs, expr.coord) 

1774 return expr 

1775 

1776 # BNF: conditional_expression : binary_expression 

1777 # | binary_expression '?' expression ':' conditional_expression 

1778 def _parse_conditional_expression(self) -> c_ast.Node: 

1779 expr = self._parse_binary_expression() 

1780 if self._accept("CONDOP"): 

1781 iftrue = self._parse_expression() 

1782 self._expect("COLON") 

1783 iffalse = self._parse_conditional_expression() 

1784 return c_ast.TernaryOp(expr, iftrue, iffalse, expr.coord) 

1785 return expr 

1786 

1787 # BNF: binary_expression : cast_expression (binary_op cast_expression)* 

1788 def _parse_binary_expression( 

1789 self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None 

1790 ) -> c_ast.Node: 

1791 if lhs is None: 

1792 lhs = self._parse_cast_expression() 

1793 

1794 while True: 

1795 tok = self._peek() 

1796 if tok is None or tok.type not in _BINARY_PRECEDENCE: 

1797 break 

1798 prec = _BINARY_PRECEDENCE[tok.type] 

1799 if prec < min_prec: 

1800 break 

1801 

1802 op = tok.value 

1803 self._advance() 

1804 rhs = self._parse_cast_expression() 

1805 

1806 while True: 

1807 next_tok = self._peek() 

1808 if next_tok is None or next_tok.type not in _BINARY_PRECEDENCE: 

1809 break 

1810 next_prec = _BINARY_PRECEDENCE[next_tok.type] 

1811 if next_prec > prec: 

1812 rhs = self._parse_binary_expression(next_prec, rhs) 

1813 else: 

1814 break 

1815 

1816 lhs = c_ast.BinaryOp(op, lhs, rhs, lhs.coord) 

1817 

1818 return lhs 

1819 

1820 # BNF: cast_expression : '(' type_name ')' cast_expression 

1821 # | unary_expression 

1822 def _parse_cast_expression(self) -> c_ast.Node: 

1823 result = self._try_parse_paren_type_name() 

1824 if result is not None: 

1825 typ, mark, lparen_tok = result 

1826 if self._peek_type() == "LBRACE": 

1827 # (type){...} is a compound literal, not a cast. Examples: 

1828 # (int){1} -> compound literal, handled in postfix 

1829 # (int) x -> cast, handled below 

1830 self._reset(mark) 

1831 else: 

1832 expr = self._parse_cast_expression() 

1833 return c_ast.Cast(typ, expr, self._tok_coord(lparen_tok)) 

1834 return self._parse_unary_expression() 

1835 

1836 # BNF: unary_expression : postfix_expression 

1837 # | '++' unary_expression 

1838 # | '--' unary_expression 

1839 # | unary_op cast_expression 

1840 # | 'sizeof' unary_expression 

1841 # | 'sizeof' '(' type_name ')' 

1842 # | '_Alignof' '(' type_name ')' 

1843 def _parse_unary_expression(self) -> c_ast.Node: 

1844 tok_type = self._peek_type() 

1845 if tok_type in {"PLUSPLUS", "MINUSMINUS"}: 

1846 tok = self._advance() 

1847 expr = self._parse_unary_expression() 

1848 return c_ast.UnaryOp(tok.value, expr, expr.coord) 

1849 

1850 if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}: 

1851 tok = self._advance() 

1852 expr = self._parse_cast_expression() 

1853 return c_ast.UnaryOp(tok.value, expr, expr.coord) 

1854 

1855 if tok_type == "SIZEOF": 

1856 tok = self._advance() 

1857 result = self._try_parse_paren_type_name() 

1858 if result is not None: 

1859 typ, _, _ = result 

1860 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok)) 

1861 expr = self._parse_unary_expression() 

1862 return c_ast.UnaryOp(tok.value, expr, self._tok_coord(tok)) 

1863 

1864 if tok_type == "_ALIGNOF": 

1865 tok = self._advance() 

1866 self._expect("LPAREN") 

1867 typ = self._parse_type_name() 

1868 self._expect("RPAREN") 

1869 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok)) 

1870 

1871 return self._parse_postfix_expression() 

1872 

1873 # BNF: postfix_expression : primary_expression postfix_suffix* 

1874 # | '(' type_name ')' '{' initializer_list ','? '}' 

1875 def _parse_postfix_expression(self) -> c_ast.Node: 

1876 result = self._try_parse_paren_type_name() 

1877 if result is not None: 

1878 typ, mark, _ = result 

1879 # Disambiguate between casts and compound literals: 

1880 # (int) x -> cast 

1881 # (int) {1} -> compound literal 

1882 if self._accept("LBRACE"): 

1883 init = self._parse_initializer_list() 

1884 self._accept("COMMA") 

1885 self._expect("RBRACE") 

1886 return c_ast.CompoundLiteral(typ, init) 

1887 else: 

1888 self._reset(mark) 

1889 

1890 expr = self._parse_primary_expression() 

1891 while True: 

1892 if self._accept("LBRACKET"): 

1893 sub = self._parse_expression() 

1894 self._expect("RBRACKET") 

1895 expr = c_ast.ArrayRef(expr, sub, expr.coord) 

1896 continue 

1897 if self._accept("LPAREN"): 

1898 if self._peek_type() == "RPAREN": 

1899 self._advance() 

1900 args = None 

1901 else: 

1902 args = self._parse_argument_expression_list() 

1903 self._expect("RPAREN") 

1904 expr = c_ast.FuncCall(expr, args, expr.coord) 

1905 continue 

1906 if self._peek_type() in {"PERIOD", "ARROW"}: 

1907 op_tok = self._advance() 

1908 name_tok = self._advance() 

1909 if name_tok.type not in {"ID", "TYPEID"}: 

1910 self._parse_error( 

1911 "Invalid struct reference", self._tok_coord(name_tok) 

1912 ) 

1913 field = c_ast.ID(name_tok.value, self._tok_coord(name_tok)) 

1914 expr = c_ast.StructRef(expr, op_tok.value, field, expr.coord) 

1915 continue 

1916 if self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}: 

1917 tok = self._advance() 

1918 expr = c_ast.UnaryOp("p" + tok.value, expr, expr.coord) 

1919 continue 

1920 break 

1921 return expr 

1922 

1923 # BNF: primary_expression : ID | constant | string_literal 

1924 # | '(' expression ')' | offsetof 

1925 def _parse_primary_expression(self) -> c_ast.Node: 

1926 tok_type = self._peek_type() 

1927 if tok_type == "ID": 

1928 return self._parse_identifier() 

1929 if ( 

1930 tok_type in _INT_CONST 

1931 or tok_type in _FLOAT_CONST 

1932 or tok_type in _CHAR_CONST 

1933 ): 

1934 return self._parse_constant() 

1935 if tok_type in _STRING_LITERAL: 

1936 return self._parse_unified_string_literal() 

1937 if tok_type in _WSTR_LITERAL: 

1938 return self._parse_unified_wstring_literal() 

1939 if tok_type == "LPAREN": 

1940 self._advance() 

1941 expr = self._parse_expression() 

1942 self._expect("RPAREN") 

1943 return expr 

1944 if tok_type == "OFFSETOF": 

1945 off_tok = self._advance() 

1946 self._expect("LPAREN") 

1947 typ = self._parse_type_name() 

1948 self._expect("COMMA") 

1949 designator = self._parse_offsetof_member_designator() 

1950 self._expect("RPAREN") 

1951 coord = self._tok_coord(off_tok) 

1952 return c_ast.FuncCall( 

1953 c_ast.ID(off_tok.value, coord), 

1954 c_ast.ExprList([typ, designator], coord), 

1955 coord, 

1956 ) 

1957 

1958 self._parse_error("Invalid expression", self.clex.filename) 

1959 

1960 # BNF: offsetof_member_designator : identifier_or_typeid 

1961 # ('.' identifier_or_typeid | '[' expression ']')* 

1962 def _parse_offsetof_member_designator(self) -> c_ast.Node: 

1963 node = self._parse_identifier_or_typeid() 

1964 while True: 

1965 if self._accept("PERIOD"): 

1966 field = self._parse_identifier_or_typeid() 

1967 node = c_ast.StructRef(node, ".", field, node.coord) 

1968 continue 

1969 if self._accept("LBRACKET"): 

1970 expr = self._parse_expression() 

1971 self._expect("RBRACKET") 

1972 node = c_ast.ArrayRef(node, expr, node.coord) 

1973 continue 

1974 break 

1975 return node 

1976 

1977 # BNF: argument_expression_list : assignment_expression (',' assignment_expression)* 

1978 def _parse_argument_expression_list(self) -> c_ast.Node: 

1979 expr = self._parse_assignment_expression() 

1980 exprs = [expr] 

1981 while self._accept("COMMA"): 

1982 exprs.append(self._parse_assignment_expression()) 

1983 return c_ast.ExprList(exprs, expr.coord) 

1984 

1985 # BNF: constant_expression : conditional_expression 

1986 def _parse_constant_expression(self) -> c_ast.Node: 

1987 return self._parse_conditional_expression() 

1988 

1989 # ------------------------------------------------------------------ 

1990 # Terminals 

1991 # ------------------------------------------------------------------ 

1992 # BNF: identifier : ID 

1993 def _parse_identifier(self) -> c_ast.Node: 

1994 tok = self._expect("ID") 

1995 return c_ast.ID(tok.value, self._tok_coord(tok)) 

1996 

1997 # BNF: identifier_or_typeid : ID | TYPEID 

1998 def _parse_identifier_or_typeid(self) -> c_ast.Node: 

1999 tok = self._advance() 

2000 if tok.type not in {"ID", "TYPEID"}: 

2001 self._parse_error("Expected identifier", self._tok_coord(tok)) 

2002 return c_ast.ID(tok.value, self._tok_coord(tok)) 

2003 

2004 # BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST 

2005 def _parse_constant(self) -> c_ast.Node: 

2006 tok = self._advance() 

2007 if tok.type in _INT_CONST: 

2008 u_count = 0 

2009 l_count = 0 

2010 for ch in tok.value[-3:]: 

2011 if ch in ("l", "L"): 

2012 l_count += 1 

2013 elif ch in ("u", "U"): 

2014 u_count += 1 

2015 if u_count > 1: 

2016 raise ValueError("Constant cannot have more than one u/U suffix.") 

2017 if l_count > 2: 

2018 raise ValueError("Constant cannot have more than two l/L suffix.") 

2019 prefix = "unsigned " * u_count + "long " * l_count 

2020 return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok)) 

2021 

2022 if tok.type in _FLOAT_CONST: 

2023 if tok.value[-1] in ("f", "F"): 

2024 t = "float" 

2025 elif tok.value[-1] in ("l", "L"): 

2026 t = "long double" 

2027 else: 

2028 t = "double" 

2029 return c_ast.Constant(t, tok.value, self._tok_coord(tok)) 

2030 

2031 if tok.type in _CHAR_CONST: 

2032 return c_ast.Constant("char", tok.value, self._tok_coord(tok)) 

2033 

2034 self._parse_error("Invalid constant", self._tok_coord(tok)) 

2035 

2036 # BNF: unified_string_literal : STRING_LITERAL+ 

2037 def _parse_unified_string_literal(self) -> c_ast.Node: 

2038 tok = self._expect("STRING_LITERAL") 

2039 node = c_ast.Constant("string", tok.value, self._tok_coord(tok)) 

2040 while self._peek_type() == "STRING_LITERAL": 

2041 tok2 = self._advance() 

2042 node.value = node.value[:-1] + tok2.value[1:] 

2043 return node 

2044 

2045 # BNF: unified_wstring_literal : WSTRING_LITERAL+ 

2046 def _parse_unified_wstring_literal(self) -> c_ast.Node: 

2047 tok = self._advance() 

2048 if tok.type not in _WSTR_LITERAL: 

2049 self._parse_error("Invalid string literal", self._tok_coord(tok)) 

2050 node = c_ast.Constant("string", tok.value, self._tok_coord(tok)) 

2051 while self._peek_type() in _WSTR_LITERAL: 

2052 tok2 = self._advance() 

2053 node.value = node.value.rstrip()[:-1] + tok2.value[2:] 

2054 return node 

2055 

2056 # ------------------------------------------------------------------ 

2057 # Initializers 

2058 # ------------------------------------------------------------------ 

2059 # BNF: initializer : assignment_expression 

2060 # | '{' initializer_list ','? '}' 

2061 # | '{' '}' 

2062 def _parse_initializer(self) -> c_ast.Node: 

2063 lbrace_tok = self._accept("LBRACE") 

2064 if lbrace_tok: 

2065 if self._accept("RBRACE"): 

2066 return c_ast.InitList([], self._tok_coord(lbrace_tok)) 

2067 init_list = self._parse_initializer_list() 

2068 self._accept("COMMA") 

2069 self._expect("RBRACE") 

2070 return init_list 

2071 

2072 return self._parse_assignment_expression() 

2073 

2074 # BNF: initializer_list : initializer_item (',' initializer_item)* ','? 

2075 def _parse_initializer_list(self) -> c_ast.Node: 

2076 items = [self._parse_initializer_item()] 

2077 while self._accept("COMMA"): 

2078 if self._peek_type() == "RBRACE": 

2079 break 

2080 items.append(self._parse_initializer_item()) 

2081 return c_ast.InitList(items, items[0].coord) 

2082 

2083 # BNF: initializer_item : designation? initializer 

2084 def _parse_initializer_item(self) -> c_ast.Node: 

2085 designation = None 

2086 if self._peek_type() in {"LBRACKET", "PERIOD"}: 

2087 designation = self._parse_designation() 

2088 init = self._parse_initializer() 

2089 if designation is not None: 

2090 return c_ast.NamedInitializer(designation, init) 

2091 return init 

2092 

2093 # BNF: designation : designator_list '=' 

2094 def _parse_designation(self) -> List[c_ast.Node]: 

2095 designators = self._parse_designator_list() 

2096 self._expect("EQUALS") 

2097 return designators 

2098 

2099 # BNF: designator_list : designator+ 

2100 def _parse_designator_list(self) -> List[c_ast.Node]: 

2101 designators = [] 

2102 while self._peek_type() in {"LBRACKET", "PERIOD"}: 

2103 designators.append(self._parse_designator()) 

2104 return designators 

2105 

2106 # BNF: designator : '[' constant_expression ']' 

2107 # | '.' identifier_or_typeid 

2108 def _parse_designator(self) -> c_ast.Node: 

2109 if self._accept("LBRACKET"): 

2110 expr = self._parse_constant_expression() 

2111 self._expect("RBRACKET") 

2112 return expr 

2113 if self._accept("PERIOD"): 

2114 return self._parse_identifier_or_typeid() 

2115 self._parse_error("Invalid designator", self.clex.filename) 

2116 

2117 # ------------------------------------------------------------------ 

2118 # Preprocessor-like directives 

2119 # ------------------------------------------------------------------ 

2120 # BNF: pp_directive : '#' ... (unsupported) 

2121 def _parse_pp_directive(self) -> NoReturn: 

2122 tok = self._expect("PPHASH") 

2123 self._parse_error("Directives not supported yet", self._tok_coord(tok)) 

2124 

2125 # BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR? 

2126 # | _PRAGMA '(' string_literal ')' 

2127 def _parse_pppragma_directive(self) -> c_ast.Node: 

2128 if self._peek_type() == "PPPRAGMA": 

2129 tok = self._advance() 

2130 if self._peek_type() == "PPPRAGMASTR": 

2131 str_tok = self._advance() 

2132 return c_ast.Pragma(str_tok.value, self._tok_coord(str_tok)) 

2133 return c_ast.Pragma("", self._tok_coord(tok)) 

2134 

2135 if self._peek_type() == "_PRAGMA": 

2136 tok = self._advance() 

2137 lparen = self._expect("LPAREN") 

2138 literal = self._parse_unified_string_literal() 

2139 self._expect("RPAREN") 

2140 return c_ast.Pragma(literal, self._tok_coord(lparen)) 

2141 

2142 self._parse_error("Invalid pragma", self.clex.filename) 

2143 

2144 # BNF: pppragma_directive_list : pppragma_directive+ 

2145 def _parse_pppragma_directive_list(self) -> List[c_ast.Node]: 

2146 pragmas = [] 

2147 while self._peek_type() in {"PPPRAGMA", "_PRAGMA"}: 

2148 pragmas.append(self._parse_pppragma_directive()) 

2149 return pragmas 

2150 

2151 # BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')' 

2152 def _parse_static_assert(self) -> List[c_ast.Node]: 

2153 tok = self._expect("_STATIC_ASSERT") 

2154 self._expect("LPAREN") 

2155 cond = self._parse_constant_expression() 

2156 msg = None 

2157 if self._accept("COMMA"): 

2158 msg = self._parse_unified_string_literal() 

2159 self._expect("RPAREN") 

2160 return [c_ast.StaticAssert(cond, msg, self._tok_coord(tok))] 

2161 

2162 

2163_ASSIGNMENT_OPS = { 

2164 "EQUALS", 

2165 "XOREQUAL", 

2166 "TIMESEQUAL", 

2167 "DIVEQUAL", 

2168 "MODEQUAL", 

2169 "PLUSEQUAL", 

2170 "MINUSEQUAL", 

2171 "LSHIFTEQUAL", 

2172 "RSHIFTEQUAL", 

2173 "ANDEQUAL", 

2174 "OREQUAL", 

2175} 

2176 

2177# Precedence of operators (lower number = weather binding) 

2178# If this changes, c_generator.CGenerator.precedence_map needs to change as 

2179# well 

2180_BINARY_PRECEDENCE = { 

2181 "LOR": 0, 

2182 "LAND": 1, 

2183 "OR": 2, 

2184 "XOR": 3, 

2185 "AND": 4, 

2186 "EQ": 5, 

2187 "NE": 5, 

2188 "GT": 6, 

2189 "GE": 6, 

2190 "LT": 6, 

2191 "LE": 6, 

2192 "RSHIFT": 7, 

2193 "LSHIFT": 7, 

2194 "PLUS": 8, 

2195 "MINUS": 8, 

2196 "TIMES": 9, 

2197 "DIVIDE": 9, 

2198 "MOD": 9, 

2199} 

2200 

2201_STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"} 

2202 

2203_FUNCTION_SPEC = {"INLINE", "_NORETURN"} 

2204 

2205_TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"} 

2206 

2207_TYPE_SPEC_SIMPLE = { 

2208 "VOID", 

2209 "_BOOL", 

2210 "CHAR", 

2211 "SHORT", 

2212 "INT", 

2213 "LONG", 

2214 "FLOAT", 

2215 "DOUBLE", 

2216 "_COMPLEX", 

2217 "SIGNED", 

2218 "UNSIGNED", 

2219 "__INT128", 

2220} 

2221 

2222_DECL_START = ( 

2223 _STORAGE_CLASS 

2224 | _FUNCTION_SPEC 

2225 | _TYPE_QUALIFIER 

2226 | _TYPE_SPEC_SIMPLE 

2227 | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"} 

2228) 

2229 

2230_EXPR_START = { 

2231 "ID", 

2232 "LPAREN", 

2233 "PLUSPLUS", 

2234 "MINUSMINUS", 

2235 "PLUS", 

2236 "MINUS", 

2237 "TIMES", 

2238 "AND", 

2239 "NOT", 

2240 "LNOT", 

2241 "SIZEOF", 

2242 "_ALIGNOF", 

2243 "OFFSETOF", 

2244} 

2245 

2246_INT_CONST = { 

2247 "INT_CONST_DEC", 

2248 "INT_CONST_OCT", 

2249 "INT_CONST_HEX", 

2250 "INT_CONST_BIN", 

2251 "INT_CONST_CHAR", 

2252} 

2253 

2254_FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"} 

2255 

2256_CHAR_CONST = { 

2257 "CHAR_CONST", 

2258 "WCHAR_CONST", 

2259 "U8CHAR_CONST", 

2260 "U16CHAR_CONST", 

2261 "U32CHAR_CONST", 

2262} 

2263 

2264_STRING_LITERAL = {"STRING_LITERAL"} 

2265 

2266_WSTR_LITERAL = { 

2267 "WSTRING_LITERAL", 

2268 "U8STRING_LITERAL", 

2269 "U16STRING_LITERAL", 

2270 "U32STRING_LITERAL", 

2271} 

2272 

2273_STARTS_EXPRESSION = ( 

2274 _EXPR_START 

2275 | _INT_CONST 

2276 | _FLOAT_CONST 

2277 | _CHAR_CONST 

2278 | _STRING_LITERAL 

2279 | _WSTR_LITERAL 

2280) 

2281 

2282_STARTS_STATEMENT = { 

2283 "LBRACE", 

2284 "IF", 

2285 "SWITCH", 

2286 "WHILE", 

2287 "DO", 

2288 "FOR", 

2289 "GOTO", 

2290 "BREAK", 

2291 "CONTINUE", 

2292 "RETURN", 

2293 "CASE", 

2294 "DEFAULT", 

2295 "PPPRAGMA", 

2296 "_PRAGMA", 

2297 "_STATIC_ASSERT", 

2298 "SEMI", 

2299} 

2300 

2301 

2302class _TokenStream: 

2303 """Wraps a lexer to provide convenient, buffered access to the underlying 

2304 token stream. The lexer is expected to be initialized with the input 

2305 string already. 

2306 """ 

2307 

2308 def __init__(self, lexer: CLexer) -> None: 

2309 self._lexer = lexer 

2310 self._buffer: List[Optional[Token]] = [] 

2311 self._index = 0 

2312 

2313 def peek(self, k: int = 1) -> Optional[Token]: 

2314 """Peek at the k-th next token in the stream, without consuming it. 

2315 

2316 Examples: 

2317 k=1 returns the immediate next token. 

2318 k=2 returns the token after that. 

2319 """ 

2320 if k <= 0: 

2321 return None 

2322 self._fill(k) 

2323 return self._buffer[self._index + k - 1] 

2324 

2325 def next(self) -> Optional[Token]: 

2326 """Consume a single token and return it.""" 

2327 self._fill(1) 

2328 tok = self._buffer[self._index] 

2329 self._index += 1 

2330 return tok 

2331 

2332 # The 'mark' and 'reset' methods are useful for speculative parsing with 

2333 # backtracking; when the parser needs to examine a sequence of tokens 

2334 # and potentially decide to try a different path on the same sequence, it 

2335 # can call 'mark' to obtain the current token position, and if the first 

2336 # path fails restore the position with `reset(pos)`. 

2337 def mark(self) -> int: 

2338 return self._index 

2339 

2340 def reset(self, mark: int) -> None: 

2341 self._index = mark 

2342 

2343 def _fill(self, n: int) -> None: 

2344 while len(self._buffer) < self._index + n: 

2345 tok = self._lexer.token() 

2346 self._buffer.append(tok) 

2347 if tok is None: 

2348 break 

2349 

2350 

2351# Declaration specifiers are represented by a dictionary with entries: 

2352# - qual: a list of type qualifiers 

2353# - storage: a list of storage class specifiers 

2354# - type: a list of type specifiers 

2355# - function: a list of function specifiers 

2356# - alignment: a list of alignment specifiers 

2357class _DeclSpec(TypedDict): 

2358 qual: List[Any] 

2359 storage: List[Any] 

2360 type: List[Any] 

2361 function: List[Any] 

2362 alignment: List[Any] 

2363 

2364 

2365_DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"] 

2366 

2367 

2368class _DeclInfo(TypedDict): 

2369 # Declarator payloads used by declaration/initializer parsing: 

2370 # - decl: the declarator node (may be None for abstract/implicit cases) 

2371 # - init: optional initializer expression 

2372 # - bitsize: optional bit-field width expression (for struct declarators) 

2373 decl: Optional[c_ast.Node] 

2374 init: Optional[c_ast.Node] 

2375 bitsize: Optional[c_ast.Node]