Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pycparser/c_parser.py: 55%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1311 statements  

1# ------------------------------------------------------------------------------ 

2# pycparser: c_parser.py 

3# 

4# Recursive-descent parser for the C language. 

5# 

6# Eli Bendersky [https://eli.thegreenplace.net/] 

7# License: BSD 

8# ------------------------------------------------------------------------------ 

9from dataclasses import dataclass 

10from typing import ( 

11 Any, 

12 Dict, 

13 List, 

14 Literal, 

15 NoReturn, 

16 Optional, 

17 Tuple, 

18 TypedDict, 

19 cast, 

20) 

21 

22from . import c_ast 

23from .c_lexer import CLexer, _Token 

24from .ast_transforms import fix_switch_cases, fix_atomic_specifiers 

25 

26 

27@dataclass 

28class Coord: 

29 """Coordinates of a syntactic element. Consists of: 

30 - File name 

31 - Line number 

32 - Column number 

33 """ 

34 

35 file: str 

36 line: int 

37 column: Optional[int] = None 

38 

39 def __str__(self) -> str: 

40 text = f"{self.file}:{self.line}" 

41 if self.column: 

42 text += f":{self.column}" 

43 return text 

44 

45 

46class ParseError(Exception): 

47 pass 

48 

49 

50class CParser: 

51 """Recursive-descent C parser. 

52 

53 Usage: 

54 parser = CParser() 

55 ast = parser.parse(text, filename) 

56 

57 The `lexer` parameter lets you inject a lexer class (defaults to CLexer). 

58 The parameters after `lexer` are accepted for backward compatibility with 

59 the old PLY-based parser and are otherwise unused. 

60 """ 

61 

62 def __init__( 

63 self, 

64 lex_optimize: bool = True, 

65 lexer: type[CLexer] = CLexer, 

66 lextab: str = "pycparser.lextab", 

67 yacc_optimize: bool = True, 

68 yacctab: str = "pycparser.yacctab", 

69 yacc_debug: bool = False, 

70 taboutputdir: str = "", 

71 ) -> None: 

72 self.clex: CLexer = lexer( 

73 error_func=self._lex_error_func, 

74 on_lbrace_func=self._lex_on_lbrace_func, 

75 on_rbrace_func=self._lex_on_rbrace_func, 

76 type_lookup_func=self._lex_type_lookup_func, 

77 ) 

78 

79 # Stack of scopes for keeping track of symbols. _scope_stack[-1] is 

80 # the current (topmost) scope. Each scope is a dictionary that 

81 # specifies whether a name is a type. If _scope_stack[n][name] is 

82 # True, 'name' is currently a type in the scope. If it's False, 

83 # 'name' is used in the scope but not as a type (for instance, if we 

84 # saw: int name; 

85 # If 'name' is not a key in _scope_stack[n] then 'name' was not defined 

86 # in this scope at all. 

87 self._scope_stack: List[Dict[str, bool]] = [dict()] 

88 self._tokens: _TokenStream = _TokenStream(self.clex) 

89 

90 def parse( 

91 self, text: str, filename: str = "", debug: bool = False 

92 ) -> c_ast.FileAST: 

93 """Parses C code and returns an AST. 

94 

95 text: 

96 A string containing the C source code 

97 

98 filename: 

99 Name of the file being parsed (for meaningful 

100 error messages) 

101 

102 debug: 

103 Deprecated debug flag (unused); for backwards compatibility. 

104 """ 

105 self._scope_stack = [dict()] 

106 self.clex.input(text, filename) 

107 self._tokens = _TokenStream(self.clex) 

108 

109 ast = self._parse_translation_unit_or_empty() 

110 tok = self._peek() 

111 if tok is not None: 

112 self._parse_error(f"before: {tok.value}", self._tok_coord(tok)) 

113 return ast 

114 

115 # ------------------------------------------------------------------ 

116 # Scope and declaration helpers 

117 # ------------------------------------------------------------------ 

118 def _coord(self, lineno: int, column: Optional[int] = None) -> Coord: 

119 return Coord(file=self.clex.filename, line=lineno, column=column) 

120 

121 def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn: 

122 raise ParseError(f"{coord}: {msg}") 

123 

124 def _push_scope(self) -> None: 

125 self._scope_stack.append(dict()) 

126 

127 def _pop_scope(self) -> None: 

128 assert len(self._scope_stack) > 1 

129 self._scope_stack.pop() 

130 

131 def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None: 

132 """Add a new typedef name (ie a TYPEID) to the current scope""" 

133 if not self._scope_stack[-1].get(name, True): 

134 self._parse_error( 

135 f"Typedef {name!r} previously declared as non-typedef in this scope", 

136 coord, 

137 ) 

138 self._scope_stack[-1][name] = True 

139 

140 def _add_identifier(self, name: str, coord: Optional[Coord]) -> None: 

141 """Add a new object, function, or enum member name (ie an ID) to the 

142 current scope 

143 """ 

144 if self._scope_stack[-1].get(name, False): 

145 self._parse_error( 

146 f"Non-typedef {name!r} previously declared as typedef in this scope", 

147 coord, 

148 ) 

149 self._scope_stack[-1][name] = False 

150 

151 def _is_type_in_scope(self, name: str) -> bool: 

152 """Is *name* a typedef-name in the current scope?""" 

153 for scope in reversed(self._scope_stack): 

154 # If name is an identifier in this scope it shadows typedefs in 

155 # higher scopes. 

156 in_scope = scope.get(name) 

157 if in_scope is not None: 

158 return in_scope 

159 return False 

160 

161 def _lex_error_func(self, msg: str, line: int, column: int) -> None: 

162 self._parse_error(msg, self._coord(line, column)) 

163 

164 def _lex_on_lbrace_func(self) -> None: 

165 self._push_scope() 

166 

167 def _lex_on_rbrace_func(self) -> None: 

168 self._pop_scope() 

169 

170 def _lex_type_lookup_func(self, name: str) -> bool: 

171 """Looks up types that were previously defined with 

172 typedef. 

173 Passed to the lexer for recognizing identifiers that 

174 are types. 

175 """ 

176 return self._is_type_in_scope(name) 

177 

178 # To understand what's going on here, read sections A.8.5 and 

179 # A.8.6 of K&R2 very carefully. 

180 # 

181 # A C type consists of a basic type declaration, with a list 

182 # of modifiers. For example: 

183 # 

184 # int *c[5]; 

185 # 

186 # The basic declaration here is 'int c', and the pointer and 

187 # the array are the modifiers. 

188 # 

189 # Basic declarations are represented by TypeDecl (from module c_ast) and the 

190 # modifiers are FuncDecl, PtrDecl and ArrayDecl. 

191 # 

192 # The standard states that whenever a new modifier is parsed, it should be 

193 # added to the end of the list of modifiers. For example: 

194 # 

195 # K&R2 A.8.6.2: Array Declarators 

196 # 

197 # In a declaration T D where D has the form 

198 # D1 [constant-expression-opt] 

199 # and the type of the identifier in the declaration T D1 is 

200 # "type-modifier T", the type of the 

201 # identifier of D is "type-modifier array of T" 

202 # 

203 # This is what this method does. The declarator it receives 

204 # can be a list of declarators ending with TypeDecl. It 

205 # tacks the modifier to the end of this list, just before 

206 # the TypeDecl. 

207 # 

208 # Additionally, the modifier may be a list itself. This is 

209 # useful for pointers, that can come as a chain from the rule 

210 # p_pointer. In this case, the whole modifier list is spliced 

211 # into the new location. 

212 def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node: 

213 """Tacks a type modifier on a declarator, and returns 

214 the modified declarator. 

215 

216 Note: the declarator and modifier may be modified 

217 """ 

218 modifier_head = modifier 

219 modifier_tail = modifier 

220 

221 # The modifier may be a nested list. Reach its tail. 

222 while modifier_tail.type: 

223 modifier_tail = modifier_tail.type 

224 

225 # If the decl is a basic type, just tack the modifier onto it. 

226 if isinstance(decl, c_ast.TypeDecl): 

227 modifier_tail.type = decl 

228 return modifier 

229 else: 

230 # Otherwise, the decl is a list of modifiers. Reach 

231 # its tail and splice the modifier onto the tail, 

232 # pointing to the underlying basic type. 

233 decl_tail = decl 

234 while not isinstance(decl_tail.type, c_ast.TypeDecl): 

235 decl_tail = decl_tail.type 

236 

237 modifier_tail.type = decl_tail.type 

238 decl_tail.type = modifier_head 

239 return decl 

240 

241 # Due to the order in which declarators are constructed, 

242 # they have to be fixed in order to look like a normal AST. 

243 # 

244 # When a declaration arrives from syntax construction, it has 

245 # these problems: 

246 # * The innermost TypeDecl has no type (because the basic 

247 # type is only known at the uppermost declaration level) 

248 # * The declaration has no variable name, since that is saved 

249 # in the innermost TypeDecl 

250 # * The typename of the declaration is a list of type 

251 # specifiers, and not a node. Here, basic identifier types 

252 # should be separated from more complex types like enums 

253 # and structs. 

254 # 

255 # This method fixes these problems. 

256 def _fix_decl_name_type( 

257 self, 

258 decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename, 

259 typename: List[Any], 

260 ) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename: 

261 """Fixes a declaration. Modifies decl.""" 

262 # Reach the underlying basic type 

263 typ = decl 

264 while not isinstance(typ, c_ast.TypeDecl): 

265 typ = typ.type 

266 

267 decl.name = typ.declname 

268 typ.quals = decl.quals[:] 

269 

270 # The typename is a list of types. If any type in this 

271 # list isn't an IdentifierType, it must be the only 

272 # type in the list (it's illegal to declare "int enum ..") 

273 # If all the types are basic, they're collected in the 

274 # IdentifierType holder. 

275 for tn in typename: 

276 if not isinstance(tn, c_ast.IdentifierType): 

277 if len(typename) > 1: 

278 self._parse_error("Invalid multiple types specified", tn.coord) 

279 else: 

280 typ.type = tn 

281 return decl 

282 

283 if not typename: 

284 # Functions default to returning int 

285 if not isinstance(decl.type, c_ast.FuncDecl): 

286 self._parse_error("Missing type in declaration", decl.coord) 

287 typ.type = c_ast.IdentifierType(["int"], coord=decl.coord) 

288 else: 

289 # At this point, we know that typename is a list of IdentifierType 

290 # nodes. Concatenate all the names into a single list. 

291 typ.type = c_ast.IdentifierType( 

292 [name for id in typename for name in id.names], coord=typename[0].coord 

293 ) 

294 return decl 

295 

296 def _add_declaration_specifier( 

297 self, 

298 declspec: Optional["_DeclSpec"], 

299 newspec: Any, 

300 kind: "_DeclSpecKind", 

301 append: bool = False, 

302 ) -> "_DeclSpec": 

303 """See _DeclSpec for the specifier dictionary layout.""" 

304 if declspec is None: 

305 spec: _DeclSpec = dict( 

306 qual=[], storage=[], type=[], function=[], alignment=[] 

307 ) 

308 else: 

309 spec = declspec 

310 

311 if append: 

312 spec[kind].append(newspec) 

313 else: 

314 spec[kind].insert(0, newspec) 

315 

316 return spec 

317 

318 def _build_declarations( 

319 self, 

320 spec: "_DeclSpec", 

321 decls: List["_DeclInfo"], 

322 typedef_namespace: bool = False, 

323 ) -> List[c_ast.Node]: 

324 """Builds a list of declarations all sharing the given specifiers. 

325 If typedef_namespace is true, each declared name is added 

326 to the "typedef namespace", which also includes objects, 

327 functions, and enum constants. 

328 """ 

329 is_typedef = "typedef" in spec["storage"] 

330 declarations = [] 

331 

332 # Bit-fields are allowed to be unnamed. 

333 if decls[0].get("bitsize") is None: 

334 # When redeclaring typedef names as identifiers in inner scopes, a 

335 # problem can occur where the identifier gets grouped into 

336 # spec['type'], leaving decl as None. This can only occur for the 

337 # first declarator. 

338 if decls[0]["decl"] is None: 

339 if ( 

340 len(spec["type"]) < 2 

341 or len(spec["type"][-1].names) != 1 

342 or not self._is_type_in_scope(spec["type"][-1].names[0]) 

343 ): 

344 coord = "?" 

345 for t in spec["type"]: 

346 if hasattr(t, "coord"): 

347 coord = t.coord 

348 break 

349 self._parse_error("Invalid declaration", coord) 

350 

351 # Make this look as if it came from "direct_declarator:ID" 

352 decls[0]["decl"] = c_ast.TypeDecl( 

353 declname=spec["type"][-1].names[0], 

354 type=None, 

355 quals=None, 

356 align=spec["alignment"], 

357 coord=spec["type"][-1].coord, 

358 ) 

359 # Remove the "new" type's name from the end of spec['type'] 

360 del spec["type"][-1] 

361 # A similar problem can occur where the declaration ends up 

362 # looking like an abstract declarator. Give it a name if this is 

363 # the case. 

364 elif not isinstance( 

365 decls[0]["decl"], 

366 (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType), 

367 ): 

368 decls_0_tail = cast(Any, decls[0]["decl"]) 

369 while not isinstance(decls_0_tail, c_ast.TypeDecl): 

370 decls_0_tail = decls_0_tail.type 

371 if decls_0_tail.declname is None: 

372 decls_0_tail.declname = spec["type"][-1].names[0] 

373 del spec["type"][-1] 

374 

375 for decl in decls: 

376 assert decl["decl"] is not None 

377 if is_typedef: 

378 declaration = c_ast.Typedef( 

379 name=None, 

380 quals=spec["qual"], 

381 storage=spec["storage"], 

382 type=decl["decl"], 

383 coord=decl["decl"].coord, 

384 ) 

385 else: 

386 declaration = c_ast.Decl( 

387 name=None, 

388 quals=spec["qual"], 

389 align=spec["alignment"], 

390 storage=spec["storage"], 

391 funcspec=spec["function"], 

392 type=decl["decl"], 

393 init=decl.get("init"), 

394 bitsize=decl.get("bitsize"), 

395 coord=decl["decl"].coord, 

396 ) 

397 

398 if isinstance( 

399 declaration.type, 

400 (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType), 

401 ): 

402 fixed_decl = declaration 

403 else: 

404 fixed_decl = self._fix_decl_name_type(declaration, spec["type"]) 

405 

406 # Add the type name defined by typedef to a 

407 # symbol table (for usage in the lexer) 

408 if typedef_namespace: 

409 if is_typedef: 

410 self._add_typedef_name(fixed_decl.name, fixed_decl.coord) 

411 else: 

412 self._add_identifier(fixed_decl.name, fixed_decl.coord) 

413 

414 fixed_decl = fix_atomic_specifiers( 

415 cast(c_ast.Decl | c_ast.Typedef, fixed_decl) 

416 ) 

417 declarations.append(fixed_decl) 

418 

419 return declarations 

420 

421 def _build_function_definition( 

422 self, 

423 spec: "_DeclSpec", 

424 decl: c_ast.Node, 

425 param_decls: Optional[List[c_ast.Node]], 

426 body: c_ast.Node, 

427 ) -> c_ast.Node: 

428 """Builds a function definition.""" 

429 if "typedef" in spec["storage"]: 

430 self._parse_error("Invalid typedef", decl.coord) 

431 

432 declaration = self._build_declarations( 

433 spec=spec, 

434 decls=[dict(decl=decl, init=None, bitsize=None)], 

435 typedef_namespace=True, 

436 )[0] 

437 

438 return c_ast.FuncDef( 

439 decl=declaration, param_decls=param_decls, body=body, coord=decl.coord 

440 ) 

441 

442 def _select_struct_union_class(self, token: str) -> type: 

443 """Given a token (either STRUCT or UNION), selects the 

444 appropriate AST class. 

445 """ 

446 if token == "struct": 

447 return c_ast.Struct 

448 else: 

449 return c_ast.Union 

450 

451 # ------------------------------------------------------------------ 

452 # Token helpers 

453 # ------------------------------------------------------------------ 

454 def _peek(self, k: int = 1) -> Optional[_Token]: 

455 """Return the k-th next token without consuming it (1-based).""" 

456 return self._tokens.peek(k) 

457 

458 def _peek_type(self, k: int = 1) -> Optional[str]: 

459 """Return the type of the k-th next token, or None if absent (1-based).""" 

460 tok = self._peek(k) 

461 return tok.type if tok is not None else None 

462 

463 def _advance(self) -> _Token: 

464 tok = self._tokens.next() 

465 if tok is None: 

466 self._parse_error("At end of input", self.clex.filename) 

467 else: 

468 return tok 

469 

470 def _accept(self, token_type: str) -> Optional[_Token]: 

471 """Conditionally consume next token, only if it's of token_type. 

472 

473 If it is of the expected type, consume and return it. 

474 Otherwise, leaves the token intact and returns None. 

475 """ 

476 tok = self._peek() 

477 if tok is not None and tok.type == token_type: 

478 return self._advance() 

479 return None 

480 

481 def _expect(self, token_type: str) -> _Token: 

482 tok = self._advance() 

483 if tok.type != token_type: 

484 self._parse_error(f"before: {tok.value}", self._tok_coord(tok)) 

485 return tok 

486 

487 def _mark(self) -> int: 

488 return self._tokens.mark() 

489 

490 def _reset(self, mark: int) -> None: 

491 self._tokens.reset(mark) 

492 

493 def _tok_coord(self, tok: _Token) -> Coord: 

494 return self._coord(tok.lineno, tok.column) 

495 

496 def _starts_declaration(self, tok: Optional[_Token] = None) -> bool: 

497 tok = tok or self._peek() 

498 if tok is None: 

499 return False 

500 return tok.type in _DECL_START 

501 

502 def _starts_expression(self, tok: Optional[_Token] = None) -> bool: 

503 tok = tok or self._peek() 

504 if tok is None: 

505 return False 

506 return tok.type in _STARTS_EXPRESSION 

507 

508 def _starts_statement(self) -> bool: 

509 tok_type = self._peek_type() 

510 if tok_type is None: 

511 return False 

512 if tok_type in _STARTS_STATEMENT: 

513 return True 

514 return self._starts_expression() 

515 

516 def _starts_declarator(self, id_only: bool = False) -> bool: 

517 tok_type = self._peek_type() 

518 if tok_type is None: 

519 return False 

520 if tok_type in {"TIMES", "LPAREN"}: 

521 return True 

522 if id_only: 

523 return tok_type == "ID" 

524 return tok_type in {"ID", "TYPEID"} 

525 

526 def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]: 

527 mark = self._mark() 

528 tok_type, saw_paren = self._scan_declarator_name_info() 

529 self._reset(mark) 

530 return tok_type, saw_paren 

531 

532 def _parse_any_declarator( 

533 self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False 

534 ) -> Tuple[Optional[c_ast.Node], bool]: 

535 # C declarators are ambiguous without lookahead. For example: 

536 # int foo(int (aa)); -> aa is a name (ID) 

537 # typedef char TT; 

538 # int bar(int (TT)); -> TT is a type (TYPEID) in parens 

539 name_type, saw_paren = self._peek_declarator_name_info() 

540 if name_type is None or ( 

541 typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren 

542 ): 

543 if not allow_abstract: 

544 tok = self._peek() 

545 coord = self._tok_coord(tok) if tok is not None else self.clex.filename 

546 self._parse_error("Invalid declarator", coord) 

547 decl = self._parse_abstract_declarator_opt() 

548 return decl, False 

549 

550 if name_type == "TYPEID": 

551 if typeid_paren_as_abstract: 

552 decl = self._parse_typeid_noparen_declarator() 

553 else: 

554 decl = self._parse_typeid_declarator() 

555 else: 

556 decl = self._parse_id_declarator() 

557 return decl, True 

558 

559 def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]: 

560 saw_paren = False 

561 while self._accept("TIMES"): 

562 while self._peek_type() in _TYPE_QUALIFIER: 

563 self._advance() 

564 

565 tok = self._peek() 

566 if tok is None: 

567 return None, saw_paren 

568 if tok.type in {"ID", "TYPEID"}: 

569 self._advance() 

570 return tok.type, saw_paren 

571 if tok.type == "LPAREN": 

572 saw_paren = True 

573 self._advance() 

574 tok_type, nested_paren = self._scan_declarator_name_info() 

575 if nested_paren: 

576 saw_paren = True 

577 depth = 1 

578 while True: 

579 tok = self._peek() 

580 if tok is None: 

581 return None, saw_paren 

582 if tok.type == "LPAREN": 

583 depth += 1 

584 elif tok.type == "RPAREN": 

585 depth -= 1 

586 self._advance() 

587 if depth == 0: 

588 break 

589 continue 

590 self._advance() 

591 return tok_type, saw_paren 

592 return None, saw_paren 

593 

594 def _starts_direct_abstract_declarator(self) -> bool: 

595 return self._peek_type() in {"LPAREN", "LBRACKET"} 

596 

597 def _is_assignment_op(self) -> bool: 

598 tok = self._peek() 

599 return tok is not None and tok.type in _ASSIGNMENT_OPS 

600 

601 def _try_parse_paren_type_name( 

602 self, 

603 ) -> Optional[Tuple[c_ast.Typename, int, _Token]]: 

604 """Parse and return a parenthesized type name if present. 

605 

606 Returns (typ, mark, lparen_tok) when the next tokens look like 

607 '(' type_name ')', where typ is the parsed type name, mark is the 

608 token-stream position before parsing, and lparen_tok is the LPAREN 

609 token. Returns None if no parenthesized type name is present. 

610 """ 

611 mark = self._mark() 

612 lparen_tok = self._accept("LPAREN") 

613 if lparen_tok is None: 

614 return None 

615 if not self._starts_declaration(): 

616 self._reset(mark) 

617 return None 

618 typ = self._parse_type_name() 

619 if self._accept("RPAREN") is None: 

620 self._reset(mark) 

621 return None 

622 return typ, mark, lparen_tok 

623 

624 # ------------------------------------------------------------------ 

625 # Top-level 

626 # ------------------------------------------------------------------ 

627 # BNF: translation_unit_or_empty : translation_unit | empty 

628 def _parse_translation_unit_or_empty(self) -> c_ast.FileAST: 

629 if self._peek() is None: 

630 return c_ast.FileAST([]) 

631 return c_ast.FileAST(self._parse_translation_unit()) 

632 

633 # BNF: translation_unit : external_declaration+ 

634 def _parse_translation_unit(self) -> List[c_ast.Node]: 

635 ext = [] 

636 while self._peek() is not None: 

637 ext.extend(self._parse_external_declaration()) 

638 return ext 

639 

640 # BNF: external_declaration : function_definition 

641 # | declaration 

642 # | pp_directive 

643 # | pppragma_directive 

644 # | static_assert 

645 # | ';' 

646 def _parse_external_declaration(self) -> List[c_ast.Node]: 

647 tok = self._peek() 

648 if tok is None: 

649 return [] 

650 if tok.type == "PPHASH": 

651 self._parse_pp_directive() 

652 return [] 

653 if tok.type in {"PPPRAGMA", "_PRAGMA"}: 

654 return [self._parse_pppragma_directive()] 

655 if self._accept("SEMI"): 

656 return [] 

657 if tok.type == "_STATIC_ASSERT": 

658 return self._parse_static_assert() 

659 

660 if not self._starts_declaration(tok): 

661 # Special handling for old-style function definitions that have an 

662 # implicit return type, e.g. 

663 # 

664 # foo() { 

665 # return 5; 

666 # } 

667 # 

668 # These get an implicit 'int' return type. 

669 decl = self._parse_id_declarator() 

670 param_decls = None 

671 if self._peek_type() != "LBRACE": 

672 self._parse_error("Invalid function definition", decl.coord) 

673 spec: _DeclSpec = dict( 

674 qual=[], 

675 alignment=[], 

676 storage=[], 

677 type=[c_ast.IdentifierType(["int"], coord=decl.coord)], 

678 function=[], 

679 ) 

680 func = self._build_function_definition( 

681 spec=spec, 

682 decl=decl, 

683 param_decls=param_decls, 

684 body=self._parse_compound_statement(), 

685 ) 

686 return [func] 

687 

688 # From here on, parsing a standard declatation/definition. 

689 spec, saw_type, spec_coord = self._parse_declaration_specifiers( 

690 allow_no_type=True 

691 ) 

692 

693 name_type, _ = self._peek_declarator_name_info() 

694 if name_type != "ID": 

695 decls = self._parse_decl_body_with_spec(spec, saw_type) 

696 self._expect("SEMI") 

697 return decls 

698 

699 decl = self._parse_id_declarator() 

700 

701 if self._peek_type() == "LBRACE" or self._starts_declaration(): 

702 param_decls = None 

703 if self._starts_declaration(): 

704 param_decls = self._parse_declaration_list() 

705 if self._peek_type() != "LBRACE": 

706 self._parse_error("Invalid function definition", decl.coord) 

707 if not spec["type"]: 

708 spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)] 

709 func = self._build_function_definition( 

710 spec=spec, 

711 decl=decl, 

712 param_decls=param_decls, 

713 body=self._parse_compound_statement(), 

714 ) 

715 return [func] 

716 

717 decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None) 

718 if self._accept("EQUALS"): 

719 decl_dict["init"] = self._parse_initializer() 

720 decls = self._parse_init_declarator_list(first=decl_dict) 

721 decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True) 

722 self._expect("SEMI") 

723 return decls 

724 

725 # ------------------------------------------------------------------ 

726 # Declarations 

727 # 

728 # Declarations always come as lists (because they can be several in one 

729 # line). When returning parsed declarations, a list is always returned - 

730 # even if it contains a single element. 

731 # ------------------------------------------------------------------ 

732 def _parse_declaration(self) -> List[c_ast.Node]: 

733 decls = self._parse_decl_body() 

734 self._expect("SEMI") 

735 return decls 

736 

737 # BNF: decl_body : declaration_specifiers decl_body_with_spec 

738 def _parse_decl_body(self) -> List[c_ast.Node]: 

739 spec, saw_type, _ = self._parse_declaration_specifiers(allow_no_type=True) 

740 return self._parse_decl_body_with_spec(spec, saw_type) 

741 

742 # BNF: decl_body_with_spec : init_declarator_list 

743 # | struct_or_union_or_enum_only 

744 def _parse_decl_body_with_spec( 

745 self, spec: "_DeclSpec", saw_type: bool 

746 ) -> List[c_ast.Node]: 

747 decls = None 

748 if saw_type: 

749 if self._starts_declarator(): 

750 decls = self._parse_init_declarator_list() 

751 else: 

752 if self._starts_declarator(id_only=True): 

753 decls = self._parse_init_declarator_list(id_only=True) 

754 

755 if decls is None: 

756 ty = spec["type"] 

757 s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum) 

758 if len(ty) == 1 and isinstance(ty[0], s_u_or_e): 

759 decls = [ 

760 c_ast.Decl( 

761 name=None, 

762 quals=spec["qual"], 

763 align=spec["alignment"], 

764 storage=spec["storage"], 

765 funcspec=spec["function"], 

766 type=ty[0], 

767 init=None, 

768 bitsize=None, 

769 coord=ty[0].coord, 

770 ) 

771 ] 

772 else: 

773 decls = self._build_declarations( 

774 spec=spec, 

775 decls=[dict(decl=None, init=None, bitsize=None)], 

776 typedef_namespace=True, 

777 ) 

778 else: 

779 decls = self._build_declarations( 

780 spec=spec, decls=decls, typedef_namespace=True 

781 ) 

782 

783 return decls 

784 

785 # BNF: declaration_list : declaration+ 

786 def _parse_declaration_list(self) -> List[c_ast.Node]: 

787 decls = [] 

788 while self._starts_declaration(): 

789 decls.extend(self._parse_declaration()) 

790 return decls 

791 

792 # BNF: declaration_specifiers : (storage_class_specifier 

793 # | type_specifier 

794 # | type_qualifier 

795 # | function_specifier 

796 # | alignment_specifier)+ 

797 def _parse_declaration_specifiers( 

798 self, allow_no_type: bool = False 

799 ) -> Tuple["_DeclSpec", bool, Optional[Coord]]: 

800 """Parse declaration-specifier sequence. 

801 

802 allow_no_type: 

803 If True, allow a missing type specifier without error. 

804 

805 Returns: 

806 (spec, saw_type, first_coord) where spec is a dict with 

807 qual/storage/type/function/alignment entries, saw_type is True 

808 if a type specifier was consumed, and first_coord is the coord 

809 of the first specifier token (used for diagnostics). 

810 """ 

811 spec = None 

812 saw_type = False 

813 first_coord = None 

814 

815 while True: 

816 tok = self._peek() 

817 if tok is None: 

818 break 

819 

820 if tok.type == "_ALIGNAS": 

821 if first_coord is None: 

822 first_coord = self._tok_coord(tok) 

823 spec = self._add_declaration_specifier( 

824 spec, self._parse_alignment_specifier(), "alignment", append=True 

825 ) 

826 continue 

827 

828 if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN": 

829 if first_coord is None: 

830 first_coord = self._tok_coord(tok) 

831 spec = self._add_declaration_specifier( 

832 spec, self._parse_atomic_specifier(), "type", append=True 

833 ) 

834 saw_type = True 

835 continue 

836 

837 if tok.type in _TYPE_QUALIFIER: 

838 if first_coord is None: 

839 first_coord = self._tok_coord(tok) 

840 spec = self._add_declaration_specifier( 

841 spec, self._advance().value, "qual", append=True 

842 ) 

843 continue 

844 

845 if tok.type in _STORAGE_CLASS: 

846 if first_coord is None: 

847 first_coord = self._tok_coord(tok) 

848 spec = self._add_declaration_specifier( 

849 spec, self._advance().value, "storage", append=True 

850 ) 

851 continue 

852 

853 if tok.type in _FUNCTION_SPEC: 

854 if first_coord is None: 

855 first_coord = self._tok_coord(tok) 

856 spec = self._add_declaration_specifier( 

857 spec, self._advance().value, "function", append=True 

858 ) 

859 continue 

860 

861 if tok.type in _TYPE_SPEC_SIMPLE: 

862 if first_coord is None: 

863 first_coord = self._tok_coord(tok) 

864 tok = self._advance() 

865 spec = self._add_declaration_specifier( 

866 spec, 

867 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

868 "type", 

869 append=True, 

870 ) 

871 saw_type = True 

872 continue 

873 

874 if tok.type == "TYPEID": 

875 if saw_type: 

876 break 

877 if first_coord is None: 

878 first_coord = self._tok_coord(tok) 

879 tok = self._advance() 

880 spec = self._add_declaration_specifier( 

881 spec, 

882 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

883 "type", 

884 append=True, 

885 ) 

886 saw_type = True 

887 continue 

888 

889 if tok.type in {"STRUCT", "UNION"}: 

890 if first_coord is None: 

891 first_coord = self._tok_coord(tok) 

892 spec = self._add_declaration_specifier( 

893 spec, self._parse_struct_or_union_specifier(), "type", append=True 

894 ) 

895 saw_type = True 

896 continue 

897 

898 if tok.type == "ENUM": 

899 if first_coord is None: 

900 first_coord = self._tok_coord(tok) 

901 spec = self._add_declaration_specifier( 

902 spec, self._parse_enum_specifier(), "type", append=True 

903 ) 

904 saw_type = True 

905 continue 

906 

907 break 

908 

909 if spec is None: 

910 self._parse_error("Invalid declaration", self.clex.filename) 

911 

912 if not saw_type and not allow_no_type: 

913 self._parse_error("Missing type in declaration", first_coord) 

914 

915 return spec, saw_type, first_coord 

916 

917 # BNF: specifier_qualifier_list : (type_specifier 

918 # | type_qualifier 

919 # | alignment_specifier)+ 

920 def _parse_specifier_qualifier_list(self) -> "_DeclSpec": 

921 spec = None 

922 saw_type = False 

923 saw_alignment = False 

924 first_coord = None 

925 

926 while True: 

927 tok = self._peek() 

928 if tok is None: 

929 break 

930 

931 if tok.type == "_ALIGNAS": 

932 if first_coord is None: 

933 first_coord = self._tok_coord(tok) 

934 spec = self._add_declaration_specifier( 

935 spec, self._parse_alignment_specifier(), "alignment", append=True 

936 ) 

937 saw_alignment = True 

938 continue 

939 

940 if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN": 

941 if first_coord is None: 

942 first_coord = self._tok_coord(tok) 

943 spec = self._add_declaration_specifier( 

944 spec, self._parse_atomic_specifier(), "type", append=True 

945 ) 

946 saw_type = True 

947 continue 

948 

949 if tok.type in _TYPE_QUALIFIER: 

950 if first_coord is None: 

951 first_coord = self._tok_coord(tok) 

952 spec = self._add_declaration_specifier( 

953 spec, self._advance().value, "qual", append=True 

954 ) 

955 continue 

956 

957 if tok.type in _TYPE_SPEC_SIMPLE: 

958 if first_coord is None: 

959 first_coord = self._tok_coord(tok) 

960 tok = self._advance() 

961 spec = self._add_declaration_specifier( 

962 spec, 

963 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

964 "type", 

965 append=True, 

966 ) 

967 saw_type = True 

968 continue 

969 

970 if tok.type == "TYPEID": 

971 if saw_type: 

972 break 

973 if first_coord is None: 

974 first_coord = self._tok_coord(tok) 

975 tok = self._advance() 

976 spec = self._add_declaration_specifier( 

977 spec, 

978 c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)), 

979 "type", 

980 append=True, 

981 ) 

982 saw_type = True 

983 continue 

984 

985 if tok.type in {"STRUCT", "UNION"}: 

986 if first_coord is None: 

987 first_coord = self._tok_coord(tok) 

988 spec = self._add_declaration_specifier( 

989 spec, self._parse_struct_or_union_specifier(), "type", append=True 

990 ) 

991 saw_type = True 

992 continue 

993 

994 if tok.type == "ENUM": 

995 if first_coord is None: 

996 first_coord = self._tok_coord(tok) 

997 spec = self._add_declaration_specifier( 

998 spec, self._parse_enum_specifier(), "type", append=True 

999 ) 

1000 saw_type = True 

1001 continue 

1002 

1003 break 

1004 

1005 if spec is None: 

1006 self._parse_error("Invalid specifier list", self.clex.filename) 

1007 

1008 if not saw_type and not saw_alignment: 

1009 self._parse_error("Missing type in declaration", first_coord) 

1010 

1011 if spec.get("storage") is None: 

1012 spec["storage"] = [] 

1013 if spec.get("function") is None: 

1014 spec["function"] = [] 

1015 

1016 return spec 

1017 

1018 # BNF: type_qualifier_list : type_qualifier+ 

1019 def _parse_type_qualifier_list(self) -> List[str]: 

1020 quals = [] 

1021 while self._peek_type() in _TYPE_QUALIFIER: 

1022 quals.append(self._advance().value) 

1023 return quals 

1024 

1025 # BNF: alignment_specifier : _ALIGNAS '(' type_name | constant_expression ')' 

1026 def _parse_alignment_specifier(self) -> c_ast.Node: 

1027 tok = self._expect("_ALIGNAS") 

1028 self._expect("LPAREN") 

1029 

1030 if self._starts_declaration(): 

1031 typ = self._parse_type_name() 

1032 self._expect("RPAREN") 

1033 return c_ast.Alignas(typ, self._tok_coord(tok)) 

1034 

1035 expr = self._parse_constant_expression() 

1036 self._expect("RPAREN") 

1037 return c_ast.Alignas(expr, self._tok_coord(tok)) 

1038 

1039 # BNF: atomic_specifier : _ATOMIC '(' type_name ')' 

1040 def _parse_atomic_specifier(self) -> c_ast.Node: 

1041 self._expect("_ATOMIC") 

1042 self._expect("LPAREN") 

1043 typ = self._parse_type_name() 

1044 self._expect("RPAREN") 

1045 typ.quals.append("_Atomic") 

1046 return typ 

1047 

1048 # BNF: init_declarator_list : init_declarator (',' init_declarator)* 

1049 def _parse_init_declarator_list( 

1050 self, first: Optional["_DeclInfo"] = None, id_only: bool = False 

1051 ) -> List["_DeclInfo"]: 

1052 decls = ( 

1053 [first] 

1054 if first is not None 

1055 else [self._parse_init_declarator(id_only=id_only)] 

1056 ) 

1057 

1058 while self._accept("COMMA"): 

1059 decls.append(self._parse_init_declarator(id_only=id_only)) 

1060 return decls 

1061 

1062 # BNF: init_declarator : declarator ('=' initializer)? 

1063 def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo": 

1064 decl = self._parse_id_declarator() if id_only else self._parse_declarator() 

1065 init = None 

1066 if self._accept("EQUALS"): 

1067 init = self._parse_initializer() 

1068 return dict(decl=decl, init=init, bitsize=None) 

1069 

1070 # ------------------------------------------------------------------ 

1071 # Structs/unions/enums 

1072 # ------------------------------------------------------------------ 

1073 # BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}' 

1074 # | struct_or_union ID 

1075 def _parse_struct_or_union_specifier(self) -> c_ast.Node: 

1076 tok = self._advance() 

1077 klass = self._select_struct_union_class(tok.value) 

1078 

1079 if self._peek_type() in {"ID", "TYPEID"}: 

1080 name_tok = self._advance() 

1081 if self._peek_type() == "LBRACE": 

1082 self._advance() 

1083 if self._accept("RBRACE"): 

1084 return klass( 

1085 name=name_tok.value, decls=[], coord=self._tok_coord(name_tok) 

1086 ) 

1087 decls = self._parse_struct_declaration_list() 

1088 self._expect("RBRACE") 

1089 return klass( 

1090 name=name_tok.value, decls=decls, coord=self._tok_coord(name_tok) 

1091 ) 

1092 

1093 return klass( 

1094 name=name_tok.value, decls=None, coord=self._tok_coord(name_tok) 

1095 ) 

1096 

1097 if self._peek_type() == "LBRACE": 

1098 brace_tok = self._advance() 

1099 if self._accept("RBRACE"): 

1100 return klass(name=None, decls=[], coord=self._tok_coord(brace_tok)) 

1101 decls = self._parse_struct_declaration_list() 

1102 self._expect("RBRACE") 

1103 return klass(name=None, decls=decls, coord=self._tok_coord(brace_tok)) 

1104 

1105 self._parse_error("Invalid struct/union declaration", self._tok_coord(tok)) 

1106 

1107 # BNF: struct_declaration_list : struct_declaration+ 

1108 def _parse_struct_declaration_list(self) -> List[c_ast.Node]: 

1109 decls = [] 

1110 while self._peek_type() not in {None, "RBRACE"}: 

1111 items = self._parse_struct_declaration() 

1112 if items is None: 

1113 continue 

1114 decls.extend(items) 

1115 return decls 

1116 

1117 # BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';' 

1118 # | static_assert 

1119 # | pppragma_directive 

1120 def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]: 

1121 if self._peek_type() == "SEMI": 

1122 self._advance() 

1123 return None 

1124 if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}: 

1125 return [self._parse_pppragma_directive()] 

1126 

1127 spec = self._parse_specifier_qualifier_list() 

1128 assert "typedef" not in spec.get("storage", []) 

1129 

1130 decls = None 

1131 if self._starts_declarator() or self._peek_type() == "COLON": 

1132 decls = self._parse_struct_declarator_list() 

1133 if decls is not None: 

1134 self._expect("SEMI") 

1135 return self._build_declarations(spec=spec, decls=decls) 

1136 

1137 if len(spec["type"]) == 1: 

1138 node = spec["type"][0] 

1139 if isinstance(node, c_ast.Node): 

1140 decl_type = node 

1141 else: 

1142 decl_type = c_ast.IdentifierType(node) 

1143 self._expect("SEMI") 

1144 return self._build_declarations( 

1145 spec=spec, decls=[dict(decl=decl_type, init=None, bitsize=None)] 

1146 ) 

1147 

1148 self._expect("SEMI") 

1149 return self._build_declarations( 

1150 spec=spec, decls=[dict(decl=None, init=None, bitsize=None)] 

1151 ) 

1152 

1153 # BNF: struct_declarator_list : struct_declarator (',' struct_declarator)* 

1154 def _parse_struct_declarator_list(self) -> List["_DeclInfo"]: 

1155 decls = [self._parse_struct_declarator()] 

1156 while self._accept("COMMA"): 

1157 decls.append(self._parse_struct_declarator()) 

1158 return decls 

1159 

1160 # BNF: struct_declarator : declarator? ':' constant_expression 

1161 # | declarator (':' constant_expression)? 

1162 def _parse_struct_declarator(self) -> "_DeclInfo": 

1163 if self._accept("COLON"): 

1164 bitsize = self._parse_constant_expression() 

1165 return { 

1166 "decl": c_ast.TypeDecl(None, None, None, None), 

1167 "init": None, 

1168 "bitsize": bitsize, 

1169 } 

1170 

1171 decl = self._parse_declarator() 

1172 if self._accept("COLON"): 

1173 bitsize = self._parse_constant_expression() 

1174 return {"decl": decl, "init": None, "bitsize": bitsize} 

1175 

1176 return {"decl": decl, "init": None, "bitsize": None} 

1177 

1178 # BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}' 

1179 # | ENUM ID 

1180 def _parse_enum_specifier(self) -> c_ast.Node: 

1181 tok = self._expect("ENUM") 

1182 if self._peek_type() in {"ID", "TYPEID"}: 

1183 name_tok = self._advance() 

1184 if self._peek_type() == "LBRACE": 

1185 self._advance() 

1186 enums = self._parse_enumerator_list() 

1187 self._expect("RBRACE") 

1188 return c_ast.Enum(name_tok.value, enums, self._tok_coord(tok)) 

1189 return c_ast.Enum(name_tok.value, None, self._tok_coord(tok)) 

1190 

1191 self._expect("LBRACE") 

1192 enums = self._parse_enumerator_list() 

1193 self._expect("RBRACE") 

1194 return c_ast.Enum(None, enums, self._tok_coord(tok)) 

1195 

1196 # BNF: enumerator_list : enumerator (',' enumerator)* ','? 

1197 def _parse_enumerator_list(self) -> c_ast.Node: 

1198 enum = self._parse_enumerator() 

1199 enum_list = c_ast.EnumeratorList([enum], enum.coord) 

1200 while self._accept("COMMA"): 

1201 if self._peek_type() == "RBRACE": 

1202 break 

1203 enum = self._parse_enumerator() 

1204 enum_list.enumerators.append(enum) 

1205 return enum_list 

1206 

1207 # BNF: enumerator : ID ('=' constant_expression)? 

1208 def _parse_enumerator(self) -> c_ast.Node: 

1209 name_tok = self._expect("ID") 

1210 if self._accept("EQUALS"): 

1211 value = self._parse_constant_expression() 

1212 else: 

1213 value = None 

1214 enum = c_ast.Enumerator(name_tok.value, value, self._tok_coord(name_tok)) 

1215 self._add_identifier(enum.name, enum.coord) 

1216 return enum 

1217 

1218 # ------------------------------------------------------------------ 

1219 # Declarators 

1220 # ------------------------------------------------------------------ 

1221 # BNF: declarator : pointer? direct_declarator 

1222 def _parse_declarator(self) -> c_ast.Node: 

1223 decl, _ = self._parse_any_declarator( 

1224 allow_abstract=False, typeid_paren_as_abstract=False 

1225 ) 

1226 assert decl is not None 

1227 return decl 

1228 

1229 # BNF: id_declarator : declarator with ID name 

1230 def _parse_id_declarator(self) -> c_ast.Node: 

1231 return self._parse_declarator_kind(kind="id", allow_paren=True) 

1232 

1233 # BNF: typeid_declarator : declarator with TYPEID name 

1234 def _parse_typeid_declarator(self) -> c_ast.Node: 

1235 return self._parse_declarator_kind(kind="typeid", allow_paren=True) 

1236 

1237 # BNF: typeid_noparen_declarator : declarator without parenthesized name 

1238 def _parse_typeid_noparen_declarator(self) -> c_ast.Node: 

1239 return self._parse_declarator_kind(kind="typeid", allow_paren=False) 

1240 

1241 # BNF: declarator_kind : pointer? direct_declarator(kind) 

1242 def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node: 

1243 ptr = None 

1244 if self._peek_type() == "TIMES": 

1245 ptr = self._parse_pointer() 

1246 direct = self._parse_direct_declarator(kind, allow_paren=allow_paren) 

1247 if ptr is not None: 

1248 return self._type_modify_decl(direct, ptr) 

1249 return direct 

1250 

1251 # BNF: direct_declarator : ID | TYPEID | '(' declarator ')' 

1252 # | direct_declarator '[' ... ']' 

1253 # | direct_declarator '(' ... ')' 

1254 def _parse_direct_declarator( 

1255 self, kind: str, allow_paren: bool = True 

1256 ) -> c_ast.Node: 

1257 if allow_paren and self._accept("LPAREN"): 

1258 decl = self._parse_declarator_kind(kind, allow_paren=True) 

1259 self._expect("RPAREN") 

1260 else: 

1261 if kind == "id": 

1262 name_tok = self._expect("ID") 

1263 else: 

1264 name_tok = self._expect("TYPEID") 

1265 decl = c_ast.TypeDecl( 

1266 declname=name_tok.value, 

1267 type=None, 

1268 quals=None, 

1269 align=None, 

1270 coord=self._tok_coord(name_tok), 

1271 ) 

1272 

1273 return self._parse_decl_suffixes(decl) 

1274 

1275 def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node: 

1276 """Parse a chain of array/function suffixes and attach them to decl.""" 

1277 while True: 

1278 if self._peek_type() == "LBRACKET": 

1279 decl = self._type_modify_decl(decl, self._parse_array_decl(decl)) 

1280 continue 

1281 if self._peek_type() == "LPAREN": 

1282 func = self._parse_function_decl(decl) 

1283 decl = self._type_modify_decl(decl, func) 

1284 continue 

1285 break 

1286 return decl 

1287 

1288 # BNF: array_decl : '[' array_specifiers? assignment_expression? ']' 

1289 def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node: 

1290 return self._parse_array_decl_common(base_type=None, coord=base_decl.coord) 

1291 

1292 def _parse_array_decl_common( 

1293 self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None 

1294 ) -> c_ast.Node: 

1295 """Parse an array declarator suffix and return an ArrayDecl node. 

1296 

1297 base_type: 

1298 Base declarator node to attach (None for direct-declarator parsing, 

1299 TypeDecl for abstract declarators). 

1300 

1301 coord: 

1302 Coordinate to use for the ArrayDecl. If None, uses the '[' token. 

1303 """ 

1304 lbrack_tok = self._expect("LBRACKET") 

1305 if coord is None: 

1306 coord = self._tok_coord(lbrack_tok) 

1307 

1308 def make_array_decl(dim, dim_quals): 

1309 return c_ast.ArrayDecl( 

1310 type=base_type, dim=dim, dim_quals=dim_quals, coord=coord 

1311 ) 

1312 

1313 if self._accept("STATIC"): 

1314 dim_quals = ["static"] + (self._parse_type_qualifier_list() or []) 

1315 dim = self._parse_assignment_expression() 

1316 self._expect("RBRACKET") 

1317 return make_array_decl(dim, dim_quals) 

1318 

1319 if self._peek_type() in _TYPE_QUALIFIER: 

1320 dim_quals = self._parse_type_qualifier_list() or [] 

1321 if self._accept("STATIC"): 

1322 dim_quals = dim_quals + ["static"] 

1323 dim = self._parse_assignment_expression() 

1324 self._expect("RBRACKET") 

1325 return make_array_decl(dim, dim_quals) 

1326 times_tok = self._accept("TIMES") 

1327 if times_tok: 

1328 self._expect("RBRACKET") 

1329 dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok)) 

1330 return make_array_decl(dim, dim_quals) 

1331 dim = None 

1332 if self._starts_expression(): 

1333 dim = self._parse_assignment_expression() 

1334 self._expect("RBRACKET") 

1335 return make_array_decl(dim, dim_quals) 

1336 

1337 times_tok = self._accept("TIMES") 

1338 if times_tok: 

1339 self._expect("RBRACKET") 

1340 dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok)) 

1341 return make_array_decl(dim, []) 

1342 

1343 dim = None 

1344 if self._starts_expression(): 

1345 dim = self._parse_assignment_expression() 

1346 self._expect("RBRACKET") 

1347 return make_array_decl(dim, []) 

1348 

1349 # BNF: function_decl : '(' parameter_type_list_opt | identifier_list_opt ')' 

1350 def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node: 

1351 self._expect("LPAREN") 

1352 if self._accept("RPAREN"): 

1353 args = None 

1354 else: 

1355 args = ( 

1356 self._parse_parameter_type_list() 

1357 if self._starts_declaration() 

1358 else self._parse_identifier_list_opt() 

1359 ) 

1360 self._expect("RPAREN") 

1361 

1362 func = c_ast.FuncDecl(args=args, type=None, coord=base_decl.coord) 

1363 

1364 if self._peek_type() == "LBRACE": 

1365 if func.args is not None: 

1366 for param in func.args.params: 

1367 if isinstance(param, c_ast.EllipsisParam): 

1368 break 

1369 name = getattr(param, "name", None) 

1370 if name: 

1371 self._add_identifier(name, param.coord) 

1372 

1373 return func 

1374 

1375 # BNF: pointer : '*' type_qualifier_list? pointer? 

1376 def _parse_pointer(self) -> Optional[c_ast.Node]: 

1377 stars = [] 

1378 times_tok = self._accept("TIMES") 

1379 while times_tok: 

1380 quals = self._parse_type_qualifier_list() or [] 

1381 stars.append((quals, self._tok_coord(times_tok))) 

1382 times_tok = self._accept("TIMES") 

1383 

1384 if not stars: 

1385 return None 

1386 

1387 ptr = None 

1388 for quals, coord in stars: 

1389 ptr = c_ast.PtrDecl(quals=quals, type=ptr, coord=coord) 

1390 return ptr 

1391 

1392 # BNF: parameter_type_list : parameter_list (',' ELLIPSIS)? 

1393 def _parse_parameter_type_list(self) -> c_ast.ParamList: 

1394 params = self._parse_parameter_list() 

1395 if self._peek_type() == "COMMA" and self._peek_type(2) == "ELLIPSIS": 

1396 self._advance() 

1397 ell_tok = self._advance() 

1398 params.params.append(c_ast.EllipsisParam(self._tok_coord(ell_tok))) 

1399 return params 

1400 

1401 # BNF: parameter_list : parameter_declaration (',' parameter_declaration)* 

1402 def _parse_parameter_list(self) -> c_ast.ParamList: 

1403 first = self._parse_parameter_declaration() 

1404 params = c_ast.ParamList([first], first.coord) 

1405 while self._peek_type() == "COMMA" and self._peek_type(2) != "ELLIPSIS": 

1406 self._advance() 

1407 params.params.append(self._parse_parameter_declaration()) 

1408 return params 

1409 

1410 # BNF: parameter_declaration : declaration_specifiers declarator? 

1411 # | declaration_specifiers abstract_declarator_opt 

1412 def _parse_parameter_declaration(self) -> c_ast.Node: 

1413 spec, _, spec_coord = self._parse_declaration_specifiers(allow_no_type=True) 

1414 

1415 if not spec["type"]: 

1416 spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)] 

1417 

1418 if self._starts_declarator(): 

1419 decl, is_named = self._parse_any_declarator( 

1420 allow_abstract=True, typeid_paren_as_abstract=True 

1421 ) 

1422 if is_named: 

1423 return self._build_declarations( 

1424 spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)] 

1425 )[0] 

1426 return self._build_parameter_declaration(spec, decl, spec_coord) 

1427 

1428 decl = self._parse_abstract_declarator_opt() 

1429 return self._build_parameter_declaration(spec, decl, spec_coord) 

1430 

1431 def _build_parameter_declaration( 

1432 self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord] 

1433 ) -> c_ast.Node: 

1434 if ( 

1435 len(spec["type"]) > 1 

1436 and len(spec["type"][-1].names) == 1 

1437 and self._is_type_in_scope(spec["type"][-1].names[0]) 

1438 ): 

1439 return self._build_declarations( 

1440 spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)] 

1441 )[0] 

1442 

1443 decl = c_ast.Typename( 

1444 name="", 

1445 quals=spec["qual"], 

1446 align=None, 

1447 type=decl or c_ast.TypeDecl(None, None, None, None), 

1448 coord=spec_coord, 

1449 ) 

1450 return self._fix_decl_name_type(decl, spec["type"]) 

1451 

1452 # BNF: identifier_list_opt : identifier_list | empty 

1453 def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]: 

1454 if self._peek_type() == "RPAREN": 

1455 return None 

1456 return self._parse_identifier_list() 

1457 

1458 # BNF: identifier_list : identifier (',' identifier)* 

1459 def _parse_identifier_list(self) -> c_ast.Node: 

1460 first = self._parse_identifier() 

1461 params = c_ast.ParamList([first], first.coord) 

1462 while self._accept("COMMA"): 

1463 params.params.append(self._parse_identifier()) 

1464 return params 

1465 

1466 # ------------------------------------------------------------------ 

1467 # Abstract declarators 

1468 # ------------------------------------------------------------------ 

1469 # BNF: type_name : specifier_qualifier_list abstract_declarator_opt 

1470 def _parse_type_name(self) -> c_ast.Typename: 

1471 spec = self._parse_specifier_qualifier_list() 

1472 decl = self._parse_abstract_declarator_opt() 

1473 

1474 coord = None 

1475 if decl is not None: 

1476 coord = decl.coord 

1477 elif spec["type"]: 

1478 coord = spec["type"][0].coord 

1479 

1480 typename = c_ast.Typename( 

1481 name="", 

1482 quals=spec["qual"][:], 

1483 align=None, 

1484 type=decl or c_ast.TypeDecl(None, None, None, None), 

1485 coord=coord, 

1486 ) 

1487 return cast(c_ast.Typename, self._fix_decl_name_type(typename, spec["type"])) 

1488 

1489 # BNF: abstract_declarator_opt : pointer? direct_abstract_declarator? 

1490 def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]: 

1491 if self._peek_type() == "TIMES": 

1492 ptr = self._parse_pointer() 

1493 if self._starts_direct_abstract_declarator(): 

1494 decl = self._parse_direct_abstract_declarator() 

1495 else: 

1496 decl = c_ast.TypeDecl(None, None, None, None) 

1497 assert ptr is not None 

1498 return self._type_modify_decl(decl, ptr) 

1499 

1500 if self._starts_direct_abstract_declarator(): 

1501 return self._parse_direct_abstract_declarator() 

1502 

1503 return None 

1504 

1505 # BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')' 

1506 # | '(' abstract_declarator ')' 

1507 # | '[' ... ']' 

1508 def _parse_direct_abstract_declarator(self) -> c_ast.Node: 

1509 lparen_tok = self._accept("LPAREN") 

1510 if lparen_tok: 

1511 if self._starts_declaration() or self._peek_type() == "RPAREN": 

1512 params = self._parse_parameter_type_list_opt() 

1513 self._expect("RPAREN") 

1514 decl = c_ast.FuncDecl( 

1515 args=params, 

1516 type=c_ast.TypeDecl(None, None, None, None), 

1517 coord=self._tok_coord(lparen_tok), 

1518 ) 

1519 else: 

1520 decl = self._parse_abstract_declarator_opt() 

1521 self._expect("RPAREN") 

1522 assert decl is not None 

1523 elif self._peek_type() == "LBRACKET": 

1524 decl = self._parse_abstract_array_base() 

1525 else: 

1526 self._parse_error("Invalid abstract declarator", self.clex.filename) 

1527 

1528 return self._parse_decl_suffixes(decl) 

1529 

1530 # BNF: parameter_type_list_opt : parameter_type_list | empty 

1531 def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]: 

1532 if self._peek_type() == "RPAREN": 

1533 return None 

1534 return self._parse_parameter_type_list() 

1535 

1536 # BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']' 

1537 def _parse_abstract_array_base(self) -> c_ast.Node: 

1538 return self._parse_array_decl_common( 

1539 base_type=c_ast.TypeDecl(None, None, None, None), coord=None 

1540 ) 

1541 

1542 # ------------------------------------------------------------------ 

1543 # Statements 

1544 # ------------------------------------------------------------------ 

1545 # BNF: statement : labeled_statement | compound_statement 

1546 # | selection_statement | iteration_statement 

1547 # | jump_statement | expression_statement 

1548 # | static_assert | pppragma_directive 

1549 def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]: 

1550 tok_type = self._peek_type() 

1551 match tok_type: 

1552 case "CASE" | "DEFAULT": 

1553 return self._parse_labeled_statement() 

1554 case "ID" if self._peek_type(2) == "COLON": 

1555 return self._parse_labeled_statement() 

1556 case "LBRACE": 

1557 return self._parse_compound_statement() 

1558 case "IF" | "SWITCH": 

1559 return self._parse_selection_statement() 

1560 case "WHILE" | "DO" | "FOR": 

1561 return self._parse_iteration_statement() 

1562 case "GOTO" | "BREAK" | "CONTINUE" | "RETURN": 

1563 return self._parse_jump_statement() 

1564 case "PPPRAGMA" | "_PRAGMA": 

1565 return self._parse_pppragma_directive() 

1566 case "_STATIC_ASSERT": 

1567 return self._parse_static_assert() 

1568 case _: 

1569 return self._parse_expression_statement() 

1570 

1571 # BNF: pragmacomp_or_statement : pppragma_directive* statement 

1572 def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]: 

1573 if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}: 

1574 pragmas = self._parse_pppragma_directive_list() 

1575 stmt = self._parse_statement() 

1576 return c_ast.Compound(block_items=pragmas + [stmt], coord=pragmas[0].coord) 

1577 return self._parse_statement() 

1578 

1579 # BNF: block_item : declaration | statement 

1580 def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]: 

1581 if self._starts_declaration(): 

1582 return self._parse_declaration() 

1583 return self._parse_statement() 

1584 

1585 # BNF: block_item_list : block_item+ 

1586 def _parse_block_item_list(self) -> List[c_ast.Node]: 

1587 items = [] 

1588 while self._peek_type() not in {"RBRACE", None}: 

1589 item = self._parse_block_item() 

1590 if isinstance(item, list): 

1591 if item == [None]: 

1592 continue 

1593 items.extend(item) 

1594 else: 

1595 items.append(item) 

1596 return items 

1597 

1598 # BNF: compound_statement : '{' block_item_list? '}' 

1599 def _parse_compound_statement(self) -> c_ast.Node: 

1600 lbrace_tok = self._expect("LBRACE") 

1601 if self._accept("RBRACE"): 

1602 return c_ast.Compound(block_items=None, coord=self._tok_coord(lbrace_tok)) 

1603 block_items = self._parse_block_item_list() 

1604 self._expect("RBRACE") 

1605 return c_ast.Compound( 

1606 block_items=block_items, coord=self._tok_coord(lbrace_tok) 

1607 ) 

1608 

1609 # BNF: labeled_statement : ID ':' statement 

1610 # | CASE constant_expression ':' statement 

1611 # | DEFAULT ':' statement 

1612 def _parse_labeled_statement(self) -> c_ast.Node: 

1613 tok_type = self._peek_type() 

1614 match tok_type: 

1615 case "ID": 

1616 name_tok = self._advance() 

1617 self._expect("COLON") 

1618 if self._starts_statement(): 

1619 stmt = self._parse_pragmacomp_or_statement() 

1620 else: 

1621 stmt = c_ast.EmptyStatement(self._tok_coord(name_tok)) 

1622 return c_ast.Label(name_tok.value, stmt, self._tok_coord(name_tok)) 

1623 case "CASE": 

1624 case_tok = self._advance() 

1625 expr = self._parse_constant_expression() 

1626 self._expect("COLON") 

1627 if self._starts_statement(): 

1628 stmt = self._parse_pragmacomp_or_statement() 

1629 else: 

1630 stmt = c_ast.EmptyStatement(self._tok_coord(case_tok)) 

1631 return c_ast.Case(expr, [stmt], self._tok_coord(case_tok)) 

1632 case "DEFAULT": 

1633 def_tok = self._advance() 

1634 self._expect("COLON") 

1635 if self._starts_statement(): 

1636 stmt = self._parse_pragmacomp_or_statement() 

1637 else: 

1638 stmt = c_ast.EmptyStatement(self._tok_coord(def_tok)) 

1639 return c_ast.Default([stmt], self._tok_coord(def_tok)) 

1640 case _: 

1641 self._parse_error("Invalid labeled statement", self.clex.filename) 

1642 

1643 # BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)? 

1644 # | SWITCH '(' expression ')' statement 

1645 def _parse_selection_statement(self) -> c_ast.Node: 

1646 tok = self._advance() 

1647 match tok.type: 

1648 case "IF": 

1649 self._expect("LPAREN") 

1650 cond = self._parse_expression() 

1651 self._expect("RPAREN") 

1652 then_stmt = self._parse_pragmacomp_or_statement() 

1653 if self._accept("ELSE"): 

1654 else_stmt = self._parse_pragmacomp_or_statement() 

1655 return c_ast.If(cond, then_stmt, else_stmt, self._tok_coord(tok)) 

1656 return c_ast.If(cond, then_stmt, None, self._tok_coord(tok)) 

1657 case "SWITCH": 

1658 self._expect("LPAREN") 

1659 expr = self._parse_expression() 

1660 self._expect("RPAREN") 

1661 stmt = self._parse_pragmacomp_or_statement() 

1662 return fix_switch_cases(c_ast.Switch(expr, stmt, self._tok_coord(tok))) 

1663 case _: 

1664 self._parse_error("Invalid selection statement", self._tok_coord(tok)) 

1665 

1666 # BNF: iteration_statement : WHILE '(' expression ')' statement 

1667 # | DO statement WHILE '(' expression ')' ';' 

1668 # | FOR '(' (declaration | expression_opt) ';' 

1669 # expression_opt ';' expression_opt ')' statement 

1670 def _parse_iteration_statement(self) -> c_ast.Node: 

1671 tok = self._advance() 

1672 match tok.type: 

1673 case "WHILE": 

1674 self._expect("LPAREN") 

1675 cond = self._parse_expression() 

1676 self._expect("RPAREN") 

1677 stmt = self._parse_pragmacomp_or_statement() 

1678 return c_ast.While(cond, stmt, self._tok_coord(tok)) 

1679 case "DO": 

1680 stmt = self._parse_pragmacomp_or_statement() 

1681 self._expect("WHILE") 

1682 self._expect("LPAREN") 

1683 cond = self._parse_expression() 

1684 self._expect("RPAREN") 

1685 self._expect("SEMI") 

1686 return c_ast.DoWhile(cond, stmt, self._tok_coord(tok)) 

1687 case "FOR": 

1688 self._expect("LPAREN") 

1689 if self._starts_declaration(): 

1690 decls = self._parse_declaration() 

1691 init = c_ast.DeclList(decls, self._tok_coord(tok)) 

1692 cond = self._parse_expression_opt() 

1693 self._expect("SEMI") 

1694 next_expr = self._parse_expression_opt() 

1695 self._expect("RPAREN") 

1696 stmt = self._parse_pragmacomp_or_statement() 

1697 return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok)) 

1698 

1699 init = self._parse_expression_opt() 

1700 self._expect("SEMI") 

1701 cond = self._parse_expression_opt() 

1702 self._expect("SEMI") 

1703 next_expr = self._parse_expression_opt() 

1704 self._expect("RPAREN") 

1705 stmt = self._parse_pragmacomp_or_statement() 

1706 return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok)) 

1707 case _: 

1708 self._parse_error("Invalid iteration statement", self._tok_coord(tok)) 

1709 

1710 # BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';' 

1711 # | RETURN expression? ';' 

1712 def _parse_jump_statement(self) -> c_ast.Node: 

1713 tok = self._advance() 

1714 match tok.type: 

1715 case "GOTO": 

1716 name_tok = self._expect("ID") 

1717 self._expect("SEMI") 

1718 return c_ast.Goto(name_tok.value, self._tok_coord(tok)) 

1719 case "BREAK": 

1720 self._expect("SEMI") 

1721 return c_ast.Break(self._tok_coord(tok)) 

1722 case "CONTINUE": 

1723 self._expect("SEMI") 

1724 return c_ast.Continue(self._tok_coord(tok)) 

1725 case "RETURN": 

1726 if self._accept("SEMI"): 

1727 return c_ast.Return(None, self._tok_coord(tok)) 

1728 expr = self._parse_expression() 

1729 self._expect("SEMI") 

1730 return c_ast.Return(expr, self._tok_coord(tok)) 

1731 case _: 

1732 self._parse_error("Invalid jump statement", self._tok_coord(tok)) 

1733 

1734 # BNF: expression_statement : expression_opt ';' 

1735 def _parse_expression_statement(self) -> c_ast.Node: 

1736 expr = self._parse_expression_opt() 

1737 semi_tok = self._expect("SEMI") 

1738 if expr is None: 

1739 return c_ast.EmptyStatement(self._tok_coord(semi_tok)) 

1740 return expr 

1741 

1742 # ------------------------------------------------------------------ 

1743 # Expressions 

1744 # ------------------------------------------------------------------ 

1745 # BNF: expression_opt : expression | empty 

1746 def _parse_expression_opt(self) -> Optional[c_ast.Node]: 

1747 if self._starts_expression(): 

1748 return self._parse_expression() 

1749 return None 

1750 

1751 # BNF: expression : assignment_expression (',' assignment_expression)* 

1752 def _parse_expression(self) -> c_ast.Node: 

1753 expr = self._parse_assignment_expression() 

1754 if not self._accept("COMMA"): 

1755 return expr 

1756 exprs = [expr, self._parse_assignment_expression()] 

1757 while self._accept("COMMA"): 

1758 exprs.append(self._parse_assignment_expression()) 

1759 return c_ast.ExprList(exprs, expr.coord) 

1760 

1761 # BNF: assignment_expression : conditional_expression 

1762 # | unary_expression assignment_op assignment_expression 

1763 def _parse_assignment_expression(self) -> c_ast.Node: 

1764 if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE": 

1765 self._advance() 

1766 comp = self._parse_compound_statement() 

1767 self._expect("RPAREN") 

1768 return comp 

1769 

1770 expr = self._parse_conditional_expression() 

1771 if self._is_assignment_op(): 

1772 op = self._advance().value 

1773 rhs = self._parse_assignment_expression() 

1774 return c_ast.Assignment(op, expr, rhs, expr.coord) 

1775 return expr 

1776 

1777 # BNF: conditional_expression : binary_expression 

1778 # | binary_expression '?' expression ':' conditional_expression 

1779 def _parse_conditional_expression(self) -> c_ast.Node: 

1780 expr = self._parse_binary_expression() 

1781 if self._accept("CONDOP"): 

1782 iftrue = self._parse_expression() 

1783 self._expect("COLON") 

1784 iffalse = self._parse_conditional_expression() 

1785 return c_ast.TernaryOp(expr, iftrue, iffalse, expr.coord) 

1786 return expr 

1787 

1788 # BNF: binary_expression : cast_expression (binary_op cast_expression)* 

1789 def _parse_binary_expression( 

1790 self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None 

1791 ) -> c_ast.Node: 

1792 if lhs is None: 

1793 lhs = self._parse_cast_expression() 

1794 

1795 while True: 

1796 tok = self._peek() 

1797 if tok is None or tok.type not in _BINARY_PRECEDENCE: 

1798 break 

1799 prec = _BINARY_PRECEDENCE[tok.type] 

1800 if prec < min_prec: 

1801 break 

1802 

1803 op = tok.value 

1804 self._advance() 

1805 rhs = self._parse_cast_expression() 

1806 

1807 while True: 

1808 next_tok = self._peek() 

1809 if next_tok is None or next_tok.type not in _BINARY_PRECEDENCE: 

1810 break 

1811 next_prec = _BINARY_PRECEDENCE[next_tok.type] 

1812 if next_prec > prec: 

1813 rhs = self._parse_binary_expression(next_prec, rhs) 

1814 else: 

1815 break 

1816 

1817 lhs = c_ast.BinaryOp(op, lhs, rhs, lhs.coord) 

1818 

1819 return lhs 

1820 

1821 # BNF: cast_expression : '(' type_name ')' cast_expression 

1822 # | unary_expression 

1823 def _parse_cast_expression(self) -> c_ast.Node: 

1824 result = self._try_parse_paren_type_name() 

1825 if result is not None: 

1826 typ, mark, lparen_tok = result 

1827 if self._peek_type() == "LBRACE": 

1828 # (type){...} is a compound literal, not a cast. Examples: 

1829 # (int){1} -> compound literal, handled in postfix 

1830 # (int) x -> cast, handled below 

1831 self._reset(mark) 

1832 else: 

1833 expr = self._parse_cast_expression() 

1834 return c_ast.Cast(typ, expr, self._tok_coord(lparen_tok)) 

1835 return self._parse_unary_expression() 

1836 

1837 # BNF: unary_expression : postfix_expression 

1838 # | '++' unary_expression 

1839 # | '--' unary_expression 

1840 # | unary_op cast_expression 

1841 # | 'sizeof' unary_expression 

1842 # | 'sizeof' '(' type_name ')' 

1843 # | '_Alignof' '(' type_name ')' 

1844 def _parse_unary_expression(self) -> c_ast.Node: 

1845 tok_type = self._peek_type() 

1846 if tok_type in {"PLUSPLUS", "MINUSMINUS"}: 

1847 tok = self._advance() 

1848 expr = self._parse_unary_expression() 

1849 return c_ast.UnaryOp(tok.value, expr, expr.coord) 

1850 

1851 if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}: 

1852 tok = self._advance() 

1853 expr = self._parse_cast_expression() 

1854 return c_ast.UnaryOp(tok.value, expr, expr.coord) 

1855 

1856 if tok_type == "SIZEOF": 

1857 tok = self._advance() 

1858 result = self._try_parse_paren_type_name() 

1859 if result is not None: 

1860 typ, _, _ = result 

1861 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok)) 

1862 expr = self._parse_unary_expression() 

1863 return c_ast.UnaryOp(tok.value, expr, self._tok_coord(tok)) 

1864 

1865 if tok_type == "_ALIGNOF": 

1866 tok = self._advance() 

1867 self._expect("LPAREN") 

1868 typ = self._parse_type_name() 

1869 self._expect("RPAREN") 

1870 return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok)) 

1871 

1872 return self._parse_postfix_expression() 

1873 

1874 # BNF: postfix_expression : primary_expression postfix_suffix* 

1875 # | '(' type_name ')' '{' initializer_list ','? '}' 

1876 def _parse_postfix_expression(self) -> c_ast.Node: 

1877 result = self._try_parse_paren_type_name() 

1878 if result is not None: 

1879 typ, mark, _ = result 

1880 # Disambiguate between casts and compound literals: 

1881 # (int) x -> cast 

1882 # (int) {1} -> compound literal 

1883 if self._accept("LBRACE"): 

1884 init = self._parse_initializer_list() 

1885 self._accept("COMMA") 

1886 self._expect("RBRACE") 

1887 return c_ast.CompoundLiteral(typ, init) 

1888 else: 

1889 self._reset(mark) 

1890 

1891 expr = self._parse_primary_expression() 

1892 while True: 

1893 if self._accept("LBRACKET"): 

1894 sub = self._parse_expression() 

1895 self._expect("RBRACKET") 

1896 expr = c_ast.ArrayRef(expr, sub, expr.coord) 

1897 continue 

1898 if self._accept("LPAREN"): 

1899 if self._peek_type() == "RPAREN": 

1900 self._advance() 

1901 args = None 

1902 else: 

1903 args = self._parse_argument_expression_list() 

1904 self._expect("RPAREN") 

1905 expr = c_ast.FuncCall(expr, args, expr.coord) 

1906 continue 

1907 if self._peek_type() in {"PERIOD", "ARROW"}: 

1908 op_tok = self._advance() 

1909 name_tok = self._advance() 

1910 if name_tok.type not in {"ID", "TYPEID"}: 

1911 self._parse_error( 

1912 "Invalid struct reference", self._tok_coord(name_tok) 

1913 ) 

1914 field = c_ast.ID(name_tok.value, self._tok_coord(name_tok)) 

1915 expr = c_ast.StructRef(expr, op_tok.value, field, expr.coord) 

1916 continue 

1917 if self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}: 

1918 tok = self._advance() 

1919 expr = c_ast.UnaryOp("p" + tok.value, expr, expr.coord) 

1920 continue 

1921 break 

1922 return expr 

1923 

1924 # BNF: primary_expression : ID | constant | string_literal 

1925 # | '(' expression ')' | offsetof 

1926 def _parse_primary_expression(self) -> c_ast.Node: 

1927 tok_type = self._peek_type() 

1928 if tok_type == "ID": 

1929 return self._parse_identifier() 

1930 if ( 

1931 tok_type in _INT_CONST 

1932 or tok_type in _FLOAT_CONST 

1933 or tok_type in _CHAR_CONST 

1934 ): 

1935 return self._parse_constant() 

1936 if tok_type in _STRING_LITERAL: 

1937 return self._parse_unified_string_literal() 

1938 if tok_type in _WSTR_LITERAL: 

1939 return self._parse_unified_wstring_literal() 

1940 if tok_type == "LPAREN": 

1941 self._advance() 

1942 expr = self._parse_expression() 

1943 self._expect("RPAREN") 

1944 return expr 

1945 if tok_type == "OFFSETOF": 

1946 off_tok = self._advance() 

1947 self._expect("LPAREN") 

1948 typ = self._parse_type_name() 

1949 self._expect("COMMA") 

1950 designator = self._parse_offsetof_member_designator() 

1951 self._expect("RPAREN") 

1952 coord = self._tok_coord(off_tok) 

1953 return c_ast.FuncCall( 

1954 c_ast.ID(off_tok.value, coord), 

1955 c_ast.ExprList([typ, designator], coord), 

1956 coord, 

1957 ) 

1958 

1959 self._parse_error("Invalid expression", self.clex.filename) 

1960 

1961 # BNF: offsetof_member_designator : identifier_or_typeid 

1962 # ('.' identifier_or_typeid | '[' expression ']')* 

1963 def _parse_offsetof_member_designator(self) -> c_ast.Node: 

1964 node = self._parse_identifier_or_typeid() 

1965 while True: 

1966 if self._accept("PERIOD"): 

1967 field = self._parse_identifier_or_typeid() 

1968 node = c_ast.StructRef(node, ".", field, node.coord) 

1969 continue 

1970 if self._accept("LBRACKET"): 

1971 expr = self._parse_expression() 

1972 self._expect("RBRACKET") 

1973 node = c_ast.ArrayRef(node, expr, node.coord) 

1974 continue 

1975 break 

1976 return node 

1977 

1978 # BNF: argument_expression_list : assignment_expression (',' assignment_expression)* 

1979 def _parse_argument_expression_list(self) -> c_ast.Node: 

1980 expr = self._parse_assignment_expression() 

1981 exprs = [expr] 

1982 while self._accept("COMMA"): 

1983 exprs.append(self._parse_assignment_expression()) 

1984 return c_ast.ExprList(exprs, expr.coord) 

1985 

1986 # BNF: constant_expression : conditional_expression 

1987 def _parse_constant_expression(self) -> c_ast.Node: 

1988 return self._parse_conditional_expression() 

1989 

1990 # ------------------------------------------------------------------ 

1991 # Terminals 

1992 # ------------------------------------------------------------------ 

1993 # BNF: identifier : ID 

1994 def _parse_identifier(self) -> c_ast.Node: 

1995 tok = self._expect("ID") 

1996 return c_ast.ID(tok.value, self._tok_coord(tok)) 

1997 

1998 # BNF: identifier_or_typeid : ID | TYPEID 

1999 def _parse_identifier_or_typeid(self) -> c_ast.Node: 

2000 tok = self._advance() 

2001 if tok.type not in {"ID", "TYPEID"}: 

2002 self._parse_error("Expected identifier", self._tok_coord(tok)) 

2003 return c_ast.ID(tok.value, self._tok_coord(tok)) 

2004 

2005 # BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST 

2006 def _parse_constant(self) -> c_ast.Node: 

2007 tok = self._advance() 

2008 if tok.type in _INT_CONST: 

2009 u_count = 0 

2010 l_count = 0 

2011 for ch in tok.value[-3:]: 

2012 if ch in ("l", "L"): 

2013 l_count += 1 

2014 elif ch in ("u", "U"): 

2015 u_count += 1 

2016 if u_count > 1: 

2017 raise ValueError("Constant cannot have more than one u/U suffix.") 

2018 if l_count > 2: 

2019 raise ValueError("Constant cannot have more than two l/L suffix.") 

2020 prefix = "unsigned " * u_count + "long " * l_count 

2021 return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok)) 

2022 

2023 if tok.type in _FLOAT_CONST: 

2024 if tok.value[-1] in ("f", "F"): 

2025 t = "float" 

2026 elif tok.value[-1] in ("l", "L"): 

2027 t = "long double" 

2028 else: 

2029 t = "double" 

2030 return c_ast.Constant(t, tok.value, self._tok_coord(tok)) 

2031 

2032 if tok.type in _CHAR_CONST: 

2033 return c_ast.Constant("char", tok.value, self._tok_coord(tok)) 

2034 

2035 self._parse_error("Invalid constant", self._tok_coord(tok)) 

2036 

2037 # BNF: unified_string_literal : STRING_LITERAL+ 

2038 def _parse_unified_string_literal(self) -> c_ast.Node: 

2039 tok = self._expect("STRING_LITERAL") 

2040 node = c_ast.Constant("string", tok.value, self._tok_coord(tok)) 

2041 while self._peek_type() == "STRING_LITERAL": 

2042 tok2 = self._advance() 

2043 node.value = node.value[:-1] + tok2.value[1:] 

2044 return node 

2045 

2046 # BNF: unified_wstring_literal : WSTRING_LITERAL+ 

2047 def _parse_unified_wstring_literal(self) -> c_ast.Node: 

2048 tok = self._advance() 

2049 if tok.type not in _WSTR_LITERAL: 

2050 self._parse_error("Invalid string literal", self._tok_coord(tok)) 

2051 node = c_ast.Constant("string", tok.value, self._tok_coord(tok)) 

2052 while self._peek_type() in _WSTR_LITERAL: 

2053 tok2 = self._advance() 

2054 node.value = node.value.rstrip()[:-1] + tok2.value[2:] 

2055 return node 

2056 

2057 # ------------------------------------------------------------------ 

2058 # Initializers 

2059 # ------------------------------------------------------------------ 

2060 # BNF: initializer : assignment_expression 

2061 # | '{' initializer_list ','? '}' 

2062 # | '{' '}' 

2063 def _parse_initializer(self) -> c_ast.Node: 

2064 lbrace_tok = self._accept("LBRACE") 

2065 if lbrace_tok: 

2066 if self._accept("RBRACE"): 

2067 return c_ast.InitList([], self._tok_coord(lbrace_tok)) 

2068 init_list = self._parse_initializer_list() 

2069 self._accept("COMMA") 

2070 self._expect("RBRACE") 

2071 return init_list 

2072 

2073 return self._parse_assignment_expression() 

2074 

2075 # BNF: initializer_list : initializer_item (',' initializer_item)* ','? 

2076 def _parse_initializer_list(self) -> c_ast.Node: 

2077 items = [self._parse_initializer_item()] 

2078 while self._accept("COMMA"): 

2079 if self._peek_type() == "RBRACE": 

2080 break 

2081 items.append(self._parse_initializer_item()) 

2082 return c_ast.InitList(items, items[0].coord) 

2083 

2084 # BNF: initializer_item : designation? initializer 

2085 def _parse_initializer_item(self) -> c_ast.Node: 

2086 designation = None 

2087 if self._peek_type() in {"LBRACKET", "PERIOD"}: 

2088 designation = self._parse_designation() 

2089 init = self._parse_initializer() 

2090 if designation is not None: 

2091 return c_ast.NamedInitializer(designation, init) 

2092 return init 

2093 

2094 # BNF: designation : designator_list '=' 

2095 def _parse_designation(self) -> List[c_ast.Node]: 

2096 designators = self._parse_designator_list() 

2097 self._expect("EQUALS") 

2098 return designators 

2099 

2100 # BNF: designator_list : designator+ 

2101 def _parse_designator_list(self) -> List[c_ast.Node]: 

2102 designators = [] 

2103 while self._peek_type() in {"LBRACKET", "PERIOD"}: 

2104 designators.append(self._parse_designator()) 

2105 return designators 

2106 

2107 # BNF: designator : '[' constant_expression ']' 

2108 # | '.' identifier_or_typeid 

2109 def _parse_designator(self) -> c_ast.Node: 

2110 if self._accept("LBRACKET"): 

2111 expr = self._parse_constant_expression() 

2112 self._expect("RBRACKET") 

2113 return expr 

2114 if self._accept("PERIOD"): 

2115 return self._parse_identifier_or_typeid() 

2116 self._parse_error("Invalid designator", self.clex.filename) 

2117 

2118 # ------------------------------------------------------------------ 

2119 # Preprocessor-like directives 

2120 # ------------------------------------------------------------------ 

2121 # BNF: pp_directive : '#' ... (unsupported) 

2122 def _parse_pp_directive(self) -> NoReturn: 

2123 tok = self._expect("PPHASH") 

2124 self._parse_error("Directives not supported yet", self._tok_coord(tok)) 

2125 

2126 # BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR? 

2127 # | _PRAGMA '(' string_literal ')' 

2128 def _parse_pppragma_directive(self) -> c_ast.Node: 

2129 if self._peek_type() == "PPPRAGMA": 

2130 tok = self._advance() 

2131 if self._peek_type() == "PPPRAGMASTR": 

2132 str_tok = self._advance() 

2133 return c_ast.Pragma(str_tok.value, self._tok_coord(str_tok)) 

2134 return c_ast.Pragma("", self._tok_coord(tok)) 

2135 

2136 if self._peek_type() == "_PRAGMA": 

2137 tok = self._advance() 

2138 lparen = self._expect("LPAREN") 

2139 literal = self._parse_unified_string_literal() 

2140 self._expect("RPAREN") 

2141 return c_ast.Pragma(literal, self._tok_coord(lparen)) 

2142 

2143 self._parse_error("Invalid pragma", self.clex.filename) 

2144 

2145 # BNF: pppragma_directive_list : pppragma_directive+ 

2146 def _parse_pppragma_directive_list(self) -> List[c_ast.Node]: 

2147 pragmas = [] 

2148 while self._peek_type() in {"PPPRAGMA", "_PRAGMA"}: 

2149 pragmas.append(self._parse_pppragma_directive()) 

2150 return pragmas 

2151 

2152 # BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')' 

2153 def _parse_static_assert(self) -> List[c_ast.Node]: 

2154 tok = self._expect("_STATIC_ASSERT") 

2155 self._expect("LPAREN") 

2156 cond = self._parse_constant_expression() 

2157 msg = None 

2158 if self._accept("COMMA"): 

2159 msg = self._parse_unified_string_literal() 

2160 self._expect("RPAREN") 

2161 return [c_ast.StaticAssert(cond, msg, self._tok_coord(tok))] 

2162 

2163 

2164_ASSIGNMENT_OPS = { 

2165 "EQUALS", 

2166 "XOREQUAL", 

2167 "TIMESEQUAL", 

2168 "DIVEQUAL", 

2169 "MODEQUAL", 

2170 "PLUSEQUAL", 

2171 "MINUSEQUAL", 

2172 "LSHIFTEQUAL", 

2173 "RSHIFTEQUAL", 

2174 "ANDEQUAL", 

2175 "OREQUAL", 

2176} 

2177 

2178# Precedence of operators (lower number = weather binding) 

2179# If this changes, c_generator.CGenerator.precedence_map needs to change as 

2180# well 

2181_BINARY_PRECEDENCE = { 

2182 "LOR": 0, 

2183 "LAND": 1, 

2184 "OR": 2, 

2185 "XOR": 3, 

2186 "AND": 4, 

2187 "EQ": 5, 

2188 "NE": 5, 

2189 "GT": 6, 

2190 "GE": 6, 

2191 "LT": 6, 

2192 "LE": 6, 

2193 "RSHIFT": 7, 

2194 "LSHIFT": 7, 

2195 "PLUS": 8, 

2196 "MINUS": 8, 

2197 "TIMES": 9, 

2198 "DIVIDE": 9, 

2199 "MOD": 9, 

2200} 

2201 

2202_STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"} 

2203 

2204_FUNCTION_SPEC = {"INLINE", "_NORETURN"} 

2205 

2206_TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"} 

2207 

2208_TYPE_SPEC_SIMPLE = { 

2209 "VOID", 

2210 "_BOOL", 

2211 "CHAR", 

2212 "SHORT", 

2213 "INT", 

2214 "LONG", 

2215 "FLOAT", 

2216 "DOUBLE", 

2217 "_COMPLEX", 

2218 "SIGNED", 

2219 "UNSIGNED", 

2220 "__INT128", 

2221} 

2222 

2223_DECL_START = ( 

2224 _STORAGE_CLASS 

2225 | _FUNCTION_SPEC 

2226 | _TYPE_QUALIFIER 

2227 | _TYPE_SPEC_SIMPLE 

2228 | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"} 

2229) 

2230 

2231_EXPR_START = { 

2232 "ID", 

2233 "LPAREN", 

2234 "PLUSPLUS", 

2235 "MINUSMINUS", 

2236 "PLUS", 

2237 "MINUS", 

2238 "TIMES", 

2239 "AND", 

2240 "NOT", 

2241 "LNOT", 

2242 "SIZEOF", 

2243 "_ALIGNOF", 

2244 "OFFSETOF", 

2245} 

2246 

2247_INT_CONST = { 

2248 "INT_CONST_DEC", 

2249 "INT_CONST_OCT", 

2250 "INT_CONST_HEX", 

2251 "INT_CONST_BIN", 

2252 "INT_CONST_CHAR", 

2253} 

2254 

2255_FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"} 

2256 

2257_CHAR_CONST = { 

2258 "CHAR_CONST", 

2259 "WCHAR_CONST", 

2260 "U8CHAR_CONST", 

2261 "U16CHAR_CONST", 

2262 "U32CHAR_CONST", 

2263} 

2264 

2265_STRING_LITERAL = {"STRING_LITERAL"} 

2266 

2267_WSTR_LITERAL = { 

2268 "WSTRING_LITERAL", 

2269 "U8STRING_LITERAL", 

2270 "U16STRING_LITERAL", 

2271 "U32STRING_LITERAL", 

2272} 

2273 

2274_STARTS_EXPRESSION = ( 

2275 _EXPR_START 

2276 | _INT_CONST 

2277 | _FLOAT_CONST 

2278 | _CHAR_CONST 

2279 | _STRING_LITERAL 

2280 | _WSTR_LITERAL 

2281) 

2282 

2283_STARTS_STATEMENT = { 

2284 "LBRACE", 

2285 "IF", 

2286 "SWITCH", 

2287 "WHILE", 

2288 "DO", 

2289 "FOR", 

2290 "GOTO", 

2291 "BREAK", 

2292 "CONTINUE", 

2293 "RETURN", 

2294 "CASE", 

2295 "DEFAULT", 

2296 "PPPRAGMA", 

2297 "_PRAGMA", 

2298 "_STATIC_ASSERT", 

2299 "SEMI", 

2300} 

2301 

2302 

2303class _TokenStream: 

2304 """Wraps a lexer to provide convenient, buffered access to the underlying 

2305 token stream. The lexer is expected to be initialized with the input 

2306 string already. 

2307 """ 

2308 

2309 def __init__(self, lexer: CLexer) -> None: 

2310 self._lexer = lexer 

2311 self._buffer: List[Optional[_Token]] = [] 

2312 self._index = 0 

2313 

2314 def peek(self, k: int = 1) -> Optional[_Token]: 

2315 """Peek at the k-th next token in the stream, without consuming it. 

2316 

2317 Examples: 

2318 k=1 returns the immediate next token. 

2319 k=2 returns the token after that. 

2320 """ 

2321 if k <= 0: 

2322 return None 

2323 self._fill(k) 

2324 return self._buffer[self._index + k - 1] 

2325 

2326 def next(self) -> Optional[_Token]: 

2327 """Consume a single token and return it.""" 

2328 self._fill(1) 

2329 tok = self._buffer[self._index] 

2330 self._index += 1 

2331 return tok 

2332 

2333 # The 'mark' and 'reset' methods are useful for speculative parsing with 

2334 # backtracking; when the parser needs to examine a sequence of tokens 

2335 # and potentially decide to try a different path on the same sequence, it 

2336 # can call 'mark' to obtain the current token position, and if the first 

2337 # path fails restore the position with `reset(pos)`. 

2338 def mark(self) -> int: 

2339 return self._index 

2340 

2341 def reset(self, mark: int) -> None: 

2342 self._index = mark 

2343 

2344 def _fill(self, n: int) -> None: 

2345 while len(self._buffer) < self._index + n: 

2346 tok = self._lexer.token() 

2347 self._buffer.append(tok) 

2348 if tok is None: 

2349 break 

2350 

2351 

2352# Declaration specifiers are represented by a dictionary with entries: 

2353# - qual: a list of type qualifiers 

2354# - storage: a list of storage class specifiers 

2355# - type: a list of type specifiers 

2356# - function: a list of function specifiers 

2357# - alignment: a list of alignment specifiers 

2358class _DeclSpec(TypedDict): 

2359 qual: List[Any] 

2360 storage: List[Any] 

2361 type: List[Any] 

2362 function: List[Any] 

2363 alignment: List[Any] 

2364 

2365 

2366_DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"] 

2367 

2368 

2369class _DeclInfo(TypedDict): 

2370 # Declarator payloads used by declaration/initializer parsing: 

2371 # - decl: the declarator node (may be None for abstract/implicit cases) 

2372 # - init: optional initializer expression 

2373 # - bitsize: optional bit-field width expression (for struct declarators) 

2374 decl: Optional[c_ast.Node] 

2375 init: Optional[c_ast.Node] 

2376 bitsize: Optional[c_ast.Node]