#------------------------------------------------------------------------------
# pycparser: c_parser.py
#
# CParser class: Parser and AST builder for the C language
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#------------------------------------------------------------------------------
from .ply import yacc

from . import c_ast
from .c_lexer import CLexer
from .plyparser import PLYParser, ParseError, parameterized, template
from .ast_transforms import fix_switch_cases, fix_atomic_specifiers

@template
class CParser(PLYParser):
    def __init__(
            self,
            lex_optimize=True,
            lexer=CLexer,
            lextab='pycparser.lextab',
            yacc_optimize=True,
            yacctab='pycparser.yacctab',
            yacc_debug=False,
            taboutputdir=''):
        """ Create a new CParser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking CParser/CLexer, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lexer:
                Set this parameter to define the lexer to use if
                you're not using the default CLexer.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True).

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file.

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.

            taboutputdir:
                Set this parameter to control the location of generated
                lextab and yacctab files.
        """
        self.clex = lexer(
            error_func=self._lex_error_func,
            on_lbrace_func=self._lex_on_lbrace_func,
            on_rbrace_func=self._lex_on_rbrace_func,
            type_lookup_func=self._lex_type_lookup_func)

        self.clex.build(
            optimize=lex_optimize,
            lextab=lextab,
            outputdir=taboutputdir)
        self.tokens = self.clex.tokens

        rules_with_opt = [
            'abstract_declarator',
            'assignment_expression',
            'declaration_list',
            'declaration_specifiers_no_type',
            'designation',
            'expression',
            'identifier_list',
            'init_declarator_list',
            'id_init_declarator_list',
            'initializer_list',
            'parameter_type_list',
            'block_item_list',
            'type_qualifier_list',
            'struct_declarator_list'
        ]

        for rule in rules_with_opt:
            self._create_opt_rule(rule)

        self.cparser = yacc.yacc(
            module=self,
            start='translation_unit_or_empty',
            debug=yacc_debug,
            optimize=yacc_optimize,
            tabmodule=yacctab,
            outputdir=taboutputdir)

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, 'name' is currently a type in the scope. If it's False,
        # 'name' is used in the scope but not as a type (for instance, if we
        # saw: int name;).
        # If 'name' is not a key in _scope_stack[n] then 'name' was not
        # defined in this scope at all.
        self._scope_stack = [dict()]
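
        # Illustrative sketch (not part of the original source): after parsing
        #
        #     typedef int T;
        #     void f(void) { int T; /* ... */ }
        #
        # while inside f's body the stack would look roughly like
        # [{'T': True, 'f': False}, {'T': False}] -- the inner scope's
        # identifier entry (False) shadows the outer typedef entry (True).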

        # Keeps track of the last token given to yacc (the lookahead token)
        self._last_yielded_token = None

    def parse(self, text, filename='', debug=False):
        """ Parses C code and returns an AST.

            text:
                A string containing the C source code

            filename:
                Name of the file being parsed (for meaningful
                error messages)

            debug:
                Debug flag to YACC
        """
        self.clex.filename = filename
        self.clex.reset_lineno()
        self._scope_stack = [dict()]
        self._last_yielded_token = None
        return self.cparser.parse(
            input=text,
            lexer=self.clex,
            debug=debug)
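
    # A minimal usage sketch (not part of the original source; assumes the
    # standard pycparser package layout):
    #
    #     from pycparser.c_parser import CParser
    #     parser = CParser()
    #     ast = parser.parse('int x = 1;', filename='<stdin>')
    #     ast.show()
    #
    # parse() returns a c_ast.FileAST whose ext list holds the top-level
    # declarations.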

    ######################-- PRIVATE --######################

    def _push_scope(self):
        self._scope_stack.append(dict())

    def _pop_scope(self):
        assert len(self._scope_stack) > 1
        self._scope_stack.pop()

    def _add_typedef_name(self, name, coord):
        """ Add a new typedef name (ie a TYPEID) to the current scope
        """
        if not self._scope_stack[-1].get(name, True):
            self._parse_error(
                "Typedef %r previously declared as non-typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = True

    def _add_identifier(self, name, coord):
        """ Add a new object, function, or enum member name (ie an ID) to the
            current scope
        """
        if self._scope_stack[-1].get(name, False):
            self._parse_error(
                "Non-typedef %r previously declared as typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = False

    def _is_type_in_scope(self, name):
        """ Is *name* a typedef-name in the current scope?
        """
        for scope in reversed(self._scope_stack):
            # If name is an identifier in this scope it shadows typedefs in
            # higher scopes.
            in_scope = scope.get(name)
            if in_scope is not None:
                return in_scope
        return False
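
    # Example (illustrative, not in the original source): with
    # _scope_stack == [{'T': True}, {'T': False}], _is_type_in_scope('T')
    # returns False -- the innermost binding wins, so an identifier named T
    # shadows an outer typedef T.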

    def _lex_error_func(self, msg, line, column):
        self._parse_error(msg, self._coord(line, column))

    def _lex_on_lbrace_func(self):
        self._push_scope()

    def _lex_on_rbrace_func(self):
        self._pop_scope()

    def _lex_type_lookup_func(self, name):
        """ Looks up types that were previously defined with
            typedef.
            Passed to the lexer for recognizing identifiers that
            are types.
        """
        is_type = self._is_type_in_scope(name)
        return is_type

    def _get_yacc_lookahead_token(self):
        """ We need access to yacc's lookahead token in certain cases.
            This is the last token yacc requested from the lexer, so we
            ask the lexer.
        """
        return self.clex.last_token

    # To understand what's going on here, read sections A.8.5 and
    # A.8.6 of K&R2 very carefully.
    #
    # A C type consists of a basic type declaration, with a list
    # of modifiers. For example:
    #
    #   int *c[5];
    #
    # The basic declaration here is 'int c', and the pointer and
    # the array are the modifiers.
    #
    # Basic declarations are represented by TypeDecl (from module c_ast) and
    # the modifiers are FuncDecl, PtrDecl and ArrayDecl.
    #
    # The standard states that whenever a new modifier is parsed, it should be
    # added to the end of the list of modifiers. For example:
    #
    # K&R2 A.8.6.2: Array Declarators
    #
    #   In a declaration T D where D has the form
    #       D1 [constant-expression-opt]
    #   and the type of the identifier in the declaration T D1 is
    #   "type-modifier T", the type of the identifier of D is
    #   "type-modifier array of T"
    #
    # This is what this method does. The declarator it receives
    # can be a list of declarators ending with TypeDecl. It
    # tacks the modifier to the end of this list, just before
    # the TypeDecl.
    #
    # Additionally, the modifier may be a list itself. This is
    # useful for pointers, that can come as a chain from the rule
    # p_pointer. In this case, the whole modifier list is spliced
    # into the new location.
    def _type_modify_decl(self, decl, modifier):
        """ Tacks a type modifier on a declarator, and returns
            the modified declarator.

            Note: the declarator and modifier may be modified
        """
        #~ print '****'
        #~ decl.show(offset=3)
        #~ modifier.show(offset=3)
        #~ print '****'

        modifier_head = modifier
        modifier_tail = modifier

        # The modifier may be a nested list. Reach its tail.
        while modifier_tail.type:
            modifier_tail = modifier_tail.type

        # If the decl is a basic type, just tack the modifier onto it.
        if isinstance(decl, c_ast.TypeDecl):
            modifier_tail.type = decl
            return modifier
        else:
            # Otherwise, the decl is a list of modifiers. Reach
            # its tail and splice the modifier onto the tail,
            # pointing to the underlying basic type.
            decl_tail = decl

            while not isinstance(decl_tail.type, c_ast.TypeDecl):
                decl_tail = decl_tail.type

            modifier_tail.type = decl_tail.type
            decl_tail.type = modifier_head
            return decl
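
    # Worked example (illustrative, not in the original source): for
    # 'int *c[5]', the direct declarator produces ArrayDecl -> TypeDecl('c')
    # and the pointer arrives as a PtrDecl modifier. _type_modify_decl splices
    # the PtrDecl in just before the TypeDecl, yielding
    # ArrayDecl -> PtrDecl -> TypeDecl('c'):
    # "c is an array of 5 pointers to int".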

    # Due to the order in which declarators are constructed,
    # they have to be fixed in order to look like a normal AST.
    #
    # When a declaration arrives from syntax construction, it has
    # these problems:
    # * The innermost TypeDecl has no type (because the basic
    #   type is only known at the uppermost declaration level)
    # * The declaration has no variable name, since that is saved
    #   in the innermost TypeDecl
    # * The typename of the declaration is a list of type
    #   specifiers, and not a node. Here, basic identifier types
    #   should be separated from more complex types like enums
    #   and structs.
    #
    # This method fixes these problems.
    def _fix_decl_name_type(self, decl, typename):
        """ Fixes a declaration. Modifies decl.
        """
        # Reach the underlying basic type
        #
        type = decl
        while not isinstance(type, c_ast.TypeDecl):
            type = type.type

        decl.name = type.declname
        type.quals = decl.quals[:]

        # The typename is a list of types. If any type in this
        # list isn't an IdentifierType, it must be the only
        # type in the list (it's illegal to declare "int enum ..")
        # If all the types are basic, they're collected in the
        # IdentifierType holder.
        for tn in typename:
            if not isinstance(tn, c_ast.IdentifierType):
                if len(typename) > 1:
                    self._parse_error(
                        "Invalid multiple types specified", tn.coord)
                else:
                    type.type = tn
                    return decl

        if not typename:
            # Functions default to returning int
            #
            if not isinstance(decl.type, c_ast.FuncDecl):
                self._parse_error(
                    "Missing type in declaration", decl.coord)
            type.type = c_ast.IdentifierType(
                ['int'],
                coord=decl.coord)
        else:
            # At this point, we know that typename is a list of IdentifierType
            # nodes. Concatenate all the names into a single list.
            #
            type.type = c_ast.IdentifierType(
                [name for id in typename for name in id.names],
                coord=typename[0].coord)
        return decl
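
    # Example (illustrative, not in the original source): for
    # 'unsigned long int x;', typename arrives as three one-name
    # IdentifierType nodes; the else-branch above merges them into a single
    # IdentifierType(['unsigned', 'long', 'int']) attached to x's TypeDecl.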

    def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
        """ Declaration specifiers are represented by a dictionary
            with the entries:
            * qual: a list of type qualifiers
            * storage: a list of storage type qualifiers
            * type: a list of type specifiers
            * function: a list of function specifiers
            * alignment: a list of alignment specifiers

            This method is given a declaration specifier, and a
            new specifier of a given kind.
            If `append` is True, the new specifier is added to the end of
            the specifiers list, otherwise it's added at the beginning.
            Returns the declaration specifier, with the new
            specifier incorporated.
        """
        spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[])

        if append:
            spec[kind].append(newspec)
        else:
            spec[kind].insert(0, newspec)

        return spec
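
    # Example (illustrative, not in the original source): parsing
    # 'static const int' accumulates, roughly,
    #   {'qual': ['const'], 'storage': ['static'],
    #    'type': [IdentifierType(['int'])], 'function': [], 'alignment': []}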

    def _build_declarations(self, spec, decls, typedef_namespace=False):
        """ Builds a list of declarations all sharing the given specifiers.
            If typedef_namespace is true, each declared name is added
            to the "typedef namespace", which also includes objects,
            functions, and enum constants.
        """
        is_typedef = 'typedef' in spec['storage']
        declarations = []

        # Bit-fields are allowed to be unnamed.
        if decls[0].get('bitsize') is not None:
            pass

        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None. This can only occur for the
        # first declarator.
        elif decls[0]['decl'] is None:
            if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                    not self._is_type_in_scope(spec['type'][-1].names[0]):
                coord = '?'
                for t in spec['type']:
                    if hasattr(t, 'coord'):
                        coord = t.coord
                        break
                self._parse_error('Invalid declaration', coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]['decl'] = c_ast.TypeDecl(
                declname=spec['type'][-1].names[0],
                type=None,
                quals=None,
                align=spec['alignment'],
                coord=spec['type'][-1].coord)
            # Remove the "new" type's name from the end of spec['type']
            del spec['type'][-1]

        # A similar problem can occur where the declaration ends up looking
        # like an abstract declarator. Give it a name if this is the case.
        elif not isinstance(decls[0]['decl'], (
                c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
            decls_0_tail = decls[0]['decl']
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec['type'][-1].names[0]
                del spec['type'][-1]

        for decl in decls:
            assert decl['decl'] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec['qual'],
                    storage=spec['storage'],
                    type=decl['decl'],
                    coord=decl['decl'].coord)
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=decl['decl'],
                    init=decl.get('init'),
                    bitsize=decl.get('bitsize'),
                    coord=decl['decl'].coord)

            if isinstance(declaration.type, (
                    c_ast.Enum, c_ast.Struct, c_ast.Union,
                    c_ast.IdentifierType)):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            fixed_decl = fix_atomic_specifiers(fixed_decl)
            declarations.append(fixed_decl)

        return declarations

    def _build_function_definition(self, spec, decl, param_decls, body):
        """ Builds a function definition.
        """
        if 'typedef' in spec['storage']:
            self._parse_error("Invalid typedef", decl.coord)

        declaration = self._build_declarations(
            spec=spec,
            decls=[dict(decl=decl, init=None)],
            typedef_namespace=True)[0]

        return c_ast.FuncDef(
            decl=declaration,
            param_decls=param_decls,
            body=body,
            coord=decl.coord)

    def _select_struct_union_class(self, token):
        """ Given a token (either STRUCT or UNION), selects the
            appropriate AST class.
        """
        if token == 'struct':
            return c_ast.Struct
        else:
            return c_ast.Union

    ##
    ## Precedence and associativity of operators
    ##
    # If this changes, c_generator.CGenerator.precedence_map needs to change
    # as well
    precedence = (
        ('left', 'LOR'),
        ('left', 'LAND'),
        ('left', 'OR'),
        ('left', 'XOR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'GT', 'GE', 'LT', 'LE'),
        ('left', 'RSHIFT', 'LSHIFT'),
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE', 'MOD')
    )

    ##
    ## Grammar productions
    ## Implementation of the BNF defined in K&R2 A.13
    ##

    # Wrapper around a translation unit, to allow for empty input.
    # Not strictly part of the C99 Grammar, but useful in practice.
    def p_translation_unit_or_empty(self, p):
        """ translation_unit_or_empty   : translation_unit
                                        | empty
        """
        if p[1] is None:
            p[0] = c_ast.FileAST([])
        else:
            p[0] = c_ast.FileAST(p[1])

    def p_translation_unit_1(self, p):
        """ translation_unit    : external_declaration
        """
        # Note: external_declaration is already a list
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit    : translation_unit external_declaration
        """
        p[1].extend(p[2])
        p[0] = p[1]

    # Declarations always come as lists (because they can be
    # several in one line), so we wrap the function definition
    # into a list as well, to make the return value of
    # external_declaration homogeneous.
    def p_external_declaration_1(self, p):
        """ external_declaration    : function_definition
        """
        p[0] = [p[1]]

    def p_external_declaration_2(self, p):
        """ external_declaration    : declaration
        """
        p[0] = p[1]

    def p_external_declaration_3(self, p):
        """ external_declaration    : pp_directive
                                    | pppragma_directive
        """
        p[0] = [p[1]]

    def p_external_declaration_4(self, p):
        """ external_declaration    : SEMI
        """
        p[0] = []

    def p_external_declaration_5(self, p):
        """ external_declaration    : static_assert
        """
        p[0] = p[1]

    def p_static_assert_declaration(self, p):
        """ static_assert   : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
                            | _STATIC_ASSERT LPAREN constant_expression RPAREN
        """
        if len(p) == 5:
            p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
        else:
            p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]

    def p_pp_directive(self, p):
        """ pp_directive  : PPHASH
        """
        self._parse_error('Directives not supported yet',
                          self._token_coord(p, 1))

    def p_pppragma_directive(self, p):
        """ pppragma_directive      : PPPRAGMA
                                    | PPPRAGMA PPPRAGMASTR
        """
        if len(p) == 3:
            p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
        else:
            p[0] = c_ast.Pragma("", self._token_coord(p, 1))

    # In function definitions, the declarator can be followed by
    # a declaration list, for old "K&R style" function definitions.
    def p_function_definition_1(self, p):
        """ function_definition : id_declarator declaration_list_opt compound_statement
        """
        # no declaration specifiers - 'int' becomes the default type
        spec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(['int'],
                                       coord=self._token_coord(p, 1))],
            function=[])

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[1],
            param_decls=p[2],
            body=p[3])

    def p_function_definition_2(self, p):
        """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
        """
        spec = p[1]

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[2],
            param_decls=p[3],
            body=p[4])

    # Note: according to C18 A.2.2 6.7.10, a static_assert-declaration
    # (_Static_assert) is a declaration, not a statement. We additionally
    # recognize it as a statement to fix parsing of _Static_assert inside
    # function bodies.
    #
    def p_statement(self, p):
        """ statement   : labeled_statement
                        | expression_statement
                        | compound_statement
                        | selection_statement
                        | iteration_statement
                        | jump_statement
                        | pppragma_directive
                        | static_assert
        """
        p[0] = p[1]

    # A pragma is generally considered a decorator rather than an actual
    # statement. Still, for the purposes of analyzing an abstract syntax tree
    # of C code, pragmas should not be ignored and were previously treated as
    # a statement. This presents a problem for constructs that take a
    # statement such as labeled_statements, selection_statements, and
    # iteration_statements, causing a misleading structure in the AST. For
    # example, consider the following C code.
    #
    #   for (int i = 0; i < 3; i++)
    #       #pragma omp critical
    #       sum += 1;
    #
    # This code will compile and execute "sum += 1;" as the body of the for
    # loop. Previous implementations of pycparser would render the AST for
    # this block of code as follows:
    #
    #   For:
    #     DeclList:
    #       Decl: i, [], [], []
    #         TypeDecl: i, []
    #           IdentifierType: ['int']
    #         Constant: int, 0
    #     BinaryOp: <
    #       ID: i
    #       Constant: int, 3
    #     UnaryOp: p++
    #       ID: i
    #     Pragma: omp critical
    #   Assignment: +=
    #     ID: sum
    #     Constant: int, 1
    #
    # This AST misleadingly takes the Pragma as the body of the loop and the
    # assignment then becomes a sibling of the loop.
    #
    # To solve edge cases like these, the pragmacomp_or_statement rule groups
    # a pragma and its following statement (which would otherwise be orphaned)
    # using a compound block, effectively turning the above code into:
    #
    #   for (int i = 0; i < 3; i++) {
    #       #pragma omp critical
    #       sum += 1;
    #   }
    def p_pragmacomp_or_statement(self, p):
        """ pragmacomp_or_statement     : pppragma_directive statement
                                        | statement
        """
        if isinstance(p[1], c_ast.Pragma) and len(p) == 3:
            p[0] = c_ast.Compound(
                block_items=[p[1], p[2]],
                coord=self._token_coord(p, 1))
        else:
            p[0] = p[1]

    # In C, declarations can come several in a line:
    #   int x, *px, romulo = 5;
    #
    # However, for the AST, we will split them to separate Decl
    # nodes.
    #
    # This rule splits its declarations and always returns a list
    # of Decl nodes, even if it's one element long.
    #
    def p_decl_body(self, p):
        """ decl_body : declaration_specifiers init_declarator_list_opt
                      | declaration_specifiers_no_type id_init_declarator_list_opt
        """
        spec = p[1]

        # p[2] (init_declarator_list_opt) is either a list or None
        #
        if p[2] is None:
            # By the standard, you must have at least one declarator unless
            # declaring a structure tag, a union tag, or the members of an
            # enumeration.
            #
            ty = spec['type']
            s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
            if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
                decls = [c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=ty[0],
                    init=None,
                    bitsize=None,
                    coord=ty[0].coord)]

            # However, this case can also occur on redeclared identifiers in
            # an inner scope. The trouble is that the redeclared type's name
            # gets grouped into declaration_specifiers; _build_declarations
            # compensates for this.
            #
            else:
                decls = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=None, init=None)],
                    typedef_namespace=True)

        else:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2],
                typedef_namespace=True)

        p[0] = decls

    # The declaration has been split to a decl_body sub-rule and
    # SEMI, because having them in a single rule created a problem
    # for defining typedefs.
    #
    # If a typedef line was directly followed by a line using the
    # type defined with the typedef, the type would not be
    # recognized. This is because to reduce the declaration rule,
    # the parser's lookahead asked for the token after SEMI, which
    # was the type from the next line, and the lexer had no chance
    # to see the updated type symbol table.
    #
    # Splitting solves this problem, because after seeing SEMI,
    # the parser reduces decl_body, which actually adds the new
    # type into the table to be seen by the lexer before the next
    # line is reached.
    def p_declaration(self, p):
        """ declaration : decl_body SEMI
        """
        p[0] = p[1]

    # Since each declaration is a list of declarations, this
    # rule will combine all the declarations and return a single
    # list
    #
    def p_declaration_list(self, p):
        """ declaration_list    : declaration
                                | declaration_list declaration
        """
        p[0] = p[1] if len(p) == 2 else p[1] + p[2]

    # To know when declaration-specifiers end and declarators begin,
    # we require declaration-specifiers to have at least one
    # type-specifier, and disallow typedef-names after we've seen any
    # type-specifier. These are both required by the spec.
    #
    def p_declaration_specifiers_no_type_1(self, p):
        """ declaration_specifiers_no_type  : type_qualifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')

    def p_declaration_specifiers_no_type_2(self, p):
        """ declaration_specifiers_no_type  : storage_class_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')

    def p_declaration_specifiers_no_type_3(self, p):
        """ declaration_specifiers_no_type  : function_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'function')

    # Without this, `typedef _Atomic(T) U` will parse incorrectly because the
    # _Atomic qualifier will match, instead of the specifier.
    def p_declaration_specifiers_no_type_4(self, p):
        """ declaration_specifiers_no_type  : atomic_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'type')

    def p_declaration_specifiers_no_type_5(self, p):
        """ declaration_specifiers_no_type  : alignment_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')

    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers  : declaration_specifiers type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers  : declaration_specifiers storage_class_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)

    def p_declaration_specifiers_3(self, p):
        """ declaration_specifiers  : declaration_specifiers function_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)

    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_7(self, p):
        """ declaration_specifiers  : declaration_specifiers alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)

    def p_storage_class_specifier(self, p):
        """ storage_class_specifier : AUTO
                                    | REGISTER
                                    | STATIC
                                    | EXTERN
                                    | TYPEDEF
                                    | _THREAD_LOCAL
        """
        p[0] = p[1]

    def p_function_specifier(self, p):
        """ function_specifier  : INLINE
                                | _NORETURN
        """
        p[0] = p[1]

    def p_type_specifier_no_typeid(self, p):
        """ type_specifier_no_typeid  : VOID
                                      | _BOOL
                                      | CHAR
                                      | SHORT
                                      | INT
                                      | LONG
                                      | FLOAT
                                      | DOUBLE
                                      | _COMPLEX
                                      | SIGNED
                                      | UNSIGNED
                                      | __INT128
        """
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))

    def p_type_specifier(self, p):
        """ type_specifier  : typedef_name
                            | enum_specifier
                            | struct_or_union_specifier
                            | type_specifier_no_typeid
                            | atomic_specifier
        """
        p[0] = p[1]

    # See section 6.7.2.4 of the C11 standard.
    def p_atomic_specifier(self, p):
        """ atomic_specifier  : _ATOMIC LPAREN type_name RPAREN
        """
        typ = p[3]
        typ.quals.append('_Atomic')
        p[0] = typ

    def p_type_qualifier(self, p):
        """ type_qualifier  : CONST
                            | RESTRICT
                            | VOLATILE
                            | _ATOMIC
        """
        p[0] = p[1]

    def p_init_declarator_list(self, p):
        """ init_declarator_list    : init_declarator
                                    | init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # Returns a {decl=<declarator> : init=<initializer>} dictionary
    # If there's no initializer, uses None
    #
    def p_init_declarator(self, p):
        """ init_declarator : declarator
                            | declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    def p_id_init_declarator_list(self, p):
        """ id_init_declarator_list    : id_init_declarator
                                       | id_init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    def p_id_init_declarator(self, p):
        """ id_init_declarator : id_declarator
                               | id_declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    # Require at least one type specifier in a specifier-qualifier-list
    #
    def p_specifier_qualifier_list_1(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_specifier_qualifier_list_2(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_specifier_qualifier_list_3(self, p):
        """ specifier_qualifier_list  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_specifier_qualifier_list_4(self, p):
        """ specifier_qualifier_list  : type_qualifier_list type_specifier
        """
        p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])

    def p_specifier_qualifier_list_5(self, p):
        """ specifier_qualifier_list  : alignment_specifier
        """
        p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])

    def p_specifier_qualifier_list_6(self, p):
        """ specifier_qualifier_list  : specifier_qualifier_list alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')

    # TYPEID is allowed here (and in other struct/enum related tag names),
    # because struct/enum tags reside in their own namespace and can be named
    # the same as types
    #
    def p_struct_or_union_specifier_1(self, p):
        """ struct_or_union_specifier   : struct_or_union ID
                                        | struct_or_union TYPEID
        """
        klass = self._select_struct_union_class(p[1])
        # None means no list of members
        p[0] = klass(
            name=p[2],
            decls=None,
            coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_2(self, p):
        """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
                                      | struct_or_union brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 4:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=None,
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=None,
                decls=p[3],
                coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_3(self, p):
        """ struct_or_union_specifier   : struct_or_union ID brace_open struct_declaration_list brace_close
                                        | struct_or_union ID brace_open brace_close
                                        | struct_or_union TYPEID brace_open struct_declaration_list brace_close
                                        | struct_or_union TYPEID brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 5:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=p[2],
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=p[2],
                decls=p[4],
                coord=self._token_coord(p, 2))

    def p_struct_or_union(self, p):
        """ struct_or_union : STRUCT
                            | UNION
        """
        p[0] = p[1]

    # Combine all declarations into a single list
    #
    def p_struct_declaration_list(self, p):
        """ struct_declaration_list     : struct_declaration
                                        | struct_declaration_list struct_declaration
        """
        if len(p) == 2:
            p[0] = p[1] or []
        else:
            p[0] = p[1] + (p[2] or [])

    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
        """
        spec = p[1]
        assert 'typedef' not in spec['storage']

        if p[2] is not None:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2])

        elif len(spec['type']) == 1:
            # Anonymous struct/union, gcc extension, C1x feature.
            # Although the standard only allows structs/unions here, I see no
            # reason to disallow other types since some compilers have typedefs
            # here, and pycparser isn't about rejecting all invalid code.
            #
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)

            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=decl_type)])

        else:
            # Structure/union members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            #
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)])

        p[0] = decls

    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI
        """
        p[0] = None

    def p_struct_declaration_3(self, p):
        """ struct_declaration : pppragma_directive
        """
        p[0] = [p[1]]

    def p_struct_declarator_list(self, p):
        """ struct_declarator_list  : struct_declarator
                                    | struct_declarator_list COMMA struct_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # struct_declarator passes up a dict with the keys: decl (for
    # the underlying declarator) and bitsize (for the bitsize)
    #
    def p_struct_declarator_1(self, p):
        """ struct_declarator : declarator
        """
        p[0] = {'decl': p[1], 'bitsize': None}

    def p_struct_declarator_2(self, p):
        """ struct_declarator   : declarator COLON constant_expression
                                | COLON constant_expression
        """
        if len(p) > 3:
            p[0] = {'decl': p[1], 'bitsize': p[3]}
        else:
            p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}

    def p_enum_specifier_1(self, p):
        """ enum_specifier  : ENUM ID
                            | ENUM TYPEID
        """
        p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))

    def p_enum_specifier_2(self, p):
        """ enum_specifier  : ENUM brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))

    def p_enum_specifier_3(self, p):
        """ enum_specifier  : ENUM ID brace_open enumerator_list brace_close
                            | ENUM TYPEID brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))

    def p_enumerator_list(self, p):
        """ enumerator_list : enumerator
                            | enumerator_list COMMA
                            | enumerator_list COMMA enumerator
        """
        if len(p) == 2:
            p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
        elif len(p) == 3:
            p[0] = p[1]
        else:
            p[1].enumerators.append(p[3])
            p[0] = p[1]

    def p_alignment_specifier(self, p):
        """ alignment_specifier  : _ALIGNAS LPAREN type_name RPAREN
                                 | _ALIGNAS LPAREN constant_expression RPAREN
        """
        p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))

    def p_enumerator(self, p):
        """ enumerator  : ID
                        | ID EQUALS constant_expression
        """
        if len(p) == 2:
            enumerator = c_ast.Enumerator(
                p[1], None,
                self._token_coord(p, 1))
        else:
            enumerator = c_ast.Enumerator(
                p[1], p[3],
                self._token_coord(p, 1))
        self._add_identifier(enumerator.name, enumerator.coord)

        p[0] = enumerator

    def p_declarator(self, p):
        """ declarator  : id_declarator
                        | typeid_declarator
        """
        p[0] = p[1]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_1(self, p):
        """ xxx_declarator  : direct_xxx_declarator
        """
        p[0] = p[1]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_2(self, p):
        """ xxx_declarator  : pointer direct_xxx_declarator
        """
        p[0] = self._type_modify_decl(p[2], p[1])

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_1(self, p):
        """ direct_xxx_declarator   : yyy
        """
        p[0] = c_ast.TypeDecl(
            declname=p[1],
            type=None,
            quals=None,
            align=None,
            coord=self._token_coord(p, 1))

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
    def p_direct_xxx_declarator_2(self, p):
        """ direct_xxx_declarator   : LPAREN xxx_declarator RPAREN
        """
        p[0] = p[2]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_3(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        quals = (p[3] if len(p) > 5 else []) or []
        # Accept dimension qualifiers
        # Per C99 6.7.5.3 p7
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[4] if len(p) > 5 else p[3],
            dim_quals=quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_4(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
                                    | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
        """
        # Using slice notation for PLY objects doesn't work in Python 3 for the
        # version of PLY embedded with pycparser; see PLY Google Code issue 30.
        # Work around that here by listing the two elements separately.
        listed_quals = [item if isinstance(item, list) else [item]
                        for item in [p[3], p[4]]]
        dim_quals = [qual for sublist in listed_quals for qual in sublist
                     if qual is not None]
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[5],
            dim_quals=dim_quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    # Special for VLAs
    #
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_5(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
        """
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[4], self._token_coord(p, 4)),
            dim_quals=p[3] if p[3] is not None else [],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_6(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LPAREN parameter_type_list RPAREN
                                    | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
        """
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        # To see why _get_yacc_lookahead_token is needed, consider:
        #   typedef char TT;
        #   void foo(int TT) { TT = 10; }
        # Outside the function, TT is a typedef, but inside (starting and
        # ending with the braces) it's a parameter. The trouble begins with
        # yacc's lookahead token. We don't know if we're declaring or
        # defining a function until we see LBRACE, but if we wait for yacc to
        # trigger a rule on that token, then TT will have already been read
        # and incorrectly interpreted as TYPEID. We need to add the
        # parameters to the scope the moment the lexer sees LBRACE.
        #
        if self._get_yacc_lookahead_token().type == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    if isinstance(param, c_ast.EllipsisParam):
                        break
                    self._add_identifier(param.name, param.coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)

    def p_pointer(self, p):
        """ pointer : TIMES type_qualifier_list_opt
                    | TIMES type_qualifier_list_opt pointer
        """
        coord = self._token_coord(p, 1)
        # Pointer decls nest from inside out. This is important when different
        # levels have different qualifiers. For example:
        #
        #   char * const * p;
        #
        # Means "pointer to const pointer to char"
        #
        # While:
        #
        #   char ** const p;
        #
        # Means "const pointer to pointer to char"
        #
        # So when we construct PtrDecl nestings, the leftmost pointer goes in
        # as the most nested type.
        nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
        if len(p) > 3:
            tail_type = p[3]
            while tail_type.type is not None:
                tail_type = tail_type.type
            tail_type.type = nested_type
            p[0] = p[3]
        else:
            p[0] = nested_type

    def p_type_qualifier_list(self, p):
        """ type_qualifier_list : type_qualifier
                                | type_qualifier_list type_qualifier
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_parameter_type_list(self, p):
        """ parameter_type_list : parameter_list
                                | parameter_list COMMA ELLIPSIS
        """
        if len(p) > 2:
            p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))

        p[0] = p[1]

    def p_parameter_list(self, p):
        """ parameter_list  : parameter_declaration
                            | parameter_list COMMA parameter_declaration
        """
        if len(p) == 2:  # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]

    # From ISO/IEC 9899:TC2, 6.7.5.3.11:
    # "If, in a parameter declaration, an identifier can be treated either
    #  as a typedef name or as a parameter name, it shall be taken as a
    #  typedef name."
    #
    # Inside a parameter declaration, once we've reduced declaration
    # specifiers, if we shift in an LPAREN and see a TYPEID, it could be
    # either an abstract declarator or a declarator nested inside parens.
    # This rule tells us to always treat it as an abstract declarator.
    # Therefore, we only accept `id_declarator`s and
    # `typeid_noparen_declarator`s.
    def p_parameter_declaration_1(self, p):
        """ parameter_declaration   : declaration_specifiers id_declarator
                                    | declaration_specifiers typeid_noparen_declarator
        """
        spec = p[1]
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                                                 coord=self._token_coord(p, 1))]
        p[0] = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2])])[0]

    def p_parameter_declaration_2(self, p):
        """ parameter_declaration   : declaration_specifiers abstract_declarator_opt
        """
        spec = p[1]
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                                                 coord=self._token_coord(p, 1))]

        # Parameters can have the same names as typedefs. The trouble is that
        # the parameter's name gets grouped into declaration_specifiers,
        # making it look like an old-style declaration; compensate.
        #
        if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
                self._is_type_in_scope(spec['type'][-1].names[0]):
            decl = self._build_declarations(
                spec=spec,
                decls=[dict(decl=p[2], init=None)])[0]

        # This truly is an old-style parameter declaration
        #
        else:
            decl = c_ast.Typename(
                name='',
                quals=spec['qual'],
                align=None,
                type=p[2] or c_ast.TypeDecl(None, None, None, None),
                coord=self._token_coord(p, 2))
            typename = spec['type']
            decl = self._fix_decl_name_type(decl, typename)

        p[0] = decl

    def p_identifier_list(self, p):
        """ identifier_list : identifier
                            | identifier_list COMMA identifier
        """
        if len(p) == 2:  # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]

    def p_initializer_1(self, p):
        """ initializer : assignment_expression
        """
        p[0] = p[1]

    def p_initializer_2(self, p):
        """ initializer : brace_open initializer_list_opt brace_close
                        | brace_open initializer_list COMMA brace_close
        """
        if p[2] is None:
            p[0] = c_ast.InitList([], self._token_coord(p, 1))
        else:
            p[0] = p[2]

    def p_initializer_list(self, p):
        """ initializer_list    : designation_opt initializer
                                | initializer_list COMMA designation_opt initializer
        """
        if len(p) == 3:  # single initializer
            init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
            p[0] = c_ast.InitList([init], p[2].coord)
        else:
            init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
            p[1].exprs.append(init)
            p[0] = p[1]

    def p_designation(self, p):
        """ designation : designator_list EQUALS
        """
        p[0] = p[1]

    # Designators are represented as a list of nodes, in the order in which
    # they're written in the code.
    #
    def p_designator_list(self, p):
        """ designator_list : designator
                            | designator_list designator
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_designator(self, p):
        """ designator  : LBRACKET constant_expression RBRACKET
                        | PERIOD identifier
        """
        p[0] = p[2]

    def p_type_name(self, p):
        """ type_name   : specifier_qualifier_list abstract_declarator_opt
        """
        typename = c_ast.Typename(
            name='',
            quals=p[1]['qual'][:],
            align=None,
            type=p[2] or c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 2))

        p[0] = self._fix_decl_name_type(typename, p[1]['type'])

    def p_abstract_declarator_1(self, p):
        """ abstract_declarator     : pointer
        """
        dummytype = c_ast.TypeDecl(None, None, None, None)
        p[0] = self._type_modify_decl(
            decl=dummytype,
            modifier=p[1])

    def p_abstract_declarator_2(self, p):
        """ abstract_declarator     : pointer direct_abstract_declarator
        """
        p[0] = self._type_modify_decl(p[2], p[1])

    def p_abstract_declarator_3(self, p):
        """ abstract_declarator     : direct_abstract_declarator
        """
        p[0] = p[1]

    # Creating and using direct_abstract_declarator_opt here
    # instead of listing both direct_abstract_declarator and the
    # lack of it in the beginning of _1 and _2 caused two
    # shift/reduce errors.
    #
    def p_direct_abstract_declarator_1(self, p):
        """ direct_abstract_declarator  : LPAREN abstract_declarator RPAREN """
        p[0] = p[2]

    def p_direct_abstract_declarator_2(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
        """
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[3],
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    def p_direct_abstract_declarator_3(self, p):
        """ direct_abstract_declarator  : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        quals = (p[2] if len(p) > 4 else []) or []
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None, None),
            dim=p[3] if len(p) > 4 else p[2],
            dim_quals=quals,
            coord=self._token_coord(p, 1))

    def p_direct_abstract_declarator_4(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET TIMES RBRACKET
        """
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[3], self._token_coord(p, 3)),
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    def p_direct_abstract_declarator_5(self, p):
        """ direct_abstract_declarator  : LBRACKET TIMES RBRACKET
        """
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None, None),
            dim=c_ast.ID(p[3], self._token_coord(p, 3)),
            dim_quals=[],
            coord=self._token_coord(p, 1))

    def p_direct_abstract_declarator_6(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
        """
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)

    def p_direct_abstract_declarator_7(self, p):
        """ direct_abstract_declarator  : LPAREN parameter_type_list_opt RPAREN
        """
        p[0] = c_ast.FuncDecl(
            args=p[2],
            type=c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 1))

    # declaration is a list, statement isn't. To make it consistent,
    # block_item will always be a list
    #
    def p_block_item(self, p):
        """ block_item  : declaration
                        | statement
        """
        p[0] = p[1] if isinstance(p[1], list) else [p[1]]

    # Since we made block_item a list, this just combines lists
    #
    def p_block_item_list(self, p):
        """ block_item_list : block_item
                            | block_item_list block_item
        """
        # Empty block items (plain ';') produce [None], so ignore them
        p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]

    def p_compound_statement_1(self, p):
        """ compound_statement : brace_open block_item_list_opt brace_close """
        p[0] = c_ast.Compound(
            block_items=p[2],
            coord=self._token_coord(p, 1))

    def p_labeled_statement_1(self, p):
        """ labeled_statement : ID COLON pragmacomp_or_statement """
        p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))

    def p_labeled_statement_2(self, p):
        """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
        p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))

    def p_labeled_statement_3(self, p):
        """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
        p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))

    def p_selection_statement_1(self, p):
        """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
        p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))

    def p_selection_statement_2(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
        p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))

    def p_selection_statement_3(self, p):
        """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
        p[0] = fix_switch_cases(
            c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))

    def p_iteration_statement_1(self, p):
        """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
        p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))

    def p_iteration_statement_2(self, p):
        """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
        p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))

    def p_iteration_statement_3(self, p):
        """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
        p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))

    def p_iteration_statement_4(self, p):
        """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
        p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
                         p[4], p[6], p[8], self._token_coord(p, 1))

    def p_jump_statement_1(self, p):
        """ jump_statement  : GOTO ID SEMI """
        p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))

    def p_jump_statement_2(self, p):
        """ jump_statement  : BREAK SEMI """
        p[0] = c_ast.Break(self._token_coord(p, 1))

    def p_jump_statement_3(self, p):
        """ jump_statement  : CONTINUE SEMI """
        p[0] = c_ast.Continue(self._token_coord(p, 1))

    def p_jump_statement_4(self, p):
        """ jump_statement  : RETURN expression SEMI
                            | RETURN SEMI
        """
        p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))

    def p_expression_statement(self, p):
        """ expression_statement : expression_opt SEMI """
        if p[1] is None:
            p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
        else:
            p[0] = p[1]

    def p_expression(self, p):
        """ expression  : assignment_expression
                        | expression COMMA assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            if not isinstance(p[1], c_ast.ExprList):
                p[1] = c_ast.ExprList([p[1]], p[1].coord)

            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_parenthesized_compound_expression(self, p):
        """ assignment_expression : LPAREN compound_statement RPAREN """
        p[0] = p[2]

    def p_typedef_name(self, p):
        """ typedef_name : TYPEID """
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))

    def p_assignment_expression(self, p):
        """ assignment_expression   : conditional_expression
                                    | unary_expression assignment_operator assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)

    # K&R2 defines these as many separate rules, to encode
    # precedence and associativity. Why work hard? I'll just use
    # the built-in precedence/associativity specification feature
    # of PLY. (see precedence declaration above)
    #
    def p_assignment_operator(self, p):
        """ assignment_operator : EQUALS
                                | XOREQUAL
                                | TIMESEQUAL
                                | DIVEQUAL
                                | MODEQUAL
                                | PLUSEQUAL
                                | MINUSEQUAL
                                | LSHIFTEQUAL
                                | RSHIFTEQUAL
                                | ANDEQUAL
                                | OREQUAL
        """
        p[0] = p[1]

    def p_constant_expression(self, p):
        """ constant_expression : conditional_expression """
        p[0] = p[1]

    def p_conditional_expression(self, p):
        """ conditional_expression  : binary_expression
                                    | binary_expression CONDOP expression COLON conditional_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)

    def p_binary_expression(self, p):
        """ binary_expression   : cast_expression
                                | binary_expression TIMES binary_expression
                                | binary_expression DIVIDE binary_expression
                                | binary_expression MOD binary_expression
                                | binary_expression PLUS binary_expression
                                | binary_expression MINUS binary_expression
                                | binary_expression RSHIFT binary_expression
                                | binary_expression LSHIFT binary_expression
                                | binary_expression LT binary_expression
                                | binary_expression LE binary_expression
                                | binary_expression GE binary_expression
                                | binary_expression GT binary_expression
                                | binary_expression EQ binary_expression
                                | binary_expression NE binary_expression
                                | binary_expression AND binary_expression
                                | binary_expression OR binary_expression
                                | binary_expression XOR binary_expression
                                | binary_expression LAND binary_expression
                                | binary_expression LOR binary_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)
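
    # Example (illustrative, not in the original source): thanks to the
    # precedence table declared above, 'a + b * c' reduces to
    # BinaryOp('+', ID('a'), BinaryOp('*', ID('b'), ID('c'))) rather than
    # grouping the addition first.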

    def p_cast_expression_1(self, p):
        """ cast_expression : unary_expression """
        p[0] = p[1]

    def p_cast_expression_2(self, p):
        """ cast_expression : LPAREN type_name RPAREN cast_expression """
        p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))

    def p_unary_expression_1(self, p):
        """ unary_expression    : postfix_expression """
        p[0] = p[1]

    def p_unary_expression_2(self, p):
        """ unary_expression    : PLUSPLUS unary_expression
                                | MINUSMINUS unary_expression
                                | unary_operator cast_expression
        """
        p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)

    def p_unary_expression_3(self, p):
        """ unary_expression    : SIZEOF unary_expression
                                | SIZEOF LPAREN type_name RPAREN
                                | _ALIGNOF LPAREN type_name RPAREN
        """
        p[0] = c_ast.UnaryOp(
            p[1],
            p[2] if len(p) == 3 else p[3],
            self._token_coord(p, 1))

    def p_unary_operator(self, p):
        """ unary_operator  : AND
                            | TIMES
                            | PLUS
                            | MINUS
                            | NOT
                            | LNOT
        """
        p[0] = p[1]

    def p_postfix_expression_1(self, p):
        """ postfix_expression  : primary_expression """
        p[0] = p[1]

    def p_postfix_expression_2(self, p):
        """ postfix_expression  : postfix_expression LBRACKET expression RBRACKET """
        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)

    def p_postfix_expression_3(self, p):
        """ postfix_expression  : postfix_expression LPAREN argument_expression_list RPAREN
                                | postfix_expression LPAREN RPAREN
        """
        p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)

    def p_postfix_expression_4(self, p):
        """ postfix_expression  : postfix_expression PERIOD ID
                                | postfix_expression PERIOD TYPEID
                                | postfix_expression ARROW ID
                                | postfix_expression ARROW TYPEID
        """
        field = c_ast.ID(p[3], self._token_coord(p, 3))
        p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)

    def p_postfix_expression_5(self, p):
        """ postfix_expression  : postfix_expression PLUSPLUS
                                | postfix_expression MINUSMINUS
        """
        p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)

    def p_postfix_expression_6(self, p):
        """ postfix_expression  : LPAREN type_name RPAREN brace_open initializer_list brace_close
                                | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
        """
        p[0] = c_ast.CompoundLiteral(p[2], p[5])

    def p_primary_expression_1(self, p):
        """ primary_expression  : identifier """
        p[0] = p[1]

    def p_primary_expression_2(self, p):
        """ primary_expression  : constant """
        p[0] = p[1]

    def p_primary_expression_3(self, p):
        """ primary_expression  : unified_string_literal
                                | unified_wstring_literal
        """
        p[0] = p[1]

    def p_primary_expression_4(self, p):
        """ primary_expression  : LPAREN expression RPAREN """
        p[0] = p[2]

    def p_primary_expression_5(self, p):
        """ primary_expression  : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
        """
        coord = self._token_coord(p, 1)
        p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
                              c_ast.ExprList([p[3], p[5]], coord),
                              coord)

    def p_offsetof_member_designator(self, p):
        """ offsetof_member_designator : identifier
                                       | offsetof_member_designator PERIOD identifier
                                       | offsetof_member_designator LBRACKET expression RBRACKET
        """
        if len(p) == 2:
            p[0] = p[1]
        elif len(p) == 4:
            p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
        elif len(p) == 5:
            p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
        else:
            raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))

    def p_argument_expression_list(self, p):
        """ argument_expression_list    : assignment_expression
                                        | argument_expression_list COMMA assignment_expression
        """
        if len(p) == 2:  # single expr
            p[0] = c_ast.ExprList([p[1]], p[1].coord)
        else:
            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_identifier(self, p):
        """ identifier  : ID """
        p[0] = c_ast.ID(p[1], self._token_coord(p, 1))

    def p_constant_1(self, p):
        """ constant    : INT_CONST_DEC
                        | INT_CONST_OCT
                        | INT_CONST_HEX
                        | INT_CONST_BIN
                        | INT_CONST_CHAR
        """
        uCount = 0
        lCount = 0
        for x in p[1][-3:]:
            if x in ('l', 'L'):
                lCount += 1
            elif x in ('u', 'U'):
                uCount += 1
        if uCount > 1:
            raise ValueError('Constant cannot have more than one u/U suffix.')
        elif lCount > 2:
            raise ValueError('Constant cannot have more than two l/L suffixes.')
        prefix = 'unsigned ' * uCount + 'long ' * lCount
        p[0] = c_ast.Constant(
            prefix + 'int', p[1], self._token_coord(p, 1))
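
    # Example (illustrative, not in the original source): '42ul' has one u/U
    # and one l/L suffix among its last three characters, so prefix becomes
    # 'unsigned long ' and the constant's type string is 'unsigned long int'.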

    def p_constant_2(self, p):
        """ constant    : FLOAT_CONST
                        | HEX_FLOAT_CONST
        """
        if 'x' in p[1].lower():
            t = 'float'
        else:
            if p[1][-1] in ('f', 'F'):
                t = 'float'
            elif p[1][-1] in ('l', 'L'):
                t = 'long double'
            else:
                t = 'double'

        p[0] = c_ast.Constant(
            t, p[1], self._token_coord(p, 1))

    def p_constant_3(self, p):
        """ constant    : CHAR_CONST
                        | WCHAR_CONST
                        | U8CHAR_CONST
                        | U16CHAR_CONST
                        | U32CHAR_CONST
        """
        p[0] = c_ast.Constant(
            'char', p[1], self._token_coord(p, 1))

    # The "unified" string and wstring literal rules are for supporting
    # concatenation of adjacent string literals.
    # I.e. "hello " "world" is seen by the C compiler as a single string
    # literal with the value "hello world"
    #
    def p_unified_string_literal(self, p):
        """ unified_string_literal  : STRING_LITERAL
                                    | unified_string_literal STRING_LITERAL
        """
        if len(p) == 2:  # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._token_coord(p, 1))
        else:
            p[1].value = p[1].value[:-1] + p[2][1:]
            p[0] = p[1]

    def p_unified_wstring_literal(self, p):
        """ unified_wstring_literal : WSTRING_LITERAL
                                    | U8STRING_LITERAL
                                    | U16STRING_LITERAL
                                    | U32STRING_LITERAL
                                    | unified_wstring_literal WSTRING_LITERAL
                                    | unified_wstring_literal U8STRING_LITERAL
                                    | unified_wstring_literal U16STRING_LITERAL
                                    | unified_wstring_literal U32STRING_LITERAL
        """
        if len(p) == 2:  # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._token_coord(p, 1))
        else:
            p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
            p[0] = p[1]

    def p_brace_open(self, p):
        """ brace_open  : LBRACE
        """
        p[0] = p[1]
        p.set_lineno(0, p.lineno(1))

    def p_brace_close(self, p):
        """ brace_close : RBRACE
        """
        p[0] = p[1]
        p.set_lineno(0, p.lineno(1))

    def p_empty(self, p):
        'empty : '
        p[0] = None

    def p_error(self, p):
        # If error recovery is added here in the future, make sure
        # _get_yacc_lookahead_token still works!
        #
        if p:
            self._parse_error(
                'before: %s' % p.value,
                self._coord(lineno=p.lineno,
                            column=self.clex.find_tok_column(p)))
        else:
            self._parse_error('At end of input', self.clex.filename)