#------------------------------------------------------------------------------
# pycparser: c_parser.py
#
# CParser class: Parser and AST builder for the C language
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#------------------------------------------------------------------------------
from .ply import yacc

from . import c_ast
from .c_lexer import CLexer
from .plyparser import PLYParser, ParseError, parameterized, template
from .ast_transforms import fix_switch_cases, fix_atomic_specifiers


@template
class CParser(PLYParser):
    def __init__(
            self,
            lex_optimize=True,
            lexer=CLexer,
            lextab='pycparser.lextab',
            yacc_optimize=True,
            yacctab='pycparser.yacctab',
            yacc_debug=False,
            taboutputdir=''):
        """ Create a new CParser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking CParser/CLexer, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used, if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lexer:
                Set this parameter to define the lexer to use if
                you're not using the default CLexer.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True)

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used, if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.

            taboutputdir:
                Set this parameter to control the location of generated
                lextab and yacctab files.
        """
        self.clex = lexer(
            error_func=self._lex_error_func,
            on_lbrace_func=self._lex_on_lbrace_func,
            on_rbrace_func=self._lex_on_rbrace_func,
            type_lookup_func=self._lex_type_lookup_func)

        self.clex.build(
            optimize=lex_optimize,
            lextab=lextab,
            outputdir=taboutputdir)
        self.tokens = self.clex.tokens

        rules_with_opt = [
            'abstract_declarator',
            'assignment_expression',
            'declaration_list',
            'declaration_specifiers_no_type',
            'designation',
            'expression',
            'identifier_list',
            'init_declarator_list',
            'id_init_declarator_list',
            'initializer_list',
            'parameter_type_list',
            'block_item_list',
            'type_qualifier_list',
            'struct_declarator_list'
        ]

        for rule in rules_with_opt:
            self._create_opt_rule(rule)

        self.cparser = yacc.yacc(
            module=self,
            start='translation_unit_or_empty',
            debug=yacc_debug,
            optimize=yacc_optimize,
            tabmodule=yacctab,
            outputdir=taboutputdir)

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, 'name' is currently a type in the scope. If it's False,
        # 'name' is used in the scope but not as a type (for instance, if we
        # saw: int name;
        # If 'name' is not a key in _scope_stack[n] then 'name' was not
        # defined in this scope at all.
        self._scope_stack = [dict()]
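        # For example, after parsing "typedef int T; int x;" at file scope,
        # the global scope is roughly {'T': True, 'x': False}.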

        # Keeps track of the last token given to yacc (the lookahead token)
        self._last_yielded_token = None

    def parse(self, text, filename='', debug=False):
        """ Parses C code and returns an AST.

            text:
                A string containing the C source code

            filename:
                Name of the file being parsed (for meaningful
                error messages)

            debug:
                Debug flag to YACC
        """
        self.clex.filename = filename
        self.clex.reset_lineno()
        self._scope_stack = [dict()]
        self._last_yielded_token = None
        return self.cparser.parse(
            input=text,
            lexer=self.clex,
            debug=debug)
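
    # Illustrative usage sketch (not part of the parser itself); assumes
    # the pycparser package is importable:
    #
    #   from pycparser.c_parser import CParser
    #
    #   parser = CParser()
    #   ast = parser.parse('int x = 1;', filename='<none>')
    #   ast.show()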

    ######################-- PRIVATE --######################

    def _push_scope(self):
        self._scope_stack.append(dict())

    def _pop_scope(self):
        assert len(self._scope_stack) > 1
        self._scope_stack.pop()

    def _add_typedef_name(self, name, coord):
        """ Add a new typedef name (i.e., a TYPEID) to the current scope
        """
        if not self._scope_stack[-1].get(name, True):
            self._parse_error(
                "Typedef %r previously declared as non-typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = True

    def _add_identifier(self, name, coord):
        """ Add a new object, function, or enum member name (i.e., an ID) to
            the current scope
        """
        if self._scope_stack[-1].get(name, False):
            self._parse_error(
                "Non-typedef %r previously declared as typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = False

    def _is_type_in_scope(self, name):
        """ Is *name* a typedef-name in the current scope?
        """
        for scope in reversed(self._scope_stack):
            # If name is an identifier in this scope it shadows typedefs in
            # higher scopes.
            in_scope = scope.get(name)
            if in_scope is not None:
                return in_scope
        return False
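
    # For example, after "typedef char T;" at file scope, a block that
    # declares "int T;" makes _is_type_in_scope('T') return False while
    # that block's scope is on the stack, and True again once it's popped.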

    def _lex_error_func(self, msg, line, column):
        self._parse_error(msg, self._coord(line, column))

    def _lex_on_lbrace_func(self):
        self._push_scope()

    def _lex_on_rbrace_func(self):
        self._pop_scope()

    def _lex_type_lookup_func(self, name):
        """ Looks up types that were previously defined with
            typedef.
            Passed to the lexer for recognizing identifiers that
            are types.
        """
        is_type = self._is_type_in_scope(name)
        return is_type

    def _get_yacc_lookahead_token(self):
        """ We need access to yacc's lookahead token in certain cases.
            This is the last token yacc requested from the lexer, so we
            ask the lexer.
        """
        return self.clex.last_token

    # To understand what's going on here, read sections A.8.5 and
    # A.8.6 of K&R2 very carefully.
    #
    # A C type consists of a basic type declaration, with a list
    # of modifiers. For example:
    #
    #   int *c[5];
    #
    # The basic declaration here is 'int c', and the pointer and
    # the array are the modifiers.
    #
    # Basic declarations are represented by TypeDecl (from module c_ast) and
    # the modifiers are FuncDecl, PtrDecl and ArrayDecl.
    #
    # The standard states that whenever a new modifier is parsed, it should be
    # added to the end of the list of modifiers. For example:
    #
    # K&R2 A.8.6.2: Array Declarators
    #
    #   In a declaration T D where D has the form
    #       D1 [constant-expression-opt]
    #   and the type of the identifier in the declaration T D1 is
    #   "type-modifier T", the type of the
    #   identifier of D is "type-modifier array of T"
    #
    # This is what this method does. The declarator it receives
    # can be a list of declarators ending with TypeDecl. It
    # tacks the modifier to the end of this list, just before
    # the TypeDecl.
    #
    # Additionally, the modifier may be a list itself. This is
    # useful for pointers, that can come as a chain from the rule
    # p_pointer. In this case, the whole modifier list is spliced
    # into the new location.
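    #
    # For example, for "int *c[5]" the declarator arrives as
    # ArrayDecl -> TypeDecl('c'); splicing in the pointer modifier yields
    # ArrayDecl -> PtrDecl -> TypeDecl('c'), and the basic 'int' type is
    # attached to the innermost TypeDecl later, by _fix_decl_name_type.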
    def _type_modify_decl(self, decl, modifier):
        """ Tacks a type modifier on a declarator, and returns
            the modified declarator.

            Note: the declarator and modifier may be modified
        """
        #~ print '****'
        #~ decl.show(offset=3)
        #~ modifier.show(offset=3)
        #~ print '****'

        modifier_head = modifier
        modifier_tail = modifier

        # The modifier may be a nested list. Reach its tail.
        while modifier_tail.type:
            modifier_tail = modifier_tail.type

        # If the decl is a basic type, just tack the modifier onto it.
        if isinstance(decl, c_ast.TypeDecl):
            modifier_tail.type = decl
            return modifier
        else:
            # Otherwise, the decl is a list of modifiers. Reach
            # its tail and splice the modifier onto the tail,
            # pointing to the underlying basic type.
            decl_tail = decl

            while not isinstance(decl_tail.type, c_ast.TypeDecl):
                decl_tail = decl_tail.type

            modifier_tail.type = decl_tail.type
            decl_tail.type = modifier_head
            return decl

    # Due to the order in which declarators are constructed,
    # they have to be fixed in order to look like a normal AST.
    #
    # When a declaration arrives from syntax construction, it has
    # these problems:
    #   * The innermost TypeDecl has no type (because the basic
    #     type is only known at the uppermost declaration level)
    #   * The declaration has no variable name, since that is saved
    #     in the innermost TypeDecl
    #   * The typename of the declaration is a list of type
    #     specifiers, and not a node. Here, basic identifier types
    #     should be separated from more complex types like enums
    #     and structs.
    #
    # This method fixes these problems.
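    #
    # For example, "unsigned int x" arrives with typename
    # [IdentifierType(['unsigned']), IdentifierType(['int'])], which is
    # collapsed here into a single IdentifierType(['unsigned', 'int'])
    # attached to the innermost TypeDecl.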
    def _fix_decl_name_type(self, decl, typename):
        """ Fixes a declaration. Modifies decl.
        """
        # Reach the underlying basic type
        #
        type = decl
        while not isinstance(type, c_ast.TypeDecl):
            type = type.type

        decl.name = type.declname
        type.quals = decl.quals[:]

        # The typename is a list of types. If any type in this
        # list isn't an IdentifierType, it must be the only
        # type in the list (it's illegal to declare "int enum ..")
        # If all the types are basic, they're collected in the
        # IdentifierType holder.
        for tn in typename:
            if not isinstance(tn, c_ast.IdentifierType):
                if len(typename) > 1:
                    self._parse_error(
                        "Invalid multiple types specified", tn.coord)
                else:
                    type.type = tn
                    return decl

        if not typename:
            # Functions default to returning int
            #
            if not isinstance(decl.type, c_ast.FuncDecl):
                self._parse_error(
                    "Missing type in declaration", decl.coord)
            type.type = c_ast.IdentifierType(
                ['int'],
                coord=decl.coord)
        else:
            # At this point, we know that typename is a list of IdentifierType
            # nodes. Concatenate all the names into a single list.
            #
            type.type = c_ast.IdentifierType(
                [name for id in typename for name in id.names],
                coord=typename[0].coord)
        return decl

    def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
        """ Declaration specifiers are represented by a dictionary
            with the entries:
            * qual: a list of type qualifiers
            * storage: a list of storage class specifiers
            * type: a list of type specifiers
            * function: a list of function specifiers
            * alignment: a list of alignment specifiers

            This method is given a declaration specifier, and a
            new specifier of a given kind.
            If `append` is True, the new specifier is added to the end of
            the specifiers list, otherwise it's added at the beginning.
            Returns the declaration specifier, with the new
            specifier incorporated.
        """
        spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[])

        if append:
            spec[kind].append(newspec)
        else:
            spec[kind].insert(0, newspec)

        return spec
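
    # For example, while parsing "static const int", the spec accumulates
    # roughly into:
    #   {'qual': ['const'], 'storage': ['static'],
    #    'type': [IdentifierType(['int'])], 'function': [], 'alignment': []}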

    def _build_declarations(self, spec, decls, typedef_namespace=False):
        """ Builds a list of declarations all sharing the given specifiers.
            If typedef_namespace is true, each declared name is added
            to the "typedef namespace", which also includes objects,
            functions, and enum constants.
        """
        is_typedef = 'typedef' in spec['storage']
        declarations = []

        # Bit-fields are allowed to be unnamed.
        if decls[0].get('bitsize') is not None:
            pass

        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None. This can only occur for the
        # first declarator.
        elif decls[0]['decl'] is None:
            if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                    not self._is_type_in_scope(spec['type'][-1].names[0]):
                coord = '?'
                for t in spec['type']:
                    if hasattr(t, 'coord'):
                        coord = t.coord
                        break
                self._parse_error('Invalid declaration', coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]['decl'] = c_ast.TypeDecl(
                declname=spec['type'][-1].names[0],
                type=None,
                quals=None,
                align=spec['alignment'],
                coord=spec['type'][-1].coord)
            # Remove the "new" type's name from the end of spec['type']
            del spec['type'][-1]

        # A similar problem can occur where the declaration ends up looking
        # like an abstract declarator. Give it a name if this is the case.
        elif not isinstance(decls[0]['decl'], (
                c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
            decls_0_tail = decls[0]['decl']
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec['type'][-1].names[0]
                del spec['type'][-1]

        for decl in decls:
            assert decl['decl'] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec['qual'],
                    storage=spec['storage'],
                    type=decl['decl'],
                    coord=decl['decl'].coord)
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=decl['decl'],
                    init=decl.get('init'),
                    bitsize=decl.get('bitsize'),
                    coord=decl['decl'].coord)

            if isinstance(declaration.type, (
                    c_ast.Enum, c_ast.Struct, c_ast.Union,
                    c_ast.IdentifierType)):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            fixed_decl = fix_atomic_specifiers(fixed_decl)
            declarations.append(fixed_decl)

        return declarations
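
    # For example, "int x = 1, *p;" shares one spec between two declarator
    # dicts and produces two nodes: Decl('x') with an init, and Decl('p')
    # whose type chain is PtrDecl -> TypeDecl.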

    def _build_function_definition(self, spec, decl, param_decls, body):
        """ Builds a function definition.
        """
        if 'typedef' in spec['storage']:
            self._parse_error("Invalid typedef", decl.coord)

        declaration = self._build_declarations(
            spec=spec,
            decls=[dict(decl=decl, init=None)],
            typedef_namespace=True)[0]

        return c_ast.FuncDef(
            decl=declaration,
            param_decls=param_decls,
            body=body,
            coord=decl.coord)

    def _select_struct_union_class(self, token):
        """ Given a token (either STRUCT or UNION), selects the
            appropriate AST class.
        """
        if token == 'struct':
            return c_ast.Struct
        else:
            return c_ast.Union

    ##
    ## Precedence and associativity of operators
    ##
    # If this changes, c_generator.CGenerator.precedence_map needs to change
    # as well
    precedence = (
        ('left', 'LOR'),
        ('left', 'LAND'),
        ('left', 'OR'),
        ('left', 'XOR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'GT', 'GE', 'LT', 'LE'),
        ('left', 'RSHIFT', 'LSHIFT'),
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE', 'MOD')
    )

    ##
    ## Grammar productions
    ## Implementation of the BNF defined in K&R2 A.13
    ##

    # Wrapper around a translation unit, to allow for empty input.
    # Not strictly part of the C99 Grammar, but useful in practice.
    def p_translation_unit_or_empty(self, p):
        """ translation_unit_or_empty   : translation_unit
                                        | empty
        """
        if p[1] is None:
            p[0] = c_ast.FileAST([])
        else:
            p[0] = c_ast.FileAST(p[1])

    def p_translation_unit_1(self, p):
        """ translation_unit    : external_declaration
        """
        # Note: external_declaration is already a list
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit    : translation_unit external_declaration
        """
        p[1].extend(p[2])
        p[0] = p[1]

    # Declarations always come as lists (because they can be
    # several in one line), so we wrap the function definition
    # into a list as well, to make the return value of
    # external_declaration homogeneous.
    def p_external_declaration_1(self, p):
        """ external_declaration    : function_definition
        """
        p[0] = [p[1]]

    def p_external_declaration_2(self, p):
        """ external_declaration    : declaration
        """
        p[0] = p[1]

    def p_external_declaration_3(self, p):
        """ external_declaration    : pp_directive
                                    | pppragma_directive
        """
        p[0] = [p[1]]

    def p_external_declaration_4(self, p):
        """ external_declaration    : SEMI
        """
        p[0] = []

    def p_external_declaration_5(self, p):
        """ external_declaration    : static_assert
        """
        p[0] = p[1]

    def p_static_assert_declaration(self, p):
        """ static_assert   : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
                            | _STATIC_ASSERT LPAREN constant_expression RPAREN
        """
        if len(p) == 5:
            p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
        else:
            p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]

    def p_pp_directive(self, p):
        """ pp_directive    : PPHASH
        """
        self._parse_error('Directives not supported yet',
                          self._token_coord(p, 1))

    # This encompasses two types of C99-compatible pragmas:
    # - The #pragma directive:
    #       # pragma character_sequence
    # - The _Pragma unary operator:
    #       _Pragma ( " string_literal " )
    def p_pppragma_directive(self, p):
        """ pppragma_directive      : PPPRAGMA
                                    | PPPRAGMA PPPRAGMASTR
                                    | _PRAGMA LPAREN unified_string_literal RPAREN
        """
        if len(p) == 5:
            p[0] = c_ast.Pragma(p[3], self._token_coord(p, 2))
        elif len(p) == 3:
            p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
        else:
            p[0] = c_ast.Pragma("", self._token_coord(p, 1))

    def p_pppragma_directive_list(self, p):
        """ pppragma_directive_list : pppragma_directive
                                    | pppragma_directive_list pppragma_directive
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    # In function definitions, the declarator can be followed by
    # a declaration list, for old "K&R style" function definitions.
    def p_function_definition_1(self, p):
        """ function_definition : id_declarator declaration_list_opt compound_statement
        """
        # no declaration specifiers - 'int' becomes the default type
        spec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(['int'],
                                       coord=self._token_coord(p, 1))],
            function=[])

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[1],
            param_decls=p[2],
            body=p[3])

    def p_function_definition_2(self, p):
        """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
        """
        spec = p[1]

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[2],
            param_decls=p[3],
            body=p[4])

    # Note: according to C18 A.2.2 and 6.7.10 (static_assert-declaration),
    # _Static_assert is a declaration, not a statement. We additionally
    # recognize it as a statement to fix parsing of _Static_assert inside
    # functions.
    #
    def p_statement(self, p):
        """ statement   : labeled_statement
                        | expression_statement
                        | compound_statement
                        | selection_statement
                        | iteration_statement
                        | jump_statement
                        | pppragma_directive
                        | static_assert
        """
        p[0] = p[1]

    # A pragma is generally considered a decorator rather than an actual
    # statement. Still, for the purposes of analyzing an abstract syntax tree
    # of C code, pragmas should not be ignored and were previously treated as
    # a statement. This presents a problem for constructs that take a
    # statement such as labeled_statements, selection_statements, and
    # iteration_statements, causing a misleading structure in the AST. For
    # example, consider the following C code.
    #
    #   for (int i = 0; i < 3; i++)
    #       #pragma omp critical
    #       sum += 1;
    #
    # This code will compile and execute "sum += 1;" as the body of the for
    # loop. Previous implementations of PyCParser would render the AST for
    # this block of code as follows:
    #
    #   For:
    #     DeclList:
    #       Decl: i, [], [], []
    #         TypeDecl: i, []
    #           IdentifierType: ['int']
    #         Constant: int, 0
    #     BinaryOp: <
    #       ID: i
    #       Constant: int, 3
    #     UnaryOp: p++
    #       ID: i
    #     Pragma: omp critical
    #   Assignment: +=
    #     ID: sum
    #     Constant: int, 1
    #
    # This AST misleadingly takes the Pragma as the body of the loop and the
    # assignment then becomes a sibling of the loop.
    #
    # To solve edge cases like these, the pragmacomp_or_statement rule groups
    # a pragma and its following statement (which would otherwise be orphaned)
    # using a compound block, effectively turning the above code into:
    #
    #   for (int i = 0; i < 3; i++) {
    #       #pragma omp critical
    #       sum += 1;
    #   }
    def p_pragmacomp_or_statement(self, p):
        """ pragmacomp_or_statement     : pppragma_directive_list statement
                                        | statement
        """
        if len(p) == 3:
            p[0] = c_ast.Compound(
                block_items=p[1]+[p[2]],
                coord=self._token_coord(p, 1))
        else:
            p[0] = p[1]

    # In C, declarations can come several in a line:
    #   int x, *px, romulo = 5;
    #
    # However, for the AST, we will split them to separate Decl
    # nodes.
    #
    # This rule splits its declarations and always returns a list
    # of Decl nodes, even if it's one element long.
    #
    def p_decl_body(self, p):
        """ decl_body : declaration_specifiers init_declarator_list_opt
                      | declaration_specifiers_no_type id_init_declarator_list_opt
        """
        spec = p[1]

        # p[2] (init_declarator_list_opt) is either a list or None
        #
        if p[2] is None:
            # By the standard, you must have at least one declarator unless
            # declaring a structure tag, a union tag, or the members of an
            # enumeration.
            #
            ty = spec['type']
            s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
            if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
                decls = [c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=ty[0],
                    init=None,
                    bitsize=None,
                    coord=ty[0].coord)]

            # However, this case can also occur on redeclared identifiers in
            # an inner scope. The trouble is that the redeclared type's name
            # gets grouped into declaration_specifiers; _build_declarations
            # compensates for this.
            #
            else:
                decls = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=None, init=None)],
                    typedef_namespace=True)

        else:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2],
                typedef_namespace=True)

        p[0] = decls

    # The declaration has been split to a decl_body sub-rule and
    # SEMI, because having them in a single rule created a problem
    # for defining typedefs.
    #
    # If a typedef line was directly followed by a line using the
    # type defined with the typedef, the type would not be
    # recognized. This is because to reduce the declaration rule,
    # the parser's lookahead asked for the token after SEMI, which
    # was the type from the next line, and the lexer had no chance
    # to see the updated type symbol table.
    #
    # Splitting solves this problem, because after seeing SEMI,
    # the parser reduces decl_body, which actually adds the new
    # type into the table to be seen by the lexer before the next
    # line is reached.
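    #
    # For example:
    #
    #   typedef int T;
    #   T x;
    #
    # With a single declaration rule, the lexer would have already
    # tokenized the second line's T as an ID (not a TYPEID) by the time
    # the typedef was added to the symbol table.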
    def p_declaration(self, p):
        """ declaration : decl_body SEMI
        """
        p[0] = p[1]

    # Since each declaration is a list of declarations, this
    # rule will combine all the declarations and return a single
    # list
    #
    def p_declaration_list(self, p):
        """ declaration_list    : declaration
                                | declaration_list declaration
        """
        p[0] = p[1] if len(p) == 2 else p[1] + p[2]

    # To know when declaration-specifiers end and declarators begin,
    # we require declaration-specifiers to have at least one
    # type-specifier, and disallow typedef-names after we've seen any
    # type-specifier. These are both required by the spec.
    #
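    # For example, given "typedef int T;", the declaration "unsigned T;"
    # declares a variable named T of type unsigned: once a type-specifier
    # has been seen, a following T can no longer be a typedef-name.
    #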
    def p_declaration_specifiers_no_type_1(self, p):
        """ declaration_specifiers_no_type  : type_qualifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')

    def p_declaration_specifiers_no_type_2(self, p):
        """ declaration_specifiers_no_type  : storage_class_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')

    def p_declaration_specifiers_no_type_3(self, p):
        """ declaration_specifiers_no_type  : function_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'function')

    # Without this, `typedef _Atomic(T) U` will parse incorrectly because the
    # _Atomic qualifier will match, instead of the specifier.
    def p_declaration_specifiers_no_type_4(self, p):
        """ declaration_specifiers_no_type  : atomic_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'type')

    def p_declaration_specifiers_no_type_5(self, p):
        """ declaration_specifiers_no_type  : alignment_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')

    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers  : declaration_specifiers type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers  : declaration_specifiers storage_class_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)

    def p_declaration_specifiers_3(self, p):
        """ declaration_specifiers  : declaration_specifiers function_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)

    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_7(self, p):
        """ declaration_specifiers  : declaration_specifiers alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)

    def p_storage_class_specifier(self, p):
        """ storage_class_specifier : AUTO
                                    | REGISTER
                                    | STATIC
                                    | EXTERN
                                    | TYPEDEF
                                    | _THREAD_LOCAL
        """
        p[0] = p[1]

    def p_function_specifier(self, p):
        """ function_specifier  : INLINE
                                | _NORETURN
        """
        p[0] = p[1]

    def p_type_specifier_no_typeid(self, p):
        """ type_specifier_no_typeid    : VOID
                                        | _BOOL
                                        | CHAR
                                        | SHORT
                                        | INT
                                        | LONG
                                        | FLOAT
                                        | DOUBLE
                                        | _COMPLEX
                                        | SIGNED
                                        | UNSIGNED
                                        | __INT128
        """
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))

    def p_type_specifier(self, p):
        """ type_specifier  : typedef_name
                            | enum_specifier
                            | struct_or_union_specifier
                            | type_specifier_no_typeid
                            | atomic_specifier
        """
        p[0] = p[1]

    # See section 6.7.2.4 of the C11 standard.
    def p_atomic_specifier(self, p):
        """ atomic_specifier    : _ATOMIC LPAREN type_name RPAREN
        """
        typ = p[3]
        typ.quals.append('_Atomic')
        p[0] = typ

    def p_type_qualifier(self, p):
        """ type_qualifier  : CONST
                            | RESTRICT
                            | VOLATILE
                            | _ATOMIC
        """
        p[0] = p[1]

    def p_init_declarator_list(self, p):
        """ init_declarator_list    : init_declarator
                                    | init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # Returns a {decl=<declarator> : init=<initializer>} dictionary
    # If there's no initializer, uses None
    #
    def p_init_declarator(self, p):
        """ init_declarator : declarator
                            | declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    def p_id_init_declarator_list(self, p):
        """ id_init_declarator_list : id_init_declarator
                                    | id_init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    def p_id_init_declarator(self, p):
        """ id_init_declarator  : id_declarator
                                | id_declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    # Require at least one type specifier in a specifier-qualifier-list
    #
    def p_specifier_qualifier_list_1(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_specifier_qualifier_list_2(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_specifier_qualifier_list_3(self, p):
        """ specifier_qualifier_list    : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_specifier_qualifier_list_4(self, p):
        """ specifier_qualifier_list    : type_qualifier_list type_specifier
        """
        p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])

    def p_specifier_qualifier_list_5(self, p):
        """ specifier_qualifier_list    : alignment_specifier
        """
        p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])

    def p_specifier_qualifier_list_6(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')

    # TYPEID is allowed here (and in other struct/enum related tag names),
    # because struct/enum tags reside in their own namespace and can be
    # named the same as types
    #
    def p_struct_or_union_specifier_1(self, p):
        """ struct_or_union_specifier   : struct_or_union ID
                                        | struct_or_union TYPEID
        """
        klass = self._select_struct_union_class(p[1])
        # None means no list of members
        p[0] = klass(
            name=p[2],
            decls=None,
            coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_2(self, p):
        """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
                                      | struct_or_union brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 4:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=None,
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=None,
                decls=p[3],
                coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_3(self, p):
        """ struct_or_union_specifier   : struct_or_union ID brace_open struct_declaration_list brace_close
                                        | struct_or_union ID brace_open brace_close
                                        | struct_or_union TYPEID brace_open struct_declaration_list brace_close
                                        | struct_or_union TYPEID brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 5:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=p[2],
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=p[2],
                decls=p[4],
                coord=self._token_coord(p, 2))

    def p_struct_or_union(self, p):
        """ struct_or_union : STRUCT
                            | UNION
        """
        p[0] = p[1]

    # Combine all declarations into a single list
    #
    def p_struct_declaration_list(self, p):
        """ struct_declaration_list : struct_declaration
                                    | struct_declaration_list struct_declaration
        """
        if len(p) == 2:
            p[0] = p[1] or []
        else:
            p[0] = p[1] + (p[2] or [])

    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
        """
        spec = p[1]
        assert 'typedef' not in spec['storage']

        if p[2] is not None:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2])

        elif len(spec['type']) == 1:
            # Anonymous struct/union, gcc extension, C1x feature.
            # Although the standard only allows structs/unions here, I see no
            # reason to disallow other types since some compilers have
            # typedefs here, and pycparser isn't about rejecting all invalid
            # code.
            #
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)

            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=decl_type)])

        else:
            # Structure/union members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            #
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)])

        p[0] = decls

    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI
        """
        p[0] = None

    def p_struct_declaration_3(self, p):
        """ struct_declaration : pppragma_directive
        """
        p[0] = [p[1]]

    def p_struct_declarator_list(self, p):
        """ struct_declarator_list  : struct_declarator
                                    | struct_declarator_list COMMA struct_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # struct_declarator passes up a dict with the keys: decl (for
    # the underlying declarator) and bitsize (for the bitsize)
    #
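    # For example, "int flag : 1;" inside a struct yields roughly
    # {'decl': <declarator for flag>, 'bitsize': Constant('int', '1')},
    # and the anonymous bit-field "int : 3;" yields a nameless TypeDecl
    # as the decl.
    #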
    def p_struct_declarator_1(self, p):
        """ struct_declarator : declarator
        """
        p[0] = {'decl': p[1], 'bitsize': None}

    def p_struct_declarator_2(self, p):
        """ struct_declarator   : declarator COLON constant_expression
                                | COLON constant_expression
        """
        if len(p) > 3:
            p[0] = {'decl': p[1], 'bitsize': p[3]}
        else:
            p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}

    def p_enum_specifier_1(self, p):
        """ enum_specifier  : ENUM ID
                            | ENUM TYPEID
        """
        p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))

    def p_enum_specifier_2(self, p):
        """ enum_specifier  : ENUM brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))

    def p_enum_specifier_3(self, p):
        """ enum_specifier  : ENUM ID brace_open enumerator_list brace_close
                            | ENUM TYPEID brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))

    def p_enumerator_list(self, p):
        """ enumerator_list : enumerator
                            | enumerator_list COMMA
                            | enumerator_list COMMA enumerator
        """
        if len(p) == 2:
            p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
        elif len(p) == 3:
            p[0] = p[1]
        else:
            p[1].enumerators.append(p[3])
            p[0] = p[1]

    def p_alignment_specifier(self, p):
        """ alignment_specifier : _ALIGNAS LPAREN type_name RPAREN
                                | _ALIGNAS LPAREN constant_expression RPAREN
        """
        p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))

    def p_enumerator(self, p):
        """ enumerator  : ID
                        | ID EQUALS constant_expression
        """
        if len(p) == 2:
            enumerator = c_ast.Enumerator(
                p[1], None,
                self._token_coord(p, 1))
        else:
            enumerator = c_ast.Enumerator(
                p[1], p[3],
                self._token_coord(p, 1))
        self._add_identifier(enumerator.name, enumerator.coord)

        p[0] = enumerator

    def p_declarator(self, p):
        """ declarator  : id_declarator
                        | typeid_declarator
        """
        p[0] = p[1]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_1(self, p):
        """ xxx_declarator  : direct_xxx_declarator
        """
        p[0] = p[1]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_2(self, p):
        """ xxx_declarator  : pointer direct_xxx_declarator
        """
        p[0] = self._type_modify_decl(p[2], p[1])

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_1(self, p):
        """ direct_xxx_declarator   : yyy
        """
        p[0] = c_ast.TypeDecl(
            declname=p[1],
            type=None,
            quals=None,
            align=None,
            coord=self._token_coord(p, 1))

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
    def p_direct_xxx_declarator_2(self, p):
        """ direct_xxx_declarator   : LPAREN xxx_declarator RPAREN
        """
        p[0] = p[2]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_3(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        quals = (p[3] if len(p) > 5 else []) or []
        # Accept dimension qualifiers
        # Per C99 6.7.5.3 p7
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[4] if len(p) > 5 else p[3],
            dim_quals=quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_4(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
                                    | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
        """
        # Using slice notation for PLY objects doesn't work in Python 3 for
        # the version of PLY embedded with pycparser; see PLY Google Code
        # issue 30. Work around that here by listing the two elements
        # separately.
        listed_quals = [item if isinstance(item, list) else [item]
                        for item in [p[3], p[4]]]
        dim_quals = [qual for sublist in listed_quals for qual in sublist
                     if qual is not None]
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[5],
            dim_quals=dim_quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    # Special for VLAs
    #
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_5(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
        """
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[4], self._token_coord(p, 4)),
            dim_quals=p[3] if p[3] is not None else [],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_6(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LPAREN parameter_type_list RPAREN
                                    | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
        """
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        # To see why _get_yacc_lookahead_token is needed, consider:
        #   typedef char TT;
        #   void foo(int TT) { TT = 10; }
        # Outside the function, TT is a typedef, but inside (starting and
        # ending with the braces) it's a parameter. The trouble begins with
        # yacc's lookahead token. We don't know if we're declaring or
        # defining a function until we see LBRACE, but if we wait for yacc to
        # trigger a rule on that token, then TT will have already been read
        # and incorrectly interpreted as TYPEID. We need to add the
        # parameters to the scope the moment the lexer sees LBRACE.
        #
        if self._get_yacc_lookahead_token().type == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    if isinstance(param, c_ast.EllipsisParam):
                        break
                    self._add_identifier(param.name, param.coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)

    def p_pointer(self, p):
        """ pointer : TIMES type_qualifier_list_opt
                    | TIMES type_qualifier_list_opt pointer
        """
        coord = self._token_coord(p, 1)
        # Pointer decls nest from inside out. This is important when different
        # levels have different qualifiers. For example:
        #
        #   char * const * p;
        #
        # Means "pointer to const pointer to char"
        #
        # While:
        #
        #   char ** const p;
        #
        # Means "const pointer to pointer to char"
        #
        # So when we construct PtrDecl nestings, the leftmost pointer goes in
        # as the most nested type.
        nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
        if len(p) > 3:
            tail_type = p[3]
            while tail_type.type is not None:
                tail_type = tail_type.type
            tail_type.type = nested_type
            p[0] = p[3]
        else:
            p[0] = nested_type

    def p_type_qualifier_list(self, p):
        """ type_qualifier_list : type_qualifier
                                | type_qualifier_list type_qualifier
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_parameter_type_list(self, p):
        """ parameter_type_list : parameter_list
                                | parameter_list COMMA ELLIPSIS
        """
        if len(p) > 2:
            p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))

        p[0] = p[1]

    def p_parameter_list(self, p):
        """ parameter_list  : parameter_declaration
                            | parameter_list COMMA parameter_declaration
        """
        if len(p) == 2:  # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]

    # From ISO/IEC 9899:TC2, 6.7.5.3.11:
    #   "If, in a parameter declaration, an identifier can be treated either
    #   as a typedef name or as a parameter name, it shall be taken as a
    #   typedef name."
    #
    # Inside a parameter declaration, once we've reduced declaration
    # specifiers, if we shift in an LPAREN and see a TYPEID, it could be
    # either an abstract declarator or a declarator nested inside parens.
    # This rule tells us to always treat it as an abstract declarator.
    # Therefore, we only accept `id_declarator`s and
    # `typeid_noparen_declarator`s.
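    #
    # For example, given "typedef int T;", in "void f(int (T));" the
    # parenthesized T is taken as a typedef name: the parameter is an
    # unnamed function taking a T and returning int, not a parameter
    # named T.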
1335 def p_parameter_declaration_1(self, p):
1336 """ parameter_declaration : declaration_specifiers id_declarator
1337 | declaration_specifiers typeid_noparen_declarator
1338 """
1339 spec = p[1]
1340 if not spec['type']:
1341 spec['type'] = [c_ast.IdentifierType(['int'],
1342 coord=self._token_coord(p, 1))]
1343 p[0] = self._build_declarations(
1344 spec=spec,
1345 decls=[dict(decl=p[2])])[0]
1346
1347 def p_parameter_declaration_2(self, p):
1348 """ parameter_declaration : declaration_specifiers abstract_declarator_opt
1349 """
1350 spec = p[1]
1351 if not spec['type']:
1352 spec['type'] = [c_ast.IdentifierType(['int'],
1353 coord=self._token_coord(p, 1))]
1354
1355 # Parameters can have the same names as typedefs. The trouble is that
1356 # the parameter's name gets grouped into declaration_specifiers, making
1357 # it look like an old-style declaration; compensate.
1358 #
1359 if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
1360 self._is_type_in_scope(spec['type'][-1].names[0]):
1361 decl = self._build_declarations(
1362 spec=spec,
1363 decls=[dict(decl=p[2], init=None)])[0]
1364
1365 # This truly is an old-style parameter declaration
1366 #
1367 else:
1368 decl = c_ast.Typename(
1369 name='',
1370 quals=spec['qual'],
1371 align=None,
1372 type=p[2] or c_ast.TypeDecl(None, None, None, None),
1373 coord=self._token_coord(p, 2))
1374 typename = spec['type']
1375 decl = self._fix_decl_name_type(decl, typename)
1376
1377 p[0] = decl
1378
1379 def p_identifier_list(self, p):
1380 """ identifier_list : identifier
1381 | identifier_list COMMA identifier
1382 """
1383 if len(p) == 2: # single parameter
1384 p[0] = c_ast.ParamList([p[1]], p[1].coord)
1385 else:
1386 p[1].params.append(p[3])
1387 p[0] = p[1]
1388
1389 def p_initializer_1(self, p):
1390 """ initializer : assignment_expression
1391 """
1392 p[0] = p[1]
1393
1394 def p_initializer_2(self, p):
1395 """ initializer : brace_open initializer_list_opt brace_close
1396 | brace_open initializer_list COMMA brace_close
1397 """
1398 if p[2] is None:
1399 p[0] = c_ast.InitList([], self._token_coord(p, 1))
1400 else:
1401 p[0] = p[2]
1402
1403 def p_initializer_list(self, p):
1404 """ initializer_list : designation_opt initializer
1405 | initializer_list COMMA designation_opt initializer
1406 """
1407 if len(p) == 3: # single initializer
1408 init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
1409 p[0] = c_ast.InitList([init], p[2].coord)
1410 else:
1411 init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
1412 p[1].exprs.append(init)
1413 p[0] = p[1]
1414
1415 def p_designation(self, p):
1416 """ designation : designator_list EQUALS
1417 """
1418 p[0] = p[1]
1419
1420 # Designators are represented as a list of nodes, in the order in which
1421 # they're written in the code.
1422 #
1423 def p_designator_list(self, p):
1424 """ designator_list : designator
1425 | designator_list designator
1426 """
1427 p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
1428
1429 def p_designator(self, p):
1430 """ designator : LBRACKET constant_expression RBRACKET
1431 | PERIOD identifier
1432 """
1433 p[0] = p[2]
1434
1435 def p_type_name(self, p):
1436 """ type_name : specifier_qualifier_list abstract_declarator_opt
1437 """
1438 typename = c_ast.Typename(
1439 name='',
1440 quals=p[1]['qual'][:],
1441 align=None,
1442 type=p[2] or c_ast.TypeDecl(None, None, None, None),
1443 coord=self._token_coord(p, 2))
1444
1445 p[0] = self._fix_decl_name_type(typename, p[1]['type'])
1446
1447 def p_abstract_declarator_1(self, p):
1448 """ abstract_declarator : pointer
1449 """
1450 dummytype = c_ast.TypeDecl(None, None, None, None)
1451 p[0] = self._type_modify_decl(
1452 decl=dummytype,
1453 modifier=p[1])
1454
1455 def p_abstract_declarator_2(self, p):
1456 """ abstract_declarator : pointer direct_abstract_declarator
1457 """
1458 p[0] = self._type_modify_decl(p[2], p[1])
1459
1460 def p_abstract_declarator_3(self, p):
1461 """ abstract_declarator : direct_abstract_declarator
1462 """
1463 p[0] = p[1]
1464
1465 # Creating and using direct_abstract_declarator_opt here
1466 # instead of listing both direct_abstract_declarator and the
1467 # lack of it in the beginning of _1 and _2 caused two
1468 # shift/reduce errors.
1469 #
1470 def p_direct_abstract_declarator_1(self, p):
1471 """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """
1472 p[0] = p[2]
1473
1474 def p_direct_abstract_declarator_2(self, p):
1475 """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
1476 """
1477 arr = c_ast.ArrayDecl(
1478 type=None,
1479 dim=p[3],
1480 dim_quals=[],
1481 coord=p[1].coord)
1482
1483 p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1484
1485 def p_direct_abstract_declarator_3(self, p):
1486 """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
1487 """
1488 quals = (p[2] if len(p) > 4 else []) or []
1489 p[0] = c_ast.ArrayDecl(
1490 type=c_ast.TypeDecl(None, None, None, None),
1491 dim=p[3] if len(p) > 4 else p[2],
1492 dim_quals=quals,
1493 coord=self._token_coord(p, 1))
1494
1495 def p_direct_abstract_declarator_4(self, p):
1496 """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET
1497 """
1498 arr = c_ast.ArrayDecl(
1499 type=None,
1500 dim=c_ast.ID(p[3], self._token_coord(p, 3)),
1501 dim_quals=[],
1502 coord=p[1].coord)
1503
1504 p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1505
1506 def p_direct_abstract_declarator_5(self, p):
1507 """ direct_abstract_declarator : LBRACKET TIMES RBRACKET
1508 """
1509 p[0] = c_ast.ArrayDecl(
1510 type=c_ast.TypeDecl(None, None, None, None),
1511 dim=c_ast.ID(p[3], self._token_coord(p, 3)),
1512 dim_quals=[],
1513 coord=self._token_coord(p, 1))
1514
1515 def p_direct_abstract_declarator_6(self, p):
1516 """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
1517 """
1518 func = c_ast.FuncDecl(
1519 args=p[3],
1520 type=None,
1521 coord=p[1].coord)
1522
1523 p[0] = self._type_modify_decl(decl=p[1], modifier=func)
1524
1525 def p_direct_abstract_declarator_7(self, p):
1526 """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN
1527 """
1528 p[0] = c_ast.FuncDecl(
1529 args=p[2],
1530 type=c_ast.TypeDecl(None, None, None, None),
1531 coord=self._token_coord(p, 1))
1532
1533 # declaration is a list, statement isn't. To make it consistent, block_item
1534 # will always be a list
1535 #
1536 def p_block_item(self, p):
1537 """ block_item : declaration
1538 | statement
1539 """
1540 p[0] = p[1] if isinstance(p[1], list) else [p[1]]
1541
1542 # Since we made block_item a list, this just combines lists
1543 #
1544 def p_block_item_list(self, p):
1545 """ block_item_list : block_item
1546 | block_item_list block_item
1547 """
1548 # Empty block items (plain ';') produce [None], so ignore them
1549 p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]
1550
1551 def p_compound_statement_1(self, p):
1552 """ compound_statement : brace_open block_item_list_opt brace_close """
1553 p[0] = c_ast.Compound(
1554 block_items=p[2],
1555 coord=self._token_coord(p, 1))
1556
1557 def p_labeled_statement_1(self, p):
1558 """ labeled_statement : ID COLON pragmacomp_or_statement """
1559 p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))
1560
1561 def p_labeled_statement_2(self, p):
1562 """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
1563 p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))
1564
1565 def p_labeled_statement_3(self, p):
1566 """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
1567 p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))
1568
1569 def p_selection_statement_1(self, p):
1570 """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
1571 p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))
1572
1573 def p_selection_statement_2(self, p):
1574 """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
1575 p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))
1576
1577 def p_selection_statement_3(self, p):
1578 """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
1579 p[0] = fix_switch_cases(
1580 c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))
1581
1582 def p_iteration_statement_1(self, p):
1583 """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
1584 p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))
1585
1586 def p_iteration_statement_2(self, p):
1587 """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
1588 p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))
1589
1590 def p_iteration_statement_3(self, p):
1591 """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
1592 p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))
1593
1594 def p_iteration_statement_4(self, p):
1595 """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
1596 p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
1597 p[4], p[6], p[8], self._token_coord(p, 1))
1598
1599 def p_jump_statement_1(self, p):
1600 """ jump_statement : GOTO ID SEMI """
1601 p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))
1602
1603 def p_jump_statement_2(self, p):
1604 """ jump_statement : BREAK SEMI """
1605 p[0] = c_ast.Break(self._token_coord(p, 1))
1606
1607 def p_jump_statement_3(self, p):
1608 """ jump_statement : CONTINUE SEMI """
1609 p[0] = c_ast.Continue(self._token_coord(p, 1))
1610
1611 def p_jump_statement_4(self, p):
1612 """ jump_statement : RETURN expression SEMI
1613 | RETURN SEMI
1614 """
1615 p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))
1616
1617 def p_expression_statement(self, p):
1618 """ expression_statement : expression_opt SEMI """
1619 if p[1] is None:
1620 p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
1621 else:
1622 p[0] = p[1]
1623
1624 def p_expression(self, p):
1625 """ expression : assignment_expression
1626 | expression COMMA assignment_expression
1627 """
1628 if len(p) == 2:
1629 p[0] = p[1]
1630 else:
1631 if not isinstance(p[1], c_ast.ExprList):
1632 p[1] = c_ast.ExprList([p[1]], p[1].coord)
1633
1634 p[1].exprs.append(p[3])
1635 p[0] = p[1]
1636
1637 def p_parenthesized_compound_expression(self, p):
1638 """ assignment_expression : LPAREN compound_statement RPAREN """
1639 p[0] = p[2]
1640
1641 def p_typedef_name(self, p):
1642 """ typedef_name : TYPEID """
1643 p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
1644
1645 def p_assignment_expression(self, p):
1646 """ assignment_expression : conditional_expression
1647 | unary_expression assignment_operator assignment_expression
1648 """
1649 if len(p) == 2:
1650 p[0] = p[1]
1651 else:
1652 p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)
1653
1654 # K&R2 defines these as many separate rules, to encode
1655 # precedence and associativity. Why work hard ? I'll just use
1656 # the built in precedence/associativity specification feature
1657 # of PLY. (see precedence declaration above)
1658 #
1659 def p_assignment_operator(self, p):
1660 """ assignment_operator : EQUALS
1661 | XOREQUAL
1662 | TIMESEQUAL
1663 | DIVEQUAL
1664 | MODEQUAL
1665 | PLUSEQUAL
1666 | MINUSEQUAL
1667 | LSHIFTEQUAL
1668 | RSHIFTEQUAL
1669 | ANDEQUAL
1670 | OREQUAL
1671 """
1672 p[0] = p[1]
1673
1674 def p_constant_expression(self, p):
1675 """ constant_expression : conditional_expression """
1676 p[0] = p[1]
1677
1678 def p_conditional_expression(self, p):
1679 """ conditional_expression : binary_expression
1680 | binary_expression CONDOP expression COLON conditional_expression
1681 """
1682 if len(p) == 2:
1683 p[0] = p[1]
1684 else:
1685 p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
1686
1687 def p_binary_expression(self, p):
1688 """ binary_expression : cast_expression
1689 | binary_expression TIMES binary_expression
1690 | binary_expression DIVIDE binary_expression
1691 | binary_expression MOD binary_expression
1692 | binary_expression PLUS binary_expression
1693 | binary_expression MINUS binary_expression
1694 | binary_expression RSHIFT binary_expression
1695 | binary_expression LSHIFT binary_expression
1696 | binary_expression LT binary_expression
1697 | binary_expression LE binary_expression
1698 | binary_expression GE binary_expression
1699 | binary_expression GT binary_expression
1700 | binary_expression EQ binary_expression
1701 | binary_expression NE binary_expression
1702 | binary_expression AND binary_expression
1703 | binary_expression OR binary_expression
1704 | binary_expression XOR binary_expression
1705 | binary_expression LAND binary_expression
1706 | binary_expression LOR binary_expression
1707 """
1708 if len(p) == 2:
1709 p[0] = p[1]
1710 else:
1711 p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)

    def p_cast_expression_1(self, p):
        """ cast_expression : unary_expression """
        p[0] = p[1]

    def p_cast_expression_2(self, p):
        """ cast_expression : LPAREN type_name RPAREN cast_expression """
        p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))

    def p_unary_expression_1(self, p):
        """ unary_expression : postfix_expression """
        p[0] = p[1]

    def p_unary_expression_2(self, p):
        """ unary_expression : PLUSPLUS unary_expression
                             | MINUSMINUS unary_expression
                             | unary_operator cast_expression
        """
        p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)

    def p_unary_expression_3(self, p):
        """ unary_expression : SIZEOF unary_expression
                             | SIZEOF LPAREN type_name RPAREN
                             | _ALIGNOF LPAREN type_name RPAREN
        """
        p[0] = c_ast.UnaryOp(
            p[1],
            p[2] if len(p) == 3 else p[3],
            self._token_coord(p, 1))
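
    # For example, `sizeof x` keeps the operand expression as the
    # UnaryOp's child, while `sizeof(int)` and `_Alignof(int)` wrap the
    # parsed type_name node instead.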

    def p_unary_operator(self, p):
        """ unary_operator : AND
                           | TIMES
                           | PLUS
                           | MINUS
                           | NOT
                           | LNOT
        """
        p[0] = p[1]

    def p_postfix_expression_1(self, p):
        """ postfix_expression : primary_expression """
        p[0] = p[1]

    def p_postfix_expression_2(self, p):
        """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """
        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)

    def p_postfix_expression_3(self, p):
        """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN
                               | postfix_expression LPAREN RPAREN
        """
        p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)

    def p_postfix_expression_4(self, p):
        """ postfix_expression : postfix_expression PERIOD ID
                               | postfix_expression PERIOD TYPEID
                               | postfix_expression ARROW ID
                               | postfix_expression ARROW TYPEID
        """
        field = c_ast.ID(p[3], self._token_coord(p, 3))
        p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)
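
    # The TYPEID alternatives above appear to be needed because a
    # struct member may legally share its name with a typedef'd type:
    # member names live in their own namespace, so `s.TYPEID` is valid
    # C even when TYPEID names a type elsewhere in the scope.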

    def p_postfix_expression_5(self, p):
        """ postfix_expression : postfix_expression PLUSPLUS
                               | postfix_expression MINUSMINUS
        """
        p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)
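
    # A note on the 'p' prefix above: postfix increment/decrement are
    # stored with operators 'p++' and 'p--' so they can be told apart
    # from the prefix forms, which use plain '++' and '--'.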

    def p_postfix_expression_6(self, p):
        """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close
                               | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
        """
        p[0] = c_ast.CompoundLiteral(p[2], p[5])
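
    # This is the C99 compound literal syntax, e.g. `(int[]){1, 2, 3}`
    # or `(struct point){.x = 1, .y = 2}`; the type_name and the
    # initializer_list become the CompoundLiteral's two children.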

    def p_primary_expression_1(self, p):
        """ primary_expression : identifier """
        p[0] = p[1]

    def p_primary_expression_2(self, p):
        """ primary_expression : constant """
        p[0] = p[1]

    def p_primary_expression_3(self, p):
        """ primary_expression : unified_string_literal
                               | unified_wstring_literal
        """
        p[0] = p[1]

    def p_primary_expression_4(self, p):
        """ primary_expression : LPAREN expression RPAREN """
        p[0] = p[2]

    def p_primary_expression_5(self, p):
        """ primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
        """
        coord = self._token_coord(p, 1)
        p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
                              c_ast.ExprList([p[3], p[5]], coord),
                              coord)

    def p_offsetof_member_designator(self, p):
        """ offsetof_member_designator : identifier
                                       | offsetof_member_designator PERIOD identifier
                                       | offsetof_member_designator LBRACKET expression RBRACKET
        """
        if len(p) == 2:
            p[0] = p[1]
        elif len(p) == 4:
            p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
        elif len(p) == 5:
            p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
        else:
            raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))
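
    # Taken together, the two rules above model `offsetof` as an
    # ordinary call: `offsetof(struct s, a.b[2])` becomes a FuncCall
    # whose arguments are the type_name node for `struct s` and a
    # StructRef/ArrayRef chain for the member designator `a.b[2]`.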

    def p_argument_expression_list(self, p):
        """ argument_expression_list : assignment_expression
                                     | argument_expression_list COMMA assignment_expression
        """
        if len(p) == 2:  # single expr
            p[0] = c_ast.ExprList([p[1]], p[1].coord)
        else:
            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_identifier(self, p):
        """ identifier : ID """
        p[0] = c_ast.ID(p[1], self._token_coord(p, 1))

    def p_constant_1(self, p):
        """ constant : INT_CONST_DEC
                     | INT_CONST_OCT
                     | INT_CONST_HEX
                     | INT_CONST_BIN
                     | INT_CONST_CHAR
        """
        uCount = 0
        lCount = 0
        for x in p[1][-3:]:
            if x in ('l', 'L'):
                lCount += 1
            elif x in ('u', 'U'):
                uCount += 1
        if uCount > 1:
            raise ValueError('Constant cannot have more than one u/U suffix.')
        elif lCount > 2:
            raise ValueError('Constant cannot have more than two l/L suffixes.')
        prefix = 'unsigned ' * uCount + 'long ' * lCount
        p[0] = c_ast.Constant(
            prefix + 'int', p[1], self._token_coord(p, 1))
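
    # Only the last three characters of an integer constant can carry
    # suffixes (e.g. `ull`), so scanning p[1][-3:] is sufficient. For
    # example, `42ul` produces Constant('unsigned long int', '42ul')
    # and `7LL` produces Constant('long long int', '7LL').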

    def p_constant_2(self, p):
        """ constant : FLOAT_CONST
                     | HEX_FLOAT_CONST
        """
        if 'x' in p[1].lower():
            t = 'float'
        else:
            if p[1][-1] in ('f', 'F'):
                t = 'float'
            elif p[1][-1] in ('l', 'L'):
                t = 'long double'
            else:
                t = 'double'

        p[0] = c_ast.Constant(
            t, p[1], self._token_coord(p, 1))
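
    # Note: an 'x' in the literal marks a hexadecimal float constant
    # (e.g. `0x1.8p3`), which this rule always types as 'float';
    # otherwise an f/F or l/L suffix selects float or long double,
    # defaulting to double.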

    def p_constant_3(self, p):
        """ constant : CHAR_CONST
                     | WCHAR_CONST
                     | U8CHAR_CONST
                     | U16CHAR_CONST
                     | U32CHAR_CONST
        """
        p[0] = c_ast.Constant(
            'char', p[1], self._token_coord(p, 1))

    # The "unified" string and wstring literal rules are for supporting
    # concatenation of adjacent string literals. For example,
    # "hello " "world" is seen by the C compiler as a single string
    # literal with the value "hello world".
    #
    def p_unified_string_literal(self, p):
        """ unified_string_literal : STRING_LITERAL
                                   | unified_string_literal STRING_LITERAL
        """
        if len(p) == 2:  # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._token_coord(p, 1))
        else:
            p[1].value = p[1].value[:-1] + p[2][1:]
            p[0] = p[1]
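
    # The slicing above splices adjacent literals: `p[1].value[:-1]`
    # drops the closing quote of the accumulated literal and `p[2][1:]`
    # drops the opening quote of the new one, so '"hello "' followed by
    # '"world"' becomes '"hello world"'.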

    def p_unified_wstring_literal(self, p):
        """ unified_wstring_literal : WSTRING_LITERAL
                                    | U8STRING_LITERAL
                                    | U16STRING_LITERAL
                                    | U32STRING_LITERAL
                                    | unified_wstring_literal WSTRING_LITERAL
                                    | unified_wstring_literal U8STRING_LITERAL
                                    | unified_wstring_literal U16STRING_LITERAL
                                    | unified_wstring_literal U32STRING_LITERAL
        """
        if len(p) == 2:  # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._token_coord(p, 1))
        else:
            p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
            p[0] = p[1]

    def p_brace_open(self, p):
        """ brace_open : LBRACE
        """
        p[0] = p[1]
        p.set_lineno(0, p.lineno(1))

    def p_brace_close(self, p):
        """ brace_close : RBRACE
        """
        p[0] = p[1]
        p.set_lineno(0, p.lineno(1))
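
    # set_lineno(0, ...) copies the brace token's line number onto the
    # brace_open/brace_close nonterminal itself, so enclosing rules can
    # report coordinates at the brace rather than at some later token.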

    def p_empty(self, p):
        'empty : '
        p[0] = None

    def p_error(self, p):
        # If error recovery is added here in the future, make sure
        # _get_yacc_lookahead_token still works!
        #
        if p:
            self._parse_error(
                'before: %s' % p.value,
                self._coord(lineno=p.lineno,
                            column=self.clex.find_tok_column(p)))
        else:
            self._parse_error('At end of input', self.clex.filename)