Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/pgen2/grammar_parser.py: 21%

102 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3 

4# Modifications: 

5# Copyright David Halter and Contributors 

6# Modifications are dual-licensed: MIT and PSF. 

7from typing import Optional, Iterator, Tuple, List 

8 

9from parso.python.tokenize import tokenize 

10from parso.utils import parse_version_string 

11from parso.python.token import PythonTokenTypes 

12 

13 

14class NFAArc: 

15 def __init__(self, next_: 'NFAState', nonterminal_or_string: Optional[str]): 

16 self.next: NFAState = next_ 

17 self.nonterminal_or_string: Optional[str] = nonterminal_or_string 

18 

19 def __repr__(self): 

20 return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string) 

21 

22 

23class NFAState: 

24 def __init__(self, from_rule: str): 

25 self.from_rule: str = from_rule 

26 self.arcs: List[NFAArc] = [] 

27 

28 def add_arc(self, next_, nonterminal_or_string=None): 

29 assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) 

30 assert isinstance(next_, NFAState) 

31 self.arcs.append(NFAArc(next_, nonterminal_or_string)) 

32 

33 def __repr__(self): 

34 return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) 

35 

36 

37class GrammarParser: 

38 """ 

39 The parser for Python grammar files. 

40 """ 

41 def __init__(self, bnf_grammar: str): 

42 self._bnf_grammar = bnf_grammar 

43 self.generator = tokenize( 

44 bnf_grammar, 

45 version_info=parse_version_string('3.9') 

46 ) 

47 self._gettoken() # Initialize lookahead 

48 

49 def parse(self) -> Iterator[Tuple[NFAState, NFAState]]: 

50 # grammar: (NEWLINE | rule)* ENDMARKER 

51 while self.type != PythonTokenTypes.ENDMARKER: 

52 while self.type == PythonTokenTypes.NEWLINE: 

53 self._gettoken() 

54 

55 # rule: NAME ':' rhs NEWLINE 

56 self._current_rule_name = self._expect(PythonTokenTypes.NAME) 

57 self._expect(PythonTokenTypes.OP, ':') 

58 

59 a, z = self._parse_rhs() 

60 self._expect(PythonTokenTypes.NEWLINE) 

61 

62 yield a, z 

63 

64 def _parse_rhs(self): 

65 # rhs: items ('|' items)* 

66 a, z = self._parse_items() 

67 if self.value != "|": 

68 return a, z 

69 else: 

70 aa = NFAState(self._current_rule_name) 

71 zz = NFAState(self._current_rule_name) 

72 while True: 

73 # Add the possibility to go into the state of a and come back 

74 # to finish. 

75 aa.add_arc(a) 

76 z.add_arc(zz) 

77 if self.value != "|": 

78 break 

79 

80 self._gettoken() 

81 a, z = self._parse_items() 

82 return aa, zz 

83 

84 def _parse_items(self): 

85 # items: item+ 

86 a, b = self._parse_item() 

87 while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \ 

88 or self.value in ('(', '['): 

89 c, d = self._parse_item() 

90 # Need to end on the next item. 

91 b.add_arc(c) 

92 b = d 

93 return a, b 

94 

95 def _parse_item(self): 

96 # item: '[' rhs ']' | atom ['+' | '*'] 

97 if self.value == "[": 

98 self._gettoken() 

99 a, z = self._parse_rhs() 

100 self._expect(PythonTokenTypes.OP, ']') 

101 # Make it also possible that there is no token and change the 

102 # state. 

103 a.add_arc(z) 

104 return a, z 

105 else: 

106 a, z = self._parse_atom() 

107 value = self.value 

108 if value not in ("+", "*"): 

109 return a, z 

110 self._gettoken() 

111 # Make it clear that we can go back to the old state and repeat. 

112 z.add_arc(a) 

113 if value == "+": 

114 return a, z 

115 else: 

116 # The end state is the same as the beginning, nothing must 

117 # change. 

118 return a, a 

119 

120 def _parse_atom(self): 

121 # atom: '(' rhs ')' | NAME | STRING 

122 if self.value == "(": 

123 self._gettoken() 

124 a, z = self._parse_rhs() 

125 self._expect(PythonTokenTypes.OP, ')') 

126 return a, z 

127 elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING): 

128 a = NFAState(self._current_rule_name) 

129 z = NFAState(self._current_rule_name) 

130 # Make it clear that the state transition requires that value. 

131 a.add_arc(z, self.value) 

132 self._gettoken() 

133 return a, z 

134 else: 

135 self._raise_error("expected (...) or NAME or STRING, got %s/%s", 

136 self.type, self.value) 

137 

138 def _expect(self, type_, value=None): 

139 if self.type != type_: 

140 self._raise_error("expected %s, got %s [%s]", 

141 type_, self.type, self.value) 

142 if value is not None and self.value != value: 

143 self._raise_error("expected %s, got %s", value, self.value) 

144 value = self.value 

145 self._gettoken() 

146 return value 

147 

148 def _gettoken(self): 

149 tup = next(self.generator) 

150 self.type, self.value, self.begin, prefix = tup 

151 

152 def _raise_error(self, msg, *args): 

153 if args: 

154 try: 

155 msg = msg % args 

156 except: 

157 msg = " ".join([msg] + list(map(str, args))) 

158 line = self._bnf_grammar.splitlines()[self.begin[0] - 1] 

159 raise SyntaxError(msg, ('<grammar>', self.begin[0], 

160 self.begin[1], line))