Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/parso/pgen2/grammar_parser.py: 21%

100 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:43 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3# 

4# Modifications: 

5# Copyright David Halter and Contributors 

6# Modifications are dual-licensed: MIT and PSF. 

7# 99% of the code is different from pgen2, now. 

8# 

9# A fork of `parso.pgen2.grammar_parser`. 

10# https://github.com/davidhalter/parso/blob/master/parso/pgen2/grammar_parser.py 

11# 

12# The following changes were made: 

13# - Type stubs were directly applied. 

14# pyre-unsafe 

15 

16from typing import Generator, List, Optional, Tuple 

17 

18from libcst._parser.parso.python.token import PythonTokenTypes 

19from libcst._parser.parso.python.tokenize import tokenize 

20from libcst._parser.parso.utils import parse_version_string 

21 

22 

class NFAArc:
    """
    A single outgoing transition of an ``NFAState``.

    ``next`` is the state the arc leads to.  ``nonterminal_or_string`` is the
    label attached to the transition; it is ``None`` when the arc was added
    without a label (see ``NFAState.add_arc``, whose label defaults to
    ``None``).
    """

    def __init__(self, next_: "NFAState", nonterminal_or_string: Optional[str]) -> None:
        # The trailing underscore on the parameter avoids shadowing the
        # ``next`` builtin; the attribute keeps the plain name.
        self.next: NFAState = next_
        self.nonterminal_or_string: Optional[str] = nonterminal_or_string

    def __repr__(self) -> str:
        """Return ``<ClassName: label>`` for debugging output."""
        return "<%s: %s>" % (self.__class__.__name__, self.nonterminal_or_string)

30 

31 

class NFAState:
    """
    A node of the NFA that is built while parsing a grammar rule.

    ``from_rule`` is the name of the rule the state was created for and
    ``arcs`` is the list of outgoing ``NFAArc`` transitions, in the order in
    which they were added.
    """

    def __init__(self, from_rule: str) -> None:
        self.from_rule = from_rule
        self.arcs: List[NFAArc] = []

    def add_arc(
        self, next_: "NFAState", nonterminal_or_string: Optional[str] = None
    ) -> None:
        """
        Append a transition from this state to ``next_``.

        ``nonterminal_or_string`` is the label of the transition; leaving it
        at ``None`` creates an unlabelled arc.
        """
        self.arcs.append(NFAArc(next_, nonterminal_or_string))

    def __repr__(self) -> str:
        """Return ``<ClassName: from rule_name>`` for debugging output."""
        return "<%s: from %s>" % (self.__class__.__name__, self.from_rule)

44 

45 

class GrammarParser:
    """
    The parser for Python grammar files.

    The grammar text is tokenized once and then consumed with a single token
    of lookahead, which is stored in ``self.type``, ``self.value`` and
    ``self.begin`` by ``_gettoken``.  For every rule an NFA fragment made of
    ``NFAState`` objects is built and handed out by ``parse``.
    """

    def __init__(self, bnf_grammar: str) -> None:
        """Tokenize ``bnf_grammar`` and load the first token as lookahead."""
        self._bnf_grammar: str = bnf_grammar
        self.generator = tokenize(bnf_grammar, version_info=parse_version_string("3.6"))
        self._gettoken()  # Initialize lookahead

    def parse(self) -> Generator[Tuple[NFAState, NFAState], None, None]:
        """
        Yield one ``(start, end)`` pair of NFA states per grammar rule.

        Malformed input is reported through ``_raise_error``, i.e. as a
        ``SyntaxError``.
        """
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != PythonTokenTypes.ENDMARKER:
            if self.type == PythonTokenTypes.NEWLINE:
                # Go back to the loop condition after every skipped NEWLINE so
                # that NEWLINE tokens directly in front of ENDMARKER end the
                # grammar instead of being treated as the start of a rule.
                self._gettoken()
                continue

            # rule: NAME ':' rhs NEWLINE
            # pyre-ignore Pyre is unhappy with the fact that we haven't put
            # _current_rule_name in the constructor.
            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
            self._expect(PythonTokenTypes.OP, ":")

            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.NEWLINE)

            yield a, z

    def _parse_rhs(self):
        """Parse alternatives and return their ``(start, end)`` states."""
        # rhs: items ('|' items)*
        a, z = self._parse_items()
        if self.value != "|":
            # A single alternative needs no extra wrapper states.
            return a, z

        # Several alternatives: a shared start state fans out to every
        # alternative and every alternative leads into a shared end state.
        aa = NFAState(self._current_rule_name)
        zz = NFAState(self._current_rule_name)
        while True:
            # Add the possibility to go into the state of a and come back
            # to finish.
            aa.add_arc(a)
            z.add_arc(zz)
            if self.value != "|":
                break

            self._gettoken()
            a, z = self._parse_items()
        return aa, zz

    def _parse_items(self):
        """Parse a sequence of items and chain them one after another."""
        # items: item+
        a, b = self._parse_item()
        while self.type in (
            PythonTokenTypes.NAME,
            PythonTokenTypes.STRING,
        ) or self.value in ("(", "["):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
            b = d
        return a, b

    def _parse_item(self):
        """Parse an optional group or an atom with an optional repetition."""
        # item: '[' rhs ']' | atom ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, "]")
            # Make it also possible that there is no token and change the
            # state.
            a.add_arc(z)
            return a, z

        a, z = self._parse_atom()
        value = self.value
        if value not in ("+", "*"):
            return a, z
        self._gettoken()
        # Make it clear that we can go back to the old state and repeat.
        z.add_arc(a)
        if value == "+":
            return a, z
        # The end state is the same as the beginning, nothing must
        # change.
        return a, a

    def _parse_atom(self):
        """Parse a parenthesized ``rhs`` or a single NAME / STRING token."""
        # atom: '(' rhs ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ")")
            return a, z
        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
            a = NFAState(self._current_rule_name)
            z = NFAState(self._current_rule_name)
            # Make it clear that the state transition requires that value.
            a.add_arc(z, self.value)
            self._gettoken()
            return a, z
        else:
            # _raise_error always raises, so nothing is returned from here.
            self._raise_error(
                "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
            )

    def _expect(self, type_, value=None):
        """
        Consume the lookahead token and return its value.

        The token must have the type ``type_`` and, when ``value`` is given,
        exactly that value; otherwise a ``SyntaxError`` is raised.
        """
        if self.type != type_:
            self._raise_error("expected %s, got %s [%s]", type_, self.type, self.value)
        if value is not None and self.value != value:
            self._raise_error("expected %s, got %s", value, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self) -> None:
        """Advance the lookahead to the next token of the generator."""
        # Each token is a 4-tuple; its last element is not used here.
        self.type, self.value, self.begin, _prefix = next(self.generator)

    def _raise_error(self, msg: str, *args: object) -> None:
        """
        Raise a ``SyntaxError`` located at the current lookahead token.

        ``msg`` is %-formatted with ``args``; if that formatting fails, the
        arguments are appended to the message instead.  This method never
        returns normally.
        """
        if args:
            try:
                msg = msg % args
            except (TypeError, ValueError):
                # Wrong number of arguments or a broken format string: still
                # report everything that was passed in.
                msg = " ".join([msg] + list(map(str, args)))
        lineno, column = self.begin[0], self.begin[1]
        try:
            line = self._bnf_grammar.splitlines()[lineno - 1]
        except IndexError:
            # The token sits past the last line of text (for instance the
            # end marker after an unclosed bracket, or an empty grammar).
            # Report the syntax error without source text instead of failing
            # with an unrelated IndexError.
            line = ""
        raise SyntaxError(msg, ("<grammar>", lineno, column, line))