Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/parso/pgen2/grammar_parser.py: 21%
100 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
#
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.
#
# A fork of `parso.pgen2.grammar_parser`.
# https://github.com/davidhalter/parso/blob/master/parso/pgen2/grammar_parser.py
#
# The following changes were made:
# - Type stubs were directly applied.
# pyre-unsafe
16from typing import Generator, List, Optional, Tuple
18from libcst._parser.parso.python.token import PythonTokenTypes
19from libcst._parser.parso.python.tokenize import tokenize
20from libcst._parser.parso.utils import parse_version_string
class NFAArc:
    """A single outgoing transition of an :class:`NFAState`.

    Attributes are plain and public:

    * ``next`` -- the state this arc leads to.
    * ``nonterminal_or_string`` -- the label that must be matched to follow
      the arc, or ``None`` for an unlabeled arc.
    """

    def __init__(self, next_: "NFAState", nonterminal_or_string: Optional[str]) -> None:
        self.next: NFAState = next_
        self.nonterminal_or_string: Optional[str] = nonterminal_or_string

    def __repr__(self) -> str:
        # Only the label is shown; the target state is deliberately left out.
        return f"<{self.__class__.__name__}: {self.nonterminal_or_string}>"
class NFAState:
    """A node of the NFA built for one grammar rule.

    ``from_rule`` is the name of the rule the state was created for and
    ``arcs`` is the list of outgoing :class:`NFAArc` objects, in the order
    they were added.
    """

    def __init__(self, from_rule: str) -> None:
        self.from_rule: str = from_rule
        self.arcs: List[NFAArc] = []

    def add_arc(
        self, next_: "NFAState", nonterminal_or_string: Optional[str] = None
    ) -> None:
        """Append an arc to ``next_``.

        When ``nonterminal_or_string`` is omitted the arc carries no label
        (``None``).
        """
        self.arcs.append(NFAArc(next_, nonterminal_or_string))

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}: from {self.from_rule}>"
class GrammarParser:
    """
    The parser for Python grammar files.

    The BNF text is tokenized once in the constructor; :meth:`parse` then
    walks the tokens with one token of lookahead (``self.type``,
    ``self.value``, ``self.begin``) and yields, for every rule, the pair of
    NFA start and end states built for it.
    """

    def __init__(self, bnf_grammar: str) -> None:
        self._bnf_grammar: str = bnf_grammar
        self.generator = tokenize(bnf_grammar, version_info=parse_version_string("3.6"))
        self._gettoken()  # Initialize lookahead

    def parse(self) -> Generator[Tuple["NFAState", "NFAState"], None, None]:
        """Yield ``(start, end)`` NFA states for each rule of the grammar.

        Raises:
            SyntaxError: via ``_raise_error`` when the token stream does not
                match the expected grammar-file syntax.
        """
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != PythonTokenTypes.ENDMARKER:
            while self.type == PythonTokenTypes.NEWLINE:
                self._gettoken()

            # rule: NAME ':' rhs NEWLINE
            # pyre-ignore Pyre is unhappy with the fact that we haven't put
            # _current_rule_name in the constructor.
            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
            self._expect(PythonTokenTypes.OP, ":")

            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.NEWLINE)

            yield a, z

    def _parse_rhs(self) -> Tuple["NFAState", "NFAState"]:
        """Parse ``rhs: items ('|' items)*`` and return its (start, end) states."""
        a, z = self._parse_items()
        if self.value != "|":
            # A single alternative needs no extra wrapper states.
            return a, z

        # Several alternatives: fan out from a shared start state and join
        # every alternative's end into a shared end state.
        aa = NFAState(self._current_rule_name)
        zz = NFAState(self._current_rule_name)
        while True:
            # Add the possibility to go into the state of a and come back
            # to finish.
            aa.add_arc(a)
            z.add_arc(zz)
            if self.value != "|":
                break

            self._gettoken()
            a, z = self._parse_items()
        return aa, zz

    def _parse_items(self) -> Tuple["NFAState", "NFAState"]:
        """Parse ``items: item+`` and chain the items one after another."""
        a, b = self._parse_item()
        while self.type in (
            PythonTokenTypes.NAME,
            PythonTokenTypes.STRING,
        ) or self.value in ("(", "["):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
            b = d
        return a, b

    def _parse_item(self) -> Tuple["NFAState", "NFAState"]:
        """Parse ``item: '[' rhs ']' | atom ['+' | '*']``."""
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, "]")
            # Make it also possible that there is no token and change the
            # state.
            a.add_arc(z)
            return a, z

        a, z = self._parse_atom()
        value = self.value
        if value not in ("+", "*"):
            return a, z
        self._gettoken()
        # Make it clear that we can go back to the old state and repeat.
        z.add_arc(a)
        if value == "+":
            return a, z
        # '*': the end state is the same as the beginning, nothing must
        # change.
        return a, a

    def _parse_atom(self) -> Tuple["NFAState", "NFAState"]:
        """Parse ``atom: '(' rhs ')' | NAME | STRING``."""
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ")")
            return a, z

        if self.type not in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
            # _raise_error always raises, so nothing below runs in this case.
            self._raise_error(
                "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
            )

        a = NFAState(self._current_rule_name)
        z = NFAState(self._current_rule_name)
        # Make it clear that the state transition requires that value.
        a.add_arc(z, self.value)
        self._gettoken()
        return a, z

    def _expect(self, type_, value: Optional[str] = None) -> str:
        """Consume the lookahead token and return its value.

        The token must have type ``type_`` and, when ``value`` is given, that
        exact value; otherwise a ``SyntaxError`` is raised through
        ``_raise_error``.
        """
        if self.type != type_:
            self._raise_error("expected %s, got %s [%s]", type_, self.type, self.value)
        if value is not None and self.value != value:
            self._raise_error("expected %s, got %s", value, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self) -> None:
        """Advance the lookahead to the next token from the tokenizer."""
        # Each token unpacks into four fields; the last one (the prefix) is
        # not needed here.
        self.type, self.value, self.begin, _prefix = next(self.generator)

    def _raise_error(self, msg: str, *args: object) -> None:
        """Raise a ``SyntaxError`` located at the current lookahead token.

        ``msg`` is %-formatted with ``args``; if that formatting fails, the
        arguments are appended to ``msg`` separated by spaces instead.

        Raises:
            SyntaxError: always.
        """
        if args:
            try:
                msg = msg % args
            except Exception:
                # Best effort: never let a bad format string hide the error.
                msg = " ".join([msg] + list(map(str, args)))

        lineno, column = self.begin[0], self.begin[1]
        lines = self._bnf_grammar.splitlines()
        # The token position can lie past the last line that splitlines()
        # returns (presumably when the end of input follows a trailing
        # newline); an unguarded index would then raise IndexError and hide
        # the real SyntaxError, so fall back to an empty context line.
        line = lines[lineno - 1] if 0 < lineno <= len(lines) else ""
        raise SyntaxError(msg, ("<grammar>", lineno, column, line))