Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/exceptions.py: 79%
150 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:30 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:30 +0000
1from .utils import logger, NO_VALUE
2from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING
4if TYPE_CHECKING:
5 from .lexer import Token
6 from .parsers.lalr_interactive_parser import InteractiveParser
7 from .tree import Tree
9###{standalone
class LarkError(Exception):
    """Base class for all Lark-specific exceptions."""
class ConfigurationError(LarkError, ValueError):
    """Raised for invalid configuration values (see ``assert_config``)."""
def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
    """Raise ``ConfigurationError`` unless *value* is one of *options*."""
    if value in options:
        return
    raise ConfigurationError(msg % (value, options))
class GrammarError(LarkError):
    """Error in the user-provided grammar (raised by code outside this module)."""
class ParseError(LarkError):
    """Base class for errors raised while parsing (see ``UnexpectedToken``, ``UnexpectedEOF``)."""
class LexError(LarkError):
    """Base class for errors raised while lexing (see ``UnexpectedCharacters``)."""
# Generic type variable for the user-defined labels returned by
# UnexpectedInput.match_examples().
T = TypeVar('T')
class UnexpectedInput(LarkError):
    """UnexpectedInput Error.

    Used as a base class for the following exceptions:

    - ``UnexpectedCharacters``: The lexer encountered an unexpected string
    - ``UnexpectedToken``: The parser received an unexpected token
    - ``UnexpectedEOF``: The parser expected a token, but the input ended

    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
    """
    # Position of the error; filled in by the subclasses' __init__.
    line: int
    column: int
    # Absolute offset into the input, or None until a subclass sets it.
    pos_in_stream: Optional[int] = None
    # Opaque parser/lexer state; compared for equality in match_examples().
    state: Any
    # Optional mapping of terminal name -> terminal definition, used by
    # _format_expected() to produce user-friendly terminal names.
    _terminals_by_name = None

    def get_context(self, text: str, span: int=40) -> str:
        """Returns a pretty string pinpointing the error in the text,
        with span amount of context characters around it.

        Note:
            The parser doesn't hold a copy of the text it has to parse,
            so you have to provide it again
        """
        assert self.pos_in_stream is not None, self
        pos = self.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        if not isinstance(text, bytes):
            # Trim the context to the current line only: keep text after the
            # last newline before the error, and before the first newline after it.
            before = text[start:pos].rsplit('\n', 1)[-1]
            after = text[pos:end].split('\n', 1)[0]
            # expandtabs() so the '^' marker lines up even when tabs precede the error.
            return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
        else:
            # Same logic for bytes input; decode at the end so the caller
            # always receives str (undecodable bytes become backslash escapes).
            before = text[start:pos].rsplit(b'\n', 1)[-1]
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

    def match_examples(self, parse_fn: 'Callable[[str], Tree]',
                       examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
                       token_type_match_fallback: bool=False,
                       use_accepts: bool=True
                       ) -> Optional[T]:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.

        Given a parser instance and a dictionary mapping some label with
        some malformed syntax examples, it'll return the label for the
        example that bests matches the current error. The function will
        iterate the dictionary until it finds a matching error, and
        return the corresponding value.

        For an example usage, see `examples/error_reporting_lalr.py`

        Parameters:
            parse_fn: parse function (usually ``lark_instance.parse``)
            examples: dictionary of ``{'example_string': value}``.
            use_accepts: Recommended to keep this as ``use_accepts=True``.
        """
        assert self.state is not None, "Not supported for this exception"

        if isinstance(examples, Mapping):
            examples = examples.items()

        # candidate is (label, is_token_type_match): a weaker match kept as a
        # fallback if no exact token match is ever found.
        candidate = (None, False)
        for i, (label, example) in enumerate(examples):
            # A bare string would iterate character by character — reject it.
            assert not isinstance(example, str), "Expecting a list"

            for j, malformed in enumerate(example):
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        if (
                            use_accepts
                            and isinstance(self, UnexpectedToken)
                            and isinstance(ut, UnexpectedToken)
                            and ut.accepts != self.accepts
                        ):
                            # Same parser state but a different accept-set means
                            # the errors are actually distinct — skip this example.
                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                         (self.state, self.accepts, ut.accepts, i, j))
                            continue
                        if (
                            isinstance(self, (UnexpectedToken, UnexpectedEOF))
                            and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
                        ):
                            if ut.token == self.token:  # Try exact match first
                                logger.debug("Exact Match at example [%s][%s]" % (i, j))
                                return label

                            if token_type_match_fallback:
                                # Fallback to token types match
                                if (ut.token.type == self.token.type) and not candidate[-1]:
                                    logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
                                    candidate = label, True

                        if candidate[0] is None:
                            # Weakest match: same parser state only. Keep the
                            # first one; it may be upgraded by a token-type match.
                            logger.debug("Same State match at example [%s][%s]" % (i, j))
                            candidate = label, False

        return candidate[0]

    def _format_expected(self, expected):
        """Render the expected-terminals list as a bulleted string for error messages."""
        if self._terminals_by_name:
            # Prefer each terminal's user-facing representation when we know it.
            d = self._terminals_by_name
            expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
        return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
class UnexpectedEOF(ParseError, UnexpectedInput):
    """An exception raised by the parser when the input ends while it still expects a token."""
    expected: 'List[Token]'

    def __init__(self, expected, state=None, terminals_by_name=None):
        super().__init__()
        from .lexer import Token

        self.expected = expected
        self.state = state
        self._terminals_by_name = terminals_by_name
        # Synthesize a placeholder token representing the end of input.
        self.token = Token("<EOF>", "")  # , line=-1, column=-1, pos_in_stream=-1)
        # No real position exists at EOF; mark everything with -1.
        self.pos_in_stream = -1
        self.line = -1
        self.column = -1

    def __str__(self):
        return "Unexpected end-of-input. " + self._format_expected(self.expected)
class UnexpectedCharacters(LexError, UnexpectedInput):
    """An exception raised by the lexer when it cannot match the next
    string of characters to any of its terminals.
    """

    allowed: Set[str]
    considered_tokens: Set[Any]

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                 terminals_by_name=None, considered_rules=None):
        super().__init__()

        # TODO considered_tokens and allowed can be figured out using state
        self.line = line
        self.column = column
        self.pos_in_stream = lex_pos
        self.state = state
        self._terminals_by_name = terminals_by_name
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.considered_rules = considered_rules
        self.token_history = token_history

        # For bytes input, decode the single offending byte (escaping anything
        # non-ASCII) so it can be embedded in the error message; for text
        # input, take the offending character directly.
        self.char = (seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
                     if isinstance(seq, bytes) else seq[lex_pos])
        # Capture the surrounding text now — the caller won't keep `seq` around.
        self._context = self.get_context(seq)

    def __str__(self):
        parts = ["No terminal matches '%s' in the current parser context, at line %d col %d"
                 % (self.char, self.line, self.column)]
        parts.append('\n\n' + self._context)
        if self.allowed:
            parts.append(self._format_expected(self.allowed))
        if self.token_history:
            parts.append('\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history))
        return ''.join(parts)
class UnexpectedToken(ParseError, UnexpectedInput):
    """An exception raised by the parser when the token it received
    doesn't match any valid step forward.

    Parameters:
        token: The mismatched token
        expected: The set of expected tokens
        considered_rules: Which rules were considered, to deduce the expected tokens
        state: A value representing the parser state. Do not rely on its value or type.
        interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure,
            and can be used for debugging and error handling.

    Note: These parameters are available as attributes of the instance.
    """

    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: 'InteractiveParser'

    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
        super().__init__()

        # TODO considered_rules and expected can be figured out using state
        # Position info is copied off the token; tokens without position
        # attributes fall back to '?' / None.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.pos_in_stream = getattr(token, 'start_pos', None)
        self.state = state

        self.token = token
        self.expected = expected  # XXX deprecate? `accepts` is better
        self.considered_rules = considered_rules
        self.interactive_parser = interactive_parser
        self._terminals_by_name = terminals_by_name
        self.token_history = token_history
        # Computed lazily by the `accepts` property on first access.
        self._accepts = NO_VALUE

    @property
    def accepts(self) -> Set[str]:
        """Terminals the parser would accept here; computed lazily and cached."""
        if self._accepts is NO_VALUE:
            parser = self.interactive_parser
            # Falls through to the falsy `parser` value itself when there is
            # no interactive parser to ask.
            self._accepts = parser.accepts() if parser else parser
        return self._accepts

    def __str__(self):
        expected_str = self._format_expected(self.accepts or self.expected)
        message = ("Unexpected token %r at line %s, column %s.\n%s"
                   % (self.token, self.line, self.column, expected_str))
        if self.token_history:
            message += "Previous tokens: %r\n" % self.token_history

        return message
class VisitError(LarkError):
    """VisitError is raised when visitors are interrupted by an exception

    It provides the following attributes for inspection:

    Parameters:
        rule: the name of the visit rule that failed
        obj: the tree-node or token that was being processed
        orig_exc: the exception that cause it to fail

    Note: These parameters are available as attributes
    """

    obj: 'Union[Tree, Token]'
    orig_exc: Exception

    def __init__(self, rule, obj, orig_exc):
        # Build the message directly into the base-class constructor.
        super().__init__('Error trying to process rule "%s":\n\n%s' % (rule, orig_exc))
        self.rule = rule
        self.obj = obj
        self.orig_exc = orig_exc
class MissingVariableError(LarkError):
    # NOTE(review): no usages are visible in this module — presumably raised by
    # grammar-template expansion elsewhere in Lark; confirm before documenting further.
    pass
292###}