1from __future__ import annotations
2
3import contextlib
4import re
5from dataclasses import dataclass
6from typing import Generator, Mapping, NoReturn
7
8from .specifiers import Specifier
9
10
@dataclass
class Token:
    """A single lexeme: the rule that matched, its text, and where it starts."""

    name: str  # name of the tokenizer rule that produced this token
    text: str  # exact substring of the source that the rule matched
    position: int  # offset into the source where the match begins
16
17
class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: tuple[int, int],
    ) -> None:
        """Record everything needed to render a pointed error message.

        :param message: human-readable description of the failure
        :param source: the full text that was being parsed
        :param span: ``(start, end)`` offsets of the offending region
        """
        self.message = message
        self.source = source
        self.span = span

        super().__init__()

    def __str__(self) -> str:
        # Underline the offending span with tildes and cap it with a caret.
        start, end = self.span
        marker = f"{' ' * start}{'~' * (end - start)}^"
        return "\n    ".join([self.message, self.source, marker])
37
38
# Terminal symbols of the dependency-specifier / environment-marker grammar,
# keyed by token name. Every pattern is applied with `Pattern.match` at the
# tokenizer's current position, so each rule is implicitly anchored there.
DEFAULT_RULES: dict[str, re.Pattern[str]] = {
    # Grouping and separators.
    "LEFT_PARENTHESIS": re.compile(r"\("),
    "RIGHT_PARENTHESIS": re.compile(r"\)"),
    "LEFT_BRACKET": re.compile(r"\["),
    "RIGHT_BRACKET": re.compile(r"\]"),
    "SEMICOLON": re.compile(r";"),
    "COMMA": re.compile(r","),
    # A single- or double-quoted string; no escape sequences, quotes may not
    # appear inside a string delimited by the same quote character.
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    # Comparison operators; longer alternatives listed first so e.g. `===`
    # is not consumed as `==` followed by a stray `=`.
    "OP": re.compile(r"(===|==|~=|!=|<=|>=|<|>)"),
    "BOOLOP": re.compile(r"\b(or|and)\b"),
    "IN": re.compile(r"\bin\b"),
    "NOT": re.compile(r"\bnot\b"),
    # Environment-marker variable names, including the alternate `.`/`_`
    # spellings (e.g. `os.name` / `os_name`).
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extras?
                |dependency_groups
            )\b
        """,
        re.VERBOSE,
    ),
    # A full version specifier (operator + version), built from the pattern
    # fragments that the Specifier class itself uses.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    # `@` introducing a direct URL reference.
    "AT": re.compile(r"\@"),
    # A URL: any run of characters up to the next space or tab.
    "URL": re.compile(r"[^ \t]+"),
    # Project names, extra names, and similar identifiers.
    "IDENTIFIER": re.compile(r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b"),
    # Trailing `.*` of a prefix-match specifier (e.g. `==1.0.*`).
    "VERSION_PREFIX_TRAIL": re.compile(r"\.\*"),
    # Trailing `+local` version label (e.g. `+ubuntu.1`).
    "VERSION_LOCAL_LABEL_TRAIL": re.compile(r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*"),
    # Horizontal whitespace only; newlines are not valid in this grammar.
    "WS": re.compile(r"[ \t]+"),
    # End of input (anchored match of the empty string at the end).
    "END": re.compile(r"$"),
}
89
90
class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: Mapping[str, re.Pattern[str]],
    ) -> None:
        """Create a tokenizer over ``source`` using the given ``rules``.

        :param source: the text to tokenize
        :param rules: mapping of token name to its compiled pattern; patterns
            are matched anchored at the current position
        """
        self.source = source
        self.rules = rules
        # One-token lookahead buffer: filled by `check`, drained by `read`.
        self.next_token: Token | None = None
        # Current scan offset into `source`.
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        # These asserts guard internal invariants (programming errors in the
        # parser), not user-input errors — those raise ParserSyntaxError.
        assert self.next_token is None, (
            f"Cannot check for {name!r}, already have {self.next_token!r}"
        )
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        # Anchored match at the current position only; never scans ahead.
        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        On success the token *is* consumed and returned.
        """
        if not self.check(name):
            # raise_syntax_error is NoReturn; a redundant `raise` in front of
            # it would be dead code.
            self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: int | None = None,
        span_end: int | None = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        :param message: description of the failure
        :param span_start: start of the highlighted span; defaults to the
            current position
        :param span_end: end of the highlighted span; defaults to the current
            position
        """
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Generator[None, None, None]:
        """Run the body inside an *optional* ``open_token``/``close_token`` pair.

        If ``open_token`` is present it is consumed before the body runs, and a
        matching ``close_token`` is then required (and consumed) afterwards; a
        missing closer is reported as a syntax error whose span points back at
        the opener. If ``open_token`` is absent, the body runs with no closing
        requirement.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()