1"""
2Adaptor classes for using Pygments lexers within prompt_toolkit.
3
4This includes syntax synchronization code, so that we don't have to start
5lexing at the beginning of a document, when displaying a very large text.
6"""
7
8from __future__ import annotations
9
10import re
11from abc import ABCMeta, abstractmethod
12from typing import TYPE_CHECKING, Callable, Dict, Generator, Iterable, Tuple
13
14from prompt_toolkit.document import Document
15from prompt_toolkit.filters import FilterOrBool, to_filter
16from prompt_toolkit.formatted_text.base import StyleAndTextTuples
17from prompt_toolkit.formatted_text.utils import split_lines
18from prompt_toolkit.styles.pygments import pygments_token_to_classname
19
20from .base import Lexer, SimpleLexer
21
22if TYPE_CHECKING:
23 from pygments.lexer import Lexer as PygmentsLexerCls
24
25__all__ = [
26 "PygmentsLexer",
27 "SyntaxSync",
28 "SyncFromStart",
29 "RegexSync",
30]
31
32
class SyntaxSync(metaclass=ABCMeta):
    """
    Base class for syntax synchronizers.

    A synchronizer decides at which position the lexer can safely be
    (re)started. For big documents this matters a lot: running the lexer from
    the very first line on every edit would be far too slow.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Return a (row, column) tuple from where lexing can start.

        :param document: `Document` instance that contains all the lines.
        :param lineno: The line that we want to highlight. (The returned
            position has to be this line, or an earlier one.)
        """
53
54
class SyncFromStart(SyntaxSync):
    """
    Trivial synchronizer: highlighting always starts at the very beginning of
    the document.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0 -- regardless of the requested line.
        return (0, 0)
64
65
class RegexSync(SyntaxSync):
    """
    Synchronizer that scans backwards for a line matching a regex pattern,
    and starts the lexer there.
    """

    # Never go more than this amount of lines backwards for synchronization.
    # That would be too CPU intensive.
    MAX_BACKWARDS = 500

    # Start lexing at the start, if we are in the first 'n' lines and no
    # synchronization position was found.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Walk upwards from `lineno`, returning the first matching position.
        """
        match = self._compiled_pattern.match
        lines = document.lines

        # Scan upwards (bounded by MAX_BACKWARDS) until a line matches the
        # synchronization pattern.
        stop = max(-1, lineno - self.MAX_BACKWARDS)
        for row in range(lineno, stop, -1):
            m = match(lines[row])
            if m is not None:
                return row, m.start()

        # Nothing matched. Close to the top of the document we can afford to
        # lex from the very beginning; otherwise just start at this line.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return (0, 0)
        return (lineno, 0)

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: PygmentsLexerCls) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.
        """
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For javascript, start at a function.
            "JavaScript": r"\bfunction\b",
            # TODO: Add definitions for other languages.
            # By default, we start at every possible line.
        }
        return cls(patterns.get(lexer_cls.name, "^"))
124
125
class _TokenCache(Dict[Tuple[str, ...], str]):
    """
    Cache mapping Pygments token tuples to `prompt_toolkit` style strings.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        # Compute once, memoize, and return. (Chained assignment stores the
        # style string under `key` before returning it.)
        self[key] = style_str = "class:" + pygments_token_to_classname(key)
        return style_str
138
139
# Module-level cache instance, shared by every `PygmentsLexer`: each distinct
# Pygments token tuple is converted into a style string only once.
_token_cache = _TokenCache()
141
142
class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is displayed, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer.
        # (Stripping/appending newlines is disabled so that Pygments returns
        # exactly as much text as it was given; token output has to line up
        # with the document's lines.)
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance. When none is given, derive a
        # `RegexSync` from the Pygments lexer class.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.

        Falls back to a plain `SimpleLexer` (no highlighting) when Pygments
        has no lexer registered for this filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.

        The returned callable closes over a per-document cache of lexed lines
        and over the set of running Pygments generators, so repeated calls for
        the same document are cheap.
        """
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines (line number -> fragments).
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number up to which it has lexed.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronization object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            for generator, lineno in line_generators.items():
                # Only reuse a generator that is behind line 'i' and not too
                # far away; advancing it is then cheaper than starting a new
                # Pygments run.
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                # Lex everything from (start_lineno, column) to the end of
                # the document in one Pygments run.
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_text_fragments_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            # Re-split the fragment stream on newlines, numbering the lines
            # starting at `start_lineno`.
            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronization first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            # (After moving `i` backwards, an existing generator may now be
            # close enough to reuse.)
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

            # If the column is not 0, ignore the first line. (Which is
            # incomplete. This happens when the synchronization algorithm tells
            # us to start parsing in the middle of a line.)
            if column:
                next(generator)
                row += 1

            line_generators[generator] = row
            return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronize these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            # Generator was exhausted before reaching line 'i' (line number
            # out of range): return no fragments.
            return []

        return get_line