Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/prompt_toolkit/lexers/pygments.py: 30%
113 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
1"""
2Adaptor classes for using Pygments lexers within prompt_toolkit.
4This includes syntax synchronization code, so that we don't have to start
5lexing at the beginning of a document, when displaying a very large text.
6"""
7from __future__ import annotations
9import re
10from abc import ABCMeta, abstractmethod
11from typing import (
12 TYPE_CHECKING,
13 Callable,
14 Dict,
15 Generator,
16 Iterable,
17 Optional,
18 Tuple,
19 Type,
20)
22from prompt_toolkit.document import Document
23from prompt_toolkit.filters import FilterOrBool, to_filter
24from prompt_toolkit.formatted_text.base import StyleAndTextTuples
25from prompt_toolkit.formatted_text.utils import split_lines
26from prompt_toolkit.styles.pygments import pygments_token_to_classname
28from .base import Lexer, SimpleLexer
30if TYPE_CHECKING:
31 from pygments.lexer import Lexer as PygmentsLexerCls
33__all__ = [
34 "PygmentsLexer",
35 "SyntaxSync",
36 "SyncFromStart",
37 "RegexSync",
38]
class SyntaxSync(metaclass=ABCMeta):
    """
    Interface for syntax synchronizers.

    A synchronizer locates a safe position from which the lexer may begin.
    This matters for big documents: when highlighting a line far into the
    file, we don't want to run the lexer from the very beginning every
    time — that would be much too slow while editing.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Return a (row, column) position at which lexing can safely start.

        :param document: `Document` instance that contains all the lines.
        :param lineno: The line that we want to highlight. (The returned
            position must be at this line or an earlier one.)
        """
class SyncFromStart(SyntaxSync):
    """
    Trivial synchronizer: highlighting always starts at the very beginning
    of the document.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0: the start of the document, regardless of `lineno`.
        return 0, 0
class RegexSync(SyntaxSync):
    """
    Synchronize by starting at a line that matches the given regex pattern.

    :param pattern: Regular expression. Lexing starts at the closest earlier
        line whose prefix matches this pattern.
    """

    # Never go more than this amount of lines backwards for synchronisation.
    # That would be too CPU intensive.
    MAX_BACKWARDS = 500

    # Start lexing at the start, if we are in the first 'n' lines and no
    # synchronisation position was found.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    # Known synchronisation patterns, keyed by the Pygments lexer `name`.
    # (Class-level constant so the dict is built once, not on every call
    # of `from_pygments_lexer_cls`.)
    PATTERNS_BY_LEXER_NAME: dict[str, str] = {
        # For Python, start highlighting at any class/def block.
        "Python": r"^\s*(class|def)\s+",
        "Python 3": r"^\s*(class|def)\s+",
        # For HTML, start at any open/close tag definition.
        "HTML": r"<[/a-zA-Z]",
        # For javascript, start at a function.
        "JavaScript": r"\bfunction\b",
        # TODO: Add definitions for other languages.
    }

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Scan backwards, and find a possible position to start.

        Returns a (row, column) tuple at or before `lineno`.
        """
        pattern = self._compiled_pattern
        lines = document.lines

        # Scan upwards, until we find a point where we can start the syntax
        # synchronisation.
        for i in range(lineno, max(-1, lineno - self.MAX_BACKWARDS), -1):
            match = pattern.match(lines[i])
            if match:
                return i, match.start()

        # No synchronisation point found. If we aren't that far from the
        # beginning, start at the very beginning, otherwise, just try to start
        # at the current line.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return 0, 0
        else:
            return lineno, 0

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: PygmentsLexerCls) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.

        Unknown lexers fall back to ``"^"``: synchronisation is possible at
        every line.
        """
        return cls(cls.PATTERNS_BY_LEXER_NAME.get(lexer_cls.name, "^"))
class _TokenCache(Dict[Tuple[str, ...], str]):
    """
    Memoizing dict that maps a Pygments token (a tuple of names) to the
    corresponding `prompt_toolkit` style string.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        # Compute the style string on first access and store it, so that
        # subsequent lookups are plain dict hits.
        style = "class:" + pygments_token_to_classname(key)
        self[key] = style
        return style
148_token_cache = _TokenCache()
class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is displayed, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer.
        # (stripnl/stripall/ensurenl are all disabled so that Pygments returns
        # exactly as much text as it was given — the line bookkeeping in
        # `lex_document` depends on that.)
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.

        Falls back to a plain `SimpleLexer` (no highlighting) when Pygments
        has no lexer registered for this filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.

        The returned callable caches lexed lines and keeps a set of running
        Pygments generators, so that requesting nearby lines reuses work.
        """
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines. (line number -> fragments)
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number it has lexed up to.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronisation object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            for generator, lineno in line_generators.items():
                # Only reuse a generator that is positioned *before* line 'i'
                # and not too far away; otherwise starting fresh is cheaper.
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_tokens_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronisation first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

                # If the column is not 0, ignore the first line. (Which is
                # incomplete. This happens when the synchronisation algorithm tells
                # us to start parsing in the middle of a line.)
                if column:
                    next(generator)
                    row += 1

                line_generators[generator] = row
                return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronise these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            # Line number past the end of the document: no fragments.
            return []

        return get_line