Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/prompt_toolkit/lexers/pygments.py: 30%
113 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
1"""
2Adaptor classes for using Pygments lexers within prompt_toolkit.
4This includes syntax synchronization code, so that we don't have to start
5lexing at the beginning of a document, when displaying a very large text.
6"""
7from __future__ import annotations
9import re
10from abc import ABCMeta, abstractmethod
11from typing import TYPE_CHECKING, Callable, Dict, Generator, Iterable, Tuple
13from prompt_toolkit.document import Document
14from prompt_toolkit.filters import FilterOrBool, to_filter
15from prompt_toolkit.formatted_text.base import StyleAndTextTuples
16from prompt_toolkit.formatted_text.utils import split_lines
17from prompt_toolkit.styles.pygments import pygments_token_to_classname
19from .base import Lexer, SimpleLexer
21if TYPE_CHECKING:
22 from pygments.lexer import Lexer as PygmentsLexerCls
24__all__ = [
25 "PygmentsLexer",
26 "SyntaxSync",
27 "SyncFromStart",
28 "RegexSync",
29]
class SyntaxSync(metaclass=ABCMeta):
    """
    Interface for syntax synchronizers.

    A synchronizer picks a safe point in the document from which lexing can
    begin. This matters for big documents: running the lexer from the very
    first line on every redraw would be far too slow while editing.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Return a (row, column) position at or before `lineno` where lexing
        may safely start.

        :param document: `Document` instance that contains all the lines.
        :param lineno: Index of the line we want highlighted; the returned
            row must be this line or an earlier one.
        """
class SyncFromStart(SyntaxSync):
    """
    Trivial synchronizer: lexing always begins at the very first character
    of the document.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0: the top of the document.
        return (0, 0)
class RegexSync(SyntaxSync):
    """
    Synchronizer that scans backwards for the nearest line matching a given
    regex pattern and starts lexing there.
    """

    # Upper bound on the backwards scan; going further than this would be
    # too CPU intensive.
    MAX_BACKWARDS = 500

    # If no match is found while still within the first 'n' lines, fall back
    # to lexing from the very start of the document.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Walk upwards from `lineno`, returning the first earlier line that
        matches the pattern as the lexing start point.
        """
        regex = self._compiled_pattern
        all_lines = document.lines

        # Scan upwards, at most MAX_BACKWARDS lines.
        lowest = max(-1, lineno - self.MAX_BACKWARDS)
        for row in range(lineno, lowest, -1):
            m = regex.match(all_lines[row])
            if m is not None:
                return row, m.start()

        # Nothing matched. Near the top of the document, just start at the
        # very beginning; otherwise start at the requested line itself.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return 0, 0
        return lineno, 0

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: PygmentsLexerCls) -> RegexSync:
        """
        Build a :class:`.RegexSync` suited to the given Pygments lexer class.
        """
        # Known-good synchronization patterns per lexer name. Any language
        # not listed here synchronizes at every possible line ("^" matches
        # everywhere).
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For javascript, start at a function.
            "JavaScript": r"\bfunction\b",
            # TODO: Add definitions for other languages.
        }
        return cls(patterns.get(lexer_cls.name, "^"))
class _TokenCache(Dict[Tuple[str, ...], str]):
    """
    Dict that lazily converts Pygments token tuples into `prompt_toolkit`
    style strings, remembering each conversion it performs.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        # Compute once, store it, and hand back the cached value.
        self[key] = style = "class:" + pygments_token_to_classname(key)
        return style
# Module-wide singleton: token -> style-string conversions are shared
# across every PygmentsLexer instance.
_token_cache = _TokenCache()
class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is displayed, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer. The strip*/ensurenl options are all
        # disabled so the lexer output covers exactly the input text.
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            # No lexer known for this filename: fall back to no highlighting.
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.
        """
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines.
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronization object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            for generator, lineno in line_generators.items():
                # Only reuse a generator that sits before line 'i' and is not
                # too far behind it (otherwise starting fresh is cheaper than
                # lexing all the intermediate lines).
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_tokens_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronization first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

            # If the column is not 0, ignore the first line. (Which is
            # incomplete. This happens when the synchronization algorithm tells
            # us to start parsing in the middle of a line.)
            if column:
                next(generator)
                row += 1

            line_generators[generator] = row
            return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronize these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            # Generator exhausted before reaching line 'i' (e.g. the line is
            # past the end of the document).
            return []

        return get_line
327 return get_line