1""" 
    2Adaptor classes for using Pygments lexers within prompt_toolkit. 
    3 
    4This includes syntax synchronization code, so that we don't have to start 
    5lexing at the beginning of a document, when displaying a very large text. 
    6""" 
    7 
    8from __future__ import annotations 
    9 
    10import re 
    11from abc import ABCMeta, abstractmethod 
    12from typing import TYPE_CHECKING, Callable, Dict, Generator, Iterable, Tuple 
    13 
    14from prompt_toolkit.document import Document 
    15from prompt_toolkit.filters import FilterOrBool, to_filter 
    16from prompt_toolkit.formatted_text.base import StyleAndTextTuples 
    17from prompt_toolkit.formatted_text.utils import split_lines 
    18from prompt_toolkit.styles.pygments import pygments_token_to_classname 
    19 
    20from .base import Lexer, SimpleLexer 
    21 
    22if TYPE_CHECKING: 
    23    from pygments.lexer import Lexer as PygmentsLexerCls 
    24 
    25__all__ = [ 
    26    "PygmentsLexer", 
    27    "SyntaxSync", 
    28    "SyncFromStart", 
    29    "RegexSync", 
    30] 
    31 
    32 
    33class SyntaxSync(metaclass=ABCMeta): 
    34    """ 
    35    Syntax synchronizer. This is a tool that finds a start position for the 
    36    lexer. This is especially important when editing big documents; we don't 
    37    want to start the highlighting by running the lexer from the beginning of 
    38    the file. That is very slow when editing. 
    39    """ 
    40 
    41    @abstractmethod 
    42    def get_sync_start_position( 
    43        self, document: Document, lineno: int 
    44    ) -> tuple[int, int]: 
    45        """ 
    46        Return the position from where we can start lexing as a (row, column) 
    47        tuple. 
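
        For example, ``(0, 0)`` means: start lexing from the very beginning of
        the document (which is what :class:`.SyncFromStart` always returns).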

        :param document: `Document` instance that contains all the lines.
        :param lineno: The line that we want to highlight. (We need to return
            this line, or an earlier position.)
        """


class SyncFromStart(SyntaxSync):
    """
    Always start the syntax highlighting from the beginning.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        return 0, 0


class RegexSync(SyntaxSync):
    """
    Synchronize by starting at a line that matches the given regex pattern.
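
    A minimal usage sketch (``PythonLexer`` is just an illustrative choice;
    the pattern is a simplified variant of the one that
    ``from_pygments_lexer_cls`` picks for Python)::

        from pygments.lexers.python import PythonLexer

        # Illustrative: resume lexing at lines that start a class/def block.
        sync = RegexSync(r"^ *(class|def) ")
        lexer = PygmentsLexer(
            PythonLexer, sync_from_start=False, syntax_sync=sync
        )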
    """

    # Never go more than this number of lines backwards for synchronization.
    # That would be too CPU intensive.
    MAX_BACKWARDS = 500

    # Start lexing at the start, if we are in the first 'n' lines and no
    # synchronization position was found.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Scan backwards, and find a possible position to start.
        """
        pattern = self._compiled_pattern
        lines = document.lines

        # Scan upwards, until we find a point where we can start the syntax
        # synchronization.
        for i in range(lineno, max(-1, lineno - self.MAX_BACKWARDS), -1):
            match = pattern.match(lines[i])
            if match:
                return i, match.start()

        # No synchronization point found. If we aren't that far from the
        # beginning, start at the very beginning, otherwise, just try to start
        # at the current line.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return 0, 0
        else:
            return lineno, 0

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: type[PygmentsLexerCls]) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.
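
        For example (assuming the Pygments ``PythonLexer``), this simply picks
        the matching entry from the pattern table below::

            from pygments.lexers.python import PythonLexer

            sync = RegexSync.from_pygments_lexer_cls(PythonLexer)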
        """
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For JavaScript, start at a function.
            "JavaScript": r"\bfunction\b",
            # TODO: Add definitions for other languages.
            #       By default, we start at every possible line.
        }
        p = patterns.get(lexer_cls.name, "^")
        return cls(p)


class _TokenCache(Dict[Tuple[str, ...], str]):
    """
    Cache that converts Pygments tokens into `prompt_toolkit` style objects.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        result = "class:" + pygments_token_to_classname(key)
        self[key] = result
        return result


_token_cache = _TokenCache()


class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments-compatible style. E.g.::

        from prompt_toolkit.styles import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is displayed, the
        whole document will be lexed by Pygments on every keystroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum number of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this number of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer.
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.
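
        Example (``"example.html"`` is an arbitrary filename)::

            # Falls back to a plain ``SimpleLexer`` when Pygments has no
            # lexer registered for the given filename.
            lexer = PygmentsLexer.from_filename("example.html")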
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples that the Pygments lexer produces for that
        line.
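
        A usage sketch (``lexer`` is a `PygmentsLexer` instance and
        ``document`` a `Document`)::

            get_line = lexer.lex_document(document)
            fragments = get_line(5)  # Style/text tuples for line 5.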
        """
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines.
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronization object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            for generator, lineno in line_generators.items():
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_tokens_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n.  (We don't want that;
                # Pygments should return exactly the same amount of text as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronization first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

            # If the column is not 0, ignore the first line. (Which is
            # incomplete. This happens when the synchronization algorithm tells
            # us to start parsing in the middle of a line.)
            if column:
                next(generator)
                row += 1

            line_generators[generator] = row
            return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronize these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            return []

        return get_line