Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/prompt_toolkit/lexers/pygments.py: 31%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

115 statements  

1""" 

2Adaptor classes for using Pygments lexers within prompt_toolkit. 

3 

4This includes syntax synchronization code, so that we don't have to start 

5lexing at the beginning of a document, when displaying a very large text. 

6""" 

7 

8from __future__ import annotations 

9 

10import re 

11from abc import ABCMeta, abstractmethod 

12from collections.abc import Callable, Generator, Iterable 

13from typing import TYPE_CHECKING 

14 

15from prompt_toolkit.document import Document 

16from prompt_toolkit.filters import FilterOrBool, to_filter 

17from prompt_toolkit.formatted_text.base import StyleAndTextTuples 

18from prompt_toolkit.formatted_text.utils import split_lines 

19from prompt_toolkit.styles.pygments import pygments_token_to_classname 

20 

21from .base import Lexer, SimpleLexer 

22 

23if TYPE_CHECKING: 

24 from pygments.lexer import Lexer as PygmentsLexerCls 

25 

# Public API of this module.
__all__ = [
    "PygmentsLexer",
    "SyntaxSync",
    "SyncFromStart",
    "RegexSync",
]

32 

33 

class SyntaxSync(metaclass=ABCMeta):
    """
    Base class for syntax synchronizers.

    A synchronizer locates a position from which the lexer can safely start
    running. Without one, highlighting a line deep inside a big document
    would require lexing everything from the top of the file, which is far
    too slow while editing.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Return a (row, column) position where lexing may begin.

        :param document: `Document` instance that contains all the lines.
        :param lineno: The line that we want to highlight. (The returned
            position has to be this line or an earlier one.)
        """

54 

55 

class SyncFromStart(SyntaxSync):
    """
    Trivial synchronizer: highlighting always starts at the very beginning
    of the document, regardless of the requested line.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0: the top of the text.
        return (0, 0)

65 

66 

class RegexSync(SyntaxSync):
    """
    Synchronize by starting at a line that matches the given regex pattern.
    """

    # Upper bound on how many lines we scan backwards for a synchronization
    # point; going further would be too CPU intensive.
    MAX_BACKWARDS = 500

    # When no match is found and we are still within the first 'n' lines,
    # fall back to lexing from the very start of the document.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Scan backwards, and find a possible position to start.
        """
        regex = self._compiled_pattern
        all_lines = document.lines

        # Walk upwards from `lineno` until a line matches the pattern.
        stop = max(-1, lineno - self.MAX_BACKWARDS)
        for row in range(lineno, stop, -1):
            if m := regex.match(all_lines[row]):
                return row, m.start()

        # Nothing matched. Close to the top of the file we simply lex from
        # the beginning; otherwise we start at the requested line itself.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return 0, 0
        return lineno, 0

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: type[PygmentsLexerCls]) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.
        """
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For javascript, start at a function.
            "JavaScript": r"\bfunction\b",
            # TODO: Add definitions for other languages.
            # By default, we start at every possible line.
        }
        return cls(patterns.get(lexer_cls.name, "^"))

125 

126 

class _TokenCache(dict[tuple[str, ...], str]):
    """
    Cache that converts Pygments tokens into `prompt_toolkit` style objects.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        # Compute the style string once, memoize it, and return it.
        style_str = "class:" + pygments_token_to_classname(key)
        self[key] = style_str
        return style_str

139 

140 

141_token_cache = _TokenCache() 

142 

143 

class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is displayed, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer.
        # stripnl/stripall/ensurenl are all disabled so that Pygments returns
        # exactly as much text as it was given; otherwise the produced
        # fragments would no longer line up with the document's lines.
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance. When none was given, derive a regex
        # based synchronizer from the Pygments lexer's language name.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.

        Falls back to a plain `SimpleLexer` (no highlighting) when Pygments
        has no lexer registered for this filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.

        The returned callable memoizes results per line and keeps the running
        Pygments generators around, so that nearby lines can be produced
        without re-lexing from the start.
        """
        LineGenerator = Generator[tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines.
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number of the line it most recently
        # produced.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronization object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            for generator, lineno in line_generators.items():
                # Only reuse a generator that is *before* line 'i' and not
                # too far back; otherwise starting fresh is cheaper.
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_tokens_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronization first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

            # If the column is not 0, ignore the first line. (Which is
            # incomplete. This happens when the synchronization algorithm tells
            # us to start parsing in the middle of a line.)
            if column:
                next(generator)
                row += 1

            line_generators[generator] = row
            return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronize these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            # Requested line lies beyond the end of the document.
            return []

        return get_line