Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/prompt_toolkit/lexers/pygments.py: 31%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

114 statements  

1""" 

2Adaptor classes for using Pygments lexers within prompt_toolkit. 

3 

4This includes syntax synchronization code, so that we don't have to start 

5lexing at the beginning of a document, when displaying a very large text. 

6""" 

7 

8from __future__ import annotations 

9 

10import re 

11from abc import ABCMeta, abstractmethod 

12from typing import TYPE_CHECKING, Callable, Dict, Generator, Iterable, Tuple 

13 

14from prompt_toolkit.document import Document 

15from prompt_toolkit.filters import FilterOrBool, to_filter 

16from prompt_toolkit.formatted_text.base import StyleAndTextTuples 

17from prompt_toolkit.formatted_text.utils import split_lines 

18from prompt_toolkit.styles.pygments import pygments_token_to_classname 

19 

20from .base import Lexer, SimpleLexer 

21 

22if TYPE_CHECKING: 

23 from pygments.lexer import Lexer as PygmentsLexerCls 

24 

25__all__ = [ 

26 "PygmentsLexer", 

27 "SyntaxSync", 

28 "SyncFromStart", 

29 "RegexSync", 

30] 

31 

32 

class SyntaxSync(metaclass=ABCMeta):
    """
    Abstract base class for syntax synchronizers.

    A synchronizer decides where in a document the lexer may safely begin
    tokenizing. Without one, highlighting a line near the end of a big
    document would require lexing everything that precedes it, which is far
    too slow while editing.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Compute a safe lexing start point, as a (row, column) pair.

        :param document: `Document` instance that holds the complete text.
        :param lineno: Index of the line that needs highlighting. The row
            returned must be this line, or an earlier position.
        """

53 

54 

class SyncFromStart(SyntaxSync):
    """
    Trivial synchronizer: lexing always begins at the very first character
    of the document.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0 -- the start of the document, regardless of the
        # requested line.
        return (0, 0)

64 

65 

class RegexSync(SyntaxSync):
    """
    Synchronizer that walks upwards through the document until it finds a
    line matching a given regex pattern, and starts lexing there.
    """

    # Upper bound on how many lines we scan backwards for a match; scanning
    # further would be too CPU intensive.
    MAX_BACKWARDS = 500

    # If no match was found but we are still within the first 'n' lines,
    # fall back to lexing from the very beginning of the document.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Walk backwards from `lineno` and return the first usable start point.
        """
        regex = self._compiled_pattern
        all_lines = document.lines

        # Look upwards for a line matching the pattern, giving up after
        # MAX_BACKWARDS lines.
        lowest = max(-1, lineno - self.MAX_BACKWARDS)
        for row in range(lineno, lowest, -1):
            m = regex.match(all_lines[row])
            if m is not None:
                return row, m.start()

        # No synchronization point found. Close to the top of the file we
        # simply start at the beginning; otherwise just start at the
        # requested line.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return (0, 0)
        return (lineno, 0)

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: PygmentsLexerCls) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.
        """
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For javascript, start at a function.
            "JavaScript": r"\bfunction\b",
            # TODO: Add definitions for other languages.
            # By default, we start at every possible line.
        }
        return cls(patterns.get(lexer_cls.name, "^"))

124 

125 

126class _TokenCache(Dict[Tuple[str, ...], str]): 

127 """ 

128 Cache that converts Pygments tokens into `prompt_toolkit` style objects. 

129 

130 ``Token.A.B.C`` will be converted into: 

131 ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C`` 

132 """ 

133 

134 def __missing__(self, key: tuple[str, ...]) -> str: 

135 result = "class:" + pygments_token_to_classname(key) 

136 self[key] = result 

137 return result 

138 

139 

140_token_cache = _TokenCache() 

141 

142 

class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is display, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer. The strip/ensure flags are all
        # disabled so that Pygments returns exactly the text it was given.
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance. Default to a regex-based synchronizer
        # derived from the lexer class when none was given explicitly.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.

        Falls back to a plain :class:`SimpleLexer` (no highlighting) when
        Pygments has no lexer registered for this filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.

        The returned callable closes over a per-document cache and a set of
        in-progress Pygments generators, so repeated calls for nearby lines
        are cheap.
        """
        # A line generator yields (line_number, fragments) pairs.
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines.
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronization object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            for generator, lineno in line_generators.items():
                # Only reuse a generator that sits before line 'i' and is
                # within REUSE_GENERATOR_MAX_DISTANCE lines of it.
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_text_fragments_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronization first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

            # If the column is not 0, ignore the first line. (Which is
            # incomplete. This happens when the synchronization algorithm tells
            # us to start parsing in the middle of a line.)
            if column:
                next(generator)
                row += 1

            line_generators[generator] = row
            return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronize these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            # Generator exhausted without reaching line 'i' (line is past the
            # end of the document): return no fragments.
            return []

        return get_line