Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/prompt_toolkit/lexers/pygments.py: 30%

113 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1""" 

2Adaptor classes for using Pygments lexers within prompt_toolkit. 

3 

4This includes syntax synchronization code, so that we don't have to start 

5lexing at the beginning of a document, when displaying a very large text. 

6""" 

7from __future__ import annotations 

8 

9import re 

10from abc import ABCMeta, abstractmethod 

11from typing import TYPE_CHECKING, Callable, Dict, Generator, Iterable, Tuple 

12 

13from prompt_toolkit.document import Document 

14from prompt_toolkit.filters import FilterOrBool, to_filter 

15from prompt_toolkit.formatted_text.base import StyleAndTextTuples 

16from prompt_toolkit.formatted_text.utils import split_lines 

17from prompt_toolkit.styles.pygments import pygments_token_to_classname 

18 

19from .base import Lexer, SimpleLexer 

20 

21if TYPE_CHECKING: 

22 from pygments.lexer import Lexer as PygmentsLexerCls 

23 

# Public API of this module.
__all__ = [
    "PygmentsLexer",
    "SyntaxSync",
    "SyncFromStart",
    "RegexSync",
]

30 

31 

class SyntaxSync(metaclass=ABCMeta):
    """
    Syntax synchronizer. This is a tool that finds a start position for the
    lexer. This is especially important when editing big documents; we don't
    want to start the highlighting by running the lexer from the beginning of
    the file. That is very slow when editing.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Return the position from where we can start lexing as a (row, column)
        tuple.

        :param document: `Document` instance that contains all the lines.
        :param lineno: The line that we want to highlight. (We need to return
            this line, or an earlier position — never a later one, otherwise
            the requested line would not be produced by the lexer.)
        """

52 

53 

class SyncFromStart(SyntaxSync):
    """
    Trivial synchronizer: highlighting always starts at the very beginning of
    the document, regardless of which line was requested. Accurate, but slow
    for large documents.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0: the top of the document.
        return 0, 0

63 

64 

class RegexSync(SyntaxSync):
    """
    Synchronize by starting at a line that matches the given regex pattern.
    """

    # Upper bound on how many lines we scan backwards while looking for a
    # synchronization point; scanning further would be too CPU intensive.
    MAX_BACKWARDS = 500

    # If the requested line lies within the first 'n' lines and no match was
    # found, fall back to lexing from the very start of the document.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Scan backwards, and find a possible position to start.
        """
        lines = document.lines

        # Lowest row (exclusive) that the backwards scan may reach.
        lower_bound = max(-1, lineno - self.MAX_BACKWARDS)

        # Walk upwards through the lines, looking for a line that matches the
        # synchronization pattern.
        for row in range(lineno, lower_bound, -1):
            m = self._compiled_pattern.match(lines[row])
            if m is not None:
                return row, m.start()

        # Nothing matched. Close to the top of the document, simply start at
        # the beginning; further down, just start at the requested line.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return 0, 0
        return lineno, 0

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: PygmentsLexerCls) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.
        """
        # Language-specific synchronization patterns. Anything not listed
        # falls back to "^", which matches at every line.
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For javascript, start at a function.
            "JavaScript": r"\bfunction\b",
            # TODO: Add definitions for other languages.
        }
        return cls(patterns.get(lexer_cls.name, "^"))

123 

124 

class _TokenCache(Dict[Tuple[str, ...], str]):
    """
    Cache that converts Pygments tokens into `prompt_toolkit` style objects.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        # Compute the style string once, memoize it, and hand it back.
        style_str = "class:" + pygments_token_to_classname(key)
        self[key] = style_str
        return style_str


# Module-wide singleton: Pygments token tuples are shared across lexer
# instances, so one cache serves every `PygmentsLexer`.
_token_cache = _TokenCache()

140 

141 

class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is display, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer. All stripping/normalization is
        # disabled so that Pygments returns exactly as much text as it was
        # given; token positions must line up with the document.
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance (derive one from the Pygments lexer
        # class when none was given explicitly).
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.

        Falls back to a plain `SimpleLexer` (no highlighting) when Pygments
        has no lexer registered for this filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.
        """
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines.
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number up to which it has lexed.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronization object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            # A generator is reusable only if it is strictly before line 'i'
            # (so it can still yield line 'i') and not so far back that
            # advancing it would be more expensive than starting fresh.
            for generator, lineno in line_generators.items():
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                # Re-join the tail of the document; 'column' skips the part
                # of the first line before the synchronization point.
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_tokens_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            # Split the token stream back into per-line fragment lists,
            # numbered from the synchronization row.
            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronization first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            # (The second lookup is not redundant: 'i' moved backwards above,
            # so a generator may now be in range.)
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

                # If the column is not 0, ignore the first line. (Which is
                # incomplete. This happens when the synchronization algorithm tells
                # us to start parsing in the middle of a line.)
                if column:
                    next(generator)
                    row += 1

                line_generators[generator] = row
                return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronize these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]

            # Requested line lies beyond the end of the document.
            return []

        return get_line