Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/prompt_toolkit/lexers/pygments.py: 30%

113 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:07 +0000

1""" 

2Adaptor classes for using Pygments lexers within prompt_toolkit. 

3 

4This includes syntax synchronization code, so that we don't have to start 

5lexing at the beginning of a document, when displaying a very large text. 

6""" 

7from __future__ import annotations 

8 

9import re 

10from abc import ABCMeta, abstractmethod 

11from typing import ( 

12 TYPE_CHECKING, 

13 Callable, 

14 Dict, 

15 Generator, 

16 Iterable, 

17 Optional, 

18 Tuple, 

19 Type, 

20) 

21 

22from prompt_toolkit.document import Document 

23from prompt_toolkit.filters import FilterOrBool, to_filter 

24from prompt_toolkit.formatted_text.base import StyleAndTextTuples 

25from prompt_toolkit.formatted_text.utils import split_lines 

26from prompt_toolkit.styles.pygments import pygments_token_to_classname 

27 

28from .base import Lexer, SimpleLexer 

29 

30if TYPE_CHECKING: 

31 from pygments.lexer import Lexer as PygmentsLexerCls 

32 

33__all__ = [ 

34 "PygmentsLexer", 

35 "SyntaxSync", 

36 "SyncFromStart", 

37 "RegexSync", 

38] 

39 

40 

class SyntaxSync(metaclass=ABCMeta):
    """
    Syntax synchroniser. This is a tool that finds a start position for the
    lexer. This is especially important when editing big documents; we don't
    want to start the highlighting by running the lexer from the beginning of
    the file. That is very slow when editing.

    Subclasses implement :meth:`get_sync_start_position`; the returned
    position must be at, or before, the requested line.
    """

    @abstractmethod
    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Return the position from where we can start lexing as a (row, column)
        tuple.

        :param document: `Document` instance that contains all the lines.
        :param lineno: The line that we want to highlight. (We need to return
            this line, or an earlier position.)
        """

62 

class SyncFromStart(SyntaxSync):
    """
    Trivial synchroniser: lexing always begins at the very first character
    of the document, whatever line was requested.
    """

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        # Row 0, column 0: the start of the document. Accurate but slow for
        # big documents (the `document` and `lineno` arguments are ignored).
        return (0, 0)

72 

73 

class RegexSync(SyntaxSync):
    """
    Synchronize by starting at a line that matches the given regex pattern.
    """

    # Never go more than this amount of lines backwards for synchronisation.
    # That would be too CPU intensive.
    MAX_BACKWARDS = 500

    # Start lexing at the start, if we are in the first 'n' lines and no
    # synchronisation position was found.
    FROM_START_IF_NO_SYNC_POS_FOUND = 100

    def __init__(self, pattern: str) -> None:
        # Compile once; `get_sync_start_position` is called on every repaint.
        self._compiled_pattern = re.compile(pattern)

    def get_sync_start_position(
        self, document: Document, lineno: int
    ) -> tuple[int, int]:
        """
        Scan backwards, and find a possible position to start.
        """
        match_line = self._compiled_pattern.match
        all_lines = document.lines

        # Walk upwards from the requested line until a line matches the
        # pattern, but never further back than MAX_BACKWARDS lines.
        stop = max(-1, lineno - self.MAX_BACKWARDS)
        row = lineno
        while row > stop:
            found = match_line(all_lines[row])
            if found is not None:
                return row, found.start()
            row -= 1

        # No synchronisation point found. If we aren't that far from the
        # beginning, start at the very beginning, otherwise, just try to start
        # at the current line.
        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
            return 0, 0
        return lineno, 0

    @classmethod
    def from_pygments_lexer_cls(cls, lexer_cls: PygmentsLexerCls) -> RegexSync:
        """
        Create a :class:`.RegexSync` instance for this Pygments lexer class.
        """
        # Lexer name -> regex that marks a safe point to restart lexing.
        patterns = {
            # For Python, start highlighting at any class/def block.
            "Python": r"^\s*(class|def)\s+",
            "Python 3": r"^\s*(class|def)\s+",
            # For HTML, start at any open/close tag definition.
            "HTML": r"<[/a-zA-Z]",
            # For javascript, start at a function.
            "JavaScript": r"\bfunction\b"
            # TODO: Add definitions for other languages.
        }
        # By default ("^"), every line is a potential sync point.
        return cls(patterns.get(lexer_cls.name, "^"))

132 

133 

class _TokenCache(Dict[Tuple[str, ...], str]):
    """
    Cache that converts Pygments tokens into `prompt_toolkit` style objects.

    ``Token.A.B.C`` will be converted into:
    ``class:pygments,pygments.A,pygments.A.B,pygments.A.B.C``
    """

    def __missing__(self, key: tuple[str, ...]) -> str:
        # Compute once and memoize; later lookups for the same token hit the
        # plain dict path and never reach __missing__ again.
        self[key] = style_str = "class:" + pygments_token_to_classname(key)
        return style_str

146 

147 

# Shared module-level cache: the Pygments-token -> "class:..." style string
# conversion is computed at most once per distinct token tuple.
_token_cache = _TokenCache()

149 

150 

class PygmentsLexer(Lexer):
    """
    Lexer that calls a pygments lexer.

    Example::

        from pygments.lexers.html import HtmlLexer
        lexer = PygmentsLexer(HtmlLexer)

    Note: Don't forget to also load a Pygments compatible style. E.g.::

        from prompt_toolkit.styles.from_pygments import style_from_pygments_cls
        from pygments.styles import get_style_by_name
        style = style_from_pygments_cls(get_style_by_name('monokai'))

    :param pygments_lexer_cls: A `Lexer` from Pygments.
    :param sync_from_start: Start lexing at the start of the document. This
        will always give the best results, but it will be slow for bigger
        documents. (When the last part of the document is displayed, then the
        whole document will be lexed by Pygments on every key stroke.) It is
        recommended to disable this for inputs that are expected to be more
        than 1,000 lines.
    :param syntax_sync: `SyntaxSync` object.
    """

    # Minimum amount of lines to go backwards when starting the parser.
    # This is important when the lines are retrieved in reverse order, or when
    # scrolling upwards. (Due to the complexity of calculating the vertical
    # scroll offset in the `Window` class, lines are not always retrieved in
    # order.)
    MIN_LINES_BACKWARDS = 50

    # When a parser was started this amount of lines back, read the parser
    # until we get the current line. Otherwise, start a new parser.
    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
    REUSE_GENERATOR_MAX_DISTANCE = 100

    def __init__(
        self,
        pygments_lexer_cls: type[PygmentsLexerCls],
        sync_from_start: FilterOrBool = True,
        syntax_sync: SyntaxSync | None = None,
    ) -> None:
        self.pygments_lexer_cls = pygments_lexer_cls
        self.sync_from_start = to_filter(sync_from_start)

        # Instantiate the Pygments lexer.
        # stripnl/stripall/ensurenl are all disabled so Pygments returns
        # exactly as much text as it was given; otherwise line offsets in the
        # highlighted output would no longer match the document.
        self.pygments_lexer = pygments_lexer_cls(
            stripnl=False, stripall=False, ensurenl=False
        )

        # Create syntax sync instance.
        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(
            pygments_lexer_cls
        )

    @classmethod
    def from_filename(
        cls, filename: str, sync_from_start: FilterOrBool = True
    ) -> Lexer:
        """
        Create a `Lexer` from a filename.

        Returns a plain `SimpleLexer` (no highlighting) when Pygments has no
        lexer registered for this filename.
        """
        # Inline imports: the Pygments dependency is optional!
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            pygments_lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return SimpleLexer()
        else:
            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)

    def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
        """
        Create a lexer function that takes a line number and returns the list
        of (style_str, text) tuples as the Pygments lexer returns for that line.

        The returned callable closes over a per-document cache, so repeated
        requests for the same line are served without re-lexing.
        """
        LineGenerator = Generator[Tuple[int, StyleAndTextTuples], None, None]

        # Cache of already lexed lines.
        cache: dict[int, StyleAndTextTuples] = {}

        # Pygments generators that are currently lexing.
        # Map lexer generator to the line number up to which it has lexed.
        line_generators: dict[LineGenerator, int] = {}

        def get_syntax_sync() -> SyntaxSync:
            "The Syntax synchronisation object that we currently use."
            if self.sync_from_start():
                return SyncFromStart()
            else:
                return self.syntax_sync

        def find_closest_generator(i: int) -> LineGenerator | None:
            "Return a generator close to line 'i', or None if none was found."
            # Only reuse a generator that is positioned *before* line 'i' and
            # not too far back (otherwise starting fresh is cheaper).
            for generator, lineno in line_generators.items():
                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
                    return generator
            return None

        def create_line_generator(start_lineno: int, column: int = 0) -> LineGenerator:
            """
            Create a generator that yields the lexed lines.
            Each iteration it yields a (line_number, [(style_str, text), ...]) tuple.
            """

            def get_text_fragments() -> Iterable[tuple[str, str]]:
                # Lex everything from the sync point to the end of the
                # document, skipping `column` characters of the first line.
                text = "\n".join(document.lines[start_lineno:])[column:]

                # We call `get_tokens_unprocessed`, because `get_tokens` will
                # still replace \r\n and \r by \n. (We don't want that,
                # Pygments should return exactly the same amount of text, as we
                # have given as input.)
                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
                    # Turn Pygments `Token` object into prompt_toolkit style
                    # strings.
                    yield _token_cache[t], v

            yield from enumerate(split_lines(list(get_text_fragments())), start_lineno)

        def get_generator(i: int) -> LineGenerator:
            """
            Find an already started generator that is close, or create a new one.
            """
            # Find closest line generator.
            generator = find_closest_generator(i)
            if generator:
                return generator

            # No generator found. Determine starting point for the syntax
            # synchronisation first.

            # Go at least x lines back. (Make scrolling upwards more
            # efficient.)
            i = max(0, i - self.MIN_LINES_BACKWARDS)

            if i == 0:
                row = 0
                column = 0
            else:
                row, column = get_syntax_sync().get_sync_start_position(document, i)

            # Find generator close to this point, or otherwise create a new one.
            # (The second lookup uses the adjusted 'i', so it may now succeed.)
            generator = find_closest_generator(i)
            if generator:
                return generator
            else:
                generator = create_line_generator(row, column)

            # If the column is not 0, ignore the first line. (Which is
            # incomplete. This happens when the synchronisation algorithm tells
            # us to start parsing in the middle of a line.)
            if column:
                next(generator)
                row += 1

            line_generators[generator] = row
            return generator

        def get_line(i: int) -> StyleAndTextTuples:
            "Return the tokens for a given line number."
            try:
                return cache[i]
            except KeyError:
                generator = get_generator(i)

                # Exhaust the generator, until we find the requested line.
                for num, line in generator:
                    cache[num] = line
                    if num == i:
                        line_generators[generator] = i

                        # Remove the next item from the cache.
                        # (It could happen that it's already there, because of
                        # another generator that started filling these lines,
                        # but we want to synchronise these lines with the
                        # current lexer's state.)
                        if num + 1 in cache:
                            del cache[num + 1]

                        return cache[num]
            # Generator exhausted without reaching line 'i' (i is past the
            # end of the document): no fragments for this line.
            return []

        return get_line