Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/utils/code_analyzer.py: 27%


# :Author: Georg Brandl; Lea Wiemann; Günter Milde
# :Date: $Date$
# :Copyright: This module has been placed in the public domain.

"""Lexical analysis of formal languages (i.e. code) using Pygments."""

from __future__ import annotations

__docformat__ = 'reStructuredText'

try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
    with_pygments = True
except ImportError:
    with_pygments = False

from docutils import ApplicationError

# Filter the following token types from the list of class arguments:
unstyled_tokens = ['token',  # Token (base token type)
                   'text',   # Token.Text
                   '']       # short name for Token and Text
# (Add, e.g., Token.Punctuation with ``unstyled_tokens.append('punctuation')``.)

class LexerError(ApplicationError):
    pass

class Lexer:
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code       -- string of source code to parse,
      language   -- formal language the code is written in,
      tokennames -- either 'long', 'short', or 'none' (see below).

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(tokentype, value)``
    tuples. The value of `tokennames` configures the naming of the tokentype:

      'long':  downcased full token type name,
      'short': short name defined by pygments.token.STANDARD_TYPES
               (= class argument used in pygments html output),
      'none':  skip lexical analysis.
    """
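
    # For example (illustrative): a Token.Literal.Number.Integer token
    # maps to ['token', 'literal', 'number', 'integer'] with 'long'
    # (the `unstyled_tokens` filter below then drops 'token') and to
    # ['mi'] with 'short'.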

    def __init__(self, code, language, tokennames='short') -> None:
        """
        Set up a lexical analyzer for `code` in `language`.
        """
        self.code = code
        self.language = language
        self.tokennames = tokennames
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text') or tokennames == 'none':
            return
        if not with_pygments:
            raise LexerError('Cannot analyze code. '
                             'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except pygments.util.ClassNotFound:
            raise LexerError('Cannot analyze code. '
                             'No Pygments lexer found for "%s".' % language)

        # Since version 1.2 (released 2010-01-01), Pygments has a
        # TokenMergeFilter: ``self.merge(tokens)`` in __iter__ could be
        # replaced by ``self.lexer.add_filter('tokenmerge')`` here.
        # However, `merge` below also strips the final newline added by
        # Pygments.

    def merge(self, tokens):
        """Merge subsequent tokens of same token-type.

        Also strip the final newline (added by pygments).
        """
        tokens = iter(tokens)
        (lasttype, lastval) = next(tokens)
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield lasttype, lastval
                (lasttype, lastval) = (ttype, value)
        lastval = lastval.removesuffix('\n')
        if lastval:
            yield lasttype, lastval
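
    # For example (illustrative), the Pygments tokens
    #   [(Token.Name, 'x'), (Token.Text, ' '), (Token.Text, '\n')]
    # are merged to (Token.Name, 'x') and (Token.Text, ' '): the two
    # Text tokens combine to ' \n', then the trailing newline is stripped.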

    def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield [], self.code
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long':  # long CSS class args
                classes = str(tokentype).lower().split('.')
            else:  # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield classes, value
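

# Example use of `Lexer` (an illustrative sketch, not part of the module;
# the exact short class names depend on the installed Pygments version):
#
#     for classes, value in Lexer('x = 1\n', 'python', 'short'):
#         print(classes, repr(value))
#
# might print ['n'] 'x', ['w'] ' ', ['o'] '=', ['w'] ' ', ['mi'] '1', ...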


class NumberLines:
    """Insert linenumber-tokens at the start of every code line.

    Arguments

       tokens    -- iterable of ``(classes, value)`` tuples
       startline -- first line number
       endline   -- last line number

    Iterating over an instance yields the tokens with a
    ``(['ln'], '<the line number>')`` token added for every code line.
    Multi-line tokens are split."""

    def __init__(self, tokens, startline, endline) -> None:
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = f'%{len(str(endline))}d '

    def __iter__(self):
        lineno = self.startline
        yield ['ln'], self.fmt_str % lineno
        for ttype, value in self.tokens:
            lines = value.split('\n')
            for line in lines[:-1]:
                yield ttype, line + '\n'
                lineno += 1
                yield ['ln'], self.fmt_str % lineno
            yield ttype, lines[-1]
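

# A minimal end-to-end sketch (added for illustration, not part of the
# original module): feed the classified tokens from `Lexer` through
# `NumberLines`. Assumes Pygments is installed; the short class names in
# the output vary with the Pygments version.
if __name__ == '__main__':
    code = 'x = 1\nprint(x)\n'
    tokens = Lexer(code, 'python', 'short')
    # `endline` is only used to pad the line numbers (here: 2 -> '%1d ').
    numbered = NumberLines(tokens, startline=1, endline=code.count('\n'))
    for classes, value in numbered:
        print(classes, repr(value))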