Coverage for docutils/utils/code_analyzer.py: 25% (64 statements)


# :Author: Georg Brandl; Lea Wiemann; Günter Milde
# :Date: $Date$
# :Copyright: This module has been placed in the public domain.

"""Lexical analysis of formal languages (i.e. code) using Pygments."""

from docutils import ApplicationError

try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
    with_pygments = True
except ImportError:
    with_pygments = False

# Filter the following token types from the list of class arguments:
unstyled_tokens = ['token',  # Token (base token type)
                   'text',   # Token.Text
                   '']       # short name for Token and Text
# (Add, e.g., Token.Punctuation with ``unstyled_tokens += ['punctuation']``.)


class LexerError(ApplicationError):
    pass


class Lexer:
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code       -- string of source code to parse,
      language   -- formal language the code is written in,
      tokennames -- either 'long', 'short', or 'none' (see below).

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(tokentype, value)``
    tuples. The value of `tokennames` configures the naming of the tokentype:

      'long':  downcased full token type name,
      'short': short name defined by pygments.token.STANDARD_TYPES
               (= class argument used in pygments html output),
      'none':  skip lexical analysis.
    """

    def __init__(self, code, language, tokennames='short'):
        """Set up a lexical analyzer for `code` in `language`."""
        self.code = code
        self.language = language
        self.tokennames = tokennames
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text') or tokennames == 'none':
            return
        if not with_pygments:
            raise LexerError('Cannot analyze code. '
                             'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except pygments.util.ClassNotFound:
            raise LexerError('Cannot analyze code. '
                             'No Pygments lexer found for "%s".' % language)
        # Since version 1.2 (released Jan 01, 2010) Pygments has a
        # TokenMergeFilter; ``self.merge(tokens)`` in __iter__ could be
        # replaced by ``self.lexer.add_filter('tokenmerge')`` here.
        # However, `merge` below also strips a final newline added by
        # Pygments.

    def merge(self, tokens):
        """Merge subsequent tokens of the same token-type.

        Also strip the final newline (added by Pygments).
        """
        tokens = iter(tokens)
        (lasttype, lastval) = next(tokens)
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield lasttype, lastval
                (lasttype, lastval) = (ttype, value)
        if lastval.endswith('\n'):
            lastval = lastval[:-1]
        if lastval:
            yield lasttype, lastval
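
    # Illustrative note (added commentary, not part of the original module):
    # given Pygments output such as
    #     [(Token.Name, 'a'), (Token.Name, 'b'), (Token.Text, '\n')]
    # `merge` yields just ``(Token.Name, 'ab')`` -- the two Name tokens are
    # merged, and the trailing newline token is stripped entirely.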

    def __iter__(self):
        """Parse self.code and yield "classified" tokens."""
        if self.lexer is None:
            yield [], self.code
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long':  # long CSS class args
                classes = str(tokentype).lower().split('.')
            else:  # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield classes, value
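
# Usage sketch (illustrative; the exact classes yielded depend on the
# installed Pygments version and its STANDARD_TYPES table):
#
#     for classes, value in Lexer('print("hello")', 'python', 'short'):
#         print(classes, repr(value))
#
# With tokennames='none' (or language '' or 'text') the code is passed
# through unanalyzed as a single ``([], code)`` token.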


class NumberLines:
    """Insert linenumber-tokens at the start of every code line.

    Arguments

      tokens    -- iterable of ``(classes, value)`` tuples
      startline -- first line number
      endline   -- last line number

    Iterating over an instance yields the tokens with a
    ``(['ln'], '<the line number>')`` token added for every code line.
    Multi-line tokens are split."""

    def __init__(self, tokens, startline, endline):
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = '%%%dd ' % len(str(endline))
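        # (Illustrative addition: with endline == 100 the format string is
        # '%3d ', so line 7 renders as '  7 '.)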

127 def __iter__(self): 

128 lineno = self.startline 

129 yield ['ln'], self.fmt_str % lineno 

130 for ttype, value in self.tokens: 

131 lines = value.split('\n') 

132 for line in lines[:-1]: 

133 yield ttype, line + '\n' 

134 lineno += 1 

135 yield ['ln'], self.fmt_str % lineno 

136 yield ttype, lines[-1]