Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/special.py: 39%

69 statements  

coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.special 

3 ~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Special lexers. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import ast 

12 

13from pygments.lexer import Lexer, line_re 

14from pygments.token import Token, Error, Text, Generic 

15from pygments.util import get_choice_opt 

16 

17 

18__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer'] 

19 

20 

class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        return TextLexer.priority
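# --- Usage sketch (example only, not part of pygments/lexers/special.py) ---
# Assuming a standard Pygments install, the "null" lexer emits its whole
# input as a single Token.Text token at offset 0.
from pygments.lexers.special import TextLexer
from pygments.token import Text

tokens = list(TextLexer().get_tokens_unprocessed("no highlighting\n"))
assert tokens == [(0, Text, "no highlighting\n")]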

class OutputLexer(Lexer):
    """
    Simple lexer that highlights everything as ``Token.Generic.Output``.

    .. versionadded:: 2.10
    """
    name = 'Text output'
    aliases = ['output']

    def get_tokens_unprocessed(self, text):
        yield 0, Generic.Output, text
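# --- Usage sketch (example only, not part of the original file) ---
# OutputLexer tags the entire input as Token.Generic.Output, which is handy
# for rendering captured program output next to highlighted source.
from pygments.lexers.special import OutputLexer
from pygments.token import Generic

out = list(OutputLexer().get_tokens_unprocessed("$ make\nall tests passed\n"))
assert out == [(0, Generic.Output, "$ make\nall tests passed\n")]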

_ttype_cache = {}

class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                val = match.group()
                ttype = Error
            yield length, ttype, val
            length += len(val)
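# --- Round-trip sketch (example only, assuming standard Pygments APIs) ---
# Dump a token stream with RawTokenFormatter, then rebuild it with
# RawTokenLexer; the raw format is one "Token.Type<TAB>repr(value)" per line.
from pygments import highlight
from pygments.formatters import RawTokenFormatter
from pygments.lexers import PythonLexer
from pygments.lexers.special import RawTokenLexer

raw = highlight("x = 1\n", PythonLexer(), RawTokenFormatter())  # bytes output
tokens = list(RawTokenLexer().get_tokens(raw))
print(tokens[0])  # e.g. (Token.Name, 'x') for the first token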