Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/r.py: 59%

41 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.r 

3 ~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for the R/S languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer, RegexLexer, include, do_insertions 

14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

15 Number, Punctuation, Generic, Whitespace 

16 

17__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer'] 

18 

19 

# Matches a single line, including its trailing newline (non-greedy so each
# match stops at the first '\n'); used to split console transcripts.
line_re = re.compile('.*?\n')

21 

22 

class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']

    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        code_buffer = ''
        prompt_insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith(('>', '+')):
                # Prompt line: emit the two-character prompt ('> ' or '+ ')
                # as Generic.Prompt and accumulate the rest of the line so a
                # whole code block can be highlighted as R source later.
                prompt_insertions.append(
                    (len(code_buffer), [(0, Generic.Prompt, line[:2])]))
                code_buffer += line[2:]
            else:
                # Non-prompt line — this is R output. Any buffered prompt
                # lines must be highlighted and woven back together with
                # their prompts before the output is emitted.
                if code_buffer:
                    yield from do_insertions(
                        prompt_insertions,
                        slexer.get_tokens_unprocessed(code_buffer))
                    # Start a fresh buffer for the next code block.
                    code_buffer = ''
                    prompt_insertions = []
                yield match.start(), Generic.Output, line

        # The transcript may end on a code block with no output after it;
        # flush that trailing block as well.
        if code_buffer:
            yield from do_insertions(
                prompt_insertions,
                slexer.get_tokens_unprocessed(code_buffer))

65 

66 

class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

    # An R identifier: a backquoted name (allowing backslash escapes inside
    # the backquotes), a plain name starting with a letter or a dot followed
    # by a letter/underscore/dot, or a lone dot.
    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
    tokens = {
        'comments': [
            # R comments run from '#' to end of line.
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            # Single and double brackets ([x] and [[x]]), parens, ';', ','.
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            # Reserved words; (?![\w.]) prevents matching a prefix of a
            # longer identifier such as 'iffy' or 'for.each'.
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            # Assignment arrows (<-, <<-, ->, ->>), comparisons, logical
            # operators, and help operator '?'. Tried before the second rule
            # so multi-character operators win.
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            # Arithmetic, user-defined %...% operators, '=', '~', '$', '@',
            # and namespace/sequence colons (:, ::, :::).
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            # Built-in constants, including the typed NA variants and the
            # '..1', '..2', ... / '...' dot arguments.
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            # T and F are (reassignable) built-in aliases for TRUE/FALSE.
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number (optional sign, exponent, and L/i integer or
            # imaginary suffix)
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Whitespace),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls: an identifier immediately (modulo whitespace) followed
            # by '(' is highlighted as a function name.
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            # Consume escaped chars and everything up to the closing quote.
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # The '<-' assignment operator (not '<--') after an identifier,
        # bracket, or whitespace is a weak hint that this is R/S code;
        # returns None implicitly (score 0) when absent.
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11

152 

153 

class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing. Rules below are tried in order; the
    # escaped-character rule must come before the generic macro rule.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments ('%' to end of line, unless escaped above)
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros (#ifdef / #ifndef / #endif)
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else up to the next special character
            (r'[^\\%\n{}]+', Text),
            # single-character fallback so no input is ever left unmatched
            (r'.', Text),
        ]
    }