1"""
2 pygments.lexers.r
3 ~~~~~~~~~~~~~~~~~
4
5 Lexers for the R/S languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, include, do_insertions
14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
15 Number, Punctuation, Generic, Whitespace
16
17__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']
18
19
# Matches a single line including its trailing newline; RConsoleLexer uses
# this to split a console transcript into prompt/output lines.  Raw string
# per regex convention (identical behavior: the regex engine reads r'\n'
# as a newline escape).
line_re = re.compile(r'.*?\n')
21
22
class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.

    Lines beginning with ``>`` (prompt) or ``+`` (continuation prompt)
    are collected and highlighted as R source via :class:`SLexer`;
    every other line is emitted verbatim as ``Generic.Output``.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']
    url = 'https://www.r-project.org'
    version_added = ''
    _example = "rconsole/r-console-transcript.Rout"

    def get_tokens_unprocessed(self, text):
        # Sub-lexer used for the accumulated R input between prompts.
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            # One startswith() call with a tuple of prefixes instead of
            # two chained calls joined with `or`.
            if line.startswith(('>', '+')):
                # Colorize the prompt as such,
                # then put rest of line into current_code_block.
                # line[:2] covers the prompt character plus the space
                # that usually follows it.
                insertions.append((len(current_code_block),
                                   [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    yield from do_insertions(
                        insertions, slexer.get_tokens_unprocessed(current_code_block))
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself, this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            yield from do_insertions(
                insertions, slexer.get_tokens_unprocessed(current_code_block))
68
69
class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']
    url = 'https://www.r-project.org'
    version_added = '0.10'

    # An R identifier: either a backquoted name (with backslash escapes),
    # a regular name starting with a letter or a dot followed by a
    # letter/underscore/dot, or a lone dot.
    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
    tokens = {
        'comments': [
            # R has only single-line comments, introduced by '#'.
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'function_name': [
            # A name immediately followed by '(' (lookahead, so the paren
            # itself is tokenized by 'punctuation') is a function call.
            (rf'({valid_name})\s*(?=\()', Name.Function),
        ],
        'punctuation': [
            # [[ ]] (list extraction) as well as single brackets.
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            # Trailing (?![\w.]) prevents matching a keyword prefix of a
            # longer name such as 'iffy' or 'if.else'.
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            # Assignment arrows (<-, <<-, ->, ->>), comparisons, the
            # native pipe |>, logical operators and help operator '?'.
            (r'<<?-|->>?|-|==|<=|>=|\|>|<|>|&&?|!=|\|\|?|\?', Operator),
            # Arithmetic, user-defined %..% operators, model/slot/namespace
            # operators (~, $, @, :, ::, :::).
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            # NULL, the NA family, built-in vectors/constants, and the
            # ..., ..1, ..2, ... argument placeholders.
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            # T/F are ordinary (reassignable) variables, not keywords.
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number (optionally with binary exponent and L/i suffix)
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number; L marks integers, i marks imaginary numbers
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Whitespace),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('keywords'),
            include('function_name'),
            include('valid_name'),
            include('numbers'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            # catch-all for anything not matched above
            (r'.', Text),
        ],
        # 'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            # Consume escaped characters so an escaped quote does not end
            # the string.
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # The '<-' assignment arrow (not followed by another '-', which
        # would be part of '<<-' matched elsewhere) preceded by a typical
        # name/bracket character is a strong hint of R code.  Returns a
        # small confidence; implicitly returns None when absent.
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11
158
159
class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']
    url = 'http://cran.r-project.org/doc/manuals/R-exts.html'
    version_added = '1.6'

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments (Rd comments run from an unescaped '%' to end of line)
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros (#ifdef / #ifndef / #endif)
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            # single-character catch-all for anything not matched above
            (r'.', Text),
        ]
    }