1"""
2 pygments.lexers.prql
3 ~~~~~~~~~~~~~~~~~~~~
4
5 Lexer for the PRQL query language.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11from pygments.lexer import RegexLexer, combined, words, include, bygroups
12from pygments.token import Comment, Literal, Keyword, Name, Number, Operator, \
13 Punctuation, String, Text, Whitespace
14
15__all__ = ['PrqlLexer']
16
17
class PrqlLexer(RegexLexer):
    """
    For PRQL source code.

    grammar: https://github.com/PRQL/prql/tree/main/grammars

    The lexer is purely table driven: ``tokens`` maps state names to rule
    lists.  ``root`` handles line-oriented constructs (comments, the
    ``prql`` header, ``module``/``let`` declarations) and then falls back
    to ``expr`` for strings, dates, numbers, operators and identifiers.
    """

    name = 'PRQL'
    url = 'https://prql-lang.org/'
    aliases = ['prql']
    filenames = ['*.prql']
    mimetypes = ['application/prql', 'application/x-prql']
    version_added = '2.17'

    # Built-in type names.  The trailing \b stops e.g. "int" from matching
    # the beginning of a longer identifier such as "interval".
    builtinTypes = words((
        "bool",
        "int",
        "int8", "int16", "int32", "int64", "int128",
        "float",
        "text",
        "set"), suffix=r'\b')

    def innerstring_rules(ttype):
        """Return the rules for the body of a plain (non-interpolated) string.

        This is a plain function (no ``self``): it is only called while the
        class body is being executed, to build the ``strings-*`` states.

        ``ttype`` is the token type given to ordinary string content.
        ``{...}`` format fields are emitted as ``String.Interpol``.
        """
        return [
            # the new style '{}'.format(...) string formatting
            (r'\{'
             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             r'\}', String.Interpol),

            # runs of ordinary characters
            (r'[^\\\'"%{\n]+', ttype),
            # a lone quote or backslash that no earlier rule consumed
            (r'[\'"\\]', ttype),
            # '%' or an opening brace that did not form a format field
            (r'%|(\{{1,2})', ttype)
        ]

    def fstring_rules(ttype):
        """Return the rules for the body of an interpolated (f/s) string.

        Like ``innerstring_rules`` this is a class-body helper without
        ``self``.  ``ttype`` is the token type for literal string content;
        ``{`` switches to the ``expr-inside-fstring`` state so the embedded
        expression is lexed with the normal expression rules.
        """
        return [
            # closing brace left over after a format specifier
            (r'\}', String.Interpol),
            (r'\{', String.Interpol, 'expr-inside-fstring'),
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
        ]

    tokens = {
        'root': [

            # Comments ('#!' doc comments must be tried before plain '#')
            (r'#!.*', String.Doc),
            (r'#.*', Comment.Single),

            # Whitespace
            (r'\s+', Whitespace),

            # Modules
            (r'^(\s*)(module)(\s*)',
             bygroups(Whitespace, Keyword.Namespace, Whitespace),
             'imports'),

            (builtinTypes, Keyword.Type),

            # Main
            (r'^prql ', Keyword.Reserved),

            # \b is required: without it identifiers that merely start
            # with "let" (e.g. "letter") were split into keyword + rest.
            (r'let\b', Keyword.Declaration),

            include('keywords'),
            include('expr'),

            # Transforms
            # NOTE(review): include('expr') above ends with an identifier
            # rule using this same pattern without the '^' anchor, so this
            # rule appears unreachable as written.  Left in place to avoid
            # changing highlighting; confirm the intended ordering.
            (r'^[A-Za-z_][a-zA-Z0-9_]*', Keyword),
        ],
        'expr': [
            # non-raw f-strings
            ('(f)(""")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'tdqf')),
            ("(f)(''')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'tsqf')),
            ('(f)(")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'dqf')),
            ("(f)(')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'sqf')),

            # non-raw s-strings (share the interpolating string states)
            ('(s)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqf')),
            ("(s)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqf')),
            ('(s)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqf')),
            ("(s)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqf')),

            # raw strings (no escape state is combined in)
            ('(?i)(r)(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("(?i)(r)(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('(?i)(r)(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("(?i)(r)(')",
             bygroups(String.Affix, String.Single), 'sqs'),

            # non-raw strings
            ('"""', String.Double, combined('stringescape', 'tdqs')),
            ("'''", String.Single, combined('stringescape', 'tsqs')),
            ('"', String.Double, combined('stringescape', 'dqs')),
            ("'", String.Single, combined('stringescape', 'sqs')),

            # Time and dates (timestamp first, then date, then time)
            (r'@\d{4}-\d{2}-\d{2}T\d{2}(:\d{2})?(:\d{2})?(\.\d{1,6})?(Z|[+-]\d{1,2}(:\d{1,2})?)?', Literal.Date),
            (r'@\d{4}-\d{2}-\d{2}', Literal.Date),
            (r'@\d{2}(:\d{2})?(:\d{2})?(\.\d{1,6})?(Z|[+-]\d{1,2}(:\d{1,2})?)?', Literal.Date),

            (r'[^\S\n]+', Text),
            include('numbers'),
            # Multi-character operators must precede the single-character
            # class below.  '..' is the range operator.
            (r'->|=>|==|!=|>=|<=|~=|&&|\|\||\?\?|\/\/|\.\.', Operator),
            (r'[-~+/*%=<>&^|.@]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            include('functions'),

            # Variable Names
            (r'[A-Za-z_][a-zA-Z0-9_]*', Name.Variable),
        ],
        'numbers': [
            # The (?!\.) lookahead keeps "1." from being taken as a float
            # when it is really the start of a range such as "1..10".
            (r'(\d(?:_?\d)*\.(?!\.)(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'fstringescape': [
            include('stringescape'),
        ],
        'bytesescape': [
            (r'\\([\\bfnrt"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'stringescape': [
            (r'\\(N\{.*?\}|u\{[a-fA-F0-9]{1,6}\})', String.Escape),
            include('bytesescape')
        ],
        # String body rule sets, one per quote style / interpolation mode.
        'fstrings-single': fstring_rules(String.Single),
        'fstrings-double': fstring_rules(String.Double),
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # Single-line string states: the closing quote pops the state.
        'dqf': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('fstrings-double')
        ],
        'sqf': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('fstrings-single')
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        # Triple-quoted string states: newlines are part of the string.
        'tdqf': [
            (r'"""', String.Double, '#pop'),
            include('fstrings-double'),
            (r'\n', String.Double)
        ],
        'tsqf': [
            (r"'''", String.Single, '#pop'),
            include('fstrings-single'),
            (r'\n', String.Single)
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],

        'expr-inside-fstring': [
            # nested brackets get their own state so that a ':' or '}'
            # inside them does not end the interpolation
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            # without format specifier
            (r'(=\s*)?'  # debug (https://bugs.python.org/issue36817)
             r'\}', String.Interpol, '#pop'),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (r'(=\s*)?'  # debug (https://bugs.python.org/issue36817)
             r':', String.Interpol, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        'expr-inside-fstring-inner': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            (r'[])}]', Punctuation, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        'keywords': [
            (words((
                'into', 'case', 'type', 'module', 'internal',
            ), suffix=r'\b'),
                Keyword),
            (words(('true', 'false', 'null'), suffix=r'\b'), Keyword.Constant),
        ],
        'functions': [
            (words((
                "min", "max", "sum", "average", "stddev", "every", "any",
                "concat_array", "count", "lag", "lead", "first", "last",
                "rank", "rank_dense", "row_number", "round", "as", "in",
                "tuple_every", "tuple_map", "tuple_zip", "_eq", "_is_null",
                "from_text", "lower", "upper", "read_parquet", "read_csv"),
                suffix=r'\b'),
             Name.Function),
        ],

        # NOTE(review): no rule in this class enters the 'comment' state;
        # it is kept so that the set of state names stays unchanged.
        'comment': [
            (r'-(?!\})', Comment.Multiline),
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[^-}]', Comment.Multiline),
            (r'-\}', Comment.Multiline, '#pop'),
        ],

        # Entered after the 'module' keyword: one dotted name, then back.
        'imports': [
            (r'\w+(\.\w+)*', Name.Class, '#pop'),
        ],
    }