Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/sql.py: 62%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2 pygments.lexers.sql
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for various SQL dialects and related interactive sessions.
7 Postgres specific lexers:
9 `PostgresLexer`
10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
11 lexer are:
13 - keywords and data types list parsed from the PG docs (run the
14 `_postgres_builtins` module to update them);
15 - Content of $-strings parsed using a specific lexer, e.g. the content
16 of a PL/Python function is parsed using the Python lexer;
17 - parse PG specific constructs: E-strings, $-strings, U&-strings,
18 different operators and punctuation.
20 `PlPgsqlLexer`
21 A lexer for the PL/pgSQL language. Adds a few specific construct on
22 top of the PG SQL lexer (such as <<label>>).
24 `PostgresConsoleLexer`
25 A lexer to highlight an interactive psql session:
27 - identifies the prompt and does its best to detect the end of command
28 in multiline statement where not all the lines are prefixed by a
29 prompt, telling them apart from the output;
30 - highlights errors in the output and notification levels;
31 - handles psql backslash commands.
33 `PostgresExplainLexer`
34 A lexer to highlight Postgres execution plan.
36 The ``tests/examplefiles`` contains a few test files with data to be
37 parsed by these lexers.
39 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
40 :license: BSD, see LICENSE for details.
41"""
43import collections
44import re
46from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
47from pygments.lexers import _googlesql_builtins
48from pygments.lexers import _mysql_builtins
49from pygments.lexers import _postgres_builtins
50from pygments.lexers import _sql_builtins
51from pygments.lexers import _tsql_builtins
52from pygments.lexers import get_lexer_by_name, ClassNotFound
53from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
54 Keyword, Name, String, Number, Generic, Literal
57__all__ = ['GoogleSqlLexer', 'PostgresLexer', 'PlPgsqlLexer',
58 'PostgresConsoleLexer', 'PostgresExplainLexer', 'SqlLexer',
59 'TransactSqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']
# One input line, up to and including its terminating newline.
line_re = re.compile('.*?\n')
# Prompt at the start of a line of a sqlite3 console session.
# NOTE(review): runs of spaces inside this literal may have been collapsed
# when this listing was extracted -- confirm against the real source file.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# ``LANGUAGE name`` clause (name optionally single-quoted), used by
# language_callback() to pick the lexer for a $-string body.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# ``DO`` keyword, used by language_callback() to recognise a DO statement.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Patterns consulted by the analyse_text() heuristics below.
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')    # [name]
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')     # `name`
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)            # GO
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)  # DECLARE @
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')            # @name
# Every GoogleSQL function name, keyword and type name, concatenated in that
# order; collected for analyse_text().
googlesql_identifiers = (
    _googlesql_builtins.functionnames
    + _googlesql_builtins.keywords
    + _googlesql_builtins.types
)
def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen by looking for a nearby LANGUAGE clause: first in
    the 100 characters that follow the string, then in the 100 characters
    that precede it (the last occurrence wins).  If neither is found but a
    DO keyword appears in the 25 characters before the string, the body is
    assumed to be plpgsql.  When no lexer can be determined the body is
    emitted as a plain string.

    :param lexer: the running lexer; it must expose ``text`` (the complete
        input) and ``_get_lexer(name)``.
    :param match: regex match whose groups 1-3 are the opening ``$tag$``,
        group 4 the body and groups 5-7 the closing ``$tag$``.
    :return: generator of ``(index, tokentype, value)`` tuples; every index
        is a position in the text the match was made on.
    """
    lx = None
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            # Several clauses may precede the string: use the closest one.
            lx = lexer._get_lexer(m[-1].group(1))
        elif do_re.search(
                lexer.text[max(0, match.start()-25):match.start()]):
            lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx is not None:
        # The sub-lexer only sees the body, so its indexes start at 0;
        # shift them so they stay consistent with the surrounding tokens.
        body_start = match.start(4)
        for index, token, value in lx.get_tokens_unprocessed(match.group(4)):
            yield (body_start + index, token, value)
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seems to suggest that regexp lexers are not really subclassable.
    """
    def get_tokens_unprocessed(self, text, *args):
        """Lex *text*, remembering it first for `language_callback`.

        Extra positional arguments are passed through to the next
        ``get_tokens_unprocessed`` in the MRO.
        """
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer for the procedural language *lang*, or None.

        ``sql`` maps to the ``postgresql`` lexer.  For any other name the
        name itself is tried first, then the name without a leading ``pl``,
        without a trailing ``u``, and without both.  The name is compared
        case-insensitively.  Lexers are created with this lexer's options.
        """
        # Normalise once, so that the prefix/suffix fallbacks below also
        # apply to names written in upper or mixed case (e.g. PLPYTHONU).
        lang = lang.lower()
        if lang == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                # Unknown alias: fall through to the next candidate.
                pass

        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'

    # Keywords and type names are matched regardless of case.
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # comments: single line, then (nestable) block comments
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # data types and pseudo-types; a space inside a multi-word
            # type name matches any run of whitespace
            (
                r'('
                + '|'.join(
                    s.replace(" ", r"\s+")
                    for s in _postgres_builtins.DATATYPES +
                    _postgres_builtins.PSEUDO_TYPES
                )
                + r')\b',
                Name.Builtin,
            ),
            (words(_postgres_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            # positional parameter: $1, $2, ...
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # string literal, with an optional E or U& prefix
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$ ... $tag$ string: the body is handed to language_callback
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        # Entered on every "/*" and left on every "*/", so nesting is tracked.
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        # Inside '...': a doubled quote stays in the string.
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        # Inside "...": a doubled quote stays in the identifier.
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']
    url = 'https://www.postgresql.org/docs/current/plpgsql.html'
    version_added = '1.5'

    flags = re.IGNORECASE
    # FIXME: use inheritance
    # Start from a copy of every PostgresLexer state, so that the edits
    # below do not touch the lists owned by PostgresLexer.
    tokens = {
        state_name: list(rules)
        for state_name, rules in PostgresLexer.tokens.items()
    }

    # extend the keywords list: swap the SQL keyword rule for one that also
    # knows the PL/pgSQL keywords
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] != Keyword:
            continue
        tokens['root'][i] = (
            words(_postgres_builtins.KEYWORDS +
                  _postgres_builtins.PLPGSQL_KEYWORDS, suffix=r'\b'),
            Keyword)
        del i
        break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),     # actually, a datatype
        (r':=', Operator),                   # assignment
        (r'\<\<[a-z]\w*\>\>', Name.Label),   # <<label>>
        (r'\#[a-z]\w*\b', Keyword.Pseudo),   # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Work on copies of the PostgresLexer states so they can be extended
    # without modifying the lists owned by PostgresLexer.
    tokens = {
        state_name: list(rules)
        for state_name, rules in PostgresLexer.tokens.items()
    }

    # A backslash command is tried after all the SQL rules and switches to
    # the 'psql-command' state for its arguments.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # the end of the line goes back to SQL
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        # a further backslash command
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, optionally quoted
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        # any other argument
        (r"[^\s]+", String.Symbol),
    ]
# Regular expressions used by PostgresConsoleLexer.

# Prompt at the start of a line: optional text, then one of = - ( $ ' "
# followed by # or >.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# A semicolon, optionally followed by a "--" comment, at the end of the text.
re_end_command = re.compile(r';\s*(--.*?)?$')
# Text consisting only of a backslash command followed by whitespace.
# (An earlier assignment to this same name was immediately overwritten by
# this one and has been dropped.)
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
# Message line: group 1 is the "LEVEL:" label, group 2 the rest of the line
# including the newline.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
class lookahead:
    """Wrap an iterator and allow pushing back an item.

    One item at a time can be pushed back with `send()`; it is returned by
    the following `next()` call, before the wrapped iterator is advanced
    again.  Pushing back a second item before the first one is consumed
    replaces the first one.
    """

    # Marker for "nothing pushed back".  A private object is used instead of
    # None so that None itself can be pushed back and returned.
    _EMPTY = object()

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = self._EMPTY

    def __iter__(self):
        return self

    def send(self, i):
        """Push *i* back so that it is the next item returned; return *i*."""
        self._nextitem = i
        return i

    def __next__(self):
        """Return the pushed-back item if there is one, else the next item
        of the wrapped iterator (raising StopIteration at its end)."""
        if self._nextitem is not self._EMPTY:
            ni = self._nextitem
            self._nextitem = self._EMPTY
            return ni
        return next(self.iter)
    # Alias so the object can also be advanced with ``obj.next()``.
    next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'
    _example = "psql/psql_session.txt"

    def get_tokens_unprocessed(self, data):
        """Yield ``(index, tokentype, value)`` tuples for a psql session.

        The input is processed line by line (only newline-terminated lines
        are picked up by ``line_re``) as a repeating cycle: first the lines
        of a command, then the lines of its output.
        """
        command_lexer = PsqlRegexLexer(**self.options)
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while True:

            # Command part: collect lines, with their prompts stripped,
            # until the end of the command is detected.
            command = ''
            prompts = []
            for line in lines:
                # A line starting with "$" before any command text is a
                # shell prompt (psql invoked from the command line).
                if not command and line.startswith('$'):
                    shell_lexer = get_lexer_by_name('console', **self.options)
                    yield from shell_lexer.get_tokens_unprocessed(line)
                    break

                # A psql prompt is remembered together with the position
                # in the command text at which it has to be re-inserted.
                found = re_prompt.match(line)
                if found is None:
                    command += line
                else:
                    prompt = found.group()
                    prompts.append(
                        (len(command), [(0, Generic.Prompt, prompt)]))
                    command += line[len(prompt):]

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if (re_psql_command.match(command)
                        or re_end_command.search(command)):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(
                prompts, command_lexer.get_tokens_unprocessed(command))

            # Output part: everything up to the next prompt.
            out_token = Generic.Output
            for line in lines:
                if re_prompt.match(line) is not None:
                    # push the line back to have it processed as a command
                    lines.send(line)
                    break

                message = re_message.match(line)
                if message is None:
                    yield (0, out_token, line)
                    continue

                level = message.group(1)
                # After an error, the rest of this output is an error too.
                if level.startswith(("ERROR", "FATAL")):
                    out_token = Generic.Error
                yield (message.start(1), Generic.Strong, level)
                yield (message.start(2), out_token, message.group(2))
            else:
                # No more lines: the session is over.
                return
class PostgresExplainLexer(RegexLexer):
    """
    Handle PostgreSQL EXPLAIN output
    """

    name = 'PostgreSQL EXPLAIN dialect'
    aliases = ['postgres-explain']
    filenames = ['*.explain']
    mimetypes = ['text/x-postgresql-explain']
    url = 'https://www.postgresql.org/docs/current/using-explain.html'
    version_added = '2.15'

    tokens = {
        'root': [
            (r'(:|\(|\)|ms|kB|->|\.\.|\,|\/|=|%|text)', Punctuation),
            (r'(\s+)', Whitespace),

            # This match estimated cost and effectively measured counters with ANALYZE
            # Then, we move to instrumentation state
            (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # Misc keywords
            (words(('actual', 'Memory Usage', 'Disk Usage', 'Memory', 'Buckets',
                    'Batches', 'originally', 'row', 'rows', 'Hits', 'Misses',
                    'Evictions', 'Overflows', 'Planned Partitions', 'Estimates',
                    'capacity', 'distinct keys', 'lookups', 'hit percent',
                    'Index Searches', 'Storage', 'Disk Maximum Storage'), suffix=r'\b'),
             Comment.Single),

            (r'(hit|read|dirtied|written|write|time|calls|records|bytes|allocated|used|output|format)(=)', bygroups(Comment.Single, Operator)),
            (r'(shared|temp|local)', Keyword.Pseudo),

            # We move to sort state in order to emphasize specific keywords (especially disk access)
            (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

            # These keywords can be followed by an object, like a table
            (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
             bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
            (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),

            # These keywords can be followed by a predicate
            (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
                    'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
                    'TID Cond', 'Run Condition', 'Order By', 'Function Call',
                    'Table Function Call', 'Inner Unique', 'Params Evaluated',
                    'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
                    'Relations', 'Remote SQL', 'Disabled'), suffix=r'\b'),
             Comment.Preproc, 'predicate'),

            # Special keyword to handle ON CONFLICT
            (r'Conflict(ing)? ', Comment.Preproc, 'conflict'),
            (r'(Tuples Inserted: )', Comment.Preproc, 'predicate'),

            # Special keyword for InitPlan or SubPlan
            (r'(InitPlan|SubPlan)( )(\d+)( )',
             bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
             'init_plan'),

            (words(('Sort Method', 'Join Filter', 'Planning time',
                    'Planning Time', 'Execution time', 'Execution Time',
                    'Workers Planned', 'Workers Launched', 'Buffers',
                    'Planning', 'Worker', 'Query Identifier', 'Time',
                    'Full-sort Groups', 'Pre-sorted Groups', 'Serialization'), suffix=r'\b'), Comment.Preproc),

            # Emphasize these keywords

            (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
                    'Rows Removed by Index Recheck',
                    'Heap Fetches', 'never executed'),
                   suffix=r'\b'), Name.Exception),
            (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),
            (r'(WAL)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),

            (words(_postgres_builtins.EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

            # join keywords
            (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
            (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
            (r'Backward', Comment.Preproc),
            (r'(Intersect|Except|Hash)', Comment.Preproc),

            (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),


            # Treat "on" and "using" as a punctuation
            (r'(on|using)', Punctuation, 'object_name'),


            # strings
            (r"'(''|[^'])*'", String.Single),
            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'(-?\d+)', Number.Integer),

            # boolean
            (r'(true|false)', Name.Constant),
            # explain header
            (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
            # Settings
            (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),

            # Handle JIT counters
            (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
            (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

            # Handle Triggers counters
            (r'(Trigger)( )(\S*)(:)( )',
             bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),

        ],
        'expression': [
            # matches any kind of parenthesized expression
            # the first opening paren is matched by the 'caller'
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            (r'(never executed)', Name.Exception),
            (r'[^)(]+', Comment),
        ],
        'object_name': [

            # This is a cost or analyze measure
            (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # if object_name is parenthesized, mark opening paren as
            # punctuation, call 'expression', and exit state
            (r'\(', Punctuation, 'expression'),
            (r'(on)', Punctuation),
            # matches possibly schema-qualified table and column names
            (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
            (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
            (r'\'\S*\'', Name.Variable),

            # if we encounter a comma, another object is listed
            (r',\n', Punctuation, 'object_name'),
            (r',', Punctuation, 'object_name'),

            # special case: "*SELECT*"
            (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
            (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
            (r'"ANY_subquery"', Name.Variable),

            # Variable $1 ...
            (r'\$\d+', Name.Variable),
            # cast
            (r'::\w+', Name.Variable),
            (r' +', Whitespace),
            (r'"', Punctuation),
            (r'\[\.\.\.\]', Punctuation),
            (r'\)', Punctuation, '#pop'),
        ],
        'predicate': [
            # if predicate is parenthesized, mark paren as punctuation
            (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
            # otherwise color until newline
            (r'[^\n]*', Name.Variable, '#pop'),
        ],
        'instrumentation': [
            (r'=|\.\.', Punctuation),
            (r' +', Whitespace),
            (r'(rows|width|time|loops)', Name.Class),
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),
            (r'\)', Punctuation, '#pop'),
        ],
        'conflict': [
            (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
            (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
            (r'(Filter: )', Comment.Preproc, 'predicate'),
            (r'(Tuples: )', Comment.Preproc, 'predicate'),
        ],
        'setting': [
            (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
            (r'\, ', Punctuation),
        ],
        'init_plan': [
            (r'\(', Punctuation),
            (r'returns \$\d+(,\$\d+)?', Name.Variable),
            (r'\)', Punctuation, '#pop'),
        ],
        'sort': [
            (r':|kB', Punctuation),
            # Same token type as the "Sort Method" label that enters this
            # state.  (This used to be spelled "Comment.Prepoc", which
            # silently created a different, unrelated token type.)
            (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
            (r'(external|merge|Disk|sort)', Name.Exception),
            (r'(\d+)', Number.Integer),
            (r' +', Whitespace),
        ],
    }
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']
    url = 'https://en.wikipedia.org/wiki/SQL'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_sql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (words(_sql_builtins.DATATYPES, suffix=r'\b'), Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        # Entered on every "/*" and left on every "*/", so nesting is tracked.
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Express no opinion on whether *text* is generic SQL.

        Takes only the text, like the ``analyse_text`` hooks of the other
        lexers in this module; the previous ``(self, text)`` signature did
        not match the way this hook is invoked.
        """
        return None
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']
    url = 'https://www.tsql.info'
    version_added = ''

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*[$|\n]?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            # "goto label" is tried before the plain keywords so that the
            # target is marked as a label
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # [bracketed name]
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        # Entered on every "/*" and left on every "*/", so nesting is tracked.
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Rate how likely *text* is to be T-SQL.

        A ``DECLARE @`` statement settles it with 1.0.  Otherwise points
        are collected for [bracketed] names outnumbering `backticked` ones,
        for ``@variables`` and for the ``GO`` keyword.
        """
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            return 1.0

        rating = 0
        backticks = len(name_between_backtick_re.findall(text))
        brackets = len(name_between_bracket_re.findall(text))
        # Without any quoted name both counts are 0 and "0 >= 2 * 0" would
        # always hold, hence the additional check that something was found.
        if backticks + brackets >= 1 and brackets >= 2 * backticks:
            # Found at least twice as many [name] as `name`.
            rating += 0.5
        elif brackets > backticks:
            rating += 0.2
        elif brackets > 0:
            rating += 0.1
        if tsql_variable_re.search(text) is not None:
            rating += 0.1
        if tsql_go_re.search(text) is not None:
            rating += 0.1
        return rating
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']
    url = 'https://www.mysql.com'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)\b(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(_mysql_builtins.MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'),
             Name.Constant),
            (words(_mysql_builtins.MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'),
             Keyword.Type),
            (words(_mysql_builtins.MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(_mysql_builtins.MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(_mysql_builtins.MYSQL_OPTIMIZER_HINTS, suffix=r'\b'),
             Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        """Rate how likely *text* is to be MySQL.

        The rating only depends on how `backticked` names compare in number
        with [bracketed] ones; it mirrors the T-SQL heuristic with the two
        quoting styles swapped.
        """
        backticks = len(name_between_backtick_re.findall(text))
        brackets = len(name_between_bracket_re.findall(text))
        # Without any quoted name both counts are 0 and "0 >= 2 * 0" would
        # always hold, hence the additional check that something was found.
        if backticks + brackets >= 1 and backticks >= 2 * brackets:
            # Found at least twice as many `name` as [name].
            return 0.5
        if backticks > brackets:
            return 0.2
        if backticks > 0:
            return 0.1
        return 0
886class GoogleSqlLexer(RegexLexer):
887 """
888 GoogleSQL is Google's standard SQL dialect, formerly known as ZetaSQL.
890 The list of keywords includes reserved words for future use.
891 """
893 name = 'GoogleSQL'
894 aliases = ['googlesql', 'zetasql']
895 filenames = ['*.googlesql', '*.googlesql.sql']
896 mimetypes = ['text/x-google-sql', 'text/x-google-sql-aux']
897 url = 'https://cloud.google.com/bigquery/googlesql'
898 version_added = '2.19'
900 flags = re.IGNORECASE
901 tokens = {
902 'root': [
903 (r'\s+', Whitespace),
905 # Comments
906 (r'(?:#|--\s+).*', Comment.Single),
907 (r'/\*', Comment.Multiline, 'multiline-comment'),
909 # Hexadecimal literals
910 (r"x'([0-9a-f]{2})+'", Number.Hex),
911 (r'0x[0-9a-f]+', Number.Hex),
913 # Binary literals
914 (r"b'[01]+'", Number.Bin),
915 (r'0b[01]+', Number.Bin),
917 # Numeric literals
918 (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
919 (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
920 (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
921 (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer), # Integers that are not in a schema object name
923 # Date literals
924 (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
925 Literal.Date),
927 # Time literals
928 (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
929 Literal.Date),
931 # Timestamp literals
932 (
933 r"\{\s*ts\s*(?P<quote>['\"])\s*"
934 r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part
935 r"\s+" # Whitespace between date and time
936 r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" # Time part
937 r"\s*(?P=quote)\s*\}",
938 Literal.Date
939 ),
941 # String literals
942 (r"'", String.Single, 'single-quoted-string'),
943 (r'"', String.Double, 'double-quoted-string'),
945 # Variables
946 (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
947 (r'@[a-z0-9_$.]+', Name.Variable),
948 (r"@'", Name.Variable, 'single-quoted-variable'),
949 (r'@"', Name.Variable, 'double-quoted-variable'),
950 (r"@`", Name.Variable, 'backtick-quoted-variable'),
951 (r'\?', Name.Variable), # For demonstrating prepared statements
953 # Exceptions; these words tokenize differently in different contexts.
954 (r'\b(set)(?!\s*\()', Keyword),
955 (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
957 # Constants, types, keywords, functions, operators
958 (words(_googlesql_builtins.constants, prefix=r'\b', suffix=r'\b'), Name.Constant),
959 (words(_googlesql_builtins.types, prefix=r'\b', suffix=r'\b'), Keyword.Type),
960 (words(_googlesql_builtins.keywords, prefix=r'\b', suffix=r'\b'), Keyword),
961 (words(_googlesql_builtins.functionnames, prefix=r'\b', suffix=r'\b(\s*)(\()'),
962 bygroups(Name.Function, Whitespace, Punctuation)),
963 (words(_googlesql_builtins.operators, prefix=r'\b', suffix=r'\b'), Operator),
965 # Schema object names
966 #
967 # Note: Although the first regex supports unquoted all-numeric
968 # identifiers, this will not be a problem in practice because
969 # numeric literals have already been handled above.
970 #
971 ('[0-9a-z$_\u0080-\uffff]+', Name),
972 (r'`', Name.Quoted, 'schema-object-name'),
974 # Punctuation
975 (r'[(),.;]', Punctuation),
976 ],
978 # Multiline comment substates
979 # ---------------------------
981 'multiline-comment': [
982 (r'[^*]+', Comment.Multiline),
983 (r'\*/', Comment.Multiline, '#pop'),
984 (r'\*', Comment.Multiline),
985 ],
987 # String substates
988 # ----------------
990 'single-quoted-string': [
991 (r"[^'\\]+", String.Single),
992 (r"''", String.Escape),
993 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
994 (r"'", String.Single, '#pop'),
995 ],
997 'double-quoted-string': [
998 (r'[^"\\]+', String.Double),
999 (r'""', String.Escape),
1000 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
1001 (r'"', String.Double, '#pop'),
1002 ],
1004 # Variable substates
1005 # ------------------
1007 'single-quoted-variable': [
1008 (r"[^']+", Name.Variable),
1009 (r"''", Name.Variable),
1010 (r"'", Name.Variable, '#pop'),
1011 ],
1013 'double-quoted-variable': [
1014 (r'[^"]+', Name.Variable),
1015 (r'""', Name.Variable),
1016 (r'"', Name.Variable, '#pop'),
1017 ],
1019 'backtick-quoted-variable': [
1020 (r'[^`]+', Name.Variable),
1021 (r'``', Name.Variable),
1022 (r'`', Name.Variable, '#pop'),
1023 ],
1025 # Schema object name substates
1026 # ----------------------------
1027 #
1028 # "Name.Quoted" and "Name.Quoted.Escape" are non-standard token
1029 # types. Formatters style them as "Name" by default, and can also
1030 # apply additional styles based on the full token name. This gives
1031 # users flexibility to add custom styles as desired.
1032 #
1033 'schema-object-name': [
1034 (r'[^`]+', Name.Quoted),
1035 (r'``', Name.Quoted.Escape),
1036 (r'`', Name.Quoted, '#pop'),
1037 ],
1038 }
def analyse_text(text):
    """
    Score *text* by how many of its whitespace-separated words are
    found in ``googlesql_identifiers``.

    Every occurrence of a matching word adds 0.001 to the score, so
    repeated words count once per appearance.
    """
    occurrences = collections.Counter(text.split())
    hits = 0
    for word, times_seen in occurrences.items():
        if word in googlesql_identifiers:
            hits += times_seen
    return 0.001 * hits
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    Lines matching ``sqlite_prompt_re`` are collected as SQL input and
    highlighted with `SqlLexer`; every other line is emitted as program
    output (or as a traceback when it starts with ``SQL error: ``).
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']
    url = 'https://www.sqlite.org'
    version_added = '0.11'
    _example = "sqlite3/sqlite3.sqlite3-console"

    def get_tokens_unprocessed(self, data):
        """
        Yield ``(index, tokentype, value)`` tuples for a console session.

        Consecutive prompt lines are buffered into one SQL snippet, which
        is lexed as a whole once a non-prompt line (or the end of the
        input) is reached. The prompt tokens are woven back in through
        ``do_insertions``.
        """
        sql_lexer = SqlLexer(**self.options)

        pending_sql = ''
        prompt_insertions = []
        for line_match in line_re.finditer(data):
            text = line_match.group()

            if sqlite_prompt_re.match(text) is not None:
                # The first 7 characters are emitted as the prompt and a
                # single space as whitespace; the SQL starts at index 8.
                # NOTE(review): assumes every prompt matched by
                # sqlite_prompt_re is 7 characters plus one space --
                # confirm against its definition.
                offset = len(pending_sql)
                prompt_insertions.append(
                    (offset, [(0, Generic.Prompt, text[:7])]))
                prompt_insertions.append(
                    (offset, [(7, Whitespace, ' ')]))
                pending_sql += text[8:]
                continue

            # A non-prompt line ends the current statement: flush the
            # buffered SQL before emitting the output line itself.
            if pending_sql:
                yield from do_insertions(
                    prompt_insertions,
                    sql_lexer.get_tokens_unprocessed(pending_sql))
                pending_sql = ''
                prompt_insertions = []

            is_error = text.startswith('SQL error: ')
            output_type = Generic.Traceback if is_error else Generic.Output
            yield (line_match.start(), output_type, text)

        # Input that ends on a prompt line still has SQL left to emit.
        if pending_sql:
            yield from do_insertions(
                prompt_insertions,
                sql_lexer.get_tokens_unprocessed(pending_sql))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.
    """

    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']
    url = 'http://www.logilab.org/project/rql'
    version_added = '2.0'

    # All rules below are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Keywords; listed before the generic name rule so that they
            # are not tokenized as plain names.
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING'
             r'|OR|AND|NOT'
             r'|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),

            # Operators
            (r'[+*/<>=%-]', Operator),

            # Builtin names
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),

            # Integer literals
            (r'[0-9]+', Number.Integer),

            # Names, optionally followed by a single "?"
            (r'[A-Z_]\w*\??', Name),

            # String literals; a doubled quote character stays inside the
            # string. Both quote styles are emitted as String.Single.
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),

            # Punctuation
            (r'[;:()\[\],.]', Punctuation),
        ],
    }