Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/sql.py: 46%
232 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.sql
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for various SQL dialects and related interactive sessions.
7 Postgres specific lexers:
9 `PostgresLexer`
10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
11 lexer are:
13 - keywords and data types list parsed from the PG docs (run the
14 `_postgres_builtins` module to update them);
15 - Content of $-strings parsed using a specific lexer, e.g. the content
16 of a PL/Python function is parsed using the Python lexer;
17 - parse PG specific constructs: E-strings, $-strings, U&-strings,
18 different operators and punctuation.
20 `PlPgsqlLexer`
21 A lexer for the PL/pgSQL language. Adds a few specific constructs on
22 top of the PG SQL lexer (such as <<label>>).
24 `PostgresConsoleLexer`
25 A lexer to highlight an interactive psql session:
27 - identifies the prompt and does its best to detect the end of command
28 in multiline statement where not all the lines are prefixed by a
29 prompt, telling them apart from the output;
30 - highlights errors in the output and notification levels;
31 - handles psql backslash commands.
33 `PostgresExplainLexer`
34 A lexer to highlight Postgres execution plan.
36 The ``tests/examplefiles`` contains a few test files with data to be
37 parsed by these lexers.
39 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
40 :license: BSD, see LICENSE for details.
41"""
43import re
45from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
46from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
47 Keyword, Name, String, Number, Generic, Literal
48from pygments.lexers import get_lexer_by_name, ClassNotFound
50from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
51 PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
52from pygments.lexers._mysql_builtins import \
53 MYSQL_CONSTANTS, \
54 MYSQL_DATATYPES, \
55 MYSQL_FUNCTIONS, \
56 MYSQL_KEYWORDS, \
57 MYSQL_OPTIMIZER_HINTS
59from pygments.lexers import _tsql_builtins
62__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
63 'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
64 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']
# Matches one line of input including its trailing newline.
line_re = re.compile('.*?\n')
# Primary and continuation prompts of the sqlite3 command-line client.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# LANGUAGE clause near a $-string; group 1 is the language name
# (used by language_callback to pick the inner lexer).
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO keyword introduces an anonymous block, assumed to be plpgsql.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer

    The lexer is chosen looking for a nearby LANGUAGE or assumed as
    plpgsql if inside a DO statement and no LANGUAGE has been found.
    """
    def _pick_inner_lexer():
        # 1. LANGUAGE clause right after the string (within 100 chars)?
        after = lexer.text[match.end():match.end() + 100]
        found = language_re.match(after)
        if found is not None:
            return lexer._get_lexer(found.group(1))
        # 2. LANGUAGE clause shortly before the string? take the closest.
        before = lexer.text[max(0, match.start() - 100):match.start()]
        hits = list(language_re.finditer(before))
        if hits:
            return lexer._get_lexer(hits[-1].group(1))
        # 3. Inside a DO statement? default to plpgsql.
        context = lexer.text[max(0, match.start() - 25):match.start()]
        if list(do_re.finditer(context)):
            return lexer._get_lexer('plpgsql')
        return None

    lx = _pick_inner_lexer()

    # Opening delimiter: group 1 = $, 2 = tag, 3 = $
    for g in (1, 2, 3):
        token = String.Delimiter if g == 2 else String
        yield (match.start(g), token, match.group(g))
    # Group 4 = string contents: delegate to the inner lexer when found
    if lx:
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # Closing delimiter: group 5 = $, 6 = tag, 7 = $
    for g in (5, 6, 7):
        token = String.Delimiter if g == 6 else String
        yield (match.start(g), token, match.group(g))
class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor.  If
    they had, ``_tokens`` could be created on that ancestor and not updated
    for the other classes, resulting e.g. in PL/pgSQL parsed as SQL.  This
    shortcoming seems to suggest that regexp lexers are not really
    subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep a reference to the full input: `language_callback` needs it
        # to search for a LANGUAGE clause around a $-string.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer for the PG language name *lang*, or None.

        Derives extra candidate aliases from the name: e.g. for
        'plpythonu' also 'pythonu', 'plpython' and 'python' are tried.
        """
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        candidates = [lang]
        has_pl_prefix = lang.startswith('pl')
        has_u_suffix = lang.endswith('u')
        if has_pl_prefix:
            candidates.append(lang[2:])
        if has_u_suffix:
            candidates.append(lang[:-1])
        if has_pl_prefix and has_u_suffix:
            candidates.append(lang[2:-1])

        for name in candidates:
            try:
                return get_lexer_by_name(name, **self.options)
            except ClassNotFound:
                pass
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    # NOTE: rule order matters, and PlPgsqlLexer/PsqlRegexLexer copy and
    # mutate this table (they locate the Keyword rule by its token type),
    # so be careful when reordering or re-tokenizing rules here.
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Builtin and pseudo data types; multi-word type names accept
            # any whitespace between the words.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional parameter, e.g. $1
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $-quoted string; content is delegated to the lexer of the
            # nearby LANGUAGE clause (see language_callback)
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the push
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote escapes a quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled double-quote escape
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Copy each state's rule list so the mutations below do not leak
    # back into PostgresLexer.tokens.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list: find the SQL Keyword rule by token type
    # and replace it with one that also knows the PL/pgSQL keywords
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i  # avoid leaving the loop variable as a class attribute
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),  # block label
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.  It is used internally by PostgresConsoleLexer.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Copy each state's rule list so the additions below do not leak
    # back into PostgresLexer.tokens.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' rather than '#pop', so each
        # newline inside a backslash command grows the state stack --
        # looks harmless in practice but worth confirming upstream.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable interpolation: :var, :'var', :"var"
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
# Regular expressions used by PostgresConsoleLexer to split a psql session
# into prompts, commands, and output/notice lines.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
# A backslash (psql) command on a line of its own, split into leading
# whitespace, the command itself, and trailing whitespace.
# (A simpler duplicate definition that was immediately overwritten has
# been removed.)
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
# Notice/message prefix followed by the rest of the line.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
class lookahead:
    """Wrap an iterator and allow pushing back an item.

    Used by PostgresConsoleLexer to give back a line that turns out to
    belong to the next prompt/command cycle.
    """

    # Private sentinel marking "no pushed-back item".  Using an object()
    # instead of None (the previous implementation) means any value --
    # including None itself -- can be pushed back without being lost.
    _MISSING = object()

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = self._MISSING

    def __iter__(self):
        return self

    def send(self, i):
        """Push *i* back; the next call to ``next()`` will return it."""
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not self._MISSING:
            ni = self._nextitem
            self._nextitem = self._MISSING
            return ni
        return next(self.iter)

    # Backward-compatible alias for the old iterator protocol name.
    next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    Splits the input into prompt/command/output cycles: command lines are
    highlighted with PsqlRegexLexer, prompts and output get Generic tokens.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets us push a line back when it belongs to the
        # next prompt/command cycle
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # record the prompt as an insertion so it can be merged
                    # back into the command token stream by do_insertions()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # once an ERROR/FATAL notice is seen, the remaining
                    # output of this cycle is rendered as Generic.Error
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # input exhausted without hitting a new prompt: done
                return
class PostgresExplainLexer(RegexLexer):
    """
    Handle PostgreSQL EXPLAIN output

    .. versionadded:: 2.15
    """

    name = 'PostgreSQL EXPLAIN dialect'
    aliases = ['postgres-explain']
    filenames = ['*.explain']
    mimetypes = ['text/x-postgresql-explain']

    tokens = {
        'root': [
            (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
            (r'(\s+)', Whitespace),

            # This match estimated cost and effectively measured counters with ANALYZE
            # Then, we move to instrumentation state
            (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # Misc keywords
            (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
                    'originally', 'row', 'rows', 'Hits', 'Misses',
                    'Evictions', 'Overflows'), suffix=r'\b'),
             Comment.Single),

            (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
            (r'(shared|temp|local)', Keyword.Pseudo),

            # We move to sort state in order to emphasize specific keywords (especially disk access)
            (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

            # These keywords can be followed by an object, like a table
            (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
             bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
            (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),

            # These keywords can be followed by a predicate
            (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
                    'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
                    'TID Cond', 'Run Condition', 'Order By', 'Function Call',
                    'Table Function Call', 'Inner Unique', 'Params Evaluated',
                    'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
                    'Relations', 'Remote SQL'), suffix=r'\b'),
             Comment.Preproc, 'predicate'),

            # Special keyword to handle ON CONFLICT
            (r'Conflict ', Comment.Preproc, 'conflict'),

            # Special keyword for InitPlan or SubPlan
            (r'(InitPlan|SubPlan)( )(\d+)( )',
             bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
             'init_plan'),

            (words(('Sort Method', 'Join Filter', 'Planning time',
                    'Planning Time', 'Execution time', 'Execution Time',
                    'Workers Planned', 'Workers Launched', 'Buffers',
                    'Planning', 'Worker', 'Query Identifier', 'Time',
                    'Full-sort Groups'), suffix=r'\b'), Comment.Preproc),

            # Emphasize these keywords
            (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
                    'Rows Removed by Index Recheck',
                    'Heap Fetches', 'never executed'),
                   suffix=r'\b'), Name.Exception),
            (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),

            (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

            # join keywords
            (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
            (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
            (r'Backward', Comment.Preproc),
            (r'(Intersect|Except|Hash)', Comment.Preproc),

            (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),

            # Treat "on" and "using" as a punctuation
            (r'(on|using)', Punctuation, 'object_name'),

            # strings
            (r"'(''|[^'])*'", String.Single),
            # numbers
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),

            # boolean
            (r'(true|false)', Name.Constant),
            # explain header
            (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
            # Settings
            (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),

            # Handle JIT counters
            (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
            (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

            # Handle Triggers counters
            (r'(Trigger)( )(\S*)(:)( )',
             bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),
        ],
        'expression': [
            # matches any kind of parenthesized expression
            # the first opening paren is matched by the 'caller'
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            (r'(never executed)', Name.Exception),
            (r'[^)(]+', Comment),
        ],
        'object_name': [
            # This is a cost or analyze measure
            (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # if object_name is parenthesized, mark opening paren as
            # punctuation, call 'expression', and exit state
            (r'\(', Punctuation, 'expression'),
            (r'(on)', Punctuation),
            # matches possibly schema-qualified table and column names
            (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
            (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
            (r'\'\S*\'', Name.Variable),

            # if we encounter a comma, another object is listed
            (r',\n', Punctuation, 'object_name'),
            (r',', Punctuation, 'object_name'),

            # special case: "*SELECT*"
            (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
            (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
            (r'"ANY_subquery"', Name.Variable),

            # Variable $1 ...
            (r'\$\d+', Name.Variable),
            # cast
            (r'::\w+', Name.Variable),
            (r' +', Whitespace),
            (r'"', Punctuation),
            (r'\[\.\.\.\]', Punctuation),
            (r'\)', Punctuation, '#pop'),
        ],
        'predicate': [
            # if predicate is parenthesized, mark paren as punctuation
            (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
            # otherwise color until newline
            (r'[^\n]*', Name.Variable, '#pop'),
        ],
        'instrumentation': [
            (r'=|\.\.', Punctuation),
            (r' +', Whitespace),
            (r'(rows|width|time|loops)', Name.Class),
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),
            (r'\)', Punctuation, '#pop'),
        ],
        'conflict': [
            (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
            (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
            (r'(Filter: )', Comment.Preproc, 'predicate'),
        ],
        'setting': [
            (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
            (r'\, ', Punctuation),
        ],
        'init_plan': [
            (r'\(', Punctuation),
            (r'returns \$\d+(,\$\d+)?', Name.Variable),
            (r'\)', Punctuation, '#pop'),
        ],
        'sort': [
            (r':|kB', Punctuation),
            # FIX: this was ``Comment.Prepoc`` (typo).  Pygments token
            # attribute access silently auto-creates unknown subtokens, so
            # the typo yielded an unstyled bogus token instead of the
            # intended Comment.Preproc.
            (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
            (r'(external|merge|Disk|sort)', Name.Exception),
            (r'(\d+)', Number.Integer),
            (r' +', Whitespace),
        ],
    }
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # ANSI SQL keywords (plus a few PostgreSQL-flavoured extras).
            # FIX: 'TEMP' had a stray leading space (' TEMP'), which made it
            # impossible to match with the \b-suffixed words() pattern.
            # NOTE(review): a few entries look like historical typos
            # ('INDITCATOR', 'SQLWARNINIG', 'PRECEEDS') -- kept as-is to
            # avoid changing highlighting beyond the clear-cut fix.
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            # Builtin data type names
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(self, text):
        # Deliberately returns None: plain SQL is the fallback dialect and
        # should not outbid the more specific SQL lexers.
        return
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # bracket-quoted identifier, e.g. [My Table]
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),  # system variables, e.g. @@ROWCOUNT
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    # Note: pygments calls analyse_text without an instance, hence no self.
    # Returns a rating in [0, 1] estimating how T-SQL-like the text is.
    def analyse_text(text):
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
            return rating
776class MySqlLexer(RegexLexer):
777 """The Oracle MySQL lexer.
779 This lexer does not attempt to maintain strict compatibility with
780 MariaDB syntax or keywords. Although MySQL and MariaDB's common code
781 history suggests there may be significant overlap between the two,
782 compatibility between the two is not a target for this lexer.
783 """
785 name = 'MySQL'
786 aliases = ['mysql']
787 mimetypes = ['text/x-mysql']
789 flags = re.IGNORECASE
790 tokens = {
791 'root': [
792 (r'\s+', Whitespace),
794 # Comments
795 (r'(?:#|--\s+).*', Comment.Single),
796 (r'/\*\+', Comment.Special, 'optimizer-hints'),
797 (r'/\*', Comment.Multiline, 'multiline-comment'),
799 # Hexadecimal literals
800 (r"x'([0-9a-f]{2})+'", Number.Hex), # MySQL requires paired hex characters in this form.
801 (r'0x[0-9a-f]+', Number.Hex),
803 # Binary literals
804 (r"b'[01]+'", Number.Bin),
805 (r'0b[01]+', Number.Bin),
807 # Numeric literals
808 (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
809 (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
810 (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
811 (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer), # Integers that are not in a schema object name
813 # Date literals
814 (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
815 Literal.Date),
817 # Time literals
818 (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
819 Literal.Date),
821 # Timestamp literals
822 (
823 r"\{\s*ts\s*(?P<quote>['\"])\s*"
824 r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part
825 r"\s+" # Whitespace between date and time
826 r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" # Time part
827 r"\s*(?P=quote)\s*\}",
828 Literal.Date
829 ),
831 # String literals
832 (r"'", String.Single, 'single-quoted-string'),
833 (r'"', String.Double, 'double-quoted-string'),
835 # Variables
836 (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
837 (r'@[a-z0-9_$.]+', Name.Variable),
838 (r"@'", Name.Variable, 'single-quoted-variable'),
839 (r'@"', Name.Variable, 'double-quoted-variable'),
840 (r"@`", Name.Variable, 'backtick-quoted-variable'),
841 (r'\?', Name.Variable), # For demonstrating prepared statements
843 # Operators
844 (r'[!%&*+/:<=>^|~-]+', Operator),
846 # Exceptions; these words tokenize differently in different contexts.
847 (r'\b(set)(?!\s*\()', Keyword),
848 (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
849 # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.
851 (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
852 (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
853 (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
854 (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
855 bygroups(Name.Function, Whitespace, Punctuation)),
857 # Schema object names
858 #
859 # Note: Although the first regex supports unquoted all-numeric
860 # identifiers, this will not be a problem in practice because
861 # numeric literals have already been handled above.
862 #
863 ('[0-9a-z$_\u0080-\uffff]+', Name),
864 (r'`', Name.Quoted, 'schema-object-name'),
866 # Punctuation
867 (r'[(),.;]', Punctuation),
868 ],
870 # Multiline comment substates
871 # ---------------------------
873 'optimizer-hints': [
874 (r'[^*a-z]+', Comment.Special),
875 (r'\*/', Comment.Special, '#pop'),
876 (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
877 ('[a-z]+', Comment.Special),
878 (r'\*', Comment.Special),
879 ],
881 'multiline-comment': [
882 (r'[^*]+', Comment.Multiline),
883 (r'\*/', Comment.Multiline, '#pop'),
884 (r'\*', Comment.Multiline),
885 ],
887 # String substates
888 # ----------------
890 'single-quoted-string': [
891 (r"[^'\\]+", String.Single),
892 (r"''", String.Escape),
893 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
894 (r"'", String.Single, '#pop'),
895 ],
897 'double-quoted-string': [
898 (r'[^"\\]+', String.Double),
899 (r'""', String.Escape),
900 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
901 (r'"', String.Double, '#pop'),
902 ],
904 # Variable substates
905 # ------------------
907 'single-quoted-variable': [
908 (r"[^']+", Name.Variable),
909 (r"''", Name.Variable),
910 (r"'", Name.Variable, '#pop'),
911 ],
913 'double-quoted-variable': [
914 (r'[^"]+', Name.Variable),
915 (r'""', Name.Variable),
916 (r'"', Name.Variable, '#pop'),
917 ],
919 'backtick-quoted-variable': [
920 (r'[^`]+', Name.Variable),
921 (r'``', Name.Variable),
922 (r'`', Name.Variable, '#pop'),
923 ],
925 # Schema object name substates
926 # ----------------------------
927 #
928 # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
929 # formatters will style them as "Name" by default but add
930 # additional styles based on the token name. This gives users
931 # flexibility to add custom styles as desired.
932 #
933 'schema-object-name': [
934 (r'[^`]+', Name.Quoted),
935 (r'``', Name.Quoted.Escape),
936 (r'`', Name.Quoted, '#pop'),
937 ],
938 }
940 def analyse_text(text):
941 rating = 0
942 name_between_backtick_count = len(
943 name_between_backtick_re.findall(text))
944 name_between_bracket_count = len(
945 name_between_bracket_re.findall(text))
946 # Same logic as above in the TSQL analysis
947 dialect_name_count = name_between_backtick_count + name_between_bracket_count
948 if dialect_name_count >= 1 and \
949 name_between_backtick_count >= 2 * name_between_bracket_count:
950 # Found at least twice as many `name` as [name].
951 rating += 0.5
952 elif name_between_backtick_count > name_between_bracket_count:
953 rating += 0.2
954 elif name_between_backtick_count > 0:
955 rating += 0.1
956 return rating
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        """Tokenize an interactive sqlite3 transcript.

        Lines matching the sqlite prompt are buffered as SQL and lexed
        with :class:`SqlLexer` once a non-prompt line (or the end of
        input) is reached; the prompt itself is re-inserted via
        ``do_insertions``.  Non-prompt lines are emitted as plain output,
        or as a traceback when they carry the sqlite error prefix.
        """
        sql_lexer = SqlLexer(**self.options)

        buffered_sql = ''
        prompt_tokens = []
        for line_match in line_re.finditer(data):
            line = line_match.group()
            if sqlite_prompt_re.match(line) is not None:
                # Columns 0-6 hold the prompt, column 7 a separating
                # space; everything after that is SQL input.
                pos = len(buffered_sql)
                prompt_tokens.append((pos, [(0, Generic.Prompt, line[:7])]))
                prompt_tokens.append((pos, [(7, Whitespace, ' ')]))
                buffered_sql += line[8:]
                continue
            # A non-prompt line terminates any pending SQL statement.
            if buffered_sql:
                yield from do_insertions(
                    prompt_tokens,
                    sql_lexer.get_tokens_unprocessed(buffered_sql))
                buffered_sql = ''
                prompt_tokens = []
            if line.startswith('SQL error: '):
                yield (line_match.start(), Generic.Traceback, line)
            else:
                yield (line_match.start(), Generic.Output, line)
        # Flush SQL still buffered when the input ends on a prompt line.
        if buffered_sql:
            yield from do_insertions(
                prompt_tokens,
                sql_lexer.get_tokens_unprocessed(buffered_sql))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    .. versionadded:: 2.0
    """
    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    # RQL keywords are case-insensitive; this also lower-cases matching
    # for the builtin names below.
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # Statement and clause keywords, plus literal constants
            # (TODAY/NOW/TRUE/FALSE/NULL).
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            # Built-in names of the RQL/CubicWeb data model.
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Identifiers/variables; a trailing '?' is part of the name.
            (r'[A-Z_]\w*\??', Name),
            # String literals; a doubled quote escapes the delimiter.
            (r"'(''|[^'])*'", String.Single),
            # NOTE(review): double-quoted literals are also emitted as
            # String.Single — presumably so both quote styles highlight
            # identically; confirm before "fixing" to String.Double.
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }