Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/sql.py: 74%
226 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:45 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:45 +0000
1"""
2 pygments.lexers.sql
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for various SQL dialects and related interactive sessions.
7 Postgres specific lexers:
9 `PostgresLexer`
10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
11 lexer are:
13 - keywords and data types list parsed from the PG docs (run the
14 `_postgres_builtins` module to update them);
15 - Content of $-strings parsed using a specific lexer, e.g. the content
16 of a PL/Python function is parsed using the Python lexer;
17 - parse PG specific constructs: E-strings, $-strings, U&-strings,
18 different operators and punctuation.
20 `PlPgsqlLexer`
21 A lexer for the PL/pgSQL language. Adds a few specific constructs on
22 top of the PG SQL lexer (such as <<label>>).
24 `PostgresConsoleLexer`
25 A lexer to highlight an interactive psql session:
27 - identifies the prompt and does its best to detect the end of command
28 in multiline statement where not all the lines are prefixed by a
29 prompt, telling them apart from the output;
30 - highlights errors in the output and notification levels;
31 - handles psql backslash commands.
33 The ``tests/examplefiles`` directory contains a few test files with data to be
34 parsed by these lexers.
36 :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
37 :license: BSD, see LICENSE for details.
38"""
40import re
42from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
43from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
44 Keyword, Name, String, Number, Generic, Literal
45from pygments.lexers import get_lexer_by_name, ClassNotFound
47from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
48 PSEUDO_TYPES, PLPGSQL_KEYWORDS
49from pygments.lexers._mysql_builtins import \
50 MYSQL_CONSTANTS, \
51 MYSQL_DATATYPES, \
52 MYSQL_FUNCTIONS, \
53 MYSQL_KEYWORDS, \
54 MYSQL_OPTIMIZER_HINTS
56from pygments.lexers import _tsql_builtins
59__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
60 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
61 'SqliteConsoleLexer', 'RqlLexer']
# Splits input into lines, keeping the trailing newline on each line.
line_re = re.compile('.*?\n')
# Matches the sqlite3 interactive prompts: 'sqlite>' and the ' ...>' continuation.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# Matches a LANGUAGE clause, capturing the (optionally quoted) language name.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# Matches a bare DO keyword (anonymous code block, implies plpgsql).
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
def language_callback(lexer, match):
    """Parse the content of a $-string using a sub-lexer.

    The sub-lexer is chosen by looking for a nearby LANGUAGE clause; if
    none is found but a DO statement precedes the string, plpgsql is
    assumed.  `lexer.text` (the whole input, stashed by `PostgresBase`)
    is inspected around the match to make that decision.
    """
    sublexer = None
    # A LANGUAGE clause immediately after the string wins.
    following = lexer.text[match.end():match.end() + 100]
    found = language_re.match(following)
    if found is not None:
        sublexer = lexer._get_lexer(found.group(1))
    else:
        # Otherwise take the closest LANGUAGE clause shortly before it.
        preceding = lexer.text[max(0, match.start() - 100):match.start()]
        clauses = list(language_re.finditer(preceding))
        if clauses:
            sublexer = lexer._get_lexer(clauses[-1].group(1))
        elif do_re.search(lexer.text[max(0, match.start() - 25):match.start()]):
            # A DO just before the string: anonymous plpgsql block.
            sublexer = lexer._get_lexer('plpgsql')

    # Groups 1-3 are the opening delimiter: $, tag, $.
    for g in (1, 2, 3):
        token = String.Delimiter if g == 2 else String
        yield (match.start(g), token, match.group(g))
    # Group 4 is the string body: delegate when a sub-lexer was found.
    if sublexer:
        yield from sublexer.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # Groups 5-7 are the closing delimiter: $, tag, $.
    for g in (5, 6, 7):
        token = String.Delimiter if g == 6 else String
        yield (match.start(g), token, match.group(g))
class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    That way the different lexers don't share a common Lexer ancestor; if
    they did, `_tokens` could be created on that ancestor and never updated
    for the subclasses, resulting e.g. in PL/pgSQL being parsed as plain
    SQL.  This shortcoming seems to suggest that regexp lexers are not
    really subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep a reference to the full input so `language_callback` can
        # look around a match for LANGUAGE / DO clauses.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer for *lang*, trying a few name variants; None if missing."""
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        # Build candidate names: strip a 'pl' prefix and/or a trailing 'u'
        # (untrusted variant), e.g. 'plpythonu' -> python.
        candidates = [lang]
        if lang.startswith('pl'):
            candidates.append(lang[2:])
        if lang.endswith('u'):
            candidates.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            candidates.append(lang[2:-1])

        for candidate in candidates:
            try:
                return get_lexer_by_name(candidate, **self.options)
            except ClassNotFound:
                continue
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Data type names; multi-word types match across any whitespace.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional parameter, e.g. $1
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode-escape strings.
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$...$tag$ dollar-quoted string; body is handed to a
            # sub-lexer selected by `language_callback`.
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the state push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote = escaped quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled double-quote = escaped quote
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Copy the PostgresLexer state lists so the edits below do not leak
    # into the parent lexer's rules.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        # Loop completed without a break: the SQL keyword rule is missing.
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Start from a copy of the PostgresLexer rules and bolt on a state
    # for backslash commands.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' rather than '#pop', so the
        # state stack grows on each command; the emitted tokens appear
        # equivalent — confirm before changing.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),  # psql variable
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
# Regular expressions used by PostgresConsoleLexer to carve up a psql
# session into prompts, commands, and output/notice lines.
# NOTE: a dead duplicate assignment of re_psql_command
# (re.compile(r'\s*\\')) used to precede re_end_command; it was
# immediately overwritten and has been removed.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# End of a statement: a semicolon, optionally followed by a line comment.
re_end_command = re.compile(r';\s*(--.*?)?$')
# A whole-line psql backslash command, e.g. "\d foo\n".
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
# Server notice levels; group 1 is the level label, group 2 the message.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
class lookahead:
    """Wrap an iterator and allow pushing back an item.

    Fix: the previous implementation stored the pushed-back item as
    ``None``-vs-value, so ``send(None)`` was silently dropped.  A private
    sentinel now marks "nothing pushed back", making any value (including
    ``None`` or falsy items) safe to push back.
    """

    # Marks "no pushed-back item pending"; never visible to callers.
    _SENTINEL = object()

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = self._SENTINEL

    def __iter__(self):
        return self

    def send(self, i):
        """Push *i* back so the next ``__next__`` call returns it again."""
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not self._SENTINEL:
            ni = self._nextitem
            self._nextitem = self._SENTINEL
            return ni
        return next(self.iter)
    # Backwards-compatible alias (old-style .next() calls).
    next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        # Delegate SQL/backslash-command highlighting to the regex lexer.
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets us push a prompt line back when we overshoot
        # into the next command while consuming output.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # Record where the prompt goes so do_insertions can
                    # weave it back into the lexed command stream.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # ERROR/FATAL switch the remaining output to the error
                    # token; the level label itself is rendered strong.
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # Input exhausted without hitting another prompt: done.
                return
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                # Fix: was ' TEMP' (leading space), which made words() require
                # a literal space and prevented TEMP from matching as a keyword.
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(self, text):
        # Deliberately returns None (score 0) so dialect-specific lexers
        # can out-compete generic SQL in content guessing.
        return
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use..
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # [bracket-quoted] identifiers
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        # Content-guessing score in [0, 1]; note Pygments calls
        # analyse_text without a `self` argument.
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        # Content-guessing score; mirrors TransactSqlLexer.analyse_text
        # with the backtick/bracket roles swapped.
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            prompt_match = sqlite_prompt_re.match(line)
            if prompt_match is not None:
                # Both prompt forms matched by sqlite_prompt_re ('sqlite>'
                # and ' ...>') are 7 characters wide and followed by one
                # space — hence the fixed 7/8 offsets below.
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:7])]))
                insertions.append((len(curcode),
                                   [(7, Whitespace, ' ')]))
                curcode += line[8:]
            else:
                # Non-prompt line: flush any buffered SQL first, then emit
                # this line as error traceback or plain output.
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        # Flush SQL still buffered when the input ends on a command.
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    .. versionadded:: 2.0
    """
    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Upper-case identifiers (RQL variables), optional '?' suffix.
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }