Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/sql.py: 74%
226 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:45 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:45 +0000
1"""
2 pygments.lexers.sql
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for various SQL dialects and related interactive sessions.
7 Postgres specific lexers:
9 `PostgresLexer`
10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
11 lexer are:
13 - keywords and data types list parsed from the PG docs (run the
14 `_postgres_builtins` module to update them);
15 - Content of $-strings parsed using a specific lexer, e.g. the content
16 of a PL/Python function is parsed using the Python lexer;
17 - parse PG specific constructs: E-strings, $-strings, U&-strings,
18 different operators and punctuation.
20 `PlPgsqlLexer`
21 A lexer for the PL/pgSQL language. Adds a few specific constructs on
22 top of the PG SQL lexer (such as <<label>>).
24 `PostgresConsoleLexer`
25 A lexer to highlight an interactive psql session:
27 - identifies the prompt and does its best to detect the end of command
28 in multiline statement where not all the lines are prefixed by a
29 prompt, telling them apart from the output;
30 - highlights errors in the output and notification levels;
31 - handles psql backslash commands.
33 The ``tests/examplefiles`` directory contains a few test files with data to be
34 parsed by these lexers.
36 :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
37 :license: BSD, see LICENSE for details.
38"""
40import re
42from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
43from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
44 Keyword, Name, String, Number, Generic, Literal
45from pygments.lexers import get_lexer_by_name, ClassNotFound
47from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
48 PSEUDO_TYPES, PLPGSQL_KEYWORDS
49from pygments.lexers._mysql_builtins import \
50 MYSQL_CONSTANTS, \
51 MYSQL_DATATYPES, \
52 MYSQL_FUNCTIONS, \
53 MYSQL_KEYWORDS, \
54 MYSQL_OPTIMIZER_HINTS
56from pygments.lexers import _tsql_builtins
59__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
60 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
61 'SqliteConsoleLexer', 'RqlLexer']
# Splits input into lines, keeping the trailing newline on each line.
line_re = re.compile('.*?\n')
# Matches the sqlite3 interactive prompts: 'sqlite>' and the ' ...>' continuation.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# Matches a LANGUAGE clause, capturing the (optionally quoted) language name.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# Matches a bare DO keyword (anonymous code block, implies plpgsql).
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
def language_callback(lexer, match):
    """Parse the content of a $-string using a sub-lexer.

    The sub-lexer is chosen by looking for a nearby LANGUAGE clause; if
    none is found but a DO statement precedes the string, plpgsql is
    assumed.  `lexer.text` (the whole input, stashed by `PostgresBase`)
    is inspected around the match to make that decision.
    """
    sublexer = None
    # A LANGUAGE clause immediately after the string wins.
    following = lexer.text[match.end():match.end() + 100]
    found = language_re.match(following)
    if found is not None:
        sublexer = lexer._get_lexer(found.group(1))
    else:
        # Otherwise take the closest LANGUAGE clause shortly before it.
        preceding = lexer.text[max(0, match.start() - 100):match.start()]
        clauses = list(language_re.finditer(preceding))
        if clauses:
            sublexer = lexer._get_lexer(clauses[-1].group(1))
        elif do_re.search(lexer.text[max(0, match.start() - 25):match.start()]):
            # A DO just before the string: anonymous plpgsql block.
            sublexer = lexer._get_lexer('plpgsql')

    # Groups 1-3 are the opening delimiter: $, tag, $.
    for g in (1, 2, 3):
        token = String.Delimiter if g == 2 else String
        yield (match.start(g), token, match.group(g))
    # Group 4 is the string body: delegate when a sub-lexer was found.
    if sublexer:
        yield from sublexer.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # Groups 5-7 are the closing delimiter: $, tag, $.
    for g in (5, 6, 7):
        token = String.Delimiter if g == 6 else String
        yield (match.start(g), token, match.group(g))
class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    That way the different lexers don't share a common Lexer ancestor; if
    they did, `_tokens` could be created on that ancestor and never updated
    for the subclasses, resulting e.g. in PL/pgSQL being parsed as plain
    SQL.  This shortcoming seems to suggest that regexp lexers are not
    really subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep a reference to the full input so `language_callback` can
        # look around a match for LANGUAGE / DO clauses.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer for *lang*, trying a few name variants; None if missing."""
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        # Build candidate names: strip a 'pl' prefix and/or a trailing 'u'
        # (untrusted variant), e.g. 'plpythonu' -> python.
        candidates = [lang]
        if lang.startswith('pl'):
            candidates.append(lang[2:])
        if lang.endswith('u'):
            candidates.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            candidates.append(lang[2:-1])

        for candidate in candidates:
            try:
                return get_lexer_by_name(candidate, **self.options)
            except ClassNotFound:
                continue
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Data type names; multi-word types match across any whitespace.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional parameter, e.g. $1
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode-escape strings.
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$...$tag$ dollar-quoted string; body is handed to a
            # sub-lexer selected by `language_callback`.
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the state push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote = escaped quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled double-quote = escaped quote
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Copy the PostgresLexer state lists so the edits below do not leak
    # into the parent lexer's rules.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        # Loop completed without a break: the SQL keyword rule is missing.
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Start from a copy of the PostgresLexer rules and bolt on a state
    # for backslash commands.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' rather than '#pop', so the
        # state stack grows on each command; the emitted tokens appear
        # equivalent — confirm before changing.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),  # psql variable
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
# Regular expressions used by PostgresConsoleLexer to carve up a psql
# session into prompts, commands, and output/notice lines.
# NOTE: a dead duplicate assignment of re_psql_command
# (re.compile(r'\s*\\')) used to precede re_end_command; it was
# immediately overwritten and has been removed.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# End of a statement: a semicolon, optionally followed by a line comment.
re_end_command = re.compile(r';\s*(--.*?)?$')
# A whole-line psql backslash command, e.g. "\d foo\n".
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
# Server notice levels; group 1 is the level label, group 2 the message.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
class lookahead:
    """Wrap an iterator and allow pushing back an item.

    Fix: the previous implementation stored the pushed-back item as
    ``None``-vs-value, so ``send(None)`` was silently dropped.  A private
    sentinel now marks "nothing pushed back", making any value (including
    ``None`` or falsy items) safe to push back.
    """

    # Marks "no pushed-back item pending"; never visible to callers.
    _SENTINEL = object()

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = self._SENTINEL

    def __iter__(self):
        return self

    def send(self, i):
        """Push *i* back so the next ``__next__`` call returns it again."""
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not self._SENTINEL:
            ni = self._nextitem
            self._nextitem = self._SENTINEL
            return ni
        return next(self.iter)
    # Backwards-compatible alias (old-style .next() calls).
    next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        # Delegate SQL/backslash-command highlighting to the regex lexer.
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets us push a prompt line back when we overshoot
        # into the next command while consuming output.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # Record where the prompt goes so do_insertions can
                    # weave it back into the lexed command stream.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # ERROR/FATAL switch the remaining output to the error
                    # token; the level label itself is rendered strong.
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # Input exhausted without hitting another prompt: done.
                return
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                # Fix: was ' TEMP' (leading space), which made words() require
                # a literal space and prevented TEMP from matching as a keyword.
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(self, text):
        # Deliberately returns None (score 0) so dialect-specific lexers
        # can out-compete generic SQL in content guessing.
        return
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use..
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # [bracket-quoted] identifiers
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        # Content-guessing score in [0, 1]; note Pygments calls
        # analyse_text without a `self` argument.
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        # Content-guessing score; mirrors TransactSqlLexer.analyse_text
        # with the backtick/bracket roles swapped.
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            prompt_match = sqlite_prompt_re.match(line)
            if prompt_match is not None:
                # Both prompt forms matched by sqlite_prompt_re ('sqlite>'
                # and ' ...>') are 7 characters wide and followed by one
                # space — hence the fixed 7/8 offsets below.
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:7])]))
                insertions.append((len(curcode),
                                   [(7, Whitespace, ' ')]))
                curcode += line[8:]
            else:
                # Non-prompt line: flush any buffered SQL first, then emit
                # this line as error traceback or plain output.
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        # Flush SQL still buffered when the input ends on a command.
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    .. versionadded:: 2.0
    """
    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Upper-case identifiers (RQL variables), optional '?' suffix.
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }