Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/sql.py: 62%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

269 statements  

1""" 

2 pygments.lexers.sql 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various SQL dialects and related interactive sessions. 

6 

7 Postgres specific lexers: 

8 

9 `PostgresLexer` 

10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL 

11 lexer are: 

12 

13 - keywords and data types list parsed from the PG docs (run the 

14 `_postgres_builtins` module to update them); 

15 - Content of $-strings parsed using a specific lexer, e.g. the content 

16 of a PL/Python function is parsed using the Python lexer; 

17 - parse PG specific constructs: E-strings, $-strings, U&-strings, 

18 different operators and punctuation. 

19 

20 `PlPgsqlLexer` 

21 A lexer for the PL/pgSQL language. Adds a few specific constructs on 

22 top of the PG SQL lexer (such as <<label>>). 

23 

24 `PostgresConsoleLexer` 

25 A lexer to highlight an interactive psql session: 

26 

27 - identifies the prompt and does its best to detect the end of command 

28 in multiline statement where not all the lines are prefixed by a 

29 prompt, telling them apart from the output; 

30 - highlights errors in the output and notification levels; 

31 - handles psql backslash commands. 

32 

33 `PostgresExplainLexer` 

34 A lexer to highlight Postgres execution plans. 

35 

36 The ``tests/examplefiles`` directory contains a few test files with data to be 

37 parsed by these lexers. 

38 

39 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS. 

40 :license: BSD, see LICENSE for details. 

41""" 

42 

43import collections 

44import re 

45 

46from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words 

47from pygments.lexers import _googlesql_builtins 

48from pygments.lexers import _mysql_builtins 

49from pygments.lexers import _postgres_builtins 

50from pygments.lexers import _sql_builtins 

51from pygments.lexers import _tsql_builtins 

52from pygments.lexers import get_lexer_by_name, ClassNotFound 

53from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ 

54 Keyword, Name, String, Number, Generic, Literal 

55 

56 

# Public names exported by this module.
__all__ = ['GoogleSqlLexer', 'PostgresLexer', 'PlPgsqlLexer',
           'PostgresConsoleLexer', 'PostgresExplainLexer', 'SqlLexer',
           'TransactSqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']

# Matches one line at a time, including its terminating newline.  Text after
# the last newline is not matched.
line_re = re.compile('.*?\n')
# Prompt at the start of a line of an sqlite session; must be followed by a
# space (lookahead, not consumed).
# NOTE(review): the sqlite3 continuation prompt is usually padded with several
# spaces ("   ...>"); the whitespace inside this pattern may have been
# collapsed in this rendering of the file -- verify against the real source.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# Finds a "LANGUAGE name" clause (name optionally single-quoted); group 1 is
# the language name.  Used by `language_callback`.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# Finds the word DO; used by `language_callback` to detect anonymous blocks.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
# [name] -- bracket-quoted identifier (counted by the T-SQL/MySQL heuristics)
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
# `name` -- backtick-quoted identifier
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
# the word "go" (any case)
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
# "declare @..." -- start of a variable declaration
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
# @name -- variable reference
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')

# Identifiers for analyse_text()
# Concatenation of all GoogleSQL function names, keywords and types.
googlesql_identifiers = (
    _googlesql_builtins.functionnames
    + _googlesql_builtins.keywords
    + _googlesql_builtins.types)

80 

81 

def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer

    The lexer is chosen looking for a nearby LANGUAGE or assumed as
    plpgsql if inside a DO statement and no LANGUAGE has been found.

    The search order is:

    1. a ``LANGUAGE xxx`` clause starting right after the $-string
       (within the next 100 characters);
    2. the last ``LANGUAGE xxx`` clause in the 100 characters preceding it;
    3. a ``DO`` keyword in the 25 characters preceding it (-> plpgsql).

    If none of these yields a lexer, the body is emitted as a plain String.

    :param lexer: the running lexer; must provide ``text`` (the whole input)
        and ``_get_lexer(name)`` (see `PostgresBase`).
    :param match: regex match with seven groups: ``$``, tag, ``$``, body,
        ``$``, tag, ``$``.
    :return: generator of ``(index, tokentype, value)`` tuples; every index
        is absolute within the text being lexed.
    """
    start, end = match.start(), match.end()
    text = lexer.text

    lx = None
    m = language_re.match(text[end:end + 100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        m = list(language_re.finditer(text[max(0, start - 100):start]))
        if m:
            # The closest LANGUAGE clause before the string wins.
            lx = lexer._get_lexer(m[-1].group(1))
        elif do_re.search(text[max(0, start - 25):start]) is not None:
            lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        # The sub-lexer numbers its tokens from 0; shift them so that they
        # are absolute like every other token emitted here.
        body_start = match.start(4)
        for index, tokentype, value in lx.get_tokens_unprocessed(match.group(4)):
            yield (body_start + index, tokentype, value)
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))

116 

117 

class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seems to suggest that regexp lexers are not really subclassable.
    """
    def get_tokens_unprocessed(self, text, *args):
        """Remember the full input on ``self.text``, then lex as usual."""
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer instance for the procedural language *lang*.

        ``sql`` maps to the ``postgresql`` lexer.  Otherwise the name is
        tried as-is, then without a leading ``pl``, without a trailing ``u``,
        and without both (so e.g. ``plpythonu`` can fall back to ``python``).
        The name is compared case-insensitively, since the LANGUAGE clause
        it comes from is matched case-insensitively too.

        :param lang: language name as written in the SQL source.
        :return: a lexer built with this lexer's options, or ``None`` when
            no candidate name is known.
        """
        # Normalise once so the 'pl'/'u' affix handling below also applies
        # to upper- or mixed-case names such as "PLPYTHONU".
        lang = lang.lower()
        if lang == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                pass

        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None

153 

154 

class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'

    # Keywords, type names and the E/U& string prefixes match in any case.
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Built-in and pseudo type names; a space inside a multi-word
            # name is turned into \s+ so any run of whitespace is accepted.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in _postgres_builtins.DATATYPES +
                             _postgres_builtins.PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(_postgres_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            # positional parameter: $1, $2, ...
            (r'\$\d+', Name.Variable),
            # NOTE(review): the second alternative ([0-9]+) also matches plain
            # integers and the first one matches a lone '.', so -- assuming
            # rules are tried in order -- the Number.Integer rule below looks
            # unreachable and '.' never reaches the Punctuation rule. Confirm
            # whether this is intended before changing it.
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # string literal, optionally prefixed by E or U&
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$ ... $tag$ string; the body is handed to language_callback,
            # which picks a lexer for it.  (?s) lets the body span lines and
            # \2 requires the closing tag to equal the opening one.
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        # Entered on '/*'; a nested '/*' pushes this state again, so each
        # '*/' closes exactly one nesting level.
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        # Body of a '...' literal; '' is an escaped quote and does not end it.
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        # Body of a "..." identifier; "" is an escaped quote.
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }

210 

211 

class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    The rule table is a copy of `PostgresLexer.tokens` in which the keyword
    rule is widened with the PL/pgSQL keywords and a few PL/pgSQL-only rules
    are put in front of the SQL ones.
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']
    url = 'https://www.postgresql.org/docs/current/plpgsql.html'
    version_added = '1.5'

    flags = re.IGNORECASE
    # FIXME: use inheritance
    # Copy every state list so the edits below do not touch PostgresLexer.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(_postgres_builtins.KEYWORDS +
                      _postgres_builtins.PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            break
    else:
        # Raised explicitly (rather than `assert 0`) so the check also runs
        # under `python -O`.
        raise AssertionError("SQL keywords not found")
    # Don't leave the loop variables behind as class attributes.
    del i, pattern

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),     # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),   # #variable_conflict
    ]

245 

246 

class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Copy every state list so the additions below do not touch PostgresLexer.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # A backslash followed by non-blank characters starts a psql
    # meta-command; its arguments are lexed in the 'psql-command' state.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # End of line ends the command.  'psql-command' is only ever entered
        # from 'root', so popping returns there without growing the state
        # stack on every command (pushing 'root' again would).
        (r'\n', Text, '#pop'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]

272 

273 

# Regular expressions used by PostgresConsoleLexer.

# A psql prompt at the start of a line: optional leading text, then one of
# = - ( $ ' " followed by # or >.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# A statement terminator: ';' optionally followed by blanks and a -- comment.
re_end_command = re.compile(r';\s*(--.*?)?$')
# A whole backslash command: leading blanks, the command, trailing blanks.
# (An earlier assignment of this name to r'\s*\\' was immediately overwritten
# by this one and has been dropped.)
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
# Not referenced in the visible part of this module; kept because other code
# may use it.
re_error = re.compile(r'(ERROR|FATAL):')
# A server message line: group 1 is the "LEVEL:" tag, group 2 the rest of the
# line including its newline.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')

282 

283 

class lookahead:
    """Iterator wrapper with a one-slot push-back buffer.

    An item handed to `send` is returned by the following `next()` call
    before iteration of the wrapped iterable resumes.  ``None`` marks the
    empty slot, so ``None`` itself cannot be pushed back.
    """

    def __init__(self, x):
        self._nextitem = None
        self.iter = iter(x)

    def __iter__(self):
        return self

    def send(self, i):
        """Store *i* as the next item to be returned, and return it."""
        self._nextitem = i
        return i

    def __next__(self):
        # Take whatever is in the slot and leave the slot empty.
        pending, self._nextitem = self._nextitem, None
        if pending is None:
            return next(self.iter)
        return pending

    # Alias for callers using the explicit ``.next()`` spelling.
    next = __next__

304 

305 

class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    The input is processed line by line as a sequence of "command, then
    output" cycles: command lines (with their prompts) are collected and
    lexed as SQL/psql commands, the lines that follow are emitted as output
    until the next prompt shows up.
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'
    _example = "psql/psql_session.txt"

    def get_tokens_unprocessed(self, data):
        """Yield ``(index, tokentype, value)`` tuples for the session *data*.

        Output lines are yielded with indices relative to their own line
        (see the ``yield`` statements below), not to *data*.  Only
        newline-terminated lines are seen, because the input is split with
        ``line_re``.
        """
        # Lexer used for the command part of each cycle.
        sql = PsqlRegexLexer(**self.options)

        # Wrapped so that the output loop can push a prompt line back.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # Record the prompt as an insertion at the current end of
                    # the command text and keep only the rest of the line.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                #       a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            # Starts as plain output; switches to Generic.Error after the
            # first ERROR/FATAL message and stays so for the rest of this
            # cycle's output.
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # No further prompt was found: the input is exhausted.
                return

377 

378 

class PostgresExplainLexer(RegexLexer):
    """
    Handle PostgreSQL EXPLAIN output

    Most states below other than 'root' have no rule that leaves them on
    their own at the end of a line.
    NOTE(review): they presumably rely on the lexer resetting to 'root' at a
    newline that no rule matches -- confirm against RegexLexer.
    """

    name = 'PostgreSQL EXPLAIN dialect'
    aliases = ['postgres-explain']
    filenames = ['*.explain']
    mimetypes = ['text/x-postgresql-explain']
    url = 'https://www.postgresql.org/docs/current/using-explain.html'
    version_added = '2.15'

    tokens = {
        'root': [
            (r'(:|\(|\)|ms|kB|->|\.\.|\,|\/|=|%|text)', Punctuation),
            (r'(\s+)', Whitespace),

            # This match estimated cost and effectively measured counters with ANALYZE
            # Then, we move to instrumentation state
            (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # Misc keywords
            (words(('actual', 'Memory Usage', 'Disk Usage', 'Memory', 'Buckets',
                    'Batches', 'originally', 'row', 'rows', 'Hits', 'Misses',
                    'Evictions', 'Overflows', 'Planned Partitions', 'Estimates',
                    'capacity', 'distinct keys', 'lookups', 'hit percent',
                    'Index Searches', 'Storage', 'Disk Maximum Storage'), suffix=r'\b'),
             Comment.Single),

            (r'(hit|read|dirtied|written|write|time|calls|records|bytes|allocated|used|output|format)(=)', bygroups(Comment.Single, Operator)),
            (r'(shared|temp|local)', Keyword.Pseudo),

            # We move to sort state in order to emphasize specific keywords (especially disk access)
            (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

            # These keywords can be followed by an object, like a table
            (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
             bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
            (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),

            # These keywords can be followed by a predicate
            (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
                    'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
                    'TID Cond', 'Run Condition', 'Order By', 'Function Call',
                    'Table Function Call', 'Inner Unique', 'Params Evaluated',
                    'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
                    'Relations', 'Remote SQL', 'Disabled'), suffix=r'\b'),
             Comment.Preproc, 'predicate'),

            # Special keyword to handle ON CONFLICT
            (r'Conflict(ing)? ', Comment.Preproc, 'conflict'),
            (r'(Tuples Inserted: )', Comment.Preproc, 'predicate'),

            # Special keyword for InitPlan or SubPlan
            (r'(InitPlan|SubPlan)( )(\d+)( )',
             bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
             'init_plan'),

            (words(('Sort Method', 'Join Filter', 'Planning time',
                    'Planning Time', 'Execution time', 'Execution Time',
                    'Workers Planned', 'Workers Launched', 'Buffers',
                    'Planning', 'Worker', 'Query Identifier', 'Time',
                    'Full-sort Groups', 'Pre-sorted Groups', 'Serialization'), suffix=r'\b'), Comment.Preproc),

            # Emphasize these keywords

            (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
                    'Rows Removed by Index Recheck',
                    'Heap Fetches', 'never executed'),
                   suffix=r'\b'), Name.Exception),
            (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),
            (r'(WAL)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),

            (words(_postgres_builtins.EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

            # join keywords
            (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
            (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
            (r'Backward', Comment.Preproc),
            (r'(Intersect|Except|Hash)', Comment.Preproc),

            (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),


            # Treat "on" and "using" as a punctuation
            (r'(on|using)', Punctuation, 'object_name'),


            # strings
            (r"'(''|[^'])*'", String.Single),
            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'(-?\d+)', Number.Integer),

            # boolean
            (r'(true|false)', Name.Constant),
            # explain header
            (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
            # Settings
            (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),

            # Handle JIT counters
            (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
            (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

            # Handle Triggers counters
            (r'(Trigger)( )(\S*)(:)( )',
             bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),

        ],
        'expression': [
            # matches any kind of parenthesized expression
            # the first opening paren is matched by the 'caller'
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            (r'(never executed)', Name.Exception),
            (r'[^)(]+', Comment),
        ],
        'object_name': [

            # This is a cost or analyze measure
            (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # if object_name is parenthesized, mark opening paren as
            # punctuation, call 'expression', and exit state
            (r'\(', Punctuation, 'expression'),
            (r'(on)', Punctuation),
            # matches possibly schema-qualified table and column names
            (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
            (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
            (r'\'\S*\'', Name.Variable),

            # if we encounter a comma, another object is listed
            (r',\n', Punctuation, 'object_name'),
            (r',', Punctuation, 'object_name'),

            # special case: "*SELECT*"
            (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
            (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
            (r'"ANY_subquery"', Name.Variable),

            # Variable $1 ...
            (r'\$\d+', Name.Variable),
            # cast
            (r'::\w+', Name.Variable),
            (r' +', Whitespace),
            (r'"', Punctuation),
            (r'\[\.\.\.\]', Punctuation),
            (r'\)', Punctuation, '#pop'),
        ],
        'predicate': [
            # if predicate is parenthesized, mark paren as punctuation
            (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
            # otherwise color until newline
            (r'[^\n]*', Name.Variable, '#pop'),
        ],
        'instrumentation': [
            (r'=|\.\.', Punctuation),
            (r' +', Whitespace),
            (r'(rows|width|time|loops)', Name.Class),
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),
            (r'\)', Punctuation, '#pop'),
        ],
        'conflict': [
            (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
            (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
            (r'(Filter: )', Comment.Preproc, 'predicate'),
            (r'(Tuples: )', Comment.Preproc, 'predicate'),
        ],
        'setting': [
            (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
            (r'\, ', Punctuation),
        ],
        'init_plan': [
            (r'\(', Punctuation),
            (r'returns \$\d+(,\$\d+)?', Name.Variable),
            (r'\)', Punctuation, '#pop'),
        ],
        'sort': [
            (r':|kB', Punctuation),
            # Comment.Preproc, as for the other emphasised EXPLAIN labels.
            # (This used to read `Comment.Prepoc`; token types are created on
            # attribute access, so the typo silently produced a token type
            # that no style or formatter knows about.)
            (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
            (r'(external|merge|Disk|sort)', Name.Exception),
            (r'(\d+)', Number.Integer),
            (r' +', Whitespace),
        ],
    }

568 

569 

class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']
    url = 'https://en.wikipedia.org/wiki/SQL'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_sql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (words(_sql_builtins.DATATYPES, suffix=r'\b'), Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        # Entered on '/*'; nested comments push this state again.
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Give generic SQL no score when guessing a lexer from content.

        Like the other ``analyse_text`` functions in this module, this takes
        the text as its only argument.  The previous ``(self, text)``
        signature could not be called that way.
        """
        return 0.0

609 

610 

class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']
    url = 'https://www.tsql.info'
    version_added = ''

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # Line comment up to and including the newline.  This was
            # r'--.*[$|\n]?', where the character class (literal '$', '|' or
            # newline) stood in for an alternation; since '.*' already eats
            # everything up to the newline, the plain form below matches
            # exactly the same text and agrees with the other SQL lexers.
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        # Entered on '/*'; nested comments push this state again.
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Rate how likely *text* is T-SQL.

        A ``declare @...`` statement gives 1.0 outright.  Otherwise the
        rating is the sum of: 0.5 / 0.2 / 0.1 depending on how much
        ``[name]`` quoting outweighs `` `name` `` quoting, 0.1 for any
        ``@variable`` and 0.1 for a ``go`` word.
        """
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating

695 

696 

class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']
    url = 'https://www.mysql.com'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            # '#' or '--' followed by whitespace, up to the end of the line
            (r'(?:#|--\s+).*', Comment.Single),
            # '/*+' must be tried before the plain '/*' rule
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            # {d 'YYYY-MM-DD'}: the named group makes the closing quote match
            # the opening one; '.?' accepts any single separator or none.
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)\b(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(_mysql_builtins.MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'),
             Name.Constant),
            (words(_mysql_builtins.MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'),
             Keyword.Type),
            (words(_mysql_builtins.MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'),
             Keyword),
            # A function name only counts when followed by '(' (optionally
            # after whitespace); the paren is emitted as Punctuation.
            (words(_mysql_builtins.MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            # Known hint names are highlighted; any other word falls through
            # to the generic rule below.
            (words(_mysql_builtins.MYSQL_OPTIMIZER_HINTS, suffix=r'\b'),
             Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        """Rate how likely *text* is MySQL, from its identifier quoting.

        Returns 0.5 when there are at least twice as many `` `name` `` as
        ``[name]`` identifiers (and at least one of either), 0.2 when
        backticks merely outnumber brackets, 0.1 when any backtick name
        exists, and 0 otherwise.
        """
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating

884 

885 

class GoogleSqlLexer(RegexLexer):
    """
    GoogleSQL is Google's standard SQL dialect, formerly known as ZetaSQL.

    The list of keywords includes reserved words for future use.
    """

    name = 'GoogleSQL'
    aliases = ['googlesql', 'zetasql']
    filenames = ['*.googlesql', '*.googlesql.sql']
    mimetypes = ['text/x-google-sql', 'text/x-google-sql-aux']
    url = 'https://cloud.google.com/bigquery/googlesql'
    version_added = '2.19'

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Exceptions; these words tokenize differently in different contexts.
            #
            # The trailing \b is required: without it the first three
            # letters of an identifier such as "settings" or "setup" would
            # be split off and emitted as a keyword.
            (r'\b(set)\b(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),

            # Constants, types, keywords, functions, operators
            (words(_googlesql_builtins.constants, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(_googlesql_builtins.types, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(_googlesql_builtins.keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(_googlesql_builtins.functionnames, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),
            (words(_googlesql_builtins.operators, prefix=r'\b', suffix=r'\b'), Operator),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        """Rate *text* by the number of GoogleSQL identifiers it contains.

        *text* is split on whitespace and every resulting word that is a
        member of the module-level ``googlesql_identifiers`` collection
        contributes ``0.001`` per occurrence to the returned score. The
        membership test is applied to the words exactly as they appear in
        *text*.
        """
        tokens = collections.Counter(text.split())
        return 0.001 * sum(count for t, count in tokens.items()
                           if t in googlesql_identifiers)

1044 

1045 

class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']
    url = 'https://www.sqlite.org'
    version_added = '0.11'
    _example = "sqlite3/sqlite3.sqlite3-console"

    def get_tokens_unprocessed(self, data):
        """Yield ``(index, tokentype, value)`` tuples for a console session.

        Lines matched by ``sqlite_prompt_re`` are treated as SQL input:
        their text from index 8 onwards is accumulated and later lexed by
        a ``SqlLexer`` built from this lexer's options, with the prompt
        and the space after it re-inserted as separate tokens. Every other
        line is emitted whole, as ``Generic.Traceback`` when it starts
        with ``'SQL error: '`` and as ``Generic.Output`` otherwise.
        """
        sql_lexer = SqlLexer(**self.options)

        pending_sql = ''
        prompt_insertions = []
        for line_match in line_re.finditer(data):
            text = line_match.group()

            if sqlite_prompt_re.match(text) is not None:
                # Remember where in the accumulated SQL this prompt
                # belongs, then keep only the statement text.
                position = len(pending_sql)
                prompt_insertions.extend([
                    (position, [(0, Generic.Prompt, text[:7])]),
                    (position, [(7, Whitespace, ' ')]),
                ])
                pending_sql += text[8:]
                continue

            # An output line ends the current statement: flush the
            # accumulated SQL before emitting the output itself.
            if pending_sql:
                yield from do_insertions(
                    prompt_insertions,
                    sql_lexer.get_tokens_unprocessed(pending_sql))
                pending_sql = ''
                prompt_insertions = []

            token_type = (Generic.Traceback
                          if text.startswith('SQL error: ')
                          else Generic.Output)
            yield (line_match.start(), token_type, text)

        # Flush a statement that was not followed by any output line.
        if pending_sql:
            yield from do_insertions(
                prompt_insertions,
                sql_lexer.get_tokens_unprocessed(pending_sql))

1086 

1087 

class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.
    """

    # Identification metadata.
    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']
    url = 'http://www.logilab.org/project/rql'
    version_added = '2.0'

    # NOTE(review): presumably applied by RegexLexer when the rules below
    # are compiled, making every pattern case-insensitive -- confirm.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Reserved words; the trailing \b keeps them from matching a
            # prefix of a longer identifier.
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),

            # Single-character operators
            (r'[+*/<>=%-]', Operator),

            # Built-in names
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),

            # Integer literals
            (r'[0-9]+', Number.Integer),

            # Other names, optionally followed by a single "?"
            (r'[A-Z_]\w*\??', Name),

            # Quoted strings; a doubled quote character inside the string
            # does not terminate it. Both quoting styles are emitted as
            # String.Single.
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),

            # Punctuation
            (r'[;:()\[\],.]', Punctuation)
        ],
    }