Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/sql.py: 46%
232 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.sql
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for various SQL dialects and related interactive sessions.
7 Postgres specific lexers:
9 `PostgresLexer`
10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
11 lexer are:
13 - keywords and data types list parsed from the PG docs (run the
14 `_postgres_builtins` module to update them);
15 - Content of $-strings parsed using a specific lexer, e.g. the content
16 of a PL/Python function is parsed using the Python lexer;
17 - parse PG specific constructs: E-strings, $-strings, U&-strings,
18 different operators and punctuation.
20 `PlPgsqlLexer`
21 A lexer for the PL/pgSQL language. Adds a few specific constructs on
22 top of the PG SQL lexer (such as <<label>>).
24 `PostgresConsoleLexer`
25 A lexer to highlight an interactive psql session:
27 - identifies the prompt and does its best to detect the end of command
28 in multiline statement where not all the lines are prefixed by a
29 prompt, telling them apart from the output;
30 - highlights errors in the output and notification levels;
31 - handles psql backslash commands.
33 `PostgresExplainLexer`
34 A lexer to highlight Postgres execution plan.
36 The ``tests/examplefiles`` contains a few test files with data to be
37 parsed by these lexers.
39 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
40 :license: BSD, see LICENSE for details.
41"""
43import re
45from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
46from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
47 Keyword, Name, String, Number, Generic, Literal
48from pygments.lexers import get_lexer_by_name, ClassNotFound
50from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
51 PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
52from pygments.lexers._mysql_builtins import \
53 MYSQL_CONSTANTS, \
54 MYSQL_DATATYPES, \
55 MYSQL_FUNCTIONS, \
56 MYSQL_KEYWORDS, \
57 MYSQL_OPTIMIZER_HINTS
59from pygments.lexers import _tsql_builtins
62__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
63 'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
64 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']
# Matches one line of input including its trailing newline.
line_re = re.compile('.*?\n')
# Primary and continuation prompts of the sqlite3 command-line client.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# LANGUAGE clause near a $-string; group 1 is the language name
# (used by language_callback to pick the inner lexer).
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO keyword introduces an anonymous block, assumed to be plpgsql.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer

    The lexer is chosen looking for a nearby LANGUAGE or assumed as
    plpgsql if inside a DO statement and no LANGUAGE has been found.
    """
    def _pick_inner_lexer():
        # 1. LANGUAGE clause right after the string (within 100 chars)?
        after = lexer.text[match.end():match.end() + 100]
        found = language_re.match(after)
        if found is not None:
            return lexer._get_lexer(found.group(1))
        # 2. LANGUAGE clause shortly before the string? take the closest.
        before = lexer.text[max(0, match.start() - 100):match.start()]
        hits = list(language_re.finditer(before))
        if hits:
            return lexer._get_lexer(hits[-1].group(1))
        # 3. Inside a DO statement? default to plpgsql.
        context = lexer.text[max(0, match.start() - 25):match.start()]
        if list(do_re.finditer(context)):
            return lexer._get_lexer('plpgsql')
        return None

    lx = _pick_inner_lexer()

    # Opening delimiter: group 1 = $, 2 = tag, 3 = $
    for g in (1, 2, 3):
        token = String.Delimiter if g == 2 else String
        yield (match.start(g), token, match.group(g))
    # Group 4 = string contents: delegate to the inner lexer when found
    if lx:
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # Closing delimiter: group 5 = $, 6 = tag, 7 = $
    for g in (5, 6, 7):
        token = String.Delimiter if g == 6 else String
        yield (match.start(g), token, match.group(g))
class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor.  If
    they had, ``_tokens`` could be created on that ancestor and not updated
    for the other classes, resulting e.g. in PL/pgSQL parsed as SQL.  This
    shortcoming seems to suggest that regexp lexers are not really
    subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep a reference to the full input: `language_callback` needs it
        # to search for a LANGUAGE clause around a $-string.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer for the PG language name *lang*, or None.

        Derives extra candidate aliases from the name: e.g. for
        'plpythonu' also 'pythonu', 'plpython' and 'python' are tried.
        """
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        candidates = [lang]
        has_pl_prefix = lang.startswith('pl')
        has_u_suffix = lang.endswith('u')
        if has_pl_prefix:
            candidates.append(lang[2:])
        if has_u_suffix:
            candidates.append(lang[:-1])
        if has_pl_prefix and has_u_suffix:
            candidates.append(lang[2:-1])

        for name in candidates:
            try:
                return get_lexer_by_name(name, **self.options)
            except ClassNotFound:
                pass
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    # NOTE: rule order matters, and PlPgsqlLexer/PsqlRegexLexer copy and
    # mutate this table (they locate the Keyword rule by its token type),
    # so be careful when reordering or re-tokenizing rules here.
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Builtin and pseudo data types; multi-word type names accept
            # any whitespace between the words.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional parameter, e.g. $1
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $-quoted string; content is delegated to the lexer of the
            # nearby LANGUAGE clause (see language_callback)
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the push
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote escapes a quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled double-quote escape
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Copy each state's rule list so the mutations below do not leak
    # back into PostgresLexer.tokens.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list: find the SQL Keyword rule by token type
    # and replace it with one that also knows the PL/pgSQL keywords
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i  # avoid leaving the loop variable as a class attribute
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),  # block label
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.  It is used internally by PostgresConsoleLexer.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Copy each state's rule list so the additions below do not leak
    # back into PostgresLexer.tokens.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' rather than '#pop', so each
        # newline inside a backslash command grows the state stack --
        # looks harmless in practice but worth confirming upstream.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable interpolation: :var, :'var', :"var"
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
# Regular expressions used by PostgresConsoleLexer to split a psql session
# into prompts, commands, and output/notice lines.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
# A backslash (psql) command on a line of its own, split into leading
# whitespace, the command itself, and trailing whitespace.
# (A simpler duplicate definition that was immediately overwritten has
# been removed.)
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
# Notice/message prefix followed by the rest of the line.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
class lookahead:
    """Wrap an iterator and allow pushing back an item.

    Used by PostgresConsoleLexer to give back a line that turns out to
    belong to the next prompt/command cycle.
    """

    # Private sentinel marking "no pushed-back item".  Using an object()
    # instead of None (the previous implementation) means any value --
    # including None itself -- can be pushed back without being lost.
    _MISSING = object()

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = self._MISSING

    def __iter__(self):
        return self

    def send(self, i):
        """Push *i* back; the next call to ``next()`` will return it."""
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not self._MISSING:
            ni = self._nextitem
            self._nextitem = self._MISSING
            return ni
        return next(self.iter)

    # Backward-compatible alias for the old iterator protocol name.
    next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    Splits the input into prompt/command/output cycles: command lines are
    highlighted with PsqlRegexLexer, prompts and output get Generic tokens.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets us push a line back when it belongs to the
        # next prompt/command cycle
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # record the prompt as an insertion so it can be merged
                    # back into the command token stream by do_insertions()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # once an ERROR/FATAL notice is seen, the remaining
                    # output of this cycle is rendered as Generic.Error
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # input exhausted without hitting a new prompt: done
                return
class PostgresExplainLexer(RegexLexer):
    """
    Handle PostgreSQL EXPLAIN output

    .. versionadded:: 2.15
    """

    name = 'PostgreSQL EXPLAIN dialect'
    aliases = ['postgres-explain']
    filenames = ['*.explain']
    mimetypes = ['text/x-postgresql-explain']

    tokens = {
        'root': [
            (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
            (r'(\s+)', Whitespace),

            # This match estimated cost and effectively measured counters with ANALYZE
            # Then, we move to instrumentation state
            (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # Misc keywords
            (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
                    'originally', 'row', 'rows', 'Hits', 'Misses',
                    'Evictions', 'Overflows'), suffix=r'\b'),
             Comment.Single),

            (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
            (r'(shared|temp|local)', Keyword.Pseudo),

            # We move to sort state in order to emphasize specific keywords (especially disk access)
            (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

            # These keywords can be followed by an object, like a table
            (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
             bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
            (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),

            # These keywords can be followed by a predicate
            (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
                    'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
                    'TID Cond', 'Run Condition', 'Order By', 'Function Call',
                    'Table Function Call', 'Inner Unique', 'Params Evaluated',
                    'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
                    'Relations', 'Remote SQL'), suffix=r'\b'),
             Comment.Preproc, 'predicate'),

            # Special keyword to handle ON CONFLICT
            (r'Conflict ', Comment.Preproc, 'conflict'),

            # Special keyword for InitPlan or SubPlan
            (r'(InitPlan|SubPlan)( )(\d+)( )',
             bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
             'init_plan'),

            (words(('Sort Method', 'Join Filter', 'Planning time',
                    'Planning Time', 'Execution time', 'Execution Time',
                    'Workers Planned', 'Workers Launched', 'Buffers',
                    'Planning', 'Worker', 'Query Identifier', 'Time',
                    'Full-sort Groups'), suffix=r'\b'), Comment.Preproc),

            # Emphasize these keywords
            (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
                    'Rows Removed by Index Recheck',
                    'Heap Fetches', 'never executed'),
                   suffix=r'\b'), Name.Exception),
            (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),

            (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

            # join keywords
            (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
            (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
            (r'Backward', Comment.Preproc),
            (r'(Intersect|Except|Hash)', Comment.Preproc),

            (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),

            # Treat "on" and "using" as a punctuation
            (r'(on|using)', Punctuation, 'object_name'),

            # strings
            (r"'(''|[^'])*'", String.Single),
            # numbers
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),

            # boolean
            (r'(true|false)', Name.Constant),
            # explain header
            (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
            # Settings
            (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),

            # Handle JIT counters
            (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
            (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

            # Handle Triggers counters
            (r'(Trigger)( )(\S*)(:)( )',
             bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),
        ],
        'expression': [
            # matches any kind of parenthesized expression
            # the first opening paren is matched by the 'caller'
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            (r'(never executed)', Name.Exception),
            (r'[^)(]+', Comment),
        ],
        'object_name': [
            # This is a cost or analyze measure
            (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # if object_name is parenthesized, mark opening paren as
            # punctuation, call 'expression', and exit state
            (r'\(', Punctuation, 'expression'),
            (r'(on)', Punctuation),
            # matches possibly schema-qualified table and column names
            (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
            (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
            (r'\'\S*\'', Name.Variable),

            # if we encounter a comma, another object is listed
            (r',\n', Punctuation, 'object_name'),
            (r',', Punctuation, 'object_name'),

            # special case: "*SELECT*"
            (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
            (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
            (r'"ANY_subquery"', Name.Variable),

            # Variable $1 ...
            (r'\$\d+', Name.Variable),
            # cast
            (r'::\w+', Name.Variable),
            (r' +', Whitespace),
            (r'"', Punctuation),
            (r'\[\.\.\.\]', Punctuation),
            (r'\)', Punctuation, '#pop'),
        ],
        'predicate': [
            # if predicate is parenthesized, mark paren as punctuation
            (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
            # otherwise color until newline
            (r'[^\n]*', Name.Variable, '#pop'),
        ],
        'instrumentation': [
            (r'=|\.\.', Punctuation),
            (r' +', Whitespace),
            (r'(rows|width|time|loops)', Name.Class),
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),
            (r'\)', Punctuation, '#pop'),
        ],
        'conflict': [
            (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
            (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
            (r'(Filter: )', Comment.Preproc, 'predicate'),
        ],
        'setting': [
            (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
            (r'\, ', Punctuation),
        ],
        'init_plan': [
            (r'\(', Punctuation),
            (r'returns \$\d+(,\$\d+)?', Name.Variable),
            (r'\)', Punctuation, '#pop'),
        ],
        'sort': [
            (r':|kB', Punctuation),
            # FIX: this was ``Comment.Prepoc`` (typo).  Pygments token
            # attribute access silently auto-creates unknown subtokens, so
            # the typo yielded an unstyled bogus token instead of the
            # intended Comment.Preproc.
            (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
            (r'(external|merge|Disk|sort)', Name.Exception),
            (r'(\d+)', Number.Integer),
            (r' +', Whitespace),
        ],
    }
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # ANSI SQL keywords (plus a few PostgreSQL-flavoured extras).
            # FIX: 'TEMP' had a stray leading space (' TEMP'), which made it
            # impossible to match with the \b-suffixed words() pattern.
            # NOTE(review): a few entries look like historical typos
            # ('INDITCATOR', 'SQLWARNINIG', 'PRECEEDS') -- kept as-is to
            # avoid changing highlighting beyond the clear-cut fix.
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            # Builtin data type names
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(self, text):
        # Deliberately returns None: plain SQL is the fallback dialect and
        # should not outbid the more specific SQL lexers.
        return
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # bracket-quoted identifier, e.g. [My Table]
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),  # system variables, e.g. @@ROWCOUNT
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    # Note: pygments calls analyse_text without an instance, hence no self.
    # Returns a rating in [0, 1] estimating how T-SQL-like the text is.
    def analyse_text(text):
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
            return rating
776class MySqlLexer(RegexLexer):
777 """The Oracle MySQL lexer.
779 This lexer does not attempt to maintain strict compatibility with
780 MariaDB syntax or keywords. Although MySQL and MariaDB's common code
781 history suggests there may be significant overlap between the two,
782 compatibility between the two is not a target for this lexer.
783 """
785 name = 'MySQL'
786 aliases = ['mysql']
787 mimetypes = ['text/x-mysql']
789 flags = re.IGNORECASE
790 tokens = {
791 'root': [
792 (r'\s+', Whitespace),
794 # Comments
795 (r'(?:#|--\s+).*', Comment.Single),
796 (r'/\*\+', Comment.Special, 'optimizer-hints'),
797 (r'/\*', Comment.Multiline, 'multiline-comment'),
799 # Hexadecimal literals
800 (r"x'([0-9a-f]{2})+'", Number.Hex), # MySQL requires paired hex characters in this form.
801 (r'0x[0-9a-f]+', Number.Hex),
803 # Binary literals
804 (r"b'[01]+'", Number.Bin),
805 (r'0b[01]+', Number.Bin),
807 # Numeric literals
808 (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
809 (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
810 (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
811 (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer), # Integers that are not in a schema object name
813 # Date literals
814 (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
815 Literal.Date),
817 # Time literals
818 (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
819 Literal.Date),
821 # Timestamp literals
822 (
823 r"\{\s*ts\s*(?P<quote>['\"])\s*"
824 r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part
825 r"\s+" # Whitespace between date and time
826 r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" # Time part
827 r"\s*(?P=quote)\s*\}",
828 Literal.Date
829 ),
831 # String literals
832 (r"'", String.Single, 'single-quoted-string'),
833 (r'"', String.Double, 'double-quoted-string'),
835 # Variables
836 (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
837 (r'@[a-z0-9_$.]+', Name.Variable),
838 (r"@'", Name.Variable, 'single-quoted-variable'),
839 (r'@"', Name.Variable, 'double-quoted-variable'),
840 (r"@`", Name.Variable, 'backtick-quoted-variable'),
841 (r'\?', Name.Variable), # For demonstrating prepared statements
843 # Operators
844 (r'[!%&*+/:<=>^|~-]+', Operator),
846 # Exceptions; these words tokenize differently in different contexts.
847 (r'\b(set)(?!\s*\()', Keyword),
848 (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
849 # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.
851 (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
852 (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
853 (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
854 (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
855 bygroups(Name.Function, Whitespace, Punctuation)),
857 # Schema object names
858 #
859 # Note: Although the first regex supports unquoted all-numeric
860 # identifiers, this will not be a problem in practice because
861 # numeric literals have already been handled above.
862 #
863 ('[0-9a-z$_\u0080-\uffff]+', Name),
864 (r'`', Name.Quoted, 'schema-object-name'),
866 # Punctuation
867 (r'[(),.;]', Punctuation),
868 ],
870 # Multiline comment substates
871 # ---------------------------
873 'optimizer-hints': [
874 (r'[^*a-z]+', Comment.Special),
875 (r'\*/', Comment.Special, '#pop'),
876 (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
877 ('[a-z]+', Comment.Special),
878 (r'\*', Comment.Special),
879 ],
881 'multiline-comment': [
882 (r'[^*]+', Comment.Multiline),
883 (r'\*/', Comment.Multiline, '#pop'),
884 (r'\*', Comment.Multiline),
885 ],
887 # String substates
888 # ----------------
890 'single-quoted-string': [
891 (r"[^'\\]+", String.Single),
892 (r"''", String.Escape),
893 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
894 (r"'", String.Single, '#pop'),
895 ],
897 'double-quoted-string': [
898 (r'[^"\\]+', String.Double),
899 (r'""', String.Escape),
900 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
901 (r'"', String.Double, '#pop'),
902 ],
904 # Variable substates
905 # ------------------
907 'single-quoted-variable': [
908 (r"[^']+", Name.Variable),
909 (r"''", Name.Variable),
910 (r"'", Name.Variable, '#pop'),
911 ],
913 'double-quoted-variable': [
914 (r'[^"]+', Name.Variable),
915 (r'""', Name.Variable),
916 (r'"', Name.Variable, '#pop'),
917 ],
919 'backtick-quoted-variable': [
920 (r'[^`]+', Name.Variable),
921 (r'``', Name.Variable),
922 (r'`', Name.Variable, '#pop'),
923 ],
925 # Schema object name substates
926 # ----------------------------
927 #
928 # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
929 # formatters will style them as "Name" by default but add
930 # additional styles based on the token name. This gives users
931 # flexibility to add custom styles as desired.
932 #
933 'schema-object-name': [
934 (r'[^`]+', Name.Quoted),
935 (r'``', Name.Quoted.Escape),
936 (r'`', Name.Quoted, '#pop'),
937 ],
938 }
940 def analyse_text(text):
941 rating = 0
942 name_between_backtick_count = len(
943 name_between_backtick_re.findall(text))
944 name_between_bracket_count = len(
945 name_between_bracket_re.findall(text))
946 # Same logic as above in the TSQL analysis
947 dialect_name_count = name_between_backtick_count + name_between_bracket_count
948 if dialect_name_count >= 1 and \
949 name_between_backtick_count >= 2 * name_between_bracket_count:
950 # Found at least twice as many `name` as [name].
951 rating += 0.5
952 elif name_between_backtick_count > name_between_bracket_count:
953 rating += 0.2
954 elif name_between_backtick_count > 0:
955 rating += 0.1
956 return rating
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        """Tokenize an interactive sqlite3 transcript.

        Lines matching the sqlite prompt are buffered as SQL and lexed
        with :class:`SqlLexer` once a non-prompt line (or the end of
        input) is reached; the prompt itself is re-inserted via
        ``do_insertions``.  Non-prompt lines are emitted as plain output,
        or as a traceback when they carry the sqlite error prefix.
        """
        sql_lexer = SqlLexer(**self.options)

        buffered_sql = ''
        prompt_tokens = []
        for line_match in line_re.finditer(data):
            line = line_match.group()
            if sqlite_prompt_re.match(line) is not None:
                # Columns 0-6 hold the prompt, column 7 a separating
                # space; everything after that is SQL input.
                pos = len(buffered_sql)
                prompt_tokens.append((pos, [(0, Generic.Prompt, line[:7])]))
                prompt_tokens.append((pos, [(7, Whitespace, ' ')]))
                buffered_sql += line[8:]
                continue
            # A non-prompt line terminates any pending SQL statement.
            if buffered_sql:
                yield from do_insertions(
                    prompt_tokens,
                    sql_lexer.get_tokens_unprocessed(buffered_sql))
                buffered_sql = ''
                prompt_tokens = []
            if line.startswith('SQL error: '):
                yield (line_match.start(), Generic.Traceback, line)
            else:
                yield (line_match.start(), Generic.Output, line)
        # Flush SQL still buffered when the input ends on a prompt line.
        if buffered_sql:
            yield from do_insertions(
                prompt_tokens,
                sql_lexer.get_tokens_unprocessed(buffered_sql))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    .. versionadded:: 2.0
    """
    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    # RQL keywords are case-insensitive; this also lower-cases matching
    # for the builtin names below.
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # Statement and clause keywords, plus literal constants
            # (TODAY/NOW/TRUE/FALSE/NULL).
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            # Built-in names of the RQL/CubicWeb data model.
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Identifiers/variables; a trailing '?' is part of the name.
            (r'[A-Z_]\w*\??', Name),
            # String literals; a doubled quote escapes the delimiter.
            (r"'(''|[^'])*'", String.Single),
            # NOTE(review): double-quoted literals are also emitted as
            # String.Single — presumably so both quote styles highlight
            # identically; confirm before "fixing" to String.Double.
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }