Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/sql.py: 62%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

269 statements  

1""" 

2 pygments.lexers.sql 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various SQL dialects and related interactive sessions. 

6 

7 Postgres specific lexers: 

8 

9 `PostgresLexer` 

10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL 

11 lexer are: 

12 

13 - keywords and data types list parsed from the PG docs (run the 

14 `_postgres_builtins` module to update them); 

15 - Content of $-strings parsed using a specific lexer, e.g. the content 

16 of a PL/Python function is parsed using the Python lexer; 

17 - parse PG specific constructs: E-strings, $-strings, U&-strings, 

18 different operators and punctuation. 

19 

20 `PlPgsqlLexer` 

21 A lexer for the PL/pgSQL language. Adds a few specific construct on 

22 top of the PG SQL lexer (such as <<label>>). 

23 

24 `PostgresConsoleLexer` 

25 A lexer to highlight an interactive psql session: 

26 

27 - identifies the prompt and does its best to detect the end of command 

28 in multiline statement where not all the lines are prefixed by a 

29 prompt, telling them apart from the output; 

30 - highlights errors in the output and notification levels; 

31 - handles psql backslash commands. 

32 

33 `PostgresExplainLexer` 

34 A lexer to highlight Postgres execution plan. 

35 

36 The ``tests/examplefiles`` contains a few test files with data to be 

37 parsed by these lexers. 

38 

39 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS. 

40 :license: BSD, see LICENSE for details. 

41""" 

42 

43import collections 

44import re 

45 

46from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words 

47from pygments.lexers import _googlesql_builtins 

48from pygments.lexers import _mysql_builtins 

49from pygments.lexers import _postgres_builtins 

50from pygments.lexers import _sql_builtins 

51from pygments.lexers import _tsql_builtins 

52from pygments.lexers import get_lexer_by_name, ClassNotFound 

53from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ 

54 Keyword, Name, String, Number, Generic, Literal 

55 

56 

57__all__ = ['GoogleSqlLexer', 'PostgresLexer', 'PlPgsqlLexer', 

58 'PostgresConsoleLexer', 'PostgresExplainLexer', 'SqlLexer', 

59 'TransactSqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer'] 

60 

# Matches one line including its trailing newline; used to split console
# session transcripts line by line.
line_re = re.compile('.*?\n')
# sqlite3 interactive prompt ("sqlite>") or its continuation prompt (" ...>"),
# only when followed by a space.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# LANGUAGE clause of CREATE FUNCTION; group 1 is the language name, used by
# language_callback to pick a sub-lexer for dollar-quoted function bodies.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO statement: its body defaults to plpgsql when no LANGUAGE is given.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')

# Identifiers for analyse_text()
googlesql_identifiers = (
    _googlesql_builtins.functionnames
    + _googlesql_builtins.keywords
    + _googlesql_builtins.types)

80 

81 

def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen looking for a nearby LANGUAGE clause, or assumed
    to be plpgsql if inside a DO statement and no LANGUAGE has been found.
    """
    sublexer = None
    # First look just after the closing delimiter for a LANGUAGE clause.
    after = language_re.match(lexer.text[match.end():match.end() + 100])
    if after is not None:
        sublexer = lexer._get_lexer(after.group(1))
    else:
        # Otherwise use the last LANGUAGE clause shortly before the string.
        before = list(language_re.finditer(
            lexer.text[max(0, match.start() - 100):match.start()]))
        if before:
            sublexer = lexer._get_lexer(before[-1].group(1))
        elif do_re.search(lexer.text[max(0, match.start() - 25):match.start()]):
            # Inside a DO block the default body language is plpgsql.
            sublexer = lexer._get_lexer('plpgsql')

    # Opening delimiter: group 1 = $, 2 = tag, 3 = $
    for group, token in ((1, String), (2, String.Delimiter), (3, String)):
        yield (match.start(group), token, match.group(group))
    # Group 4 = string contents, delegated to the sub-lexer when one exists.
    if sublexer:
        yield from sublexer.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # Closing delimiter: group 5 = $, 6 = tag, 7 = $
    for group, token in ((5, String), (6, String.Delimiter), (7, String)):
        yield (match.start(group), token, match.group(group))

116 

117 

class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    this way the different lexer don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seem to suggest that regexp lexers are not really subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep the whole input around: language_callback needs to look
        # before and after the current match for a LANGUAGE clause.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        # Build candidate alias names by stripping a leading "pl" and/or a
        # trailing "u" (untrusted variant), e.g. plpython3u -> python3.
        candidates = [lang]
        if lang.startswith('pl'):
            candidates.append(lang[2:])
        if lang.endswith('u'):
            candidates.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            candidates.append(lang[2:-1])

        for candidate in candidates:
            try:
                return get_lexer_by_name(candidate, **self.options)
            except ClassNotFound:
                continue
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None

153 

154 

class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Data and pseudo types from the PG docs; an embedded space in a
            # type name (e.g. "double precision") matches any whitespace run.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in _postgres_builtins.DATATYPES +
                             _postgres_builtins.PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(_postgres_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # numbered query parameter ($1, $2, ...)
            # NOTE(review): the second alternative makes a plain integer match
            # this Float rule, apparently shadowing the Integer rule below --
            # confirm whether that is intended.
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode-escape strings
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$...$tag$ dollar-quoted string; the body (group 4) is
            # dispatched to a sub-lexer by language_callback.
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the state push
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote = escaped quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled double-quote = escaped quote
            (r'"', String.Name, '#pop'),
        ],
    }

210 

211 

class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']
    url = 'https://www.postgresql.org/docs/current/plpgsql.html'
    version_added = '1.5'

    flags = re.IGNORECASE
    # FIXME: use inheritance
    # Shallow per-state copies so the in-place edits below do not mutate
    # PostgresLexer.tokens.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # extend the keywords list: replace the plain-SQL Keyword rule with one
    # that also recognizes the PL/pgSQL keywords
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(_postgres_builtins.KEYWORDS +
                      _postgres_builtins.PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        # for/else: reached only if no Keyword rule was found to replace
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]

245 

246 

class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Shallow per-state copies so the edits below do not mutate
    # PostgresLexer.tokens.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # A backslash command (\d, \copy, ...) switches to the psql-command state.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' rather than '#pop', leaving an
        # extra state on the stack at each newline -- confirm intended.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, optionally quoted (:var, :'var', :"var")
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]

272 

273 

# Regular expressions used by PostgresConsoleLexer to carve a psql session
# transcript into prompts, commands, and server output.
# NOTE: a dead duplicate assignment (re_psql_command = re.compile(r'\s*\\'))
# preceded the definitive one below and was immediately shadowed; removed.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')

282 

283 

class lookahead:
    """Wrap an iterator and allow pushing back an item."""

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = None

    def __iter__(self):
        return self

    def send(self, i):
        # Stash the item; the next call to __next__ returns it first.
        self._nextitem = i
        return i

    def __next__(self):
        # Atomically take and clear any pushed-back item.
        pushed, self._nextitem = self._nextitem, None
        if pushed is None:
            return next(self.iter)
        return pushed

    next = __next__

304 

305 

class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'
    _example = "psql/psql_session.txt"

    def get_tokens_unprocessed(self, data):
        # Alternate between "command" chunks (highlighted via PsqlRegexLexer,
        # prompts re-inserted with do_insertions) and "output" chunks
        # (emitted as Generic.Output / Generic.Error).
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # Server message: emphasize the level prefix; ERROR/FATAL
                    # switch the remaining output to error styling.
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # for/else: input exhausted while reading output -- done.
                return

377 

378 

class PostgresExplainLexer(RegexLexer):
    """
    Handle PostgreSQL EXPLAIN output.

    Highlights plan node names, estimated/actual instrumentation counters,
    predicates, and the various "Key: value" annotations produced by
    EXPLAIN [ANALYZE, BUFFERS, SETTINGS, ...].
    """

    name = 'PostgreSQL EXPLAIN dialect'
    aliases = ['postgres-explain']
    filenames = ['*.explain']
    mimetypes = ['text/x-postgresql-explain']
    url = 'https://www.postgresql.org/docs/current/using-explain.html'
    version_added = '2.15'

    tokens = {
        'root': [
            (r'(:|\(|\)|ms|kB|->|\.\.|\,|\/|=|%)', Punctuation),
            (r'(\s+)', Whitespace),

            # This match estimated cost and effectively measured counters with ANALYZE
            # Then, we move to instrumentation state
            (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # Misc keywords
            (words(('actual', 'Memory Usage', 'Disk Usage', 'Memory', 'Buckets',
                    'Batches', 'originally', 'row', 'rows', 'Hits', 'Misses',
                    'Evictions', 'Overflows', 'Planned Partitions', 'Estimates',
                    'capacity', 'distinct keys', 'lookups', 'hit percent',
                    'Index Searches', 'Storage', 'Disk Maximum Storage'), suffix=r'\b'),
             Comment.Single),

            (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
            (r'(shared|temp|local)', Keyword.Pseudo),

            # We move to sort state in order to emphasize specific keywords (especially disk access)
            (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

            # These keywords can be followed by an object, like a table
            (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
             bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
            (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),

            # These keywords can be followed by a predicate
            (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
                    'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
                    'TID Cond', 'Run Condition', 'Order By', 'Function Call',
                    'Table Function Call', 'Inner Unique', 'Params Evaluated',
                    'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
                    'Relations', 'Remote SQL', 'Disabled'), suffix=r'\b'),
             Comment.Preproc, 'predicate'),

            # Special keyword to handle ON CONFLICT
            (r'Conflict ', Comment.Preproc, 'conflict'),

            # Special keyword for InitPlan or SubPlan
            (r'(InitPlan|SubPlan)( )(\d+)( )',
             bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
             'init_plan'),

            (words(('Sort Method', 'Join Filter', 'Planning time',
                    'Planning Time', 'Execution time', 'Execution Time',
                    'Workers Planned', 'Workers Launched', 'Buffers',
                    'Planning', 'Worker', 'Query Identifier', 'Time',
                    'Full-sort Groups', 'Pre-sorted Groups'), suffix=r'\b'), Comment.Preproc),

            # Emphasize these keywords

            (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
                    'Rows Removed by Index Recheck',
                    'Heap Fetches', 'never executed'),
                   suffix=r'\b'), Name.Exception),
            (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),

            (words(_postgres_builtins.EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

            # join keywords
            (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
            (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
            (r'Backward', Comment.Preproc),
            (r'(Intersect|Except|Hash)', Comment.Preproc),

            (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),

            # Treat "on" and "using" as a punctuation
            (r'(on|using)', Punctuation, 'object_name'),

            # strings
            (r"'(''|[^'])*'", String.Single),
            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'(-?\d+)', Number.Integer),

            # boolean
            (r'(true|false)', Name.Constant),
            # explain header
            (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
            # Settings
            (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),

            # Handle JIT counters
            (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
            (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

            # Handle Triggers counters
            (r'(Trigger)( )(\S*)(:)( )',
             bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),

        ],
        'expression': [
            # matches any kind of parenthesized expression
            # the first opening paren is matched by the 'caller'
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            (r'(never executed)', Name.Exception),
            (r'[^)(]+', Comment),
        ],
        'object_name': [

            # This is a cost or analyze measure
            (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # if object_name is parenthesized, mark opening paren as
            # punctuation, call 'expression', and exit state
            (r'\(', Punctuation, 'expression'),
            (r'(on)', Punctuation),
            # matches possibly schema-qualified table and column names
            (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
            (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
            (r'\'\S*\'', Name.Variable),

            # if we encounter a comma, another object is listed
            (r',\n', Punctuation, 'object_name'),
            (r',', Punctuation, 'object_name'),

            # special case: "*SELECT*"
            (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
            (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
            (r'"ANY_subquery"', Name.Variable),

            # Variable $1 ...
            (r'\$\d+', Name.Variable),
            # cast
            (r'::\w+', Name.Variable),
            (r' +', Whitespace),
            (r'"', Punctuation),
            (r'\[\.\.\.\]', Punctuation),
            (r'\)', Punctuation, '#pop'),
        ],
        'predicate': [
            # if predicate is parenthesized, mark paren as punctuation
            (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
            # otherwise color until newline
            (r'[^\n]*', Name.Variable, '#pop'),
        ],
        'instrumentation': [
            (r'=|\.\.', Punctuation),
            (r' +', Whitespace),
            (r'(rows|width|time|loops)', Name.Class),
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),
            (r'\)', Punctuation, '#pop'),
        ],
        'conflict': [
            (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
            (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
            (r'(Filter: )', Comment.Preproc, 'predicate'),
        ],
        'setting': [
            (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
            (r'\, ', Punctuation),
        ],
        'init_plan': [
            (r'\(', Punctuation),
            (r'returns \$\d+(,\$\d+)?', Name.Variable),
            (r'\)', Punctuation, '#pop'),
        ],
        'sort': [
            (r':|kB', Punctuation),
            # FIX: was the misspelled Comment.Prepoc. Pygments token attribute
            # access silently creates new token types, so the typo never
            # raised but emitted a token no style sheet maps.
            (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
            (r'(external|merge|Disk|sort)', Name.Exception),
            (r'(\d+)', Number.Integer),
            (r' +', Whitespace),
        ],
    }

565 

566 

class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']
    url = 'https://en.wikipedia.org/wiki/SQL'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_sql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (words(_sql_builtins.DATATYPES, suffix=r'\b'), Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(self, text):
        # Deliberately returns None: plain SQL is the fallback dialect and
        # never claims a confidence score over the specialized SQL lexers.
        return

606 

607 

class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']
    url = 'https://www.tsql.info'
    version_added = ''

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # NOTE(review): [$|\n] is a character class (literal $, | or
            # newline); in practice it only soaks up the optional newline
            # after the comment text -- confirm before simplifying.
            (r'--.*[$|\n]?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # [bracketed] identifiers
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        # Pygments convention: analyse_text takes the text directly, without
        # a self/cls parameter. Returns a confidence score in [0, 1].
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating

692 

693 

class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']
    url = 'https://www.mysql.com'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(_mysql_builtins.MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'),
             Name.Constant),
            (words(_mysql_builtins.MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'),
             Keyword.Type),
            (words(_mysql_builtins.MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(_mysql_builtins.MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(_mysql_builtins.MYSQL_OPTIMIZER_HINTS, suffix=r'\b'),
             Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        # Pygments convention: analyse_text takes the text directly, without
        # a self/cls parameter. Returns a confidence score in [0, 1].
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating

881 

882 

class GoogleSqlLexer(RegexLexer):
    """
    GoogleSQL is Google's standard SQL dialect, formerly known as ZetaSQL.

    The list of keywords includes reserved words for future use.
    """

    name = 'GoogleSQL'
    aliases = ['googlesql', 'zetasql']
    filenames = ['*.googlesql', '*.googlesql.sql']
    mimetypes = ['text/x-google-sql', 'text/x-google-sql-aux']
    url = 'https://cloud.google.com/bigquery/googlesql'
    version_added = '2.19'

    # Case-insensitive matching: SQL keywords and identifiers may appear
    # in any case.  NOTE: rule order in 'root' is significant; more
    # specific patterns (literals, variables) must precede the generic
    # schema-object-name catch-all near the end.
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments: '#' or '-- ' to end of line, or C-style /* ... */
            # handled in a substate so it can span multiple lines.
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals, ODBC-style: { d 'YYYY-MM-DD' }
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals, ODBC-style: { t 'HH:MM:SS' }
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals, ODBC-style: { ts 'YYYY-MM-DD HH:MM:SS' }
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals; escapes are handled in the substates.
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables: system variables (@@...), then user variables in
            # their bare and quoted forms.
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),

            # Constants, types, keywords, functions, operators — word lists
            # generated into the _googlesql_builtins module.
            (words(_googlesql_builtins.constants, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(_googlesql_builtins.types, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(_googlesql_builtins.keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(_googlesql_builtins.functionnames, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),
            (words(_googlesql_builtins.operators, prefix=r'\b', suffix=r'\b'), Operator),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),   # doubled quote escapes the delimiter
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),   # doubled quote escapes the delimiter
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        # Rate by the density of known GoogleSQL identifiers among the
        # whitespace-separated tokens.  `googlesql_identifiers` is a
        # module-level word collection defined elsewhere in this file.
        tokens = collections.Counter(text.split())
        return 0.001 * sum(count for t, count in tokens.items()
                           if t in googlesql_identifiers)

1041 

1042 

class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']
    url = 'https://www.sqlite.org'
    version_added = '0.11'
    _example = "sqlite3/sqlite3.sqlite3-console"

    def get_tokens_unprocessed(self, data):
        # SQL statements typed at the prompt are delegated to SqlLexer;
        # prompt markers and program output are tokenized here.
        sql = SqlLexer(**self.options)

        code = ''
        prompt_insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if sqlite_prompt_re.match(line) is not None:
                # A prompt line: remember where the prompt tokens belong
                # and accumulate the SQL text that follows it.
                offset = len(code)
                prompt_insertions.append(
                    (offset, [(0, Generic.Prompt, line[:7])]))
                prompt_insertions.append(
                    (offset, [(7, Whitespace, ' ')]))
                code += line[8:]
                continue
            # A non-prompt line: first flush any buffered SQL ...
            if code:
                yield from do_insertions(
                    prompt_insertions, sql.get_tokens_unprocessed(code))
                code = ''
                prompt_insertions = []
            # ... then emit this line as error traceback or plain output.
            if line.startswith('SQL error: '):
                yield (match.start(), Generic.Traceback, line)
            else:
                yield (match.start(), Generic.Output, line)
        # Flush SQL left over at the end of the input.
        if code:
            yield from do_insertions(
                prompt_insertions, sql.get_tokens_unprocessed(code))

1083 

1084 

class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.
    """
    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']
    version_added = '2.0'

    # Keywords match case-insensitively; rule order matters (keywords and
    # builtins must be tried before the generic Name rule).
    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # RQL statement and clause keywords.
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            # Built-in entity/relation types from the CubicWeb schema.
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Variables and entity-type names; optional trailing '?' marks
            # an optional variable in RQL syntax.
            (r'[A-Z_]\w*\??', Name),
            # Quoted strings; a doubled quote escapes the delimiter.  Both
            # quote styles are emitted as String.Single.
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }