1""" 

2 pygments.lexers.sql 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various SQL dialects and related interactive sessions. 

6 

7 Postgres specific lexers: 

8 

9 `PostgresLexer` 

10 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL 

11 lexer are: 

12 

13 - keywords and data types list parsed from the PG docs (run the 

14 `_postgres_builtins` module to update them); 

15 - Content of $-strings parsed using a specific lexer, e.g. the content 

16 of a PL/Python function is parsed using the Python lexer; 

17 - parse PG specific constructs: E-strings, $-strings, U&-strings, 

18 different operators and punctuation. 

19 

20 `PlPgsqlLexer` 

21 A lexer for the PL/pgSQL language. Adds a few specific construct on 

22 top of the PG SQL lexer (such as <<label>>). 

23 

24 `PostgresConsoleLexer` 

25 A lexer to highlight an interactive psql session: 

26 

27 - identifies the prompt and does its best to detect the end of command 

28 in multiline statement where not all the lines are prefixed by a 

29 prompt, telling them apart from the output; 

30 - highlights errors in the output and notification levels; 

31 - handles psql backslash commands. 

32 

33 The ``tests/examplefiles`` contains a few test files with data to be 

34 parsed by these lexers. 

35 

36 :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. 

37 :license: BSD, see LICENSE for details. 

38""" 

import re

from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
    Keyword, Name, String, Number, Generic, Literal
from pygments.lexers import get_lexer_by_name, ClassNotFound

from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
    PSEUDO_TYPES, PLPGSQL_KEYWORDS
from pygments.lexers._mysql_builtins import \
    MYSQL_CONSTANTS, \
    MYSQL_DATATYPES, \
    MYSQL_FUNCTIONS, \
    MYSQL_KEYWORDS, \
    MYSQL_OPTIMIZER_HINTS

from pygments.lexers import _tsql_builtins


__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
           'SqliteConsoleLexer', 'RqlLexer']

line_re = re.compile('.*?\n')
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')


def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen by looking for a nearby LANGUAGE clause, or assumed
    to be plpgsql if inside a DO statement and no LANGUAGE has been found.
    """
    lx = None
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            lx = lexer._get_lexer(m[-1].group(1))
        else:
            m = list(do_re.finditer(
                lexer.text[max(0, match.start()-25):match.start()]))
            if m:
                lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
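# For illustration (an assumed example, not from the original source):
# given input such as
#
#     CREATE FUNCTION f() RETURNS int AS $py$ return 42 $py$ LANGUAGE plpythonu;
#
# the dollar-quote rule in PostgresLexer.tokens matches $py$ ... $py$, the
# callback finds the trailing LANGUAGE clause, and "return 42" is re-lexed
# with the Python lexer (resolved by _get_lexer below).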


class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seems to suggest that regexp lexers are not really subclassable.
    """
    def get_tokens_unprocessed(self, text, *args):
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])
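        # Illustrative note (an assumption, not in the original source): for
        # lang == 'plpython3u' the candidates tried are, in order,
        # 'plpython3u', 'python3u', 'plpython3' and 'python3'; the first
        # alias known to Pygments wins in the loop below.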

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                pass
        else:
            # TODO: better logging
            # print >>sys.stderr, "language not found:", lang
            return None


class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single),
             'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name),
             'quoted-ident'),
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }


class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in the PL/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]


class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific to psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]


re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
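# For illustration (assumed psql defaults, not from the original source):
# re_prompt matches primary prompts such as "template1=#" or "mydb=>" as
# well as continuation prompts like "template1-#", while re_message matches
# server output lines such as "NOTICE:  identifier will be truncated".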


class lookahead:
    """Wrap an iterator and allow pushing back an item."""
    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = None

    def __iter__(self):
        return self

    def send(self, i):
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not None:
            ni = self._nextitem
            self._nextitem = None
            return ni
        return next(self.iter)
    next = __next__
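# A quick sketch of the push-back behaviour (illustrative, not part of the
# original module):
#
#     it = lookahead(['a\n', 'b\n'])
#     first = next(it)    # 'a\n'
#     it.send(first)      # push the item back
#     next(it)            # 'a\n' again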


class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                return
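# For illustration (an assumed psql transcript, not from the original
# source), the lexer above splits input such as
#
#     template1=# SELECT 1
#     template1-#  + 1;
#      ?column?
#     ----------
#             2
#     (1 row)
#
# into Generic.Prompt tokens, SQL tokens for the command, and
# Generic.Output tokens for the result rows.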


class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE',
                'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDICATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD',
                'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEDES', 'PRECISION', 'PREFIX', 'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNING',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in identifiers for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(self, text):
        return


class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating
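# Rough illustration of the heuristic above (assumed inputs, not from the
# original source): "declare @x int" alone rates 1.0, while
# "SELECT [a] FROM [b]" scores 0.5 for its bracketed names, and "go" or
# "@name" each add another 0.1.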


class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility is not a goal of this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard, but
        # formatters will style them as "Name" by default while adding
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating


class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            prompt_match = sqlite_prompt_re.match(line)
            if prompt_match is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:7])]))
                insertions.append((len(curcode),
                                   [(7, Whitespace, ' ')]))
                curcode += line[8:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
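# For illustration (an assumed sqlite3 transcript, not from the original
# source):
#
#     sqlite> SELECT 1;
#     1
#
# The prompt "sqlite>" is 7 characters wide, hence the line[:7] prompt
# slice, the single-space insertion at column 7, and line[8:] for the SQL.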


class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    .. versionadded:: 2.0
    """
    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }