1"""
2 pygments.lexers._postgres_builtins
3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Self-updating data files for PostgreSQL lexer.
6
7 Run with `python -I` to update itself.
8
9 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
10 :license: BSD, see LICENSE for details.
11"""
12
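# Autogenerated: the KEYWORDS, DATATYPES and PSEUDO_TYPES tuples below are
# overwritten whenever this module is run as a script, so hand edits to them
# are a waste of time.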
14
# SQL key words, upper-cased and sorted.
#
# This tuple is regenerated by update_myself() (see the bottom of this file)
# from the PG_KEYWORD(...) entries of src/include/parser/kwlist.h.
# update_consts() locates it by the `KEYWORDS = (` opening line and the
# closing line holding only `)`, so keep both of those intact.
KEYWORDS = (
    'ABORT',
    'ABSOLUTE',
    'ACCESS',
    'ACTION',
    'ADD',
    'ADMIN',
    'AFTER',
    'AGGREGATE',
    'ALL',
    'ALSO',
    'ALTER',
    'ALWAYS',
    'ANALYSE',
    'ANALYZE',
    'AND',
    'ANY',
    'ARRAY',
    'AS',
    'ASC',
    'ASENSITIVE',
    'ASSERTION',
    'ASSIGNMENT',
    'ASYMMETRIC',
    'AT',
    'ATOMIC',
    'ATTACH',
    'ATTRIBUTE',
    'AUTHORIZATION',
    'BACKWARD',
    'BEFORE',
    'BEGIN',
    'BETWEEN',
    'BIGINT',
    'BINARY',
    'BIT',
    'BOOLEAN',
    'BOTH',
    'BREADTH',
    'BY',
    'CACHE',
    'CALL',
    'CALLED',
    'CASCADE',
    'CASCADED',
    'CASE',
    'CAST',
    'CATALOG',
    'CHAIN',
    'CHAR',
    'CHARACTER',
    'CHARACTERISTICS',
    'CHECK',
    'CHECKPOINT',
    'CLASS',
    'CLOSE',
    'CLUSTER',
    'COALESCE',
    'COLLATE',
    'COLLATION',
    'COLUMN',
    'COLUMNS',
    'COMMENT',
    'COMMENTS',
    'COMMIT',
    'COMMITTED',
    'COMPRESSION',
    'CONCURRENTLY',
    'CONFIGURATION',
    'CONFLICT',
    'CONNECTION',
    'CONSTRAINT',
    'CONSTRAINTS',
    'CONTENT',
    'CONTINUE',
    'CONVERSION',
    'COPY',
    'COST',
    'CREATE',
    'CROSS',
    'CSV',
    'CUBE',
    'CURRENT',
    'CURRENT_CATALOG',
    'CURRENT_DATE',
    'CURRENT_ROLE',
    'CURRENT_SCHEMA',
    'CURRENT_TIME',
    'CURRENT_TIMESTAMP',
    'CURRENT_USER',
    'CURSOR',
    'CYCLE',
    'DATA',
    'DATABASE',
    'DAY',
    'DEALLOCATE',
    'DEC',
    'DECIMAL',
    'DECLARE',
    'DEFAULT',
    'DEFAULTS',
    'DEFERRABLE',
    'DEFERRED',
    'DEFINER',
    'DELETE',
    'DELIMITER',
    'DELIMITERS',
    'DEPENDS',
    'DEPTH',
    'DESC',
    'DETACH',
    'DICTIONARY',
    'DISABLE',
    'DISCARD',
    'DISTINCT',
    'DO',
    'DOCUMENT',
    'DOMAIN',
    'DOUBLE',
    'DROP',
    'EACH',
    'ELSE',
    'ENABLE',
    'ENCODING',
    'ENCRYPTED',
    'END',
    'ENUM',
    'ESCAPE',
    'EVENT',
    'EXCEPT',
    'EXCLUDE',
    'EXCLUDING',
    'EXCLUSIVE',
    'EXECUTE',
    'EXISTS',
    'EXPLAIN',
    'EXPRESSION',
    'EXTENSION',
    'EXTERNAL',
    'EXTRACT',
    'FALSE',
    'FAMILY',
    'FETCH',
    'FILTER',
    'FINALIZE',
    'FIRST',
    'FLOAT',
    'FOLLOWING',
    'FOR',
    'FORCE',
    'FOREIGN',
    'FORWARD',
    'FREEZE',
    'FROM',
    'FULL',
    'FUNCTION',
    'FUNCTIONS',
    'GENERATED',
    'GLOBAL',
    'GRANT',
    'GRANTED',
    'GREATEST',
    'GROUP',
    'GROUPING',
    'GROUPS',
    'HANDLER',
    'HAVING',
    'HEADER',
    'HOLD',
    'HOUR',
    'IDENTITY',
    'IF',
    'ILIKE',
    'IMMEDIATE',
    'IMMUTABLE',
    'IMPLICIT',
    'IMPORT',
    'IN',
    'INCLUDE',
    'INCLUDING',
    'INCREMENT',
    'INDEX',
    'INDEXES',
    'INHERIT',
    'INHERITS',
    'INITIALLY',
    'INLINE',
    'INNER',
    'INOUT',
    'INPUT',
    'INSENSITIVE',
    'INSERT',
    'INSTEAD',
    'INT',
    'INTEGER',
    'INTERSECT',
    'INTERVAL',
    'INTO',
    'INVOKER',
    'IS',
    'ISNULL',
    'ISOLATION',
    'JOIN',
    'KEY',
    'LABEL',
    'LANGUAGE',
    'LARGE',
    'LAST',
    'LATERAL',
    'LEADING',
    'LEAKPROOF',
    'LEAST',
    'LEFT',
    'LEVEL',
    'LIKE',
    'LIMIT',
    'LISTEN',
    'LOAD',
    'LOCAL',
    'LOCALTIME',
    'LOCALTIMESTAMP',
    'LOCATION',
    'LOCK',
    'LOCKED',
    'LOGGED',
    'MAPPING',
    'MATCH',
    'MATERIALIZED',
    'MAXVALUE',
    'METHOD',
    'MINUTE',
    'MINVALUE',
    'MODE',
    'MONTH',
    'MOVE',
    'NAME',
    'NAMES',
    'NATIONAL',
    'NATURAL',
    'NCHAR',
    'NEW',
    'NEXT',
    'NFC',
    'NFD',
    'NFKC',
    'NFKD',
    'NO',
    'NONE',
    'NORMALIZE',
    'NORMALIZED',
    'NOT',
    'NOTHING',
    'NOTIFY',
    'NOTNULL',
    'NOWAIT',
    'NULL',
    'NULLIF',
    'NULLS',
    'NUMERIC',
    'OBJECT',
    'OF',
    'OFF',
    'OFFSET',
    'OIDS',
    'OLD',
    'ON',
    'ONLY',
    'OPERATOR',
    'OPTION',
    'OPTIONS',
    'OR',
    'ORDER',
    'ORDINALITY',
    'OTHERS',
    'OUT',
    'OUTER',
    'OVER',
    'OVERLAPS',
    'OVERLAY',
    'OVERRIDING',
    'OWNED',
    'OWNER',
    'PARALLEL',
    'PARSER',
    'PARTIAL',
    'PARTITION',
    'PASSING',
    'PASSWORD',
    'PLACING',
    'PLANS',
    'POLICY',
    'POSITION',
    'PRECEDING',
    'PRECISION',
    'PREPARE',
    'PREPARED',
    'PRESERVE',
    'PRIMARY',
    'PRIOR',
    'PRIVILEGES',
    'PROCEDURAL',
    'PROCEDURE',
    'PROCEDURES',
    'PROGRAM',
    'PUBLICATION',
    'QUOTE',
    'RANGE',
    'READ',
    'REAL',
    'REASSIGN',
    'RECHECK',
    'RECURSIVE',
    'REF',
    'REFERENCES',
    'REFERENCING',
    'REFRESH',
    'REINDEX',
    'RELATIVE',
    'RELEASE',
    'RENAME',
    'REPEATABLE',
    'REPLACE',
    'REPLICA',
    'RESET',
    'RESTART',
    'RESTRICT',
    'RETURN',
    'RETURNING',
    'RETURNS',
    'REVOKE',
    'RIGHT',
    'ROLE',
    'ROLLBACK',
    'ROLLUP',
    'ROUTINE',
    'ROUTINES',
    'ROW',
    'ROWS',
    'RULE',
    'SAVEPOINT',
    'SCHEMA',
    'SCHEMAS',
    'SCROLL',
    'SEARCH',
    'SECOND',
    'SECURITY',
    'SELECT',
    'SEQUENCE',
    'SEQUENCES',
    'SERIALIZABLE',
    'SERVER',
    'SESSION',
    'SESSION_USER',
    'SET',
    'SETOF',
    'SETS',
    'SHARE',
    'SHOW',
    'SIMILAR',
    'SIMPLE',
    'SKIP',
    'SMALLINT',
    'SNAPSHOT',
    'SOME',
    'SQL',
    'STABLE',
    'STANDALONE',
    'START',
    'STATEMENT',
    'STATISTICS',
    'STDIN',
    'STDOUT',
    'STORAGE',
    'STORED',
    'STRICT',
    'STRIP',
    'SUBSCRIPTION',
    'SUBSTRING',
    'SUPPORT',
    'SYMMETRIC',
    'SYSID',
    'SYSTEM',
    'TABLE',
    'TABLES',
    'TABLESAMPLE',
    'TABLESPACE',
    'TEMP',
    'TEMPLATE',
    'TEMPORARY',
    'TEXT',
    'THEN',
    'TIES',
    'TIME',
    'TIMESTAMP',
    'TO',
    'TRAILING',
    'TRANSACTION',
    'TRANSFORM',
    'TREAT',
    'TRIGGER',
    'TRIM',
    'TRUE',
    'TRUNCATE',
    'TRUSTED',
    'TYPE',
    'TYPES',
    'UESCAPE',
    'UNBOUNDED',
    'UNCOMMITTED',
    'UNENCRYPTED',
    'UNION',
    'UNIQUE',
    'UNKNOWN',
    'UNLISTEN',
    'UNLOGGED',
    'UNTIL',
    'UPDATE',
    'USER',
    'USING',
    'VACUUM',
    'VALID',
    'VALIDATE',
    'VALIDATOR',
    'VALUE',
    'VALUES',
    'VARCHAR',
    'VARIADIC',
    'VARYING',
    'VERBOSE',
    'VERSION',
    'VIEW',
    'VIEWS',
    'VOLATILE',
    'WHEN',
    'WHERE',
    'WHITESPACE',
    'WINDOW',
    'WITH',
    'WITHIN',
    'WITHOUT',
    'WORK',
    'WRAPPER',
    'WRITE',
    'XML',
    'XMLATTRIBUTES',
    'XMLCONCAT',
    'XMLELEMENT',
    'XMLEXISTS',
    'XMLFOREST',
    'XMLNAMESPACES',
    'XMLPARSE',
    'XMLPI',
    'XMLROOT',
    'XMLSERIALIZE',
    'XMLTABLE',
    'YEAR',
    'YES',
    'ZONE',
)
474
# Data type names, sorted.
#
# Regenerated by update_myself() from the `<entry><type>` rows that precede
# the first `<sect1` in doc/src/sgml/datatype.sgml (see parse_datatypes()).
# Multi-word entries such as 'with time zone' come from the optional,
# bracketed parts of a type's syntax and are listed as separate names.
# update_consts() locates this tuple by the `DATATYPES = (` opening line and
# the closing line holding only `)`, so keep both of those intact.
DATATYPES = (
    'bigint',
    'bigserial',
    'bit',
    'bit varying',
    'bool',
    'boolean',
    'box',
    'bytea',
    'char',
    'character',
    'character varying',
    'cidr',
    'circle',
    'date',
    'decimal',
    'double precision',
    'float4',
    'float8',
    'inet',
    'int',
    'int2',
    'int4',
    'int8',
    'integer',
    'interval',
    'json',
    'jsonb',
    'line',
    'lseg',
    'macaddr',
    'macaddr8',
    'money',
    'numeric',
    'path',
    'pg_lsn',
    'pg_snapshot',
    'point',
    'polygon',
    'real',
    'serial',
    'serial2',
    'serial4',
    'serial8',
    'smallint',
    'smallserial',
    'text',
    'time',
    'timestamp',
    'timestamptz',
    'timetz',
    'tsquery',
    'tsvector',
    'txid_snapshot',
    'uuid',
    'varbit',
    'varchar',
    'with time zone',
    'without time zone',
    'xml',
)
536
# Pseudo-type names, sorted.
#
# Regenerated by update_myself() from the `datatype-pseudotypes-table` table
# of doc/src/sgml/datatype.sgml (see parse_pseudos()).  This is the raw list;
# the statement following this tuple rebinds PSEUDO_TYPES to a filtered copy.
# update_consts() locates this tuple by the `PSEUDO_TYPES = (` opening line
# and the closing line holding only `)`, so keep both of those intact.
PSEUDO_TYPES = (
    'any',
    'anyarray',
    'anycompatible',
    'anycompatiblearray',
    'anycompatiblemultirange',
    'anycompatiblenonarray',
    'anycompatiblerange',
    'anyelement',
    'anyenum',
    'anymultirange',
    'anynonarray',
    'anyrange',
    'cstring',
    'event_trigger',
    'fdw_handler',
    'index_am_handler',
    'internal',
    'language_handler',
    'pg_ddl_command',
    'record',
    'table_am_handler',
    'trigger',
    'tsm_handler',
    'unknown',
    'void',
)
564
# Drop every pseudo-type whose name, compared case-insensitively, is also
# listed in KEYWORDS.  With the tables in this file that removes 'any',
# 'trigger' and 'unknown'.  The result is re-sorted and stored as a tuple.
PSEUDO_TYPES = tuple(sorted(
    set(PSEUDO_TYPES).difference(keyword.lower() for keyword in KEYWORDS)
))
567
# Additional PL/pgSQL words.  Unlike the tuples above, this one is not
# rewritten by the self-update script and is maintained by hand.
PLPGSQL_KEYWORDS = (
    'ALIAS',
    'CONSTANT',
    'DIAGNOSTICS',
    'ELSIF',
    'EXCEPTION',
    'EXIT',
    'FOREACH',
    'GET',
    'LOOP',
    'NOTICE',
    'OPEN',
    'PERFORM',
    'QUERY',
    'RAISE',
    'RETURN',
    'REVERSE',
    'SQLSTATE',
    'WHILE',
)
573
# Plan node names as printed by EXPLAIN.  Most of them are taken from the
# ExplainNode function in src/backend/commands/explain.c.
#
# This tuple is maintained by hand (the self-update script does not touch
# it).  Keep the existing order: multi-word names are listed ahead of the
# shorter name they start with ('Gather Merge' before 'Gather', 'Hash Join'
# before 'Hash', 'Merge Append' / 'Merge Join' before 'Merge').
EXPLAIN_KEYWORDS = (
    'Aggregate', 'Append',
    'Bitmap Heap Scan', 'Bitmap Index Scan', 'BitmapAnd', 'BitmapOr',
    'CTE Scan', 'Custom Scan',
    'Delete',
    'Foreign Scan', 'Function Scan',
    'Gather Merge', 'Gather', 'Group', 'GroupAggregate',
    'Hash Join', 'Hash', 'HashAggregate',
    'Incremental Sort', 'Index Only Scan', 'Index Scan', 'Insert',
    'Limit', 'LockRows',
    'Materialize', 'Memoize', 'Merge Append', 'Merge Join', 'Merge',
    'MixedAggregate',
    'Named Tuplestore Scan', 'Nested Loop',
    'ProjectSet',
    'Recursive Union', 'Result',
    'Sample Scan', 'Seq Scan', 'SetOp', 'Sort', 'SubPlan', 'Subquery Scan',
    'Table Function Scan', 'Tid Range Scan', 'Tid Scan',
    'Unique', 'Update',
    'Values Scan',
    'WindowAgg', 'WorkTable Scan',
)
628
629
630if __name__ == '__main__': # pragma: no cover
631 import re
632 from urllib.request import urlopen
633
634 from pygments.util import format_lines
635
636 # One man's constant is another man's variable.
637 SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
638 KEYWORDS_URL = SOURCE_URL + '/src/include/parser/kwlist.h'
639 DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
640
def update_myself():
    """Download the PostgreSQL sources and rewrite this module's tables.

    The SGML data type documentation (``DATATYPES_URL``) and the keyword
    list header (``KEYWORDS_URL``) are fetched and parsed with
    ``parse_datatypes``, ``parse_pseudos`` and ``parse_keywords``.  The
    ``DATATYPES``, ``PSEUDO_TYPES`` and ``KEYWORDS`` definitions in
    ``__file__`` are then replaced through ``update_consts``.

    Both downloads and all parsing happen before the first write, so a
    network or parse failure leaves this file untouched.

    Raises:
        ValueError: propagated from the parsers or from ``update_consts``
            when the expected content cannot be found.
    """
    def fetch(url):
        # Use the response as a context manager so the connection is closed
        # deterministically instead of whenever the object is collected.
        with urlopen(url) as response:
            # Undecodable bytes are dropped rather than aborting the update.
            return response.read().decode('utf-8', errors='ignore')

    # The same list of lines feeds both SGML parsers.
    data_file = fetch(DATATYPES_URL).splitlines()
    datatypes = parse_datatypes(data_file)
    pseudos = parse_pseudos(data_file)

    keywords = parse_keywords(fetch(KEYWORDS_URL))

    update_consts(__file__, 'DATATYPES', datatypes)
    update_consts(__file__, 'PSEUDO_TYPES', pseudos)
    update_consts(__file__, 'KEYWORDS', keywords)
653
def parse_keywords(f):
    """Return the sorted, upper-cased keyword names found in *f*.

    *f* is the text of a ``kwlist.h``-style header.  The first quoted
    argument of every ``PG_KEYWORD("...")`` entry is collected; duplicates
    are kept.

    Raises:
        ValueError: if *f* contains no ``PG_KEYWORD`` entry at all.
    """
    names = sorted(
        entry.group(1).upper()
        for entry in re.finditer(r'PG_KEYWORD\("(.+?)"', f)
    )
    if not names:
        raise ValueError('no keyword found')
    return names
664
def parse_datatypes(f):
    """Return the sorted data type names found in the SGML lines *f*.

    *f* is an iterable of lines.  Only lines containing ``<entry><type>``
    are considered, and scanning stops at the first line containing
    ``<sect1``.  A single line may yield several names: comma-separated
    aliases and the optional, bracketed parts of a type's syntax are
    returned as separate entries.  Runs of whitespace inside a name are
    collapsed to a single space.

    Raises:
        ValueError: if no data type is found.  The sibling parsers do the
            same, and it stops the updater from writing an empty tuple.
    """
    names = set()
    for line in f:
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Parse a string such as
        # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
        # into types "time" and "without time zone"

        # Remove the <replaceable> placeholders together with their text,
        # then every remaining tag.
        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub("<[^>]+>", "", line)

        # Split on the square brackets and drop the parts that contain
        # parentheses (what is left of "(p)"-style modifiers).
        for part in re.split(r'[\[\]]', line):
            if '(' in part:
                continue
            for candidate in part.split(','):
                name = ' '.join(candidate.split())
                if name:
                    names.add(name)

    if not names:
        raise ValueError('no datatype found')

    return sorted(names)
693
def parse_pseudos(f):
    """Return the sorted pseudo-type names found in the SGML lines *f*.

    *f* is an iterable of lines.  Lines are skipped up to the one that
    opens ``<table id="datatype-pseudotypes-table">``.  From there, every
    line starting (after optional whitespace) with
    ``<entry><type>NAME</type></entry>`` contributes ``NAME``, until a line
    starting with ``</table>`` is reached.

    Raises:
        ValueError: if the table start or end is missing, or if the table
            holds no matching entry.
    """
    table_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    type_entry = re.compile(r'\s*<entry><type>(.+?)</type></entry>')
    table_end = re.compile(r'\s*</table>')

    lines = iter(f)

    # any() stops consuming at the first match, which leaves `lines`
    # positioned on the line right after the table's opening tag.
    if not any(table_start.match(line) for line in lines):
        raise ValueError('pseudo datatypes table not found')

    names = []
    table_closed = False
    for line in lines:
        entry = type_entry.match(line)
        if entry:
            names.append(entry.group(1))

        if table_end.match(line):
            table_closed = True
            break

    if not table_closed:
        raise ValueError('end of pseudo datatypes table not found')

    if not names:
        raise ValueError('pseudo datatypes not found')

    return sorted(names)
722
def update_consts(filename, constname, content):
    """Replace the ``constname = ( ... )`` definition inside *filename*.

    The file is read as UTF-8.  The first block that starts with a line of
    the form ``constname = (`` and runs to the next line consisting only of
    ``)`` is swapped for the text returned by
    ``format_lines(constname, content)``.  The file is then written back as
    UTF-8 with ``\\n`` line endings.

    Raises:
        ValueError: if no such definition exists in the file.
    """
    with open(filename, encoding='utf-8') as source:
        text = source.read()

    # Multi-line mode anchors ^/$ at line boundaries; dot-all mode lets the
    # lazy .*? run across lines up to the first line holding only ")".
    pattern = rf'^{constname}\s*=\s*\($.*?^\s*\)$'
    found = re.search(pattern, text, re.M | re.S)
    if found is None:
        raise ValueError(f'Could not find existing definition for {constname}')

    # Build the complete new text before reopening the file for writing.
    replacement = format_lines(constname, content)
    updated = ''.join((text[:found.start()], replacement, text[found.end():]))

    with open(filename, 'w', encoding='utf-8', newline='\n') as target:
        target.write(updated)
738
739 update_myself()