1"""
2 pygments.lexers.tablegen
3 ~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexer for LLVM's TableGen DSL.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11from pygments.lexer import RegexLexer, include, words, using
12from pygments.lexers.c_cpp import CppLexer
13from pygments.token import Comment, Keyword, Name, Number, Operator, \
14 Punctuation, String, Text, Whitespace, Error
15
16__all__ = ['TableGenLexer']
17
# Reserved words that introduce or structure TableGen statements. The
# primitive type names and the boolean literals are kept in separate tuples
# (KEYWORDS_TYPE / KEYWORDS_CONST) so they can be given their own token types.
KEYWORDS = (
    'assert',
    'class',
    'code',
    'def',
    'dump',
    'else',
    'foreach',
    'defm',
    'defset',
    # ``deftype`` introduces a type alias statement; without this entry it
    # would be lexed as an ordinary identifier.
    'deftype',
    'defvar',
    'field',
    'if',
    'in',
    'include',
    'let',
    'multiclass',
    'then',
)
37
# Boolean literals; highlighted as Keyword.Constant by the lexer.
KEYWORDS_CONST = ('false', 'true')
# Built-in type names; highlighted as Keyword.Type by the lexer.
KEYWORDS_TYPE = ('bit', 'bits', 'dag', 'int', 'list', 'string')
50
# Names of the built-in "bang" operators, listed without the leading ``!``
# and kept in alphabetical order. Any ``!name`` that is not listed here is
# reported as an error token by the lexer, so operators missing from this
# tuple are highlighted as mistakes.
BANG_OPERATORS = (
    'add',
    'and',
    'cast',
    'con',
    'cond',
    'dag',
    'div',
    'empty',
    'eq',
    'exists',
    'filter',
    'find',
    'foldl',
    'foreach',
    'ge',
    'getdagarg',
    'getdagname',
    'getdagop',
    'gt',
    'head',
    'if',
    'initialized',
    'instances',
    'interleave',
    'isa',
    'le',
    'listconcat',
    'listflatten',
    'listremove',
    'listsplat',
    'logtwo',
    'lt',
    'match',
    'mul',
    'ne',
    'not',
    'or',
    'range',
    'repr',
    'setdagarg',
    'setdagname',
    'setdagop',
    'shl',
    'size',
    'sra',
    'srl',
    'strconcat',
    'sub',
    'subst',
    'substr',
    'tail',
    'tolower',
    'toupper',
    'xor',
)
103
class TableGenLexer(RegexLexer):
    """
    Lexer for LLVM's TableGen DSL.

    Block comments may nest, identifiers may begin with digits, and the
    contents of ``[{ ... }]`` code blocks are handed to the C++ lexer.
    """

    name = 'TableGen'
    url = 'https://llvm.org/docs/TableGen/ProgRef.html'
    aliases = ['tablegen', 'td']
    filenames = ['*.td']

    version_added = '2.19'

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*?$', Comment.Single),
            # The trailing \b keeps e.g. ``#elsewhere`` from being split into
            # the ``#else`` directive followed by a name.
            (r'#(define|ifdef|ifndef|else|endif)\b', Comment.Preproc),

            # Binary/hex numbers. Note that these take priority over names,
            # which may begin with numbers.
            (r'0b[10]+', Number.Bin),
            (r'0x[0-9a-fA-F]+', Number.Hex),

            # Keywords
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (words(KEYWORDS_CONST, suffix=r'\b'), Keyword.Constant),
            (words(KEYWORDS_TYPE, suffix=r'\b'), Keyword.Type),

            # Bang operators
            (words(BANG_OPERATORS, prefix=r'\!', suffix=r'\b'), Operator),
            # Unknown bang operators are an error
            (r'![a-zA-Z]+', Error),

            # Names and identifiers
            (r'[0-9]*[a-zA-Z_][a-zA-Z_0-9]*', Name),
            (r'\$[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),

            # Place numbers after keywords. Names/identifiers may begin with
            # numbers, and we want to parse 1X as one name token as opposed to
            # a number and a name.
            (r'[-\+]?[0-9]+', Number.Integer),

            # String literals
            (r'"', String, 'dqs'),
            (r'\[\{', Text, 'codeblock'),

            # Misc. punctuation
            (r'[-+\[\]{}()<>\.,;:=?#]+', Punctuation),
        ],
        # Block comments; an inner ``/*`` pushes another level so that
        # comments nest.
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ],
        'strings': [
            (r'\\[\\\'"tn]', String.Escape),
            (r'[^\\"]+', String),
        ],
        # Double-quoted string, a la C
        'dqs': [
            (r'"', String, '#pop'),
            include('strings'),
        ],
        # No escaping inside a code block - everything is literal.
        # Assume that the code inside a code block is C++. This isn't always
        # true in TableGen, but it is by far the most common scenario.
        'codeblock': [
            (r'\}\]', Text, '#pop'),
            # Consume everything up to (but not including) the closing ``}]``.
            # The lookahead leaves the character after a ``}`` unconsumed, so
            # a block ending in ``}}]`` still terminates, and the ``+``
            # quantifier guarantees the rule never matches the empty string,
            # which would otherwise stall the lexer at end of input when a
            # code block is left unterminated.
            (r'(?:[^}]+|\}(?!\]))+', using(CppLexer)),
        ],
    }