Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/asm.py: 94%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2 pygments.lexers.asm
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for assembly languages.
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import RegexLexer, include, bygroups, using, words, \
14 DelegatingLexer, default
15from pygments.lexers.c_cpp import CppLexer, CLexer
16from pygments.lexers.d import DLexer
17from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
18 Other, Keyword, Operator, Whitespace
20__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
21 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',
22 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',
23 'Ca65Lexer', 'Dasm16Lexer']
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    # Regex fragments reused by the token rules below.
    # Double-quoted string literal; ``\"`` inside it does not end the string.
    string = r'"(\\"|[^"])*"'
    # One character that may appear inside an identifier.
    char = r'[\w$.@-]'
    # Symbol name: starts with a letter, ``$`` or ``_``, or is dot-prefixed.
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    # Hexadecimal literal, or a decimal with an optional ``#`` and sign.
    number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
    # ``%``-prefixed register name.
    register = '%' + identifier + r'\b'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Numeric and character constants written with a ``$`` prefix.
            # The ``$`` has to be escaped (a bare ``$`` is the regex
            # end-of-string anchor, so the rule could never match) and these
            # rules have to be tried before ``identifier``, whose first
            # character class also contains ``$``.  The lookahead keeps
            # operands such as ``$1f`` or ``$5-3`` on the identifier rule.
            (r'\$' + number + '(?!' + char + ')', Number.Integer),
            # ``\\'`` is tried first so an escaped quote is not mistaken for
            # the closing quote.
            (r"\$'(\\'|.)'", String.Char),

            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'([;#]|//).*?\n', Comment.Single),
            (r'/[*][\w\W]*?[*]/', Comment.Multiline)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:{}]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Rate how likely *text* is Gas source.

        Returns ``True`` when a line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when a line starts with any other dot-prefixed
        word, and ``None`` (implicitly) otherwise.
        """
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1
def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    ``asm_lexer`` is the lexer class used (via ``using``) to highlight the
    disassembled instruction text of each code line.  Returns a dict with a
    single ``'root'`` state.
    """
    # NOTE(review): despite the name this class accepts every ASCII letter,
    # not only a-f -- confirm whether that is intentional before narrowing it.
    hex_re = r'[0-9A-Za-z]'
    # "<address>:" column at the start of code and relocation lines.
    address = rf'({hex_re}+:)'
    # One or more two-character byte values, each followed by a space.
    raw_bytes = rf'((?:{hex_re}{hex_re} )+)'

    root_rules = [
        # File name & format:
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # Section header
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function labels
        # (With offset)
        (rf'({hex_re}+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # (Without offset)
        (rf'({hex_re}+)( )(<)(.*?)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),
        # Code line with disassembled instructions.  The second literal is
        # deliberately a non-raw string so the pattern keeps a literal tab.
        (rf'( *){address}(\t){raw_bytes}' + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  using(asm_lexer))),
        # Code line without raw instructions (objdump --no-show-raw-insn)
        (rf'( *){address}( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace,
                  using(asm_lexer))),
        # Code line with ascii
        (rf'( *){address}(\t){raw_bytes}( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  String)),
        # Continued code line, only raw opcodes without disassembled
        # instruction
        (rf'( *){address}(\t){raw_bytes}$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
        # Skipped a few bytes
        (r'\t\.\.\.$', Text),
        # Relocation line
        # (With offset)
        (rf'(\t\t\t){address}( )([^\t]+)(\t)(.*?)([-+])(0x{hex_re}+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant, Punctuation, Number.Hex)),
        # (Without offset)
        (rf'(\t\t\t){address}( )([^\t]+)(\t)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant)),
        # Any other line is passed through as ``Other``.
        (r'[^\n]+\n', Other),
    ]
    return {'root': root_rules}
class ObjdumpLexer(RegexLexer):
    """
    For the output of ``objdump -dr``.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    # Shared objdump rules; instruction text is handed to the Gas lexer.
    tokens = _objdump_lexer_tokens(GasLexer)
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        """Pair ``DLexer`` with ``ObjdumpLexer``; *options* are forwarded."""
        super().__init__(DLexer, ObjdumpLexer, **options)
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a misspelling of 'c++-objdump'.  It is kept so that
    # existing users of the old alias keep working; the correctly spelled
    # alias is accepted as well.
    # NOTE(review): if the project keeps a generated alias index elsewhere,
    # it presumably needs regenerating after this change -- confirm.
    aliases = ['cpp-objdump', 'c++-objdumb', 'c++-objdump', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        """Pair ``CppLexer`` with ``ObjdumpLexer``; *options* are forwarded."""
        super().__init__(CppLexer, ObjdumpLexer, **options)
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        """Pair ``CLexer`` with ``ObjdumpLexer``; *options* are forwarded."""
        super().__init__(CLexer, ObjdumpLexer, **options)
class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']
    url = 'https://en.wikipedia.org/wiki/Heterogeneous_System_Architecture#HSA_Intermediate_Layer'
    version_added = '2.2'

    # Regex fragments used by the rules in ``tokens``.
    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'

    # Registers: ``$`` + one of c/s/d/q + a register number.
    register_number = r'[0-9]+'
    register = rf'(\$(c|s|d|q){register_number})\b'

    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    allocQual = r'(alloc\(agent\))'

    # Instruction modifiers
    roundingMod = r'((_ftz)?(_up|_down|_zero|_near))'
    datatypeMod = (r'_('
                   # packedTypes
                   r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
                   r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
                   r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
                   # baseTypes
                   r'u8|s8|u16|s16|u32|s32|u64|s64|'
                   r'b128|b8|b16|b32|b64|b1|'
                   r'f16|f32|f64|'
                   # opaqueType
                   r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric constants
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            # ``@name`` with an optional trailing colon is a label.
            (rf'@{identifier}:?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            (rf'&{identifier}', Name.Variable.Global),
            (rf'%{identifier}', Name.Variable),

            # Numbers: the more specific literal forms are tried first.
            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Whitespace),
        ],
        'comments': [
            (r'/\*.*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (rf'kernarg{datatypeMod}', Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
                'decl', 'kernel', 'function', 'enablebreakexceptions',
                'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'), Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (rf'_({alignQual}|{widthQual})', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
                'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
                'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
                'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
                'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
                'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
                'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
                'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
                'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
                'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
                'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
                'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
                '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
                '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
                '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
                '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
                '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
                'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
                '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
                '_width', '_height', '_depth', '_array', '_channelorder',
                '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
                'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
                'activelanecount', 'activelanemask', 'activelanepermute', 'call',
                'scall', 'icall', 'alloca', 'packetcompletionsig',
                'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private', 'group',
                'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
                '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
                '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
                '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.
    """
    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']
    version_added = ''

    # Regex fragments used by the rules in ``tokens``.
    # Double-quoted string without embedded quotes.
    string = r'"[^"]*?"'
    # Bare name, or a quoted string used as a name.
    identifier = rf'([-a-zA-Z$._][\w\-$.]*|{string})'
    # Block labels may also be purely numeric.
    block_label = rf'({identifier}|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (rf'{block_label}\s*:', Name.Label),

            include('keyword'),

            (rf'%{identifier}', Name.Variable),
            (rf'@{identifier}', Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (rf'!{identifier}', Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            (rf'c?{string}', String),

            (r'0[xX][KLMHR]?[a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            (r';.*?\n', Comment),
            (r'/\*', Comment, 'c-comment'),
        ],
        'c-comment': [
            (r'[^*]+', Comment),
            (r'\*/', Comment, '#pop'),
            # Consume lone asterisks as non-comment-ending content.
            (r'\*', Comment),
        ],
        'keyword': [
            # Regular keywords
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'splat', 'srem', 'sret', 'ssp', 'sspreq',
                'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
                'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch',
                'syncscope', 'tail', 'tailcc', 'target', 'thread_local', 'to',
                'token', 'triple', 'true', 'trunc', 'type',
                'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls',
                'typeid', 'typeidCompatibleVTable', 'typeIdInfo',
                'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls',
                'typeTestRes', 'typeTests', 'udiv', 'ueq', 'uge', 'ugt',
                'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef', 'une',
                'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr',
                'uno', 'unordered', 'unreachable', 'unsat', 'unwind', 'urem',
                'uselistorder', 'uselistorder_bb', 'uwtable', 'va_arg',
                'varFlags', 'variable', 'vcall_visibility', 'vFuncId',
                'virtFunc', 'virtualConstProp', 'void', 'volatile', 'vscale',
                'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc', 'win64cc',
                'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
                'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc', 'x86_mmx',
                'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Types
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
             Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }
class LlvmMirBodyLexer(RegexLexer):
    """
    For LLVM MIR examples without the YAML wrapper.
    """
    name = 'LLVM-MIR Body'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []
    version_added = '2.6'

    tokens = {
        'root': [
            # Attributes on basic blocks
            (words(('liveins', 'successors'), suffix=':'), Keyword),

            # Basic Block Labels
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),

            # Stack references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),

            # Subreg indices
            (r'%subreg\.\w+', Name),

            # Virtual registers; the 'vreg' state handles an optional
            # bank/class and type that may follow.
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),

            # Reference to LLVM-IR global
            include('global'),

            # Reference to Intrinsic
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),

            # Comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
                    'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),

            # Physical registers
            (r'\$\w+', String.Single),

            # Assignment operator
            (r'=', Operator),

            # gMIR Opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),

            # Target independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),

            # Flags
            (words(('killed', 'implicit')), Keyword),

            # ConstantInt values
            (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),

            # ConstantFloat values
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),

            # Bare immediates
            include('integer'),

            # MMO's
            (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),

            # MIR Comments
            (r';.*', Comment),

            # If we get here, assume it's a target instruction
            (r'[a-zA-Z0-9_]+', Name),

            # Everything else that isn't highlighted
            (r'[(), \n]+', Text),
        ],

        # The integer constant from a ConstantInt value
        'constantint': [
            include('integer'),
            # Zero-width match: leave the state as soon as anything else follows.
            (r'(?=.)', Text, '#pop'),
        ],

        # The floating point constant from a ConstantFloat value
        'constantfloat': [
            include('float'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg': [
            # The bank or class if there is one
            (r'( *)(:(?!:))', bygroups(Whitespace, Keyword),
             ('#pop', 'vreg_bank_or_class')),
            # The LLT if there is one
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg_bank_or_class': [
            # The unassigned bank/class
            (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
            (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
            # The LLT if there is one
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg_type': [
            # Scalar and pointer types
            (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
            (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],

        'mmo': [
            (r'\(', Text),
            (r' +', Whitespace),
            (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
                    'acquire', 'release', 'acq_rel', 'seq_cst')),
             Keyword),
            # IR references
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],

        # Small states that exist only to be pulled in with include().
        'integer': [
            (r'-?[0-9]+', Number.Integer),
        ],
        'float': [
            (r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float),
        ],
        'global': [
            (r'\@[a-zA-Z0-9_.]+', Name.Variable.Global),
        ],
    }
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.
    """
    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],

        'llvm_ir': [
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],

        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            (r'name:', Keyword, 'name'),
            (words(('alignment', ), suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'),
             Keyword, 'boolean'),
            # Attributes whose contents are not highlighted
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'),
             Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],

        # Value states: take the value if present, otherwise pop immediately.
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],

        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM', '*.nasm']
    mimetypes = ['text/x-nasm']
    url = 'https://nasm.us'
    version_added = ''

    # Tasm uses the same file endings, but TASM is not as common as NASM, so
    # we prioritize NASM higher by default
    priority = 1.0

    identifier = r'[a-z$._?][\w$.?#@~]*'
    # Hex literal: ``0x..`` prefix, ``$0..`` prefix, or ``..h`` suffix.
    # The ``$`` must be escaped: unescaped it is an end-of-line anchor (the
    # lexer runs with re.MULTILINE), so the ``$0`` form could never match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    # Double-quoted, single-quoted or backquoted string on one line.
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]?|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
                r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
    wordop = r'seg|wrt|strict|rel|abs'
    type = r'byte|[dq]?word'
    # Directives must be followed by whitespace, otherwise CPU will match
    # cpuid for instance.
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE)(?=\s)')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (rf'({identifier})(\s+)(equ)',
                bygroups(Name.Constant, Whitespace, Keyword.Declaration),
                'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        'instruction-args': [
            (string, String),
            # Number forms are tried from most to least specific; hexn comes
            # before 'punctuation' so ``$0ff`` is not split at the ``$``.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single),
            (r'#.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,{}():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        """
        Return ``False`` when *text* starts with ``PROC`` (any case).

        Every other input falls through and returns ``None``.
        """
        # Probably TASM
        # NOTE(review): re.match anchors at the very start of the text, so
        # this only fires when the input begins with PROC -- confirm whether
        # a search over the whole text was intended.
        if re.match(r'PROC', text, re.IGNORECASE):
            return False
class NasmObjdumpLexer(ObjdumpLexer):
    """
    For the output of ``objdump -d -M intel``.
    """
    name = 'objdump-nasm'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = '2.0'

    # Shared objdump rules; instruction text is handed to the NASM lexer.
    tokens = _objdump_lexer_tokens(NasmLexer)
class TasmLexer(RegexLexer):
    """
    For Tasm (Turbo Assembler) assembly code.
    """
    name = 'TASM'
    aliases = ['tasm']
    filenames = ['*.asm', '*.ASM', '*.tasm']
    mimetypes = ['text/x-tasm']
    url = 'https://en.wikipedia.org/wiki/Turbo_Assembler'
    version_added = ''

    identifier = r'[@a-z$._?][\w$.?#@~]*'
    # Hex literal: ``0x..`` prefix, ``$0..`` prefix, or ``..h`` suffix.
    # The ``$`` must be escaped: unescaped it is an end-of-line anchor (the
    # lexer runs with re.MULTILINE), so the ``$0`` form could never match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    # Double-quoted, single-quoted or backquoted string on one line.
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
                  r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
                  r'P386|MODEL|ASSUME|CODESEG|SIZE')
    # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
    # and then 'add' them to datatype somehow.
    datatype = (r'db|dd|dw|T[A-Z][a-z]+')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            (rf'({identifier})(\s+)({datatype})',
                bygroups(Name.Constant, Whitespace, Keyword.Declaration),
                'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        'instruction-args': [
            (string, String),
            # Number forms are tried from most to least specific; hexn comes
            # before 'punctuation' so ``$0ff`` is not split at the ``$``.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # Do not match newline when it's preceded by a backslash
            (r'(\\)(\s*)(;.*)([\r\n])',
             bygroups(Text, Whitespace, Comment.Single, Whitespace)),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'[\n\r]', Whitespace),
            (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*=/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        """
        Return ``True`` when *text* starts with ``PROC`` (any case).

        Every other input falls through and returns ``None``.
        """
        # NOTE(review): re.match anchors at the very start of the text, so
        # this only fires when the input begins with PROC -- confirm whether
        # a search over the whole text was intended.
        if re.match(r'PROC', text, re.I):
            return True
class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']
    url = 'https://cc65.github.io'
    version_added = '1.6'

    # Mnemonics, labels and hex digits are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r';.*', Comment.Single),
            (r'\s+', Whitespace),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics.
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # Dot-prefixed words.
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """
        Lexer-guessing hook: return 0.9 when any line of `text` starts
        (after optional whitespace) with ``;``; otherwise return None.
        """
        # Pygments invokes this hook with the text as its only argument, so
        # it must not declare ``self``; with an extra parameter the call
        # fails and the heuristic can never produce a score.
        # comments in GAS start with "#"
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9
class Dasm16Lexer(RegexLexer):
    """
    For DCPU-16 Assembly.
    """
    name = 'DASM16'
    url = 'http://0x10c.com/doc/dcpu-16.txt'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']
    version_added = '2.4'

    # Mnemonics recognised as instructions; joined into one alternation below.
    INSTRUCTIONS = [
        'SET',
        'ADD', 'SUB',
        'MUL', 'MLI',
        'DIV', 'DVI',
        'MOD', 'MDI',
        'AND', 'BOR', 'XOR',
        'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX',
        'STI', 'STD',
        'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Identifiers whose upper-cased form appears here are emitted as
    # Name.Variable by ``guess_identifier``.
    REGISTERS = [
        'A', 'B', 'C',
        'X', 'Y', 'Z',
        'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH'
    ]

    # Regex building blocks used by the token table.
    char = r'[a-zA-Z0-9_$@.]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    # The inline (?i) flag makes the mnemonic match case-insensitive.
    instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
    single_char = r"'\\?" + char + "'"
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """
        Token callback: yield the matched identifier as Name.Variable when
        its upper-cased text is listed in ``REGISTERS``, else as Name.Label.
        """
        word = match.group(0)
        if word.upper() in lexer.REGISTERS:
            token_type = Name.Variable
        else:
            token_type = Name.Label
        yield match.start(), token_type, word

    tokens = {
        'root': [
            include('whitespace'),
            # Labels may carry the colon before or after the name.
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Whitespace)
        ],

        'numeric': [
            # Tried before ``number`` so the ``0b`` prefix is not split.
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        'arg': [
            (identifier, guess_identifier),
            include('numeric')
        ],

        # Entered on '[' and left again on the matching ']'.
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace')
        ],

        # A line break or a comment ends the current argument state.
        'instruction-line': [
            (r'[\r\n]+', Whitespace, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace')
        ],

        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line')
        ],

        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line')
        ],

        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r';.*?\n', Comment)
        ],
    }