Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/asm.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

199 statements  

1""" 

2 pygments.lexers.asm 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for assembly languages. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, include, bygroups, using, words, \ 

14 DelegatingLexer, default 

15from pygments.lexers.c_cpp import CppLexer, CLexer 

16from pygments.lexers.d import DLexer 

17from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ 

18 Other, Keyword, Operator, Whitespace 

19 

20__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer', 

21 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer', 

22 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer', 

23 'Ca65Lexer', 'Dasm16Lexer'] 

24 

25 

class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.

    ``root`` recognises labels, ``.directive`` names and instruction
    mnemonics.  Directives and mnemonics push a dedicated argument state
    that is popped again at the end of the line or at a trailing comment.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    # Double-quoted string literal; an escaped quote does not terminate it.
    string = r'"(\\"|[^"])*"'
    # Characters allowed in a symbol name after its first character.
    char = r'[\w$.@-]'
    # A symbol: starts with a letter, ``$`` or ``_``; or a dot followed by
    # at least one symbol character.
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    # Hex literal, or a decimal with optional ``#`` prefix and sign.
    number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
    # ``%``-prefixed register name.
    register = '%' + identifier + r'\b'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Numeric constants (``$``-prefixed immediates).
            # The ``$`` must be escaped: a bare ``$`` is the regex
            # end-of-line anchor, so the rule could never match.  These
            # rules also have to come before ``identifier``, because an
            # identifier may itself start with ``$`` and would otherwise
            # swallow ``$42`` as a symbol name.
            (r'\$' + number, Number.Integer),
            # Try the escaped-quote alternative first so that ``$'\''``
            # is consumed as one character constant.
            (r"\$'(\\'|.)'", String.Char),
            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'([;#]|//).*?\n', Comment.Single),
            (r'/[*][\w\W]*?[*]/', Comment.Multiline)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:{}]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is GAS source.

        Returns ``True`` when a line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when any line starts with a dot-directive,
        and ``None`` (implicitly) otherwise.
        """
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1

109 

110 

def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    *asm_lexer* is the lexer class that receives the disassembled
    instruction text of each code line (via ``using``).  The returned
    dict has a single ``root`` state.
    """
    hex_re = r'[0-9A-Za-z]'

    # Fragments that recur in several of the line patterns below.
    address = f'( *)({hex_re}+:)'                 # indented "addr:"
    opcode_bytes = f'((?:{hex_re}{hex_re} )+)'    # "xx xx xx "
    reloc_head = rf'(\t\t\t)({hex_re}+:)( )([^\t]+)(\t)'

    root_rules = [
        # "<file>:     file format <format>"
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # "Disassembly of section <name>:"
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function label carrying an offset: "addr <name+0x..>:"
        (f'({hex_re}+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # Function label without offset: "addr <name>:"
        (f'({hex_re}+)( )(<)(.*?)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),
        # Address, raw opcode bytes, then the disassembled instruction
        (address + r'(\t)' + opcode_bytes + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  using(asm_lexer))),
        # Same, but without raw bytes (objdump --no-show-raw-insn)
        (address + r'( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace,
                  using(asm_lexer))),
        # Address and raw bytes followed by an ascii rendering
        (address + r'(\t)' + opcode_bytes + '( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace, String)),
        # Continuation line: raw opcode bytes only, no instruction text
        (address + r'(\t)' + opcode_bytes + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
        # "..." marker for skipped bytes
        (r'\t\.\.\.$', Text),
        # Relocation line whose target carries an offset
        (reloc_head + rf'(.*?)([-+])(0x{hex_re}+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
                  Name.Constant, Punctuation, Number.Hex)),
        # Relocation line without offset
        (reloc_head + r'(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
                  Name.Constant)),
        # Any other line is passed through as Other
        (r'[^\n]+\n', Other),
    ]
    return {'root': root_rules}

162 

163 

class ObjdumpLexer(RegexLexer):
    """
    Lexer for listings produced by ``objdump -dr``.
    """
    name = 'objdump'
    url = 'https://www.gnu.org/software/binutils'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']
    version_added = ''

    # Common objdump rules; instruction text is lexed with GasLexer.
    tokens = _objdump_lexer_tokens(asm_lexer=GasLexer)

176 

177 

class DObjdumpLexer(DelegatingLexer):
    """
    Lexer for ``objdump -Sr`` listings of compiled D files.
    """
    name = 'd-objdump'
    url = 'https://www.gnu.org/software/binutils'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']
    version_added = ''

    def __init__(self, **options):
        """Combine ``DLexer`` with ``ObjdumpLexer``, forwarding *options*."""
        lexer_pair = (DLexer, ObjdumpLexer)
        super().__init__(*lexer_pair, **options)

191 

192 

class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a historical misspelling.  It is kept so existing
    # users of that alias keep working; the correctly spelled
    # 'c++-objdump' (matching the '*.c++-objdump' filename pattern) is
    # added alongside it.
    # NOTE(review): if the project keeps a generated alias-to-lexer
    # mapping, it has to be regenerated for the new alias to resolve.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        """Combine ``CppLexer`` with ``ObjdumpLexer``, forwarding *options*."""
        super().__init__(CppLexer, ObjdumpLexer, **options)

206 

207 

class CObjdumpLexer(DelegatingLexer):
    """
    Lexer for ``objdump -Sr`` listings of compiled C files.
    """
    name = 'c-objdump'
    url = 'https://www.gnu.org/software/binutils'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']
    version_added = ''

    def __init__(self, **options):
        """Combine ``CLexer`` with ``ObjdumpLexer``, forwarding *options*."""
        lexer_pair = (CLexer, ObjdumpLexer)
        super().__init__(*lexer_pair, **options)

222 

223 

class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.

    The class-level strings below are regex fragments that the token
    table assembles into its rules.
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']
    url = 'https://en.wikipedia.org/wiki/Heterogeneous_System_Architecture#HSA_Intermediate_Layer'
    version_added = '2.2'

    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'
    # Registers
    register_number = r'[0-9]+'
    register = r'(\$(c|s|d|q)' + register_number + r')\b'
    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    allocQual = r'(alloc\(agent\))'
    # Instruction Modifiers
    roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
    datatypeMod = (r'_('
                   # packedTypes
                   r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
                   r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
                   r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
                   # baseTypes
                   r'u8|s8|u16|s16|u32|s32|u64|s64|'
                   r'b128|b8|b16|b32|b64|b1|'
                   r'f16|f32|f64|'
                   # opaqueType
                   r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric Constant
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            (r'@' + identifier + ':?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            (r'&' + identifier, Name.Variable.Global),
            (r'%' + identifier, Name.Variable),

            # hexfloat must precede the plain hex rule, which would
            # otherwise consume its integer part.
            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Whitespace),
        ],
        'comments': [
            # ``[\w\W]`` instead of ``.`` so that a block comment may span
            # several lines; ``.`` does not match a newline here, so a
            # multi-line ``/* ... */`` was previously not recognised.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (r'kernarg' + datatypeMod, Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
                'decl', 'kernel', 'function', 'enablebreakexceptions',
                'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'), Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
                'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
                'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
                'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
                'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
                'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
                'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
                'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
                'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
                'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
                'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
                'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
                '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
                '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
                '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
                '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
                '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
                'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
                '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
                '_width', '_height', '_depth', '_array', '_channelorder',
                '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
                'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
                'activelanecount', 'activelanemask', 'activelanepermute', 'call',
                'scall', 'icall', 'alloca', 'packetcompletionsig',
                'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private', 'group',
                'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
                '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
                '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
                '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }

357 

358 

class LlvmLexer(RegexLexer):
    """
    Lexer for LLVM assembly (textual IR).
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']
    url = 'https://llvm.org/docs/LangRef.html'
    version_added = ''

    # Double-quoted string without escape handling.
    string = r'"[^"]*?"'
    # A bare name, or a quoted string used as a name.
    identifier = rf'([-a-zA-Z$._][\w\-$.]*|{string})'
    # Block labels may be identifiers or plain numbers.
    block_label = rf'({identifier}|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Labels are tried before keywords, since any keyword is
            # also a valid label name.
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            # Named and numbered values, globals, attribute groups
            # and metadata references.
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            (r'c?' + string, String),

            # Numeric literals
            (r'0[xX][KLMHR]?[a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation),
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            # ';' starts a comment that runs to the end of the line.
            (r';.*?\n', Comment),
        ],
        'keyword': [
            # Word keywords; each must end at a word boundary.
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'splat', 'srem', 'sret', 'ssp', 'sspreq',
                'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
                'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch',
                'syncscope', 'tail', 'tailcc', 'target', 'thread_local', 'to',
                'token', 'triple', 'true', 'trunc', 'type',
                'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls',
                'typeid', 'typeidCompatibleVTable', 'typeIdInfo',
                'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls',
                'typeTestRes', 'typeTests', 'udiv', 'ueq', 'uge', 'ugt',
                'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef', 'une',
                'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr',
                'uno', 'unordered', 'unreachable', 'unsat', 'unwind', 'urem',
                'uselistorder', 'uselistorder_bb', 'uwtable', 'va_arg',
                'varFlags', 'variable', 'vcall_visibility', 'vFuncId',
                'virtFunc', 'virtualConstProp', 'void', 'volatile', 'vscale',
                'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc', 'win64cc',
                'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
                'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc', 'x86_mmx',
                'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Named types (no word-boundary suffix on this rule).
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
             Keyword.Type),

            # Arbitrary-width integer types: i1, i8, i32, ...
            (r'i[1-9]\d*', Keyword.Type),
        ]
    }

501 

502 

class LlvmMirBodyLexer(RegexLexer):
    """
    Lexer for bare LLVM MIR bodies, i.e. without the YAML wrapper.
    """
    name = 'LLVM-MIR Body'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []
    url = 'https://llvm.org/docs/MIRLangRef.html'
    version_added = '2.6'

    tokens = {
        'root': [
            # Basic-block attributes
            (words(('liveins', 'successors'), suffix=':'), Keyword),
            # Basic-block labels (definitions, then references)
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
            # Stack slot references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
            # Sub-register indices
            (r'%subreg\.\w+', Name),
            # Virtual registers; 'vreg' handles an optional bank/class/type
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
            # LLVM-IR globals
            include('global'),
            # Intrinsic references
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
            # Integer and floating-point comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
                    'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),
            # Physical registers
            (r'\$\w+', String.Single),
            # Assignment
            (r'=', Operator),
            # Generic MIR (gMIR) opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),
            # Target-independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),
            # Operand flags
            (words(('killed', 'implicit')), Keyword),
            # ConstantInt: integer type, then its value
            (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),
            # ConstantFloat: float type, then its value
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
            # Bare immediates
            include('integer'),
            # Machine memory operands (MMOs)
            (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),
            # MIR comments
            (r';.*', Comment),
            # Anything word-like left over is taken as a target instruction
            (r'[a-zA-Z0-9_]+', Name),
            # Remaining separators
            (r'[(), \n]+', Text),
        ],
        # Value part of a ConstantInt
        'constantint': [
            include('integer'),
            (r'(?=.)', Text, '#pop'),
        ],
        # Value part of a ConstantFloat
        'constantfloat': [
            include('float'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg': [
            # Optional register bank or class (a single ':', not '::')
            (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),
            # Optional low-level type (LLT)
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_bank_or_class': [
            # '_' denotes the unassigned bank/class
            (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
            (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
            # Optional low-level type (LLT)
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_type': [
            # Scalar and pointer types, then vectors of them
            (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
            (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],
        'mmo': [
            (r'\(', Text),
            (r' +', Whitespace),
            (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
                    'acquire', 'release', 'acq_rel', 'seq_cst')),
             Keyword),
            # References into the LLVM-IR
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
        # Small shared states pulled in via include()
        'integer': [(r'-?[0-9]+', Number.Integer)],
        'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
        'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
    }

640 

641 

class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human-readable serialization of LLVM's machine-specific
    intermediate representation.  LLVM developers use it to inspect the
    state of a compilation at various points and to test individual
    pieces of the compiler.
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']
    url = 'https://llvm.org/docs/MIRLangRef.html'
    version_added = '2.6'

    tokens = {
        'root': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # A document opened with '--- |' contains LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Any other document is MIR
            (r'---', Keyword, 'llvm_mir'),
            # Everything else is consumed in as few tokens as possible
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # A document ends at '...' or just before the next '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # The document content is lexed by LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # A document ends at '...' or just before the next '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Simple attributes, each with a dedicated value state
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Attributes whose contents are not highlighted
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # The body block goes to LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Fallback for anything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # A document ends at '...' or just before the next '---'.
            # Both llvm_mir_body and llvm_mir are popped here.
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # The body block is lexed by LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' terminator is optional: if it was not found above
            # it is absent, although a '---' may still follow.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }

719 

720 

class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.

    Matching is case-insensitive (see ``flags``).  ``root`` handles
    preprocessor lines, labels, ``equ`` definitions, directives,
    data-declaration keywords and mnemonics; operands are lexed in
    ``instruction-args`` until the end of the line.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM', '*.nasm']
    mimetypes = ['text/x-nasm']
    url = 'https://nasm.us'
    version_added = ''

    # Tasm uses the same file endings, but TASM is not as common as NASM, so
    # we prioritize NASM higher by default
    priority = 1.0

    identifier = r'[a-z$._?][\w$.?#@~]*'
    # Hex literals: ``0x..``, ``$0..`` and ``..h``.  The ``$`` of the
    # second form has to be escaped: unescaped it is the end-of-line
    # anchor, so that alternative could never match and ``$0c8`` was
    # split into ``$``, ``0`` and ``c8``.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]?|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
                r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
    wordop = r'seg|wrt|strict|rel|abs'
    type = r'byte|[dq]?word'
    # Directives must be followed by whitespace, otherwise CPU will match
    # cpuid for instance.
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE)(?=\s)')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (rf'({identifier})(\s+)(equ)',
                bygroups(Name.Constant, Whitespace, Keyword.Declaration),
                'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        'instruction-args': [
            (string, String),
            # hexn is tried before 'punctuation' and 'identifier', both of
            # which would otherwise take the leading ``$`` of ``$0..``.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single),
            (r'#.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,{}():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        """
        Return ``False`` when *text* starts with ``PROC`` in any letter
        case (probably TASM); otherwise return ``None`` implicitly.
        """
        # NOTE(review): re.match only inspects the very start of the
        # text; confirm that this (rather than a search) is intended.
        # Probably TASM
        if re.match(r'PROC', text, re.IGNORECASE):
            return False

807 

808 

class NasmObjdumpLexer(ObjdumpLexer):
    """
    Lexer for listings produced by ``objdump -d -M intel``.
    """
    name = 'objdump-nasm'
    url = 'https://www.gnu.org/software/binutils'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']
    version_added = '2.0'

    # Common objdump rules; instruction text is lexed with NasmLexer.
    tokens = _objdump_lexer_tokens(asm_lexer=NasmLexer)

821 

822 

class TasmLexer(RegexLexer):
    """
    For Tasm (Turbo Assembler) assembly code.
    """
    name = 'TASM'
    aliases = ['tasm']
    filenames = ['*.asm', '*.ASM', '*.tasm']
    mimetypes = ['text/x-tasm']
    url = 'https://en.wikipedia.org/wiki/Turbo_Assembler'
    version_added = ''

    # Building blocks for the token rules below.  All patterns are compiled
    # with ``flags`` (case-insensitive, multi-line).
    identifier = r'[@a-z$._?][\w$.?#@~]*'
    # The dollar sign must be escaped: a bare ``$`` is the end-of-line anchor,
    # which would make the dollar-prefixed alternative impossible to match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    # Double-, single- and backtick-quoted strings; none may span a newline.
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    # The trailing ``\b`` keeps a directive from matching as a mere prefix of
    # a longer identifier (e.g. ``ARG`` inside ``argc``, ``SIZE`` in ``sizes``).
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
                  r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
                  r'P386|MODEL|ASSUME|CODESEG|SIZE)\b')
    # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
    # and then 'add' them to datatype somehow.
    datatype = (r'db|dd|dw|T[A-Z][a-z]+')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        # Start of a statement: preprocessor line, label, directive,
        # data declaration or instruction mnemonic.
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            (rf'({identifier})(\s+)({datatype})',
             bygroups(Name.Constant, Whitespace, Keyword.Declaration),
             'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        # Operands of a statement; left again at the end of the line.
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # Do not match newline when it's preceded by a backslash
            (r'(\\)(\s*)(;.*)([\r\n])',
             bygroups(Text, Whitespace, Comment.Single, Whitespace)),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        # Remainder of a ``%`` line, up to a ``;`` comment or the newline.
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'[\n\r]', Whitespace),
            # Backslash directly followed by a line break.
            (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*=/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        """Lexer-guessing hook: claim text that opens with ``PROC``.

        ``re.match`` anchors at the very start of *text*, so only input
        whose first characters are ``PROC`` (in any letter case) yields
        ``True``; for everything else the function falls through and
        returns ``None``.
        """
        # See above
        if re.match(r'PROC', text, re.I):
            return True

909 

910 

class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']
    url = 'https://cc65.github.io'
    version_added = '1.6'

    # Mnemonics, hex digits and identifiers are matched in any letter case.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r';.*', Comment.Single),
            (r'\s+', Whitespace),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics.
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # Dot-prefixed words.
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            # Quoted text up to the closing quote; if the quote is missing,
            # one character following the quote-free run is consumed instead.
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """Lexer-guessing hook: score text that has ``;`` comment lines.

        The hook takes only the text, matching the ``analyse_text(text)``
        form used by the other lexers in this module.  The previous
        signature had an extra leading ``self`` parameter, so a call that
        passes just the text could not bind ``text``.

        Returns ``0.9`` when some line starts (after optional whitespace)
        with ``;``; otherwise falls through and returns ``None``.
        """
        # comments in GAS start with "#"
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9

948 

949 

class Dasm16Lexer(RegexLexer):
    """
    For DCPU-16 Assembly.
    """
    name = 'DASM16'
    url = 'http://0x10c.com/doc/dcpu-16.txt'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']
    version_added = '2.4'

    # Mnemonics recognised at the start of a statement (see ``instruction``).
    INSTRUCTIONS = [
        'SET',
        'ADD', 'SUB',
        'MUL', 'MLI',
        'DIV', 'DVI',
        'MOD', 'MDI',
        'AND', 'BOR', 'XOR',
        'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX',
        'STI', 'STD',
        'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Operand names that ``guess_identifier`` reports as Name.Variable.
    REGISTERS = [
        'A', 'B', 'C',
        'X', 'Y', 'Z',
        'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH'
    ]

    # Regular-expression fragments shared by the token rules below.
    char = r'[a-zA-Z0-9_$@.]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
    single_char = r"'\\?" + char + "'"
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """Token callback: classify an operand identifier.

        Yields a single ``(position, token type, text)`` triple for the
        matched text: ``Name.Variable`` when its upper-cased form is listed
        in ``lexer.REGISTERS``, ``Name.Label`` otherwise.
        """
        ident = match.group(0)
        klass = Name.Variable if ident.upper() in lexer.REGISTERS else Name.Label
        yield match.start(), klass, ident

    tokens = {
        'root': [
            include('whitespace'),
            # Labels may carry the colon in front or behind.
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Whitespace)
        ],

        'numeric': [
            # Tried before ``number`` so that ``0b...`` is not split after
            # its leading ``0``.
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        'arg': [
            (identifier, guess_identifier),
            include('numeric')
        ],

        # Inside ``[...]``.
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace')
        ],

        # End-of-statement handling shared by the two argument states.
        'instruction-line': [
            (r'[\r\n]+', Whitespace, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace')
        ],

        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line')
        ],

        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line')
        ],

        'whitespace': [
            (r'\n', Whitespace),
            # Whitespace other than the newline.  A plain ``\s+`` would also
            # swallow a line break that follows trailing blanks, so the
            # ``[\r\n]+`` rule that pops the argument states would never see
            # it and the next line would still be lexed as operands.
            (r'[^\S\n]+', Whitespace),
            (r';.*?\n', Comment)
        ],
    }