Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/asm.py: 91%

174 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.asm 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for assembly languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, include, bygroups, using, words, \ 

14 DelegatingLexer, default 

15from pygments.lexers.c_cpp import CppLexer, CLexer 

16from pygments.lexers.d import DLexer 

17from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ 

18 Other, Keyword, Operator, Whitespace 

19 

20__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer', 

21 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer', 

22 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer', 

23 'Ca65Lexer', 'Dasm16Lexer'] 

24 

25 

class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex fragments shared by the token states below.
    string = r'"(\\"|[^"])*"'
    char = r'[\w$.@-]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
    register = '%' + identifier + r'\b'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Immediate numeric and character constants ($5, $0x10, $'a').
            # The '$' must be escaped (a bare '$' is the end-of-line
            # anchor and can never be followed by a digit), and these rules
            # must precede the identifier rule because `identifier` itself
            # accepts a leading '$' and would otherwise swallow the operand.
            # The trailing \b keeps mixed tokens such as ``$1f`` on the
            # identifier rule below.
            (r'\$' + number + r'\b', Number.Integer),
            (r"\$'(.|\\')'", String.Char),

            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'([;#]|//).*?\n', Comment.Single),
            (r'/[*][\w\W]*?[*]/', Comment.Multiline)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:{}]+', Punctuation)
        ]
    }

    def analyse_text(text):
        # A line starting with a well-known section directive is taken as a
        # definite match; any other line starting with ".word" is only a
        # weak hint.  Falls through (returns None) when neither is found.
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1

107 

108 

def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    `asm_lexer` is the lexer class that is applied (via ``using``) to the
    disassembled instruction text found on code lines.
    """
    hex_re = r'[0-9A-Za-z]'

    # Pieces that recur in several of the line patterns below.
    symbol_head = '(' + hex_re + '+)( )(<)(.*?)'
    line_label = '( *)(' + hex_re + r'+:)'
    raw_bytes = r'(\t)((?:' + hex_re + hex_re + ' )+)'
    reloc_head = r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)'

    root_rules = [
        # "<file>:     file format <fmt>"
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # "Disassembly of section <name>:"
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function label carrying an offset
        (symbol_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # Function label without an offset
        (symbol_head + '(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),
        # Address, raw opcode bytes, then the disassembled instruction
        (line_label + raw_bytes + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  using(asm_lexer))),
        # Address and instruction only (objdump --no-show-raw-insn)
        (line_label + r'( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace,
                  using(asm_lexer))),
        # Address, raw bytes, then an ascii rendering
        (line_label + raw_bytes + '( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace, String)),
        # Continuation line: raw opcode bytes with nothing after them
        (line_label + raw_bytes + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
        # Marker for skipped bytes
        (r'\t\.\.\.$', Text),
        # Relocation entry with an offset
        (reloc_head + '([-+])(0x' + hex_re + '+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
                  Name.Constant, Punctuation, Number.Hex)),
        # Relocation entry without an offset
        (reloc_head + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
                  Name.Constant)),
        # Anything else is passed through one line at a time
        (r'[^\n]+\n', Other)
    ]
    return {'root': root_rules}

160 

161 

class ObjdumpLexer(RegexLexer):
    """
    Lexer for ``objdump -dr`` output; instruction text is highlighted
    with the Gas lexer.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # The objdump line patterns are shared; only the embedded assembly
    # lexer differs between the objdump lexer variants.
    tokens = _objdump_lexer_tokens(asm_lexer=GasLexer)

172 

173 

class DObjdumpLexer(DelegatingLexer):
    """
    Lexer for ``objdump -Sr`` output produced from compiled D files.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        # Combine the D source lexer with the objdump lexer.
        DelegatingLexer.__init__(self, DLexer, ObjdumpLexer, **options)

185 

186 

class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a historical misspelling of 'c++-objdump'.  The
    # correct spelling is added here; the misspelt alias is kept so that
    # existing users of it keep working.
    # NOTE(review): alias lookup presumably goes through the generated
    # lexer mapping, which would need regenerating to pick this up.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        # Combine the C++ source lexer with the objdump lexer.
        super().__init__(CppLexer, ObjdumpLexer, **options)

198 

199 

class CObjdumpLexer(DelegatingLexer):
    """
    Lexer for ``objdump -Sr`` output produced from compiled C files.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        # Combine the C source lexer with the objdump lexer.
        DelegatingLexer.__init__(self, CLexer, ObjdumpLexer, **options)

211 

212 

class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.

    .. versionadded:: 2.2
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']

    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'
    # Registers
    register_number = r'[0-9]+'
    register = r'(\$(c|s|d|q)' + register_number + r')\b'
    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    allocQual = r'(alloc\(agent\))'
    # Instruction Modifiers
    roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
    datatypeMod = (r'_('
                   # packedTypes
                   r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
                   r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
                   r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
                   # baseTypes
                   r'u8|s8|u16|s16|u32|s32|u64|s64|'
                   r'b128|b8|b16|b32|b64|b1|'
                   r'f16|f32|f64|'
                   # opaqueType
                   r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric Constant
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            (r'@' + identifier + ':?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            (r'&' + identifier, Name.Variable.Global),
            (r'%' + identifier, Name.Variable),

            # Hex floats must be tried before plain hex integers, and
            # floats before plain integers, so the longer form wins.
            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Whitespace),
        ],
        'comments': [
            # [\w\W] rather than '.' so that a block comment may span
            # several lines: this lexer sets no DOTALL flag, hence '.'
            # would stop at the first newline.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (r'kernarg' + datatypeMod, Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
                'decl', 'kernel', 'function', 'enablebreakexceptions',
                'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'), Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
                'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
                'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
                'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
                'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
                'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
                'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
                'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
                'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
                'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
                'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
                'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
                '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
                '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
                '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
                '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
                '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
                'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
                '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
                '_width', '_height', '_depth', '_array', '_channelorder',
                '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
                'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
                'activelanecount', 'activelanemask', 'activelanepermute', 'call',
                'scall', 'icall', 'alloca', 'packetcompletionsig',
                'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private', 'group',
                'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
                '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
                '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
                '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }

346 

347 

class LlvmLexer(RegexLexer):
    """
    Lexer for LLVM assembly (textual IR) code.
    """
    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex fragments used to build the rules below.
    string = r'"[^"]*?"'
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
    block_label = r'(' + identifier + r'|(\d+))'

    # Words highlighted as plain keywords (matched with a trailing \b).
    _keywords = (
        'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
        'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
        'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
        'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
        'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
        'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
        'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
        'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
        'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
        'attributes', 'available_externally', 'avr_intrcc',
        'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
        'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
        'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
        'callee', 'caller', 'calls', 'canAutoHide', 'catch',
        'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
        'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
        'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
        'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
        'datalayout', 'declare', 'default', 'define', 'deplibs',
        'dereferenceable', 'dereferenceable_or_null', 'distinct',
        'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
        'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
        'extern_weak', 'external', 'externally_initialized',
        'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
        'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
        'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
        'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
        'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
        'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
        'ifunc', 'inaccessiblemem_or_argmemonly',
        'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
        'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
        'inlinehint', 'inrange', 'inreg', 'insertelement',
        'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
        'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
        'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
        'live', 'load', 'local_unnamed_addr', 'localdynamic',
        'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
        'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
        'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
        'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
        'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
        'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
        'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
        'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
        'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
        'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
        'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
        'optnone', 'optsize', 'or', 'ord', 'param', 'params',
        'partition', 'path', 'personality', 'phi', 'poison',
        'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
        'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
        'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
        'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
        'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
        'safestack', 'samesize', 'sanitize_address',
        'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
        'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
        'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
        'shufflevector', 'sideeffect', 'signext', 'single',
        'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
        'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
        'speculatable', 'speculative_load_hardening', 'spir_func',
        'spir_kernel', 'srem', 'sret', 'ssp', 'sspreq', 'sspstrong',
        'store', 'strictfp', 'sub', 'summaries', 'summary', 'swiftcc',
        'swifterror', 'swiftself', 'switch', 'syncscope', 'tail',
        'tailcc', 'target', 'thread_local', 'to', 'token', 'triple',
        'true', 'trunc', 'type', 'typeCheckedLoadConstVCalls',
        'typeCheckedLoadVCalls', 'typeid', 'typeidCompatibleVTable',
        'typeIdInfo', 'typeTestAssumeConstVCalls',
        'typeTestAssumeVCalls', 'typeTestRes', 'typeTests', 'udiv',
        'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin',
        'undef', 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown',
        'unnamed_addr', 'uno', 'unordered', 'unreachable', 'unsat',
        'unwind', 'urem', 'uselistorder', 'uselistorder_bb', 'uwtable',
        'va_arg', 'varFlags', 'variable', 'vcall_visibility',
        'vFuncId', 'virtFunc', 'virtualConstProp', 'void', 'volatile',
        'vscale', 'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc',
        'win64cc', 'within', 'wpdRes', 'wpdResolutions', 'writeonly',
        'x', 'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc',
        'x86_mmx', 'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
        'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
        'zeroinitializer', 'zext', 'immarg', 'willreturn',
    )

    # Named (non-integer) types; tried after the keyword list above.
    _type_names = (
        'void', 'half', 'bfloat', 'float', 'double', 'fp128',
        'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
        'x86_amx', 'token', 'ptr',
    )

    tokens = {
        'root': [
            include('whitespace'),

            # Labels are tried first: a keyword is also a valid label name.
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            # Sigil-prefixed names (named and numbered variants)
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            (r'c?' + string, String),

            # Numeric literals
            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            (r';.*?\n', Comment)
        ],
        'keyword': [
            (words(_keywords, suffix=r'\b'), Keyword),
            (words(_type_names), Keyword.Type),
            # Arbitrary-width integer types: i1, i8, i32, ...
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }

488 

489 

class LlvmMirBodyLexer(RegexLexer):
    """
    Lexer for LLVM MIR snippets that are not wrapped in the YAML document.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR Body'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []

    # Generic (gMIR) opcodes, matched as whole words.
    _gmir_opcodes = (
        r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
        r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
        r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
        r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
        r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
        r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
        r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
        r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
        r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
        r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
        r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
        r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
        r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
        r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
        r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
        r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
        r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
        r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
        r'FSUB)'
        r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
        r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
        r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
        r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
        r'G_SHUFFLE_VECTOR)\b'
    )

    # Predicate names accepted inside intpred(...) / floatpred(...).
    _int_predicates = ('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge',
                       'ult', 'ule')
    _float_predicates = ('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt',
                         'uge', 'ult', 'ule')

    # Words recognised inside a memory operand ("mmo") description.
    _mmo_words = ('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
                  'acquire', 'release', 'acq_rel', 'seq_cst')

    tokens = {
        'root': [
            # Basic block attributes
            (words(('liveins', 'successors'), suffix=':'), Keyword),
            # Basic block labels and references to them
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
            # Stack slot references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
            # Sub-register indices
            (r'%subreg\.\w+', Name),
            # Virtual registers; the 'vreg' state handles what may follow
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
            # LLVM-IR globals
            include('global'),
            # Intrinsic references
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
            # Integer and floating point comparison predicates
            (words(_int_predicates, prefix=r'intpred\(', suffix=r'\)'),
             Name.Builtin),
            (words(_float_predicates, prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),
            # Physical registers
            (r'\$\w+', String.Single),
            # Assignment
            (r'=', Operator),
            # Generic opcodes
            (_gmir_opcodes, Name.Builtin),
            # Target-independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),
            # Operand flags
            (words(('killed', 'implicit')), Keyword),
            # Typed integer constant: the type here, the value in its own state
            (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),
            # Typed floating point constant, handled the same way
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
            # Untyped immediates
            include('integer'),
            # Memory operands
            (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),
            # Comments
            (r';.*', Comment),
            # Any remaining word is taken to be a target instruction
            (r'[a-zA-Z0-9_]+', Name),
            # Separators that receive no highlighting
            (r'[(), \n]+', Text),
        ],
        # Value part of a typed integer constant
        'constantint': [
            include('integer'),
            (r'(?=.)', Text, '#pop'),
        ],
        # Value part of a typed floating point constant
        'constantfloat': [
            include('float'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg': [
            # Optional register bank / class after a single ':'
            (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),
            # Optional low-level type in parentheses
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_bank_or_class': [
            # '_' stands for an unassigned bank/class
            (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
            (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
            # Optional low-level type in parentheses
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_type': [
            # Scalar / pointer types, then vectors of them
            (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
            (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],
        'mmo': [
            (r'\(', Text),
            (r' +', Whitespace),
            (words(_mmo_words), Keyword),
            # References into the LLVM-IR
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
        'integer': [(r'-?[0-9]+', Number.Integer),],
        'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
        'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
    }

628 

629 

class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format used to represent LLVM's
    machine specific intermediate representation. It lets LLVM's developers
    see the state of the compilation process at various points, and test
    individual pieces of the compiler.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    # A document ends with '...' or right before the next '---'.
    _document_end = r'(\.\.\.|(?=---))'

    # Attributes whose value gets its own small state.
    _number_attrs = ('alignment', )
    _boolean_attrs = ('legalized', 'regBankSelected', 'tracksRegLiveness',
                      'selected', 'exposesReturnsTwice')
    # Attributes whose contents are not highlighted.
    _opaque_attrs = ('registers', 'stack', 'fixedStack', 'liveins',
                     'frameInfo', 'machineFunctionInfo')

    tokens = {
        'root': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # A document opened with '--- |' holds LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Every other document holds MIR
            (r'---', Keyword, 'llvm_mir'),
            # Swallow the rest in as few tokens as possible
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            (_document_end, Keyword, '#pop'),
            # Everything up to the document end goes to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            (_document_end, Keyword, '#pop'),
            # Simple attributes
            (r'name:', Keyword, 'name'),
            (words(_number_attrs, suffix=':'), Keyword, 'number'),
            (words(_boolean_attrs, suffix=':'), Keyword, 'boolean'),
            (words(_opaque_attrs, suffix=':'), Keyword),
            # The body block is handed to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Anything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # At the document end both llvm_mir_body and llvm_mir are left
            (_document_end, Keyword, '#pop:2'),
            # Body text up to the document end goes to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The closing '...' is optional; with no terminator ahead the
            # remaining text is the body
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }

708 

709 

class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM', '*.nasm']
    mimetypes = ['text/x-nasm']

    # Tasm uses the same file endings, but TASM is not as common as NASM, so
    # we prioritize NASM higher by default
    priority = 1.0

    identifier = r'[a-z$._?][\w$.?#@~]*'
    # Hex literals: 0x..., $0... and ...h forms.  The '$' is escaped so it
    # matches a literal dollar sign; unescaped it is the end-of-line anchor
    # and the "$0..." form could never match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]?|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
                r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
    wordop = r'seg|wrt|strict|rel|abs'
    type = r'byte|[dq]?word'
    # Directives must be followed by whitespace, otherwise CPU will match
    # cpuid for instance.
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE)(?=\s)')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'(%s)(\s+)(equ)' % identifier,
                bygroups(Name.Constant, Whitespace, Keyword.Declaration),
                'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        'instruction-args': [
            (string, String),
            # Numeric literals go from most to least specific; hexn must
            # come before the '[$]+' punctuation rule for "$0..." literals.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single),
            (r'#.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,{}():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        # Probably TASM
        # NOTE(review): re.match only tests the very start of the text, so
        # this fires only when the input begins with "PROC" -- confirm
        # whether re.search was intended.
        if re.match(r'PROC', text, re.IGNORECASE):
            return False

794 

795 

class NasmObjdumpLexer(ObjdumpLexer):
    """
    Lexer for ``objdump -d -M intel`` output; instruction text is
    highlighted with the Nasm lexer.

    .. versionadded:: 2.0
    """
    name = 'objdump-nasm'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']

    # Same objdump line patterns as the base class, but with the Intel
    # syntax lexer applied to the instruction text.
    tokens = _objdump_lexer_tokens(asm_lexer=NasmLexer)

808 

809 

class TasmLexer(RegexLexer):
    """
    Lexer for Turbo Assembler (TASM) source code.
    """
    name = 'TASM'
    aliases = ['tasm']
    filenames = ['*.asm', '*.ASM', '*.tasm']
    mimetypes = ['text/x-tasm']

    # ---- regex fragments used by the state table below ----
    identifier = r'[@a-z$._?][\w$.?#@~]*'
    # NOTE(review): the ``$`` in the second alternative is not escaped, so
    # it is an end-of-line anchor rather than a literal dollar sign and that
    # alternative cannot match. Left untouched to keep output identical.
    hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = r'\.e?'.join((decn, decn))
    # Double-quoted, single-quoted or backtick-quoted string on one line.
    string = '|'.join((
        r'"(\\"|[^"\n])*"',
        r"'(\\'|[^'\n])*'",
        r"`(\\`|[^`\n])*`",
    ))
    declkw = r'(?:res|d)[bwdqt]|times'
    register = '(' + '|'.join((
        r'r[0-9][0-5]?[bwd]',
        r'[a-d][lh]',
        r'[er]?[a-d]x',
        r'[er]?[sb]p',
        r'[er]?[sd]i',
        r'[c-gs]s',
        r'st[0-7]',
        r'mm[0-7]',
        r'cr[0-4]',
        r'dr[0-367]',
        r'tr[3-7]',
    )) + r')\b'
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    # Alternation order matters (e.g. ENDS/ENDP are tried before END).
    directives = '|'.join((
        'BITS', 'USE16', 'USE32', 'SECTION', 'SEGMENT', 'ABSOLUTE',
        'EXTERN', 'GLOBAL', 'ORG', 'ALIGN', 'STRUC', 'ENDSTRUC', 'ENDS',
        'COMMON', 'CPU', 'GROUP', 'UPPERCASE', 'INCLUDE', 'EXPORT',
        'LIBRARY', 'MODULE', 'PROC', 'ENDP', 'USES', 'ARG', 'DATASEG',
        'UDATASEG', 'END', 'IDEAL', 'P386', 'MODEL', 'ASSUME', 'CODESEG',
        'SIZE',
    ))
    # The T[A-Z][a-z]+ part is only a naming convention for user types.
    # Ideally STRUC definitions would be collected and added to this
    # pattern instead.
    datatype = r'db|dd|dw|T[A-Z][a-z]+'

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            # A leading '%' starts a preprocessor line.
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            # "<name> <datatype>" data declaration.
            ('(' + identifier + r')(\s+)(' + datatype + ')',
             bygroups(Name.Constant, Whitespace, Keyword.Declaration),
             'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # Backslash followed by a comment: consume the line end here
            # without leaving the state, so the statement continues.
            (r'(\\)(\s*)(;.*)([\r\n])',
             bygroups(Text, Whitespace, Comment.Single, Whitespace)),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'[\n\r]', Whitespace),
            (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*=/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        # Counterpart of the NASM check: claim the text when it opens with
        # PROC. re.match is anchored at the very start of ``text``.
        if re.match(r'PROC', text, re.I) is None:
            return None
        return True

894 

895 

class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.

    .. versionadded:: 1.6
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']

    # Mnemonics and hex digits are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r';.*', Comment.Single),
            (r'\s+', Whitespace),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics.
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # Dot-prefixed words.
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """Score *text* as ca65 source.

        Returns 0.9 when some line starts (after optional whitespace) with
        a ``;`` comment, otherwise ``None``.

        The hook takes only ``text``, matching the other ``analyse_text``
        hooks in this module and the documented Pygments signature. The
        former ``(self, text)`` signature required two arguments, so a
        call with the text alone could not succeed.
        """
        # comments in GAS start with "#"
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9

933 

934 

class Dasm16Lexer(RegexLexer):
    """
    Lexer for DCPU-16 assembly language.

    .. versionadded:: 2.4
    """
    name = 'DASM16'
    url = 'http://0x10c.com/doc/dcpu-16.txt'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']

    # Opcode mnemonics; joined into the ``instruction`` pattern below.
    INSTRUCTIONS = [
        'SET',
        'ADD', 'SUB',
        'MUL', 'MLI',
        'DIV', 'DVI',
        'MOD', 'MDI',
        'AND', 'BOR', 'XOR',
        'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX',
        'STI', 'STD',
        'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Names that guess_identifier() reports as Name.Variable.
    REGISTERS = [
        'A', 'B', 'C',
        'X', 'Y', 'Z',
        'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH'
    ]

    # Regex building blocks for the state table.
    char = r'[a-zA-Z0-9_$@.]'
    identifier = r'(?:[a-zA-Z$_]%s*|\.%s+)' % (char, char)
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    instruction = '(?i)(%s)' % '|'.join(INSTRUCTIONS)
    single_char = r"'\\?%s'" % char
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """Token callback: emit a register as Name.Variable, else Name.Label."""
        word = match.group(0)
        if word.upper() in lexer.REGISTERS:
            token_type = Name.Variable
        else:
            token_type = Name.Label
        yield match.start(), token_type, word

    tokens = {
        'root': [
            include('whitespace'),
            # Labels may carry the colon in front or behind.
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Whitespace)
        ],

        'numeric': [
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        'arg': [
            (identifier, guess_identifier),
            include('numeric')
        ],

        # Inside "[ ... ]"; the closing bracket returns to the caller state.
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace')
        ],

        # Shared tail of an instruction/data line: a newline or a comment
        # pops back to the previous state.
        'instruction-line': [
            (r'[\r\n]+', Whitespace, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace')
        ],

        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line')
        ],

        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line')
        ],

        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r';.*?\n', Comment)
        ],
    }