Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/asm.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

199 statements  

1""" 

2 pygments.lexers.asm 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for assembly languages. 

6 

7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, include, bygroups, using, words, \ 

14 DelegatingLexer, default 

15from pygments.lexers.c_cpp import CppLexer, CLexer 

16from pygments.lexers.d import DLexer 

17from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ 

18 Other, Keyword, Operator, Whitespace 

19 

20__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer', 

21 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer', 

22 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer', 

23 'Ca65Lexer', 'Dasm16Lexer'] 

24 

25 

class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.

    The lexer starts in ``root``.  A directive (``.`` followed by an
    identifier) pushes ``directive-args`` and any other identifier is
    treated as a mnemonic and pushes ``instruction-args``; both operand
    states pop again at the end of the line or after a trailing comment.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    # Regex fragments shared by the token table below.
    # Double-quoted string; a backslash-escaped quote does not end it.
    string = r'"(\\"|[^"])*"'
    # A single character allowed inside an identifier.
    char = r'[\w$.@-]'
    # Either a letter/``$``/``_`` followed by any number of ``char``, or a
    # ``.`` followed by at least one ``char``.
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    # Hexadecimal with 0x/0X prefix, or decimal with optional ``#`` and ``-``.
    number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
    register = '%' + identifier + r'\b'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            # Instruction prefixes: stay in 'root' so the mnemonic that
            # follows is still picked up by the rule below.
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (register, Name.Variable),
            # End of line or a line comment terminates the directive.
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            # A block comment closed on the same line keeps the state; one
            # that spans a newline also ends the directive.
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation, Number.Integer, Punctuation)),

            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            (register, Name.Variable),
            # Numeric constants
            # NOTE(review): the leading ``$`` in the next two patterns is not
            # escaped, so it is a regex anchor rather than a literal dollar
            # sign.  ``identifier`` above also accepts a leading ``$``, so
            # ``$``-prefixed operands appear to be consumed there first.
            # Confirm whether these two rules can ever match before relying
            # on them.
            ('$'+number, Number.Integer),
            (r"$'(.|\\')'", String.Char),
            # End of line or a line comment terminates the instruction.
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            # Line comments start with ';', '#' or '//'.
            (r'([;#]|//).*?\n', Comment.Single),
            # Block comments may span several lines here.
            (r'/[*][\w\W]*?[*]/', Comment.Multiline)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:{}]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is GAS source.

        Returns ``True`` when a line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when a line starts with any other
        ``.word`` directive, and ``None`` (implicitly) otherwise.
        """
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1

109 

110 

def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    The disassembled instruction text on each code line is handed to
    *asm_lexer*; everything else on the line (addresses, raw bytes,
    relocation records, headers) is tokenized here.
    """
    # NOTE: despite the name used for it upstream, this class accepts every
    # ASCII letter and digit, not only hexadecimal digits.
    digit = r'[0-9A-Za-z]'

    # Fragments that recur in several rules below.
    func_head = '(' + digit + '+)( )(<)(.*?)'          # "0000 <symbol"
    insn_addr = '( *)(' + digit + '+:)'                # "  4004f0:"
    raw_bytes = r'(\t)((?:' + digit + digit + ' )+)'   # "\t48 89 e5 "
    reloc_head = (r'(\t\t\t)(' + digit
                  + r'+:)( )([^\t]+)(\t)(.*?)')        # "\t\t\taddr: TYPE\tsym"

    root_rules = [
        # File name & format:
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),

        # Section header
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),

        # Function label with an offset ...
        (func_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # ... and without one
        (func_head + '(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),

        # Code line with raw bytes followed by a disassembled instruction.
        # The tail is deliberately a non-raw literal (real TAB character),
        # exactly as in the pattern this was derived from.
        (insn_addr + raw_bytes + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  using(asm_lexer))),

        # Code line without raw instructions (objdump --no-show-raw-insn)
        (insn_addr + r'( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace,
                  using(asm_lexer))),

        # Code line with raw bytes followed by ascii
        (insn_addr + raw_bytes + '( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  String)),

        # Continued code line: raw opcodes only, no disassembled instruction
        (insn_addr + raw_bytes + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),

        # Skipped a few bytes
        (r'\t\.\.\.$', Text),

        # Relocation line with an offset ...
        (reloc_head + '([-+])(0x' + digit + '+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant, Punctuation, Number.Hex)),
        # ... and without one
        (reloc_head + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant)),

        # Anything else: pass the whole line through untouched
        (r'[^\n]+\n', Other),
    ]

    return {'root': root_rules}

162 

163 

class ObjdumpLexer(RegexLexer):
    """
    For the output of ``objdump -dr``.

    Instruction text is delegated to :class:`GasLexer`.
    """
    # Token table: the shared objdump rules wrapped around the GAS lexer.
    tokens = _objdump_lexer_tokens(GasLexer)

    name = 'objdump'
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

176 

177 

class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.

    Combines :class:`DLexer` with :class:`ObjdumpLexer`.
    """
    name = 'd-objdump'
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        # Argument order matters: D lexer first, objdump lexer second.
        super().__init__(DLexer, ObjdumpLexer, **options)

191 

192 

class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.

    Combines :class:`CppLexer` with :class:`ObjdumpLexer`.
    """
    name = 'cpp-objdump'
    # 'c++-objdump' is the intended spelling (it mirrors the
    # '*.c++-objdump' filename pattern below).  The misspelled
    # 'c++-objdumb' is kept so that anything already using it keeps
    # working.
    # NOTE(review): if the project maintains a generated alias table for
    # lexer lookup, it needs regenerating for the new alias to resolve.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        # Argument order matters: C++ lexer first, objdump lexer second.
        super().__init__(CppLexer, ObjdumpLexer, **options)

206 

207 

class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.

    Combines :class:`CLexer` with :class:`ObjdumpLexer`.
    """
    name = 'c-objdump'
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        # Argument order matters: C lexer first, objdump lexer second.
        super().__init__(CLexer, ObjdumpLexer, **options)

222 

223 

class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.

    All rules live in ``root`` (plus the included ``whitespace``,
    ``comments`` and ``keyword`` groups); the lexer never changes state.
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']
    url = 'https://en.wikipedia.org/wiki/Heterogeneous_System_Architecture#HSA_Intermediate_Layer'
    version_added = '2.2'

    # Regex fragments shared by the token table below.
    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'
    # Registers
    register_number = r'[0-9]+'
    register = r'(\$(c|s|d|q)' + register_number + r')\b'
    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    allocQual = r'(alloc\(agent\))'
    # Instruction Modifiers
    roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
    # Longer alternatives are listed before their prefixes (e.g. ``u8x4``
    # before ``u8``, ``b128`` before ``b1``) so the longest name wins.
    datatypeMod = (r'_('
                   # packedTypes
                   r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
                   r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
                   r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
                   # baseTypes
                   r'u8|s8|u16|s16|u32|s32|u64|s64|'
                   r'b128|b8|b16|b32|b64|b1|'
                   r'f16|f32|f64|'
                   # opaqueType
                   r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric Constant
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            (r'@' + identifier + ':?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            (r'&' + identifier, Name.Variable.Global),
            (r'%' + identifier, Name.Variable),

            # Floating-point forms are tried before the plain hex/decimal
            # integers that are prefixes of them.
            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Whitespace),
        ],
        'comments': [
            # ``[\w\W]`` rather than ``.`` so that a block comment may span
            # several lines; ``.`` stops at a newline and left multi-line
            # comments unrecognised.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (r'kernarg' + datatypeMod, Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
                'decl', 'kernel', 'function', 'enablebreakexceptions',
                'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'), Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            # 'segmentp' is the HSAIL segment-checking instruction.  The
            # misspelled 'sementp' that used to stand in its place is kept
            # only so existing output for that token does not change.
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
                'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
                'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
                'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
                'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
                'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
                'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
                'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
                'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
                'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
                'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
                'unpackcvt', 'sad', 'segmentp', 'sementp', 'ftos', 'stof', 'cmp',
                'ld', 'st',
                '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
                '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
                '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
                '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
                '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
                'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
                '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
                '_width', '_height', '_depth', '_array', '_channelorder',
                '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
                'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
                'activelanecount', 'activelanemask', 'activelanepermute', 'call',
                'scall', 'icall', 'alloca', 'packetcompletionsig',
                'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private', 'group',
                'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
                '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
                '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
                '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }

357 

358 

class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.

    Apart from ``c-comment`` (entered on ``/*`` and left on ``*/``) the
    lexer stays in ``root`` for the whole input.
    """
    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']
    version_added = ''

    # Regex fragments shared by the token table below.
    # Double-quoted string without any escape handling.
    string = r'"[^"]*?"'
    # A bare name or a quoted string.
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
    # Block labels may additionally be purely numeric.
    block_label = r'(' + identifier + r'|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            # Sigil-prefixed names: named forms first, then the purely
            # numeric forms that ``identifier`` cannot match.
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            # Plain strings and ``c"..."`` strings.
            (r'c?' + string, String),

            (r'0[xX][KLMHR]?[a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            (r';.*?\n', Comment),
            (r'/\*', Comment, 'c-comment'),
        ],
        'c-comment': [
            (r'[^*]+', Comment),
            (r'\*/', Comment, '#pop'),
            # Consume lone asterisks as non-comment-ending content.
            (r'\*', Comment),
        ],
        'keyword': [
            # Regular keywords
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'splat', 'srem', 'sret', 'ssp', 'sspreq',
                'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
                'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch',
                'syncscope', 'tail', 'tailcc', 'target', 'thread_local', 'to',
                'token', 'triple', 'true', 'trunc', 'type',
                'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls',
                'typeid', 'typeidCompatibleVTable', 'typeIdInfo',
                'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls',
                'typeTestRes', 'typeTests', 'udiv', 'ueq', 'uge', 'ugt',
                'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef', 'une',
                'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr',
                'uno', 'unordered', 'unreachable', 'unsat', 'unwind', 'urem',
                'uselistorder', 'uselistorder_bb', 'uwtable', 'va_arg',
                'varFlags', 'variable', 'vcall_visibility', 'vFuncId',
                'virtFunc', 'virtualConstProp', 'void', 'volatile', 'vscale',
                'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc', 'win64cc',
                'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
                'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc', 'x86_mmx',
                'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Types
            # NOTE(review): 'void', 'metadata', 'token' and 'x86_mmx' also
            # appear in the keyword list above, which is tried first, so
            # those four presumably never reach this rule.  Unlike the
            # keyword rule, this one has no ``\b`` suffix.
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
                Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }

508 

509 

class LlvmMirBodyLexer(RegexLexer):
    """
    For LLVM MIR examples without the YAML wrapper.

    ``root`` recognises one operand or opcode at a time.  Virtual
    registers, typed constants and memory operands push short-lived helper
    states (``vreg``, ``constantint``, ``constantfloat``, ``mmo``) that pop
    themselves as soon as their specific syntax has been consumed.
    """
    name = 'LLVM-MIR Body'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir-body']
    # No file patterns or MIME types are registered for this lexer.
    filenames = []
    mimetypes = []
    version_added = '2.6'

    tokens = {
        'root': [
            # Attributes on basic blocks
            (words(('liveins', 'successors'), suffix=':'), Keyword),
            # Basic Block Labels
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
            # Stack references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
            # Subreg indices
            (r'%subreg\.\w+', Name),
            # Virtual registers
            # (tried only after the more specific %bb./%stack./%subreg. forms)
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
            # Reference to LLVM-IR global
            include('global'),
            # Reference to Intrinsic
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
            # Comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
                    'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),
            # Physical registers
            (r'\$\w+', String.Single),
            # Assignment operator
            (r'=', Operator),
            # gMIR Opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),
            # Target independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),
            # Flags
            (words(('killed', 'implicit')), Keyword),
            # ConstantInt values
            (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),
            # ConstantFloat values
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
            # Bare immediates
            include('integer'),
            # MMO's
            (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),
            # MIR Comments
            (r';.*', Comment),
            # If we get here, assume it's a target instruction
            (r'[a-zA-Z0-9_]+', Name),
            # Everything else that isn't highlighted
            (r'[(), \n]+', Text),
        ],
        # The integer constant from a ConstantInt value
        'constantint': [
            include('integer'),
            # Zero-width lookahead: leave the state without consuming input.
            (r'(?=.)', Text, '#pop'),
        ],
        # The floating point constant from a ConstantFloat value
        'constantfloat': [
            include('float'),
            # Zero-width lookahead: leave the state without consuming input.
            (r'(?=.)', Text, '#pop'),
        ],
        # What may follow a virtual register name
        'vreg': [
            # The bank or class if there is one
            # (a single ':'; '::' is left for the MMO rule in 'root')
            (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),
            # The LLT if there is one
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_bank_or_class': [
            # The unassigned bank/class
            (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
            (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
            # The LLT if there is one
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_type': [
            # Scalar and pointer types
            (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
            # Vector types such as <4 x s32>
            (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],
        # Memory operands, entered after '::' and left on the closing ')'
        'mmo': [
            (r'\(', Text),
            (r' +', Whitespace),
            (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
                    'acquire', 'release', 'acq_rel', 'seq_cst')),
             Keyword),
            # IR references
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
        # Single-rule groups pulled in via include() above
        'integer': [(r'-?[0-9]+', Number.Integer),],
        'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
        'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
    }

647 

648 

class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    A ``--- |`` marker switches to :class:`LlvmLexer`; any other ``---``
    marker enters the MIR attribute state, whose ``body: |`` block is
    delegated to :class:`LlvmMirBodyLexer`.
    """
    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            # (must precede the bare '---' rule, which would also match)
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            # ('---' is only looked at, not consumed, so 'root' sees it again)
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes don't highlight inside
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],
        # Value states for the simple attributes: consume the value if it
        # is present, then return to 'llvm_mir' either way.
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }

726 

727 

class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.

    ``root`` recognises labels, directives, data declarations and
    mnemonics; each of the latter three pushes ``instruction-args``, which
    pops again at the end of the line.  Lines whose first non-blank
    character is ``%`` are handled by ``preproc``.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM', '*.nasm']
    mimetypes = ['text/x-nasm']
    url = 'https://nasm.us'
    version_added = ''

    # Tasm uses the same file endings, but TASM is not as common as NASM, so
    # we prioritize NASM higher by default
    priority = 1.0

    # Regex fragments shared by the token table below.  All patterns are
    # compiled case-insensitively (see ``flags``).
    identifier = r'[a-z$._?][\w$.?#@~]*'
    # Hex literals: 0x prefix, $ prefix (which must be followed by a 0) or
    # h suffix.  The ``$`` has to be escaped: a bare ``$`` is the regex
    # end-of-line anchor, which made the ``$0c8`` form impossible to match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    # Double-, single- or backtick-quoted, never spanning a newline.
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]?|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
                r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
    wordop = r'seg|wrt|strict|rel|abs'
    type = r'byte|[dq]?word'
    # Directives must be followed by whitespace, otherwise CPU will match
    # cpuid for instance.
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE)(?=\s)')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            # "name equ value" constant definitions
            (rf'({identifier})(\s+)(equ)',
                bygroups(Name.Constant, Whitespace, Keyword.Declaration),
                'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        'instruction-args': [
            (string, String),
            # Prefixed/suffixed number forms come before plain decimal, and
            # before 'punctuation', whose ``[$]+`` rule would otherwise take
            # the ``$`` of a ``$0c8`` hex literal.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single),
            (r'#.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,{}():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        """
        Return ``False`` when *text* looks like TASM rather than NASM.

        ``re.match`` only tests the very start of *text*, so this fires
        only when the input begins with ``PROC`` (case-insensitive).  In
        every other case the function falls through and returns ``None``.
        """
        # Probably TASM
        if re.match(r'PROC', text, re.IGNORECASE):
            return False

814 

815 

class NasmObjdumpLexer(ObjdumpLexer):
    """
    For the output of ``objdump -d -M intel``.

    Instruction text is delegated to :class:`NasmLexer`.
    """
    # Token table: the shared objdump rules wrapped around the NASM lexer.
    tokens = _objdump_lexer_tokens(NasmLexer)

    name = 'objdump-nasm'
    url = 'https://www.gnu.org/software/binutils'
    version_added = '2.0'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']

828 

829 

class TasmLexer(RegexLexer):
    """
    For Tasm (Turbo Assembler) assembly code.
    """
    name = 'TASM'
    aliases = ['tasm']
    filenames = ['*.asm', '*.ASM', '*.tasm']
    mimetypes = ['text/x-tasm']
    url = 'https://en.wikipedia.org/wiki/Turbo_Assembler'
    version_added = ''

    identifier = r'[@a-z$._?][\w$.?#@~]*'
    # The '$' of the "$0..." hex form must be escaped: a bare '$' is the
    # end-of-line anchor, which can never be followed by a digit, so the
    # unescaped alternative could not match anything.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    # The negative lookahead rejects a directive name that is immediately
    # followed by a character which may continue an identifier.  Without it
    # a mnemonic such as ``cpuid`` was split into the directive ``CPU`` and
    # an operand ``id``.
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
                  r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
                  r'P386|MODEL|ASSUME|CODESEG|SIZE)(?![\w$.?#@~])')
    # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
    # and then 'add' them to datatype somehow.
    # NOTE(review): ``flags`` below enables IGNORECASE, so this pattern also
    # accepts lower-case words starting with "t" -- confirm whether the
    # convention was meant to be case-sensitive.
    datatype = (r'db|dd|dw|T[A-Z][a-z]+')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        # Start of a statement.
        'root': [
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            # "<name> <datatype> ..." data definition.
            (rf'({identifier})(\s+)({datatype})',
                bygroups(Name.Constant, Whitespace, Keyword.Declaration),
                'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Whitespace)
        ],
        # Operands; a line break returns to 'root'.
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # Do not match newline when it's preceded by a backslash
            (r'(\\)(\s*)(;.*)([\r\n])',
                bygroups(Text, Whitespace, Comment.Single, Whitespace)),
            (r'[\r\n]+', Whitespace, '#pop'),
            include('whitespace')
        ],
        # Remainder of a preprocessor line.
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'[\n\r]', Whitespace),
            # Backslash directly followed by a line break.
            (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
            (r'[ \t]+', Whitespace),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*=/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }

    def analyse_text(text):
        """Return ``True`` when *text* begins with ``PROC`` (any letter case)."""
        # See above
        # NOTE(review): re.match is anchored at the start of the text, so a
        # ``PROC`` further down is not seen.  Left unchanged on purpose:
        # searching the whole text would claim every input that merely
        # contains the word "proc".
        if re.match(r'PROC', text, re.I):
            return True

916 

917 

class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']
    url = 'https://cc65.github.io'
    version_added = '1.6'

    # Mnemonics, labels and hex digits are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r';.*', Comment.Single),
            (r'\s+', Whitespace),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics.
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # Dot-prefixed words.
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            # The final '.' takes the closing quote, or the last character
            # before the line break when the literal is left unterminated.
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """Return ``0.9`` if any line of *text* starts with a ``;`` comment.

        The hook takes the text as its only argument, like the other
        ``analyse_text`` hooks in this module; the previous signature had an
        extra ``self`` parameter, so a call passing only the text failed.
        """
        # comments in GAS start with "#"
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9

955 

956 

class Dasm16Lexer(RegexLexer):
    """
    For DCPU-16 Assembly.
    """
    name = 'DASM16'
    url = 'http://0x10c.com/doc/dcpu-16.txt'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']
    version_added = '2.4'

    # Mnemonics recognised at the start of a statement; joined into the
    # ``instruction`` pattern below.
    INSTRUCTIONS = [
        'SET',
        'ADD', 'SUB',
        'MUL', 'MLI',
        'DIV', 'DVI',
        'MOD', 'MDI',
        'AND', 'BOR', 'XOR',
        'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX',
        'STI', 'STD',
        'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Operand names that ``guess_identifier`` reports as Name.Variable.
    REGISTERS = [
        'A', 'B', 'C',
        'X', 'Y', 'Z',
        'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH',
    ]

    # Building blocks for the token patterns.
    char = r'[a-zA-Z0-9_$@.]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
    single_char = r"'\\?" + char + "'"
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """Token callback: classify an identifier operand.

        Yields a single ``(position, token type, text)`` triple.  The type is
        ``Name.Variable`` when the upper-cased text is listed in
        ``REGISTERS`` and ``Name.Label`` otherwise.
        """
        ident = match.group(0)
        if ident.upper() in lexer.REGISTERS:
            klass = Name.Variable
        else:
            klass = Name.Label
        yield match.start(), klass, ident

    tokens = {
        # Start of a statement.
        'root': [
            include('whitespace'),
            # Labels may carry the colon in front or behind.
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Whitespace),
        ],

        # Numeric and single-character literals.
        'numeric': [
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        # One operand: an identifier or a literal.
        'arg': [
            (identifier, guess_identifier),
            include('numeric'),
        ],

        # Inside "[ ... ]"; the closing bracket leaves the state.
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace'),
        ],

        # End of a statement: a line break or a comment leaves the state.
        'instruction-line': [
            (r'[\r\n]+', Whitespace, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace'),
        ],

        # Operands following an instruction mnemonic.
        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line'),
        ],

        # Operands following a dot-prefixed name: literals and strings only.
        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line'),
        ],

        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r';.*?\n', Comment),
        ],
    }