Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/asm.py: 91%

1"""

2 pygments.lexers.asm

3 ~~~~~~~~~~~~~~~~~~~

5 Lexers for assembly languages.

8 :license: BSD, see LICENSE for details.

9"""

11import re

13from pygments.lexer import RegexLexer, include, bygroups, using, words, \

14 DelegatingLexer, default

15from pygments.lexers.c_cpp import CppLexer, CLexer

16from pygments.lexers.d import DLexer

17from pygments.token import Text, Name, Number, String, Comment, Punctuation, \

18 Other, Keyword, Operator, Whitespace

# Names exported by ``from pygments.lexers.asm import *``.
__all__ = [
    'GasLexer',
    'ObjdumpLexer',
    'DObjdumpLexer',
    'CppObjdumpLexer',
    'CObjdumpLexer',
    'HsailLexer',
    'LlvmLexer',
    'LlvmMirBodyLexer',
    'LlvmMirLexer',
    'NasmLexer',
    'NasmObjdumpLexer',
    'TasmLexer',
    'Ca65Lexer',
    'Dasm16Lexer',
]

class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex building blocks shared by the states below.
    #: double-quoted string; ``\"`` does not terminate it
    string = r'"(\\"|[^"])*"'
    #: a single character allowed inside an identifier
    char = r'[\w$.@-]'
    #: a symbol name, or a dot followed by at least one identifier character
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    #: hexadecimal literal, or decimal with optional ``#`` and ``-`` prefixes
    number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
    #: ``%`` followed by an identifier
    register = '%' + identifier + r'\b'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation, Number.Integer, Punctuation)),

            # Numeric constants (immediate operands).
            # The '$' sigil has to be escaped: a bare '$' is the regex
            # end-of-line anchor, which made these two rules unmatchable.
            # They also have to precede the identifier rule, because the
            # identifier's first character class accepts '$' as well.
            # The trailing \b leaves tokens such as '$1f' to the
            # identifier rule instead of splitting them after the digits.
            (r'\$' + number + r'\b', Number.Integer),
            (r"\$'(.|\\')'", String.Char),
            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'([;#]|//).*?\n', Comment.Single),
            (r'/[*][\w\W]*?[*]/', Comment.Multiline)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:{}]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is GAS input.

        Returns ``True`` when some line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when some line starts with any other
        ``.word``, and ``None`` (implicitly) otherwise.
        """
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1

107

108

def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    *asm_lexer* is the lexer class that the disassembled-instruction part
    of a code line is delegated to (via ``using``).  The returned dict has
    a single ``'root'`` state.
    """
    # NOTE: despite the name this class accepts every ASCII letter and digit.
    hex_re = r'[0-9A-Za-z]'

    # Reusable pattern fragments; each one contributes capture groups, so
    # the ``bygroups`` calls below must stay in step with them.
    address = '(' + hex_re + '+)'
    line_label = '( *)(' + hex_re + '+:)'
    raw_bytes = r'(\t)((?:' + hex_re + hex_re + ' )+)'
    reloc_head = r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)'

    root_rules = [
        # "<file>:     file format <format>"
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),

        # "Disassembly of section <name>:"
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),

        # Function label carrying an offset: "<addr> <name+0x..>:"
        (address + '( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),

        # Function label without an offset: "<addr> <name>:"
        (address + '( )(<)(.*?)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),

        # Code line: address, raw opcode bytes, disassembled instruction
        (line_label + raw_bytes + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex,
                  Whitespace, using(asm_lexer))),

        # Code line lacking the raw bytes (objdump --no-show-raw-insn)
        (line_label + r'( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, using(asm_lexer))),

        # Code line whose tail is ascii rather than an instruction
        (line_label + raw_bytes + '( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex,
                  Whitespace, String)),

        # Continuation line: only raw opcodes, no disassembled instruction
        (line_label + raw_bytes + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),

        # "..." marker for skipped bytes
        (r'\t\.\.\.$', Text),

        # Relocation line with an offset
        (reloc_head + '([-+])(0x' + hex_re + '+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant, Punctuation, Number.Hex)),

        # Relocation line without an offset
        (reloc_head + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant)),

        # Anything else: pass the whole line through untouched
        (r'[^\n]+\n', Other),
    ]

    return {'root': root_rules}

160

161

class ObjdumpLexer(RegexLexer):
    """
    Lexer for what ``objdump -dr`` prints.

    Instruction text inside each code line is handed to `GasLexer`.
    """

    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # Shared objdump token table, specialised for GAS syntax.
    tokens = _objdump_lexer_tokens(GasLexer)

172

173

class DObjdumpLexer(DelegatingLexer):
    """
    Lexer for what ``objdump -Sr`` prints for compiled D files.
    """

    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        """Combine `DLexer` and `ObjdumpLexer`, forwarding *options*."""
        super().__init__(DLexer, ObjdumpLexer, **options)

185

186

class CppObjdumpLexer(DelegatingLexer):
    """
    Lexer for what ``objdump -Sr`` prints for compiled C++ files.
    """

    name = 'cpp-objdump'
    # NOTE(review): 'c++-objdumb' looks like a typo of 'c++-objdump'
    # (compare the '*.c++-objdump' filename pattern below).  It is left
    # untouched here because existing users may rely on the spelling;
    # consider adding the corrected alias alongside it.
    aliases = ['cpp-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        """Combine `CppLexer` and `ObjdumpLexer`, forwarding *options*."""
        super().__init__(CppLexer, ObjdumpLexer, **options)

198

199

class CObjdumpLexer(DelegatingLexer):
    """
    Lexer for what ``objdump -Sr`` prints for compiled C files.
    """

    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        """Combine `CLexer` and `ObjdumpLexer`, forwarding *options*."""
        super().__init__(CLexer, ObjdumpLexer, **options)

211

212

213class HsailLexer(RegexLexer):

214 """

215 For HSAIL assembly code.

216

217 .. versionadded:: 2.2

218 """

219 name = 'HSAIL'

220 aliases = ['hsail', 'hsa']

221 filenames = ['*.hsail']

222 mimetypes = ['text/x-hsail']

223

224 string = r'"[^"]*?"'

225 identifier = r'[a-zA-Z_][\w.]*'

226 # Registers

227 register_number = r'[0-9]+'

228 register = r'(\$(c|s|d|q)' + register_number + r')\b'

229 # Qualifiers

230 alignQual = r'(align$\d+$)'

231 widthQual = r'(width$(\d+|all)$)'

232 allocQual = r'(alloc$agent$)'

233 # Instruction Modifiers

234 roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')

235 datatypeMod = (r'_('

236 # packedTypes

237 r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'

238 r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'

239 r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'

240 # baseTypes

241 r'u8|s8|u16|s16|u32|s32|u64|s64|'

242 r'b128|b8|b16|b32|b64|b1|'

243 r'f16|f32|f64|'

244 # opaqueType

246

247 # Numeric Constant

248 float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'

249 hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'

250 ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

251

252 tokens = {

253 'root': [

254 include('whitespace'),

255 include('comments'),

256

257 (string, String),

258

259 (r'@' + identifier + ':?', Name.Label),

260

261 (register, Name.Variable.Anonymous),

262

263 include('keyword'),

264

265 (r'&' + identifier, Name.Variable.Global),

266 (r'%' + identifier, Name.Variable),

267

268 (hexfloat, Number.Hex),

269 (r'0[xX][a-fA-F0-9]+', Number.Hex),

270 (ieeefloat, Number.Float),

271 (float, Number.Float),

272 (r'\d+', Number.Integer),

273

274 (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)

275 ],

276 'whitespace': [

277 (r'(\n|\s)+', Whitespace),

278 ],

279 'comments': [

280 (r'/\*.*?\*/', Comment.Multiline),

281 (r'//.*?\n', Comment.Single),

282 ],

283 'keyword': [

284 # Types

285 (r'kernarg' + datatypeMod, Keyword.Type),

286

287 # Regular keywords

289 (words((

290 'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',

291 'decl', 'kernel', 'function', 'enablebreakexceptions',

292 'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',

293 'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',

294 'requiredworkgroupsize', 'requirenopartialworkgroups'),

295 suffix=r'\b'), Keyword),

296

297 # instructions

298 (roundingMod, Keyword),

299 (datatypeMod, Keyword),

300 (r'_(' + alignQual + '|' + widthQual + ')', Keyword),

301 (r'_kernarg', Keyword),

302 (r'(nop|imagefence)\b', Keyword),

303 (words((

304 'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',

305 'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',

306 'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',

307 'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',

308 'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',

309 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',

310 'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',

311 'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',

312 'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',

313 'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',

314 'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',

315 'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',

316 'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',

317 'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',

318 'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',

319 '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',

320 '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',

321 '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',

322 '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',

323 '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',

324 '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',

325 'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',

326 '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',

327 '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',

328 '_width', '_height', '_depth', '_array', '_channelorder',

329 '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',

330 'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',

331 'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',

332 'activelanecount', 'activelanemask', 'activelanepermute', 'call',

333 'scall', 'icall', 'alloca', 'packetcompletionsig',

334 'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',

335 'stqueuereadindex', 'readonly', 'global', 'private', 'group',

336 'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',

337 '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',

338 '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',

339 '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',

340 '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

341

342 # Integer types

343 (r'i[1-9]\d*', Keyword)

344 ]

345 }

346

347

class LlvmLexer(RegexLexer):
    """
    Lexer for LLVM assembly code.
    """

    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex building blocks.
    #: double-quoted string without embedded double quotes
    string = r'"[^"]*?"'
    #: bare name, or a quoted string used as a name
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
    #: block label: an identifier or a plain number
    block_label = r'(' + identifier + r'|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Labels are tried ahead of keywords, since a keyword is also
            # a valid label name.
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            # Sigil-prefixed names: named forms first, then numbered ones
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            (r'c?' + string, String),

            # Numbers: hexadecimal, then decimal with optional
            # fraction and exponent
            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            # ';' starts a comment that runs to the end of the line
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Plain keywords; each must end at a word boundary
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'srem', 'sret', 'ssp', 'sspreq', 'sspstrong',
                'store', 'strictfp', 'sub', 'summaries', 'summary', 'swiftcc',
                'swifterror', 'swiftself', 'switch', 'syncscope', 'tail',
                'tailcc', 'target', 'thread_local', 'to', 'token', 'triple',
                'true', 'trunc', 'type', 'typeCheckedLoadConstVCalls',
                'typeCheckedLoadVCalls', 'typeid', 'typeidCompatibleVTable',
                'typeIdInfo', 'typeTestAssumeConstVCalls',
                'typeTestAssumeVCalls', 'typeTestRes', 'typeTests', 'udiv',
                'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin',
                'undef', 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown',
                'unnamed_addr', 'uno', 'unordered', 'unreachable', 'unsat',
                'unwind', 'urem', 'uselistorder', 'uselistorder_bb', 'uwtable',
                'va_arg', 'varFlags', 'variable', 'vcall_visibility',
                'vFuncId', 'virtFunc', 'virtualConstProp', 'void', 'volatile',
                'vscale', 'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc',
                'win64cc', 'within', 'wpdRes', 'wpdResolutions', 'writeonly',
                'x', 'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc',
                'x86_mmx', 'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Named types (no word-boundary suffix on these)
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
             Keyword.Type),

            # Arbitrary-width integer types: i1, i8, i32, ...
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }

488

489

490class LlvmMirBodyLexer(RegexLexer):

491 """

492 For LLVM MIR examples without the YAML wrapper.

493

494 .. versionadded:: 2.6

495 """

496 name = 'LLVM-MIR Body'

497 url = 'https://llvm.org/docs/MIRLangRef.html'

498 aliases = ['llvm-mir-body']

499 filenames = []

500 mimetypes = []

501

502 tokens = {

503 'root': [

504 # Attributes on basic blocks

505 (words(('liveins', 'successors'), suffix=':'), Keyword),

506 # Basic Block Labels

507 (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( $address-taken$)?:', Name.Label),

508 (r'bb\.[0-9]+ $%[a-zA-Z0-9_.-]+$( $address-taken$)?:', Name.Label),

509 (r'%bb\.[0-9]+(\.\w+)?', Name.Label),

510 # Stack references

511 (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),

512 # Subreg indices

513 (r'%subreg\.\w+', Name),

514 # Virtual registers

515 (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),

516 # Reference to LLVM-IR global

517 include('global'),

518 # Reference to Intrinsic

519 (r'intrinsic$\@[a-zA-Z0-9_.]+$', Name.Variable.Global),

520 # Comparison predicates

521 (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',

522 'ule'), prefix=r'intpred$', suffix=r'$'), Name.Builtin),

523 (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',

524 'ult', 'ule'), prefix=r'floatpred$', suffix=r'$'),

525 Name.Builtin),

526 # Physical registers

527 (r'\$\w+', String.Single),

528 # Assignment operator

529 (r'=', Operator),

530 # gMIR Opcodes

535 r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'

544 r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'

545 r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'

546 r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'

547 r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'

548 r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'

549 r'FSUB)'

551 r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'

552 r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'

553 r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'

554 r'G_SHUFFLE_VECTOR)\b',

555 Name.Builtin),

556 # Target independent opcodes

557 (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',

558 Name.Builtin),

559 # Flags

560 (words(('killed', 'implicit')), Keyword),

561 # ConstantInt values

562 (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),

563 # ConstantFloat values

564 (r'(half|float|double) +', Keyword.Type, 'constantfloat'),

565 # Bare immediates

566 include('integer'),

567 # MMO's

568 (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),

569 # MIR Comments

570 (r';.*', Comment),

571 # If we get here, assume it's a target instruction

572 (r'[a-zA-Z0-9_]+', Name),

573 # Everything else that isn't highlighted

574 (r'[(), \n]+', Text),

575 ],

576 # The integer constant from a ConstantInt value

577 'constantint': [

578 include('integer'),

579 (r'(?=.)', Text, '#pop'),

580 ],

581 # The floating point constant from a ConstantFloat value

582 'constantfloat': [

583 include('float'),

584 (r'(?=.)', Text, '#pop'),

585 ],

586 'vreg': [

587 # The bank or class if there is one

588 (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),

589 # The LLT if there is one

590 (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),

591 (r'(?=.)', Text, '#pop'),

592 ],

593 'vreg_bank_or_class': [

594 # The unassigned bank/class

595 (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),

596 (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),

597 # The LLT if there is one

598 (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),

599 (r'(?=.)', Text, '#pop'),

600 ],

601 'vreg_type': [

602 # Scalar and pointer types

603 (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),

604 (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),

605 (r'\)', Text, '#pop'),

606 (r'(?=.)', Text, '#pop'),

607 ],

608 'mmo': [

609 (r'\(', Text),

610 (r' +', Whitespace),

611 (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',

612 'acquire', 'release', 'acq_rel', 'seq_cst')),

613 Keyword),

614 # IR references

615 (r'%ir\.[a-zA-Z0-9_.-]+', Name),

616 (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),

617 (r'[-+]', Operator),

618 include('integer'),

619 include('global'),

620 (r',', Punctuation),

621 (r'\), \(', Text),

622 (r'\)', Text, '#pop'),

623 ],

624 'integer': [(r'-?[0-9]+', Number.Integer),],

625 'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],

626 'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],

627 }

628

629

class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent
    LLVM's machine specific intermediate representation. It allows LLVM's
    developers to see the state of the compilation process at various
    points, as well as test individual pieces of the compiler.

    Documents opened with ``--- |`` are handed to `LlvmLexer`; the
    ``body: |`` block of any other document is handed to
    `LlvmMirBodyLexer`.

    .. versionadded:: 2.6
    """

    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        'root': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # A document opened with '--- |' contains LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Any other document is MIR
            (r'---', Keyword, 'llvm_mir'),
            # Swallow the remainder in as few tokens as possible
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # '...' ends the document; a following '---' does too, but is
            # only looked at, not consumed
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Everything up to the terminator goes to LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # '...' ends the document; a following '---' does too, but is
            # only looked at, not consumed
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Simple attributes, each followed by a typed value state
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Attributes whose contents are not highlighted
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # The body block is lexed by LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Whatever is left over
            (r'.+', Text),
            (r'\n', Whitespace),
        ],
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # '...' or an upcoming '---' ends the document; two states
            # (llvm_mir_body and llvm_mir) have to be popped
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Everything up to the terminator goes to LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional, so without a terminator the rest of
            # the input is the body
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }

708

709

710class NasmLexer(RegexLexer):

711 """

712 For Nasm (Intel) assembly code.

713 """

714 name = 'NASM'

715 aliases = ['nasm']

716 filenames = ['*.asm', '*.ASM', '*.nasm']

717 mimetypes = ['text/x-nasm']

718

719 # Tasm uses the same file endings, but TASM is not as common as NASM, so

720 # we prioritize NASM higher by default

721 priority = 1.0

722

723 identifier = r'[a-z$._?][\w$.?#@~]*'

724 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'

725 octn = r'[0-7]+q'

726 binn = r'[01]+b'

727 decn = r'[0-9]+'

728 floatn = decn + r'\.e?' + decn

729 string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"

730 declkw = r'(?:res|d)[bwdqt]|times'

731 register = (r'(r[0-9][0-5]?[bwd]?|'

733 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'

734 r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')

735 wordop = r'seg|wrt|strict|rel|abs'

736 type = r'byte|[dq]?word'

737 # Directives must be followed by whitespace, otherwise CPU will match

738 # cpuid for instance.

741 r'EXPORT|LIBRARY|MODULE)(?=\s)')

742

743 flags = re.IGNORECASE | re.MULTILINE

744 tokens = {

745 'root': [

746 (r'^\s*%', Comment.Preproc, 'preproc'),

747 include('whitespace'),

748 (identifier + ':', Name.Label),

749 (r'(%s)(\s+)(equ)' % identifier,

750 bygroups(Name.Constant, Whitespace, Keyword.Declaration),

751 'instruction-args'),

752 (directives, Keyword, 'instruction-args'),

753 (declkw, Keyword.Declaration, 'instruction-args'),

754 (identifier, Name.Function, 'instruction-args'),

755 (r'[\r\n]+', Whitespace)

756 ],

757 'instruction-args': [

758 (string, String),

759 (hexn, Number.Hex),

760 (octn, Number.Oct),

761 (binn, Number.Bin),

762 (floatn, Number.Float),

763 (decn, Number.Integer),

764 include('punctuation'),

765 (register, Name.Builtin),

766 (identifier, Name.Variable),

767 (r'[\r\n]+', Whitespace, '#pop'),

768 include('whitespace')

769 ],

770 'preproc': [

771 (r'[^;\n]+', Comment.Preproc),

772 (r';.*?\n', Comment.Single, '#pop'),

773 (r'\n', Comment.Preproc, '#pop'),

774 ],

775 'whitespace': [

776 (r'\n', Whitespace),

777 (r'[ \t]+', Whitespace),

778 (r';.*', Comment.Single),

779 (r'#.*', Comment.Single)

780 ],

781 'punctuation': [

782 (r'[,{}():\[\]]+', Punctuation),

783 (r'[&|^<>+*/%~-]+', Operator),

784 (r'[$]+', Keyword.Constant),

785 (wordop, Operator.Word),

786 (type, Keyword.Type)

787 ],

788 }

789

790 def analyse_text(text):

791 # Probably TASM

792 if re.match(r'PROC', text, re.IGNORECASE):

793 return False

794

795

class NasmObjdumpLexer(ObjdumpLexer):
    """
    Lexer for what ``objdump -d -M intel`` prints.

    Instruction text inside each code line is handed to `NasmLexer`.

    .. versionadded:: 2.0
    """

    name = 'objdump-nasm'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']

    # Shared objdump token table, specialised for NASM (Intel) syntax.
    tokens = _objdump_lexer_tokens(NasmLexer)

808

809

810class TasmLexer(RegexLexer):

811 """

812 For Tasm (Turbo Assembler) assembly code.

813 """

814 name = 'TASM'

815 aliases = ['tasm']

816 filenames = ['*.asm', '*.ASM', '*.tasm']

817 mimetypes = ['text/x-tasm']

818

819 identifier = r'[@a-z$._?][\w$.?#@~]*'

820 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'

821 octn = r'[0-7]+q'

822 binn = r'[01]+b'

823 decn = r'[0-9]+'

824 floatn = decn + r'\.e?' + decn

825 string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"

826 declkw = r'(?:res|d)[bwdqt]|times'

827 register = (r'(r[0-9][0-5]?[bwd]|'

829 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')

830 wordop = r'seg|wrt|strict'

831 type = r'byte|[dq]?word'

835 r'P386|MODEL|ASSUME|CODESEG|SIZE')

836 # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions

837 # and then 'add' them to datatype somehow.

838 datatype = (r'db|dd|dw|T[A-Z][a-z]+')

839

840 flags = re.IGNORECASE | re.MULTILINE

841 tokens = {

842 'root': [

843 (r'^\s*%', Comment.Preproc, 'preproc'),

844 include('whitespace'),

845 (identifier + ':', Name.Label),

846 (directives, Keyword, 'instruction-args'),

847 (r'(%s)(\s+)(%s)' % (identifier, datatype),

848 bygroups(Name.Constant, Whitespace, Keyword.Declaration),

849 'instruction-args'),

850 (declkw, Keyword.Declaration, 'instruction-args'),

851 (identifier, Name.Function, 'instruction-args'),

852 (r'[\r\n]+', Whitespace)

853 ],

854 'instruction-args': [

855 (string, String),

856 (hexn, Number.Hex),

857 (octn, Number.Oct),

858 (binn, Number.Bin),

859 (floatn, Number.Float),

860 (decn, Number.Integer),

861 include('punctuation'),

862 (register, Name.Builtin),

863 (identifier, Name.Variable),

864 # Do not match newline when it's preceded by a backslash

865 (r'(\\)(\s*)(;.*)([\r\n])',

866 bygroups(Text, Whitespace, Comment.Single, Whitespace)),

867 (r'[\r\n]+', Whitespace, '#pop'),

868 include('whitespace')

869 ],

870 'preproc': [

871 (r'[^;\n]+', Comment.Preproc),

872 (r';.*?\n', Comment.Single, '#pop'),

873 (r'\n', Comment.Preproc, '#pop'),

874 ],

875 'whitespace': [

876 (r'[\n\r]', Whitespace),

877 (r'(\\)([\n\r])', bygroups(Text, Whitespace)),

878 (r'[ \t]+', Whitespace),

879 (r';.*', Comment.Single)

880 ],

881 'punctuation': [

882 (r'[,():\[\]]+', Punctuation),

883 (r'[&|^<>+*=/%~-]+', Operator),

884 (r'[$]+', Keyword.Constant),

885 (wordop, Operator.Word),

886 (type, Keyword.Type)

887 ],

888 }

889

890 def analyse_text(text):

891 # See above

892 if re.match(r'PROC', text, re.I):

893 return True

894

895

class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.

    .. versionadded:: 1.6
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']

    # Mnemonics, labels and hex digits are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            # ';' starts a comment that runs to the end of the line
            (r';.*', Comment.Single),
            (r'\s+', Whitespace),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # Dot-prefixed words
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            # Quoted text: runs to the closing quote, or stops just past
            # the end of the line when the quote is unterminated
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            # Hex as '$ff' or 'ffh', then decimal, then '%'-prefixed binary
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is ca65 input.

        Returns ``0.9`` when some line starts (after optional whitespace)
        with ``;``, and ``None`` (implicitly) otherwise.

        The hook takes the text as its only argument, like every other
        ``analyse_text`` in this module.  The previous ``(self, text)``
        signature could not be called that way, so the heuristic never
        contributed a score.
        """
        # comments in GAS start with "#"
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9

933

934

class Dasm16Lexer(RegexLexer):
    """
    Lexer for DCPU-16 Assembly.

    .. versionadded:: 2.4
    """

    name = 'DASM16'
    url = 'http://0x10c.com/doc/dcpu-16.txt'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']

    # Instruction mnemonics; joined into the `instruction` regex below.
    INSTRUCTIONS = [
        'SET',
        'ADD', 'SUB',
        'MUL', 'MLI',
        'DIV', 'DVI',
        'MOD', 'MDI',
        'AND', 'BOR', 'XOR',
        'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX',
        'STI', 'STD',
        'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Names that `guess_identifier` reports as variables, not labels.
    REGISTERS = [
        'A', 'B', 'C',
        'X', 'Y', 'Z',
        'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH'
    ]

    # Regex building blocks.
    char = r'[a-zA-Z0-9_$@.]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    # Case-insensitive alternation over every mnemonic
    instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
    single_char = r"'\\?" + char + "'"
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """
        Token callback for identifiers appearing as arguments.

        Yields one ``(position, token type, text)`` triple: the token type
        is ``Name.Variable`` when the upper-cased text is listed in
        ``REGISTERS`` and ``Name.Label`` for anything else.
        """
        text = match.group(0)
        if text.upper() in lexer.REGISTERS:
            token_type = Name.Variable
        else:
            token_type = Name.Label
        yield match.start(), token_type, text

    tokens = {
        'root': [
            include('whitespace'),
            # Labels are accepted as ':name' and as 'name:'
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            # Dot-prefixed names are followed by data arguments
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Whitespace)
        ],

        'numeric': [
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        'arg': [
            (identifier, guess_identifier),
            include('numeric')
        ],

        # Inside '[...]'
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace')
        ],

        # Shared line tail: a newline or a comment leaves the state
        'instruction-line': [
            (r'[\r\n]+', Whitespace, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace')
        ],

        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line')
        ],

        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line')
        ],

        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r';.*?\n', Comment)
        ],
    }