Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/asm.py: 94%

1"""

2 pygments.lexers.asm

3 ~~~~~~~~~~~~~~~~~~~

5 Lexers for assembly languages.

8 :license: BSD, see LICENSE for details.

9"""

11import re

13from pygments.lexer import RegexLexer, include, bygroups, using, words, \

14 DelegatingLexer, default

15from pygments.lexers.c_cpp import CppLexer, CLexer

16from pygments.lexers.d import DLexer

17from pygments.token import Text, Name, Number, String, Comment, Punctuation, \

18 Other, Keyword, Operator, Whitespace

20__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',

21 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',

22 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',

23 'Ca65Lexer', 'Dasm16Lexer']

class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.

    The lexer has three working states: ``root`` recognises labels,
    directives, instruction prefixes and mnemonics; ``directive-args`` and
    ``instruction-args`` tokenize the remainder of the line and pop back to
    ``root`` at the end of the line.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    # Building blocks shared by the rules below.
    # Double-quoted string; ``\"`` does not terminate it.
    string = r'"(\\"|[^"])*"'
    # Characters allowed inside a symbol name.
    char = r'[\w$.@-]'
    # A symbol: letter/``$``/``_`` start, or a ``.`` followed by symbol chars.
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    # Hex literal, or a decimal with optional ``#`` prefix and sign.
    number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
    register = '%' + identifier + r'\b'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Numeric constants (``$``-prefixed immediates).
            # The ``$`` must be escaped: unescaped it is the end-of-input
            # anchor and the rule can never match.  These rules also have to
            # come before ``identifier``, whose first-character class accepts
            # ``$`` and would otherwise swallow e.g. ``$42`` as a symbol.
            (r'\$' + number, Number.Integer),
            (r"\$'(.|\\')'", String.Char),
            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            (register, Name.Variable),
            (r'[\r\n]+', Whitespace, '#pop'),
            (r'([;#]|//).*?\n', Comment.Single, '#pop'),
            (r'/[*].*?[*]/', Comment.Multiline),
            (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r'([;#]|//).*?\n', Comment.Single),
            (r'/[*][\w\W]*?[*]/', Comment.Multiline)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:{}]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is GAS source.

        Returns ``True`` when a line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when any line starts with a ``.``-directive,
        and ``None`` (implicitly) otherwise.
        """
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1

109

110

def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    :param asm_lexer: lexer class handed to ``using()`` for the disassembled
        instruction text of a code line.
    :return: a token definition dict with a single ``root`` state.
    """
    # NOTE: despite its role as a "hex digit", this class accepts any ASCII
    # letter or digit.
    digit = r'[0-9A-Za-z]'

    # Reusable pattern fragments (each one contributes its own groups).
    address = f'({digit}+)'                  # leading address of a label line
    offset_label = f'( *)({digit}+:)'        # indented "<offset>:" prefix
    raw_bytes = f'((?:{digit}{digit} )+)'    # space separated byte pairs
    reloc_head = rf'(\t\t\t)({digit}+:)( )([^\t]+)(\t)'

    root_rules = [
        # File name & format:
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # Section header
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function label, with offset
        (address + '( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # Function label, without offset
        (address + '( )(<)(.*?)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),
        # Code line with disassembled instructions
        (offset_label + r'(\t)' + raw_bytes + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex,
                  Whitespace, using(asm_lexer))),
        # Code line without raw instructions (objdump --no-show-raw-insn)
        (offset_label + r'( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, using(asm_lexer))),
        # Code line with ascii
        (offset_label + r'(\t)' + raw_bytes + '( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex,
                  Whitespace, String)),
        # Continued code line: only raw opcodes, no disassembled instruction
        (offset_label + r'(\t)' + raw_bytes + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
        # Skipped a few bytes
        (r'\t\.\.\.$', Text),
        # Relocation line, with offset
        (reloc_head + r'(.*?)([-+])(0x' + digit + '+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant, Punctuation, Number.Hex)),
        # Relocation line, without offset
        (reloc_head + r'(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant)),
        # Anything else is passed through as Other
        (r'[^\n]+\n', Other),
    ]
    return {'root': root_rules}

162

163

class ObjdumpLexer(RegexLexer):
    """
    Lexer for the output of ``objdump -dr``.

    Instruction text inside code lines is lexed with :class:`GasLexer`.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    # Shared objdump rules, with GAS syntax for the disassembly column.
    tokens = _objdump_lexer_tokens(GasLexer)

176

177

class DObjdumpLexer(DelegatingLexer):
    """
    Lexer for the output of ``objdump -Sr`` on compiled D files.

    Combines :class:`DLexer` and :class:`ObjdumpLexer`.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        # NOTE(review): presumably the lines ObjdumpLexer emits as Other are
        # the interleaved source handed to DLexer -- confirm against
        # DelegatingLexer.
        super().__init__(DLexer, ObjdumpLexer, **options)

191

192

class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.

    Combines :class:`CppLexer` and :class:`ObjdumpLexer`.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a long-standing misspelling.  The correctly spelled
    # 'c++-objdump' is added; the old alias stays so existing users of the
    # misspelled name keep working.
    # NOTE(review): if the project keeps a generated alias index, it needs
    # regenerating for the new alias to be found by name.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        super().__init__(CppLexer, ObjdumpLexer, **options)

206

207

class CObjdumpLexer(DelegatingLexer):
    """
    Lexer for the output of ``objdump -Sr`` on compiled C files.

    Combines :class:`CLexer` and :class:`ObjdumpLexer`.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        # Same wiring as the D and C++ variants, with the C lexer.
        super().__init__(CLexer, ObjdumpLexer, **options)

222

223

224class HsailLexer(RegexLexer):

225 """

226 For HSAIL assembly code.

227 """

228 name = 'HSAIL'

229 aliases = ['hsail', 'hsa']

230 filenames = ['*.hsail']

231 mimetypes = ['text/x-hsail']

232 url = 'https://en.wikipedia.org/wiki/Heterogeneous_System_Architecture#HSA_Intermediate_Layer'

233 version_added = '2.2'

234

235 string = r'"[^"]*?"'

236 identifier = r'[a-zA-Z_][\w.]*'

237 # Registers

238 register_number = r'[0-9]+'

239 register = r'(\$(c|s|d|q)' + register_number + r')\b'

240 # Qualifiers

241 alignQual = r'(align$\d+$)'

242 widthQual = r'(width$(\d+|all)$)'

243 allocQual = r'(alloc$agent$)'

244 # Instruction Modifiers

245 roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')

246 datatypeMod = (r'_('

247 # packedTypes

248 r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'

249 r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'

250 r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'

251 # baseTypes

252 r'u8|s8|u16|s16|u32|s32|u64|s64|'

253 r'b128|b8|b16|b32|b64|b1|'

254 r'f16|f32|f64|'

255 # opaqueType

257

258 # Numeric Constant

259 float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'

260 hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'

261 ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

262

263 tokens = {

264 'root': [

265 include('whitespace'),

266 include('comments'),

267

268 (string, String),

269

270 (r'@' + identifier + ':?', Name.Label),

271

272 (register, Name.Variable.Anonymous),

273

274 include('keyword'),

275

276 (r'&' + identifier, Name.Variable.Global),

277 (r'%' + identifier, Name.Variable),

278

279 (hexfloat, Number.Hex),

280 (r'0[xX][a-fA-F0-9]+', Number.Hex),

281 (ieeefloat, Number.Float),

282 (float, Number.Float),

283 (r'\d+', Number.Integer),

284

285 (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)

286 ],

287 'whitespace': [

288 (r'(\n|\s)+', Whitespace),

289 ],

290 'comments': [

291 (r'/\*.*?\*/', Comment.Multiline),

292 (r'//.*?\n', Comment.Single),

293 ],

294 'keyword': [

295 # Types

296 (r'kernarg' + datatypeMod, Keyword.Type),

297

298 # Regular keywords

300 (words((

301 'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',

302 'decl', 'kernel', 'function', 'enablebreakexceptions',

303 'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',

304 'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',

305 'requiredworkgroupsize', 'requirenopartialworkgroups'),

306 suffix=r'\b'), Keyword),

307

308 # instructions

309 (roundingMod, Keyword),

310 (datatypeMod, Keyword),

311 (r'_(' + alignQual + '|' + widthQual + ')', Keyword),

312 (r'_kernarg', Keyword),

313 (r'(nop|imagefence)\b', Keyword),

314 (words((

315 'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',

316 'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',

317 'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',

318 'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',

319 'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',

320 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',

321 'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',

322 'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',

323 'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',

324 'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',

325 'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',

326 'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',

327 'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',

328 'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',

329 'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',

330 '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',

331 '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',

332 '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',

333 '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',

334 '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',

335 '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',

336 'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',

337 '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',

338 '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',

339 '_width', '_height', '_depth', '_array', '_channelorder',

340 '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',

341 'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',

342 'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',

343 'activelanecount', 'activelanemask', 'activelanepermute', 'call',

344 'scall', 'icall', 'alloca', 'packetcompletionsig',

345 'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',

346 'stqueuereadindex', 'readonly', 'global', 'private', 'group',

347 'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',

348 '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',

349 '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',

350 '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',

351 '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

352

353 # Integer types

354 (r'i[1-9]\d*', Keyword)

355 ]

356 }

357

358

class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.

    ``root`` tries, in order: whitespace/comments, block labels, the
    ``keyword`` state (keywords and types), sigil-prefixed names, strings,
    numbers and finally punctuation.
    """
    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']
    version_added = ''

    # Pattern fragments reused by the rules below.
    # Double-quoted string without any escape handling.
    string = r'"[^"]*?"'
    # A bare name or a quoted name.
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
    # A label is an identifier or a plain number.
    block_label = r'(' + identifier + r'|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            # Sigil-prefixed names: %, @, # and ! followed by an identifier
            # or a number.
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            # Strings, optionally prefixed with ``c``.
            (r'c?' + string, String),

            # Hex literal with an optional one-letter type prefix, then
            # decimal integers/floats with optional exponent.
            (r'0[xX][KLMHR]?[a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            # ``;`` starts a comment that runs to the end of the line.
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 's​anitize_address'.replace('\u200b', ''),
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'splat', 'srem', 'sret', 'ssp', 'sspreq',
                'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
                'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch',
                'syncscope', 'tail', 'tailcc', 'target', 'thread_local', 'to',
                'token', 'triple', 'true', 'trunc', 'type',
                'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls',
                'typeid', 'typeidCompatibleVTable', 'typeIdInfo',
                'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls',
                'typeTestRes', 'typeTests', 'udiv', 'ueq', 'uge', 'ugt',
                'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef', 'une',
                'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr',
                'uno', 'unordered', 'unreachable', 'unsat', 'unwind', 'urem',
                'uselistorder', 'uselistorder_bb', 'uwtable', 'va_arg',
                'varFlags', 'variable', 'vcall_visibility', 'vFuncId',
                'virtFunc', 'virtualConstProp', 'void', 'volatile', 'vscale',
                'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc', 'win64cc',
                'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
                'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc', 'x86_mmx',
                'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Types
            # NOTE(review): unlike the keyword list above, this list has no
            # ``\b`` suffix -- confirm that is intended.
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
             Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }

501

502

503class LlvmMirBodyLexer(RegexLexer):

504 """

505 For LLVM MIR examples without the YAML wrapper.

506 """

507 name = 'LLVM-MIR Body'

508 url = 'https://llvm.org/docs/MIRLangRef.html'

509 aliases = ['llvm-mir-body']

510 filenames = []

511 mimetypes = []

512 version_added = '2.6'

513

514 tokens = {

515 'root': [

516 # Attributes on basic blocks

517 (words(('liveins', 'successors'), suffix=':'), Keyword),

518 # Basic Block Labels

519 (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( $address-taken$)?:', Name.Label),

520 (r'bb\.[0-9]+ $%[a-zA-Z0-9_.-]+$( $address-taken$)?:', Name.Label),

521 (r'%bb\.[0-9]+(\.\w+)?', Name.Label),

522 # Stack references

523 (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),

524 # Subreg indices

525 (r'%subreg\.\w+', Name),

526 # Virtual registers

527 (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),

528 # Reference to LLVM-IR global

529 include('global'),

530 # Reference to Intrinsic

531 (r'intrinsic$\@[a-zA-Z0-9_.]+$', Name.Variable.Global),

532 # Comparison predicates

533 (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',

534 'ule'), prefix=r'intpred$', suffix=r'$'), Name.Builtin),

535 (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',

536 'ult', 'ule'), prefix=r'floatpred$', suffix=r'$'),

537 Name.Builtin),

538 # Physical registers

539 (r'\$\w+', String.Single),

540 # Assignment operator

541 (r'=', Operator),

542 # gMIR Opcodes

547 r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'

556 r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'

557 r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'

558 r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'

559 r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'

560 r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'

561 r'FSUB)'

563 r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'

564 r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'

565 r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'

566 r'G_SHUFFLE_VECTOR)\b',

567 Name.Builtin),

568 # Target independent opcodes

569 (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',

570 Name.Builtin),

571 # Flags

572 (words(('killed', 'implicit')), Keyword),

573 # ConstantInt values

574 (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),

575 # ConstantFloat values

576 (r'(half|float|double) +', Keyword.Type, 'constantfloat'),

577 # Bare immediates

578 include('integer'),

579 # MMO's

580 (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),

581 # MIR Comments

582 (r';.*', Comment),

583 # If we get here, assume it's a target instruction

584 (r'[a-zA-Z0-9_]+', Name),

585 # Everything else that isn't highlighted

586 (r'[(), \n]+', Text),

587 ],

588 # The integer constant from a ConstantInt value

589 'constantint': [

590 include('integer'),

591 (r'(?=.)', Text, '#pop'),

592 ],

593 # The floating point constant from a ConstantFloat value

594 'constantfloat': [

595 include('float'),

596 (r'(?=.)', Text, '#pop'),

597 ],

598 'vreg': [

599 # The bank or class if there is one

600 (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),

601 # The LLT if there is one

602 (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),

603 (r'(?=.)', Text, '#pop'),

604 ],

605 'vreg_bank_or_class': [

606 # The unassigned bank/class

607 (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),

608 (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),

609 # The LLT if there is one

610 (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),

611 (r'(?=.)', Text, '#pop'),

612 ],

613 'vreg_type': [

614 # Scalar and pointer types

615 (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),

616 (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),

617 (r'\)', Text, '#pop'),

618 (r'(?=.)', Text, '#pop'),

619 ],

620 'mmo': [

621 (r'\(', Text),

622 (r' +', Whitespace),

623 (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',

624 'acquire', 'release', 'acq_rel', 'seq_cst')),

625 Keyword),

626 # IR references

627 (r'%ir\.[a-zA-Z0-9_.-]+', Name),

628 (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),

629 (r'[-+]', Operator),

630 include('integer'),

631 include('global'),

632 (r',', Punctuation),

633 (r'\), \(', Text),

634 (r'\)', Text, '#pop'),

635 ],

636 'integer': [(r'-?[0-9]+', Number.Integer),],

637 'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],

638 'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],

639 }

640

641

class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    The lexer splits the input into ``---`` delimited documents.  LLVM-IR
    documents are delegated to :class:`LlvmLexer` and MIR ``body`` blocks to
    :class:`LlvmMirBodyLexer`.
    """
    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            # ('---' is only looked ahead at, so 'root' sees it again and
            # opens the next document.)
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            # (each pushes a one-shot state that lexes the value, then pops)
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes don't highlight inside
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],
        # Value of a ``name:`` attribute: the rest of the line.
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        # Value of a boolean attribute.
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        # Value of a numeric attribute.
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }

719

720

721class NasmLexer(RegexLexer):

722 """

723 For Nasm (Intel) assembly code.

724 """

725 name = 'NASM'

726 aliases = ['nasm']

727 filenames = ['*.asm', '*.ASM', '*.nasm']

728 mimetypes = ['text/x-nasm']

729 url = 'https://nasm.us'

730 version_added = ''

731

732 # Tasm uses the same file endings, but TASM is not as common as NASM, so

733 # we prioritize NASM higher by default

734 priority = 1.0

735

736 identifier = r'[a-z$._?][\w$.?#@~]*'

737 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'

738 octn = r'[0-7]+q'

739 binn = r'[01]+b'

740 decn = r'[0-9]+'

741 floatn = decn + r'\.e?' + decn

742 string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"

743 declkw = r'(?:res|d)[bwdqt]|times'

744 register = (r'(r[0-9][0-5]?[bwd]?|'

746 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'

747 r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')

748 wordop = r'seg|wrt|strict|rel|abs'

749 type = r'byte|[dq]?word'

750 # Directives must be followed by whitespace, otherwise CPU will match

751 # cpuid for instance.

754 r'EXPORT|LIBRARY|MODULE)(?=\s)')

755

756 flags = re.IGNORECASE | re.MULTILINE

757 tokens = {

758 'root': [

759 (r'^\s*%', Comment.Preproc, 'preproc'),

760 include('whitespace'),

761 (identifier + ':', Name.Label),

762 (rf'({identifier})(\s+)(equ)',

763 bygroups(Name.Constant, Whitespace, Keyword.Declaration),

764 'instruction-args'),

765 (directives, Keyword, 'instruction-args'),

766 (declkw, Keyword.Declaration, 'instruction-args'),

767 (identifier, Name.Function, 'instruction-args'),

768 (r'[\r\n]+', Whitespace)

769 ],

770 'instruction-args': [

771 (string, String),

772 (hexn, Number.Hex),

773 (octn, Number.Oct),

774 (binn, Number.Bin),

775 (floatn, Number.Float),

776 (decn, Number.Integer),

777 include('punctuation'),

778 (register, Name.Builtin),

779 (identifier, Name.Variable),

780 (r'[\r\n]+', Whitespace, '#pop'),

781 include('whitespace')

782 ],

783 'preproc': [

784 (r'[^;\n]+', Comment.Preproc),

785 (r';.*?\n', Comment.Single, '#pop'),

786 (r'\n', Comment.Preproc, '#pop'),

787 ],

788 'whitespace': [

789 (r'\n', Whitespace),

790 (r'[ \t]+', Whitespace),

791 (r';.*', Comment.Single),

792 (r'#.*', Comment.Single)

793 ],

794 'punctuation': [

795 (r'[,{}():\[\]]+', Punctuation),

796 (r'[&|^<>+*/%~-]+', Operator),

797 (r'[$]+', Keyword.Constant),

798 (wordop, Operator.Word),

799 (type, Keyword.Type)

800 ],

801 }

802

def analyse_text(text):
    """
    Rule NASM out when *text* begins with ``PROC`` (any letter case).

    Returns ``False`` in that case and ``None`` (implicitly) otherwise.
    """
    # Probably TASM
    begins_with_proc = re.match(r'PROC', text, re.IGNORECASE) is not None
    if begins_with_proc:
        return False

807

808

class NasmObjdumpLexer(ObjdumpLexer):
    """
    Lexer for the output of ``objdump -d -M intel``.

    Instruction text inside code lines is lexed with :class:`NasmLexer`.
    """
    name = 'objdump-nasm'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = '2.0'

    # Shared objdump rules, with NASM syntax for the disassembly column.
    tokens = _objdump_lexer_tokens(NasmLexer)

821

822

823class TasmLexer(RegexLexer):

824 """

825 For Tasm (Turbo Assembler) assembly code.

826 """

827 name = 'TASM'

828 aliases = ['tasm']

829 filenames = ['*.asm', '*.ASM', '*.tasm']

830 mimetypes = ['text/x-tasm']

831 url = 'https://en.wikipedia.org/wiki/Turbo_Assembler'

832 version_added = ''

833

834 identifier = r'[@a-z$._?][\w$.?#@~]*'

835 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'

836 octn = r'[0-7]+q'

837 binn = r'[01]+b'

838 decn = r'[0-9]+'

839 floatn = decn + r'\.e?' + decn

840 string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"

841 declkw = r'(?:res|d)[bwdqt]|times'

842 register = (r'(r[0-9][0-5]?[bwd]|'

844 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')

845 wordop = r'seg|wrt|strict'

846 type = r'byte|[dq]?word'

850 r'P386|MODEL|ASSUME|CODESEG|SIZE')

851 # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions

852 # and then 'add' them to datatype somehow.

853 datatype = (r'db|dd|dw|T[A-Z][a-z]+')

854

855 flags = re.IGNORECASE | re.MULTILINE

856 tokens = {

857 'root': [

858 (r'^\s*%', Comment.Preproc, 'preproc'),

859 include('whitespace'),

860 (identifier + ':', Name.Label),

861 (directives, Keyword, 'instruction-args'),

862 (rf'({identifier})(\s+)({datatype})',

863 bygroups(Name.Constant, Whitespace, Keyword.Declaration),

864 'instruction-args'),

865 (declkw, Keyword.Declaration, 'instruction-args'),

866 (identifier, Name.Function, 'instruction-args'),

867 (r'[\r\n]+', Whitespace)

868 ],

869 'instruction-args': [

870 (string, String),

871 (hexn, Number.Hex),

872 (octn, Number.Oct),

873 (binn, Number.Bin),

874 (floatn, Number.Float),

875 (decn, Number.Integer),

876 include('punctuation'),

877 (register, Name.Builtin),

878 (identifier, Name.Variable),

879 # Do not match newline when it's preceded by a backslash

880 (r'(\\)(\s*)(;.*)([\r\n])',

881 bygroups(Text, Whitespace, Comment.Single, Whitespace)),

882 (r'[\r\n]+', Whitespace, '#pop'),

883 include('whitespace')

884 ],

885 'preproc': [

886 (r'[^;\n]+', Comment.Preproc),

887 (r';.*?\n', Comment.Single, '#pop'),

888 (r'\n', Comment.Preproc, '#pop'),

889 ],

890 'whitespace': [

891 (r'[\n\r]', Whitespace),

892 (r'(\\)([\n\r])', bygroups(Text, Whitespace)),

893 (r'[ \t]+', Whitespace),

894 (r';.*', Comment.Single)

895 ],

896 'punctuation': [

897 (r'[,():\[\]]+', Punctuation),

898 (r'[&|^<>+*=/%~-]+', Operator),

899 (r'[$]+', Keyword.Constant),

900 (wordop, Operator.Word),

901 (type, Keyword.Type)

902 ],

903 }

904

def analyse_text(text):
    """
    Claim *text* as TASM when it begins with ``PROC`` (any letter case).

    Returns ``True`` in that case and ``None`` (implicitly) otherwise.
    """
    # Mirror image of the check in the NASM lexer's analyse_text.
    begins_with_proc = re.match(r'PROC', text, re.I) is not None
    if begins_with_proc:
        return True

909

910

class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.

    All patterns are matched case-insensitively (``flags = re.IGNORECASE``).
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']
    url = 'https://cc65.github.io'
    version_added = '1.6'

    flags = re.IGNORECASE

    tokens = {
        'root': [
            # ``;`` starts a comment that runs to the end of the line.
            (r';.*', Comment.Single),
            (r'\s+', Whitespace),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # ``.name`` pseudo instructions
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            # ``$ff`` or ``0ffh`` style hex
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is ca65 source.

        Returns ``0.9`` when any line starts (after optional whitespace) with
        a ``;`` comment, and ``None`` (implicitly) otherwise.

        The hook takes only ``text``, matching every other lexer in this
        module; the previous ``(self, text)`` signature did not.
        NOTE(review): this makes the 0.9 score take effect during lexer
        guessing -- confirm that is acceptable for ``;``-commented inputs
        of other languages.
        """
        # comments in GAS start with "#"
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9

948

949

class Dasm16Lexer(RegexLexer):
    """
    Lexer for DCPU-16 assembly.

    Mnemonics are matched case-insensitively; identifiers used as operands
    are classified as registers or labels by :meth:`guess_identifier`.
    """
    name = 'DASM16'
    url = 'http://0x10c.com/doc/dcpu-16.txt'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']
    version_added = '2.4'

    # Mnemonics recognised at the start of an instruction.
    INSTRUCTIONS = [
        'SET',
        'ADD', 'SUB',
        'MUL', 'MLI',
        'DIV', 'DVI',
        'MOD', 'MDI',
        'AND', 'BOR', 'XOR',
        'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX',
        'STI', 'STD',
        'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Operand names highlighted as variables instead of labels.
    REGISTERS = [
        'A', 'B', 'C',
        'X', 'Y', 'Z',
        'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH',
    ]

    # Pattern fragments used by the token table.
    char = r'[a-zA-Z0-9_$@.]'
    identifier = rf'(?:[a-zA-Z$_]{char}*|\.{char}+)'
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    instruction = r'(?i)(%s)' % '|'.join(INSTRUCTIONS)
    single_char = rf"'\\?{char}'"
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """
        Token callback: emit the matched identifier as ``Name.Variable`` when
        its upper-cased text is listed in ``REGISTERS``, else ``Name.Label``.
        """
        text = match.group(0)
        if text.upper() in lexer.REGISTERS:
            token_type = Name.Variable
        else:
            token_type = Name.Label
        yield match.start(), token_type, text

    tokens = {
        'root': [
            include('whitespace'),
            # Labels may carry the colon on either side.
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Whitespace),
        ],

        # Literal operands.
        'numeric': [
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        # A single operand: a name or a literal.
        'arg': [
            (identifier, guess_identifier),
            include('numeric'),
        ],

        # Inside ``[...]``; left again at the closing bracket.
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace'),
        ],

        # End-of-line handling shared by both argument states.
        'instruction-line': [
            (r'[\r\n]+', Whitespace, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace'),
        ],

        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line'),
        ],

        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line'),
        ],

        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            (r';.*?\n', Comment),
        ],
    }