Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/archinfo/arch.py: 56%

521 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:15 +0000

1import logging 

2from typing import Dict, List, Tuple, Optional 

3import struct as _struct 

4import platform as _platform 

5import re 

6 

7from archinfo.types import RegisterOffset, RegisterName 

8from .archerror import ArchError 

9from .tls import TLSArchInfo 

10 

11import copy 

12 

# Module logger; the NullHandler keeps the library silent unless the
# application configures logging itself.
log = logging.getLogger("archinfo.arch")
log.addHandler(logging.NullHandler())

# Optional back-ends. Each alias is bound to None when the package cannot be
# imported, so the rest of the module can test for availability.
try:
    import pyvex as _pyvex
except ImportError:
    _pyvex = None

try:
    import unicorn as _unicorn
except ImportError:
    _unicorn = None

try:
    import capstone as _capstone
except ImportError:
    _capstone = None

try:
    import keystone as _keystone
except ImportError:
    _keystone = None

35 

36 

class Endness:  # pylint: disable=no-init
    """Endness specifies the byte order for integer values.

    :cvar LE: Little endian, least significant byte is stored at lowest address
    :cvar BE: Big endian, most significant byte is stored at lowest address
    :cvar ME: Middle-endian. Yep.
    """

    LE = "Iend_LE"
    BE = "Iend_BE"
    ME = "Iend_ME"

48 

49 

class Register:
    """
    A collection of information about a register. Each different architecture
    has its own list of registers, which is the base for all other
    register-related collections.

    It is, just like for Arch object, assumed that the information is compatible
    with PyVEX.

    :ivar str name:               The name of the register
    :ivar int size:               The size of the register (in bytes)
    :ivar int vex_offset:         The VEX offset used to identify this register
    :ivar str vex_name:           The name libVEX uses to identify the register
    :ivar list subregisters:      The list of subregisters in the form (name, offset from vex_offset, size)
    :ivar tuple alias_names:      The possible alias names
    :ivar bool general_purpose:   Whether this is a general purpose register
    :ivar bool floating_point:    Whether this is a floating-point register
    :ivar bool vector:            Whether this is a vector register
    :ivar bool argument:          Whether this is an argument register
    :ivar bool persistent:        Whether this is a persistent register
    :ivar tuple default_value:    Tuple that ``Arch`` appends to the register name to build an entry of
                                  ``default_register_values``; its first element is the default value
    :ivar int, str linux_entry_value: The value ``Arch`` stores for this register in ``entry_register_values``
    :ivar bool concretize_unique: Whether this register should be concretized, if unique, at the end of each block
    :ivar bool concrete:          Whether this register should be considered during the synchronization of the
                                  concrete execution of the process
    :ivar bool artificial:        Whether this register is an artificial register added by VEX IR or other ILs.
    """

    def __init__(
        self,
        name,
        size,
        vex_offset=None,
        vex_name=None,
        subregisters=None,
        alias_names=None,
        general_purpose=False,
        floating_point=False,
        vector=False,
        argument=False,
        persistent=False,
        default_value=None,
        linux_entry_value=None,
        concretize_unique=False,
        concrete=True,
        artificial=False,
    ):
        self.name: RegisterName = name
        self.size: int = size
        self.vex_offset: RegisterOffset = vex_offset
        self.vex_name = vex_name
        # A fresh list / empty tuple per instance avoids sharing a mutable default.
        self.subregisters: List[Tuple[RegisterName, RegisterOffset, int]] = [] if subregisters is None else subregisters
        self.alias_names = () if alias_names is None else alias_names
        self.general_purpose = general_purpose
        self.floating_point = floating_point
        self.vector = vector
        self.argument = argument
        self.persistent = persistent
        self.default_value = default_value
        self.linux_entry_value = linux_entry_value
        self.concretize_unique = concretize_unique
        self.concrete = concrete
        self.artificial = artificial

    def __repr__(self):
        return f"<Register {self.name}>"

116 

117 

class Arch:
    """
    A collection of information about a given architecture. This class should be subclassed for each different
    architecture, and then that subclass should be registered with the ``register_arch`` method.

    A good number of assumptions are made that code is being processed under the VEX IR - for instance, the
    register file offsets are expected to match code generated by PyVEX.

    Arches may be compared with == and !=.

    :ivar str name: The name of the arch
    :ivar int bits: The number of bits in a word
    :ivar str vex_arch: The VEX enum name used to identify this arch
    :ivar str qemu_name: The name used by QEMU to identify this arch
    :ivar str ida_processor: The processor string used by IDA to identify this arch
    :ivar str triplet: The triplet used to identify a linux system on this arch
    :ivar int max_inst_bytes: The maximum number of bytes in a single instruction
    :ivar int ip_offset: The offset of the instruction pointer in the register file
    :ivar int sp_offset: The offset of the stack pointer in the register file
    :ivar int bp_offset: The offset of the base pointer in the register file
    :ivar int lr_offset: The offset of the link register (return address) in the register file
    :ivar int ret_offset: The offset of the return value register in the register file
    :ivar bool vex_conditional_helpers: Whether libVEX will generate code to process the conditional flags for this
        arch using ccalls
    :ivar int syscall_num_offset: The offset in the register file where the syscall number is stored
    :ivar bool call_pushes_ret: Whether this arch's call instruction causes a stack push
    :ivar int stack_change: The change to the stack pointer caused by a push instruction
    :ivar str memory_endness: The endness of memory, as a VEX enum
    :ivar str register_endness: The endness of registers, as a VEX enum. Should usually be same as above
    :ivar str instruction_endness: The endness of instructions stored in memory.
        In other words, this controls whether instructions are stored endian-flipped compared to their description
        in the ISA manual, and should be flipped when lifted. Iend_BE means "don't flip"
        NOTE: Only used for non-libVEX lifters.
    :ivar dict sizeof: A mapping from C type to variable size in bits
    :ivar cs_arch: The Capstone arch value for this arch
    :ivar cs_mode: The Capstone mode value for this arch
    :ivar ks_arch: The Keystone arch value for this arch
    :ivar ks_mode: The Keystone mode value for this arch
    :ivar uc_arch: The Unicorn engine arch value for this arch
    :ivar uc_mode: The Unicorn engine mode value for this arch
    :ivar uc_const: The Unicorn engine constants module for this arch
    :ivar uc_prefix: The prefix used for variables in the Unicorn engine constants module
    :ivar list function_prologs: A list of regular expressions matching the bytes for common function prologues
    :ivar list function_epilogs: A list of regular expressions matching the bytes for common function epilogues
    :ivar str ret_instruction: The bytes for a return instruction
    :ivar str nop_instruction: The bytes for a nop instruction
    :ivar int instruction_alignment: The instruction alignment requirement
    :ivar list default_register_values: A weird listing describing how registers should be initialized for purposes of
        sanity
    :ivar dict entry_register_values: A mapping from register name to a description of the value that should be in it
        at program entry on linux
    :ivar list default_symbolic_registers: Honestly, who knows what this is supposed to do. Fill it with the names of
        the general purpose registers.
    :ivar dict register_names: A mapping from register file offset to register name
    :ivar dict registers: A mapping from register name to a tuple of (register file offset, size in bytes)
    :ivar list lib_paths: A listing of common locations where shared libraries for this architecture may be found
    :ivar str got_section_name: The name of the GOT section in ELFs
    :ivar str ld_linux_name: The name of the linux dynamic loader program
    :cvar int byte_width: The number of bits in a byte.
    :ivar TLSArchInfo elf_tls: A description of how thread-local storage works
    :cvar List[str] dwarf_registers: A list of register names in the order specified in the DWARF specification of the
        corresponding architecture.
    """

    byte_width = 8
    instruction_endness = "Iend_BE"
    elf_tls: TLSArchInfo = None
    dwarf_registers: List[str] = []

    def __init__(self, endness, instruction_endness=None):
        """
        Build the per-instance register tables and back-end modes for this arch.

        :param endness:             One of Endness.LE, Endness.BE or Endness.ME.
        :param instruction_endness: Optional override for the class-level ``instruction_endness``.
        :raises ArchError:          If ``endness`` is not one of the three Endness values.
        """
        # NOTE(review): this source was recovered from a listing with collapsed indentation; the
        # nesting of the sections below was reconstructed from the code's semantics - confirm
        # against the reference implementation before relying on it.
        self.bytes = self.bits // self.byte_width

        if endness not in (Endness.LE, Endness.BE, Endness.ME):
            raise ArchError("Must pass a valid endness: Endness.LE, Endness.BE, or Endness.ME")

        if instruction_endness is not None:
            self.instruction_endness = instruction_endness

        if self.vex_support and _pyvex:
            self.vex_archinfo = _pyvex.enums.default_vex_archinfo()

        if endness == Endness.BE:
            if self.vex_archinfo:
                self.vex_archinfo["endness"] = _pyvex.enums.vex_endness_from_string("VexEndnessBE")
            self.memory_endness = Endness.BE
            self.register_endness = Endness.BE
            # Swap the little-endian mode flag for the big-endian one in each back-end.
            if _capstone and self.cs_mode is not None:
                self.cs_mode -= _capstone.CS_MODE_LITTLE_ENDIAN
                self.cs_mode += _capstone.CS_MODE_BIG_ENDIAN
            if _keystone and self.ks_mode is not None:
                self.ks_mode -= _keystone.KS_MODE_LITTLE_ENDIAN
                self.ks_mode += _keystone.KS_MODE_BIG_ENDIAN
            self.ret_instruction = reverse_ends(self.ret_instruction)
            self.nop_instruction = reverse_ends(self.nop_instruction)

        if self.register_list and _pyvex is not None:
            # Offsets handed out to registers unknown to VEX start past the largest known guest offset.
            (_, _), max_offset = max(_pyvex.vex_ffi.guest_offsets.items(), key=lambda x: x[1])
            max_offset += self.bits
            # Register collections
            if isinstance(self.vex_arch, str):
                va = self.vex_arch[7:].lower()  # pylint: disable=unsubscriptable-object
                for r in self.register_list:
                    if r.vex_offset is None:
                        # Try every known name; each miss assigns (and consumes) a fresh offset,
                        # the first hit overrides it and stops the search.
                        for name in (r.vex_name, r.name) + r.alias_names:
                            try:
                                r.vex_offset = _pyvex.vex_ffi.guest_offsets[(va, name)]
                            except KeyError:
                                r.vex_offset = max_offset
                                max_offset += r.size
                            else:
                                break

            self.default_register_values = [
                (r.name,) + r.default_value for r in self.register_list if r.default_value is not None
            ]
            self.entry_register_values = {
                r.name: r.linux_entry_value for r in self.register_list if r.linux_entry_value is not None
            }
            self.default_symbolic_registers = [r.name for r in self.register_list if r.general_purpose]
            self.register_names = {r.vex_offset: r.name for r in self.register_list}
            self.registers = self._get_register_dict()
            self.argument_registers = {r.vex_offset for r in self.register_list if r.argument}
            self.persistent_regs = [r.name for r in self.register_list if r.persistent]
            self.concretize_unique_registers = {r.vex_offset for r in self.register_list if r.concretize_unique}
            self.artificial_registers = {r.name for r in self.register_list if r.artificial}
            self.cpu_flag_register_offsets_and_bitmasks_map = {}
            self.reg_blacklist = []
            self.reg_blacklist_offsets = []

            # Artificial registers offsets
            self.artificial_registers_offsets = []
            for reg_name in self.artificial_registers:
                reg = self.get_register_by_name(reg_name)
                self.artificial_registers_offsets.extend(range(reg.vex_offset, reg.vex_offset + reg.size))

        # Register offsets
        try:
            self.ip_offset = self.registers["ip"][0]
            self.sp_offset = self.registers["sp"][0]
            self.bp_offset = self.registers["bp"][0]
            self.lr_offset = self.registers.get("lr", (None, None))[0]
        except KeyError:
            # A missing ip/sp/bp entry leaves the remaining offsets at their class defaults.
            pass

        # generate register mapping (offset, size): name
        self.register_size_names = {}
        for reg in self.register_list:
            if reg.vex_offset is None:
                continue
            self.register_size_names[(reg.vex_offset, reg.size)] = reg.name
            for name, off, sz in reg.subregisters:
                # special hacks for X86 and AMD64 - don't translate register names to bp, sp, etc.
                if self.name in {"X86", "AMD64"} and name in {"bp", "sp", "ip"}:
                    continue
                self.register_size_names[(reg.vex_offset + off, sz)] = name

        # allow mapping a sub-register to its base register
        self.subregister_map = {}
        for reg in self.register_list:
            if reg.vex_offset is None:
                continue
            base_reg = reg.vex_offset, reg.size
            self.subregister_map[(reg.vex_offset, reg.size)] = base_reg
            self.subregister_map[reg.vex_offset] = base_reg
            for name, off, sz in reg.subregisters:
                if self.name in {"X86", "AMD64"} and name in {"bp", "sp", "ip"}:
                    continue
                subreg_offset = reg.vex_offset + off
                self.subregister_map[(subreg_offset, sz)] = base_reg
                # Never overwrite an offset-only entry that an earlier register already claimed.
                if subreg_offset not in self.subregister_map:
                    self.subregister_map[subreg_offset] = base_reg

        # Unicorn specific stuff
        if self.uc_mode is not None:
            if endness == Endness.BE:
                self.uc_mode -= _unicorn.UC_MODE_LITTLE_ENDIAN
                self.uc_mode += _unicorn.UC_MODE_BIG_ENDIAN
            self.uc_regs = {}
            # map register names to Unicorn const
            for r in self.register_names.values():
                reg_name = self.uc_prefix + "REG_" + r.upper()
                if hasattr(self.uc_const, reg_name):
                    self.uc_regs[r] = getattr(self.uc_const, reg_name)

            # VEX register offset to unicorn register ID map
            self.vex_to_unicorn_map = {}
            # NOTE(review): get_register_by_name returns a Register (or None) while the keys of
            # uc_regs are register names, so the comparison below does not match a name; it is
            # kept unchanged to preserve the resulting map - confirm the intent.
            pc_reg_name = self.get_register_by_name("pc")
            for reg_name, unicorn_reg_id in self.uc_regs.items():
                if reg_name == pc_reg_name:
                    continue

                vex_reg = self.get_register_by_name(reg_name)
                self.vex_to_unicorn_map[vex_reg.vex_offset] = (unicorn_reg_id, vex_reg.size)

            # VEX registers used in lieu of flags register
            self.vex_cc_regs = []
            vex_cc_register_names = ["cc_op", "cc_dep1", "cc_dep2", "cc_ndep"]
            for reg_name in vex_cc_register_names:
                vex_flag_reg = self.get_register_by_name(reg_name)
                if vex_flag_reg is not None:
                    self.vex_cc_regs.append(vex_flag_reg)

    def copy(self):
        """
        Produce a copy of this instance of this arch.

        The copy is shallow except for ``vex_archinfo``, which is deep-copied; the cached
        Capstone/Keystone instances are reset on the copy.
        """
        res = copy.copy(self)
        res.vex_archinfo = copy.deepcopy(self.vex_archinfo)
        res._cs = None
        res._ks = None
        return res

    def __repr__(self):
        return f"<Arch {self.name} ({self.memory_endness[-2:]})>"

    def __hash__(self):
        # Hash on the same fields __eq__ compares.
        return hash((self.name, self.bits, self.memory_endness))

    def __eq__(self, other):
        if not isinstance(other, Arch):
            return False
        return self.name == other.name and self.bits == other.bits and self.memory_endness == other.memory_endness

    def __ne__(self, other):
        return not self == other

    def __getstate__(self):
        # Drop the cached Capstone/Keystone instances before handing out the state.
        self._cs = None
        self._ks = None
        if self.vex_archinfo is not None:
            # clear hwcacheinfo-caches because it may contain cffi.CData
            self.vex_archinfo["hwcache_info"]["caches"] = None
        return self.__dict__

    def __setstate__(self, data):
        self.__dict__.update(data)

    def get_register_by_name(self, reg_name):
        """
        Return the Register object associated with the given name.
        This includes subregisters.

        For example, if you are operating in a platform-independent
        setting, and wish to address "whatever the stack pointer is"
        you could pass 'sp' here, and get Register(...r13...) back
        on an ARM platform.

        :param reg_name: A register name, alias name, or subregister name.
        :return:         The first matching Register in ``register_list``, or None if nothing matches.
        """
        for r in self.register_list:
            if (
                reg_name == r.name
                or reg_name in r.alias_names
                or reg_name in [sub_reg[0] for sub_reg in r.subregisters]
            ):
                return r
        return None

    def get_default_reg_value(self, register):
        """
        Look up the default value recorded for a register in ``default_register_values``.

        :param register: A register name; "sp" is first translated to a name located at ``sp_offset``.
        :return:         The recorded value, or None if the register has no entry.
        """
        if register == "sp":
            # Convert it to the corresponding register name
            registers = [r for r, v in self.registers.items() if v[0] == self.sp_offset]
            if not registers:
                return None
            register = registers[0]
        for reg, val, _, _ in self.default_register_values:
            if reg == register:
                return val
        return None

    def struct_fmt(self, size=None, signed=False, endness=None):
        """
        Produce a format string for use in python's ``struct`` module to decode a single word.

        :param int size:    The size in bytes to pack/unpack. Defaults to wordsize
        :param bool signed: Whether the data should be extracted signed/unsigned. Default unsigned
        :param str endness: The endian to use in packing/unpacking. Defaults to memory endness
        :return str:        A format string with an endness modifier and a single format character
        :raises ValueError: If the endness is middle-endian or unknown, or the size is not 1, 2, 4 or 8
        """
        if size is None:
            size = self.bytes
        if endness is None:
            endness = self.memory_endness

        if endness == Endness.BE:
            fmt_end = ">"
        elif endness == Endness.LE:
            fmt_end = "<"
        elif endness == Endness.ME:
            raise ValueError("Please don't middle-endian at me, I'm begging you")
        else:
            raise ValueError("Invalid endness value: %r" % endness)

        if size == 8:
            fmt_size = "Q"
        elif size == 4:
            fmt_size = "I"
        elif size == 2:
            fmt_size = "H"
        elif size == 1:
            fmt_size = "B"
        else:
            raise ValueError("Invalid size: Must be a integer power of 2 less than 16")

        if signed:
            # struct uses the lowercase letter for the signed variant of each size.
            fmt_size = fmt_size.lower()

        return fmt_end + fmt_size

    def _get_register_dict(self) -> Dict[RegisterName, Tuple[RegisterOffset, int]]:
        """
        Build the name -> (offset, size) mapping from ``register_list``, covering register names,
        alias names and subregister names. Registers without a ``vex_offset`` are skipped.
        """
        res = {}
        for r in self.register_list:
            if r.vex_offset is None:
                continue
            res[r.name] = (r.vex_offset, r.size)
            for i in r.alias_names:
                res[i] = (r.vex_offset, r.size)
            for reg, offset, size in r.subregisters:
                res[reg] = (r.vex_offset + offset, size)
        return res

    # e.g. sizeof['int'] = 32
    sizeof = {}

    @property
    def capstone(self):
        """
        A Capstone instance for this arch

        The instance is created on first access and cached in ``_cs``.

        :raises Exception: If capstone is not installed.
        :raises ArchError: If this arch has no ``cs_arch``.
        """
        if _capstone is None:
            raise Exception("Capstone is not installed!")
        if self.cs_arch is None:
            raise ArchError("Arch %s does not support disassembly with Capstone" % self.name)
        if self._cs is None:
            self._cs = _capstone.Cs(self.cs_arch, self.cs_mode)
            self._configure_capstone()
            self._cs.detail = True
        return self._cs

    @property
    def keystone(self):
        """
        A Keystone instance for this arch

        The instance is created on first access and cached in ``_ks``.

        :raises Exception: If keystone is not installed.
        :raises ArchError: If this arch has no ``ks_arch``.
        """
        if self._ks is None:
            if _keystone is None:
                raise Exception("Keystone is not installed!")
            if self.ks_arch is None:
                raise ArchError("Arch %s does not support assembly with Keystone" % self.name)
            self._ks = _keystone.Ks(self.ks_arch, self.ks_mode)
            self._configure_keystone()
        return self._ks

    def _configure_capstone(self):
        """Hook called right after the Capstone instance is created; does nothing here."""

    def _configure_keystone(self):
        """Hook called right after the Keystone instance is created; does nothing here."""

    @property
    def unicorn(self):
        """
        A Unicorn engine instance for this arch

        :raises ArchError: If unicorn is not installed or this arch has no ``uc_arch``.
        """
        if _unicorn is None or self.uc_arch is None:
            raise ArchError("Arch %s does not support with Unicorn" % self.name)
        # always create a new Unicorn instance
        return _unicorn.Uc(self.uc_arch, self.uc_mode)

    def asm(self, string, addr=0, as_bytes=True, thumb=False):
        """
        Compile the assembly instruction represented by string using Keystone

        :param string:      The textual assembly instructions, separated by semicolons
        :param addr:        The address at which the text should be assembled, to deal with PC-relative access. Default 0
        :param as_bytes:    Set to False to return a list of integers instead of a python byte string
        :param thumb:       If working with an ARM processor, set to True to assemble in thumb mode.
        :return:            The assembled bytecode
        """
        if thumb and not hasattr(self, "keystone_thumb"):
            log.warning("Specified thumb=True on non-ARM architecture")
            thumb = False
        ks = self.keystone_thumb if thumb else self.keystone  # pylint: disable=no-member

        try:
            encoding, _ = ks.asm(string, addr, as_bytes)  # pylint: disable=too-many-function-args
        except TypeError:
            # Fall back to the two-argument call, which yields a list of integers.
            bytelist, _ = ks.asm(string, addr)
            encoding = bytes(bytelist) if as_bytes else bytelist

        return encoding

    def disasm(self, bytestring, addr=0, thumb=False):
        """
        Disassemble ``bytestring`` with Capstone and render one "address:<TAB>mnemonic operands" line
        per instruction.

        :param bytestring:  The bytes to disassemble.
        :param addr:        The address of the first byte. Default 0
        :param thumb:       If working with an ARM processor, set to True to disassemble in thumb mode.
        :return:            The newline-joined listing.
        """
        if thumb and not hasattr(self, "capstone_thumb"):
            log.warning("Specified thumb=True on non-ARM architecture")
            thumb = False
        cs = self.capstone_thumb if thumb else self.capstone  # pylint: disable=no-member
        return "\n".join(f"{insn.address:#x}:\t{insn.mnemonic} {insn.op_str}" for insn in cs.disasm(bytestring, addr))

    def translate_dynamic_tag(self, tag):
        """Map ``tag`` through ``dynamic_tag_translation``; unknown tags are returned unchanged."""
        try:
            return self.dynamic_tag_translation[tag]
        except KeyError:
            if isinstance(tag, int):
                log.error("Please look up and add dynamic tag type %#x for %s", tag, self.name)
            return tag

    def translate_symbol_type(self, tag):
        """Map ``tag`` through ``symbol_type_translation``; unknown tags are returned unchanged."""
        try:
            return self.symbol_type_translation[tag]
        except KeyError:
            if isinstance(tag, int):
                log.error("Please look up and add symbol type %#x for %s", tag, self.name)
            return tag

    def translate_register_name(self, offset, size=None):
        """
        Translate a register file offset (and optionally a size) to a register name.

        :param offset:  The register file offset.
        :param size:    Optional size; when given, an exact (offset, size) match is preferred.
        :return:        The register name, or ``str(offset)`` if the offset is unknown.
        """
        if size is not None:
            try:
                return self.register_size_names[(offset, size)]
            except KeyError:
                pass

        try:
            return self.register_names[offset]
        except KeyError:
            return str(offset)

    def get_base_register(self, offset, size=None):
        """
        Convert a register or sub-register to its base register's offset.

        :param int offset:  The offset of the register to look up for.
        :param int size:    Size of the register.
        :return:            Offset and size of the base register, or None if no base register is found.
        """

        key = offset if size is None else (offset, size)
        return self.subregister_map.get(key, None)

    def get_register_offset(self, name):
        """
        Return the register file offset of the register called ``name``.

        :raises ValueError: If no such register exists.
        """
        try:
            return self.registers[name][0]
        except KeyError as e:
            raise ValueError("Register %s does not exist!" % name) from e

    def is_artificial_register(self, offset, size):
        """
        Tell whether the register at (offset, size) belongs to an artificial base register.

        :return: False when no base register or no name is known for it.
        """
        r = self.get_base_register(offset, size)
        if r is None:
            return False
        r_offset, _ = r

        try:
            r_name = self.register_names[r_offset]
        except KeyError:
            return False

        return r_name in self.artificial_registers

    # Determined by watching the output of strace ld-linux.so.2 --list --inhibit-cache
    def library_search_path(self, pedantic=False):
        """
        A list of paths in which to search for shared libraries.

        :param pedantic: Also include the ``tls/${ARCH}/``, ``tls/`` and ``${ARCH}/`` variants of every path.
        :return:         The paths with ``${TRIPLET}`` and ``${ARCH}`` substituted.
        """

        def subfunc(x):
            return x.replace("${TRIPLET}", self.triplet).replace("${ARCH}", self.linux_name)

        # Every entry ends with a slash so that the pedantic suffixes below form valid sub-paths.
        path = ["/lib/${TRIPLET}/", "/usr/lib/${TRIPLET}/", "/lib/", "/usr/lib/", "/usr/${TRIPLET}/lib/"]
        if self.bits == 64:
            path.append("/usr/${TRIPLET}/lib64/")
            path.append("/usr/lib64/")
            path.append("/lib64/")
        elif self.bits == 32:
            path.append("/usr/${TRIPLET}/lib32/")
            path.append("/usr/lib32/")
            path.append("/lib32/")

        if pedantic:
            path = [prefix + suffix for prefix in path for suffix in ("tls/${ARCH}/", "tls/", "${ARCH}/", "")]
        return [subfunc(p) for p in path]

    def m_addr(self, addr, *args, **kwargs):
        """
        Given the address of some code block, convert it to the address where this block
        is stored in memory. The memory address can also be referred to as the "real" address.

        :param addr:    The address to convert.
        :return:        The "real" address in memory.
        :rtype:         int
        """
        return addr

    def x_addr(self, addr, *args, **kwargs):
        """
        Given the address of some code block, convert it to the value that should be assigned
        to the instruction pointer register in order to execute the code in that block.

        :param addr:    The address to convert.
        :return:        The "execution" address.
        :rtype:         int
        """
        return addr

    def is_thumb(self, addr):  # pylint:disable=unused-argument
        """
        Return True, if the address is the THUMB address. False otherwise.

        For non-ARM architectures this method always returns False.

        :param addr:    The address to check.
        :return:        Whether the given address is the THUMB address.
        """
        return False

    @property
    def vex_support(self):
        """
        Whether the architecture is supported by VEX or not.

        :return: True if this Arch is supported by VEX, False otherwise.
        :rtype:  bool
        """

        return self.vex_arch is not None

    @property
    def unicorn_support(self):
        """
        Whether the architecture is supported by Unicorn engine or not.

        :return: True if this Arch is supported by the Unicorn engine, False otherwise.
        :rtype:  bool
        """

        return self.qemu_name is not None

    @property
    def capstone_support(self):
        """
        Whether the architecture is supported by the Capstone engine or not.

        :return: True if this Arch is supported by the Capstone engine, False otherwise.
        :rtype:  bool
        """

        return self.cs_arch is not None

    @property
    def keystone_support(self):
        """
        Whether the architecture is supported by the Keystone engine or not.

        :return: True if this Arch is supported by the Keystone engine, False otherwise.
        :rtype:  bool
        """

        return self.ks_arch is not None

    address_types = (int,)
    function_address_types = (int,)

    # various names
    name: str
    vex_arch = None
    qemu_name = None
    ida_processor = None
    linux_name = None
    triplet = None

    # instruction stuff
    max_inst_bytes = None
    ret_instruction = b""
    nop_instruction = b""
    instruction_alignment = None

    # register offsets
    ip_offset: Optional[RegisterOffset] = None
    sp_offset: Optional[RegisterOffset] = None
    bp_offset: Optional[RegisterOffset] = None
    ret_offset: Optional[RegisterOffset] = None
    fp_ret_offset: Optional[RegisterOffset] = None
    lr_offset: Optional[RegisterOffset] = None

    # whether or not VEX has ccall handlers for conditionals for this arch
    vex_conditional_helpers = False

    # memory stuff
    bits = None
    memory_endness = Endness.LE
    register_endness = Endness.LE
    stack_change = None

    # is it safe to cache IRSBs?
    cache_irsb = True

    branch_delay_slot = False

    function_prologs = set()
    function_epilogs = set()

    # Capstone stuff
    cs_arch = None
    cs_mode = None
    _cs = None

    # Keystone stuff
    ks_arch = None
    ks_mode = None
    _ks = None

    # Unicorn stuff
    uc_arch = None
    uc_mode = None
    uc_const = None
    uc_prefix = None
    uc_regs = None
    artificial_registers_offsets = None
    artificial_registers = None
    cpu_flag_register_offsets_and_bitmasks_map = None
    reg_blacklist = None
    reg_blacklist_offsets = None
    vex_to_unicorn_map = None
    vex_cc_regs = None

    call_pushes_ret = False
    initial_sp = 0x7FFF0000

    # Difference of the stack pointer after a call instruction (or its equivalent) is executed
    call_sp_fix = 0

    stack_size = 0x8000000

    # Register information
    register_list: List[Register] = []
    default_register_values = []
    entry_register_values = {}
    default_symbolic_registers = []
    registers: Dict[RegisterName, Tuple[RegisterOffset, int]] = {}
    register_names: Dict[RegisterOffset, RegisterName] = {}
    argument_registers = set()
    argument_register_positions = {}
    persistent_regs = []
    concretize_unique_registers = (
        set()
    )  # this is a list of registers that should be concretized, if unique, at the end of each block

    lib_paths = []
    reloc_s_a = []
    reloc_b_a = []
    reloc_s = []
    reloc_copy = []
    reloc_tls_mod_id = []
    reloc_tls_doffset = []
    reloc_tls_offset = []
    dynamic_tag_translation = {}
    symbol_type_translation = {}
    got_section_name = ""

    vex_archinfo = None

786 

787 

# Registry filled by register_arch(): one (regexes, bits, endness, arch class) tuple per registration.
arch_id_map = []

_all_arches_set = set()  # for deduplication
# Instantiated arches, in registration order, without duplicates.
all_arches = []

792 

793 

def _append_arch_unique(my_arch: Arch) -> bool:
    """
    Add ``my_arch`` to ``all_arches`` unless an equal arch is already recorded.

    :param my_arch: The arch instance to record.
    :return:        True if the arch was added, False if it was a duplicate.
    """
    if my_arch in _all_arches_set:
        return False
    _all_arches_set.add(my_arch)
    all_arches.append(my_arch)
    return True

800 

801 

def register_arch(regexes, bits, endness, my_arch):
    """
    Register a new architecture.
    Architectures are loaded by their string name using ``arch_from_id()``, and
    this defines the mapping it uses to figure it out.
    Takes a list of regular expressions, and an Arch class as input.

    :param regexes: List of regular expressions (str or compiled pattern)
    :type regexes: list
    :param bits: The canonical "bits" of this architecture, ex. 32 or 64
    :type bits: int
    :param endness: The "endness" of this architecture. Use Endness.LE, Endness.BE, Endness.ME, "any", or None if the
                    architecture has no intrinsic endianness.
    :type endness: str or None
    :param class my_arch: The Arch class to instantiate for this architecture.
    :return: None
    :raises TypeError: If ``regexes`` is not a list of str/compiled patterns, ``bits`` is not an int, or
                       ``endness`` is not an accepted value.
    :raises ValueError: If one of the regular expressions does not compile.
    """
    if not isinstance(regexes, list):
        raise TypeError("regexes must be a list")
    for rx in regexes:
        # re.Pattern is the public type of compiled patterns (re._pattern_type no longer exists).
        if not isinstance(rx, (str, re.Pattern)):
            raise TypeError("Each regex must be a string or compiled regular expression")
        try:
            re.compile(rx)
        except re.error as e:
            raise ValueError("Invalid Regular Expression %s" % rx) from e
    # if not isinstance(my_arch,Arch):
    #    raise TypeError("Arch must be a subclass of archinfo.Arch")
    if not isinstance(bits, int):
        raise TypeError("Bits must be an int")
    if endness is not None:
        if endness not in (Endness.BE, Endness.LE, Endness.ME, "any"):
            raise TypeError("Endness must be Endness.BE, Endness.LE, Endness.ME, or 'any'")
    arch_id_map.append((regexes, bits, endness, my_arch))
    if endness == "any":
        # An arch that works with either byte order is recorded once per order.
        _append_arch_unique(my_arch(Endness.BE))
        _append_arch_unique(my_arch(Endness.LE))
    else:
        _append_arch_unique(my_arch(endness))

841 

842 

class ArchNotFound(Exception):
    """Raised by ``arch_from_id`` when no registered architecture matches."""

845 

846 

def arch_from_id(ident, endness="any", bits="") -> Arch:
    """
    Take our best guess at the arch referred to by the given identifier, and return an instance of its class.

    You may optionally provide the ``endness`` and ``bits`` parameters (strings) to help this function out.

    :param ident:           The architecture identifier; matched case-insensitively against the registered regexes.
    :param endness:         A hint such as "little", "big", "LSB", "MSB", "le" or "be"; anything else means unsure.
    :param bits:            A hint containing "32" or "64" (or the int 64); when empty it is guessed from ``ident``.
    :return:                An instance of the first registered arch class that matches.
    :raises ArchNotFound:   If no registered architecture matches.
    """
    if bits == 64 or (isinstance(bits, str) and "64" in bits):
        bits = 64
    elif isinstance(bits, str) and "32" in bits:
        bits = 32
    elif not bits and "64" in ident:
        bits = 64
    elif not bits and "32" in ident:
        bits = 32

    # Normalize the endness hint; the order of the checks matters because they are substring tests.
    endness = endness.lower()
    if "lit" in endness:
        endness = Endness.LE
    elif "big" in endness:
        endness = Endness.BE
    elif "lsb" in endness:
        endness = Endness.LE
    elif "msb" in endness:
        endness = Endness.BE
    elif "le" in endness:
        endness = Endness.LE
    elif "be" in endness:
        endness = Endness.BE
    else:
        endness = "unsure"

    ident = ident.lower()
    cls = None
    aendness = ""
    for arxs, abits, aendness, acls in arch_id_map:
        if not any(re.search(rx, ident) for rx in arxs):
            continue
        if bits and bits != abits:
            continue
        if aendness == "any" or endness == aendness or endness == "unsure":
            cls = acls
            break
    if not cls:
        raise ArchNotFound(
            f"Can't find architecture info for architecture {ident} with {repr(bits)} bits and {endness} endness"
        )
    if endness == "unsure":
        if aendness == "any":
            # We really don't care, use default
            return cls()
        else:
            # We're expecting the ident to pick the endness.
            # ex. 'armeb' means obviously this is Iend_BE
            return cls(aendness)
    else:
        return cls(endness)

911 

912 

def reverse_ends(string):
    """
    Byte-swap every 32-bit word of ``string``.

    The input is zero-padded at the end to a multiple of four bytes before swapping; the argument
    itself is never modified.

    :param string:  The bytes to convert.
    :return:        A ``bytes`` object of the padded length with each 4-byte word reversed.
    """
    count = (len(string) + 3) // 4
    ise = "I" * count
    # Pad a copy: an in-place ``+=`` would extend a caller-supplied bytearray.
    padded = bytes(string) + b"\x00" * (count * 4 - len(string))
    return _struct.pack(">" + ise, *_struct.unpack("<" + ise, padded))

918 

919 

def get_host_arch():
    """
    Return the arch of the machine we are currently running on.

    The machine name reported by ``platform.machine()`` is resolved through ``arch_from_id``.
    """
    return arch_from_id(_platform.machine())