Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/archinfo/arch.py: 56%
521 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
1import logging
2from typing import Dict, List, Tuple, Optional
3import struct as _struct
4import platform as _platform
5import re
7from archinfo.types import RegisterOffset, RegisterName
8from .archerror import ArchError
9from .tls import TLSArchInfo
11import copy
13log = logging.getLogger("archinfo.arch")
14log.addHandler(logging.NullHandler())
16try:
17 import pyvex as _pyvex
18except ImportError:
19 _pyvex = None
21try:
22 import unicorn as _unicorn
23except ImportError:
24 _unicorn = None
26try:
27 import capstone as _capstone
28except ImportError:
29 _capstone = None
31try:
32 import keystone as _keystone
33except ImportError:
34 _keystone = None
class Endness:  # pylint: disable=no-init
    """
    Namespace of byte-order identifiers for integer values.

    The values are plain strings, so they can be compared with ``==`` and used as dictionary keys.

    :cvar LE: Little endian: the least significant byte is stored at the lowest address.
    :cvar BE: Big endian: the most significant byte is stored at the lowest address.
    :cvar ME: Middle endian.
    """

    LE = "Iend_LE"
    BE = "Iend_BE"
    ME = "Iend_ME"
class Register:
    """
    Describes a single register of an architecture.

    Every architecture owns a list of these objects, and all other register-related collections are derived from
    that list. As with the Arch object, the information is assumed to be compatible with PyVEX.

    :ivar str name:                 The name of the register
    :ivar int size:                 The size of the register (in bytes)
    :ivar int vex_offset:           The VEX offset used to identify this register
    :ivar str vex_name:             The name libVEX uses to identify the register
    :ivar list subregisters:        The list of subregisters in the form (name, offset from vex_offset, size)
    :ivar tuple alias_names:        The possible alias names of this register
    :ivar bool general_purpose:     Whether this is a general purpose register
    :ivar bool floating_point:      Whether this is a floating-point register
    :ivar bool vector:              Whether this is a vector register
    :ivar bool argument:            Whether this is an argument register
    :ivar bool persistent:          Whether this is a persistent register
    :ivar tuple default_value:      A tuple describing the default initialization of this register; the owning
                                    arch prepends the register name to it to build ``default_register_values``
    :ivar int, str linux_entry_value: A description of the value this register holds at program entry on linux
    :ivar bool concretize_unique:   Whether this register should be concretized, if unique, at the end of each block
    :ivar bool concrete:            Whether this register should be considered during the synchronization of the
                                    concrete execution of the process
    :ivar bool artificial:          Whether this register is an artificial register added by VEX IR or other ILs.
    """

    def __init__(
        self,
        name,
        size,
        vex_offset=None,
        vex_name=None,
        subregisters=None,
        alias_names=None,
        general_purpose=False,
        floating_point=False,
        vector=False,
        argument=False,
        persistent=False,
        default_value=None,
        linux_entry_value=None,
        concretize_unique=False,
        concrete=True,
        artificial=False,
    ):
        # Substitute fresh empty containers for the omitted collection arguments.
        if subregisters is None:
            subregisters = []
        if alias_names is None:
            alias_names = ()

        # Identity and location in the register file
        self.name: RegisterName = name
        self.size: int = size
        self.vex_offset: RegisterOffset = vex_offset
        self.vex_name = vex_name
        self.subregisters: List[Tuple[RegisterName, RegisterOffset, int]] = subregisters
        self.alias_names = alias_names

        # Classification flags
        self.general_purpose = general_purpose
        self.floating_point = floating_point
        self.vector = vector
        self.argument = argument
        self.persistent = persistent

        # Initial values
        self.default_value = default_value
        self.linux_entry_value = linux_entry_value

        # Analysis hints
        self.concretize_unique = concretize_unique
        self.concrete = concrete
        self.artificial = artificial

    def __repr__(self):
        return "<Register {}>".format(self.name)
class Arch:
    """
    A collection of information about a given architecture. This class should be subclassed for each different
    architecture, and then that subclass should be registered with the ``register_arch`` method.

    A good number of assumptions are made that code is being processed under the VEX IR - for instance, the register
    file offsets are expected to match code generated by PyVEX.

    Arches may be compared with == and !=.

    :ivar str name: The name of the arch
    :ivar int bits: The number of bits in a word
    :ivar str vex_arch: The VEX enum name used to identify this arch
    :ivar str qemu_name: The name used by QEMU to identify this arch
    :ivar str ida_processor: The processor string used by IDA to identify this arch
    :ivar str triplet: The triplet used to identify a linux system on this arch
    :ivar int max_inst_bytes: The maximum number of bytes in a single instruction
    :ivar int ip_offset: The offset of the instruction pointer in the register file
    :ivar int sp_offset: The offset of the stack pointer in the register file
    :ivar int bp_offset: The offset of the base pointer in the register file
    :ivar int lr_offset: The offset of the link register (return address) in the register file
    :ivar int ret_offset: The offset of the return value register in the register file
    :ivar bool vex_conditional_helpers: Whether libVEX will generate code to process the conditional flags for this
        arch using ccalls
    :ivar int syscall_num_offset: The offset in the register file where the syscall number is stored
    :ivar bool call_pushes_ret: Whether this arch's call instruction causes a stack push
    :ivar int stack_change: The change to the stack pointer caused by a push instruction
    :ivar str memory_endness: The endness of memory, as a VEX enum
    :ivar str register_endness: The endness of registers, as a VEX enum. Should usually be same as above
    :ivar str instruction_endness: The endness of instructions stored in memory.
        In other words, this controls whether instructions are stored endian-flipped compared to their description
        in the ISA manual, and should be flipped when lifted. Iend_BE means "don't flip"
        NOTE: Only used for non-libVEX lifters.
    :ivar dict sizeof: A mapping from C type to variable size in bits
    :ivar cs_arch: The Capstone arch value for this arch
    :ivar cs_mode: The Capstone mode value for this arch
    :ivar ks_arch: The Keystone arch value for this arch
    :ivar ks_mode: The Keystone mode value for this arch
    :ivar uc_arch: The Unicorn engine arch value for this arch
    :ivar uc_mode: The Unicorn engine mode value for this arch
    :ivar uc_const: The Unicorn engine constants module for this arch
    :ivar uc_prefix: The prefix used for variables in the Unicorn engine constants module
    :ivar list function_prologs: A list of regular expressions matching the bytes for common function prologues
    :ivar list function_epilogs: A list of regular expressions matching the bytes for common function epilogues
    :ivar str ret_instruction: The bytes for a return instruction
    :ivar str nop_instruction: The bytes for a nop instruction
    :ivar int instruction_alignment: The instruction alignment requirement
    :ivar list default_register_values: A weird listing describing how registers should be initialized for purposes of
        sanity
    :ivar dict entry_register_values: A mapping from register name to a description of the value that should be in it
        at program entry on linux
    :ivar list default_symbolic_registers: Honestly, who knows what this is supposed to do. Fill it with the names of
        the general purpose registers.
    :ivar dict register_names: A mapping from register file offset to register name
    :ivar dict registers: A mapping from register name to a tuple of (register file offset, size in bytes)
    :ivar list lib_paths: A listing of common locations where shared libraries for this architecture may be found
    :ivar str got_section_name: The name of the GOT section in ELFs
    :ivar str ld_linux_name: The name of the linux dynamic loader program
    :cvar int byte_width: the number of bits in a byte.
    :ivar TLSArchInfo elf_tls: A description of how thread-local storage works
    :cvar List[str] dwarf_registers: A list of register names in the order specified in the DWARF specification of the
        corresponding architecture.
    """

    byte_width = 8
    instruction_endness = "Iend_BE"
    elf_tls: TLSArchInfo = None
    dwarf_registers: List[str] = []

    def __init__(self, endness, instruction_endness=None):
        """
        Build the per-instance tables of this arch for the requested byte order.

        :param str endness:             One of Endness.LE, Endness.BE or Endness.ME.
        :param str instruction_endness: Optional override for the class-level ``instruction_endness``.
        :raises ArchError:              If ``endness`` is not one of the three Endness values.
        """
        self.bytes = self.bits // self.byte_width

        if endness not in (Endness.LE, Endness.BE, Endness.ME):
            raise ArchError("Must pass a valid endness: Endness.LE, Endness.BE, or Endness.ME")

        if instruction_endness is not None:
            self.instruction_endness = instruction_endness

        if self.vex_support and _pyvex:
            self.vex_archinfo = _pyvex.enums.default_vex_archinfo()

        if endness == Endness.BE:
            if self.vex_archinfo:
                self.vex_archinfo["endness"] = _pyvex.enums.vex_endness_from_string("VexEndnessBE")
            self.memory_endness = Endness.BE
            self.register_endness = Endness.BE
            # The class-level modes are declared little-endian; swap the endian flag for this instance.
            if _capstone and self.cs_mode is not None:
                self.cs_mode -= _capstone.CS_MODE_LITTLE_ENDIAN
                self.cs_mode += _capstone.CS_MODE_BIG_ENDIAN
            if _keystone and self.ks_mode is not None:
                self.ks_mode -= _keystone.KS_MODE_LITTLE_ENDIAN
                self.ks_mode += _keystone.KS_MODE_BIG_ENDIAN
            self.ret_instruction = reverse_ends(self.ret_instruction)
            self.nop_instruction = reverse_ends(self.nop_instruction)

        if self.register_list and _pyvex is not None:
            # Registers unknown to VEX are placed past the largest guest offset VEX knows about.
            (_, _), max_offset = max(_pyvex.vex_ffi.guest_offsets.items(), key=lambda x: x[1])
            max_offset += self.bits
            # Register collections
            if isinstance(self.vex_arch, str):
                va = self.vex_arch[7:].lower()  # pylint: disable=unsubscriptable-object
                for r in self.register_list:
                    if r.vex_offset is None:
                        # Try each known name in turn; every miss assigns (and consumes) a synthetic offset,
                        # and the first hit overwrites it with the real VEX offset.
                        for name in (r.vex_name, r.name) + r.alias_names:
                            try:
                                r.vex_offset = _pyvex.vex_ffi.guest_offsets[(va, name)]
                            except KeyError:
                                r.vex_offset = max_offset
                                max_offset += r.size
                            else:
                                break

            self.default_register_values = [
                (r.name,) + r.default_value for r in self.register_list if r.default_value is not None
            ]
            self.entry_register_values = {
                r.name: r.linux_entry_value for r in self.register_list if r.linux_entry_value is not None
            }
            self.default_symbolic_registers = [r.name for r in self.register_list if r.general_purpose]
            self.register_names = {r.vex_offset: r.name for r in self.register_list}
            self.registers = self._get_register_dict()
            self.argument_registers = {r.vex_offset for r in self.register_list if r.argument}
            self.persistent_regs = [r.name for r in self.register_list if r.persistent]
            self.concretize_unique_registers = {r.vex_offset for r in self.register_list if r.concretize_unique}
            self.artificial_registers = {r.name for r in self.register_list if r.artificial}
            self.cpu_flag_register_offsets_and_bitmasks_map = {}
            self.reg_blacklist = []
            self.reg_blacklist_offsets = []

            # Artificial registers offsets
            self.artificial_registers_offsets = []
            for reg_name in self.artificial_registers:
                reg = self.get_register_by_name(reg_name)
                self.artificial_registers_offsets.extend(range(reg.vex_offset, reg.vex_offset + reg.size))

            # Register offsets
            # Best effort: the first missing name aborts the remaining assignments, which then keep their
            # class-level defaults.
            try:
                self.ip_offset = self.registers["ip"][0]
                self.sp_offset = self.registers["sp"][0]
                self.bp_offset = self.registers["bp"][0]
                self.lr_offset = self.registers.get("lr", (None, None))[0]
            except KeyError:
                pass

            # generate register mapping (offset, size): name
            self.register_size_names = {}
            for reg in self.register_list:
                if reg.vex_offset is None:
                    continue
                self.register_size_names[(reg.vex_offset, reg.size)] = reg.name
                for name, off, sz in reg.subregisters:
                    # special hacks for X86 and AMD64 - don't translate register names to bp, sp, etc.
                    if self.name in {"X86", "AMD64"} and name in {"bp", "sp", "ip"}:
                        continue
                    self.register_size_names[(reg.vex_offset + off, sz)] = name

            # allow mapping a sub-register to its base register
            self.subregister_map = {}
            for reg in self.register_list:
                if reg.vex_offset is None:
                    continue
                base_reg = reg.vex_offset, reg.size
                self.subregister_map[(reg.vex_offset, reg.size)] = base_reg
                self.subregister_map[reg.vex_offset] = base_reg
                for name, off, sz in reg.subregisters:
                    if self.name in {"X86", "AMD64"} and name in {"bp", "sp", "ip"}:
                        continue
                    subreg_offset = reg.vex_offset + off
                    self.subregister_map[(subreg_offset, sz)] = base_reg
                    # A bare offset keeps the first base register recorded for it.
                    if subreg_offset not in self.subregister_map:
                        self.subregister_map[subreg_offset] = base_reg

            # Unicorn specific stuff
            if self.uc_mode is not None:
                if endness == Endness.BE:
                    self.uc_mode -= _unicorn.UC_MODE_LITTLE_ENDIAN
                    self.uc_mode += _unicorn.UC_MODE_BIG_ENDIAN
                self.uc_regs = {}
                # map register names to Unicorn const
                for r in self.register_names.values():
                    reg_name = self.uc_prefix + "REG_" + r.upper()
                    if hasattr(self.uc_const, reg_name):
                        self.uc_regs[r] = getattr(self.uc_const, reg_name)

                # VEX register offset to unicorn register ID map
                self.vex_to_unicorn_map = {}
                # NOTE(review): despite its name this is a Register object (or None), so the string comparison
                # below never matches and no entry is skipped. Presumably the intent was to leave the program
                # counter out of the map; left as is because changing it would alter the map's contents.
                pc_reg_name = self.get_register_by_name("pc")
                for reg_name, unicorn_reg_id in self.uc_regs.items():
                    if reg_name == pc_reg_name:
                        continue

                    vex_reg = self.get_register_by_name(reg_name)
                    self.vex_to_unicorn_map[vex_reg.vex_offset] = (unicorn_reg_id, vex_reg.size)

                # VEX registers used in lieu of flags register
                self.vex_cc_regs = []
                vex_cc_register_names = ["cc_op", "cc_dep1", "cc_dep2", "cc_ndep"]
                for reg_name in vex_cc_register_names:
                    vex_flag_reg = self.get_register_by_name(reg_name)
                    if vex_flag_reg is not None:
                        self.vex_cc_regs.append(vex_flag_reg)

    def copy(self):
        """
        Produce a copy of this instance of this arch.

        The copy is shallow except for ``vex_archinfo``, which is deep-copied; the cached Capstone and Keystone
        handles are dropped from the copy.
        """
        res = copy.copy(self)
        res.vex_archinfo = copy.deepcopy(self.vex_archinfo)
        res._cs = None
        res._ks = None
        return res

    def __repr__(self):
        return f"<Arch {self.name} ({self.memory_endness[-2:]})>"

    def __hash__(self):
        return hash((self.name, self.bits, self.memory_endness))

    def __eq__(self, other):
        if not isinstance(other, Arch):
            return False
        return self.name == other.name and self.bits == other.bits and self.memory_endness == other.memory_endness

    def __ne__(self, other):
        return not self == other

    def __getstate__(self):
        """
        Return the state to serialize: the instance dictionary without the cached Capstone/Keystone handles and
        without the hardware cache description of ``vex_archinfo``.

        The state is assembled on copies so that serializing (or ``copy.copy``-ing) an arch does not modify it.
        """
        state = dict(self.__dict__)
        state["_cs"] = None
        state["_ks"] = None
        vex_archinfo = state.get("vex_archinfo")
        if vex_archinfo is not None:
            # clear hwcacheinfo-caches because it may contain cffi.CData
            vex_archinfo = copy.copy(vex_archinfo)
            hwcache_info = copy.copy(vex_archinfo["hwcache_info"])
            hwcache_info["caches"] = None
            vex_archinfo["hwcache_info"] = hwcache_info
            state["vex_archinfo"] = vex_archinfo
        return state

    def __setstate__(self, data):
        self.__dict__.update(data)

    def get_register_by_name(self, reg_name):
        """
        Return the Register object associated with the given name.
        This includes subregisters.

        For example, if you are operating in a platform-independent
        setting, and wish to address "whatever the stack pointer is"
        you could pass 'sp' here, and get Register(...r13...) back
        on an ARM platform.

        :param str reg_name: A register name, alias name, or subregister name.
        :return:             The first matching Register in ``register_list``, or None if nothing matches.
        """
        for r in self.register_list:
            if (
                reg_name == r.name
                or reg_name in r.alias_names
                or reg_name in [sub_reg[0] for sub_reg in r.subregisters]
            ):
                return r
        return None

    def get_default_reg_value(self, register):
        """
        Look up the default value recorded for a register in ``default_register_values``.

        :param str register: The register name. ``"sp"`` is first translated to the first name in ``registers``
                             that lives at ``sp_offset``.
        :return:             The default value, or None if the register has no default (or "sp" cannot be resolved).
        """
        if register == "sp":
            # Convert it to the corresponding register name
            registers = [r for r, v in self.registers.items() if v[0] == self.sp_offset]
            if len(registers) > 0:
                register = registers[0]
            else:
                return None
        for reg, val, _, _ in self.default_register_values:
            if reg == register:
                return val
        return None

    def struct_fmt(self, size=None, signed=False, endness=None):
        """
        Produce a format string for use in python's ``struct`` module to decode a single word.

        :param int size:    The size in bytes to pack/unpack. Defaults to wordsize
        :param bool signed: Whether the data should be extracted signed/unsigned. Default unsigned
        :param str endness: The endian to use in packing/unpacking. Defaults to memory endness
        :return str:        A format string with an endness modifier and a single format character
        :raises ValueError: If the endness is middle-endian or unknown, or the size is not 1, 2, 4 or 8.
        """
        if size is None:
            size = self.bytes
        if endness is None:
            endness = self.memory_endness

        if endness == Endness.BE:
            fmt_end = ">"
        elif endness == Endness.LE:
            fmt_end = "<"
        elif endness == Endness.ME:
            raise ValueError("Please don't middle-endian at me, I'm begging you")
        else:
            raise ValueError("Invalid endness value: %r" % endness)

        if size == 8:
            fmt_size = "Q"
        elif size == 4:
            fmt_size = "I"
        elif size == 2:
            fmt_size = "H"
        elif size == 1:
            fmt_size = "B"
        else:
            raise ValueError("Invalid size: Must be a integer power of 2 less than 16")

        if signed:
            fmt_size = fmt_size.lower()

        return fmt_end + fmt_size

    def _get_register_dict(self) -> Dict[RegisterName, Tuple[RegisterOffset, int]]:
        """
        Map every register name, alias name and subregister name to its (offset, size) pair.

        Registers without a VEX offset are left out.
        """
        res = {}
        for r in self.register_list:
            if r.vex_offset is None:
                continue
            res[r.name] = (r.vex_offset, r.size)
            for i in r.alias_names:
                res[i] = (r.vex_offset, r.size)
            for reg, offset, size in r.subregisters:
                res[reg] = (r.vex_offset + offset, size)
        return res

    # e.g. sizeof['int'] = 32
    sizeof = {}

    @property
    def capstone(self):
        """
        A Capstone instance for this arch

        The instance is created on first access, with details enabled, and cached afterwards.

        :raises Exception: If Capstone is not installed.
        :raises ArchError: If this arch declares no Capstone arch value.
        """
        if _capstone is None:
            raise Exception("Capstone is not installed!")
        if self.cs_arch is None:
            raise ArchError("Arch %s does not support disassembly with Capstone" % self.name)
        if self._cs is None:
            self._cs = _capstone.Cs(self.cs_arch, self.cs_mode)
            self._configure_capstone()
            self._cs.detail = True
        return self._cs

    @property
    def keystone(self):
        """
        A Keystone instance for this arch

        The instance is created on first access and cached afterwards.

        :raises Exception: If Keystone is not installed.
        :raises ArchError: If this arch declares no Keystone arch value.
        """
        if self._ks is None:
            if _keystone is None:
                raise Exception("Keystone is not installed!")
            if self.ks_arch is None:
                raise ArchError("Arch %s does not support assembly with Keystone" % self.name)
            self._ks = _keystone.Ks(self.ks_arch, self.ks_mode)
            self._configure_keystone()
        return self._ks

    def _configure_capstone(self):
        """Hook called right after the Capstone instance is created; does nothing here."""

    def _configure_keystone(self):
        """Hook called right after the Keystone instance is created; does nothing here."""

    @property
    def unicorn(self):
        """
        A Unicorn engine instance for this arch

        :raises ArchError: If Unicorn is not installed or this arch declares no Unicorn arch value.
        """
        if _unicorn is None or self.uc_arch is None:
            raise ArchError("Arch %s does not support with Unicorn" % self.name)
        # always create a new Unicorn instance
        return _unicorn.Uc(self.uc_arch, self.uc_mode)

    def asm(self, string, addr=0, as_bytes=True, thumb=False):
        """
        Compile the assembly instruction represented by string using Keystone

        :param string:      The textual assembly instructions, separated by semicolons
        :param addr:        The address at which the text should be assembled, to deal with PC-relative access. Default 0
        :param as_bytes:    Set to False to return a list of integers instead of a python byte string
        :param thumb:       If working with an ARM processor, set to True to assemble in thumb mode.
        :return:            The assembled bytecode
        """
        if thumb and not hasattr(self, "keystone_thumb"):
            log.warning("Specified thumb=True on non-ARM architecture")
            thumb = False
        ks = self.keystone_thumb if thumb else self.keystone  # pylint: disable=no-member

        try:
            encoding, _ = ks.asm(string, addr, as_bytes)  # pylint: disable=too-many-function-args
        except TypeError:
            # This Keystone binding does not take the third argument; convert the integer list ourselves.
            bytelist, _ = ks.asm(string, addr)
            encoding = bytes(bytelist) if as_bytes else bytelist

        return encoding

    def disasm(self, bytestring, addr=0, thumb=False):
        """
        Disassemble a byte string with Capstone into text, one ``address:<TAB>mnemonic operands`` line per instruction.

        :param bytestring:  The bytes to disassemble
        :param addr:        The address of the first byte. Default 0
        :param thumb:       If working with an ARM processor, set to True to disassemble in thumb mode.
        :return:            The disassembly listing as a single string
        """
        if thumb and not hasattr(self, "capstone_thumb"):
            log.warning("Specified thumb=True on non-ARM architecture")
            thumb = False
        cs = self.capstone_thumb if thumb else self.capstone  # pylint: disable=no-member
        return "\n".join(f"{insn.address:#x}:\t{insn.mnemonic} {insn.op_str}" for insn in cs.disasm(bytestring, addr))

    def translate_dynamic_tag(self, tag):
        """
        Translate a dynamic tag through ``dynamic_tag_translation``.

        Unknown tags are returned unchanged; unknown integer tags are additionally logged as an error.
        """
        try:
            return self.dynamic_tag_translation[tag]
        except KeyError:
            if isinstance(tag, int):
                log.error("Please look up and add dynamic tag type %#x for %s", tag, self.name)
            return tag

    def translate_symbol_type(self, tag):
        """
        Translate a symbol type through ``symbol_type_translation``.

        Unknown types are returned unchanged; unknown integer types are additionally logged as an error.
        """
        try:
            return self.symbol_type_translation[tag]
        except KeyError:
            if isinstance(tag, int):
                log.error("Please look up and add symbol type %#x for %s", tag, self.name)
            return tag

    def translate_register_name(self, offset, size=None):
        """
        Translate a register file offset (and optionally a size) to a register name.

        The (offset, size) table is consulted first when a size is given, then the offset-only table.

        :param offset:  The register file offset
        :param size:    The size of the access in bytes, if known
        :return:        The register name, or ``str(offset)`` if no name is known
        """
        if size is not None:
            try:
                return self.register_size_names[(offset, size)]
            except KeyError:
                pass

        try:
            return self.register_names[offset]
        except KeyError:
            return str(offset)

    def get_base_register(self, offset, size=None):
        """
        Convert a register or sub-register to its base register's offset.

        :param int offset:  The offset of the register to look up for.
        :param int size:    Size of the register.
        :return:            Offset and size of the base register, or None if no base register is found.
        """

        if size is None:
            key = offset
        else:
            key = (offset, size)

        return self.subregister_map.get(key, None)

    def get_register_offset(self, name):
        """
        Return the register file offset of the named register.

        :param str name:    A name present in ``registers``
        :raises ValueError: If the name is unknown
        """
        try:
            return self.registers[name][0]
        except KeyError as e:
            raise ValueError("Register %s does not exist!" % name) from e

    def is_artificial_register(self, offset, size):
        """
        Tell whether the register at (offset, size) belongs to an artificial base register.

        :return: True if the base register's name is in ``artificial_registers``; False otherwise, including when
                 no base register or no name can be found.
        """
        r = self.get_base_register(offset, size)
        if r is None:
            return False
        r_offset, _ = r

        try:
            r_name = self.register_names[r_offset]
        except KeyError:
            return False

        return r_name in self.artificial_registers

    # Determined by watching the output of strace ld-linux.so.2 --list --inhibit-cache
    def library_search_path(self, pedantic=False):
        """
        A list of paths in which to search for shared libraries.

        :param bool pedantic:   If True, each directory is preceded by its ``tls/<arch>/``, ``tls/`` and ``<arch>/``
                                subdirectories.
        :return:                The list of paths, with the triplet and linux arch name filled in.
        """

        def subfunc(x):
            return x.replace("${TRIPLET}", self.triplet).replace("${ARCH}", self.linux_name)

        path = ["/lib/${TRIPLET}/", "/usr/lib/${TRIPLET}/", "/lib/", "/usr/lib", "/usr/${TRIPLET}/lib/"]
        if self.bits == 64:
            path.append("/usr/${TRIPLET}/lib64/")
            path.append("/usr/lib64/")
            path.append("/lib64/")
        elif self.bits == 32:
            path.append("/usr/${TRIPLET}/lib32/")
            path.append("/usr/lib32/")
            path.append("/lib32/")

        if pedantic:
            expanded = []
            for base in path:
                # "/usr/lib" has no trailing slash; add one before appending a subdirectory so that the result
                # is "/usr/lib/tls/" rather than "/usr/libtls/". The base entry itself is emitted unchanged.
                prefix = base if base.endswith("/") else base + "/"
                expanded.extend((prefix + "tls/${ARCH}/", prefix + "tls/", prefix + "${ARCH}/", base))
            path = expanded
        return [subfunc(p) for p in path]

    def m_addr(self, addr, *args, **kwargs):
        """
        Given the address of some code block, convert it to the address where this block
        is stored in memory. The memory address can also be referred to as the "real" address.

        :param addr:    The address to convert.
        :return:        The "real" address in memory.
        :rtype:         int
        """
        return addr

    def x_addr(self, addr, *args, **kwargs):
        """
        Given the address of some code block, convert it to the value that should be assigned
        to the instruction pointer register in order to execute the code in that block.

        :param addr:    The address to convert.
        :return:        The "execution" address.
        :rtype:         int
        """
        return addr

    def is_thumb(self, addr):  # pylint:disable=unused-argument
        """
        Return True, if the address is the THUMB address. False otherwise.

        For non-ARM architectures this method always returns False.

        :param addr:    The address to check.
        :return:        Whether the given address is the THUMB address.
        """
        return False

    @property
    def vex_support(self):
        """
        Whether the architecture is supported by VEX or not.

        :return: True if this Arch is supported by VEX, False otherwise.
        :rtype:  bool
        """

        return self.vex_arch is not None

    @property
    def unicorn_support(self):
        """
        Whether the architecture is supported by Unicorn engine or not.

        This is decided by whether ``qemu_name`` is set.

        :return: True if this Arch is supported by the Unicorn engine, False otherwise.
        :rtype:  bool
        """

        return self.qemu_name is not None

    @property
    def capstone_support(self):
        """
        Whether the architecture is supported by the Capstone engine or not.

        :return: True if this Arch is supported by the Capstone engine, False otherwise.
        :rtype:  bool
        """

        return self.cs_arch is not None

    @property
    def keystone_support(self):
        """
        Whether the architecture is supported by the Keystone engine or not.

        :return: True if this Arch is supported by the Keystone engine, False otherwise.
        :rtype:  bool
        """

        return self.ks_arch is not None

    address_types = (int,)
    function_address_types = (int,)

    # various names
    name: str
    vex_arch = None
    qemu_name = None
    ida_processor = None
    linux_name = None
    triplet = None

    # instruction stuff
    max_inst_bytes = None
    ret_instruction = b""
    nop_instruction = b""
    instruction_alignment = None

    # register offsets
    ip_offset: Optional[RegisterOffset] = None
    sp_offset: Optional[RegisterOffset] = None
    bp_offset: Optional[RegisterOffset] = None
    ret_offset: Optional[RegisterOffset] = None
    fp_ret_offset: Optional[RegisterOffset] = None
    lr_offset: Optional[RegisterOffset] = None

    # whether or not VEX has ccall handlers for conditionals for this arch
    vex_conditional_helpers = False

    # memory stuff
    bits = None
    memory_endness = Endness.LE
    register_endness = Endness.LE
    stack_change = None

    # is it safe to cache IRSBs?
    cache_irsb = True

    branch_delay_slot = False

    function_prologs = set()
    function_epilogs = set()

    # Capstone stuff
    cs_arch = None
    cs_mode = None
    _cs = None

    # Keystone stuff
    ks_arch = None
    ks_mode = None
    _ks = None

    # Unicorn stuff
    uc_arch = None
    uc_mode = None
    uc_const = None
    uc_prefix = None
    uc_regs = None
    artificial_registers_offsets = None
    artificial_registers = None
    cpu_flag_register_offsets_and_bitmasks_map = None
    reg_blacklist = None
    reg_blacklist_offsets = None
    vex_to_unicorn_map = None
    vex_cc_regs = None

    call_pushes_ret = False
    initial_sp = 0x7FFF0000

    # Difference of the stack pointer after a call instruction (or its equivalent) is executed
    call_sp_fix = 0

    stack_size = 0x8000000

    # Register information
    register_list: List[Register] = []
    default_register_values = []
    entry_register_values = {}
    default_symbolic_registers = []
    registers: Dict[RegisterName, Tuple[RegisterOffset, int]] = {}
    register_names: Dict[RegisterOffset, RegisterName] = {}
    argument_registers = set()
    argument_register_positions = {}
    persistent_regs = []
    concretize_unique_registers = (
        set()
    )  # this is a list of registers that should be concretized, if unique, at the end of each block

    lib_paths = []
    reloc_s_a = []
    reloc_b_a = []
    reloc_s = []
    reloc_copy = []
    reloc_tls_mod_id = []
    reloc_tls_doffset = []
    reloc_tls_offset = []
    dynamic_tag_translation = {}
    symbol_type_translation = {}
    got_section_name = ""

    vex_archinfo = None
# Registration records appended by register_arch(): (regexes, bits, endness, arch class)
arch_id_map = []

# Instances of every registered arch; the set mirrors the list and exists only for deduplication
_all_arches_set = set()
all_arches = []


def _append_arch_unique(my_arch: Arch) -> bool:
    """
    Record an arch instance in ``all_arches`` unless an equal one has been recorded before.

    :param my_arch: The arch instance to record.
    :return:        True if the instance was added, False if it was a duplicate.
    """
    is_new = my_arch not in _all_arches_set
    if is_new:
        _all_arches_set.add(my_arch)
        all_arches.append(my_arch)
    return is_new
def register_arch(regexes, bits, endness, my_arch):
    """
    Register a new architecture.
    Architectures are loaded by their string name using ``arch_from_id()``, and
    this defines the mapping it uses to figure it out.
    Takes a list of regular expressions, and an Arch class as input.

    Besides recording the mapping, this instantiates ``my_arch`` (once per endness when ``endness`` is "any") and
    adds the instances to ``all_arches``.

    :param regexes: List of regular expressions (str or compiled pattern)
    :type regexes: list
    :param bits: The canonical "bits" of this architecture, ex. 32 or 64
    :type bits: int
    :param endness: The "endness" of this architecture. Use Endness.LE, Endness.BE, Endness.ME, "any", or None if the
                    architecture has no intrinsic endianness.
    :type endness: str or None
    :param class my_arch: The Arch subclass to register
    :return: None
    :raises TypeError:  If ``regexes`` is not a list of strings/compiled patterns, ``bits`` is not an int, or
                        ``endness`` is not one of the accepted values
    :raises ValueError: If one of the regular expressions does not compile
    """
    if not isinstance(regexes, list):
        raise TypeError("regexes must be a list")
    for rx in regexes:
        # re.Pattern is the public type of compiled patterns; the private re._pattern_type used previously was
        # removed in Python 3.7, so compiled patterns used to fail with an AttributeError here.
        if not isinstance(rx, (str, re.Pattern)):
            raise TypeError("Each regex must be a string or compiled regular expression")
        try:
            re.compile(rx)
        except re.error as e:
            raise ValueError("Invalid Regular Expression %s" % rx) from e
    # my_arch is deliberately not type-checked; it only has to be callable with an endness.
    if not isinstance(bits, int):
        raise TypeError("Bits must be an int")
    if endness is not None:
        if endness not in (Endness.BE, Endness.LE, Endness.ME, "any"):
            raise TypeError("Endness must be Endness.BE, Endness.LE, Endness.ME, or 'any'")
    arch_id_map.append((regexes, bits, endness, my_arch))
    if endness == "any":
        _append_arch_unique(my_arch(Endness.BE))
        _append_arch_unique(my_arch(Endness.LE))
    else:
        _append_arch_unique(my_arch(endness))
class ArchNotFound(Exception):
    """Raised by ``arch_from_id`` when no registered architecture matches the request."""
def arch_from_id(ident, endness="any", bits="") -> Arch:
    """
    Take our best guess at the arch referred to by the given identifier, and return an instance of its class.

    You may optionally provide the ``endness`` and ``bits`` parameters (strings) to help this function out.

    :param str ident:   The architecture identifier; it is matched against the registered regular expressions.
    :param str endness: A hint about the byte order, e.g. "little", "big", "lsb", "msb", "le" or "be". Anything
                        else means "unsure".
    :param bits:        A hint about the word size: 64, or a string containing "64" or "32". If empty, the
                        identifier itself is searched for "64" or "32".
    :return:            An instance of the first registered arch class that matches.
    :raises ArchNotFound: If no registered arch matches.
    """
    # Normalize the word size hint
    if bits == 64 or (isinstance(bits, str) and "64" in bits):
        bits = 64
    elif isinstance(bits, str) and "32" in bits:
        bits = 32
    elif not bits and "64" in ident:
        bits = 64
    elif not bits and "32" in ident:
        bits = 32

    # Normalize the endness hint: the first keyword found in the hint wins, so the order of this table matters.
    endness_keywords = (
        ("lit", Endness.LE),
        ("big", Endness.BE),
        ("lsb", Endness.LE),
        ("msb", Endness.BE),
        ("le", Endness.LE),
        ("be", Endness.BE),
    )
    hint = endness.lower()
    endness = "unsure"
    for keyword, value in endness_keywords:
        if keyword in hint:
            endness = value
            break

    # Find the first registration whose regexes, bits and endness all agree with the request
    ident = ident.lower()
    cls = None
    matched_endness = ""
    for patterns, abits, candidate_endness, acls in arch_id_map:
        if not any(re.search(rx, ident) for rx in patterns):
            continue
        if bits and bits != abits:
            continue
        if candidate_endness == "any" or endness == candidate_endness or endness == "unsure":
            cls = acls
            matched_endness = candidate_endness
            break
    if not cls:
        raise ArchNotFound(
            f"Can't find architecture info for architecture {ident} with {repr(bits)} bits and {endness} endness"
        )

    if endness != "unsure":
        return cls(endness)
    if matched_endness == "any":
        # We really don't care, use default
        return cls()
    # We're expecting the ident to pick the endness.
    # ex. 'armeb' means obviously this is Iend_BE
    return cls(matched_endness)
def reverse_ends(string):
    """
    Reverse the byte order of every 4-byte word of a byte string.

    The input is zero-padded at the end to a multiple of four bytes before the words are flipped.

    :param string:  The bytes to convert (any bytes-like object); it is not modified.
    :return:        A ``bytes`` object of the padded length with each word byte-swapped.
    """
    count = (len(string) + 3) // 4
    ise = "I" * count
    # Pad a copy instead of using ``+=``, which would extend a caller-supplied bytearray in place.
    padded = bytes(string) + b"\x00" * (count * 4 - len(string))
    return _struct.pack(">" + ise, *_struct.unpack("<" + ise, padded))
def get_host_arch():
    """
    Build the arch object that describes the machine this code is running on.

    The machine name reported by the ``platform`` module is resolved through ``arch_from_id``.
    """
    machine = _platform.machine()
    return arch_from_id(machine)