# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyvex/lifting/util/instr_helper.py: 89%
# 145 statements
# Report generated by coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
import abc
import string

import bitstring

from pyvex.expr import IRExpr, RdTmp

from .lifter_helper import ParseError
from .syntax_wrapper import VexValue
from .vex_helper import IRSBCustomizer, JumpKind, vex_int_class
class Instruction(metaclass=abc.ABCMeta):
    """
    Base class for an Instruction.

    You should make a subclass of this for each instruction you want to lift. These classes will contain the "semantics"
    of the instruction, that is, what it _does_, in terms of the VEX IR.

    You may want to subclass this for your architecture, and add arch-specific handling for parsing, argument
    resolution, etc., and have instructions subclass that instead.

    The core parsing functionality is done via ``bin_format``. Each instruction should be a subclass of ``Instruction``
    and will be parsed by comparing bits in the provided bitstream to symbols in the ``bin_format`` member of the class.
    "Bin formats" are strings of symbols, like those you'd find in an ISA document, such as "0010rrrrddddffmm"
    0 or 1 specify hard-coded bits that must match for an instruction to match.
    Any letters specify arguments, grouped by letter, which will be parsed and provided as bitstrings in the ``data``
    member of the class as a dictionary.
    So, in our example, the bits ``0010110101101001``, applied to format string ``0010rrrrddddffmm``
    will result in the following in ``self.data``:

        {'r': '1101',
         'd': '0110',
         'f': '10',
         'm': '01'}

    Implement compute_result to provide the "meat" of what your instruction does.
    You can also implement it in your arch-specific subclass of ``Instruction``, to handle things common to all
    instructions, and provide instruction implementations elsewhere.

    We provide the ``VexValue`` syntax wrapper to make expressing instruction semantics easy.
    You first convert the bitstring arguments into ``VexValue``s using the provided convenience methods
    (``self.get/put/load/store/etc.``)
    This loads the register from the actual registers into a temporary value we can work with.
    You can then write it back to a register when you're done.
    For example, if you have the register in ``r``, as above, you can make a ``VexValue`` like this:

        r = int(self.data['r'], 2) # we get bits corresponding to `r` bits and convert it to an int
        r_vv = self.get(r, Type.int_32)

    If you then had an instruction to increment ``r``, you could simply:

        r_vv += 1
        return r_vv

    You could then write it back to the register like this:

        self.put(r_vv, r)

    Note that most architectures have special flags that get set differently for each instruction, make sure to
    implement those as well (override ``compute_flags()`` )

    Override ``parse()`` to extend parsing.
    For example, in MSP430, this allows us to grab extra words from the bitstream
    when extra immediate words are present.

    All architectures are different enough that there's no magic recipe for how to write a lifter.
    See the examples provided by gymrat for ideas of how to use this to build your own lifters quickly and easily.
    """

    # Dictionary mapping each bin_format letter to its bit string; filled in by __init__ via parse().
    data = None
    # The IRSBCustomizer currently being emitted into; set by lift().
    irsb_c = None

    def __init__(self, bitstrm, arch, addr):
        """
        Create an instance of the instruction

        :param bitstrm: The bitstream to decode instructions from
        :param arch:    The architecture object; its ``instruction_endness``, ``byte_width`` and ``ip_offset``
                        attributes are read by this class
        :param addr:    The address of the instruction to be lifted
        :raises ParseError: If the bits in ``bitstrm`` do not match ``bin_format`` (raised by ``parse``)
        """
        self.addr = addr
        self.arch = arch
        self.bitwidth = len(self.bin_format)
        self.data = self.parse(bitstrm)

    @property
    @abc.abstractmethod
    def bin_format(self) -> str:
        """
        Read the documentation of the class to understand what a bin format string is

        :return: str bin format string
        """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """
        Name of the instruction

        Can be useful to name the instruction when there's an error related to it
        """

    def __call__(self, irsb_c, past_instructions, future_instructions):
        """
        Calling the instruction lifts it; this simply forwards to ``lift``.
        """
        self.lift(irsb_c, past_instructions, future_instructions)

    def mark_instruction_start(self):
        """
        Emit the instruction marker (IMark) for this instruction at ``self.addr``, ``self.bytewidth`` long.
        """
        self.irsb_c.imark(self.addr, self.bytewidth, 0)

    def fetch_operands(self):  # pylint: disable=no-self-use
        """
        Get the operands out of memory or registers
        Return a tuple of operands for the instruction
        """
        return ()

    def lift(self, irsb_c: IRSBCustomizer, past_instructions, future_instructions):  # pylint: disable=unused-argument
        """
        This is the main body of the "lifting" for the instruction.
        This can/should be overridden to provide the general flow of how instructions in your arch work.
        For example, in MSP430, this is:

        - Figure out what your operands are by parsing the addressing, and load them into temporary registers
        - Do the actual operation, and commit the result, if needed.
        - Compute the flags

        :param irsb_c:              The IRSBCustomizer to put VEX instructions into
        :param past_instructions:   Not used by this base implementation
        :param future_instructions: Not used by this base implementation
        """
        self.irsb_c = irsb_c
        # Always call this first!
        self.mark_instruction_start()
        # Then do the actual stuff.
        inputs = self.fetch_operands()
        retval = self.compute_result(*inputs)  # pylint: disable=assignment-from-none
        if retval is not None:
            self.commit_result(retval)
        # Flags see every operand followed by the result (which may be None).
        self.compute_flags(*inputs, retval)

    def commit_result(self, res):
        """
        This where the result of the operation is written to a destination.
        This happens only if compute_result does not return None, and happens before compute_flags is called.
        Override this to specify how to write out the result.
        The results of fetch_operands can be used to resolve various addressing modes for the write outward.
        A common pattern is to return a function from fetch_operands which will be called here to perform the write.

        :param res: The value returned by compute_result
        """

    def compute_result(self, *args):  # pylint: disable=unused-argument,no-self-use
        """
        This is where the actual operation performed by your instruction, excluding the calculation of flags, should be
        performed. Return the VexValue of the "result" of the instruction, which may
        be used to calculate the flags later.
        For example, for a simple add, with arguments src and dst, you can simply write:

            return src + dst

        :param args: The operands, as returned by fetch_operands
        :return: A VexValue containing the "result" of the operation, or None if there is nothing to commit.
        """
        return None

    def compute_flags(self, *args):
        """
        Most CPU architectures have "flags" that should be computed for many instructions.
        Override this to specify how that happens. One common pattern is to define this method to call specific methods
        to update each flag, which can then be overridden in the actual classes for each instruction.

        :param args: The operands returned by fetch_operands, followed by the result of compute_result
        """

    def match_instruction(self, data, bitstrm):  # pylint: disable=unused-argument,no-self-use
        """
        Override this to extend the parsing functionality.
        This is great for if your arch has instruction "formats" that have an opcode that has to match.
        ``parse`` calls this after the fixed bits matched and ignores the return value, so an override should
        raise to reject the instruction.

        :param data:    The dictionary of bit strings parsed so far
        :param bitstrm: The bitstream being decoded (not yet advanced past this instruction)
        :return: data
        """
        return data

    def parse(self, bitstrm):
        """
        Match the next ``self.bitwidth`` bits of the bitstream against ``bin_format`` and extract the arguments.

        On success the bits are consumed from ``bitstrm`` and stored (as a hex string) in ``self.rawbits``.
        If the instance has an ``_extra_parsing`` method, it is called afterwards and its return value is used
        as the resulting data.

        :param bitstrm: The bitstream to decode from
        :return: A dictionary mapping each letter of ``bin_format`` to the string of bits found at its positions
        :raises ParseError: If a hard-coded ``0``/``1`` in ``bin_format`` does not match the instruction bits
        :raises ValueError: If ``bin_format`` contains a character that is neither ``0``, ``1`` nor an ASCII letter
        """
        if self.arch.instruction_endness == "Iend_LE":
            # This arch stores its instructions in memory endian-flipped compared to the ISA.
            # To enable natural lifter-writing, we let the user write them like in the manual, and correct for
            # endness here.
            instr_bits = self._load_le_instr(bitstrm, self.bitwidth)
        else:
            instr_bits = bitstrm.peek("bin:%d" % self.bitwidth)

        data = {c: "" for c in self.bin_format if c in string.ascii_letters}
        for c, b in zip(self.bin_format, instr_bits):
            if c in "01":
                if b != c:
                    raise ParseError("Mismatch between format bit %c and instruction bit %c" % (c, b))
            elif c in string.ascii_letters:
                data[c] += b
            else:
                raise ValueError("Invalid bin_format character %c" % c)

        # Hook here for extra matching functionality; it should raise if it's not right.
        # (The base class always defines match_instruction, so no hasattr check is needed.)
        self.match_instruction(data, bitstrm)

        # Use up the bits once we're sure it's right
        self.rawbits = bitstrm.read("hex:%d" % self.bitwidth)

        # Hook here for extra parsing functionality (e.g., trailers)
        if hasattr(self, "_extra_parsing"):
            data = self._extra_parsing(data, bitstrm)  # pylint: disable=no-member

        return data

    @property
    def bytewidth(self):
        """
        The width of the instruction in units of ``self.arch.byte_width`` bits.

        :raises ValueError: If ``self.bitwidth`` is not a multiple of ``self.arch.byte_width``
        """
        if self.bitwidth % self.arch.byte_width != 0:
            raise ValueError("Instruction is not a multiple of bytes wide!")
        return self.bitwidth // self.arch.byte_width

    def disassemble(self):
        """
        Return the disassembly of this instruction, as a string.
        Override this in subclasses.

        :return: The address (self.addr), the instruction's name, and a list of its operands, as strings
        """
        return self.addr, "UNK", [self.rawbits]

    # These methods should be called in subclasses to do register and memory operations

    def load(self, addr, ty):
        """
        Load a value from memory into a VEX temporary register.

        :param addr: The VexValue containing the addr to load from.
        :param ty: The Type of the resulting data
        :return: a VexValue
        """
        rdt = self.irsb_c.load(addr.rdt, ty)
        return VexValue(self.irsb_c, rdt)

    def constant(self, val, ty):
        """
        Creates a constant as a VexValue

        :param val: The value, as an integer
        :param ty: The type of the resulting VexValue
        :return: a VexValue
        :raises TypeError: If ``val`` is a VexValue or an IRExpr rather than a plain value
        """
        # Both wrapper values and raw IR expressions are rejected, as the error message states.
        if isinstance(val, (VexValue, IRExpr)):
            raise TypeError("Constant cannot be made from VexValue or IRExpr")
        rdt = self.irsb_c.mkconst(val, ty)
        return VexValue(self.irsb_c, rdt)

    @staticmethod
    def _lookup_register(arch, reg):
        """
        Resolve a register to its offset.

        :param arch: The architecture to look the register up in
        :param reg:  Register number as an integer, or register string name
        :return: Whatever ``arch.get_register_offset`` returns for the resolved register name
        """
        if isinstance(reg, int):
            # Integer register numbers are first translated to a name.
            if hasattr(arch, "register_index"):
                reg = arch.register_index[reg]
            else:
                reg = arch.register_list[reg].name
        return arch.get_register_offset(reg)

    def get(self, reg, ty):
        """
        Load a value from a machine register into a VEX temporary register.
        All values must be loaded out of registers before they can be used with operations, etc
        and stored back into them when the instruction is over.  See Put().

        :param reg: Register number as an integer, or register string name
        :param ty: The Type to use.
        :return: A VexValue of the gotten value.
        """
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        if offset == self.irsb_c.irsb.arch.ip_offset:
            # Reading the instruction pointer yields this instruction's address as a constant.
            return self.constant(self.addr, ty)
        rdt = self.irsb_c.rdreg(offset, ty)
        return VexValue(self.irsb_c, rdt)

    def put(self, val, reg):
        """
        Puts a value from a VEX temporary register into a machine register.
        This is how the results of operations done to registers get committed to the machine's state.

        :param val: The VexValue to store (Want to store a constant? See Constant() first)
        :param reg: The integer register number to store into, or register name
        :return: None
        """
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        self.irsb_c.put(val.rdt, offset)

    def put_conditional(self, cond, valiftrue, valiffalse, reg):
        """
        Like put, except it checks a condition
        to decide what to put in the destination register.

        :param cond: The VexValue representing the logical expression for the condition
            (if your expression only has constants, don't use this method!)
        :param valiftrue: the VexValue to put in reg if cond evals as true
        :param valiffalse: the VexValue to put in reg if cond evals as false
        :param reg: The integer register number to store into, or register name
        :return: None
        """

        val = self.irsb_c.ite(cond.rdt, valiftrue.rdt, valiffalse.rdt)
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        self.irsb_c.put(val, offset)

    def store(self, val, addr):
        """
        Store a VexValue in memory at the specified location.

        :param val: The VexValue of the value to store
        :param addr: The VexValue of the address to store into
        :return: None
        """
        self.irsb_c.store(addr.rdt, val.rdt)

    def jump(self, condition, to_addr, jumpkind=JumpKind.Boring, ip_offset=None):
        """
        Jump to a specified destination, under the specified condition.
        Used for branches, jumps, calls, returns, etc.

        :param condition: The VexValue representing the expression for the guard, or None for an unconditional jump
        :param to_addr: The address to jump to, as a VexValue, an int, or an RdTmp.
        :param jumpkind: The JumpKind to use.  See the VEX docs for what these are; you only need them for things
            aren't normal jumps (e.g., calls, interrupts, program exits, etc etc)
        :param ip_offset: The instruction pointer register offset passed to the added exit of a conditional jump;
            defaults to ``self.arch.ip_offset``
        :return: None
        :raises TypeError: If ``to_addr`` is not a VexValue, int, or RdTmp
        """
        to_addr_ty = None
        if isinstance(to_addr, VexValue):
            # Unpack a VV
            to_addr_rdt = to_addr.rdt
            to_addr_ty = to_addr.ty
        elif isinstance(to_addr, int):
            # Direct jump to an int, make an RdT and Ty
            to_addr_ty = vex_int_class(self.irsb_c.irsb.arch.bits).type
            to_addr = self.constant(to_addr, to_addr_ty)  # TODO archinfo may be changing
            to_addr_rdt = to_addr.rdt
        elif isinstance(to_addr, RdTmp):
            # An RdT; just get the Ty of the arch's pointer type
            to_addr_ty = vex_int_class(self.irsb_c.irsb.arch.bits).type
            to_addr_rdt = to_addr
        else:
            raise TypeError("Jump destination has unknown type: " + repr(type(to_addr)))
        if not condition:
            # This is the default exit.
            self.irsb_c.irsb.jumpkind = jumpkind
            self.irsb_c.irsb.next = to_addr_rdt
        else:
            # add another exit
            # EDG says: We should make sure folks set ArchXYZ.ip_offset like they're supposed to
            if ip_offset is None:
                ip_offset = self.arch.ip_offset
            assert ip_offset is not None

            # The added exit is guarded by the *negated* condition and goes to the fall-through address
            # (the address right after this instruction); the block's default exit goes to the jump target.
            negated_condition_rdt = self.ite(condition, self.constant(0, condition.ty), self.constant(1, condition.ty))
            # Use the arch's byte width (as bytewidth/imark do) rather than assuming 8-bit bytes.
            fallthrough_addr = self.addr + (self.bitwidth // self.arch.byte_width)
            direct_exit_target = self.constant(fallthrough_addr, to_addr_ty)
            self.irsb_c.add_exit(negated_condition_rdt, direct_exit_target.rdt, jumpkind, ip_offset)
            self.irsb_c.irsb.jumpkind = jumpkind
            self.irsb_c.irsb.next = to_addr_rdt

    def ite(self, cond, t, f):
        """
        Build an if-then-else expression from three VexValues.

        :param cond: The VexValue of the condition
        :param t: The VexValue selected when the condition holds
        :param f: The VexValue selected otherwise
        :return: The result of ``irsb_c.ite`` (not wrapped in a VexValue)
        """
        return self.irsb_c.ite(cond.rdt, t.rdt, f.rdt)

    @staticmethod
    def _unwrap_args(args):
        """
        Return ``args`` as a tuple in which every VexValue has been replaced by its ``rdt``.
        """
        return tuple(arg.rdt if isinstance(arg, VexValue) else arg for arg in args)

    def ccall(self, ret_type, func_name, args):
        """
        Creates a CCall operation.
        A CCall is a procedure that calculates a value at *runtime*, not at lift-time.
        You can use these for flags, unresolvable jump targets, etc.
        We caution you to avoid using them when at all possible though.

        :param ret_type: The return type of the CCall
        :param func_name: The name of the helper function to call. If you're using angr, this should be added (or
            monkeypatched) into ``angr.engines.vex.claripy.ccall``.
        :param args: List of arguments to the function
        :return: A VexValue of the result.
        """
        # VexValue arguments are passed on as their underlying expressions.
        cc = self.irsb_c.op_ccall(ret_type, func_name, self._unwrap_args(args))
        return VexValue(self.irsb_c, cc)

    def dirty(self, ret_type, func_name, args) -> VexValue:
        """
        Creates a dirty call operation.

        These are like ccalls (clean calls) but their implementations are theoretically allowed to read or write to or
        from any part of the state, making them a nightmare for static analysis to reason about. Avoid their use at all
        costs.

        :param ret_type: The return type of the dirty call, or None if the dirty call doesn't return anything.
        :param func_name: The name of the helper function to call. If you're using angr, this should be added (or
            monkeypatched) into ``angr.engines.vex.heavy.dirty``.
        :param args: List of arguments to the function
        :return: A VexValue of the result.
        """
        # VexValue arguments are passed on as their underlying expressions.
        rdt = self.irsb_c.dirty(ret_type, func_name, self._unwrap_args(args))
        return VexValue(self.irsb_c, rdt)

    def _load_le_instr(self, bitstream: bitstring.ConstBitStream, numbits: int) -> str:
        """
        Peek ``numbits`` bits as a little-endian unsigned integer and return them as a binary string.

        The bitstream position is not advanced.
        """
        return bitstring.Bits(uint=bitstream.peek("uintle:%d" % numbits), length=numbits).bin