1import logging
2
3import bitstring
4
5from pyvex.lifting.util import JumpKind, Type
6from pyvex.lifting.util.instr_helper import Instruction, ParseError
7from pyvex.lifting.util.lifter_helper import GymratLifter
8from pyvex.types import Arch
9
10log = logging.getLogger(__name__)
11
12
class ARMInstruction(Instruction):  # pylint: disable=abstract-method
    """
    Common base for the ARM-encoded instructions spotted in this file.

    Offers the N/Z/C/V flags through the ``armg_calculate_flag_*`` ccalls, turns the condition
    field (the ``c`` bits of ``bin_format``) into an expression, and handles the per-halfword
    byte order of wide Thumb encodings.
    """

    # NOTE: WARNING: There is no CPSR in VEX's ARM implementation
    # You must use straight nasty hacks instead.

    # NOTE 2: Something is goofy w/r/t archinfo and VEX; cc_op3 is used in ccalls, but there's
    # no cc_op3 in archinfo, angr itself uses cc_depn instead.  We do the same.

    def match_instruction(self, data, bitstrm):
        """
        ARM Instructions are pretty dense, so let's do what we can to weed them out

        :param data: Mapping from ``bin_format`` letters to the bit strings captured for them.
        :param bitstrm: The bit stream being decoded (unused here).
        :raises ParseError: If there is no condition field or it is ``1111``.
        """
        if "c" not in data or data["c"] == "1111":
            raise ParseError("Invalid ARM Instruction")

    def _calculate_flag(self, helper_name):
        """
        Emit a ccall to ``helper_name`` over the cc_op, cc_dep1, cc_dep2 and cc_ndep registers.

        :param helper_name: Name of the flag helper, e.g. ``"armg_calculate_flag_z"``.
        :return: The 32-bit result of the ccall.
        """
        thunk = [self.get(reg, Type.int_32) for reg in ("cc_op", "cc_dep1", "cc_dep2", "cc_ndep")]
        return self.ccall(Type.int_32, helper_name, thunk)

    def get_N(self):
        """Return the N (negative) flag as computed by ``armg_calculate_flag_n``."""
        return self._calculate_flag("armg_calculate_flag_n")

    def get_C(self):
        """Return the C (carry) flag as computed by ``armg_calculate_flag_c``."""
        return self._calculate_flag("armg_calculate_flag_c")

    def get_V(self):
        """Return the V (overflow) flag as computed by ``armg_calculate_flag_v``."""
        return self._calculate_flag("armg_calculate_flag_v")

    def get_Z(self):
        """Return the Z (zero) flag as computed by ``armg_calculate_flag_z``."""
        return self._calculate_flag("armg_calculate_flag_z")

    def evaluate_condition(self):
        """
        Build the expression for this instruction's condition field, ``self.data["c"]``.

        :return: An expression that holds when the instruction should execute, or None when the
                 field is not one of the fourteen testable conditions (e.g. ``1110``, "always").
        """
        # condition codes should be in 'c'
        cond = self.data["c"]
        if cond == "0000":
            # EQ / equal / Z set
            return self.get_Z() == 1
        if cond == "0001":
            # NE / not equal / Z clear
            return self.get_Z() == 0
        if cond == "0010":
            # CS / carry set
            return self.get_C() == 1
        if cond == "0011":
            # CC / carry clear
            return self.get_C() == 0
        if cond == "0100":
            # MI / negative / N set
            return self.get_N() == 1
        if cond == "0101":
            # PL / plus / positive / N clear
            return self.get_N() == 0
        if cond == "0110":
            # VS / V set / overflow
            return self.get_V() == 1
        if cond == "0111":
            # VC / V clear / no overflow
            return self.get_V() == 0
        if cond == "1000":
            # HI / unsigned higher / C set AND Z clear
            return (self.get_C() == 1) & (self.get_Z() == 0)
        if cond == "1001":
            # LS / unsigned lower or same / C clear OR Z set (the negation of HI)
            return (self.get_C() == 0) | (self.get_Z() == 1)
        if cond == "1010":
            # GE / signed greater than or equal / N == V
            return self.get_N() == self.get_V()
        if cond == "1011":
            # LT / signed less than / N != V
            return self.get_N() != self.get_V()
        if cond == "1100":
            # GT / signed greater than / Z clear AND (N == V)
            return (self.get_Z() == 0) & (self.get_N() == self.get_V())
        if cond == "1101":
            # LE / signed less than or equal / Z set OR (N != V)
            return (self.get_Z() == 1) | (self.get_N() != self.get_V())
        # No condition
        return None

    def _load_le_instr(self, bitstream: bitstring.ConstBitStream, numbits: int) -> str:
        """
        Return the next ``numbits`` bits of a little-endian instruction as a binary string.

        At a Thumb address (low bit of ``self.addr`` set) with more than 16 bits, every 16-bit
        unit is read little-endian on its own and the units are concatenated in stream order;
        the stream position is restored afterwards. All other cases go to the parent class.
        """
        # THUMB mode instructions swap endianness every two bytes!
        if not (self.addr & 1 and numbits > 16):
            return super()._load_le_instr(bitstream, numbits)

        halfwords = []
        start = bitstream.pos
        try:
            for _ in range(0, numbits, 16):
                halfwords.append(bitstring.Bits(uint=bitstream.peek("uintle:16"), length=16).bin)
                bitstream.pos += 16
        finally:
            # Peek-only semantics: never leave the caller's stream advanced.
            bitstream.pos = start
        return "".join(halfwords)
114
115
class Instruction_MRC(ARMInstruction):
    """Spots MRC; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "MRC"
    # Example encoding: 11101110000100010001111100010000
    #   c = cond
    #   C = Coprocessor operation mode
    #   d = CPd
    #   O = Offset
    #   p = CP#
    bin_format = "cccc1110CCC1nnnnddddppppOOOOOOOO"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO at least look at the conditionals
        # TODO Clobber the dst reg of MRC
        # TODO maybe treat coproc regs as simple storage (even though they are very much not)
        address = self.addr
        log.debug("Ignoring MRC instruction at %#x.", address)
131
132
class Instruction_MCR(ARMInstruction):
    """Spots MCR; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "MCR"
    # Example encoding: 11101110000000010000111100010000
    #   c = cond
    #   C = Coprocessor operation mode
    #   d = CPd
    #   O = Offset
    #   p = CP#
    bin_format = "cccc1110CCC0nnnnddddppppOOOOOOOO"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO at least look at the conditionals
        # TODO Clobber the dst reg of MCR
        # TODO maybe treat coproc regs as simple storage (even though they are very much not)
        address = self.addr
        log.debug("Ignoring MCR instruction at %#x.", address)
148
149
class Instruction_MSR(ARMInstruction):
    """Spots MSR; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "MSR"
    # Example encodings:
    #   11100011001000011111000010010001
    #   11100001011011111111000000000001
    bin_format = "cccc00i10d10xxxj1111ssssssssssss"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        address = self.addr
        log.debug(
            "Ignoring MSR instruction at %#x. VEX cannot support this instruction. "
            "See pyvex/lifting/gym/arm_spotter.py",
            address,
        )
162
163
class Instruction_MRS(ARMInstruction):
    """Spots MRS; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "MRS"
    bin_format = "cccc00010s001111dddd000000000000"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        address = self.addr
        log.debug(
            "Ignoring MRS instruction at %#x. VEX cannot support this instruction. "
            "See pyvex/lifting/gym/arm_spotter.py",
            address,
        )
174
175
class Instruction_STM(ARMInstruction):
    """
    Spots STM encodings with bit 22 set (the ``^`` forms).

    ``compute_result`` emits nothing and only logs that the instruction was skipped.
    """

    name = "STM"
    bin_format = "cccc100pu1w0bbbbrrrrrrrrrrrrrrrr"

    def match_instruction(self, data, bitstrm):
        """
        Reject encodings whose register list is empty.

        NOTE(review): this override replaces the base-class check, so a condition field of
        ``1111`` is accepted here -- confirm that is intended.

        :param data: Mapping from ``bin_format`` letters to the bit strings captured for them.
        :param bitstrm: The bit stream being decoded (unused here).
        :return: True when the encoding is accepted.
        :raises ParseError: If no register bit is set.
        """
        # If we don't push anything, that's not real.  The field is a bit string: parse it in base 2.
        if int(data["r"], 2) == 0:
            raise ParseError("Invalid STM instruction")
        return True

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        log.debug(
            "Ignoring STMxx ^ instruction at %#x. This mode is not implemented by VEX! "
            "See pyvex/lifting/gym/arm_spotter.py",
            self.addr,
        )
192
193
class Instruction_LDM(ARMInstruction):
    """
    Spots LDM encodings with bit 22 set (the ``^`` forms) and lifts them as plain multi-register
    loads, ending the block with a Ret jump when PC is in the register list.
    """

    name = "LDM"
    bin_format = "cccc100PU1W1bbbbrrrrrrrrrrrrrrrr"

    def match_instruction(self, data, bitstrm):
        """
        Reject encodings whose register list is empty.

        NOTE(review): this override replaces the base-class check, so a condition field of
        ``1111`` is accepted here -- confirm that is intended.

        :param data: Mapping from ``bin_format`` letters to the bit strings captured for them.
        :param bitstrm: The bit stream being decoded (unused here).
        :return: True when the encoding is accepted.
        :raises ParseError: If no register bit is set.
        """
        # If we don't load anything, that's not real.  The field is a bit string: parse it in base 2.
        if int(data["r"], 2) == 0:
            raise ParseError("Invalid LDM instruction")
        return True

    @staticmethod
    def _displace(value, delta):
        """Return ``value`` moved by the lift-time integer ``delta``, never using a negative constant."""
        if delta > 0:
            return value + delta
        if delta < 0:
            return value - (-delta)
        return value

    def compute_result(self):  # pylint: disable=arguments-differ
        """
        Load every listed register from memory and optionally write the base register back.

        Addressing follows the ARM load-multiple rules: the lowest-numbered register comes from
        the lowest address, U selects increment (1) or decrement (0), and P selects whether the
        base address itself is excluded (1, "before") or included (0, "after").

        NOTE(review): the condition field is only applied to the jump emitted for PC; the other
        loads and the write-back are not individually guarded.
        """
        # test if PC will be set. If so, the jumpkind of this block should be Ijk_Ret
        log.debug("Spotting an LDM instruction at %#x. This is not fully tested. Prepare for errors.", self.addr)

        base_name = f"r{int(self.data['b'], 2)}"
        base = self.get(base_name, Type.int_32)

        # data["r"] holds bit 15 first, so this list is in descending register order.
        regs = [15 - idx for idx, bit in enumerate(self.data["r"]) if bit == "1"]
        span = 4 * len(regs)

        if self.data["U"] == "1":
            # IA starts at the base, IB one word above it; both finish span bytes above.
            lowest_offset = 4 if self.data["P"] == "1" else 0
            final_offset = span
        else:
            # DB occupies [base - span, base); DA occupies (base - span, base].
            lowest_offset = -span if self.data["P"] == "1" else 4 - span
            final_offset = -span

        # Walk from the highest register (highest slot) down, so that PC -- when present -- is
        # handled first, as before.
        for slot, reg_num in zip(range(len(regs) - 1, -1, -1), regs):
            val = self.load(self._displace(base, lowest_offset + 4 * slot), Type.int_32)
            self.put(val, f"r{reg_num}")
            # If we touch PC, we're doing a RET!
            if reg_num == 15:
                # evaluate_condition() yields None for "always", which jump() takes as unconditional.
                self.jump(self.evaluate_condition(), val, JumpKind.Ret)

        # Write-back
        if self.data["W"] == "1":
            self.put(self._displace(base, final_offset), base_name)
236
237
class Instruction_STC(ARMInstruction):
    """Spots STC; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "STC"
    bin_format = "cccc110PUNW0nnnnddddppppOOOOOOOO"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO At least look at the conditionals
        address = self.addr
        log.debug("Ignoring STC instruction at %#x.", address)
245
246
class Instruction_STC_THUMB(ARMInstruction):
    """Spots the ``111c110...`` form of STC; ``compute_result`` only logs that it was skipped."""

    name = "STC"
    bin_format = "111c110PUNW0nnnnddddppppOOOOOOOO"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO At least look at the conditionals
        address = self.addr
        log.debug("Ignoring STC instruction at %#x.", address)
254
255
class Instruction_LDC(ARMInstruction):
    """Spots LDC; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "LDC"
    bin_format = "cccc110PUNW1nnnnddddppppOOOOOOOO"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO At least look at the conditionals
        # TODO Clobber the dest reg of LDC
        # TODO Maybe clobber the dst reg of CDP, if we're really adventurous
        address = self.addr
        log.debug("Ignoring LDC instruction at %#x.", address)
265
266
class Instruction_LDC_THUMB(ARMInstruction):
    """Spots the ``111c110...`` form of LDC; ``compute_result`` only logs that it was skipped."""

    name = "LDC"
    bin_format = "111c110PUNW1nnnnddddppppOOOOOOOO"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO At least look at the conditionals
        # TODO Clobber the dest reg of LDC
        # TODO Maybe clobber the dst reg of CDP, if we're really adventurous
        address = self.addr
        log.debug("Ignoring LDC instruction at %#x.", address)
276
277
class Instruction_CDP(Instruction):
    """
    Spots CDP; ``compute_result`` emits nothing and only logs that it was skipped.

    NOTE(review): unlike the other coprocessor spotters this derives from ``Instruction``
    directly, so the ``ARMInstruction`` condition-field check does not apply -- confirm intent.
    """

    name = "CDP"
    # Field letters, per the ARM CDP encoding:
    #   c = cond
    #   o = opc1
    #   n = CRn
    #   d = CRd
    #   p = CP#
    #   P = opc2
    #   m = CRm
    bin_format = "cccc1110oooonnnnddddppppPPP0mmmm"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO At least look at the conditionals
        # TODO Maybe clobber the dst reg of CDP, if we're really adventurous
        address = self.addr
        log.debug("Ignoring CDP instruction at %#x.", address)
290
291
292##
293## Thumb! (ugh)
294##
295
296
class ThumbInstruction(Instruction):  # pylint: disable=abstract-method
    """Base for the Thumb instructions in this file; customises the instruction marker."""

    def mark_instruction_start(self):
        """
        Emit the instruction marker using ``self.addr - 1`` as the address.

        The final argument is the constant 1 (presumably the Thumb delta -- confirm against
        ``imark``).
        """
        marker_addr = self.addr - 1
        self.irsb_c.imark(marker_addr, self.bytewidth, 1)
300
301
class Instruction_tCPSID(ThumbInstruction):
    """Spots Thumb ``CPSIE i`` / ``CPSID i``; ``compute_result`` only logs that it was skipped."""

    name = "CPSID"
    # Thumb CPS is 1011 0110 011 <im> 0 A I F.  Bit 9 has to be 1: with it left as a wildcard the
    # pattern also matched 0xB462 / 0xB472, which are PUSH {r1, r5, r6} / PUSH {r1, r4, r5, r6},
    # and those would have been dropped as no-ops.  The remaining wildcard is <im> (enable/disable).
    bin_format = "10110110011x0010"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO haha lol yeah right
        log.debug("[thumb] Ignoring CPS instruction at %#x.", self.addr)
309
310
class Instruction_tMSR(ThumbInstruction):
    """Spots Thumb MSR and lifts writes to the two special registers handled below."""

    name = "tMSR"
    bin_format = "10x0mmmmxxxxxxxx11110011100Rrrrr"

    def compute_result(self):  # pylint: disable=arguments-differ
        """
        Copy the source register into ``sp`` (special register 8, msp) or ``primask`` (16).

        Any other special register, and any encoding with the R bit set, is only logged.
        """
        special = int(self.data["x"], 2)
        source_name = f"r{int(self.data['r'], 2)}"

        if self.data["R"] != "0":
            log.debug("[thumb] tMSR at %#x is writing SPSR. Ignoring the instruction. FixMe.", self.addr)
        else:
            # If 0, do not write the SPSR
            target = {8: "sp", 16: "primask"}.get(special)  # 8 = msp, 16 = primask
            if target is None:
                log.debug(
                    "[thumb] FIXME: tMSR at %#x is writing into an unsupported special register %#x. "
                    "Ignoring the instruction.",
                    self.addr,
                    special,
                )
            else:
                value = self.get(source_name, Type.int_32)
                self.put(value, target)

        log.debug(
            "[thumb] Spotting an tMSR instruction at %#x.  This is not fully tested.  Prepare for errors.".replace(
                "  ", " "
            ),
            self.addr,
        )
339
340
class Instruction_tMRS(ThumbInstruction):
    """Spots Thumb MRS and lifts reads of the two special registers handled below."""

    name = "tMRS"
    bin_format = "10x0mmmmxxxxxxxx11110011111Rrrrr"

    def compute_result(self):  # pylint: disable=arguments-differ
        """
        Copy ``sp`` (special register 8) or ``primask`` (16) into the destination register.

        Any other special register, and any encoding with the R bit set, is only logged.
        """
        special = int(self.data["x"], 2)
        target_name = f"r{int(self.data['m'], 2)}"

        if self.data["R"] != "0":
            log.debug("[thumb] tMRS at %#x is reading from SPSR. Ignoring the instruction. FixMe.", self.addr)
            log.debug("[thumb] Ignoring tMRS instruction at %#x.", self.addr)
        else:
            # Reading from CPSR
            # See special registers constants here:
            # https://github.com/aquynh/capstone/blob/45bec1a691e455b864f7e4d394711a467e5493dc/arch/ARM/ARMInstPrinter.c#L1654
            # For 8 we move the SP and call it a day.
            source_name = {8: "sp", 16: "primask"}.get(special)
            if source_name is None:
                log.debug(
                    "[thumb] FIXME: tMRS at %#x is using the unsupported special register %#x. "
                    "Ignoring the instruction.",
                    self.addr,
                    special,
                )
            else:
                value = self.get(source_name, Type.int_32)
                self.put(value, target_name)

        log.debug(
            "[thumb] Spotting an tMRS instruction at %#x. This is not fully tested. Prepare for errors.", self.addr
        )
373
374
class Instruction_tDMB(ThumbInstruction):
    """Spots Thumb DMB; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "DMB"
    bin_format = "100011110101xxxx1111001110111111"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        # TODO haha lol yeah right
        address = self.addr
        log.debug("[thumb] Ignoring DMB instruction at %#x.", address)
382
383
class Instruction_WFI(ThumbInstruction):
    """Spots Thumb WFI; ``compute_result`` emits nothing and only logs that it was skipped."""

    name = "WFI"
    # Example encoding: 1011111100110000
    bin_format = "10111111001a0000"

    def compute_result(self):  # pylint: disable=arguments-differ
        """Record at debug level that the instruction at ``self.addr`` is ignored."""
        address = self.addr
        log.debug("[thumb] Ignoring WFI instruction at %#x.", address)
391
392
class ARMSpotter(GymratLifter):
    """
    Gymrat lifter that tries the instruction classes above, choosing the ARM or the Thumb
    list from the low bit of the block address.
    """

    # Candidates used when the block address is even (ARM mode).
    arm_instrs = [
        Instruction_MRC,
        Instruction_MCR,
        Instruction_MSR,
        Instruction_MRS,
        Instruction_STM,
        Instruction_LDM,
        Instruction_STC,
        Instruction_LDC,
        Instruction_CDP,
    ]
    # Candidates used when the block address is odd (Thumb mode).
    thumb_instrs = [
        Instruction_tCPSID,
        Instruction_tMSR,
        Instruction_tMRS,
        Instruction_WFI,
        Instruction_tDMB,
        Instruction_STC_THUMB,
        Instruction_LDC_THUMB,
    ]

    def __init__(self, arch: Arch, addr: int):
        """
        :param arch: Architecture passed through to ``GymratLifter``.
        :param addr: Address passed through to ``GymratLifter``.
        """
        super().__init__(arch, addr)
        # Updated by _lift() from the block address.
        self.thumb: bool = False

    def _lift(self):
        """Select the instruction list for the current mode, then run the parent lifter."""
        # An odd block address means Thumb!
        self.thumb = bool(self.irsb.addr & 1)
        self.instrs = self.thumb_instrs if self.thumb else self.arm_instrs
        super()._lift()