1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 """
21 Provides the primitives needed to disassemble code using capstone.
22 """
23
24 import binascii
25 import capstone
26 import re
27 import struct
28
29 from capstone import x86_const
30 from rekall import addrspace
31 from rekall import plugin
32 from rekall import obj
33 from rekall import testlib
34 from rekall_lib import utils
38 __abstract = True
39
40 - def __init__(self, mode, session=None, address_space=None):
47
49 """ Starts disassembly of data """
50
53
56
59
62 """A Decoded instruction."""
63 __abstract = True
64
67 """A capstone decoded instruction."""
68
69
70
71 INSTRUCTIONS = {}
72 REGISTERS = {}
73 OP = {}
74
75 @classmethod
89
90 - def __init__(self, insn, session=None, address_space=None):
102
103 @utils.safe_property
105 if self._operands is not None:
106 return self._operands
107
108 result = []
109
110 if self.insn.id == 0:
111 return result
112
113 for op in self.insn.operands:
114 operand = dict(type=self.OP[op.type], size=op.size)
115 if operand["type"] == "REG":
116 operand["reg"] = self.REGISTERS[op.reg]
117
118 elif operand["type"] == "MEM":
119
120 mem = op.mem
121 operand["base"] = self.REGISTERS[mem.base]
122 operand["disp"] = mem.disp
123 operand["index"] = self.REGISTERS[mem.index]
124 operand["scale"] = mem.scale
125
126 if operand["base"] == "RIP":
127 target = self.insn.address + mem.disp + self.insn.size
128 operand["address"] = target
129 operand["target"] = self._read_target(target, operand)
130
131 self._comment = self.format_indirect(target, op.size)
132
133
134 if not operand["base"] and not operand["index"]:
135 operand["address"] = mem.disp
136 operand["target"] = self._read_target(mem.disp, operand)
137 self._comment = self.format_indirect(mem.disp, op.size)
138
139 elif operand["type"] == "IMM":
140 operand["target"] = operand["address"] = op.imm.real
141 self._comment = ", ".join(self.resolver.format_address(
142 op.imm.real))
143
144 result.append(operand)
145
146
147 self._operands = result
148 return result
149
151 data = self.address_space.read(target, operand["size"])
152 if operand["size"] == 8:
153 return struct.unpack("<Q", data)[0]
154
155 if operand["size"] == 4:
156 return struct.unpack("<I", data)[0]
157
159 """Returns the canonical model of the instruction."""
160 result = dict(mnemonic=self.INSTRUCTIONS[self.insn.id],
161 str="%s %s" % (self.insn.mnemonic, self.insn.op_str),
162 operands=self.operands)
163
164 result["comment"] = self._comment
165 return result
166
167 @utils.safe_property
170
171 @utils.safe_property
174
175 @utils.safe_property
177 canonical = self.GetCanonical()
178 if canonical["comment"]:
179 return "%s (%s)" % (canonical["str"], canonical["comment"])
180 return canonical["str"]
181
182 @utils.safe_property
184 return unicode(binascii.hexlify(self.insn.bytes))
185
206
209
210
212 """Is this instruction a branch?
213
214 e.g. JNE JE JG JLE JL JGE JMP JA JAE JB JBE JO JNO JZ JNZ JS JNS
215 """
216 return self.mnemonic.startswith("j")
217
218 @utils.safe_property
220 if self.mnemonic[0] == "j":
221 operand = self.operands[0]
222 if operand["type"] in ("IMM", "MEM"):
223 return operand.get("address")
224
225
226
227
229 """Match the rule against this instruction."""
230
231 mnemonic = rule.get("mnemonic")
232 if mnemonic and mnemonic != self.INSTRUCTIONS[self.insn.id]:
233 return False
234
235 return self._MatchRule(rule, self.GetCanonical(), context)
236
def _MatchRule(self, rule, instruction, context):
    """Recursively match one rule fragment against a canonical fragment.

    Args:
      rule: The rule fragment. A dict is matched key by key, a list or
        tuple is matched element by element, a string may be a capture
        variable ("$name") or a regular expression ("~pattern"), and
        anything else is compared for equality.
      instruction: The part of the canonical instruction that corresponds
        to this rule fragment (may be None when the key was absent).
      context: A dict which is updated in place; every "$name" rule stores
        the fragment it was matched against under the key "$name".

    Returns:
      A truthy value when the fragment matches, a falsy value otherwise.
      For "~pattern" rules this is whatever re.match() returned.
    """
    # Python 2 names the common string base class "basestring"; fall back
    # to str on interpreters where that name no longer exists.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(rule, dict):
        # A structured rule cannot match a fragment that is missing or is
        # not itself a mapping.
        if not isinstance(instruction, dict):
            return False

        for key, subrule in rule.items():
            if not self._MatchRule(subrule, instruction.get(key), context):
                return False
        return True

    if isinstance(rule, (list, tuple)):
        # Nothing to pair the sub-rules with.
        if instruction is None:
            return False

        # Empty sub-rules act as wildcards for their position.
        for subrule, subinst in zip(rule, instruction):
            if subrule and not self._MatchRule(subrule, subinst, context):
                return False
        return True

    if isinstance(rule, string_types):
        # startswith() is used instead of rule[0] so that an empty string
        # rule falls through to the equality test rather than raising.

        # Rules starting with $ capture the fragment into the context.
        if rule.startswith("$"):
            context[rule] = instruction
            return True

        # Rules starting with ~ are regular expressions; they only apply
        # when the fragment is a string.
        if isinstance(instruction, string_types) and rule.startswith("~"):
            return re.match(rule[1:], instruction)

    return rule == instruction
263
267 super(Capstone, self).__init__(mode, **kwargs)
268
269 if self.mode == "I386":
270 self.cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
271 elif self.mode == "AMD64":
272 self.cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
273 elif self.mode == "MIPS":
274 self.cs = capstone.Cs(capstone.CS_ARCH_MIPS, capstone.CS_MODE_32 +
275 capstone.CS_MODE_BIG_ENDIAN)
276
277 elif self.mode == "ARM":
278 self.cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
279 else:
280 raise NotImplementedError(
281 "No disassembler available for this arch.")
282
283 self.cs.detail = True
284 self.cs.skipdata_setup = ("db", None, None)
285 self.cs.skipdata = True
286
291
292
293 -class Disassemble(plugin.TypedProfileCommand, plugin.Command):
294 """Disassemble the given offset."""
295
296 __name = "dis"
297
298 __args = [
299 dict(name="offset", type="SymbolAddress", positional=True,
300 help="An offset to disassemble. This can also be the name of "
301 "a symbol with an optional offset. For example: "
302 "tcpip!TcpCovetNetBufferList."),
303
304 dict(name="address_space", type="AddressSpace",
305 help="The address space to use."),
306
307 dict(name="length", type="IntParser",
308 help="The number of instructions (lines) to disassemble."),
309
310 dict(name="end", type="IntParser",
311 help="The end address to disassemble up to."),
312
313 dict(name="mode", default=None,
314 choices=["I386", "AMD64", "MIPS"], type="Choices",
315 help="Disassemble Mode (AMD64 or I386). Defaults to 'auto'."),
316
317 dict(name="branch", default=False, type="Boolean",
318 help="If set we follow all branches to cover all code."),
319
320 dict(name="canonical", default=False, type="Boolean",
321 help="If set emit canonical instructions. These can be used to "
322 "develop signatures."),
323 ]
324
325 table_header = [
326 dict(type="TreeNode", name="address",
327 width=20, child=dict(style="address")),
328 dict(name="rel", style="address", width=5),
329 dict(name="opcode", width=20),
330 dict(name="instruction", width=40),
331 dict(name="comment"),
332 ]
333
355
407
424
def render(self, renderer, **options):
    """Render the disassembly listing for the requested offset.

    When the "canonical" plugin argument is set, output is delegated to
    render_canonical() so that the canonical form of each instruction is
    emitted (useful for developing signatures). Otherwise the standard
    rendering of the parent plugin class is used.

    The amount of output is controlled by the "length" argument (a number
    of instructions) or the "end" argument (an end address). The "mode"
    argument selects the disassembler mode (I386, AMD64 or MIPS).

    Note: This feature requires capstone, available at
    http://www.capstone-engine.org/
    """
    if not self.plugin_args.canonical:
        return super(Disassemble, self).render(renderer, **options)

    return self.render_canonical(renderer, **options)
441
443 self._visited.clear()
444
445 offset = None
446 for depth, instruction in self.disassemble(self.offset):
447 offset = instruction.address
448
449 relative = None
450 resolver = self.session.address_resolver
451 if resolver:
452 (f_offset, f_names) = resolver.get_nearest_constant_by_address(
453 offset)
454
455 f_name = ", ".join(f_names)
456 self.session.report_progress(
457 "Disassembled %s: 0x%x", f_name, offset)
458
459 if offset - f_offset == 0:
460 yield dict(
461 address="------ %s ------\n" % f_name,
462 annotation=True)
463
464 if offset - f_offset < 0x1000:
465 relative = offset - f_offset
466
467 yield dict(address=instruction.address,
468 rel=relative,
469 opcode=instruction.hexbytes,
470 instruction=instruction.op_str,
471 comment=instruction.comment, depth=depth)
472
473
474
475 self.offset = offset
476
479 PARAMETERS = dict(
480
481
482 commandline=("dis --length %(length)s %(func)s "
483 "--name_resolution_strategies Export"),
484 func=0x805031be,
485 length=20
486 )
487
488
489 -class Function(obj.BaseAddressComparisonMixIn, obj.BaseObject):
490 """A base object representing code snippets."""
491
492 - def __init__(self, mode=None, args=None, **kwargs):
515
517 return self.obj_offset
518
520 return self.obj_offset + hash(self.obj_vm)
521
523 if self.mode == "AMD64":
524 format_string = "%0#14x %s"
525 else:
526 format_string = "%0#10x %s"
527
528 result = []
529 for instruction in self.disassemble():
530 result.append(format_string % (
531 instruction.address, instruction.text))
532
533 return "\n".join(result)
534
537
539 for i, x in enumerate(self.disassemble()):
540 if i == item:
541 return x
542
def Rewind(self, length=0, align=True):
    """Return a new Function which starts before this one.

    The new function begins `length` bytes before this function's offset.

    If align is set, the rewind distance is grown one byte at a time until
    disassembling the new function produces an instruction that starts
    exactly at this function's offset. Without align the first candidate
    is returned unchecked.
    """
    while True:
        start = self.obj_offset - length
        candidate = self.obj_profile.Function(vm=self.obj_vm, offset=start)
        if not align:
            return candidate

        for insn in candidate.disassemble(instructions=length):
            address = insn.address

            # An instruction boundary lands exactly on our start.
            if address == self.obj_offset:
                return candidate

            # We stepped over our start without hitting it: rewind one
            # more byte and try again.
            if address > self.obj_offset:
                length += 1
                break
565
567 """Generate some instructions."""
568 count = 0
569 buffer_offset = offset = self.obj_offset
570 while 1:
571
572 data = self.obj_vm.read(buffer_offset, 0x2000)
573
574 for instruction in self.dis.disassemble(data, buffer_offset):
575 offset = instruction.address
576
577
578
579
580 if offset - buffer_offset > 0x1000:
581 buffer_offset = offset
582 break
583
584 yield instruction
585 count += 1
586
587 if count > instructions:
588 return
589
590 buffer_offset = offset
591
592
593
594 obj.Profile.COMMON_CLASSES["Function"] = Function
595