1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """This module generates types automatically by disassembling code.
24
25 Generally Rekall prefers to use publicly available debugging information for
26 building profiles for the different operating systems supported. However, in
27 many cases, the symbols we need are not publicly available.
28
We could, in theory, distribute hand-written profiles for each OS version, but
this has a number of disadvantages:
31
32 - It is very time intensive to prepare hand written profiles for each version of
33 the operating system.
34
- It is not possible for us to test all possible versions (the Rekall profile
  repository currently has hundreds of Windows kernel builds - we would need to
  maintain the profiles for each of these versions, and add new profiles each
  time a hotfix is released).
39
- We also need to document how we arrive at these offsets in order for others to
  verify our findings. It is important for a forensic application to be as
  transparent as possible, without "magic numbers" in code.
43
44 An additional requirement is that the disassembly process be data driven. This
45 way we can update the exact algorithm within the Rekall profile repository if an
46 error or bug is discovered without needing to update all current users of the
47 tool.
48
49
50 How does it work?
51 =================
52
53 The DynamicProfile profile is loaded from the profile repository as normal. Like
54 in a regular profile, the profile is defined by a json object. A DynamicProfile
55 however, contains an additional section $DYNAMIC_STRUCT, which will be compiled
56 into overlays.
57
58
{
  "$METADATA": {
    .....
  },

  "$DYNAMIC_STRUCT": {
    "_TCP_LISTENER": {
      "Owner": [
        ["Disassembler", {
          "start": "tcpip.sys!_TcpCovetNetBufferList",
          "rules": [
            "MOV EAX, [ESI+$out]",
            "TEST EAX, EAX",
            "PUSH EAX",
            "CALL DWORD *__imp__PsGetProcessId"
          ],
          "target": "unsigned int"
        }]
      ]
    }
  }
}
81 """
82
83 __author__ = "Michael Cohen <scudette@gmail.com>"
84
85 import re
86
87 from rekall import registry
88 from rekall import obj
89 from rekall import utils
90
91
101
102
104 """A constant generator deriving values based on the disassembler."""
105
106 - def __init__(self, session=None, name=None, start=None, end=None,
107 length=300, rules=None, max_separation=10):
108 """Derive a value from disassembly.
109
110 Args:
111 start: Where to start disassembly (Usually a symbol name).
112 end: Where to stop disassembly.
113
114 length: If end is not specified, we disassemble at most this many
115 bytes.
116
117 rules: A list of rules (see above).
118 """
119 self.session = session
120 self.text_rules = rules
121 self.rules = self.CompileRule(rules)
122 self.start = start
123 self.length = length
124 self.end = end
125 self.name = name
126 self.cached_value = None
127 self.max_separation = max_separation
128
130 return "Disassemble %s" % self.start
131
def CompileRule(self, rule):
    """Convert the rule into a list of regular expressions.

    Rules are a list of patterns. Each pattern corresponds to a single
    instruction. There can be an arbitrary number of instructions between
    each rule.

    Output is captured using $out (this can only be specified once). Wild
    cards are denoted by *. Wildcards only apply across a single instruction
    (and comment). The following is an example of a rule:

       MOV EAX, [ESI+$out]
       TEST EAX, EAX
       PUSH EAX
       CALL DWORD *__imp__PsGetProcessId

    Args:
      rule: An iterable of pattern strings.

    Returns:
      A list of compiled regular expressions, one per pattern and in the
      same order. A capture variable $name becomes a named group called
      name_N, where N counts the capture variables on that line from 1.
    """
    compiled = []
    for line in rule:
        # Escape the characters which are common in operands but have a
        # special meaning in regular expressions.
        line = re.sub(r"([()\[\]\+])", r"\\\1", line)

        # A wildcard lazily matches one or more characters.
        line = re.sub(r"\*", r".+?", line)

        # Group names must be unique within one expression, so every capture
        # variable gets a per-line instance suffix. Splitting on a pattern
        # with a group leaves the variable names at the odd indexes, which
        # lets us number them without keeping a counter on the instance.
        pieces = re.split(r"\$([a-zA-Z0-9]+)", line)
        for index in range(1, len(pieces), 2):
            pieces[index] = r"(?P<%s_%s>[^ \[\]+-]+)" % (
                pieces[index], index // 2 + 1)

        compiled.append(re.compile("".join(pieces), re.S | re.M))

    return compiled
170
def calculate(self, session):
    """Return the derived value, computing it on first use only.

    The result of _calculate() is stored in self.cached_value and handed
    back on every later call, whatever session is passed in. A stored value
    of None is not considered cached, so it is computed again.
    """
    if self.cached_value is None:
        self.cached_value = self._calculate(session)

    return self.cached_value
177
185
187 """Checks that capture variables are consistent in the vector.
188
189 The vector is a list of disassembly lines which match the rules, e.g.
190
191 [16, 60, 61]
192
193 The context is the capture variables from these rules. In order
194 to be valid, the capture variables must all be consistent. For
195 example the following is not consistent (since var1 is RAX in
196 the first rule and RCX in the second rule):
197
198 contexts[16]
199 {'var1': u'RAX'}
200
201 contexts[60]
202 {'var1': u'RCX', 'out': u'0x88'}
203
204 contexts[61]
205 {}
206 """
207 result = {}
208 for rule_number, item in enumerate(vector):
209 rule_context = contexts[rule_number]
210
211 rule_capture_vars_values = {}
212
213 for k, v in rule_context[item].iteritems():
214 var_name = k.rsplit("_", 1)[0]
215
216
217
218 if var_name in result and v != result[var_name]:
219 return
220
221
222
223
224
225
226
227
228
229 if (v in rule_capture_vars_values and
230 rule_capture_vars_values[v] != var_name):
231 return
232
233 result[var_name] = v
234 rule_capture_vars_values[v] = var_name
235
236 return result
237
239 """Find the first vector that matches all the criteria."""
240 for vector in self.GenerateVector(hits, [], 0):
241 context = self._CheckCaptureVariables(vector, contexts)
242 if not context:
243 continue
244
245 return (vector, context)
246
247 return [], {}
248
def GenerateVector(self, hits, vector, level):
    """Generate the candidate vectors which extend vector from level on.

    Args:
      hits: A dict mapping a rule number to the list of line numbers which
        that rule matched.
      vector: The line numbers already chosen for the rules before level.
      level: The rule number to choose a line for next.

    Yields:
      Lists holding one line number per entry of hits. Every line number
      is at least as large as the one before it, and at most
      self.max_separation larger.
    """
    next_level = level + 1

    for line_number in hits.get(level, []):
        if vector:
            previous = vector[-1]

            # Rules have to match in order, so ignore earlier lines.
            if line_number < previous:
                continue

            # Too far from the previous rule's line. Stop scanning: the
            # remaining hits are not examined.
            if line_number > self.max_separation + previous:
                break

        candidate = vector + [line_number]

        if next_level == len(hits):
            # A line has been chosen for every rule.
            yield candidate

        elif next_level < len(hits):
            for completed in self.GenerateVector(
                    hits, candidate, next_level):
                yield completed
268
def _calculate(self, session):
    """Disassemble the target and search the output for the rules.

    The disassembly text is looked up in the "disassembler_cache" session
    parameter under the key (start, length, end). On a KeyError the dis
    plugin is run and its output is stored in that cache.

    Returns:
      The integer value of the out capture variable (0 when the rules do
      not define one), or 0 when the rules could not be matched.
    """
    cache_key = (self.start, self.length, self.end)

    # Try to reuse an earlier disassembly of the same region.
    try:
        disassembler_cache = self.session.GetParameter(
            "disassembler_cache", utils.FastStore())

        disassembly = disassembler_cache.Get(cache_key)

    except KeyError:
        disassembly = unicode(session.plugins.dis(
            offset=self.start, branch=True,
            length=self.length, end=self.end))

        disassembler_cache.Put(cache_key, disassembly)

        self.session.SetCache("disassembler_cache", disassembler_cache)

    # For every rule, record the lines it matched and what it captured on
    # each of them.
    lines = disassembly.splitlines()
    hits = {}
    contexts = {}
    for line_number, text in enumerate(lines):
        for rule_idx, captured in self._FindRuleIndex(text):
            hits.setdefault(rule_idx, []).append(line_number)
            contexts.setdefault(rule_idx, {})[line_number] = captured

    rule_count = len(self.rules)

    # Every rule has to match at least one line.
    if len(hits) < rule_count:
        self.session.logging.error("Failed to find match for %s", self.name)

        # Name the rules which did not match to make diagnosis easier.
        unmatched = [
            text_rule for index, text_rule in enumerate(self.text_rules)
            if index not in hits]
        for text_rule in unmatched:
            self.session.logging.debug(
                "Unable to match rule: %s", text_rule)

        return 0

    vector, context = self._GetMatch(hits, contexts)

    if len(vector) < rule_count:
        self.session.logging.error("Failed to find match for %s.",
                                   self.name)
        return 0

    self.session.logging.debug("Found match for %s", self.name)
    for line_number in vector:
        self.session.logging.debug(lines[line_number])

    # Base 0 lets int() work out the base from the prefix (e.g. 0x88).
    return int(context.get("out", "0"), 0)
320
321
323 """A Dynamic profile which parses its overlays from $DYNAMIC_STRUCT."""
324
325
326
327
"""Parse the definition and generate an overlay from it.

The names session and dynamic_definition are used by this body.

dynamic_definition is read as a dict of the form

  {type_name: {field_name: [[parser_name, kwargs], ...]}}

The optional "target" and "target_args" entries are removed from a copy of
kwargs; what is left is passed to the parser class registered under
parser_name.

Returns:
  A dict of the form

    {type_name: [None, {field_name: [offset_cb, [target, target_args]]}]}

  where offset_cb(x) returns the first true result of the field's parsers,
  tried in order, or 0 when none of them produces one.
"""
overlay = {}
for type_name, definition in dynamic_definition.items():
    type_overlay = {}
    overlay[type_name] = [None, type_overlay]

    for field_name, attempts in definition.items():
        parsers = []
        for (parser_name, kwargs) in attempts:
            # Work on a copy so the pops below leave the definition alone.
            kwargs = kwargs.copy()
            target = kwargs.pop("target", None)
            target_args = kwargs.pop("target_args", {})
            name = "%s.%s" % (type_name, field_name)

            parsers.append(DynamicParser.classes.get(parser_name)(
                session=session, name=name, **kwargs))

        # Make the offset a callable. parsers and field_name are bound as
        # default arguments so that each callback keeps the values of its
        # own loop iteration.
        # NOTE(review): nesting reconstructed - assumes the callback is
        # defined once per field, after all of its parsers are collected.
        def offset_cb(x, parsers=parsers, field_name=field_name):
            for p in parsers:
                result = p.calculate(x.obj_session)
                if result:
                    return result
                else:
                    session.logging.debug(
                        "Unable to find %s.%s via %s", x.obj_name,
                        field_name, p)

            # No parser produced a usable offset.
            return 0

        # target and target_args are the ones left by the last attempt.
        type_overlay[field_name] = [offset_cb, [target, target_args]]

return overlay
364