1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 __author__ = "Michael Cohen <scudette@gmail.com>"
import base64

import yara

from rekall import plugin
from rekall import scan

from rekall.plugins import yarascanner
from rekall.plugins.common import pfn
from rekall.plugins.tools import yara_support
from rekall.plugins.windows import common
from rekall.plugins.windows import pagefile

from rekall_lib import utils
32
33
class WinYaraScan(yarascanner.YaraScanMixin, common.WinScanner):
    """Scan using yara signatures.

    Combines the generic yara scanning mixin with the Windows scanner base
    class. The only customisation is that physical address space scanning is
    switched on by default through scanner_defaults.
    """

    # Default scanner options: scan the physical address space.
    scanner_defaults = {"scan_physical": True}
40
41
class ContextBuffer(object):
    """A class to manage hits and create contiguous context buffers.

    Every hit found in physical memory is attributed to the contexts returned
    by get_contexts() for the page it lives in. For each context only the
    first hit of every yara string name is remembered. The remembered hits
    can then be stitched together into one pseudo buffer per context (see
    get_combined_context_buffers()).
    """

    def __init__(self, session):
        """Create an empty buffer bound to a session.

        Args:
          session: The session object. Its physical_address_space, profile
            and logging attributes are used by this class.
        """
        # Bounded cache: page number -> list of owners for that page.
        self._context_cache = utils.FastStore(max_size=10000)

        # Page number of the most recent hit and the contexts resolved for it,
        # so consecutive hits in the same page are resolved only once.
        self.last_pfn_id = -1
        self.last_context_list = None

        # context -> {string_name: (original_offset, base64 encoded value)}
        self.hits_by_context = {}
        self.session = session
        self.address_space = session.physical_address_space

    def _add_hit_offset(self, context_list, string_name, original_offset,
                        value):
        """Remember a hit under every context in context_list.

        Only the first hit of a given string_name is kept per context; later
        hits for the same string in the same context are ignored.

        Args:
          context_list: Iterable of hashable context keys.
          string_name: The yara string identifier that matched.
          original_offset: Offset of the hit in the scanned address space.
          value: The matched data (a byte string).
        """
        for context in context_list:
            hits_for_context = self.hits_by_context.setdefault(context, {})

            if string_name not in hits_for_context:
                # The base64 module works on both Python 2 and 3, unlike the
                # "base64" str codec which only exists on Python 2.
                hits_for_context[string_name] = (
                    original_offset, base64.b64encode(value))

    def add_hit(self, string_name, hit_offset, value):
        """Attribute a single string hit to the contexts owning its page.

        Args:
          string_name: The yara string identifier that matched.
          hit_offset: Offset of the hit in the physical address space.
          value: The matched data (a byte string).
        """
        # Page number of the hit (4096 byte pages).
        pfn_id = hit_offset >> 12
        if pfn_id != self.last_pfn_id:
            # New page: resolve its contexts and remember them for the hits
            # that follow in the same page.
            self.last_pfn_id = pfn_id
            self.last_context_list = self.get_contexts(pfn_id << 12)
            if not self.last_context_list:
                self.session.logging.debug(
                    "No process context for hit at %#x", hit_offset)
                return

        elif self.last_context_list is None:
            # Same page as last time but no context list was ever resolved.
            return

        self._add_hit_offset(
            self.last_context_list, string_name, hit_offset, value)

    def get_combined_context_buffers(self):
        """Yields pseudo_data for each context containing all hits.

        Yields:
          (context, omap, pseudo_data) tuples. pseudo_data is a byte string
          made of every remembered hit value for the context, each followed
          by ten 0xFF padding bytes. omap maps the position of each value
          inside pseudo_data to the original offset of that hit.
        """
        pad = b"\xFF" * 10
        for context, hits_dict in self.hits_by_context.items():
            chunks = []
            position = 0

            omap = {}
            for hit_offset, encoded_value in hits_dict.values():
                omap[position] = hit_offset

                value = base64.b64decode(encoded_value)

                chunks.append(value)
                chunks.append(pad)

                position += len(value) + len(pad)

            yield context, omap, b"".join(chunks)

    def process_owners_from_physical_address(self, address):
        """Get the process owner from the physical address.

        We could use the ptov() or rammap() plugin but this is a very fast
        implementation which only cares about the identity of the owner.

        Args:
          address: An address in the physical address space.

        Returns:
          A list of owners for the page containing address. Results are
          stored in self._context_cache so repeated lookups are cheap.
        """
        pfn_id = address >> 12
        try:
            return self._context_cache.Get(pfn_id)
        except KeyError:
            pass

        # Look up the PFN database record describing this page.
        pfn_database = self.session.profile.get_constant_object(
            "MmPfnDatabase")
        pfn_obj = pfn_database[pfn_id]

        if pfn_obj.IsPrototype:
            # Prototype entries are resolved through the file mapping
            # descriptor built from the PTE address; owners are cached under
            # the page number of that PTE address.
            pte_address = pfn_obj.PteAddress.v()
            try:
                return self._context_cache.Get(pte_address >> 12)
            except KeyError:
                descriptor = pagefile.WindowsFileMappingDescriptor(
                    session=self.session, pte_address=pte_address)

                owners = [x[0] for x in descriptor.get_owners()]
                self._context_cache.Put(pte_address >> 12, owners)
                return owners

        # Otherwise follow the PteFrame links for up to four steps
        # (presumably one per page table level - confirm against the
        # profile), remembering every page visited so they can all be
        # cached once the owner is known.
        p_addr = address
        pfns = []

        for _ in range(4):
            pfn_id = p_addr >> 12
            try:
                owners = self._context_cache.Get(pfn_id)
                for visited_pfn in pfns:
                    self._context_cache.Put(visited_pfn, owners)

                return owners
            except KeyError:
                pass

            pfn_obj = pfn_database[pfn_id]
            pfns.append(pfn_id)

            # The PTE recorded for this page.
            pte = pfn_obj.PteAddress

            # Next address to inspect: the frame holding that PTE combined
            # with the PTE's offset inside its page.
            p_addr = ((pfn_obj.u4.PteFrame << 12) |
                      (pte.v() & 0xFFF))

        # The resulting address is treated as a DTB, which is passed page
        # aligned to the descriptor.
        descriptor = pagefile.WindowsDTBDescriptor(
            session=self.session, dtb=p_addr & ~0xFFF)

        owners = [descriptor.owner()]
        for visited_pfn in pfns:
            self._context_cache.Put(visited_pfn, owners)

        return owners

    def get_contexts(self, offset):
        """Get some context about this offset.

        We use this context to group similar yara hits into logical groups.

        Args:
          offset: An address in the physical address space.

        Returns:
          a list of things which can be used as contexts - i.e. they are unique
          for all pages common within this context. Pages will be grouped by
          these contexts and evaluated together. Here these are the
          obj_offset values of the page owners; the list is empty when no
          owner is found.
        """
        owners = self.process_owners_from_physical_address(offset)
        if not owners:
            return []

        return [x.obj_offset for x in owners]
181
182
184 """An experimental yara scanner over the physical address space.
185
186 Yara does not provide a streaming interface, which means that when we scan
187 for yara rules we can only ever match strings within the same buffer. This
188 is a problem for physical address space scanning because each page (although
189 it might appear to be contiguous) usually comes from a different
190 process/mapped file.
191
192 Therefore we need a more intelligent way to apply yara signatures on the
193 physical address space:
194
195 1. The original set of yara rules is converted into a single rule with all
196 the strings from all the rules in it. The rule has a condition "any of them"
197 which will match any string appearing in the scanned buffer.
198
199 2. This rule is then applied over the physical address space.
200
201 3. For each hit we derive a context and add the hit to the context.
202
203 4. Finally we test all the rules within the same context with the original
204 rule set.
205 """
206
207 name = "yarascan_physical"
208
209 table_header = [
210 dict(name="Owner", width=20),
211 dict(name="Rule", width=10),
212 dict(name="Offset", style="address"),
213 dict(name="HexDump", hex_width=16, width=67),
214 dict(name="Context"),
215 ]
216
217 __args = [
218 dict(name="hits", default=10, type="IntParser",
219 help="Quit after finding this many hits."),
220
221 dict(name="yara_expression",
222 help="If provided we scan for this yara "
223 "expression specified in the yara DSL."),
224
225 dict(name="yara_ast",
226 help="If provided we scan for this yara "
227 "expression specified in the yara JSON AST."),
228
229 dict(name="start", default=0, type="IntParser",
230 help="Start searching from this offset."),
231
232 dict(name="limit", default=2**64, type="IntParser",
233 help="The length of data to search."),
234
235 dict(name="context", default=0x40, type="IntParser",
236 help="Context to print after the hit."),
237
238 dict(name="pre_context", default=0, type="IntParser",
239 help="Context to print before the hit."),
240 ]
241
242 scanner_defaults = dict(
243 scan_physical=True
244 )
245
290
292 address_space = self.session.physical_address_space
293 for buffer_as in scan.BufferASGenerator(
294 self.session, address_space,
295 self.plugin_args.start,
296 self.plugin_args.start + self.plugin_args.limit):
297 self.session.report_progress(
298 "Scanning buffer %#x->%#x (%#x)",
299 buffer_as.base_offset, buffer_as.end(),
300 buffer_as.end() - buffer_as.base_offset)
301 for match in self.unified_rule.match(data=buffer_as.data):
302 for buffer_offset, string_name, value in sorted(match.strings):
303 hit_offset = buffer_offset + buffer_as.base_offset
304 self.context_buffer.add_hit(string_name, hit_offset, value)
305
306
307 it = self.context_buffer.get_combined_context_buffers()
308 for context, original_offset_map, pseudo_data in it:
309 seen = set()
310 self.session.report_progress(
311 "Scanning pseudo buffer of length %d" % len(pseudo_data))
312
313 for match in self.rules.match(data=pseudo_data):
314 self.session.report_progress()
315
316 dedup_key = (match.rule, context)
317 if dedup_key in seen:
318 continue
319
320 seen.add(dedup_key)
321 for buffer_offset, _, value in match.strings:
322 hit_offset = original_offset_map.get(buffer_offset)
323 if hit_offset is not None:
324 if isinstance(context, int):
325 owner = self.session.profile._EPROCESS(context)
326 else:
327 owner = context
328
329 yield dict(
330 Owner=owner,
331 Rule=match.rule,
332 Offset=hit_offset,
333 HexDump=utils.HexDumpedString(
334 address_space.read(
335 hit_offset - self.plugin_args.pre_context,
336 self.plugin_args.context +
337 self.plugin_args.pre_context)),
338 Context=pfn.PhysicalAddressContext(
339 self.session, hit_offset)
340 )
341