Package rekall :: Package plugins :: Package windows :: Package malware :: Module yarascan
[frames | no frames]

Source Code for Module rekall.plugins.windows.malware.yarascan

  1  # Rekall Memory Forensics 
  2  # Copyright 2016 Google Inc. All Rights Reserved. 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify 
  5  # it under the terms of the GNU General Public License as published by 
  6  # the Free Software Foundation; either version 2 of the License, or (at 
  7  # your option) any later version. 
  8  # 
  9  # This program is distributed in the hope that it will be useful, but 
 10  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 12  # General Public License for more details. 
 13  # 
 14  # You should have received a copy of the GNU General Public License 
 15  # along with this program; if not, write to the Free Software 
 16  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18   
 19  __author__ = "Michael Cohen <scudette@gmail.com>" 
import base64
import numbers

import yara

from rekall import plugin
from rekall import scan

from rekall.plugins import yarascanner
from rekall.plugins.common import pfn
from rekall.plugins.tools import yara_support
from rekall.plugins.windows import common
from rekall.plugins.windows import pagefile

from rekall_lib import utils
 32   
 33   
class WinYaraScan(yarascanner.YaraScanMixin, common.WinScanner):
    """Scan using yara signatures.

    All scanning behaviour is inherited from the mixin and the Windows
    scanner base class; this class only overrides the scanner defaults.
    """

    # Default scanner options: the scan_physical option is switched on.
    scanner_defaults = {"scan_physical": True}
40 41
class ContextBuffer(object):
    """A class to manage hits and create contiguous context buffers.

    Hits are grouped by "context" (the values returned by get_contexts()).
    For each context only the first hit of every string name is remembered.
    get_combined_context_buffers() later concatenates the remembered values
    of one context into a single pseudo buffer.
    """

    # Offsets are converted to page frame numbers by shifting this many bits.
    _PAGE_SHIFT = 12

    # Mask selecting the offset-within-page bits of an address.
    _PAGE_MASK = 0xFFF

    # Separator appended after every hit value inside a pseudo buffer.
    _PAD = b"\xFF" * 10

    def __init__(self, session):
        # Cache of owner lists keyed by page number (see
        # process_owners_from_physical_address()).
        self._context_cache = utils.FastStore(max_size=10000)

        # add_hit() remembers the contexts of the last page it resolved so
        # that consecutive hits in the same page need a single lookup.
        self.last_pfn_id = -1
        self.last_context_list = None

        # context -> {string_name: (original_offset, base64 encoded value)}
        self.hits_by_context = {}
        self.session = session
        self.address_space = session.physical_address_space

    def _add_hit_offset(self, context_list, string_name, original_offset,
                        value):
        """Record a hit of string_name under every context in context_list.

        Only the first hit of a string name is kept per context; later hits
        of the same string in the same context are ignored.

        Args:
          context_list: iterable of hashable context keys.
          string_name: name of the yara string which matched.
          original_offset: offset of the hit as reported by the caller.
          value: the matched data (a byte string).
        """
        encoded_value = None
        for context in context_list:
            hits = self.hits_by_context.setdefault(context, {})
            if string_name in hits:
                continue

            # Encode lazily, and only once, however many contexts need it.
            if encoded_value is None:
                encoded_value = base64.b64encode(value)

            hits[string_name] = (original_offset, encoded_value)

    def add_hit(self, string_name, hit_offset, value):
        """Add a hit to every context which owns the page of hit_offset.

        The contexts of the most recently seen page are cached, so
        get_contexts() runs once per run of hits inside the same page.

        Args:
          string_name: name of the yara string which matched.
          hit_offset: offset of the hit.
          value: the matched data (a byte string).
        """
        pfn_id = hit_offset >> self._PAGE_SHIFT
        if pfn_id != self.last_pfn_id:
            self.last_pfn_id = pfn_id
            self.last_context_list = self.get_contexts(
                pfn_id << self._PAGE_SHIFT)

            # Only report the missing context for the first hit in a page.
            if not self.last_context_list:
                self.session.logging.debug(
                    "No process context for hit at %#x", hit_offset)

        if self.last_context_list:
            self._add_hit_offset(
                self.last_context_list, string_name, hit_offset, value)

    def get_combined_context_buffers(self):
        """Yields pseudo_data for each context containing all hits.

        Yields:
          (context, omap, pseudo_data) tuples. pseudo_data is the
          concatenation of every remembered hit value of the context, each
          followed by a fixed padding. omap maps the position of each value
          inside pseudo_data back to the offset the hit was recorded with.
        """
        pad = self._PAD
        for context, hits_dict in self.hits_by_context.items():
            data = []
            data_len = 0
            # Map the dummy buffer offset back to the original offset.
            omap = {}
            for hit_offset, encoded_value in hits_dict.values():
                omap[data_len] = hit_offset

                value = base64.b64decode(encoded_value)
                # Some padding separates out the sigs.
                data.append(value)
                data.append(pad)

                data_len += len(value) + len(pad)

            yield context, omap, b"".join(data)

    def process_owners_from_physical_address(self, address):
        """Get the process owner from the physical address.

        We could use the ptov() or rammap() plugin but this is a very fast
        implementation which only cares about the identity of the owner.

        Args:
          address: the physical address to resolve.

        Returns:
          A list of owners. Results are cached by page number.
        """
        pfn_id = address >> self._PAGE_SHIFT
        try:
            return self._context_cache.Get(pfn_id)
        except KeyError:
            pass

        # Try to find a process that owns this page. This is an optimized
        # version of the algorithm in the pfn, ptov and rammap plugins.
        pfn_database = self.session.profile.get_constant_object(
            "MmPfnDatabase")
        pfn_obj = pfn_database[pfn_id]

        # This is a mapped file.
        if pfn_obj.IsPrototype:
            # This is the controlling PTE.
            pte_address = pfn_obj.PteAddress.v()
            pte_page = pte_address >> self._PAGE_SHIFT
            try:
                # All PTEs in that page are owned by the same owners.
                return self._context_cache.Get(pte_page)
            except KeyError:
                pass

            descriptor = pagefile.WindowsFileMappingDescriptor(
                session=self.session, pte_address=pte_address)

            owners = [x[0] for x in descriptor.get_owners()]
            self._context_cache.Put(pte_page, owners)
            return owners

        # We only care about the process owner so this is the first half of
        # pfn.ptov._ptov_x64_hardware_PTE()
        p_addr = address
        visited_pfns = []

        for _ in range(4):
            pfn_id = p_addr >> self._PAGE_SHIFT
            try:
                owners = self._context_cache.Get(pfn_id)
            except KeyError:
                pass
            else:
                # A page further up the walk is already known: every page
                # visited on the way to it shares its owners.
                for visited in visited_pfns:
                    self._context_cache.Put(visited, owners)

                return owners

            pfn_obj = pfn_database[pfn_id]
            visited_pfns.append(pfn_id)

            # The PTE which controls this pfn.
            pte = pfn_obj.PteAddress

            # The physical address of the PTE.
            p_addr = ((pfn_obj.u4.PteFrame << self._PAGE_SHIFT) |
                      (pte.v() & self._PAGE_MASK))

        # The DTB must be page aligned.
        descriptor = pagefile.WindowsDTBDescriptor(
            session=self.session, dtb=p_addr & ~self._PAGE_MASK)

        owners = [descriptor.owner()]
        for visited in visited_pfns:
            self._context_cache.Put(visited, owners)

        return owners

    def get_contexts(self, offset):
        """Get some context about this offset.

        We use this context to group similar yara hits into logical groups.

        Returns:
          a list of things which can be used as contexts - i.e. they are
          unique for all pages common within this context. Pages will be
          grouped by these contexts and evaluated together. The list is empty
          when no owner is found.
        """
        owners = self.process_owners_from_physical_address(offset)
        if not owners:
            return []

        return [x.obj_offset for x in owners]
181 182
class WinPhysicalYaraScanner(common.AbstractWindowsCommandPlugin):
    """An experimental yara scanner over the physical address space.

    Yara does not provide a streaming interface, which means that when we scan
    for yara rules we can only ever match strings within the same buffer. This
    is a problem for physical address space scanning because each page (although
    it might appear to be contiguous) usually comes from a different
    process/mapped file.

    Therefore we need a more intelligent way to apply yara signatures on the
    physical address space:

    1. The original set of yara rules is converted into a single rule with all
    the strings from all the rules in it. The rule has a condition "any of them"
    which will match any string appearing in the scanned buffer.

    2. This rule is then applied over the physical address space.

    3. For each hit we derive a context and add the hit to the context.

    4. Finally we test all the rules within the same context with the original
    rule set.
    """

    name = "yarascan_physical"

    table_header = [
        dict(name="Owner", width=20),
        dict(name="Rule", width=10),
        dict(name="Offset", style="address"),
        dict(name="HexDump", hex_width=16, width=67),
        dict(name="Context"),
    ]

    # NOTE(review): "hits" is declared here but nothing in this class reads
    # it, so collect() does not stop after that many hits - confirm whether
    # the limit is meant to be enforced.
    __args = [
        dict(name="hits", default=10, type="IntParser",
             help="Quit after finding this many hits."),

        dict(name="yara_expression",
             help="If provided we scan for this yara "
             "expression specified in the yara DSL."),

        dict(name="yara_ast",
             help="If provided we scan for this yara "
             "expression specified in the yara JSON AST."),

        dict(name="start", default=0, type="IntParser",
             help="Start searching from this offset."),

        dict(name="limit", default=2**64, type="IntParser",
             help="The length of data to search."),

        dict(name="context", default=0x40, type="IntParser",
             help="Context to print after the hit."),

        dict(name="pre_context", default=0, type="IntParser",
             help="Context to print before the hit."),
    ]

    scanner_defaults = dict(
        scan_physical=True
    )

    def __init__(self, *args, **kwargs):
        """Compile the user's rules and the derived unified rule.

        Raises:
          plugin.PluginError: if neither yara_expression nor yara_ast was
            given, or if parsing/compiling the rules fails.
        """
        super(WinPhysicalYaraScanner, self).__init__(*args, **kwargs)

        # Check the arguments before entering the try block below, otherwise
        # this error would be caught there and misreported as a compilation
        # failure.
        if not (self.plugin_args.yara_expression or
                self.plugin_args.yara_ast):
            raise plugin.PluginError("A yara expression must be provided.")

        try:
            # The user gave a yara DSL rule.
            if self.plugin_args.yara_expression:
                self.rules = yara.compile(
                    source=self.plugin_args.yara_expression)

                self.parsed_rules = yara_support.parse_yara_to_ast(
                    self.plugin_args.yara_expression)

            # User gave a yara AST.
            else:
                self.parsed_rules = self.plugin_args.yara_ast
                self.rules = yara.compile(
                    source=yara_support.ast_to_yara(self.parsed_rules))

            # Collect the strings of every rule under unique names. The
            # counter is bumped per string so names never collide, even when
            # two rules use the same string identifier.
            all_strings = []
            rule_id = 0
            for parsed_rule in self.parsed_rules:
                name = parsed_rule["name"]
                for k, v in parsed_rule["strings"]:
                    rule_name = "%s_%d_REKALL_%s" % (k, rule_id, name)
                    all_strings.append((rule_name, v))
                    rule_id += 1

            self.parsed_unified_rule = [
                dict(name="XX",
                     strings=all_strings,
                     condition="any of them")
            ]
            self.plugin_args.unified_yara_expression = (
                yara_support.ast_to_yara(self.parsed_unified_rule))

            self.unified_rule = yara.compile(
                source=self.plugin_args.unified_yara_expression)

            self.context_buffer = ContextBuffer(self.session)

        except Exception as e:
            raise plugin.PluginError(
                "Failed to compile yara expression: %s" % e)

    def _owner_from_context(self, context):
        """Turn a context key back into something to show as the owner."""
        # Integer contexts are treated as _EPROCESS offsets. numbers.Integral
        # also covers Python 2 "long" values, which a plain isinstance(...,
        # int) check would miss.
        if isinstance(context, numbers.Integral):
            return self.session.profile._EPROCESS(context)

        return context

    def collect(self):
        """Scan the physical address space and yield one row per hit.

        First the unified rule is matched over every buffer and each hit is
        filed under its contexts. Then the original rules are matched against
        the pseudo buffer of every context, and matches are reported with
        their original offsets.
        """
        address_space = self.session.physical_address_space
        pre_context = self.plugin_args.pre_context
        dump_length = self.plugin_args.context + pre_context

        for buffer_as in scan.BufferASGenerator(
                self.session, address_space,
                self.plugin_args.start,
                self.plugin_args.start + self.plugin_args.limit):
            self.session.report_progress(
                "Scanning buffer %#x->%#x (%#x)",
                buffer_as.base_offset, buffer_as.end(),
                buffer_as.end() - buffer_as.base_offset)

            for match in self.unified_rule.match(data=buffer_as.data):
                for buffer_offset, string_name, value in sorted(match.strings):
                    hit_offset = buffer_offset + buffer_as.base_offset
                    self.context_buffer.add_hit(string_name, hit_offset, value)

        # Now re-run the original expression on all unique contexts.
        it = self.context_buffer.get_combined_context_buffers()
        for context, original_offset_map, pseudo_data in it:
            seen = set()
            self.session.report_progress(
                "Scanning pseudo buffer of length %d", len(pseudo_data))

            # Report any hits of the original sig on this context.
            for match in self.rules.match(data=pseudo_data):
                self.session.report_progress()

                # Only report a single hit of the same rule on the same
                # context.
                dedup_key = (match.rule, context)
                if dedup_key in seen:
                    continue

                seen.add(dedup_key)
                for buffer_offset, _, _ in match.strings:
                    hit_offset = original_offset_map.get(buffer_offset)
                    if hit_offset is None:
                        # Not the start of a recorded value, so there is no
                        # original offset to report.
                        continue

                    yield dict(
                        Owner=self._owner_from_context(context),
                        Rule=match.rule,
                        Offset=hit_offset,
                        HexDump=utils.HexDumpedString(
                            address_space.read(
                                hit_offset - pre_context, dump_length)),
                        Context=pfn.PhysicalAddressContext(
                            self.session, hit_offset)
                    )
341