
Source Code for Module rekall.type_generator

# Rekall Memory Forensics
#
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Authors:
# Michael Cohen <scudette@google.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

"""This module generates types automatically by disassembling code.

Generally Rekall prefers to use publicly available debugging information for
building profiles for the different operating systems supported. However, in
many cases, the symbols we need are not publicly available.

We could, in theory, distribute hand-written profiles for each OS version, but
this has a number of disadvantages:

- It is very time intensive to prepare hand-written profiles for each version
  of the operating system.

- It is not possible for us to test all possible versions (the Rekall profile
  repository currently has hundreds of Windows kernel builds - we would need to
  maintain profiles for each of these versions, and add new profiles each time
  a hotfix is released).

- We also need to document how we arrive at these offsets in order for others
  to verify our findings. It is important for a forensic application to be as
  transparent as possible, without "magic numbers" in code.
An additional requirement is that the disassembly process be data driven. This
way we can update the exact algorithm within the Rekall profile repository if an
error or bug is discovered without needing to update all current users of the
tool.


How does it work?
=================

The DynamicProfile profile is loaded from the profile repository as normal. As
in a regular profile, the profile is defined by a JSON object. A DynamicProfile,
however, contains an additional section, $DYNAMIC_STRUCT, which will be compiled
into overlays.

{
 "$METADATA": {
   .....
 },

 "$DYNAMIC_STRUCT": {
   "_TCP_LISTENER": {
     "Owner": [
       ["Disassembler", {
         "start": "tcpip.sys!_TcpCovetNetBufferList",
         "rules": [
           "MOV EAX, [ESI+$out]",
           "TEST EAX, EAX",
           "PUSH EAX",
           "CALL DWORD *__imp__PsGetProcessId"
         ],
         "target": "unsigned int"
       }]
     ]
   }
 }
}
"""

__author__ = "Michael Cohen <scudette@gmail.com>"

import re

from rekall import registry
from rekall import obj
from rekall import utils


class DynamicParser(object):
    """A dynamic profile processor base class."""

    __metaclass__ = registry.MetaclassRegistry

    def calculate(self, session):
        """Returns the expected value or a NoneObject."""
        _ = session
        return obj.NoneObject("No value found")


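# Note (explanatory, added for clarity): because DynamicParser uses
# registry.MetaclassRegistry as its metaclass, each subclass (such as
# Disassembler below) is collected into DynamicParser.classes.
# GenerateOverlay() at the bottom of this module retrieves the parser named in
# a $DYNAMIC_STRUCT definition via DynamicParser.classes.get(parser_name).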
class Disassembler(DynamicParser):
    """A constant generator deriving values based on the disassembler."""

    def __init__(self, session=None, name=None, start=None, end=None,
                 length=300, rules=None, max_separation=10):
        """Derive a value from disassembly.

        Args:
          start: Where to start disassembly (Usually a symbol name).
          end: Where to stop disassembly.

          length: If end is not specified, we disassemble at most this many
            bytes.

          rules: A list of rules (see above).
        """
        self.session = session
        self.text_rules = rules
        self.rules = self.CompileRule(rules)
        self.start = start
        self.length = length
        self.end = end
        self.name = name
        self.cached_value = None
        self.max_separation = max_separation

    def __str__(self):
        return "Disassemble %s" % self.start

    def CompileRule(self, rule):
        """Convert the rule into a regular expression.

        Rules are a list of patterns. Each pattern corresponds to a single
        instruction. There can be an arbitrary number of instructions between
        each rule.

        Output is captured using $out (this can only be specified once). Wild
        cards are denoted by *. Wildcards only apply across a single instruction
        (and comment). The following is an example of a rule:

          MOV EAX, [ESI+$out]
          TEST EAX, EAX
          PUSH EAX
          CALL DWORD *__imp__PsGetProcessId
        """
        # Sanitize all regular expression chars in the rule.
        result = []
        for line in rule:
            # Escape regex sensitive chars.
            line = re.sub(r"([()\[\]\+])", r"\\\1", line)

            # Wildcards
            line = re.sub(r"\*", r".+?", line)

            # Capture variable. The same capture variable may be specified more
            # than once in the same rule, so we need to append the instance
            # number of the capture variable to make it unique.
            self.instance = 0
            def _ReplaceCaptureVars(match):
                self.instance += 1
                return r"(?P<%s_%s>[^ \[\]+-]+)" % (
                    match.group(1), self.instance)

            line = re.sub(r"\$([a-zA-Z0-9]+)", _ReplaceCaptureVars, line)
            result.append(re.compile(line, re.S | re.M))

        return result

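    # Illustrative example (added for clarity): for the rule line
    # "MOV EAX, [ESI+$out]", CompileRule() escapes the regex metacharacters,
    # turns "*" into ".+?" and replaces "$out" with a named capture group,
    # yielding a pattern equivalent to:
    #
    #   MOV EAX, \[ESI\+(?P<out_1>[^ \[\]+-]+)\]
    #
    # A matching disassembly line therefore contributes {"out_1": "<value>"}
    # via groupdict(), which _CheckCaptureVariables() later folds back into
    # the plain variable name "out".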
    def calculate(self, session):
        if self.cached_value is not None:
            return self.cached_value

        self.cached_value = self._calculate(session)
        return self.cached_value

    def _FindRuleIndex(self, line):
        for i, rule in enumerate(self.rules):
            # At every line we check if the current rule can be matched - if
            # it can then it is a better match.
            m = rule.search(line)
            if m:
                yield i, m.groupdict()

    def _CheckCaptureVariables(self, vector, contexts):
        """Checks that capture variables are consistent in the vector.

        The vector is a list of disassembly lines which match the rules, e.g.

          [16, 60, 61]

        The context is the capture variables from these rules. In order
        to be valid, the capture variables must all be consistent. For
        example the following is not consistent (since var1 is RAX in
        the first rule and RCX in the second rule):

          contexts[16]
          {'var1': u'RAX'}

          contexts[60]
          {'var1': u'RCX', 'out': u'0x88'}

          contexts[61]
          {}
        """
        result = {}
        for rule_number, item in enumerate(vector):
            rule_context = contexts[rule_number]
            # The capture variables in this rule only.
            rule_capture_vars_values = {}

            for k, v in rule_context[item].iteritems():
                var_name = k.rsplit("_", 1)[0]

                # If this var is previously known, this match must be the same
                # as previously found.
                if var_name in result and v != result[var_name]:
                    return

                # If this capture variable's value is the same as another
                # capture variable's value in the same rule, exclude the
                # match. This means that an expression like:
                #
                #   MOV $var2, [$var1+$out]
                #
                # Necessarily implies that $var1 and $var2 must be different
                # registers.
                if (v in rule_capture_vars_values and
                        rule_capture_vars_values[v] != var_name):
                    return

                result[var_name] = v
                rule_capture_vars_values[v] = var_name

        return result

    def _GetMatch(self, hits, contexts):
        """Find the first vector that matches all the criteria."""
        for vector in self.GenerateVector(hits, [], 0):
            context = self._CheckCaptureVariables(vector, contexts)
            if not context:
                continue

            return (vector, context)

        return [], {}

    def GenerateVector(self, hits, vector, level):
        """Generate candidate hit vectors, one hit line per rule level.

        Hit line numbers must be non-decreasing, and consecutive hits may be
        at most max_separation lines apart.
        """
        for item in hits.get(level, []):
            if vector:
                if item < vector[-1]:
                    continue

                if item > self.max_separation + vector[-1]:
                    break

            new_vector = vector + [item]

            if level + 1 == len(hits):
                yield new_vector

            elif level + 1 < len(hits):
                for result in self.GenerateVector(
                        hits, new_vector, level+1):

                    yield result

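    # Illustrative example (added for clarity): with
    # hits = {0: [5, 11], 1: [7, 30], 2: [9]} and max_separation = 10,
    # GenerateVector(hits, [], 0) yields only [5, 7, 9]; the candidate
    # starting [5, 30, ...] is pruned because 30 > 5 + 10.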
    def _calculate(self, session):
        # Try to cache disassembly to speed things up.
        try:
            disassembler_cache = self.session.GetParameter(
                "disassembler_cache", utils.FastStore())

            disassembly = disassembler_cache.Get(
                (self.start, self.length, self.end))

        except KeyError:
            disassembly = unicode(session.plugins.dis(
                offset=self.start, branch=True,
                length=self.length, end=self.end))

            disassembler_cache.Put(
                (self.start, self.length, self.end), disassembly)

            self.session.SetCache("disassembler_cache", disassembler_cache)

        hits = {}
        contexts = {}

        disassembly = disassembly.splitlines()
        for hit, line in enumerate(disassembly):
            for rule_idx, context in self._FindRuleIndex(line):
                hits.setdefault(rule_idx, []).append(hit)
                contexts.setdefault(rule_idx, {})[hit] = context

        # All the hits must match
        if len(hits) < len(self.rules):
            self.session.logging.error("Failed to find match for %s", self.name)

            # Add some debugging messages here to make diagnosing errors easier.
            for i, rule in enumerate(self.text_rules):
                if i not in hits:
                    self.session.logging.debug("Unable to match rule: %s", rule)

            return 0

        vector, context = self._GetMatch(hits, contexts)

        if len(vector) < len(self.rules):
            self.session.logging.error("Failed to find match for %s.",
                                       self.name)
            return 0

        self.session.logging.debug("Found match for %s", self.name)
        for x in vector:
            self.session.logging.debug(disassembly[x])

        return int(context.get("out", "0"), 0)


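# Illustrative sketch (hypothetical values, added for clarity): a Disassembler
# parser takes the same arguments that appear in a $DYNAMIC_STRUCT definition
# and can be exercised directly:
#
#   parser = Disassembler(
#       session=session, name="_TCP_LISTENER.Owner",
#       start="tcpip.sys!_TcpCovetNetBufferList",
#       rules=["MOV EAX, [ESI+$out]",
#              "TEST EAX, EAX",
#              "PUSH EAX",
#              "CALL DWORD *__imp__PsGetProcessId"])
#   offset = parser.calculate(session)  # the captured $out as an int, 0 on failure.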
class DynamicProfile(obj.Profile):
    """A Dynamic profile which parses its overlays from $DYNAMIC_STRUCT."""


def GenerateOverlay(session, dynamic_definition):
    """Parse the definition and generate an overlay from it."""
    overlay = {}
    for type_name, definition in dynamic_definition.items():
        type_overlay = {}
        overlay[type_name] = [None, type_overlay]

        for field_name, attempts in definition.items():
            parsers = []
            for (parser_name, kwargs) in attempts:
                kwargs = kwargs.copy()
                target = kwargs.pop("target", None)
                target_args = kwargs.pop("target_args", {})
                name = "%s.%s" % (type_name, field_name)

                parsers.append(DynamicParser.classes.get(parser_name)(
                    session=session, name=name, **kwargs))

            # Make the offset a callable
            # Bind parameters in lambda:
            # pylint: disable=dangerous-default-value,cell-var-from-loop
            def offset_cb(x, parsers=parsers, field_name=field_name):
                for p in parsers:
                    result = p.calculate(x.obj_session)
                    if result:
                        return result
                    else:
                        session.logging.debug(
                            "Unable to find %s.%s via %s", x.obj_name,
                            field_name, p)

                return 0

            type_overlay[field_name] = [offset_cb, [target, target_args]]

    return overlay
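
# Illustrative usage (hypothetical, added for clarity): given a session and a
# $DYNAMIC_STRUCT section shaped like the example in the module docstring,
# GenerateOverlay() returns an overlay in which each field offset is a callable
# that runs the configured parsers:
#
#   overlay = GenerateOverlay(session, definition["$DYNAMIC_STRUCT"])
#   # overlay["_TCP_LISTENER"][1]["Owner"] == [offset_cb, ["unsigned int", {}]]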