Package rekall :: Package plugins :: Package common :: Module address_resolver
[frames | no frames]

Source Code for Module rekall.plugins.common.address_resolver

  1  # Rekall Memory Forensics 
  2  # Copyright 2014 Google Inc. All Rights Reserved. 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify 
  5  # it under the terms of the GNU General Public License as published by 
  6  # the Free Software Foundation; either version 2 of the License, or (at 
  7  # your option) any later version. 
  8  # 
  9  # This program is distributed in the hope that it will be useful, but 
 10  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 12  # General Public License for more details. 
 13  # 
 14  # You should have received a copy of the GNU General Public License 
 15  # along with this program; if not, write to the Free Software 
 16  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18   
 19  """The module implements the base class for address resolution.""" 
 20   
 21  __author__ = "Michael Cohen <scudette@gmail.com>" 
 22   
 23  import re 
 24   
 25  from rekall import config 
 26  from rekall import obj 
 27  from rekall_lib import utils 
 28   
 29   
# Declare the "--name_resolution_strategies" option at import time. It is a
# ChoiceArray in the "Interface" group whose default enables all three of
# its choices ("Module", "Symbol" and "Export").
# NOTE(review): nothing in this module reads the option; presumably the
# concrete address resolver plugins consult it -- confirm against callers.
config.DeclareOption(
    "--name_resolution_strategies", default=["Module", "Symbol", "Export"],
    group="Interface", type="ChoiceArray",
    choices=["Module", "Symbol", "Export"])
 34   
 35   
class Module(object):
    """A named range of the virtual address space backed by an executable.

    Every binary mapped into the address space carries its own profile,
    and that profile is what knows the symbols inside the binary. This
    class does no work of its own: it simply bundles the range, the name
    and the profile together so they can be handed around as one object.

    Rekall refers to addresses symbolically through the module's name.

    Attributes are plain public fields:
      name: the module's name (may be None).
      start, end: the bounds of the range, always coerced with int().
      profile: the profile describing this module (may be None).
      session: the session this module belongs to (may be None).
    """

    def __init__(self, name=None, start=None, end=None, profile=None,
                 session=None):
        # Both bounds go through int(); despite the None defaults, omitting
        # either of them therefore raises TypeError.
        self.start, self.end = int(start), int(end)
        self.name = name
        self.profile = profile
        self.session = session

    def __str__(self):
        # Uses the runtime class name so subclasses label themselves.
        return "%s: %s" % (type(self).__name__, self.name)
56 57
class AddressResolverMixin(object):

    """The basic building block for constructing an address resolver plugin.

    An address resolver maintains a collection of Modules and abstracts access
    to specific symbol names within the modules.

    Rekall uses a symbolic notation to refer to specific addresses within the
    address space. The address resolver is responsible for parsing this notation
    and resolving it to an actual address.

    Rules of symbol syntax
    ======================

    The address space is divided into "modules". A module has a name, a start
    address and an end address. Modules can also contain a profile which knows
    about symbols related to that module.

    1. Module reference: The start address of a module can be referred to by
       its name. e.g: "nt", "ntdll", "tcpip".

    2. If a module contains a valid profile, the profile may also know about
       symbols within the module. We can refer to these
       symbols. e.g. "nt!MmGetIoSessionState"

    3. If an exact symbol is not found, it can be referred to with an offset
       from another symbol name. e.g. "nt!MmGetIoSessionState+0x5FE" (Note
       that hexadecimal offsets should carry the "0x" prefix).

    4. If the symbol is preceded with a "*" - it means that the symbol is a
       pointer. The address will be read as a pointer and the symbol name will
       resolve to the address of the pointer's target.

    """

    # The arguments this plugin accepts: a list of symbol names to resolve.
    # NOTE(review): the double underscore makes Python store this attribute
    # as _AddressResolverMixin__args; presumably the plugin framework
    # collects it under that mangled name -- confirm.
    __args = [
        dict(name="symbol", type="ArrayString", default=[],
             help="List of symbols to lookup"),
    ]

    # Columns of the output table; collect() yields one
    # (symbol, address) pair per row.
    table_header = [
        dict(name="Symbol", width=20),
        dict(name="Offset", width=20, style="address"),
    ]

    # The name of the plugin.
    name = "address_resolver"

    # The format of a symbol name. Used by _ParseAddress(), and through it by
    # get_address_by_name(), get_constant_object() and search_symbol().
    ADDRESS_NAME_REGEX = re.compile(
        r"(?P<deref>[*])?"               # Pointer dereference.

        r"((?P<address>0x[0-9A-Fa-f]+)|" # Alternative - Either an address, or,

        r"(?P<module>[A-Za-z_0-9\.\\]+)" # Module name - can include extension
                                         # (.exe, .sys)

        r"!?"                            # ! separates module name from symbol
                                         # name.

        r"(?P<symbol>[^ +-]+)?"          # Symbol name.
        r")"                             # End alternative.

        r"(?P<op> *[+-] *)?"             # Possible arithmetic operator.
        r"(?P<offset>[0-9a-fA-Fx]+)?")   # Possible hex offset.
124 - def __init__(self, **kwargs):
125 super(AddressResolverMixin, self).__init__(**kwargs) 126 self.reset()
127
128 - def reset(self):
129 # A ranged collection of Module() objects. 130 self._address_ranges = utils.RangedCollection() 131 132 # A lookup between module names and the Module object itself. 133 self._modules_by_name = {} 134 135 self._initialized = False
136
137 - def NormalizeModuleName(self, module_name):
138 if module_name is not None: 139 module_name = unicode(module_name) 140 module_name = re.split(r"[/\\]", module_name)[-1] 141 142 return module_name.lower()
143
144 - def _EnsureInitialized(self):
145 """Initialize this address resolver."""
146
147 - def AddModule(self, module):
148 self._address_ranges.insert(module.start, module.end, module) 149 if module.name: 150 self._modules_by_name[module.name] = module
151
152 - def _ParseAddress(self, name):
153 """Parses the symbol from Rekall symbolic notation. 154 155 Raises: 156 TypeError if the expression has a syntax error. 157 158 Returns: 159 a dict containing the different components of the expression. 160 """ 161 m = self.ADDRESS_NAME_REGEX.match(name) 162 if m: 163 capture = m.groupdict() 164 if not capture.get("address"): 165 module = capture.get("module") 166 if not module: 167 raise TypeError("Module name not specified.") 168 169 capture["module"] = self.NormalizeModuleName(module) 170 171 if capture["op"] and not (capture["symbol"] or 172 capture["address"] or 173 capture["module"]): 174 raise TypeError("Operator %s must have an operand." % 175 capture["op"]) 176 177 if capture["op"] and not (capture["symbol"] or capture["address"] or 178 capture["module"]): 179 raise TypeError( 180 "Operator %s must operate on a symbol or address." % 181 capture["op"]) 182 183 return capture 184 185 raise TypeError("Unable to parse %r as a symbol name" % name)
186
187 - def modules(self):
188 self._EnsureInitialized() 189 for _, _, module in self._address_ranges: 190 yield module
191
192 - def GetContainingModule(self, address):
193 """Finds the module containing the specified address. 194 195 Returns: 196 A Module() instance. 197 """ 198 self._EnsureInitialized() 199 address = obj.Pointer.integer_to_address(address) 200 201 _, _, module = self._address_ranges.get_containing_range(address) 202 return module
203
204 - def GetModuleByName(self, name):
205 self._EnsureInitialized() 206 return self._modules_by_name.get(self.NormalizeModuleName(name))
207
208 - def GetAllModules(self):
209 self._EnsureInitialized() 210 return self._modules_by_name.values()
211
212 - def get_constant_object(self, name, target=None, **kwargs):
213 """Instantiate the named constant with these args. 214 215 This method is the main entry point for instantiating constants. It is 216 preferred than calling the profile's method of the same name directly 217 since it will be responsible with loading the right profile. 218 """ 219 self._EnsureInitialized() 220 221 # Parse the name 222 components = self._ParseAddress(name) 223 if not components["symbol"]: 224 raise ValueError("No symbol name specified.") 225 226 module = self._modules_by_name.get(components["module"]) 227 if module is not None: 228 # Just delegate to the module's profile. 229 if module.profile: 230 return module.profile.get_constant_object( 231 components["symbol"], target=target, **kwargs) 232 233 return obj.NoneObject("Profile for name %s unknown." % name, log=True)
234
235 - def get_address_by_name(self, name):
236 """Convert the symbol annotated by name to an address.""" 237 self._EnsureInitialized() 238 239 try: 240 return int(name) 241 except (ValueError, TypeError): 242 pass 243 244 if not isinstance(name, basestring): 245 raise TypeError("Name should be a string.") 246 247 module = None 248 components = self._ParseAddress(name) 249 module_name = self.NormalizeModuleName(components["module"]) 250 address = components["address"] 251 if address is not None: 252 address = int(address, 0) 253 # User did not specify an address 254 else: 255 module = self._modules_by_name.get(module_name) 256 if not module: 257 return obj.NoneObject( 258 "No module %s found" % module_name, log=True) 259 260 # Found the module we use its base address 261 address = module.start 262 263 # Search for a symbol in the module. 264 symbol = components["symbol"] 265 if symbol: 266 # Get the profile for this module. 267 if module.profile: 268 address = module.profile.get_constant(symbol, is_address=True) 269 270 else: 271 return obj.NoneObject("No profile found for module", log=True) 272 273 # Support basic offset operations (+/-). 274 op = components["op"] 275 if op: 276 op = op.strip() 277 # Parse the offset as hex or decimal. 278 offset = int(components["offset"], 0) 279 if op == "+": 280 address += offset 281 elif op == "-": 282 address -= offset 283 else: 284 raise TypeError("Operator '%s' not supported" % op) 285 286 # If the symbol was a dereference, we need to read the address from 287 # this offset. 288 if components.get("deref"): 289 try: 290 address = module.profile.Pointer(address).v() 291 except AttributeError: 292 address = self.session.profile.Pointer(address).v() 293 294 return address
295
296 - def format_address(self, address, max_distance=0x1000000):
297 """Format the address as a symbol name. 298 299 This means to try and find the containing module, the symbol within the 300 module or possibly an offset from a known symbol. e.g. 301 302 nt!PspCidTable 303 nt!PspCidTable + 0x10 304 nt + 0x234 305 306 Returns a list of symbol names for the address. The list is empty if the 307 address is not in a containing module if the nearest known symbol is 308 farther than max_distance away. 309 """ 310 self._EnsureInitialized() 311 312 _, symbols = self.get_nearest_constant_by_address( 313 address, max_distance=max_distance) 314 315 return sorted(symbols)
316
317 - def get_nearest_constant_by_address(self, address, max_distance=0x1000000):
318 """Searches for a known symbol at an address lower than this. 319 320 Returns a tuple (nearest_offset, list of symbol names). 321 """ 322 self._EnsureInitialized() 323 324 address = obj.Pointer.integer_to_address(address) 325 symbols = [] 326 module = self.GetContainingModule(address) 327 if not module or not module.name: 328 return (-1, []) 329 330 if module.profile != None: 331 offset, symbols = module.profile.get_nearest_constant_by_address( 332 address) 333 334 # Symbols not found at all, use module name. 335 if not symbols: 336 if address - module.start > max_distance: 337 return (-1, []) 338 339 if address == module.start: 340 return (module.start, [module.name]) 341 342 return (module.start, [ 343 "%s+%#x" % (module.name, address - module.start)]) 344 345 if address - offset > max_distance: 346 return (-1, []) 347 348 # Exact symbols found. 349 if offset == address: 350 return (offset, ["%s!%s" % (module.name, x) for x in symbols]) 351 352 # Approximate symbol found, check if the profile knows its type. 353 for x in symbols: 354 if x in module.profile.constant_types: 355 type_name = self._format_type(module, x, address) 356 if type_name is not None: 357 return (offset, ["%s!%s" % (module.name, type_name)]) 358 359 return (offset, ["%s!%s+%#x" % (module.name, x, address - offset) 360 for x in symbols])
361
362 - def _format_type(self, module, symbol, offset):
363 """Use the type information to format the address within the struct.""" 364 result = symbol 365 member_obj = module.profile.get_constant_object(symbol) 366 367 while offset > member_obj.obj_offset: 368 if isinstance(member_obj, obj.Struct): 369 members = [ 370 getattr(member_obj, x, None) for x in member_obj.members] 371 member_collection = utils.SortedCollection( 372 (x.obj_offset, x) for x in members) 373 374 member_offset, member_below = ( 375 member_collection.get_value_smaller_than(offset)) 376 377 # No member below this offset? 378 if member_offset is None: 379 result += "+%s" % (offset - member_obj.obj_offset) 380 break 381 382 result += ".%s" % member_below.obj_name 383 member_obj = member_below 384 385 elif isinstance(member_obj, obj.Array): 386 # Next lowest index is a whole number of items. 387 item = member_obj[0] 388 next_lowest_index = ( 389 offset - member_obj.obj_offset) / item.obj_size 390 result += "[%s]" % next_lowest_index 391 392 member_obj = member_obj[next_lowest_index] 393 394 else: 395 result += "+%s" % (offset - member_obj.obj_offset) 396 break 397 398 return result
399
400 - def search_symbol(self, pattern):
401 """Searches symbols for the pattern. 402 403 pattern may contain wild cards (*). Note that currently a module name is 404 required. Example pattern: 405 406 nt!Ps* 407 """ 408 self._EnsureInitialized() 409 result = [] 410 411 components = self._ParseAddress(pattern) 412 module_name = self.NormalizeModuleName(components["module"]) 413 if module_name == None: 414 raise RuntimeError( 415 "Module name must be specified for symbol search.") 416 417 module = self._modules_by_name.get(module_name) 418 if module: 419 # Match all symbols. 420 symbol_regex = re.compile(components["symbol"].replace("*", ".*")) 421 if module.profile: 422 for constant in module.profile.constants: 423 if symbol_regex.match(constant): 424 result.append("%s!%s" % (module_name, constant)) 425 426 return result
427
428 - def collect(self):
429 for symbol in self.plugin_args.symbol: 430 yield symbol, self.get_address_by_name(symbol)
431