Package rekall :: Package plugins :: Package common :: Module profile_index
[frames | no frames]

Source Code for Module rekall.plugins.common.profile_index

  1  # Rekall Memory Forensics 
  2  # Copyright 2014 Google Inc. All Rights Reserved. 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify 
  5  # it under the terms of the GNU General Public License as published by 
  6  # the Free Software Foundation; either version 2 of the License, or (at 
  7  # your option) any later version. 
  8  # 
  9  # This program is distributed in the hope that it will be useful, but 
 10  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 12  # General Public License for more details. 
 13  # 
 14  # You should have received a copy of the GNU General Public License 
 15  # along with this program; if not, write to the Free Software 
 16  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18   
 19  """This module implements profile indexing. 
 20   
 21  Rekall relies on accurate profiles for reliable analysis of memory artifacts. We 
 22  depend on selecting the correct profile from the profile repository, but 
 23  sometimes it's hard to determine the exact profile to use. The profile 
 24  repository has index files that are used to lookup the correct profile quickly, 
 25  based on a limited set of symbols and offsets that are known, or can be easily 
 26  detected, about the image. 
 27  """ 
 28   
 29  __author__ = ( 
 30      "Michael Cohen <scudette@google.com>", 
 31      "Adam Sindelar <adamsh@google.com>", 
 32      "Jordi Sanchez <nop@google.com>" 
 33  ) 
 34   
 35  import hashlib 
 36  from rekall import obj 
 37  from rekall_lib import utils 
class IndexProfileLoader(obj.ProfileSectionLoader):
    """Section loader registered under the name "$INDEX"."""

    name = "$INDEX"

    def LoadIntoProfile(self, session, profile, index):
        """Pass the section data to profile.LoadIndex() and return profile.

        Args:
          session: Unused by this loader.
          profile: The profile being built; must provide LoadIndex().
          index: The data stored under the "$INDEX" section.

        Returns:
          The same profile object that was passed in.
        """
        profile.LoadIndex(index)
        return profile
46
class Index(obj.Profile):
    """A profile which contains an index to locate other profiles."""

    # The index data; stays None until LoadIndex() is called.
    index = None
    base_offset = 0

    # Scores produced by _TestProfile() range from 0 to PERFECT_MATCH.
    PERFECT_MATCH = 1.0
    GOOD_MATCH = 0.75

    def LoadIndex(self, index):
        """Attach the index data to this profile."""
        self.index = index

    def copy(self):
        """Return a copy of this profile that owns a copy of the index."""
        result = super(Index, self).copy()

        # An Index that never had LoadIndex() called still carries the class
        # default of None, which has no copy() method.
        if self.index is None:
            result.index = None
        else:
            result.index = self.index.copy()

        return result

    def _TestSymbols(self, address_space, offset, possible_values):
        """Match any of the possible_values at offset.

        Args:
          address_space: Object whose read(offset, length) returns the data
            to compare against.
          offset: The address to read from.
          possible_values: An iterable of hex encoded strings.

        Returns:
          The data read from the address space for the first value that
          matched, or None if no value matched.
        """
        for value in possible_values:
            value = value.decode("hex")
            data = address_space.read(offset, len(value))
            if value == data:
                return data

        return None

    def _TestProfile(self, address_space, image_base, profile, symbols,
                     minimal_match=1):
        """Score how many comparison points of a profile match the image.

        Args:
          address_space: The address space to read the image from.
          image_base: Added to every offset in symbols before reading.
          profile: The candidate profile's name; only used for logging.
          symbols: An iterable of (offset, possible_values) pairs, where
            possible_values is one hex string or a list of hex strings.
          minimal_match: The least number of points that have to match.

        Returns:
          0 if fewer than minimal_match points (or no points at all)
          matched. Otherwise the fraction matched / (matched + unmatched) as
          a float. Points whose address is not mapped count as neither.
        """
        count_matched = 0
        count_unmatched = 0

        for offset, possible_values in symbols:
            # The possible_values can be a single string which means there is
            # only one option. If it is a list, then any of the symbols may
            # match at this offset to be considered a match.
            if isinstance(possible_values, basestring):
                possible_values = [possible_values]

            # If the offset is not mapped in we can not compare it. Skip it.
            # NOTE(review): this is an equality test rather than "is None",
            # presumably so None-like sentinel objects are also treated as
            # unmapped - confirm before changing it.
            offset_to_check = image_base + offset
            if address_space.vtop(offset_to_check) == None:
                continue

            match = self._TestSymbols(
                address_space=address_space,
                offset=offset_to_check,
                possible_values=possible_values)

            if match:
                self.session.logging.debug(
                    "%s matched offset %#x+%#x=%#x (%r)",
                    profile, offset, image_base, offset+image_base, match)
                count_matched += 1

            else:
                # FIXME: We get here if the comparison point does not match -
                # does it make sense to allow some points to not match? Should
                # we consider these a failure to match?
                count_unmatched += 1

        # Require at least this many comparison points to be matched.
        if count_matched < minimal_match:
            return 0

        if count_matched > 0:
            self.session.logging.debug(
                "%s matches %d/%d comparison points",
                profile, count_matched, count_matched + count_unmatched)

            return float(count_matched) / (count_matched + count_unmatched)

        return 0

    def IndexHits(self, image_base, address_space=None, minimal_match=1):
        """Score every profile in the index against the image.

        Args:
          image_base: The base address the indexed offsets are relative to.
          address_space: The address space to read from. Falls back to the
            session's "default_address_space" parameter.
          minimal_match: Passed through to _TestProfile().

        Yields:
          (match, profile) tuples, one per profile in the index, where match
          is the score returned by _TestProfile().
        """
        # NOTE(review): equality test kept as in _TestProfile() - confirm
        # whether None-like sentinel objects can be passed in here.
        if address_space == None:
            address_space = self.session.GetParameter("default_address_space")

        for profile, symbols in self.index.iteritems():
            match = self._TestProfile(
                address_space=address_space,
                image_base=image_base,
                profile=profile,
                minimal_match=minimal_match,
                symbols=symbols)

            yield match, profile

    def LookupIndex(self, image_base, address_space=None, minimal_match=1):
        """Find the profiles in the index which match the image.

        Yields:
          (profile, match) tuples. Perfect matches are yielded as soon as
          they are found; all other profiles with a score above 0 follow,
          ordered from the highest score to the lowest.
        """
        partial_matches = []
        for match, profile in self.IndexHits(image_base, address_space,
                                             minimal_match=minimal_match):
            if match == self.PERFECT_MATCH:
                # Yield perfect matches right away.
                yield (profile, self.PERFECT_MATCH)

            elif match > 0:
                # Imperfect matches will be saved and returned in order of
                # accuracy.
                partial_matches.append((match, profile))

        partial_matches.sort(reverse=True)
        for match, profile in partial_matches:
            yield (profile, match)
153
class SymbolOffsetIndex(Index):
    """A specialized index that works on symbols-offsets."""

    def __init__(self, *args, **kwargs):
        super(SymbolOffsetIndex, self).__init__(*args, **kwargs)
        # Guarantee a dict so the properties below can always call .get().
        if not self.index:
            self.index = {}

    @utils.safe_property
    def hashes(self):
        """The "$HASHES" section: a dict of hash:profile_id."""
        return self.index.get("$HASHES", {})

    @utils.safe_property
    def traits(self):
        """The "$TRAITS" section: a dict of profile_id:traits."""
        return self.index.get("$TRAITS", {})

    @utils.safe_property
    def profiles(self):
        """The "$PROFILES" section: a dict of profile_id:metadata."""
        return self.index.get("$PROFILES", {})

    @utils.safe_property
    def duplicates(self):
        """Profile ids listed in "$PROFILES" that have no hash of their own."""
        # "$HASHES" is keyed by hash, so the profile ids are its values. Use
        # the profiles property so a missing "$PROFILES" section yields an
        # empty list instead of a TypeError.
        hashed_profiles = set(self.hashes.values())
        return [p for p in self.profiles if p not in hashed_profiles]

    def LookupProfile(self, symbols):
        """Returns which profiles in the index match a dict of symbols.

        Args:
          symbols: A dictionary of symbol:value.

        Returns:
          A list of tuples of (profile, num_matched_traits). Only profiles
          with at least one matching trait are included. An empty list is
          returned when the symbols can not be relativized.
        """
        profiles = []
        try:
            relative_symbols = self.RelativizeSymbols(symbols.copy())
        except ValueError as e:
            self.session.logging.debug(str(e))
            return []

        for profile, traits in self.traits.iteritems():
            matched_traits = 0

            for trait in traits:
                # A trait is a list of symbol-offset tuples. Entries whose
                # symbol is not a string are not compared.
                match = all([relative_symbols.get(symbol) == offset
                             for (symbol, offset) in trait
                             if isinstance(symbol, basestring)])
                if match:
                    matched_traits += 1

            if matched_traits > 0:
                profiles.append((profile, matched_traits))

        return profiles

    def LookupHash(self, profile_hash):
        """Returns the profile with hash profile_hash, or None if unknown."""
        return self.hashes.get(profile_hash)

    @classmethod
    def FilterSymbols(cls, symbols):
        """Filters a dict of symbols, discarding irrelevant ones.

        This base implementation keeps every symbol.
        """
        return symbols

    @classmethod
    def CalculateRawProfileHash(cls, profile):
        """Calculates a hash of the symbols and offsets of a raw profile.

        Args:
          profile: A mapping with a "$CONSTANTS" dict of symbol:offset.

        Returns:
          The SHA-256 hex digest of the sorted "(symbol, offset)" strings of
          the symbols kept by FilterSymbols(), joined with "|".
        """
        # Skip superfluous symbols.
        symbols = profile["$CONSTANTS"]
        ordered_symbol_list = sorted(
            ["(%s, %d)" % (k, v)
             for (k, v) in cls.FilterSymbols(symbols).iteritems()])

        hasher = hashlib.sha256()
        hasher.update("|".join(ordered_symbol_list))
        return hasher.hexdigest()

    @classmethod
    def CalculateRawSymbolsHash(cls, profile):
        """Calculates a hash of the symbol names of a raw profile.

        Unlike CalculateRawProfileHash() the offsets are not hashed and the
        symbols are not filtered.

        Args:
          profile: A mapping with a "$CONSTANTS" dict of symbol:offset.

        Returns:
          The SHA-256 hex digest of the sorted symbol names joined with "|".
        """
        symbols = profile["$CONSTANTS"]
        ordered_symbol_list = sorted(symbols.keys())
        hasher = hashlib.sha256()
        hasher.update("|".join(ordered_symbol_list))
        return hasher.hexdigest()

    def ProfileMetadata(self, profile_name):
        """Returns the stored metadata of profile_name, or None if unknown."""
        return self.profiles.get(profile_name)

    @classmethod
    def ProfileMatchesTrait(cls, profile, trait):
        """Whether a profile matches another profile's trait.

        A trait is a list of tuples (symbol, offset) that uniquely identify
        a profile.
        """
        return all([profile.get_constant(t[0]) == t[1] for t in trait])

    @classmethod
    def RawProfileMatchesTrait(cls, profile, trait):
        """Whether a raw profile (JSON) matches another profile's trait.

        A trait is a list of tuples (symbol, offset) that uniquely identify
        a profile.

        Returns:
          True if every (symbol, offset) of the trait is found in profile.
          False otherwise, including when the comparison itself fails.
        """
        try:
            return all([profile.get(t[0]) == t[1] for t in trait])
        except Exception:
            # Deliberately best effort: malformed input counts as "no match".
            # Exception (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            return False

    @classmethod
    def BuildIndex(cls, hashes=None, traits=None, duplicates=None, spec=None,
                   iomanager=None):
        """Builds a SymbolOffset index from traits, hashes and a spec.

        Args:
          hashes: A dictionary of hash:profile_id. Hashes must be obtained via
            the SymbolOffsetIndex.CalculateRawProfileHash() method.

          traits: A dictionary of profile_id:traits. Traits are the result
            of calling the SymbolOffsetIndex.FindTraits() method.

          duplicates: A list of newly found profile ids that are duplicate.

          spec: An optional dictionary. "implementation" overrides the
            ProfileClass stored in the metadata (default: this class' name)
            and "base_symbol" sets its BaseSymbol.

          iomanager: Passed to GetProfileMetadata() to obtain the metadata
            of every profile that goes into the index.

        Returns:
          A dictionary with a "$METADATA" section and an "$INDEX" section;
          the latter holds "$TRAITS", "$PROFILES" and "$HASHES".

        Raises:
          ValueError: if hashes and traits do not cover the same profiles.
        """
        spec = spec or {}
        metadata = dict(Type="Index",
                        ProfileClass=spec.get("implementation", cls.__name__),
                        BaseSymbol=spec.get("base_symbol"))

        hashes = hashes or {}
        traits = traits or {}

        # Build the set once instead of a new list for every membership test.
        hashed_profiles = set(hashes.values())

        # Assert all profiles that have traits have hashes as well
        if not all(profile in hashed_profiles for profile in traits):
            raise ValueError("Not all profiles with traits have hashes")

        # Assert all profiles that have hashes have traits as well
        if not all(profile in traits for profile in hashed_profiles):
            raise ValueError("Not all profiles with hashes have traits")

        profiles = dict([(profile_id,
                          cls.GetProfileMetadata(
                              iomanager=iomanager, profile_id=profile_id))
                         for profile_id in traits])

        # Duplicates have neither traits nor hashes, but their metadata is
        # still recorded.
        duplicates = duplicates or []
        for duplicate_profile in duplicates:
            profiles[duplicate_profile] = cls.GetProfileMetadata(
                iomanager=iomanager, profile_id=duplicate_profile)

        index = {
            "$METADATA": metadata,
            "$INDEX": {
                "$TRAITS": traits,
                "$PROFILES": profiles,
                "$HASHES": hashes,
            }
        }

        return index

    @classmethod
    def GetProfileMetadata(cls, iomanager=None, profile_id=None):
        """Returns the metadata stored in the index for profile_id.

        Args:
          iomanager: Object whose Metadata(profile_id) returns a mapping
            with a "LastModified" key. Required despite the None default.
          profile_id: The profile to query.

        Returns:
          A dictionary with the profile's "LastModified" value.
        """
        profile_metadata = dict()
        file_mtime = iomanager.Metadata(profile_id)["LastModified"]
        profile_metadata["LastModified"] = file_mtime
        return profile_metadata

    def __len__(self):
        """The number of profiles that have traits in this index."""
        return len(self.traits)

    def __iter__(self):
        """Yields tuples of profile_id, traits.

        Each trait is a list of tuples of (symbol, offset) that make this
        profile unique within the repository.
        """
        # Go through the traits property so an index without a "$TRAITS"
        # section iterates as empty instead of raising AttributeError.
        for profile, traits in self.traits.iteritems():
            yield profile, traits

    def RelativizeSymbols(self, symbols, base_symbol=None):
        """Returns the symbols with offsets made relative to base_symbol.

        If no base_symbol is provided and the index itself doesn't define one
        then returns the symbols as is. Otherwise a copy is returned and the
        dict passed in is left untouched.

        Args:
          symbols: A dictionary of symbol:value
          base_symbol: The name of the symbol to base others' values on.

        Raises:
          ValueError: if base_symbol is missing from symbols or its value is
            falsy (e.g. 0).
        """
        if not base_symbol:
            base_symbol = self.metadata("BaseSymbol")

        if not base_symbol:
            return symbols

        base_value = symbols.get(base_symbol)
        if not base_value:
            # Interpolate here: passing base_symbol as a second argument
            # would make str(e) render the raw argument tuple.
            raise ValueError(
                "Symbol %s not found in profile" % base_symbol)

        new_symbols = symbols.copy()
        for symbol, value in symbols.iteritems():
            new_symbols[symbol] = value - base_value

        return new_symbols
361
class LinuxSymbolOffsetIndex(SymbolOffsetIndex):
    """Specialized symbol-offset index for linux."""

    @classmethod
    def FilterSymbols(cls, symbols):
        """Returns a new dict without the symbols that are irrelevant.

        Symbols with a "." in their name and the "__irf_end" symbol are
        dropped.
        """
        return dict(
            (name, value) for name, value in symbols.iteritems()
            if "." not in name and name != "__irf_end")

    @classmethod
    def BuildIndex(cls, hashes=None, traits=None, duplicates=None, spec=None,
                   iomanager=None):
        """Builds the index like the parent class, with a linux BaseSymbol.

        The arguments are forwarded unchanged to
        SymbolOffsetIndex.BuildIndex(). Afterwards the metadata's BaseSymbol
        is set from spec["base_symbol"], or to "linux_proc_banner" if the
        spec has no such key.
        """
        result = super(LinuxSymbolOffsetIndex, cls).BuildIndex(
            hashes=hashes, traits=traits, spec=spec, duplicates=duplicates,
            iomanager=iomanager)

        # By default, we'll calculate KASLR from linux_proc_banner which is
        # present on all kernels.
        spec = spec or {}
        base_symbol = spec.get("base_symbol", "linux_proc_banner")
        result["$METADATA"]["BaseSymbol"] = base_symbol
        return result
384