Package rekall :: Package plugins :: Package tools :: Module profile_tool
[frames] | no frames]

Source Code for Module rekall.plugins.tools.profile_tool

   1  #!/usr/bin/env python2 
   2   
   3  # Rekall Memory Forensics 
   4  # Copyright 2013 Google Inc. All Rights Reserved. 
   5  # 
   6  # Author: Michael Cohen scudette@google.com 
   7  # 
   8  # This program is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or (at 
  11  # your option) any later version. 
  12  # 
  13  # This program is distributed in the hope that it will be useful, but 
  14  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
  16  # General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with this program; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
  21  # 
  22   
  23  """ 
  24  Converts Volatility profile files into the Rekall format. 
  25   
  26  The Volatility profiles are derived by dumping debugging symbols using various 
  27  means into a zip file: 
  28   
  29  - On Linux the output of dwarfdump is stored and parsed on each execution. The 
  30    constants are just copied from the System map. 
  31   
  32  - On OSX the symbols are produced using the dsymutil tool while the vtypes are 
  33    python files. 
  34   
  35  - On Windows the vtypes are python files which must be executed. 
  36   
  37  Rekall profiles are more structured. All profiles contain a metadata file within 
  38  the zip archive called "metadata" which simply contains key value pairs encoded 
  39  using json. For example: 
  40   
  41  { 
  42   # This must point at the implementation of this profile (i.e. the class which 
  43   # should be created). Valid values include Linux32, Linux64, WinXPSP1x86 
  44   # etc. You can use the 'info' plugin to see which classes already exist. 
  45   
  46   "ProfileClass": "Linux64" 
  47   
  48   # This is the name of a member inside this zip file which contains the 
  49   # constant list. 
  50   
  51   "Constants":  "System.map.json" 
  52   
  53   # This points at a json file within this zip file which contains the vtype 
  54   # definitions for this profile. 
  55   
  56   "VTypes": "vtypes.json" 
  57  } 
  58   
  59  We chose to use json to store the vtype data structures because loading json 
  60  files in python is extremely quick and leads to much faster start up times than 
  61  having to parse the vtypes in other formats (We do not allow loading of vtypes 
  62  inside python files because this may lead to arbitrary code execution since the 
  63  vtype file needs to be evaluated.). 
  64   
  65  Often users already have profiles created for Volatility which they want to use 
  66  in Rekall. Rather than fall back to the slow and inefficient parsing of these 
  67  profiles, Rekall allows users to convert the old profile into a new, efficient 
  68  profile representation. This is what this module does with the convert command. 
  69   
  70  For example, suppose you have an existing profile created for use in Volatility, 
  71  you can just convert it to the rekall format: 
  72   
  73  ./tools/profile_converter.py convert Ubuntu-3.0.0-32-generic-pae.zip \ 
  74     Ubuntu-3.0.0-32-generic-pae.rekall.json 
  75   
  76  $ ls -l Ubuntu-3.0.0-32-generic-pae.* 
  77  -rw-r----- 1 scudette g 643711 Dec 12 02:12 Ubuntu-3.0.0-32-generic-pae.rekall.json 
  78  -rw-r----- 1 scudette g 726480 Dec 12 00:30 Ubuntu-3.0.0-32-generic-pae.zip 
  79   
  80  Now simply specify the rekall profile using the --profile command line arg. 
  81  """ 
  82   
  83  __author__ = ( 
  84      "Michael Cohen <scudette@google.com>", 
  85      "Jordi Sanchez <nop@google.com>" 
  86  ) 
  87   
  88  import gzip 
  89  import itertools 
  90  import json 
  91  import os 
  92  import re 
  93  import StringIO 
  94   
  95  from rekall import io_manager 
  96  from rekall import obj 
  97  from rekall import plugin 
  98  from rekall import testlib 
  99   
 100  from rekall.plugins.common import profile_index 
 101  from rekall.plugins.overlays.linux import dwarfdump 
 102  from rekall.plugins.overlays.linux import dwarfparser 
 103  from rekall.plugins.windows import common 
 104   
 105  from rekall_lib import registry 
 106  from rekall_lib import utils 
class ProfileConverter(object):
    """Abstract base class for all profile converters.

    Concrete converters register themselves through the metaclass registry
    and must override Convert().
    """

    __metaclass__ = registry.MetaclassRegistry
    __abstract = True

    def __init__(self, input, profile_class=None, session=None):
        self.input = input
        self.session = session
        self.profile_class = profile_class

    def SelectFile(self, regex):
        """Return the content of the first input file whose name matches regex."""
        for filename in self.input.ListFiles():
            if re.search(regex, filename, re.I):
                return self.input.Open(filename).read()

    def BuildProfile(self, system_map, vtypes, config=None):
        """Assemble the standard sections of an output profile dict."""
        _ = config
        # Sorting the json keys at serialization time usually achieves a much
        # smaller file size due to better compression. It is worth doing once
        # on conversion.
        metadata = dict(ProfileClass=self.profile_class,
                        Type="Profile", Version=1)
        return {
            "$METADATA": metadata,
            "$CONSTANTS": system_map,
            "$STRUCTS": vtypes,
        }

    def Convert(self):
        raise RuntimeError("Unknown profile format.")
class LinuxConverter(ProfileConverter):
    """Convert an existing Linux profile zip file.

    Since building the linux profile often happens on the target system, where
    Rekall is not normally running, we just convert the result of running Make
    in the tools/linux/ directory. See tools/linux/README for details.

    In short:

    - Run make in tools/linux/ directory. This will build module_dwarf.ko with
      debugging symbols.

    - If you have zip installed, the above step will create the required zip
      file. Otherwise Create a zip file manually with module_dwarf.ko and
      /boot/System.map-`uname -r` (Sometimes when running make not as the root
      user, its not possible to read the System.map file).

    Finally use this tool to convert the profile to a Rekall compatible profile.
    """
    BASE_PROFILE_CLASS = "Linux"

    def ParseSystemMap(self, system_map):
        """Parse the system map, returning a dict of symbol_name: offset."""
        sys_map = {}
        # get the system map
        for line in system_map.splitlines():
            try:
                (address, _, symbol) = line.strip().split()
                # Mask to 48 bits - the canonical portion of an x86-64
                # virtual address.
                sys_map[symbol] = long(address, 16) & 0xFFFFFFFFFFFF
            except ValueError:
                # Lines without exactly three fields, or with a non-hex
                # address, are not symbol entries - skip them.
                pass

        return sys_map

    def ParseConfigFile(self, config_file):
        """Parse the kernel .config file returning it as a dictionary."""
        config = {}
        for line in config_file.splitlines():
            if line.startswith("#"):
                continue
            try:
                # Split on the first "=" only: option values may themselves
                # contain "=" (e.g. CONFIG_CMDLINE="root=/dev/sda"). A bare
                # split("=") would raise ValueError and silently drop such
                # options.
                (config_param, value) = line.strip().split("=", 1)
                # Remove leading and trailing spaces from the config_param.
                config_param = config_param.lstrip(" \t").rstrip(" \t")
                # Massage the value a bit so plugins trying to use them get more
                # useful values. This deals with config options like
                # CONFIG_DEFAULT_HOSTNAME="(none)" having a value of
                # str("(none)") instead of str("\"(none)\"").
                value = value.rstrip(" \t").lstrip(" \t")
                value = value.rstrip('"\'').lstrip('"\'')
                config[config_param] = value
            except ValueError:
                # Lines without any "=" (e.g. blank lines) are not options.
                pass

        return config

    def BuildProfile(self, system_map, vtypes, config=None):
        """Write all the components needed for the output profile."""
        # Try to guess the bit size of the system if not provided.
        if self.profile_class is None:
            self.profile_class = self.BASE_PROFILE_CLASS

        enums = vtypes.pop("$ENUMS", {})
        reverse_enums = vtypes.pop("$REVENUMS", {})

        result = super(LinuxConverter, self).BuildProfile(system_map, vtypes)
        result["$CONFIG"] = config or dict()
        result["$ENUMS"] = enums
        result["$REVENUMS"] = reverse_enums

        self.profile_class = self.BASE_PROFILE_CLASS
        # Guard against an empty system map: max() raises ValueError on an
        # empty sequence. An empty map defaults to the I386 branch below.
        largest_offset = max(system_map.values() or [0])
        if "CONFIG_CPU_MIPS32" in result["$CONFIG"]:
            result["$METADATA"]["arch"] = "MIPS"

        elif "arm_syscall" in result["$CONSTANTS"]:
            result["$METADATA"]["arch"] = "ARM"

        elif largest_offset > 2 ** 32:
            result["$METADATA"]["arch"] = "AMD64"
        else:
            result["$METADATA"]["arch"] = "I386"
        return result

    def Convert(self):
        """Convert the input zip to a Rekall profile dict.

        Raises:
          RuntimeError: if the input does not look like a Linux profile.
        """
        # Load the config file if it exists
        config = self.SelectFile("(^|/)config")
        if config:
            config = self.ParseConfigFile(config)

        # Check for a linux profile. It should have a System.map in it.
        system_map = self.SelectFile("(^|/)System.map")
        if system_map:
            # Parse the system map file.
            system_map = self.ParseSystemMap(system_map)

            ko_file = self.SelectFile(r"module.*\.ko$")
            if ko_file:
                self.session.logging.info(
                    "Converting Linux profile with ko module.")
                parser = dwarfparser.DWARFParser(StringIO.StringIO(ko_file),
                                                 session=self.session)

                profile_file = self.BuildProfile(system_map, parser.VType(),
                                                 config=config)
                return profile_file

            dwarf_file = self.SelectFile(r"\.dwarf$")
            if dwarf_file:
                self.session.logging.info(
                    "Converting Linux profile with dwarf dump output")
                parser = dwarfdump.DWARFParser()
                for line in dwarf_file.splitlines():
                    parser.feed_line(line)

                # The dwarfdump module returns python code so we must exec it.
                l = {}
                exec(parser.print_output(), {}, l)

                profile_file = self.BuildProfile(system_map, l["linux_types"],
                                                 config=config)
                return profile_file

        raise RuntimeError("Unknown profile format.")
class OSXConverter(LinuxConverter):
    """Automatic conversion from Volatility OSX style profiles.

    You can generate one of those using the instructions here:
    http://code.google.com/p/volatility/wiki/MacMemoryForensics#Building_a_Profile
    """
    BASE_PROFILE_CLASS = "Darwin"

    DLSYM_REGEX = re.compile("([^ ]+) '([^ ]+)'$")

    def ParseSystemMap(self, system_map):
        """Parse dsymutil output into a dict of symbol_name: offset."""
        sys_map = {}
        for line in system_map.splitlines():
            # Detect the architecture from the symbol table header if the
            # caller did not force a profile class.
            if self.profile_class is None and "Symbol table for" in line:
                last_part = line.split()[-1]
                if last_part == "(x86_64)":
                    self.profile_class = "Darwin64"
                elif last_part == "(i386)":
                    self.profile_class = "Darwin32"
                else:
                    raise RuntimeError(
                        "Unknown Darwin Architecture %s" % last_part)

            # We only care about few things like functions and global symbols.
            if any(tag in line for tag in ("N_FUN", "EXT", "N_STSYM")):
                match = self.DLSYM_REGEX.search(line)
                if match is None:
                    continue
                try:
                    sys_map[match.group(2)] = long(match.group(1), 16)
                except ValueError:
                    # Not a hex address - skip the line.
                    pass

        return sys_map

    def Convert(self):
        """Convert the input zip to a Rekall Darwin profile dict."""
        # Check for an OSX profile.
        system_map = self.SelectFile("dsymutil$")
        if system_map:
            # Parse the system map file.
            system_map = self.ParseSystemMap(system_map)

            vtype_file = self.SelectFile(r"\.vtypes$")
            if vtype_file:
                self.session.logging.info(
                    "Converting Darwin profile with vtypes dump output")

                # The dwarfdump module returns python code so we must exec it.
                namespace = {}
                exec(vtype_file, {}, namespace)

                return self.BuildProfile(system_map, namespace["mac_types"])

        raise RuntimeError("Unknown profile format.")
class ConvertProfile(plugin.TypedProfileCommand, plugin.Command):
    """Convert a profile from another program to the Rekall format.

    The Rekall profile format is optimized for loading at runtime. This plugin
    produces a Rekall profile from a variety of sources, including:

    - Linux debug compiled kernel module (see tool/linux/README)
    - OSX Dwarfdump outputs.
    """

    __name = "convert_profile"

    __args = [
        dict(name="profile_class",
             help="The name of the profile implementation to specify. "
             "If not specified, we autodetect."),

        dict(name="converter",
             help="The name of the converter to use. "
             "If not specified autoguess."),

        dict(name="source", positional=True, required=True,
             help="Filename of profile to read."),

        dict(name="out_file", positional=True, required=True,
             help="Path for output file."),
    ]

    def ConvertProfile(self, input):
        """Converts the input profile to a new standard profile in output."""
        # Try each known converter in turn until one recognizes the format.
        for converter_cls in (LinuxConverter, OSXConverter):
            try:
                return converter_cls(input, session=self.session).Convert()
            except RuntimeError:
                # This converter did not recognize the input - try the next.
                pass

        raise RuntimeError(
            "No suitable converter found - profile not recognized.")

    def render(self, renderer):
        # An explicitly requested converter bypasses autodetection entirely.
        if self.plugin_args.converter:
            cls = ProfileConverter.classes.get(self.plugin_args.converter)
            if not cls:
                raise IOError(
                    "Unknown converter %s" % self.plugin_args.converter)

            return cls(self.plugin_args.source,
                       profile_class=self.plugin_args.profile_class).Convert()

        try:
            input = io_manager.Factory(
                self.plugin_args.source, session=self.session, mode="r")
        except IOError:
            self.session.logging.critical(
                "Input profile file %s could not be opened.",
                self.plugin_args.source)
            return

        with input:
            profile = self.ConvertProfile(input)
            if profile:
                with renderer.open(
                        filename=self.plugin_args.out_file,
                        mode="wb") as output:
                    output.write(utils.PPrint(profile))
                    self.session.logging.info("Converted %s to %s",
                                              input, output.name)
class TestConvertProfile(testlib.DisabledTest):
    """Disabled test harness entry for the convert_profile plugin."""
    # Command line the test framework would invoke if this test were enabled.
    PARAMETERS = dict(commandline="convert_profile")
397
class TestBuildIndex(testlib.DisabledTest):
    """Disabled test harness entry for the build_index plugin."""
    # Command line the test framework would invoke if this test were enabled.
    PARAMETERS = dict(commandline="build_index")
401
402 403 -class BuildIndex(plugin.Command):
404 """Generate a profile index file based on an index specification. 405 406 The index specification is currently a yaml file with the following 407 structure: 408 409 ``` 410 base_symbol: (string) # OPTIONAL Compute ALL offsets as relative to this 411 symbol. This includes MaxOffset and MinOffset. 412 symbols: (array of dicts) # A list of symbols to index. 413 - 414 name: (string) # Symbol name 415 data: (string) # Data that should be at the symbol's offset 416 shift: (int) # OPTIONAL Adjust symbol offset by this number 417 ``` 418 419 ## Example: 420 421 ``` 422 path: win32k.sys 423 symbols: 424 - 425 # The name of the symbol we test for. 426 name: "??_C@_1BO@KLKIFHLC@?$AAG?$AAU?$AAI?$AAF?$AAo?$AAn?$AAt?$AA?4?$AAH?$AAe?$AAi?$AAg?$AAh?$AAt?$AA?$AA@" 427 428 # The data we expect to find at that offset. 429 data: "47005500490046006f006e0074002e00480065006900670068007400" 430 431 - 432 name: "wcschr" 433 shift: -1 434 data: "90" 435 ``` 436 437 The result is an index profile. This has an $INDEX section which is a dict, 438 with keys being the profile name, and values being a list of (offset, match) 439 tuples. For example: 440 441 ``` 442 { 443 "$INDEX": { 444 "tcpip.sys/AMD64/6.0.6001.18000/0C1A1EC1D61E4508A33F5212FC1B37202": [[1184600, "495053656344656c657465496e626f756e644f7574626f756e64536150616972"]], 445 "tcpip.sys/AMD64/6.0.6001.18493/29A4DBCAF840463298F40190DD1492D02": [[1190376, "495053656344656c657465496e626f756e644f7574626f756e64536150616972"]], 446 "tcpip.sys/AMD64/6.0.6002.18272/7E79532FC7E349C690F5FBD16E3562172": [[1194296, "495053656344656c657465496e626f756e644f7574626f756e64536150616972"]], 447 ... 448 449 "$METADATA": { 450 "ProfileClass": "Index", 451 "Type": "Profile" 452 "MaxOffset": 546567 453 "MinOffset": 0 454 } 455 } 456 ``` 457 """ 458 459 __name = "build_index" 460 461 @classmethod
    def args(cls, parser):
        """Declare the command line arguments this plugin accepts."""
        super(BuildIndex, cls).args(parser)
        # The yaml index specification driving the build (see class docstring).
        parser.add_argument(
            "spec", default=None,
            help="An Index specification file.")

        # Where the profile repository lives on disk.
        parser.add_argument(
            "--root", default="./",
            help="Repository root path.")
471
472 - def __init__(self, spec=None, root="./", manager=None, **kwargs):
473 super(BuildIndex, self).__init__(**kwargs) 474 self.spec = spec 475 if manager is None: 476 manager = io_manager.DirectoryIOManager( 477 root, session=self.session) 478 self.io_manager = manager
479 480 @staticmethod
481 - def _decide_base(data, base_symbol):
482 if base_symbol == None: 483 return 0 484 485 return data["$CONSTANTS"].get(base_symbol, None)
486
487 - def ValidateDataIndex(self, index):
488 """Check the index for collisions. 489 490 An index collision occurs when all the comparison points in one GUID are 491 also contained in another GUID. If these points match it is impossible 492 to distinguish between the two indexes. We need to issue a warning so 493 the user can add additional comparison points to resolve the ambiguity. 494 """ 495 errors = 0 496 497 # The following algorithm is very slow O(n^2) but there aren't that many 498 # profiles in the index. 499 for profile, data in index.iteritems(): 500 for profile2, data2 in index.iteritems(): 501 overlap = [] 502 503 # Don't report collisions with the same profile. 504 if profile == profile2: 505 continue 506 507 for condition in data: 508 if condition in data2: 509 overlap.append(condition) 510 511 if overlap == data: 512 # Some profiles are just rebuilt (so they have a new GUID) 513 # but they are otherwise identical. We can never distinguish 514 # between them so it does not matter. 515 if self._AreProfilesEquivalent(profile, profile2): 516 continue 517 518 errors += 1 519 self.session.logging.error( 520 "Profile %s and %s are ambiguous, please add more " 521 "comparison points.", profile, profile2) 522 523 self.session.logging.error( 524 "Run the following command:\nzdiff %s.gz %s.gz", 525 profile, profile2) 526 527 if errors: 528 self.session.logging.error("Index with errors: %s", errors)
529
530 - def _AreProfilesEquivalent(self, profile, profile2):
531 # Check if the two profiles are equivalent: 532 profile_obj = self.io_manager.GetData(profile) 533 profile2_obj = self.io_manager.GetData(profile2) 534 535 for section in ["$CONSTANTS", "$FUNCTIONS"]: 536 if profile_obj.get(section) != profile2_obj.get(section): 537 return False 538 539 self.session.logging.info("Profile %s and %s are equivalent", 540 profile, profile2) 541 return True
542
    def BuildDataIndex(self, spec):
        """Builds a data index from the specification.

        A data index is an index which collates known data at known offsets
        in memory. We then apply the index to a memory location to discover
        the most likely match there.

        Returns:
          A profile dict with $METADATA (including Min/MaxOffset bounds)
          and an $INDEX mapping profile path -> [(offset, values)].
        """
        index = {}
        metadata = dict(Type="Profile",
                        ProfileClass="Index")

        result = {"$METADATA": metadata,
                  "$INDEX": index}

        # Running bounds over all recorded offsets, used by readers to limit
        # how much of the image they must scan.
        highest_offset = 0
        lowest_offset = 2 ** 64
        base_sym = spec.get("base_symbol", None)

        for relative_path, data in self._GetAllProfiles(spec["path"]):
            for sym_spec in spec["symbols"]:
                shift = sym_spec.get("shift", 0)

                # Profiles without constants cannot contribute to the index.
                if "$CONSTANTS" not in data:
                    continue

                offset = data["$CONSTANTS"].get(sym_spec["name"])
                if offset is None:
                    # Maybe its a function.
                    offset = data["$FUNCTIONS"].get(sym_spec["name"])
                    if offset is None:
                        continue

                # Offsets (as well as min/max offset) are computed
                # relative to base.
                base = self._decide_base(
                    data=data,
                    base_symbol=base_sym)

                # If we got a base symbol but it's not in the constants
                # then that means this profile is incompatible with this
                # index and should be skipped.
                if base == None:
                    continue

                # We don't record the offset as reported by the profile
                # but as the reader is actually going to use it.
                offset = offset + shift - base

                values = []
                # If a symbol's expected value is prefixed with
                # 'str:' then that means it was given to us as
                # human-readable and we need to encode it. Otherwise it
                # should already be hex-encoded.
                # NOTE(review): this iterates sym_spec["data"] element-wise,
                # which implies "data" is a list of strings here, although
                # the class docstring example shows a single string - confirm
                # against the spec format.
                for value in sym_spec["data"]:
                    if value.startswith("lstr:"):
                        # Human readable text, to be matched as UTF-16LE.
                        value = value[5:].encode("utf-16le").encode("hex")

                    elif value.startswith("str:"):
                        value = value[4:].encode("hex")

                    else:
                        # Already hex - just validate it (Python 2 hex codec
                        # raises TypeError on invalid input).
                        try:
                            value.decode("hex")
                        except TypeError:
                            raise ValueError(
                                "String %r must be encoded in hex, "
                                "or prefixed by str: or lstr:" % value)

                    values.append(value)

                index.setdefault(relative_path, []).append((offset, values))

                # Compute the lowest and highest offsets so the reader
                # can optimize reading the image.
                lowest_offset = min(lowest_offset, offset)
                highest_offset = max(
                    highest_offset, offset + len(sym_spec["data"]))

        metadata["BaseSymbol"] = base_sym
        metadata["MaxOffset"] = highest_offset
        metadata["MinOffset"] = lowest_offset

        # Make sure to issue warnings if the index is not good enough.
        self.ValidateDataIndex(index)

        return result
629
630 - def BuildStructIndex(self, spec):
631 """Builds a Struct index from specification. 632 633 A Struct index is a collection of struct offsets for certain members 634 over all available versions. 635 """ 636 index = {} 637 metadata = dict(Type="Profile", 638 ProfileClass=spec.get("implementation", "Index")) 639 640 result = {"$METADATA": metadata, 641 "$INDEX": index} 642 643 for relative_path, data in self._GetAllProfiles(spec["path"]): 644 try: 645 structs = data["$STRUCTS"] 646 except KeyError: 647 continue 648 649 metadata = index[relative_path] = data["$METADATA"] 650 offsets = metadata["offsets"] = {} 651 for struct, fields in spec["members"].items(): 652 for field in fields: 653 try: 654 offsets["%s.%s" % (struct, field)] = ( 655 structs[struct][1][field][0]) 656 except KeyError: 657 continue 658 659 return result
660
661 - def _SymbolIsUnique(self, profile_id, symbol, profiles):
662 """Does symbol uniquely identify profile_id within profiles. 663 664 Args: 665 profile_id: The unique identifier of symbol's profile. 666 symbol: The symbol to test. 667 profiles: A dictionary of profile_id:symbol_dict entries where 668 symbol_dict is a dictionary of symbol:offset entries. 669 670 Every profile in profiles must be unique. That is, two entries must 671 not share the exact same set of symbol:offset pairs. 672 673 """ 674 675 offset = profiles[profile_id].get(symbol) 676 677 # If the symbol doesn't exist it can't be unique 678 if offset is None: 679 return False 680 681 unique = True 682 683 for other_id, other_symbols in profiles.iteritems(): 684 # Skip comparing this profile against itself. 685 if profile_id == other_id: 686 continue 687 688 # Find duplicates 689 if offset == other_symbols.get(symbol): 690 unique = False 691 break 692 693 return unique
694
695 - def _FindNewProfiles(self, index, target):
696 """Finds new profiles in the repository that were not in the index.""" 697 698 new_profiles = 0 699 700 # Walk all files to find new profiles 701 for profile_id in self.io_manager.ListFiles(): 702 if not profile_id.startswith(target): 703 continue 704 705 # Skip known duplicates. 706 # Skip profiles that haven't changed. 707 file_mtime = self.io_manager.Metadata(profile_id)["LastModified"] 708 709 try: 710 profile_mtime = index.ProfileMetadata( 711 profile_id)["LastModified"] 712 713 # If the current file is not fresher than the old file, we 714 # just copy the metadata from the old profile. Allow 1 715 # second grace for float round ups. 716 if profile_mtime+1 >= file_mtime: 717 continue 718 except (KeyError, TypeError): 719 # Profile doesn't exist in the index yet. 720 # See if it was a duplicate. 721 pass 722 723 try: 724 data = self.io_manager.GetData(profile_id) 725 if "$CONSTANTS" not in data: 726 self.session.logging.debug( 727 "File %s doesn't look like a profile, skipping...", 728 profile_id) 729 continue 730 data["$CONSTANTS"] = index.RelativizeSymbols( 731 data["$CONSTANTS"], "linux_proc_banner") 732 # Free up some memory 733 del data["$CONFIG"] 734 del data["$STRUCTS"] 735 except ValueError as e: 736 self.session.logging.error("ERROR loading %s: %s", 737 profile_id, e) 738 continue 739 740 new_profiles += 1 741 self.session.report_progress( 742 "[STEP 1/6] Found %d new profiles: %s", 743 new_profiles, profile_id) 744 yield profile_id, data
745 746
747 - def _FindProfilesWithSymbolOffset(self, symbol_name, symbol_offset, 748 profiles=None):
749 """Returns a set of profile_ids that have symbol_name: symbol_offset.""" 750 matching_profiles = set() 751 for profile_id, symbols in profiles.iteritems(): 752 if symbols.get(symbol_name) == symbol_offset: 753 matching_profiles.add(profile_id) 754 return matching_profiles
755
    def _FindTraits(self, profile_id=None, profiles=None, num_traits=1,
                    trait_length=1, first_try_symbols=None):
        """Finds traits of profile against other_profiles.

        Args:
          profile_id: The id of the profile to find traits for within profiles
          profiles: A dict of profile:symbols tuples where symbols is a dict
            of symbol:value.
          num_traits: How many traits to find.
          trait_length: How many symbols to consider per trait.
          first_try_symbols: A list of symbols to try first.

        Returns:
          A list of up to num_traits traits, each a list of trait_length
          (symbol, offset) pairs.
        """
        found_traits = []
        profile_symbols = profiles.get(profile_id)

        # The set we're looking for.
        exit_set = set([profile_id])

        # Store a pool of symbols
        symbol_pool = profile_symbols.keys()
        if first_try_symbols:
            # Reorder these symbols so they are tried first
            for symbol in reversed(first_try_symbols):
                try:
                    symbol_pool.remove(symbol)
                except ValueError:
                    # Symbol not present in the pool - it is still inserted
                    # at the front below.
                    pass
                symbol_pool.insert(0, symbol)

        # combinations() respects symbol_pool order, so the first_try_symbols
        # (if any) are tried first.
        for trait_symbols in itertools.combinations(symbol_pool, trait_length):

            # Seed the candidate set from the first symbol, then narrow it
            # with each additional symbol of the trait.
            symbol = trait_symbols[0]
            offset = profile_symbols.get(symbol)
            intersection_set = self._FindProfilesWithSymbolOffset(
                symbol, offset, profiles=profiles)

            for next_symbol in trait_symbols[1:]:
                next_offset = profile_symbols.get(next_symbol)
                next_set = self._FindProfilesWithSymbolOffset(
                    next_symbol, next_offset,
                    profiles=profiles)

                # For a trait to be unique, the resulting set of performing
                # the intersection of the sets of profiles containing the
                # symbol-offset tuples must be the original profile_id.
                intersection_set &= next_set

                # If the comparison set is empty, we're done
                if intersection_set == exit_set:
                    break

            if intersection_set == exit_set:
                # Found a trait
                trait = [(s, profile_symbols.get(s)) for s in trait_symbols]
                found_traits.append(trait)
                if len(found_traits) == num_traits:
                    break
        return found_traits
814
815 - def BuildSymbolsIndex(self, spec):
816 """Builds an index to identify profiles based on their symbols-offsets. 817 818 The index stores traits for each profile. A trait is a combination of 819 1 or more symbol-offset pairs that uniquely identify it within the 820 current profile repository. 821 822 The code handles: 823 - Incremental updates of the index. Adding a new profile to the index 824 doesn't trigger recomputing the entire index. 825 - Detection of duplicates. If a profile is to be added that's already 826 in the index, it will be detected and skipped. 827 - Clash detection. If a new profile has some symbol-offsets that were 828 traits of other profiles, the profile whose traits are not unique 829 anymore will be found and its index rebuilt. 830 """ 831 832 directory_to_index = spec.get("path", "Linux") 833 index_path = os.path.join(directory_to_index, "index") 834 835 # Load the current index from the index directory. 836 #index = self.session.LoadProfile(index_path, use_cache=False) 837 index = obj.Profile.LoadProfileFromData( 838 self.io_manager.GetData(index_path), name=index_path, 839 session=self.session) 840 841 # A list of duplicate profiles to update the index 842 new_duplicate_profiles = [] 843 844 845 # If we don't yet have an index, we start with a blank one. 846 if not index: 847 dummy_index = profile_index.LinuxSymbolOffsetIndex.BuildIndex( 848 iomanager=self.io_manager) 849 index = obj.Profile.LoadProfileFromData( 850 data=dummy_index, session=self.session) 851 852 if not isinstance(index, profile_index.SymbolOffsetIndex): 853 raise ValueError( 854 "The index should be a SymbolOffsetIndex but found %s instead" % 855 (index.___class__.__name__)) 856 self.session.logging.debug("Index is a %s", index.__class__.__name__) 857 858 # STEP 1. Find new profiles. New profiles are profiles not in the 859 # index or profiles that have been updated. 
860 self.session.report_progress("[STEP 1/6] Finding new profiles...", 861 force=True) 862 new_profile_candidates = list(self._FindNewProfiles(index, 863 spec["path"])) 864 865 # STEP 2. Determine how many of the new profiles are duplicates. 866 # New profiles can be duplicates because they already exist in the index 867 # with another name or because they clash with some other new profile. 868 self.session.report_progress("[STEP 2/6] Finding duplicate profiles...", 869 force=True) 870 new_hashes_dict = dict() 871 new_profiles = dict() 872 for i, (profile_id, data) in enumerate(sorted(new_profile_candidates)): 873 self.session.report_progress( 874 "[STEP 2/6][%d/%d] Finding if %s is duplicate.", 875 i, len(new_profile_candidates), profile_id) 876 profile_hash = index.CalculateRawProfileHash(data) 877 existing_profile = index.LookupHash(profile_hash) 878 879 if existing_profile == profile_id: 880 # This is a profile already in the index that's been updated. 881 # But if the profile still has the same hash, we have to do 882 # nothing as the index is still good. 883 # This wil be the case when touch()ing profiles or probably 884 # copying them over. 885 continue 886 887 # If it's identical to a profile we already have indexed, this is a 888 # duplicate. 889 # 890 # TODO: We should remove the profile and make it a Symlink. 891 if existing_profile: 892 self.session.logging.info( 893 ("New profile %s is equivalent to %s, which is already " 894 "in the index."), 895 profile_id, existing_profile) 896 new_duplicate_profiles.append(profile_id) 897 continue 898 899 # Otherwise it may clash with another new profile. This can easily 900 # happen when we add more than one profile at a time, with minor 901 # version increases. 902 # 903 # Example: Ubuntu Trusty 3.13.0-54-generic vs 3.13.0-55-generic. 904 if profile_hash in new_hashes_dict: 905 # This is a duplicate. Discard. 906 # TODO: Remove the profile and make it a Symlink. 
907 self.session.logging.info( 908 "New profile %s is equivalent to another new profile %s.", 909 profile_id, 910 new_hashes_dict.get(profile_hash)) 911 new_duplicate_profiles.append(profile_id) 912 continue 913 914 # If it was not a duplicate, 915 symbols = data.get("$CONSTANTS") 916 symbols = index.FilterSymbols(symbols) 917 new_profiles[profile_id] = symbols 918 new_hashes_dict[profile_hash] = profile_id 919 920 # Inform of how many profiles we skipped indexing. 921 if len(new_profile_candidates) > len(new_profiles): 922 self.session.logging.info( 923 "Skipped indexing %d profiles, since they were duplicates.", 924 len(new_profile_candidates) - len(new_profiles)) 925 926 927 # STEP 3. Find if any of the new profiles forces us to recompute 928 # traits for profiles already in the repository. This can happen if 929 # the trait that's in the index now appears in one of the 930 # new profiles. 931 # 932 # Since we calculate more than one trait per profile the index may 933 # still work for other traits. But we want healthy indexes, so we 934 # recalculate all the traits. 935 936 num_clashing_profiles = 0 937 self.session.report_progress( 938 "[STEP 3/6] Finding index clashes with new profiles", force=True) 939 940 for i, (profile_id, traits_dict) in enumerate(sorted(index)): 941 self.session.report_progress( 942 "[STEP 3/6][%d/%d] Finding index clashes with new profiles", 943 i, len(index)) 944 profile_needs_rebuild = False 945 946 for trait in traits_dict: 947 for new_profile_id, symbols in new_profiles.iteritems(): 948 if index.RawProfileMatchesTrait(symbols, trait): 949 self.session.logging.warn( 950 "New profile %s clashes with %s, will recalculate.", 951 new_profile_id, profile_id) 952 profile_needs_rebuild = True 953 break 954 955 # Leave the loop early if a trait is not unique anymore. 
956 if profile_needs_rebuild: 957 break 958 959 if profile_needs_rebuild: 960 num_clashing_profiles += 1 961 data = self.io_manager.GetData(profile_id) 962 data["$CONSTANTS"] = index.RelativizeSymbols( 963 data["$CONSTANTS"]) 964 new_profiles[profile_id] = data["$CONSTANTS"] 965 966 if not new_profiles: 967 self.session.logging.info("No new profiles found. Exitting.") 968 return profile_index.LinuxSymbolOffsetIndex.BuildIndex( 969 hashes=index.hashes, 970 traits=index.traits, 971 spec=spec, 972 duplicates=index.duplicates + new_duplicate_profiles, 973 iomanager=self.io_manager) 974 975 self.session.logging.info( 976 ("Will regenerate an index for %d profiles. %d are new and %d " 977 "were in the index but now have clashes"), 978 len(new_profiles), 979 len(new_profiles) - num_clashing_profiles, 980 num_clashing_profiles) 981 982 # STEP 4. Find unique symbols for all new profiles. We need to open 983 # all the profiles in the repo 984 # additionally to the new ones which we opened earlier. 985 986 self.session.report_progress( 987 "[STEP 4/6] Opening all profiles in the repository.", force=True) 988 # Start by opening all profiles in the index. 989 index_profiles = dict() 990 for i, (profile_id, _) in enumerate(index): 991 self.session.report_progress( 992 "[STEP 4/6][%d/%d] Opening %s...", 993 i, len(index), profile_id) 994 profile = self.io_manager.GetData(profile_id) 995 profile["$CONSTANTS"] = index.RelativizeSymbols( 996 profile["$CONSTANTS"]) 997 # Free up some memory 998 del profile["$STRUCTS"] 999 del profile["$CONFIG"] 1000 symbols = profile.get("$CONSTANTS") 1001 symbols = index.FilterSymbols(symbols) 1002 index_profiles[profile_id] = symbols 1003 1004 all_profiles = index_profiles.copy() 1005 # Any profile that was in the index but has been updated on disk will 1006 # be overriden here, which is what we want. 
1007 all_profiles.update(dict(new_profiles)) 1008 1009 self.session.report_progress( 1010 "[STEP 4/6] Finding single-symbol traits.", force=True) 1011 # A list of profiles we haven't found traits for. 1012 retry_profiles = [] 1013 # Maximum number of traits to find. 1014 min_traits = spec.get("min_traits", 5) 1015 self.session.report_progress( 1016 "[STEP 4/6] Finding single-symbol traits. Opening all, done.", 1017 force=True) 1018 1019 # A dictionary of traits per profile_id 1020 traits_dict = dict() 1021 for i, (profile_id, symbols) in enumerate( 1022 sorted(new_profiles.iteritems())): 1023 1024 self.session.report_progress( 1025 "[STEP 4/6][%d/%d] Finding %d traits for %s", 1026 i, len(new_profiles), min_traits, profile_id) 1027 1028 traits = self._FindTraits(profile_id, 1029 profiles=all_profiles, 1030 num_traits=min_traits, 1031 trait_length=1) 1032 traits_dict[profile_id] = traits 1033 1034 if not traits_dict.get(profile_id): 1035 self.session.logging.warning( 1036 "Profile %s has no single-symbol trait.", profile_id) 1037 retry_profiles.append(profile_id) 1038 elif len(traits_dict.get(profile_id)) < min_traits: 1039 self.session.logging.info( 1040 "[STEP 4/6][%d/%d] Found %d/%d traits for %s. Queueing...", 1041 i, len(new_profiles), len(traits), min_traits, 1042 profile_id) 1043 retry_profiles.append(profile_id) 1044 else: 1045 self.session.logging.info( 1046 "[STEP 4/6][%d/%d] Found %d/%d traits for %s", 1047 i, len(new_profiles), len(traits), min_traits, 1048 profile_id) 1049 1050 1051 self.session.report_progress( 1052 "[STEP 5/6] Finding unique 2-symbol traits...", force=True) 1053 1054 # STEP 5. Process the remaining profiles to find unique pairs. 1055 for i, profile_id in enumerate(retry_profiles): 1056 self.session.report_progress( 1057 "[STEP 5/6][%d/%d] Finding unique 2-symbol pairs for %s", 1058 i, len(retry_profiles), profile_id, force=True) 1059 1060 # We have to find only the remaining number of traits to reach 1061 # min_traits. 
1062 num_traits_to_find = (min_traits - 1063 len(traits_dict.get(profile_id, []))) 1064 1065 first_try_symbols = None 1066 if len(traits_dict.get(profile_id, [])) == 1: 1067 first_try_symbols = [trait[0] for trait 1068 in traits_dict.get(profile_id)] 1069 1070 traits = self._FindTraits(profile_id, 1071 profiles=all_profiles, 1072 num_traits=num_traits_to_find, 1073 trait_length=2, 1074 first_try_symbols=first_try_symbols) 1075 traits_dict[profile_id] = traits 1076 1077 if traits_dict.get(profile_id) is None: 1078 self.session.logging.error( 1079 "Profile %s has no 2-symbol trait.", profile_id) 1080 else: 1081 self.session.logging.info( 1082 "[STEP 5/6][%d/%d] Found %d/%d 2-symbol traits for %s", 1083 i, len(retry_profiles), 1084 len(traits_dict.get(profile_id, [])), 1085 min_traits, 1086 profile_id) 1087 1088 # LAST STEP: Build the index augmenting the previous index. 1089 self.session.report_progress( 1090 "[STEP 6/6] Building index...", force=True) 1091 new_index_hashes = index.hashes.copy() 1092 new_index_hashes.update(new_hashes_dict) 1093 1094 new_index_traits = index.traits.copy() 1095 new_index_traits.update(traits_dict) 1096 1097 # Update the profile metadata with the new and updated profiles. 1098 new_index_profile_metadata = index.profiles.copy() 1099 for profile_id in new_profiles: 1100 file_mtime = self.io_manager.Metadata(profile_id)["LastModified"] 1101 metadata_dict = new_index_profile_metadata.get(profile_id, {}) 1102 metadata_dict["LastModified"] = file_mtime 1103 1104 return profile_index.LinuxSymbolOffsetIndex.BuildIndex( 1105 hashes=new_index_hashes, 1106 traits=new_index_traits, 1107 duplicates=index.duplicates + new_duplicate_profiles, 1108 spec=spec, 1109 iomanager=self.io_manager)
1110
1111 - def _GetProfile(self, name):
1112 path = "%s.gz" % name 1113 file_data = gzip.open(path).read() 1114 return json.loads(file_data)
1115
1116 - def _GetAllProfiles(self, path):
1117 """Iterate over all paths and get the profiles.""" 1118 for profile_name in self.io_manager.ListFiles(): 1119 if profile_name.startswith(path): 1120 self.session.report_progress("Processing %s", profile_name) 1121 data = self.io_manager.GetData(profile_name) 1122 1123 yield profile_name, data
1124
def build_index(self, spec):
    """Build an index of the kind requested by the spec.

    The spec's "type" field selects the builder: "struct" builds a struct
    index, "symbol_offset" builds a symbol-offset index, and any other
    value (including a missing type) falls back to a data index.
    """
    index_type = spec.get("type")
    if index_type == "struct":
        return self.BuildStructIndex(spec)
    if index_type == "symbol_offset":
        return self.BuildSymbolsIndex(spec)
    return self.BuildDataIndex(spec)
1132
def render(self, renderer):
    """Load the index spec, build the index and pretty-print the result."""
    spec = self.io_manager.GetData(self.spec)
    index = self.build_index(spec)
    renderer.write(utils.PPrint(index))
1136
class BuildProfileLocally(plugin.Command):
    """Download and builds a profile locally in one step.

    We store the profile in the first repository in the profile_path which must
    be writable. Usually this is a caching repository so the profile goes in the
    local cache.
    """

    name = "build_local_profile"

    @classmethod
    def args(cls, parser):
        super(BuildProfileLocally, cls).args(parser)
        parser.add_argument(
            "module_name",
            # Fixed typo: "extensilon" -> "extension".
            help="The name of the module (without the .pdb extension).",
            required=True)

        parser.add_argument(
            "guid",
            help="The guid of the module.",
            required=False)

        parser.add_argument("--dumpfile",
                            help="If specified also dump the json file here.")

    def __init__(self, module_name=None, guid=None, dumpfile=None, **kwargs):
        super(BuildProfileLocally, self).__init__(**kwargs)
        self.module_name = module_name
        self.guid = guid
        self.dumpfile = dumpfile

    def _fetch_and_parse(self, module_name, guid):
        """Fetch the profile from the symbol server.

        Raises:
          IOError if the profile is not found on the symbol server or can not be
          retrieved.

        Returns:
          the profile data.
        """
        with utils.TempDirectory() as dump_dir:
            pdb_filename = "%s.pdb" % module_name
            fetch_pdb_plugin = self.session.plugins.fetch_pdb(
                pdb_filename=pdb_filename,
                guid=guid, dump_dir=dump_dir)

            # Store the PDB file somewhere.
            pdb_pathname = os.path.join(dump_dir, pdb_filename)
            with open(pdb_pathname, "wb") as outfd:
                outfd.write(fetch_pdb_plugin.FetchPDBFile())

            parse_pdb = self.session.plugins.parse_pdb(
                pdb_filename=pdb_pathname,
                dump_dir=dump_dir)

            return parse_pdb.parse_pdb()

    def fetch_and_parse(self, module_name=None, guid=None, renderer=None):
        """Fetch, parse and store the profile for module_name/guid.

        Falls back to the instance's module_name/guid when the arguments are
        omitted. For the special name "nt" every known kernel module name is
        tried in turn until one is found on the symbol server.

        Raises:
          TypeError: if no module name or no GUID can be determined.
          IOError: if no profile could be fetched at all.

        Returns:
          Whatever the repository's StoreData returns.
        """
        if module_name is None:
            module_name = self.module_name

        if guid is None:
            guid = self.guid

        # Guard before the regex below: re.match() on None would raise an
        # opaque TypeError ("expected string") instead of a clear message.
        if not module_name:
            raise TypeError("Module name not specified.")

        # Allow the user to specify the required profile by name.
        m = re.match(r"([^/]+)/GUID/([^/]+)$", module_name)
        if m:
            module_name = m.group(1)
            guid = m.group(2)

        if not guid:
            raise TypeError("GUID not specified.")

        profile_name = "{0}/GUID/{1}".format(module_name.lower(), guid)

        # Get the first repository to write to.
        repository = self.session.repository_managers[0][1]
        if module_name != "nt":
            data = self._fetch_and_parse(module_name, guid)

            if self.dumpfile:
                with renderer.open(filename=self.dumpfile, mode="wb") as fd:
                    fd.write(utils.PPrint(data))

            return repository.StoreData(profile_name, data)

        # The NT kernel ships under several possible PDB names; try each
        # one until the symbol server has a match for this GUID.
        for module_name in common.KERNEL_NAMES:
            if module_name.endswith(".pdb"):
                module_name, _ = os.path.splitext(module_name)
            try:
                data = self._fetch_and_parse(module_name, guid)
                self.session.logging.warning(
                    "Profile %s fetched and built. Please "
                    "consider reporting this profile to the "
                    "Rekall team so we may add it to the public "
                    "profile repository.", profile_name)

                return repository.StoreData(profile_name, data)
            # "except IOError as e" is valid on Python 2.6+ and 3.x; the
            # old "except IOError, e" form is Python-2-only.
            except IOError as e:
                self.session.logging.error("Error: %s", e)

        raise IOError("Profile not found")

    def render(self, renderer):
        self.fetch_and_parse(self.module_name, self.guid, renderer=renderer)
1245
class TestBuildProfileLocally(testlib.HashChecker):
    # Regression test for the build_local_profile plugin: runs the command
    # line below to fetch the ntkrnlpa PDB with the given GUID and dump the
    # parsed profile to a temp file. testlib.HashChecker presumably verifies
    # the produced output file against a recorded hash -- confirm against
    # the testlib implementation.
    PARAMETERS = dict(
        commandline=("build_local_profile --module_name=%(pdb_name)s "
                     "--guid=%(guid)s --dumpfile %(tempdir)s/output"),
        pdb_name="ntkrnlpa",
        guid="BD8F451F3E754ED8A34B50560CEB08E31"
    )
1254