Package rekall :: Package plugins :: Package tools :: Module mspdb
[frames] | [no frames]

Source Code for Module rekall.plugins.tools.mspdb

   1  # Rekall Memory Forensics 
   2  # Copyright 2014 Google Inc. All Rights Reserved. 
   3  # 
   4  # This program is free software; you can redistribute it and/or modify 
   5  # it under the terms of the GNU General Public License as published by 
   6  # the Free Software Foundation; either version 2 of the License, or (at 
   7  # your option) any later version. 
   8  # 
   9  # This program is distributed in the hope that it will be useful, but 
  10  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
  12  # General Public License for more details. 
  13  # 
  14  # You should have received a copy of the GNU General Public License 
  15  # along with this program; if not, write to the Free Software 
  16  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
  17  # 
  18   
  19  # pylint: disable=protected-access 
  20   
  21  """These plugins are for manipulating Microsoft PDB file. 
  22   
  23  References: 
  24  https://code.google.com/p/pdbparse/ 
  25  http://moyix.blogspot.de/2007/10/types-stream.html 
  26  http://undocumented.rawol.com/win_pdbx.zip 
  27   
  28  Our goal here is not to be a complete parser for PDB files. Rather, we are 
  29  trying to extract only the important information we need in order to build a 
  30  Rekall profile. This means that we dont necessarily care about modifiers like 
  31  "const" "volatile" etc, but mostly care about struct, enums, bitfields etc. 
  32   
  33  If you are comparing the code here with the code in the pdbparse project, be 
  34  aware that due to the crazy way the construct library (which is used by 
  35  pdbparse) splits up bits, the ordering in the pdbparse code does not follow the 
  36  correct bit number (bits are defined in the order they appear in the bit stream, 
  37  which for a little endian number is non intuitive). e.g. 
  38   
  39  CV_property = BitStruct("prop", 
  40      Flag("fwdref"), 
  41      Flag("opcast"), 
  42      Flag("opassign"), 
  43      Flag("cnested"), 
  44      Flag("isnested"), 
  45      Flag("ovlops"), 
  46      Flag("ctor"), 
  47      Flag("packed"), 
  48   
  49      BitField("reserved", 7, swapped=True), 
  50      Flag("scoped"), 
  51  ) 
  52   
  53  Actually is this struct (i.e. above the first field is bit 7, then 6 etc until 
  54  bit 0 the bit 15 down to 8): 
  55   
  56  typedef struct _CV_prop_t 
  57          { 
  58  /*000.0*/ WORD packed   : 1; 
  59  /*000.1*/ WORD ctor     : 1; 
  60  /*000.2*/ WORD ovlops   : 1; 
  61  /*000.3*/ WORD isnested : 1; 
  62  /*000.4*/ WORD cnested  : 1; 
  63  /*000.5*/ WORD opassign : 1; 
  64  /*000.6*/ WORD opcast   : 1; 
  65  /*000.7*/ WORD fwdref   : 1; 
  66  /*001.0*/ WORD scoped   : 1; 
  67  /*001.1*/ WORD reserved : 7; 
  68  /*002*/ } 
  69          CV_prop_t, *PCV_prop_t, **PPCV_prop_t; 
  70   
  71  Since we are lazy and do not want to hand code all the structure definitions, we 
  72  simply build a profile from the C implementation, and then use it here directly 
  73  using the "mspdb" profile (which is available in the profile repository). 
  74   
  75  http://undocumented.rawol.com/win_pdbx.zip: ./sbs_sdk/include/pdb_info.h 
  76   
  77  Other known implementations of PDB parsing: 
  78  https://chromium.googlesource.com/syzygy/+/master/pdb 
  79   
  80  The closest thing to official documentation can be found here: 
  81  http://pierrelib.pagesperso-orange.fr/exec_formats/MS_Symbol_Type_v1.0.pdf 
  82   
  83  """ 
  84   
  85  __author__ = "Michael Cohen <scudette@gmail.com>" 
  86   
  87  import glob 
  88  import re 
  89  import ntpath 
  90  import os 
  91  import platform 
  92  import subprocess 
  93  import sys 
  94  import urllib2 
  95   
  96  from rekall import addrspace 
  97  from rekall import plugin 
  98  from rekall import obj 
  99  from rekall import testlib 
 100   
 101  from rekall.plugins import core 
 102  from rekall.plugins.addrspaces import standard 
 103  from rekall.plugins.overlays import basic 
 104  from rekall.plugins.overlays.windows import pe_vtypes 
 105   
 106  from rekall_lib import utils 
class FetchPDB(core.DirectoryDumperMixin, plugin.TypedProfileCommand,
               plugin.Command):
    """Fetch the PDB file for an executable from the Microsoft PDB server."""

    __name = "fetch_pdb"

    SYM_URLS = ['http://msdl.microsoft.com/download/symbols']
    USER_AGENT = "Microsoft-Symbol-Server/10.0.0.0"

    __args = [
        dict(name="pdb_filename", required=True, positional=True,
             help="The filename of the executable to get the PDB file for."),

        dict(name="guid", positional=True,
             help="The GUID of the pdb file. If provided, the pdb filename must"
             " be provided in the --pdb_filename parameter.")
    ]

    def render(self, renderer):
        """Resolve the PDB name/GUID, download the file and dump it.

        If no GUID was given, pdb_filename is treated as an executable and
        the PDB name/GUID are read from its RSDS debug directory entry.
        """
        # The filename is an executable - extract the RSDS record from it.
        if self.plugin_args.guid is None and self.plugin_args.pdb_filename:
            self.pe = pe_vtypes.PE(filename=self.plugin_args.pdb_filename,
                                   session=self.session)
            data_directory = self.pe.nt_header.OptionalHeader.DataDirectory[
                "IMAGE_DIRECTORY_ENTRY_DEBUG"].VirtualAddress.dereference_as(
                    "_IMAGE_DEBUG_DIRECTORY")

            # We only support the more recent RSDS format.
            debug = data_directory.AddressOfRawData.dereference_as(
                "CV_RSDS_HEADER")

            if debug.Signature != "RSDS":
                self.session.logging.error("PDB stream %s not supported.",
                                           debug.Signature)
                return

            self.plugin_args.pdb_filename = ntpath.basename(
                str(debug.Filename))
            self.plugin_args.guid = self.pe.RSDS.GUID_AGE

        elif self.plugin_args.pdb_filename is None:
            raise RuntimeError(
                "Filename must be provided when GUID is specified.")

        # Write the file data to the renderer.
        pdb_file_data = self.FetchPDBFile()
        with renderer.open(filename=self.plugin_args.pdb_filename,
                           directory=self.plugin_args.dump_dir,
                           mode="wb") as fd:
            fd.write(pdb_file_data)

    def FetchPDBFile(self):
        """Download and decompress the PDB from the symbol servers.

        Tries each server in SYM_URLS in turn; a network failure on one
        server falls through to the next instead of aborting.

        Returns:
          The raw PDB file data as a byte string (capped at 50mb).

        Raises:
          RuntimeError: if no server had the file, or the downloaded CAB
            could not be decompressed.
        """
        # Ensure the pdb filename has the correct extension.
        pdb_filename = self.plugin_args.pdb_filename
        guid = self.plugin_args.guid

        if not pdb_filename.endswith(".pdb"):
            pdb_filename += ".pdb"

        # The basename is the same for every server - hoist it out of the
        # loop (the original recomputed it per iteration).
        basename = ntpath.splitext(pdb_filename)[0]

        for base_url in self.SYM_URLS:
            # Symbol server store layout: <name>/<guid>/<name compressed>.
            url = "%s/%s/%s/%s.pd_" % (base_url, pdb_filename, guid, basename)

            self.session.report_progress("Trying to fetch %s\n", url)
            request = urllib2.Request(url, None, headers={
                'User-Agent': self.USER_AGENT})

            try:
                url_handler = urllib2.urlopen(request)
            except urllib2.URLError as e:
                # This server did not have the file (or is unreachable) -
                # try the next configured server instead of aborting.
                self.session.logging.warning(
                    "Failed to fetch %s: %s", url, e)
                continue

            with utils.TempDirectory() as temp_dir:
                compressed_output_file = os.path.join(
                    temp_dir, "%s.pd_" % basename)

                output_file = os.path.join(
                    temp_dir, "%s.pdb" % basename)

                # Download the compressed file to a temp file.
                with open(compressed_output_file, "wb") as outfd:
                    while True:
                        data = url_handler.read(8192)
                        if not data:
                            break

                        outfd.write(data)
                        self.session.report_progress(
                            "%s: Downloaded %s bytes", basename, outfd.tell())

                # Now try to decompress it with system tools. This might fail.
                try:
                    if platform.system() == "Windows":
                        # This should already be installed on windows systems.
                        subprocess.check_call(
                            ["expand", compressed_output_file, output_file],
                            cwd=temp_dir)
                    else:
                        # In Linux we just hope the cabextract program was
                        # installed.
                        subprocess.check_call(
                            ["cabextract", compressed_output_file],
                            cwd=temp_dir,
                            stdout=sys.stderr)

                except (subprocess.CalledProcessError, OSError):
                    raise RuntimeError(
                        "Failed to decompress output file %s. "
                        "Ensure cabextract is installed.\n" % output_file)

                # Sometimes the CAB file contains a PDB file with a different
                # name or casing than we expect. We use glob to find any PDB
                # files in the temp directory.
                pdb_files = glob.glob("%s/*pdb" % temp_dir)
                if not pdb_files:
                    # Previously this was a bare IndexError - give a clear
                    # error instead.
                    raise RuntimeError(
                        "Decompression did not produce a PDB file for %s." %
                        pdb_filename)

                # We read the entire file into memory here - capped at 50mb
                # to protect against unreasonably large files.
                with open(pdb_files[0], "rb") as fd:
                    return fd.read(50 * 1024 * 1024)

        raise RuntimeError(
            "Failed to fetch PDB file %s from any symbol server." %
            pdb_filename)
223
class TestFetchPDB(testlib.DisabledTest):
    """fetch_pdb hits the network, so its automated test is disabled."""

    PARAMETERS = {"commandline": "fetch_pdb"}
228
def Pages(length, page_size):
    """Calculate the number of pages required to store a stream.

    Args:
      length: Total stream length in bytes.
      page_size: Size of a single page in bytes.

    Returns:
      The ceiling of length / page_size - i.e. the number of whole pages
      needed to hold length bytes.
    """
    # Integer ceiling division. Using // keeps the result integral under
    # both Python 2 and Python 3; the original relied on Python 2's
    # truncating true division of ints.
    return (length + page_size - 1) // page_size
237
class StreamBasedAddressSpace(addrspace.CachingAddressSpaceMixIn,
                              addrspace.RunBasedAddressSpace):
    """An address space which combines together the page lists.

    Once we parse the page list, we can build this address space which takes
    care of reassembling the stream for us automatically.
    """

    def __init__(self, pages=None, page_size=None, **kwargs):
        """Map each page in the page list into one contiguous stream."""
        super(StreamBasedAddressSpace, self).__init__(**kwargs)
        self.pages = pages
        page_size = int(page_size)
        self.PAGE_SIZE = page_size

        index = 0
        for index, page_number in enumerate(pages):
            self.add_run(index * page_size, page_number * page_size,
                         page_size)

        # Record the total size of the file.
        self.size = (index + 1) * page_size

####################################################################
# The following parses the TPI stream (stream 2).
####################################################################

# Inside TPI stream we have a list of records. The type of the struct stored in
# the record is declared by use of the _LEAF_ENUM_e enum. The following lookup
# map is used to map from the _LEAF_ENUM_e to the BaseObject class to
# instantiate.
LEAF_ENUM_TO_TYPE = dict(
    LF_STRUCTURE="_lfClass",
    LF_ARRAY="_lfArray",
    LF_PROCEDURE="_lfProc",
    LF_POINTER="_lfPointer",
    LF_ARGLIST="_lfArgList",
    LF_MODIFIER="_lfModifier",
    LF_FIELDLIST="_lfFieldList",
    LF_ENUM="_lfEnum",
    LF_UNION="_lfUnion",
    LF_BITFIELD="_lfBitfield",
    LF_NESTTYPE="_lfNestType",
    LF_CHAR="byte",
    LF_SHORT="short int",
    LF_USHORT="unsigned short int",
    LF_LONG="long",
    LF_ULONG="unsigned long",
    LF_64PWCHAR="Pointer",
)

# The SubRecord field is a union which depends on the _LEAF_ENUM_e. The
# following maps these to the enum fields. There are other members in the
# union, but we don't care about them.
LEAF_ENUM_TO_SUBRECORD = dict(
    LF_MEMBER="Member",
    LF_ENUMERATE="Enumerate",
    LF_NESTTYPE="NestType",
)

# A map between the symbol type enum and the actual record type.
SYM_ENUM_TO_SYM = dict(
    S_PUB32="_PUBSYM32",
)


# Vtype overlays applied on top of the base "mspdb" profile. Each entry is
# [size-or-None, {field: [offset, [type, args]]}] in the usual Rekall overlay
# format; lambdas are evaluated lazily against the instantiated struct.
mspdb_overlays = {
    # The file header. We only support newer versions.
    "_PDB_HEADER_700": [None, {
        "abSignature": [None, ["Signature", dict(
            value="Microsoft C/C++ MSF 7.00\r\n\x1ADS\0\0\0"
        )]],

        # Total number of pages in the root stream.
        "root_pages": lambda x: Pages(x.dRootBytes, x.dPageBytes),

        # This is an array of page indexes which make up the page list of
        # the root stream.
        "adIndexPages": [None, ["Array", dict(
            target="unsigned int",
            # The root page list is stored in the index stream. Each
            # page index is 4 bytes.
            count=lambda x: Pages(4 * x.root_pages, x.dPageBytes),
        )]],
    }],

    # The header of the root stream (This applies once we reconstruct the root
    # stream). It defines the page indexes of all the streams in this file.
    "_PDB_ROOT_700": [lambda x: (x.dStreams + 1) * 4, {
        "adStreamBytes": [None, ["Array", dict(
            count=lambda x: x.dStreams,
            target="unsigned int",
        )]],
    }],

    # A modifier adds some flags to its modified_type.
    "_lfModifier": [None, {
        "modified_type": [2, ["unsigned int"]],
        "modifier": [6, ["Flags", dict(
            bitmap=dict(
                unaligned=2,
                volatile=1,
                const=0
            ),
            target="unsigned short int",
        )]],
    }],

    # The size of the SubRecord itself is the size of the value. (ie. depends
    # on the _LEAF_ENUM_e). We must calculate the exact size because SubRecords
    # (of variable size) are stored back to back in the lfFieldList.
    "_lfSubRecord": [lambda x: x.value.obj_size, {
        "leaf": [None, ["Enumeration", dict(
            enum_name="_LEAF_ENUM_e",
            target="unsigned short int")]],

        # This pseudo value automatically selects the correct member of the
        # union based on the leaf value.
        "value": lambda x: x.m(
            LEAF_ENUM_TO_SUBRECORD.get(str(x.leaf), "Unknown")),
    }],

    "_lfEnum": [None, {
        # The name of the enum element.
        "Name": [None, ["String"]],
    }],

    "_lfNestType": [None, {
        # The name of the enum element.
        "Name": [None, ["String"]],
    }],

    # A lfFieldList holds a back to back variable length array of SubRecords.
    "_lfFieldList": [None, {
        "SubRecord": [None, ["ListArray", dict(
            target="_lfSubRecord",

            # Total length is determined by the size of the
            # container.
            maximum_size=lambda x: x.obj_parent.length - 2,
        )]],
    }],

    # Arg list for a function.
    "_lfArgList": [None, {
        # This is a list of _TYPE_ENUM_e, or an index reference into the TPI
        # stream.
        "arg": [None, ["Array", dict(
            target="Enumeration",
            target_args=dict(
                enum_name="_TYPE_ENUM_e",
                target="unsigned short int",
            ),
            count=lambda x: x.count
        )]],
    }],

    # A helper type to select the correct implementation.
    "TypeContainer": [lambda x: x.length + 2, {
        "length": [0, ["unsigned short int"]],

        # Depending on the value of this enum, this field must be cast to
        # the correct struct.
        "type_enum": [2, ["Enumeration", dict(
            enum_name="_LEAF_ENUM_e",
            target="unsigned short int"
        )]],

        # Depending on the enumeration above, the type_enum field must be
        # cast into one of these structs.
        "type": lambda x: x.type_enum.cast(
            LEAF_ENUM_TO_TYPE.get(str(x.type_enum), "unsigned int"))
    }],

    # This is the TPI stream header. It is followed by a list of
    # TypeContainers for all the types in this stream.
    "_HDR": [None, {
        "types": [lambda x: x.obj_size,
                  ["ListArray", dict(
                      target="TypeContainer",
                      count=lambda x: x.tiMac - x.tiMin,
                      maximum_size=lambda x: x.cbGprec,
                  )]],
    }],

    # A GUID rendered as the hex string used in symbol server paths.
    "_GUID": [16, {
        "Data1": [0, ["unsigned long", {}]],
        "Data2": [4, ["unsigned short", {}]],
        "Data3": [6, ["unsigned short", {}]],
        "Data4": [8, ["String", dict(length=8, term=None)]],
        "AsString": lambda x: ("%08x%04x%04x%s" % (
            x.Data1, x.Data2, x.Data3, str(x.Data4).encode('hex'))).upper(),
    }],

    # The PDB info stream (stream 1) header.
    "Info": [None, {
        "Version": [0, ["unsigned long int"]],
        "TimeDateStamp": [4, ["UnixTimeStamp"]],
        "Age": [8, ["unsigned long int"]],
        "GUID": [12, ["_GUID"]],
    }],

    # The record length does not include the tag.
    "_ALIGNSYM": [lambda x: x.reclen + 2, {
        "rectyp": [None, ["Enumeration", dict(
            enum_name="_SYM_ENUM_e",
            target="unsigned short int")]],

        # The real record type depends on the _SYM_ENUM_e.
        "value": lambda x: x.cast(
            SYM_ENUM_TO_SYM.get(str(x.rectyp), ""))

    }],

    "_PUBSYM32": [None, {
        "name": [None, ["String"]],
    }],

    "DBI": [None, {
        "DBIHdr": [0, ["_NewDBIHdr"]],
        "ExHeaders": [64, ["ListArray", dict(
            maximum_size=lambda x: x.DBIHdr.cbGpModi,
            target="DBIExHeaders")]],
    }],

    "DBIExHeaders": [None, {
        "modName": [64, ["String"]],
        "objName": [lambda x: x.modName.obj_offset + x.modName.obj_size,
                    ["String"]],
    }],

    "IMAGE_SECTION_HEADER": [None, {
        "Name": [None, ["String"]],
    }],

}
class lfClass(obj.Struct):
    """Represents a class or struct."""

    # End offset of this record; filled in by _DecodeVariableData().
    _obj_end = 0

    def __init__(self, **kwargs):
        super(lfClass, self).__init__(**kwargs)
        # Decode the trailing variable-length value/name data immediately so
        # obj_size/obj_end are valid as soon as the object exists.
        self._DecodeVariableData()

    @utils.safe_property
    def obj_size(self):
        """Our size is the end of the object plus any padding."""
        return pe_vtypes.RoundUpToWordAlignment(
            self.obj_end - self.obj_offset)

    def _DecodeVariableData(self):
        """This object is followed by a variable sized data structure.

        This data structure contains the "value_" and "name" attributes. If
        the first short int is less than 0x8000, it represents the value.
        Otherwise, it represents an _LEAF_ENUM_e enum which determines the
        size of the value to read next (e.g. LF_ULONG = 4 bytes, LF_SHORT = 2
        bytes) and those represent the value.

        The name field then follows as a String.

        Following the name field, there is padding to 4 byte alignment.

        We must calculate the total size of this struct in this function,
        after parsing all the components.
        """
        # Start reading immediately after the fixed-size part of the struct.
        obj_end = self.obj_offset + super(lfClass, self).obj_size
        field_type = self.obj_profile.Object(
            "unsigned short int", offset=obj_end, vm=self.obj_vm)

        obj_end += field_type.obj_size

        if field_type < 0x8000:
            # Small values are stored inline in the short itself.
            self.value_ = field_type
            self.name = self.obj_profile.String(
                offset=obj_end, vm=self.obj_vm)

            obj_end += self.name.obj_size

        else:
            # The field type is an LF_ENUM which determines which struct this
            # is.
            type_enum_name = self.obj_profile.get_enum(
                "_LEAF_ENUM_e").get(str(field_type))

            type_name = LEAF_ENUM_TO_TYPE.get(type_enum_name)

            self.value_ = self.obj_profile.Object(
                type_name=type_name, offset=obj_end, vm=self.obj_vm)

            # The name follows the value.
            self.name = self.obj_profile.String(
                offset=self.value_.obj_offset + self.value_.obj_size,
                vm=self.obj_vm)

            obj_end += self.value_.obj_size + self.name.obj_size

        # Record the end of the object
        self._obj_end = obj_end

        # Sometimes the field is named '__unnamed' so we disambiguate it here.
        # NOTE(review): self.field is presumably this record's TPI field-list
        # index - confirm against the _lfClass vtype definition.
        if self.name == "__unnamed":
            self.name = "__unnamed_%s" % self.field

    @utils.safe_property
    def obj_end(self):
        # End offset recorded by _DecodeVariableData().
        return self._obj_end

    def Definition(self, _):
        """Returns the vtype data structure defining this element.

        Returns:
          a tuple, the first element is the target name, the second is the
          dict of the target_args.
        """
        # The target is just the name of this class.
        return [str(self.name), {}]
556
class lfEnumerate(lfClass):
    """A SubRecord holding one enumerator (name/value pair) of an enum."""
560
class lfBitfield(obj.Struct):
    """A range of bits."""

    def Definition(self, tpi):
        """BitField overlays on top of another type."""
        resolved = tpi.DefinitionByIndex(self.type)
        if not resolved:
            # Unresolvable underlying type - fall back to a named target.
            return [str(self.name), {}]

        target, target_args = resolved

        start = int(self.position)
        return "BitField", dict(
            start_bit=start,
            end_bit=start + int(self.length),
            target=target,
            target_args=target_args)
577
class lfNestType(obj.Struct):
    """A type nested inside another type."""

    UNNAMED_RE = re.compile("<unnamed-type-([^->]+)>")

    def __init__(self, **kwargs):
        super(lfNestType, self).__init__(**kwargs)
        self.value_ = 0
        self.name = str(self.Name)

        # Strip the <unnamed-type-...> wrapper when present.
        match = self.UNNAMED_RE.match(self.name)
        if match:
            self.name = match.group(1)

    @utils.safe_property
    def obj_size(self):
        """Our size is the end of the object plus any padding."""
        end_of_name = self.Name.obj_offset + self.Name.obj_size
        return pe_vtypes.RoundUpToWordAlignment(end_of_name)

    def Definition(self, tpi):
        """Delegate to the definition of the nested type's TPI index."""
        return tpi.DefinitionByIndex(self.index)
598
class lfUnion(lfClass):
    """A Union: identical to a struct except that members may overlap."""
602
class lfModifier(lfClass):
    """A const/volatile/unaligned modifier wrapping another type."""

    def Definition(self, tpi):
        """We dont really care about modifiers, just pass the utype through."""
        underlying_type_index = self.modified_type
        return tpi.DefinitionByIndex(underlying_type_index)
608
class lfEnum(obj.Struct):
    """Represents an enumeration definition."""

    @utils.safe_property
    def Name(self):
        """The enum's name; synthesized from our offset when anonymous."""
        name = str(self.m("Name"))
        if name == "<unnamed-tag>":
            return "ENUM_%X" % self.obj_offset

        return name

    def AddEnumeration(self, tpi):
        """Register this enum's value<->name maps with the TPI parser."""
        forward = {}
        reverse = {}
        for record in tpi.Resolve(self.field).SubRecord:
            numeric = int(record.value.value_)
            label = str(record.value.name)
            forward[numeric] = label
            reverse[label] = numeric

        tpi.AddEnumeration(self.Name, forward)
        tpi.AddReverseEnumeration(self.Name, reverse)

    def Definition(self, tpi):
        """Enumerations are defined in two parts.

        First an enumeration dict is added to the profile constants, and then
        the target "Enumeration" can use it by name (having the enum_name
        field). This allows many fields which use the same enumeration to
        share the definition dict.
        """
        resolved = tpi.DefinitionByIndex(self.utype)
        if not resolved:
            return [str(self.name), {}]

        target, target_args = resolved

        return "Enumeration", dict(
            target=target, target_args=target_args, enum_name=self.Name)
647
class lfPointer(lfClass):
    """A Pointer object."""

    def Definition(self, tpi):
        """Return a Pointer vtype wrapping the pointed-to type."""
        pointee = tpi.DefinitionByIndex(int(self.u1.utype))
        if not pointee:
            return [str(self.name), {}]

        target, target_args = pointee

        return ["Pointer", dict(
            target=target,
            target_args=target_args
        )]
664
class lfProc(lfClass):
    """A Function object."""

    def Definition(self, tpi):
        """We record the function arg prototype as well."""
        # Resolve every argument index, dropping those with no definition.
        arg_indexes = tpi.Resolve(self.arglist).arg
        args = [definition
                for definition in (tpi.DefinitionByIndex(idx)
                                   for idx in arg_indexes)
                if definition]

        return "Function", dict(args=args)
678
class lfArray(lfClass):
    """An array of the same object."""

    def Definition(self, tpi):
        """Return an Array vtype; the element count is fixed up later."""
        element = tpi.DefinitionByIndex(self.elemtype)
        if not element:
            return [str(self.name), {}]

        target, target_args = element
        if target == "<unnamed-tag>":
            target = "<unnamed-%s>" % self.elemtype

        # Note that we only specify the total size of the array. We have no
        # idea how many items fit at this stage because we dont know the
        # exact size of the elements. The post processing step will convert
        # the size into a count.
        definition = ["Array", dict(
            target=target, target_args=target_args,
            size=int(self.value_),
        )]

        tpi.RegisterFixUp(definition)

        return definition
704
class lfMember(lfClass):
    """A member in a struct (or class)."""

    def Definition(self, tpi):
        """Returns a tuple of target, target_args for the member."""
        member_type_index = self.m("index")
        return tpi.DefinitionByIndex(member_type_index)
712
class _PDB_HEADER_700(obj.Struct):
    """The file header of a PDB file."""

    def get_page_list(self):
        """The full page list is a double indexed array."""
        pages = []
        for index_page in self.adIndexPages:
            index_array = self.obj_profile.Array(
                offset=index_page * self.dPageBytes, vm=self.obj_vm,
                target="unsigned int", count=self.dPageBytes / 4)

            for page_number in index_array:
                pages.append(int(page_number))
                # Stop as soon as we have enough pages for the root stream.
                if len(pages) >= self.root_pages:
                    return pages

        return pages
729
class _PDB_ROOT_700(obj.Struct):
    """The root stream contains information about all other streams."""

    def _GetStreams(self):
        """Read all the streams in the file.

        Yields a StreamBasedAddressSpace for every stream, in stream number
        order. The page lists of all streams are stored back to back
        directly after the adStreamBytes array, so we must walk every page
        list in order even for streams we do not need.
        """
        offset_of_index_list = self.obj_offset + self.obj_size
        page_size = self.obj_context["page_size"]

        for stream_size in self.adStreamBytes:
            # 0xffffffff marks a free/unused stream slot - treat as empty.
            if stream_size == 0xffffffff:
                stream_size = 0

            page_list = self.obj_profile.Array(
                offset=offset_of_index_list, vm=self.obj_vm,
                count=Pages(stream_size, page_size),
                target="unsigned int")

            # Advance past this stream's page list to the next one.
            offset_of_index_list += page_list.obj_size

            yield StreamBasedAddressSpace(
                base=self.obj_vm.base, page_size=page_size,
                session=self.obj_profile.session, pages=page_list)

    def GetStream(self, number):
        """Only return the required streams, discarding the rest."""
        for i, address_space in enumerate(self._GetStreams()):
            if i == number:
                return address_space
        # Implicitly returns None when the stream number is out of range -
        # callers must handle a None stream.
759
class DBIExHeaders(obj.Struct):
    """A DBI module record with trailing variable-length name strings."""

    @utils.safe_property
    def obj_size(self):
        """Distance from our start to the aligned end of objName."""
        end_of_names = self.objName.obj_offset + self.objName.obj_size
        aligned_end = pe_vtypes.RoundUpToWordAlignment(end_of_names)
        return aligned_end - self.obj_offset
767
class DBI(obj.Struct):
    """The DBI (debug information) stream."""

    def DBGHeader(self):
        """Locate the debug header which follows the variable DBI sections."""
        hdr = self.DBIHdr
        # Skip over all these sections which we dont care about until we get
        # to the debug header at the end.
        header_offset = self.obj_offset + hdr.obj_size
        header_offset += hdr.cbGpModi
        header_offset += hdr.cbSC
        header_offset += hdr.cbSecMap
        header_offset += hdr.cbFileInfo
        header_offset += hdr.cbTSMap
        header_offset += hdr.cbECInfo

        return self.obj_profile.DbgHdr(header_offset, vm=self.obj_vm)
784
class PDBProfile(basic.Profile32Bits, basic.BasicClasses):
    """A profile to parse Microsoft PDB files.

    Note that this is built on top of the mspdb profile which exists in the
    profile repository, as generated from the code here:

    http://undocumented.rawol.com/win_pdbx.zip

    Do not directly instantiate this. Just do:

    profile = session.LoadProfile("mspdb")
    """

    def __init__(self, **kwargs):
        super(PDBProfile, self).__init__(**kwargs)
        # Install the local overlays and bind the struct names to the
        # wrapper classes defined in this module.
        self.add_overlay(mspdb_overlays)
        self.add_classes({
            "_PDB_HEADER_700": _PDB_HEADER_700,
            "_PDB_ROOT_700": _PDB_ROOT_700,
            "_lfClass": lfClass,
            "_lfArray": lfArray,
            "_lfMember": lfMember,
            "_lfPointer": lfPointer,
            "_lfProc": lfProc,
            "_lfEnum": lfEnum,
            "_lfModifier": lfModifier,
            "_lfUnion": lfUnion,
            "_lfBitfield": lfBitfield,
            "_lfEnumerate": lfEnumerate,
            "_lfNestType": lfNestType,
            "DBIExHeaders": DBIExHeaders,
            "DBI": DBI,
        })
813
814 815 -class PDBParser(object):
816 """Parses a Microsoft PDB file.""" 817 818 # A mapping between _TYPE_ENUM_e basic pdb types and vtype 819 # descriptions. Keys: The _TYPE_ENUM_e enum, values a tuple of target, 820 # target_args for instantiating the Rekall object describing this type. 821 TYPE_ENUM_TO_VTYPE = { 822 "T_32PINT4": ["Pointer", dict(target="long")], 823 "T_32PLONG": ["Pointer", dict(target="long")], 824 "T_32PQUAD": ["Pointer", dict(target="long long")], 825 "T_32PRCHAR": ["Pointer", dict(target="unsigned char")], 826 "T_32PREAL32": ["Pointer", dict(target="Void")], 827 "T_32PREAL64": ["Pointer", dict(target="Void")], 828 "T_32PSHORT": ["Pointer", dict(target="short")], 829 "T_32PUCHAR": ["Pointer", dict(target="unsigned char")], 830 "T_32PUINT4": ["Pointer", dict(target="unsigned int")], 831 "T_32PULONG": ["Pointer", dict(target="unsigned long")], 832 "T_32PUQUAD": ["Pointer", dict(target="unsigned long long")], 833 "T_32PUSHORT": ["Pointer", dict(target="unsigned short")], 834 "T_32PVOID": ["Pointer", dict(target="Void")], 835 "T_32PWCHAR": ["Pointer", dict(target="UnicodeString")], 836 "T_64PLONG": ["Pointer", dict(target="long")], 837 "T_64PQUAD": ["Pointer", dict(target="long long")], 838 "T_64PRCHAR": ["Pointer", dict(target="unsigned char")], 839 "T_64PUCHAR": ["Pointer", dict(target="unsigned char")], 840 "T_64PWCHAR": ["Pointer", dict(target="String")], 841 "T_64PULONG": ["Pointer", dict(target="unsigned long")], 842 "T_64PUQUAD": ["Pointer", dict(target="unsigned long long")], 843 "T_64PUSHORT": ["Pointer", dict(target="unsigned short")], 844 "T_64PVOID": ["Pointer", dict(target="Void")], 845 "T_BOOL08": ["unsigned char", {}], 846 "T_CHAR": ["char", {}], 847 "T_INT4": ["long", {}], 848 "T_INT8": ["long long", {}], 849 "T_LONG": ["long", {}], 850 "T_QUAD": ["long long", {}], 851 "T_RCHAR": ["unsigned char", {}], 852 "T_REAL32": ["float", {}], 853 "T_REAL64": ["double", {}], 854 "T_REAL80": ["long double", {}], 855 "T_SHORT": ["short", {}], 856 "T_UCHAR": ["unsigned char", 
{}], 857 "T_UINT4": ["unsigned long", {}], 858 "T_ULONG": ["unsigned long", {}], 859 "T_UQUAD": ["unsigned long long", {}], 860 "T_USHORT": ["unsigned short", {}], 861 "T_VOID": ["Void", {}], 862 "T_WCHAR": ["UnicodeString", {}], 863 } 864
865 - def __init__(self, filename, session):
866 self.session = session 867 self.fixups = [] 868 self.enums = {} 869 self.rev_enums = {} 870 self.constants = {} 871 self.functions = {} 872 self.profile = self.session.LoadProfile("mspdb") 873 self._TYPE_ENUM_e = self.profile.get_enum("_TYPE_ENUM_e") 874 self._TYPE_ENUM_e = dict( 875 (int(x), y) for x, y in self._TYPE_ENUM_e.items()) 876 877 self.address_space = standard.FileAddressSpace( 878 filename=filename, session=self.session) 879 self.header = self.profile._PDB_HEADER_700( 880 vm=self.address_space, offset=0) 881 882 if not self.header.abSignature.is_valid(): 883 raise IOError("PDB file not supported.") 884 885 root_pages = self.header.get_page_list() 886 887 root_stream = StreamBasedAddressSpace( 888 base=self.address_space, page_size=self.header.dPageBytes, 889 pages=root_pages, session=self.profile.session) 890 891 self.root_stream_header = self.profile._PDB_ROOT_700( 892 offset=0, 893 vm=root_stream, 894 context=dict( 895 page_size=self.header.dPageBytes 896 ) 897 ) 898 899 self.ParsePDB() 900 self.ParseDBI() 901 self.ParseTPI()
902
903 - def ParsePDB(self):
904 """Parse the PDB info stream.""" 905 # Get the info stream. 906 info = self.profile.Info(vm=self.root_stream_header.GetStream(1)) 907 self.metadata = dict( 908 Version=int(info.Version), 909 Timestamp=str(info.TimeDateStamp), 910 GUID_AGE="%s%X" % (info.GUID.AsString, info.Age), 911 )
912
913 - def ParseDBI(self):
914 """Parse the DBI stream. 915 916 This fires off subparsers for contained streams. 917 """ 918 dbi = self.profile.DBI(vm=self.root_stream_header.GetStream(3)) 919 DBGHeader = dbi.DBGHeader() 920 921 # Sometimes this stream is set to 0xFFFF so we need to use the other 922 # stream. 923 section_stream = DBGHeader.snSectionHdrOrig 924 if section_stream == 0xFFFF: 925 section_stream = DBGHeader.snSectionHdr 926 927 self.ParseSectionHeaders(section_stream) 928 self.ParseOMAP(DBGHeader.snOmapFromSrc) 929 self.ParseGlobalSymbols(dbi.DBIHdr.u1.snSymRecs)
930
931 - def ParseSectionHeaders(self, stream_id):
932 """Gather the PE sections of this executable.""" 933 self.sections = [] 934 stream = self.root_stream_header.GetStream(stream_id) 935 if stream is None: 936 return 937 938 for section in self.profile.ListArray( 939 maximum_size=stream.size, 940 target="IMAGE_SECTION_HEADER", vm=stream): 941 self.sections.append(section)
942
943 - def ParseOMAP(self, omap_stream_id):
944 """Build an OMAP lookup table. 945 946 The OMAP is a translation between the original symbol's offset to the 947 final offset. When the linker builds the executable, it reorders the 948 original object files in the executable section. This translation table 949 tells us where the symbols end up. 950 """ 951 self.omap = utils.SortedCollection(key=lambda x: x[0]) 952 omap_stream = self.root_stream_header.GetStream(omap_stream_id) 953 if omap_stream is None: 954 return 955 956 omap_address_space = addrspace.BufferAddressSpace( 957 session=self.session, 958 data=omap_stream.read(0, omap_stream.size)) 959 960 omap_array = self.profile.Array( 961 vm=omap_address_space, 962 count=omap_stream.size / self.profile.get_obj_size("_OMAP_DATA"), 963 max_count=omap_stream.size, 964 target="_OMAP_DATA") 965 966 for i, omap in enumerate(omap_array): 967 src = int(omap.rva) 968 dest = int(omap.rvaTo) 969 970 self.omap.insert((src, dest)) 971 self.session.report_progress( 972 " Extracting OMAP Information %s%%", 973 lambda: i * 100 / omap_array.count)
974
975 - def ParseGlobalSymbols(self, stream_id):
976 """Parse the symbol records stream.""" 977 stream = self.root_stream_header.GetStream(stream_id) 978 for container in self.profile.ListArray(target="_ALIGNSYM", vm=stream, 979 maximum_size=stream.size): 980 981 if container.reclen == 0: 982 break 983 984 symbol = container.value 985 986 # Skip unknown records for now. 987 if not symbol: 988 self.session.logging.warning( 989 "Unimplemented symbol %s" % container.rectyp) 990 continue 991 992 try: 993 name = str(symbol.name) 994 except AttributeError: 995 # We do not support symbols without name (e.g. annotations). 996 continue 997 998 translated_offset = offset = int(symbol.off) 999 1000 # Some files do not have OMAP information or section information. In 1001 # that case we just export the symbol offsets untranslated. 1002 if self.sections: 1003 # Convert the RVA to a virtual address by referencing into the 1004 # correct section. 1005 translated_offset = virtual_address = ( 1006 offset + self.sections[symbol.seg - 1].VirtualAddress) 1007 1008 # If there is no OMAP specified we just translate the symbol 1009 # into the right section. 1010 if self.omap: 1011 # Translate the offset according to the OMAP. 1012 try: 1013 from_offset, dest_offset = self.omap.find_le( 1014 virtual_address) 1015 1016 translated_offset = ( 1017 virtual_address - from_offset + dest_offset) 1018 1019 except ValueError: 1020 pass 1021 1022 if symbol.pubsymflags.u1.fFunction: 1023 self.functions[name] = translated_offset 1024 else: 1025 self.constants[name] = translated_offset 1026 1027 self.session.report_progress(" Parsing Symbols %s", name)
1028
def ParseTPI(self):
    """Parse the TPI stream, which holds all the type definitions.

    Builds self.lookup, mapping each TPI type index to its record, then
    registers every enumeration found, even ones not referenced by any
    struct.
    """
    self.lookup = {}
    header = self.profile._HDR(vm=self.root_stream_header.GetStream(2))

    # Build a lookup table for fast resolving of TPI indexes. Indexes
    # start at tiMin and increase by one per record.
    index = header.tiMin
    for type_record in header.types:
        self.session.report_progress(" Parsing Structs %(spinner)s")

        self.lookup[index] = type_record
        index += 1
        if not type_record:
            break

    # Extract ALL enumerations, even if they are not referenced by any
    # structs.
    for record in self.lookup.values():
        if record.type_enum == "LF_ENUM":
            record.type.AddEnumeration(self)
1047
def AddEnumeration(self, name, enumeration):
    """Record an enumeration definition under the given name."""
    self.enums[name] = enumeration
1050
def AddReverseEnumeration(self, name, enumeration):
    """Record the reverse mapping of an enumeration under the given name."""
    self.rev_enums[name] = enumeration
1053
def RegisterFixUp(self, definition):
    """Queue a type definition to be adjusted later during post-processing."""
    self.fixups.append(definition)
1056
def Structs(self):
    """Yield [name, [size, fields]] for each concrete struct/union.

    Walks the TPI lookup table and emits a vtype-style definition for
    every LF_STRUCTURE/LF_UNION record, skipping forward references
    (which carry no field list).

    Yields:
      [struct_name, [struct_size, {field_name: [offset, definition]}]]
    """
    # BUGFIX: use items() instead of Python 2's iteritems(), which does
    # not exist on Python 3. On Python 2 items() is behavior-identical
    # (only marginally less memory efficient).
    for key, value in self.lookup.items():
        # Ignore the forward references.
        if ((value.type_enum == "LF_STRUCTURE" or
             value.type_enum == "LF_UNION") and
                not value.type.property.fwdref):

            struct_name = value.type.name
            # Anonymous structs get a unique name derived from their TPI
            # index so they do not collide with each other.
            if struct_name == "<unnamed-tag>":
                struct_name = "<unnamed-%s>" % key

            struct_size = int(value.type.value_)

            # The struct record refers to a separate field list record.
            field_list = self.lookup[int(value.type.field)].type
            definition = [struct_size, {}]

            for field in field_list.SubRecord:
                field_definition = field.value.Definition(self)
                if field_definition:
                    if field_definition[0] == "<unnamed-tag>":
                        field_definition[0] = (
                            "<unnamed-%s>" % field.value.index)

                    definition[1][str(field.value.name)] = [
                        int(field.value.value_), field_definition]

            yield [struct_name, definition]
1084
def DefinitionByIndex(self, idx):
    """Return the vtype definition of the item identified by idx.

    Indexes below 0x700 denote builtin (native) types; anything above
    is resolved through the TPI lookup table.
    """
    # Builtin type range: translate via the native type enumeration.
    if idx < 0x700:
        type_name = self._TYPE_ENUM_e.get(idx)

        definition = self.TYPE_ENUM_TO_VTYPE.get(type_name)
        if definition is None and type_name != "T_NOTYPE":
            self.session.logging.error("Unrecognized type %s\n", type_name)

        return definition

    # User-defined type: delegate to the record's own Definition().
    try:
        return self.lookup[idx].type.Definition(self)
    except AttributeError:
        # Records without a Definition() method yield no vtype.
        return None
1102
def Resolve(self, idx):
    """Return the type record for a TPI index, or a NoneObject if unknown."""
    try:
        entry = self.lookup[idx]
    except KeyError:
        return obj.NoneObject("Index not known")

    return entry.type
1108
def __enter__(self):
    """Support use as a context manager; returns the parser itself."""
    return self
1111
def __exit__(self, exc_type, exc_value, trace):
    """Close the underlying PDB file's address space on context exit."""
    self.address_space.close()
1114
class ParsePDB(core.DirectoryDumperMixin, plugin.TypedProfileCommand,
               plugin.Command):
    """Parse the PDB streams and emit a Rekall profile.

    Fixed for Python 3 compatibility: dict.has_key() and iteritems()
    replaced with the "in" operator and items(), and integer divisions
    on byte sizes use // (identical floor semantics on Python 2 ints).
    """

    __name = "parse_pdb"

    __args = [
        dict(name="pdb_filename", required=True, positional=True,
             help="The filename of the PDB file."),

        dict(name="profile_class",
             help="The name of the profile implementation. "
             "Default name is derived from the pdb filename."),

        dict(name="output_filename",
             help="The name of the file to store this profile. "),

        dict(name="windows_version",
             help="The windows version (major.minor.revision) "
             "corresponding with this PDB. For example, Windows 7 "
             "should be given as 6.1"),

        dict(name="concise", type="Boolean",
             help="Specify this to emit less detailed information."),
    ]

    def __init__(self, *args, **kwargs):
        """Set up the PDB parser and derive profile class and version.

        Accepts an optional "metadata" keyword argument which seeds the
        profile's $METADATA section.
        """
        self.metadata = kwargs.pop("metadata", {})
        super(ParsePDB, self).__init__(*args, **kwargs)

        profile_class = self.metadata.get(
            "ProfileClass", self.plugin_args.profile_class)

        # By default select the class with the same name as the pdb file.
        if profile_class is None:
            profile_class = os.path.splitext(
                os.path.basename(self.plugin_args.pdb_filename))[0].capitalize()

            # Fall back to a generic PE profile when no implementation of
            # that name is registered.
            if profile_class not in obj.Profile.classes:
                profile_class = "BasicPEProfile"

        self.plugin_args.profile_class = profile_class

        versions = []
        if self.plugin_args.windows_version is not None:
            versions = self.plugin_args.windows_version.split(".", 2)

            # Store whichever of major/minor/rev components were provided.
            for i, metadata in enumerate(["major", "minor", "rev"]):
                try:
                    self.metadata[metadata] = versions[i]
                except IndexError:
                    break

        self.tpi = PDBParser(self.plugin_args.pdb_filename, self.session)

    # Byte sizes of the native C types, used to convert array byte sizes
    # into element counts.
    NATIVE_TYPE_SIZE = {
        "unsigned char": 1,
        "unsigned int": 4,
        "unsigned long": 4,
        "unsigned long long": 8,
        "unsigned short": 2,
        "char": 1,
        "int": 4,
        "long": 4,
        "long long": 8,
        "short": 2,
    }

    def PostProcessVTypes(self, vtypes):
        """Post process the vtypes to optimize some access members.

        Applies the fix-ups registered during parsing (currently Array
        size -> count conversions and UnicodeString handling) in place.

        Args:
          vtypes: dict mapping struct name -> [size, fields] definition.

        Returns:
          The same vtypes dict.
        """
        arch = self.metadata.get("arch", "AMD64")

        for definition in self.tpi.fixups:
            target, target_args = definition
            if target == "Array":
                # The PDB symbols specify a UnicodeString as an array of wide
                # char but we need to fix it to be a UnicodeString with a
                # specified length.
                if target_args.get("target") == "UnicodeString":
                    definition[0] = "UnicodeString"
                    # Floor division: the size is in bytes, wide chars are
                    # two bytes each.
                    definition[1] = dict(
                        length=target_args.get("size") // 2
                    )
                # BUGFIX: dict.has_key() was removed in Python 3; "in" is
                # behavior-identical on Python 2.
                elif "size" in target_args:
                    # Work out the array target size.
                    array_target = target_args.get("target")
                    target_size = self.NATIVE_TYPE_SIZE.get(array_target)
                    if target_size is None:
                        if array_target == "Pointer":
                            target_size = 8 if arch == "AMD64" else 4
                        else:
                            target_definition = vtypes.get(array_target)
                            if target_definition is None:
                                # We have no idea what size it is. Leave the
                                # size parameter for the object system to work
                                # out during runtime.
                                continue

                            target_size = target_definition[0]

                    # Replace the size with a count. Floor division keeps the
                    # result an int on both Python 2 and 3.
                    target_args["count"] = target_args.pop(
                        "size") // target_size

        return vtypes

    def parse_pdb(self):
        """Extract structs, enums, constants and functions from the PDB.

        Returns:
          A dict with $METADATA, $STRUCTS, $ENUMS and (unless concise)
          $REVENUMS, $CONSTANTS and $FUNCTIONS sections.
        """
        with self.tpi:
            vtypes = {}

            for i, (struct_name, definition) in enumerate(self.tpi.Structs()):
                self.session.report_progress(
                    " Exporting %s: %s", i, struct_name)

                struct_name = str(struct_name)
                existing_definition = vtypes.get(struct_name)
                if existing_definition:
                    # Merge the old definition into the new definition.
                    definition[1].update(existing_definition[1])

                vtypes[struct_name] = definition

            self.metadata.update(dict(
                ProfileClass=self.plugin_args.profile_class,
                Type="Profile",
                PDBFile=os.path.basename(self.plugin_args.pdb_filename),
            ))

            self.metadata.update(self.tpi.metadata)

            # Demangle all constants. (items() rather than the Python 2
            # only iteritems().)
            demangler = pe_vtypes.Demangler(self.metadata)
            constants = {}
            for name, value in self.tpi.constants.items():
                constants[demangler.DemangleName(name)] = value

            functions = {}
            for name, value in self.tpi.functions.items():
                functions[demangler.DemangleName(name)] = value

            vtypes = self.PostProcessVTypes(vtypes)

            result = {
                "$METADATA": self.metadata,
                "$STRUCTS": vtypes,
                "$ENUMS": self.tpi.enums,
            }

            if not self.plugin_args.concise:
                result["$REVENUMS"] = self.tpi.rev_enums
                result["$CONSTANTS"] = constants
                result["$FUNCTIONS"] = functions

            return result

    def render(self, renderer):
        """Render the profile, either to output_filename or the renderer."""
        result = self.parse_pdb()

        if self.plugin_args.output_filename:
            with renderer.open(filename=self.plugin_args.output_filename,
                               directory=self.plugin_args.dump_dir,
                               mode="wb") as fd:
                fd.write(utils.PPrint(result))
        else:
            renderer.write(utils.PPrint(result))
1281