1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21  """These plugins are for manipulating Microsoft PDB files.
  22   
  23  References: 
  24  https://code.google.com/p/pdbparse/ 
  25  http://moyix.blogspot.de/2007/10/types-stream.html 
  26  http://undocumented.rawol.com/win_pdbx.zip 
  27   
  28  Our goal here is not to be a complete parser for PDB files. Rather, we are 
  29  trying to extract only the important information we need in order to build a 
  30  Rekall profile. This means that we don't necessarily care about modifiers like
  31  "const" "volatile" etc, but mostly care about struct, enums, bitfields etc. 
  32   
  33  If you are comparing the code here with the code in the pdbparse project, be 
  34  aware that due to the crazy way the construct library (which is used by 
  35  pdbparse) splits up bits, the ordering in the pdbparse code does not follow the 
  36  correct bit number (bits are defined in the order they appear in the bit stream, 
  37  which for a little endian number is non intuitive). e.g. 
  38   
  39  CV_property = BitStruct("prop", 
  40      Flag("fwdref"), 
  41      Flag("opcast"), 
  42      Flag("opassign"), 
  43      Flag("cnested"), 
  44      Flag("isnested"), 
  45      Flag("ovlops"), 
  46      Flag("ctor"), 
  47      Flag("packed"), 
  48   
  49      BitField("reserved", 7, swapped=True), 
  50      Flag("scoped"), 
  51  ) 
  52   
  53  Is actually this struct (i.e. above, the first field is bit 7, then 6 etc.
  54  down to bit 0, then bit 15 down to 8):
  55   
  56  typedef struct _CV_prop_t 
  57          { 
  58  /*000.0*/ WORD packed   : 1; 
  59  /*000.1*/ WORD ctor     : 1; 
  60  /*000.2*/ WORD ovlops   : 1; 
  61  /*000.3*/ WORD isnested : 1; 
  62  /*000.4*/ WORD cnested  : 1; 
  63  /*000.5*/ WORD opassign : 1; 
  64  /*000.6*/ WORD opcast   : 1; 
  65  /*000.7*/ WORD fwdref   : 1; 
  66  /*001.0*/ WORD scoped   : 1; 
  67  /*001.1*/ WORD reserved : 7; 
  68  /*002*/ } 
  69          CV_prop_t, *PCV_prop_t, **PPCV_prop_t; 
  70   
  71  Since we are lazy and do not want to hand code all the structure definitions, we 
  72  simply build a profile from the C implementation, and then use it here directly 
  73  using the "mspdb" profile (which is available in the profile repository). 
  74   
  75  http://undocumented.rawol.com/win_pdbx.zip: ./sbs_sdk/include/pdb_info.h 
  76   
  77  Other known implementations of PDB parsing: 
  78  https://chromium.googlesource.com/syzygy/+/master/pdb 
  79   
  80  The closest thing to official documentation can be found here: 
  81  http://pierrelib.pagesperso-orange.fr/exec_formats/MS_Symbol_Type_v1.0.pdf 
  82   
  83  """ 
  84   
  85  __author__ = "Michael Cohen <scudette@gmail.com>" 
  86   
  87  import glob 
  88  import re 
  89  import ntpath 
  90  import os 
  91  import platform 
  92  import subprocess 
  93  import sys 
  94  import urllib2 
  95   
  96  from rekall import addrspace 
  97  from rekall import plugin 
  98  from rekall import obj 
  99  from rekall import testlib 
 100   
 101  from rekall.plugins import core 
 102  from rekall.plugins.addrspaces import standard 
 103  from rekall.plugins.overlays import basic 
 104  from rekall.plugins.overlays.windows import pe_vtypes 
 105   
 106  from rekall_lib import utils 
 107   
 108   
 109 -class FetchPDB(core.DirectoryDumperMixin, plugin.TypedProfileCommand, 
 110                 plugin.Command): 
  111      """Fetch the PDB file for an executable from the Microsoft PDB server.""" 
 112   
 113      __name = "fetch_pdb" 
 114   
 115      SYM_URLS = ['http://msdl.microsoft.com/download/symbols'] 
 116      USER_AGENT = "Microsoft-Symbol-Server/10.0.0.0" 
 117   
 118      __args = [ 
 119          dict(name="pdb_filename", required=True, positional=True, 
 120               help="The filename of the executable to get the PDB file for."), 
 121   
 122          dict(name="guid", positional=True, 
 123               help="The GUID of the pdb file. If provided, the pdb filename must" 
 124               " be provided in the --pdb_filename parameter.") 
 125      ] 
 126   
 158   
 160           
 161          pdb_filename = self.plugin_args.pdb_filename 
 162          guid = self.plugin_args.guid 
 163   
 164          if not pdb_filename.endswith(".pdb"): 
 165              pdb_filename += ".pdb" 
 166   
 167          for url in self.SYM_URLS: 
 168              basename = ntpath.splitext(pdb_filename)[0] 
 169              url += "/%s/%s/%s.pd_" % (pdb_filename, guid, basename) 
 170   
 171              self.session.report_progress("Trying to fetch %s\n", url) 
 172              request = urllib2.Request(url, None, headers={ 
 173                  'User-Agent': self.USER_AGENT}) 
 174   
 175              url_handler = urllib2.urlopen(request) 
 176              with utils.TempDirectory() as temp_dir: 
 177                  compressed_output_file = os.path.join( 
 178                      temp_dir, "%s.pd_" % basename) 
 179   
 180                  output_file = os.path.join( 
 181                      temp_dir, "%s.pdb" % basename) 
 182   
 183                   
 184                  with open(compressed_output_file, "wb") as outfd: 
 185                      while True: 
 186                          data = url_handler.read(8192) 
 187                          if not data: 
 188                              break 
 189   
 190                          outfd.write(data) 
 191                          self.session.report_progress( 
 192                              "%s: Downloaded %s bytes", basename, outfd.tell()) 
 193   
 194                   
 195                  try: 
 196                      if platform.system() == "Windows": 
 197                           
 198                          subprocess.check_call( 
 199                              ["expand", compressed_output_file, output_file], 
 200                              cwd=temp_dir) 
 201                      else: 
 202                           
 203                           
 204                          subprocess.check_call( 
 205                              ["cabextract", compressed_output_file], 
 206                              cwd=temp_dir, 
 207                              stdout=sys.stderr) 
 208   
 209                  except (subprocess.CalledProcessError, OSError): 
 210                      raise RuntimeError( 
 211                          "Failed to decompress output file %s. " 
 212                          "Ensure cabextract is installed.\n" % output_file) 
 213   
 214                   
 215                   
 216                   
 217                  output_file = glob.glob("%s/*pdb" % temp_dir)[0] 
 218   
 219                   
 220                   
 221                  with open(output_file, "rb") as fd: 
 222                      return fd.read(50 * 1024 * 1024) 
   223   
 228   
 229   
 230 -def Pages(length, page_size): 
  231      """Calculate the number of pages required to store a stream.""" 
 232      num_pages = length / page_size 
 233      if length % page_size: 
 234          num_pages += 1 
 235   
 236      return num_pages 
  237   
 241      """An address space which combines together the page lists. 
 242   
 243      Once we parse the page list, we can build this address space which takes 
 244      care of reassembling the stream for us automatically. 
 245      """ 
 246   
 247 -    def __init__(self, pages=None, page_size=None, **kwargs): 
   257   
 258   
 259   
 260   
 261   
 262   
 263   
 264   
 265   
 266   
 267  LEAF_ENUM_TO_TYPE = dict( 
 268      LF_STRUCTURE="_lfClass", 
 269      LF_ARRAY="_lfArray", 
 270      LF_PROCEDURE="_lfProc", 
 271      LF_POINTER="_lfPointer", 
 272      LF_ARGLIST="_lfArgList", 
 273      LF_MODIFIER="_lfModifier", 
 274      LF_FIELDLIST="_lfFieldList", 
 275      LF_ENUM="_lfEnum", 
 276      LF_UNION="_lfUnion", 
 277      LF_BITFIELD="_lfBitfield", 
 278      LF_NESTTYPE="_lfNestType", 
 279      LF_CHAR="byte", 
 280      LF_SHORT="short int", 
 281      LF_USHORT="unsigned short int", 
 282      LF_LONG="long", 
 283      LF_ULONG="unsigned long", 
 284      LF_64PWCHAR="Pointer", 
 285  ) 
 286   
 287   
 288   
 289   
 290  LEAF_ENUM_TO_SUBRECORD = dict( 
 291      LF_MEMBER="Member", 
 292      LF_ENUMERATE="Enumerate", 
 293      LF_NESTTYPE="NestType", 
 294  ) 
 295   
 296   
 297  SYM_ENUM_TO_SYM = dict( 
 298      S_PUB32="_PUBSYM32", 
 299  ) 
 300   
 301   
# Overlay definitions for the PDB structs; PDBProfile passes this dict to
# add_overlay(). Each entry has the shape
#   struct_name: [size, {field_name: [offset, [target, target_args]]}]
# A lambda in place of a size, offset, count or field is called with the struct
# instance. None presumably means "keep the value from the underlying profile"
# (the usual Rekall overlay convention -- TODO confirm).
mspdb_overlays = {
    # The file header of a PDB file.
    "_PDB_HEADER_700": [None, {
        "abSignature": [None, ["Signature", dict(
            value="Microsoft C/C++ MSF 7.00\r\n\x1ADS\0\0\0"
        )]],

        # Number of pages needed to hold dRootBytes bytes.
        "root_pages": lambda x: Pages(x.dRootBytes, x.dPageBytes),

        # Each entry is the number of a page which itself holds a list of page
        # numbers (the header class walks this as a double indexed array).
        "adIndexPages": [None, ["Array", dict(
            target="unsigned int",
            # One 4 byte page number is needed per root page, so this is the
            # number of pages required to hold root_pages such entries.
            count=lambda x: Pages(4 * x.root_pages, x.dPageBytes),
        )]],
    }],

    # The struct size is 4 bytes per stream plus 4 more -- presumably the
    # dStreams count itself followed by one 4 byte size per stream (TODO
    # confirm against the mspdb profile).
    "_PDB_ROOT_700": [lambda x: (x.dStreams + 1) * 4, {
        "adStreamBytes": [None, ["Array", dict(
            count=lambda x: x.dStreams,
            target="unsigned int",
        )]],
    }],

    # A modifier record: the index of the modified type plus modifier flags.
    "_lfModifier": [None, {
        "modified_type": [2, ["unsigned int"]],
        "modifier": [6, ["Flags", dict(
            bitmap=dict(
                unaligned=2,
                volatile=1,
                const=0
            ),
            target="unsigned short int",
        )]],
    }],

    # A sub record of a field list. Its size is the size of whichever value it
    # holds, so the size must be computed from the parsed value.
    "_lfSubRecord": [lambda x: x.value.obj_size, {
        "leaf": [None, ["Enumeration", dict(
            enum_name="_LEAF_ENUM_e",
            target="unsigned short int")]],

        # Select the member named by the leaf kind ("Unknown" for leaf kinds
        # which are not in LEAF_ENUM_TO_SUBRECORD).
        "value": lambda x: x.m(
            LEAF_ENUM_TO_SUBRECORD.get(str(x.leaf), "Unknown")),
    }],

    "_lfEnum": [None, {
        # Parse the name as a String.
        "Name": [None, ["String"]],
    }],

    "_lfNestType": [None, {
        # Parse the name as a String.
        "Name": [None, ["String"]],
    }],

    # A list of sub records.
    "_lfFieldList": [None, {
        "SubRecord": [None, ["ListArray", dict(
            target="_lfSubRecord",

            # Bound the list by the parent's length field minus 2 --
            # presumably the parent is the TypeContainer and the 2 bytes are
            # its type_enum field (TODO confirm).
            maximum_size=lambda x: x.obj_parent.length - 2,
        )]],
    }],

    # The argument list of a function prototype.
    "_lfArgList": [None, {
        # One _TYPE_ENUM_e enumeration per argument; the number of arguments
        # comes from the struct's own count field.
        "arg": [None, ["Array", dict(
            target="Enumeration",
            target_args=dict(
                enum_name="_TYPE_ENUM_e",
                target="unsigned short int",
            ),
            count=lambda x: x.count
        )]],
    }],

    # Wraps a single type record. The total size is the value of the length
    # field plus 2 -- presumably because length does not count the 2 byte
    # length field itself (TODO confirm).
    "TypeContainer": [lambda x: x.length + 2, {
        "length": [0, ["unsigned short int"]],

        # The leaf kind of the record which follows the length; it selects how
        # the record is parsed.
        "type_enum": [2, ["Enumeration", dict(
            enum_name="_LEAF_ENUM_e",
            target="unsigned short int"
        )]],

        # Cast type_enum to the struct named in LEAF_ENUM_TO_TYPE, falling back
        # to a plain "unsigned int" for leaf kinds which are not in the table.
        "type": lambda x: x.type_enum.cast(
            LEAF_ENUM_TO_TYPE.get(str(x.type_enum), "unsigned int")),
    }],

    # The header of the types stream. The type records start immediately after
    # the header (offset == size of the header).
    "_HDR": [None, {
        "types": [lambda x: x.obj_size,
                  ["ListArray", dict(
                      target="TypeContainer",
                      count=lambda x: x.tiMac - x.tiMin,
                      maximum_size=lambda x: x.cbGprec,
                  )]],
    }],

    "_GUID": [16, {
        "Data1": [0, ["unsigned long", {}]],
        "Data2": [4, ["unsigned short", {}]],
        "Data3": [6, ["unsigned short", {}]],
        "Data4": [8, ["String", dict(length=8, term=None)]],
        # Upper case hex rendering of the four parts, without separators.
        # NOTE(review): str.encode('hex') only exists on Python 2.
        "AsString": lambda x: ("%08x%04x%04x%s" % (
            x.Data1, x.Data2, x.Data3, str(x.Data4).encode('hex'))).upper(),
    }],

    # Parsed from stream 1 by the info stream parser.
    "Info": [None, {
        "Version": [0, ["unsigned long int"]],
        "TimeDateStamp": [4, ["UnixTimeStamp"]],
        "Age": [8, ["unsigned long int"]],
        "GUID": [12, ["_GUID"]],
    }],

    # A container for a single symbol record. The total size is reclen plus 2
    # -- presumably reclen does not count its own 2 bytes (TODO confirm).
    "_ALIGNSYM": [lambda x: x.reclen + 2, {
        "rectyp": [None, ["Enumeration", dict(
            enum_name="_SYM_ENUM_e",
            target="unsigned short int")]],

        # Cast the container to the struct named in SYM_ENUM_TO_SYM; record
        # types which are not in the table are cast to "".
        "value": lambda x: x.cast(
            SYM_ENUM_TO_SYM.get(str(x.rectyp), ""))

    }],

    "_PUBSYM32": [None, {
        "name": [None, ["String"]],
    }],

    "DBI": [None, {
        "DBIHdr": [0, ["_NewDBIHdr"]],
        # The extended headers start at offset 64 and take up cbGpModi bytes.
        "ExHeaders": [64, ["ListArray", dict(
            maximum_size=lambda x: x.DBIHdr.cbGpModi,
            target="DBIExHeaders")]],
    }],

    "DBIExHeaders": [None, {
        "modName": [64, ["String"]],
        # objName immediately follows the modName string.
        "objName": [lambda x: x.modName.obj_offset + x.modName.obj_size,
                    ["String"]],
    }],

    "IMAGE_SECTION_HEADER": [None, {
        "Name": [None, ["String"]],
    }],

}
 474      """Represents a class or struct.""" 
 475   
 476      _obj_end = 0 
 477   
 481   
 482      @utils.safe_property 
 487   
 489          """This object is followed by a variable sized data structure. 
 490   
 491          This data structure contains the "value_" and "name" attributes. If the 
 492          first short int less than 0x8000, it represents the value. Otherwise, it 
 493          represents an _LEAF_ENUM_e enum which determines the size of the value 
 494          to read next (e.g. LF_ULONG = 4 bytes, LF_SHORT = 2 bytes) and those 
 495          represent the value. 
 496   
 497          The name field then follows as a String. 
 498   
 499          Following the name field, there is padding to 4 byte alignment. 
 500   
 501          We must calculate the total size of this struct in this function, after 
 502          parsing all the components. 
 503          """ 
 504   
 505          obj_end = self.obj_offset + super(lfClass, self).obj_size 
 506          field_type = self.obj_profile.Object( 
 507              "unsigned short int", offset=obj_end, vm=self.obj_vm) 
 508   
 509          obj_end += field_type.obj_size 
 510   
 511          if field_type < 0x8000: 
 512              self.value_ = field_type 
 513              self.name = self.obj_profile.String( 
 514                  offset=obj_end, vm=self.obj_vm) 
 515   
 516              obj_end += self.name.obj_size 
 517   
 518          else: 
 519               
 520               
 521              type_enum_name = self.obj_profile.get_enum( 
 522                  "_LEAF_ENUM_e").get(str(field_type)) 
 523   
 524              type_name = LEAF_ENUM_TO_TYPE.get(type_enum_name) 
 525   
 526              self.value_ = self.obj_profile.Object( 
 527                  type_name=type_name, offset=obj_end, vm=self.obj_vm) 
 528   
 529               
 530              self.name = self.obj_profile.String( 
 531                  offset=self.value_.obj_offset + self.value_.obj_size, 
 532                  vm=self.obj_vm) 
 533   
 534              obj_end += self.value_.obj_size + self.name.obj_size 
 535   
 536           
 537          self._obj_end = obj_end 
 538   
 539           
 540          if self.name == "__unnamed": 
 541              self.name = "__unnamed_%s" % self.field 
  542   
 543      @utils.safe_property 
 546   
 548          """Returns the vtype data structure defining this element. 
 549   
 550          Returns: 
 551            a tuple, the first element is the target name, the second is the dict 
 552            of the target_args. 
 553          """ 
 554           
 555          return [str(self.name), {}] 
   556   
 559      """A SubRecord describing a single enumeration definition.""" 
  560   
 563      """A range of bits.""" 
 564   
 566          """BitField overlays on top of another type.""" 
 567          result = tpi.DefinitionByIndex(self.type) 
 568          if not result: 
 569              return [str(self.name), {}] 
 570   
 571          target, target_args = result 
 572   
 573          return "BitField", dict( 
 574              start_bit=int(self.position), 
 575              end_bit=int(self.position) + int(self.length), 
 576              target_args=target_args, target=target) 
   577   
 580      UNNAMED_RE = re.compile("<unnamed-type-([^->]+)>") 
 581   
 589   
 590      @utils.safe_property 
 595   
  598   
 601      """A Union is basically the same as a struct, except members may overlap.""" 
  602   
 606          """We dont really care about modifiers, just pass the utype through.""" 
 607          return tpi.DefinitionByIndex(self.modified_type) 
  611      """Represents an enumeration definition.""" 
 612   
 613      @utils.safe_property 
 615          enum_name = str(self.m("Name")) 
 616          if enum_name == "<unnamed-tag>": 
 617              enum_name = "ENUM_%X" % self.obj_offset 
 618   
 619          return enum_name 
  620   
 630   
 632          """Enumerations are defined in two parts. 
 633   
 634          First an enumeration dict is added to the profile constants, and then 
 635          the target "Enumeration" can use it by name (having the enum_name 
 636          field). This allows many fields which use the same enumeration to share 
 637          the definition dict. 
 638          """ 
 639          result = tpi.DefinitionByIndex(self.utype) 
 640          if not result: 
 641              return [str(self.name), {}] 
 642   
 643          target, target_args = result 
 644   
 645          return "Enumeration", dict( 
 646              target=target, target_args=target_args, enum_name=self.Name) 
   647   
 650      """A Pointer object.""" 
 651   
 653          target_index = int(self.u1.utype) 
 654          result = tpi.DefinitionByIndex(target_index) 
 655          if not result: 
 656              return [str(self.name), {}] 
 657   
 658          target, target_args = result 
 659   
 660          return ["Pointer", dict( 
 661              target=target, 
 662              target_args=target_args 
 663          )] 
   664   
 667      """A Function object.""" 
 668   
 670          """We record the function arg prototype as well.""" 
 671          args = [] 
 672          for idx in tpi.Resolve(self.arglist).arg: 
 673              definition = tpi.DefinitionByIndex(idx) 
 674              if definition: 
 675                  args.append(definition) 
 676   
 677          return "Function", dict(args=args) 
   678   
 681      """An array of the same object.""" 
 682   
 684          result = tpi.DefinitionByIndex(self.elemtype) 
 685          if not result: 
 686              return [str(self.name), {}] 
 687   
 688          target, target_args = result 
 689          if target == "<unnamed-tag>": 
 690              target = "<unnamed-%s>" % self.elemtype 
 691   
 692           
 693           
 694           
 695           
 696          definition = ["Array", dict( 
 697              target=target, target_args=target_args, 
 698              size=int(self.value_), 
 699          )] 
 700   
 701          tpi.RegisterFixUp(definition) 
 702   
 703          return definition 
   704   
 707      """A member in a struct (or class).""" 
 708   
 710          """Returns a tuple of target, target_args for the member.""" 
 711          return tpi.DefinitionByIndex(self.m("index")) 
   712   
 715      """The file header of a PDB file.""" 
 716   
 718          """The full page list is a double indexed array.""" 
 719          result = [] 
 720          for idx in self.adIndexPages: 
 721              for page_number in self.obj_profile.Array( 
 722                      offset=idx * self.dPageBytes, vm=self.obj_vm, 
 723                      target="unsigned int", count=self.dPageBytes / 4): 
 724                  result.append(int(page_number)) 
 725                  if len(result) >= self.root_pages: 
 726                      return result 
 727   
 728          return result 
   729   
 732      """The root stream contains information about all other streams.""" 
 733   
 735          """Read all the streams in the file.""" 
 736          offset_of_index_list = self.obj_offset + self.obj_size 
 737          page_size = self.obj_context["page_size"] 
 738   
 739          for stream_size in self.adStreamBytes: 
 740              if stream_size == 0xffffffff: 
 741                  stream_size = 0 
 742   
 743              page_list = self.obj_profile.Array( 
 744                  offset=offset_of_index_list, vm=self.obj_vm, 
 745                  count=Pages(stream_size, page_size), 
 746                  target="unsigned int") 
 747   
 748              offset_of_index_list += page_list.obj_size 
 749   
 750              yield StreamBasedAddressSpace( 
 751                  base=self.obj_vm.base, page_size=page_size, 
 752                  session=self.obj_profile.session, pages=page_list) 
  753   
  759   
 767   
 768   
 769 -class DBI(obj.Struct): 
  771          DBIHdr = self.DBIHdr 
 772           
 773           
 774          header_offset = (self.obj_offset + 
 775                           DBIHdr.obj_size + 
 776                           DBIHdr.cbGpModi + 
 777                           DBIHdr.cbSC + 
 778                           DBIHdr.cbSecMap + 
 779                           DBIHdr.cbFileInfo + 
 780                           DBIHdr.cbTSMap + 
 781                           DBIHdr.cbECInfo) 
 782   
 783          return self.obj_profile.DbgHdr(header_offset, vm=self.obj_vm) 
  785   
 786 -class PDBProfile(basic.Profile32Bits, basic.BasicClasses): 
  787      """A profile to parse Microsoft PDB files. 
 788   
 789      Note that this is built on top of the mspdb profile which exists in the 
 790      profile repository, as generated from the code here: 
 791   
 792      http://undocumented.rawol.com/win_pdbx.zip 
 793   
 794      Do not directly instantiate this. Just do: 
 795   
 796      profile = session.LoadProfile("mspdb") 
 797      """ 
 798   
 800          super(PDBProfile, self).__init__(**kwargs) 
 801          self.add_overlay(mspdb_overlays) 
 802          self.add_classes({ 
 803              "_PDB_HEADER_700": _PDB_HEADER_700, 
 804              "_PDB_ROOT_700": _PDB_ROOT_700, 
 805              "_lfClass": lfClass, "_lfArray": lfArray, 
 806              "_lfMember": lfMember, "_lfPointer": lfPointer, 
 807              "_lfProc": lfProc, "_lfEnum": lfEnum, 
 808              "_lfModifier": lfModifier, "_lfUnion": lfUnion, 
 809              "_lfBitfield": lfBitfield, "_lfEnumerate": lfEnumerate, 
 810              "_lfNestType": lfNestType, "DBIExHeaders": DBIExHeaders, 
 811              "DBI": DBI 
 812          }) 
   813   
 816      """Parses a Microsoft PDB file.""" 
 817   
 818       
 819       
 820       
    # Maps the names of the built in types (the _TYPE_ENUM_e names which are
    # looked up for type indexes below 0x700) to their vtype definition, given
    # as [target, target_args].
    # NOTE(review): T_32PWCHAR points at "UnicodeString" while T_64PWCHAR
    # points at "String" -- confirm whether this difference is intentional.
    TYPE_ENUM_TO_VTYPE = {
        "T_32PINT4": ["Pointer", dict(target="long")],
        "T_32PLONG": ["Pointer", dict(target="long")],
        "T_32PQUAD": ["Pointer", dict(target="long long")],
        "T_32PRCHAR": ["Pointer", dict(target="unsigned char")],
        "T_32PREAL32": ["Pointer", dict(target="Void")],
        "T_32PREAL64": ["Pointer", dict(target="Void")],
        "T_32PSHORT": ["Pointer", dict(target="short")],
        "T_32PUCHAR": ["Pointer", dict(target="unsigned char")],
        "T_32PUINT4": ["Pointer", dict(target="unsigned int")],
        "T_32PULONG": ["Pointer", dict(target="unsigned long")],
        "T_32PUQUAD": ["Pointer", dict(target="unsigned long long")],
        "T_32PUSHORT": ["Pointer", dict(target="unsigned short")],
        "T_32PVOID": ["Pointer", dict(target="Void")],
        "T_32PWCHAR": ["Pointer", dict(target="UnicodeString")],
        "T_64PLONG": ["Pointer", dict(target="long")],
        "T_64PQUAD": ["Pointer", dict(target="long long")],
        "T_64PRCHAR": ["Pointer", dict(target="unsigned char")],
        "T_64PUCHAR": ["Pointer", dict(target="unsigned char")],
        "T_64PWCHAR": ["Pointer", dict(target="String")],
        "T_64PULONG": ["Pointer", dict(target="unsigned long")],
        "T_64PUQUAD": ["Pointer", dict(target="unsigned long long")],
        "T_64PUSHORT": ["Pointer", dict(target="unsigned short")],
        "T_64PVOID": ["Pointer", dict(target="Void")],
        "T_BOOL08": ["unsigned char", {}],
        "T_CHAR": ["char", {}],
        "T_INT4": ["long", {}],
        "T_INT8": ["long long", {}],
        "T_LONG": ["long", {}],
        "T_QUAD": ["long long", {}],
        "T_RCHAR": ["unsigned char", {}],
        "T_REAL32": ["float", {}],
        "T_REAL64": ["double", {}],
        "T_REAL80": ["long double", {}],
        "T_SHORT": ["short", {}],
        "T_UCHAR": ["unsigned char", {}],
        "T_UINT4": ["unsigned long", {}],
        "T_ULONG": ["unsigned long", {}],
        "T_UQUAD": ["unsigned long long", {}],
        "T_USHORT": ["unsigned short", {}],
        "T_VOID": ["Void", {}],
        "T_WCHAR": ["UnicodeString", {}],
    }
 864   
 902   
 904          """Parse the PDB info stream.""" 
 905           
 906          info = self.profile.Info(vm=self.root_stream_header.GetStream(1)) 
 907          self.metadata = dict( 
 908              Version=int(info.Version), 
 909              Timestamp=str(info.TimeDateStamp), 
 910              GUID_AGE="%s%X" % (info.GUID.AsString, info.Age), 
 911          ) 
  912   
 930   
 932          """Gather the PE sections of this executable.""" 
 933          self.sections = [] 
 934          stream = self.root_stream_header.GetStream(stream_id) 
 935          if stream is None: 
 936              return 
 937   
 938          for section in self.profile.ListArray( 
 939                  maximum_size=stream.size, 
 940                  target="IMAGE_SECTION_HEADER", vm=stream): 
 941              self.sections.append(section) 
  942   
 944          """Build an OMAP lookup table. 
 945   
 946          The OMAP is a translation between the original symbol's offset to the 
 947          final offset. When the linker builds the executable, it reorders the 
 948          original object files in the executable section. This translation table 
 949          tells us where the symbols end up. 
 950          """ 
 951          self.omap = utils.SortedCollection(key=lambda x: x[0]) 
 952          omap_stream = self.root_stream_header.GetStream(omap_stream_id) 
 953          if omap_stream is None: 
 954              return 
 955   
 956          omap_address_space = addrspace.BufferAddressSpace( 
 957              session=self.session, 
 958              data=omap_stream.read(0, omap_stream.size)) 
 959   
 960          omap_array = self.profile.Array( 
 961              vm=omap_address_space, 
 962              count=omap_stream.size / self.profile.get_obj_size("_OMAP_DATA"), 
 963              max_count=omap_stream.size, 
 964              target="_OMAP_DATA") 
 965   
 966          for i, omap in enumerate(omap_array): 
 967              src = int(omap.rva) 
 968              dest = int(omap.rvaTo) 
 969   
 970              self.omap.insert((src, dest)) 
 971              self.session.report_progress( 
 972                  " Extracting OMAP Information %s%%", 
 973                  lambda: i * 100 / omap_array.count) 
  974   
 976          """Parse the symbol records stream.""" 
 977          stream = self.root_stream_header.GetStream(stream_id) 
 978          for container in self.profile.ListArray(target="_ALIGNSYM", vm=stream, 
 979                                                  maximum_size=stream.size): 
 980   
 981              if container.reclen == 0: 
 982                  break 
 983   
 984              symbol = container.value 
 985   
 986               
 987              if not symbol: 
 988                  self.session.logging.warning( 
 989                      "Unimplemented symbol %s" % container.rectyp) 
 990                  continue 
 991   
 992              try: 
 993                  name = str(symbol.name) 
 994              except AttributeError: 
 995                   
 996                  continue 
 997   
 998              translated_offset = offset = int(symbol.off) 
 999   
1000               
1001               
1002              if self.sections: 
1003                   
1004                   
1005                  translated_offset = virtual_address = ( 
1006                      offset + self.sections[symbol.seg - 1].VirtualAddress) 
1007   
1008                   
1009                   
1010                  if self.omap: 
1011                       
1012                      try: 
1013                          from_offset, dest_offset = self.omap.find_le( 
1014                              virtual_address) 
1015   
1016                          translated_offset = ( 
1017                              virtual_address - from_offset + dest_offset) 
1018   
1019                      except ValueError: 
1020                          pass 
1021   
1022              if symbol.pubsymflags.u1.fFunction: 
1023                  self.functions[name] = translated_offset 
1024              else: 
1025                  self.constants[name] = translated_offset 
1026   
1027              self.session.report_progress(" Parsing Symbols %s", name) 
 1028   
1030          """The TPI stream contains all the struct definitions.""" 
1031          self.lookup = {} 
1032          tpi = self.profile._HDR(vm=self.root_stream_header.GetStream(2)) 
1033   
1034           
1035          for i, t in enumerate(tpi.types): 
1036              self.session.report_progress(" Parsing Structs %(spinner)s") 
1037   
1038              self.lookup[tpi.tiMin + i] = t 
1039              if not t: 
1040                  break 
1041   
1042           
1043           
1044          for value in self.lookup.values(): 
1045              if value.type_enum == "LF_ENUM": 
1046                  value.type.AddEnumeration(self) 
 1047   
1049          self.enums[name] = enumeration 
 1050   
1052          self.rev_enums[name] = enumeration 
 1053   
1055          self.fixups.append(definition) 
 1056   
1058          for key, value in self.lookup.iteritems(): 
1059               
1060              if ((value.type_enum == "LF_STRUCTURE" or 
1061                   value.type_enum == "LF_UNION") and 
1062                      not value.type.property.fwdref): 
1063   
1064                  struct_name = value.type.name 
1065                  if struct_name == "<unnamed-tag>": 
1066                      struct_name = "<unnamed-%s>" % key 
1067   
1068                  struct_size = int(value.type.value_) 
1069   
1070                  field_list = self.lookup[int(value.type.field)].type 
1071                  definition = [struct_size, {}] 
1072   
1073                  for field in field_list.SubRecord: 
1074                      field_definition = field.value.Definition(self) 
1075                      if field_definition: 
1076                          if field_definition[0] == "<unnamed-tag>": 
1077                              field_definition[0] = ( 
1078                                  "<unnamed-%s>" % field.value.index) 
1079   
1080                          definition[1][str(field.value.name)] = [ 
1081                              int(field.value.value_), field_definition] 
1082   
1083                  yield [struct_name, definition] 
 1084   
1086          """Return the vtype definition of the item identified by idx.""" 
1087          result = None 
1088          if idx < 0x700: 
1089              type_name = self._TYPE_ENUM_e.get(idx) 
1090   
1091              result = self.TYPE_ENUM_TO_VTYPE.get(type_name) 
1092              if result is None and type_name != "T_NOTYPE": 
1093                  self.session.logging.error("Unrecognized type %s\n", type_name) 
1094   
1095          else: 
1096              try: 
1097                  result = self.lookup[idx].type.Definition(self) 
1098              except AttributeError: 
1099                  pass 
1100   
1101          return result 
 1102   
1104          try: 
1105              return self.lookup[idx].type 
1106          except KeyError: 
1107              return obj.NoneObject("Index not known") 
 1108   
1111   
1112 -    def __exit__(self, exc_type, exc_value, trace): 
  1114   
1115   
1116 -class ParsePDB(core.DirectoryDumperMixin, plugin.TypedProfileCommand, 
1117                 plugin.Command): 
 1118      """Parse the PDB streams.""" 
1119   
1120      __name = "parse_pdb" 
1121   
1122      __args = [ 
1123          dict(name="pdb_filename", required=True, positional=True, 
1124               help="The filename of the PDB file."), 
1125   
1126          dict(name="profile_class", 
1127               help="The name of the profile implementation. " 
1128               "Default name is derived from the pdb filename."), 
1129   
1130          dict(name="output_filename", 
1131               help="The name of the file to store this profile. "), 
1132   
1133          dict(name="windows_version", 
1134               help="The windows version (major.minor.revision) " 
1135               "corresponding with this PDB. For example, Windows 7 " 
1136               "should be given as 6.1"), 
1137   
1138          dict(name="concise", type="Boolean", 
1139               help="Specify this to emit less detailed information."), 
1140      ] 
1141   
1170   
1171      NATIVE_TYPE_SIZE = { 
1172          "unsigned char": 1, 
1173          "unsigned int": 4, 
1174          "unsigned long": 4, 
1175          "unsigned long long": 8, 
1176          "unsigned short": 2, 
1177          "char": 1, 
1178          "int": 4, 
1179          "long": 4, 
1180          "long long": 8, 
1181          "short": 2, 
1182      } 
1183   
1184 -    def PostProcessVTypes(self, vtypes): 
 1185          """Post process the vtypes to optimize some access members.""" 
1186          arch = self.metadata.get("arch", "AMD64") 
1187   
1188          for defintion in self.tpi.fixups: 
1189              target, target_args = defintion 
1190              if target == "Array": 
1191                   
1192                   
1193                   
1194                  if target_args.get("target") == "UnicodeString": 
1195                      defintion[0] = "UnicodeString" 
1196                      defintion[1] = dict( 
1197                          length=target_args.get("size") / 2 
1198                      ) 
1199                  elif target_args.has_key("size"): 
1200                       
1201                      array_target = target_args.get("target") 
1202                      target_size = self.NATIVE_TYPE_SIZE.get(array_target) 
1203                      if target_size is None: 
1204                          if array_target == "Pointer": 
1205                              target_size = 8 if arch == "AMD64" else 4 
1206                          else: 
1207                              target_definition = vtypes.get(array_target) 
1208                              if target_definition is None: 
1209                                   
1210                                   
1211                                   
1212                                  continue 
1213   
1214                              target_size = target_definition[0] 
1215   
1216                       
1217                      target_args["count"] = target_args.pop( 
1218                          "size") / target_size 
1219   
1220          return vtypes 
 1221   
1223          with self.tpi: 
1224              vtypes = {} 
1225   
1226              for i, (struct_name, definition) in enumerate(self.tpi.Structs()): 
1227                  self.session.report_progress( 
1228                      " Exporting %s: %s", i, struct_name) 
1229   
1230                  struct_name = str(struct_name) 
1231                  existing_definition = vtypes.get(struct_name) 
1232                  if existing_definition: 
1233                       
1234                      definition[1].update(existing_definition[1]) 
1235   
1236                  vtypes[struct_name] = definition 
1237   
1238              self.metadata.update(dict( 
1239                  ProfileClass=self.plugin_args.profile_class, 
1240                  Type="Profile", 
1241                  PDBFile=os.path.basename(self.plugin_args.pdb_filename), 
1242              )) 
1243   
1244              self.metadata.update(self.tpi.metadata) 
1245   
1246               
1247              demangler = pe_vtypes.Demangler(self.metadata) 
1248              constants = {} 
1249              for name, value in self.tpi.constants.iteritems(): 
1250                  constants[demangler.DemangleName(name)] = value 
1251   
1252              functions = {} 
1253              for name, value in self.tpi.functions.iteritems(): 
1254                  functions[demangler.DemangleName(name)] = value 
1255   
1256              vtypes = self.PostProcessVTypes(vtypes) 
1257   
1258              result = { 
1259                  "$METADATA": self.metadata, 
1260                  "$STRUCTS": vtypes, 
1261                  "$ENUMS": self.tpi.enums, 
1262              } 
1263   
1264              if not self.plugin_args.concise: 
1265                  result["$REVENUMS"] = self.tpi.rev_enums 
1266                  result["$CONSTANTS"] = constants 
1267                  result["$FUNCTIONS"] = functions 
1268   
1269              return result 
 1270   
 1281