1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """These plugins are for manipulating Microsoft PDB file.
22
23 References:
24 https://code.google.com/p/pdbparse/
25 http://moyix.blogspot.de/2007/10/types-stream.html
26 http://undocumented.rawol.com/win_pdbx.zip
27
Our goal here is not to be a complete parser for PDB files. Rather, we are
trying to extract only the important information we need in order to build a
Rekall profile. This means that we don't necessarily care about modifiers like
"const", "volatile" etc., but mostly care about structs, enums, bitfields etc.
32
33 If you are comparing the code here with the code in the pdbparse project, be
34 aware that due to the crazy way the construct library (which is used by
35 pdbparse) splits up bits, the ordering in the pdbparse code does not follow the
36 correct bit number (bits are defined in the order they appear in the bit stream,
37 which for a little endian number is non intuitive). e.g.
38
39 CV_property = BitStruct("prop",
40 Flag("fwdref"),
41 Flag("opcast"),
42 Flag("opassign"),
43 Flag("cnested"),
44 Flag("isnested"),
45 Flag("ovlops"),
46 Flag("ctor"),
47 Flag("packed"),
48
49 BitField("reserved", 7, swapped=True),
50 Flag("scoped"),
51 )
52
This is actually the following struct (i.e. above, the first field is bit 7,
then 6 etc. down to bit 0, then bit 15 down to 8):
55
56 typedef struct _CV_prop_t
57 {
58 /*000.0*/ WORD packed : 1;
59 /*000.1*/ WORD ctor : 1;
60 /*000.2*/ WORD ovlops : 1;
61 /*000.3*/ WORD isnested : 1;
62 /*000.4*/ WORD cnested : 1;
63 /*000.5*/ WORD opassign : 1;
64 /*000.6*/ WORD opcast : 1;
65 /*000.7*/ WORD fwdref : 1;
66 /*001.0*/ WORD scoped : 1;
67 /*001.1*/ WORD reserved : 7;
68 /*002*/ }
69 CV_prop_t, *PCV_prop_t, **PPCV_prop_t;
70
71 Since we are lazy and do not want to hand code all the structure definitions, we
72 simply build a profile from the C implementation, and then use it here directly
73 using the "mspdb" profile (which is available in the profile repository).
74
75 http://undocumented.rawol.com/win_pdbx.zip: ./sbs_sdk/include/pdb_info.h
76
77 Other known implementations of PDB parsing:
78 https://chromium.googlesource.com/syzygy/+/master/pdb
79
80 The closest thing to official documentation can be found here:
81 http://pierrelib.pagesperso-orange.fr/exec_formats/MS_Symbol_Type_v1.0.pdf
82
83 """
84
85 __author__ = "Michael Cohen <scudette@gmail.com>"
86
87 import glob
88 import re
89 import ntpath
90 import os
91 import platform
92 import subprocess
93 import sys
94 import urllib2
95
96 from rekall import addrspace
97 from rekall import plugin
98 from rekall import obj
99 from rekall import testlib
100
101 from rekall.plugins import core
102 from rekall.plugins.addrspaces import standard
103 from rekall.plugins.overlays import basic
104 from rekall.plugins.overlays.windows import pe_vtypes
105
106 from rekall_lib import utils
107
108
109 -class FetchPDB(core.DirectoryDumperMixin, plugin.TypedProfileCommand,
110 plugin.Command):
111 """Fetch the PDB file for an executable from the Microsoft PDB server."""
112
113 __name = "fetch_pdb"
114
115 SYM_URLS = ['http://msdl.microsoft.com/download/symbols']
116 USER_AGENT = "Microsoft-Symbol-Server/10.0.0.0"
117
118 __args = [
119 dict(name="pdb_filename", required=True, positional=True,
120 help="The filename of the executable to get the PDB file for."),
121
122 dict(name="guid", positional=True,
123 help="The GUID of the pdb file. If provided, the pdb filename must"
124 " be provided in the --pdb_filename parameter.")
125 ]
126
158
160
161 pdb_filename = self.plugin_args.pdb_filename
162 guid = self.plugin_args.guid
163
164 if not pdb_filename.endswith(".pdb"):
165 pdb_filename += ".pdb"
166
167 for url in self.SYM_URLS:
168 basename = ntpath.splitext(pdb_filename)[0]
169 url += "/%s/%s/%s.pd_" % (pdb_filename, guid, basename)
170
171 self.session.report_progress("Trying to fetch %s\n", url)
172 request = urllib2.Request(url, None, headers={
173 'User-Agent': self.USER_AGENT})
174
175 url_handler = urllib2.urlopen(request)
176 with utils.TempDirectory() as temp_dir:
177 compressed_output_file = os.path.join(
178 temp_dir, "%s.pd_" % basename)
179
180 output_file = os.path.join(
181 temp_dir, "%s.pdb" % basename)
182
183
184 with open(compressed_output_file, "wb") as outfd:
185 while True:
186 data = url_handler.read(8192)
187 if not data:
188 break
189
190 outfd.write(data)
191 self.session.report_progress(
192 "%s: Downloaded %s bytes", basename, outfd.tell())
193
194
195 try:
196 if platform.system() == "Windows":
197
198 subprocess.check_call(
199 ["expand", compressed_output_file, output_file],
200 cwd=temp_dir)
201 else:
202
203
204 subprocess.check_call(
205 ["cabextract", compressed_output_file],
206 cwd=temp_dir,
207 stdout=sys.stderr)
208
209 except (subprocess.CalledProcessError, OSError):
210 raise RuntimeError(
211 "Failed to decompress output file %s. "
212 "Ensure cabextract is installed.\n" % output_file)
213
214
215
216
217 output_file = glob.glob("%s/*pdb" % temp_dir)[0]
218
219
220
221 with open(output_file, "rb") as fd:
222 return fd.read(50 * 1024 * 1024)
223
228
229
def Pages(length, page_size):
    """Calculate the number of pages required to store a stream.

    Args:
      length: The length of the stream in bytes.
      page_size: The size of a single page in bytes.

    Returns:
      The number of pages needed to hold length bytes. A partially filled
      trailing page counts as a whole page.
    """
    # Use floor division explicitly so that the page count stays integral even
    # when "/" means true division (Python 3, or __future__ division).
    num_pages = length // page_size
    if length % page_size:
        num_pages += 1

    return num_pages
237
241 """An address space which combines together the page lists.
242
243 Once we parse the page list, we can build this address space which takes
244 care of reassembling the stream for us automatically.
245 """
246
247 - def __init__(self, pages=None, page_size=None, **kwargs):
257
258
259
260
261
262
263
264
265
266
# Maps the name of an _LEAF_ENUM_e value to the name of the profile type which
# is used to parse the data tagged with that leaf. Lookups are done with the
# string form of the enumeration.
LEAF_ENUM_TO_TYPE = {
    # Record types.
    "LF_STRUCTURE": "_lfClass",
    "LF_ARRAY": "_lfArray",
    "LF_PROCEDURE": "_lfProc",
    "LF_POINTER": "_lfPointer",
    "LF_ARGLIST": "_lfArgList",
    "LF_MODIFIER": "_lfModifier",
    "LF_FIELDLIST": "_lfFieldList",
    "LF_ENUM": "_lfEnum",
    "LF_UNION": "_lfUnion",
    "LF_BITFIELD": "_lfBitfield",
    "LF_NESTTYPE": "_lfNestType",

    # Plain value types.
    "LF_CHAR": "byte",
    "LF_SHORT": "short int",
    "LF_USHORT": "unsigned short int",
    "LF_LONG": "long",
    "LF_ULONG": "unsigned long",
    "LF_64PWCHAR": "Pointer",
}
286
287
288
289
# Maps the name of an _LEAF_ENUM_e value to the name of the _lfSubRecord member
# which holds the data for that kind of sub record.
LEAF_ENUM_TO_SUBRECORD = {
    "LF_MEMBER": "Member",
    "LF_ENUMERATE": "Enumerate",
    "LF_NESTTYPE": "NestType",
}
295
296
# Maps the name of an _SYM_ENUM_e record type to the name of the profile type
# that a symbol record of that type is cast to.
SYM_ENUM_TO_SYM = {
    "S_PUB32": "_PUBSYM32",
}
300
301
# Overlays applied on top of the "mspdb" profile. Each entry has the shape
# [struct_size, {member: [offset, [type, args]]}]; a callable in place of a
# value is evaluated against the struct instance (x).
# NOTE(review): None presumably means "keep the value from the underlying
# profile" - confirm against the overlay machinery.
mspdb_overlays = {
    # The PDB file header.
    "_PDB_HEADER_700": [None, {
        # The signature must match this magic string.
        "abSignature": [None, ["Signature", dict(
            value="Microsoft C/C++ MSF 7.00\r\n\x1ADS\0\0\0"
        )]],

        # The number of pages needed to hold the dRootBytes bytes of the root
        # stream.
        "root_pages": lambda x: Pages(x.dRootBytes, x.dPageBytes),

        # An array of page numbers. The list of root_pages page numbers takes
        # 4 bytes per entry, and itself spans this many pages.
        "adIndexPages": [None, ["Array", dict(
            target="unsigned int",

            # Number of pages needed to store 4 * root_pages bytes.
            count=lambda x: Pages(4 * x.root_pages, x.dPageBytes),
        )]],
    }],

    # The root stream header: a stream count followed by one size per stream,
    # hence the struct size of (dStreams + 1) * 4 bytes.
    "_PDB_ROOT_700": [lambda x: (x.dStreams + 1) * 4, {
        "adStreamBytes": [None, ["Array", dict(
            count=lambda x: x.dStreams,
            target="unsigned int",
        )]],
    }],

    # A modifier record: the index of the modified type plus modifier flags.
    "_lfModifier": [None, {
        "modified_type": [2, ["unsigned int"]],
        "modifier": [6, ["Flags", dict(
            bitmap=dict(
                unaligned=2,
                volatile=1,
                const=0
            ),
            target="unsigned short int",
        )]],
    }],

    # A sub record inside a field list. Its size is the size of whichever
    # member is selected by the leaf type.
    "_lfSubRecord": [lambda x: x.value.obj_size, {
        "leaf": [None, ["Enumeration", dict(
            enum_name="_LEAF_ENUM_e",
            target="unsigned short int")]],

        # Select the member named by LEAF_ENUM_TO_SUBRECORD for this leaf, or
        # the member called "Unknown" when the leaf is not in that table.
        "value": lambda x: x.m(
            LEAF_ENUM_TO_SUBRECORD.get(str(x.leaf), "Unknown")),
    }],

    "_lfEnum": [None, {
        # Read the name as a string.
        "Name": [None, ["String"]],
    }],

    "_lfNestType": [None, {
        # Read the name as a string.
        "Name": [None, ["String"]],
    }],

    # A list of sub records (see _lfSubRecord above).
    "_lfFieldList": [None, {
        "SubRecord": [None, ["ListArray", dict(
            target="_lfSubRecord",

            # Bounded by the length recorded in the parent object, less 2
            # bytes.
            # NOTE(review): the 2 bytes are presumably the parent's type_enum
            # field - confirm.
            maximum_size=lambda x: x.obj_parent.length - 2,
        )]],
    }],

    # An argument list.
    "_lfArgList": [None, {
        # One _TYPE_ENUM_e enumeration per argument; the number of entries is
        # taken from the struct's own count member.
        "arg": [None, ["Array", dict(
            target="Enumeration",
            target_args=dict(
                enum_name="_TYPE_ENUM_e",
                target="unsigned short int",
            ),
            count=lambda x: x.count
        )]],
    }],

    # A container for a single type record: a 2 byte length followed by
    # "length" further bytes, hence the total size of length + 2.
    "TypeContainer": [lambda x: x.length + 2, {
        "length": [0, ["unsigned short int"]],

        # The leaf type of this record. It decides how the record is parsed
        # (see "type" below).
        "type_enum": [2, ["Enumeration", dict(
            enum_name="_LEAF_ENUM_e",
            target="unsigned short int"
        )]],

        # Cast the record to the type named in LEAF_ENUM_TO_TYPE, falling back
        # to "unsigned int" for leaf types which are not in that table.
        "type": lambda x: x.type_enum.cast(
            LEAF_ENUM_TO_TYPE.get(str(x.type_enum), "unsigned int"))
    }],

    # The header of the types stream. The TypeContainer records start right
    # after the header (at offset obj_size).
    "_HDR": [None, {
        "types": [lambda x: x.obj_size,
                  ["ListArray", dict(
                      target="TypeContainer",
                      count=lambda x: x.tiMac - x.tiMin,
                      maximum_size=lambda x: x.cbGprec,
                  )]],
    }],

    "_GUID": [16, {
        "Data1": [0, ["unsigned long", {}]],
        "Data2": [4, ["unsigned short", {}]],
        "Data3": [6, ["unsigned short", {}]],
        "Data4": [8, ["String", dict(length=8, term=None)]],
        # The GUID rendered as a single upper case hex string.
        "AsString": lambda x: ("%08x%04x%04x%s" % (
            x.Data1, x.Data2, x.Data3, str(x.Data4).encode('hex'))).upper(),
    }],

    "Info": [None, {
        "Version": [0, ["unsigned long int"]],
        "TimeDateStamp": [4, ["UnixTimeStamp"]],
        "Age": [8, ["unsigned long int"]],
        "GUID": [12, ["_GUID"]],
    }],

    # A symbol record. Its total size is reclen + 2 bytes.
    "_ALIGNSYM": [lambda x: x.reclen + 2, {
        "rectyp": [None, ["Enumeration", dict(
            enum_name="_SYM_ENUM_e",
            target="unsigned short int")]],

        # Cast to the type named in SYM_ENUM_TO_SYM. Record types which are
        # not in that table are cast to the type name "".
        "value": lambda x: x.cast(
            SYM_ENUM_TO_SYM.get(str(x.rectyp), ""))

    }],

    "_PUBSYM32": [None, {
        "name": [None, ["String"]],
    }],

    "DBI": [None, {
        "DBIHdr": [0, ["_NewDBIHdr"]],
        # A list of DBIExHeaders at offset 64, bounded by DBIHdr.cbGpModi
        # bytes.
        "ExHeaders": [64, ["ListArray", dict(
            maximum_size=lambda x: x.DBIHdr.cbGpModi,
            target="DBIExHeaders")]],
    }],

    "DBIExHeaders": [None, {
        "modName": [64, ["String"]],
        # objName starts immediately after the end of modName.
        "objName": [lambda x: x.modName.obj_offset + x.modName.obj_size,
                    ["String"]],
    }],

    "IMAGE_SECTION_HEADER": [None, {
        "Name": [None, ["String"]],
    }],

}
474 """Represents a class or struct."""
475
476 _obj_end = 0
477
481
482 @utils.safe_property
487
489 """This object is followed by a variable sized data structure.
490
491 This data structure contains the "value_" and "name" attributes. If the
492 first short int less than 0x8000, it represents the value. Otherwise, it
493 represents an _LEAF_ENUM_e enum which determines the size of the value
494 to read next (e.g. LF_ULONG = 4 bytes, LF_SHORT = 2 bytes) and those
495 represent the value.
496
497 The name field then follows as a String.
498
499 Following the name field, there is padding to 4 byte alignment.
500
501 We must calculate the total size of this struct in this function, after
502 parsing all the components.
503 """
504
505 obj_end = self.obj_offset + super(lfClass, self).obj_size
506 field_type = self.obj_profile.Object(
507 "unsigned short int", offset=obj_end, vm=self.obj_vm)
508
509 obj_end += field_type.obj_size
510
511 if field_type < 0x8000:
512 self.value_ = field_type
513 self.name = self.obj_profile.String(
514 offset=obj_end, vm=self.obj_vm)
515
516 obj_end += self.name.obj_size
517
518 else:
519
520
521 type_enum_name = self.obj_profile.get_enum(
522 "_LEAF_ENUM_e").get(str(field_type))
523
524 type_name = LEAF_ENUM_TO_TYPE.get(type_enum_name)
525
526 self.value_ = self.obj_profile.Object(
527 type_name=type_name, offset=obj_end, vm=self.obj_vm)
528
529
530 self.name = self.obj_profile.String(
531 offset=self.value_.obj_offset + self.value_.obj_size,
532 vm=self.obj_vm)
533
534 obj_end += self.value_.obj_size + self.name.obj_size
535
536
537 self._obj_end = obj_end
538
539
540 if self.name == "__unnamed":
541 self.name = "__unnamed_%s" % self.field
542
543 @utils.safe_property
546
548 """Returns the vtype data structure defining this element.
549
550 Returns:
551 a tuple, the first element is the target name, the second is the dict
552 of the target_args.
553 """
554
555 return [str(self.name), {}]
556
559 """A SubRecord describing a single enumeration definition."""
560
563 """A range of bits."""
564
566 """BitField overlays on top of another type."""
567 result = tpi.DefinitionByIndex(self.type)
568 if not result:
569 return [str(self.name), {}]
570
571 target, target_args = result
572
573 return "BitField", dict(
574 start_bit=int(self.position),
575 end_bit=int(self.position) + int(self.length),
576 target_args=target_args, target=target)
577
580 UNNAMED_RE = re.compile("<unnamed-type-([^->]+)>")
581
589
590 @utils.safe_property
595
598
601 """A Union is basically the same as a struct, except members may overlap."""
602
606 """We dont really care about modifiers, just pass the utype through."""
607 return tpi.DefinitionByIndex(self.modified_type)
608
611 """Represents an enumeration definition."""
612
613 @utils.safe_property
615 enum_name = str(self.m("Name"))
616 if enum_name == "<unnamed-tag>":
617 enum_name = "ENUM_%X" % self.obj_offset
618
619 return enum_name
620
630
632 """Enumerations are defined in two parts.
633
634 First an enumeration dict is added to the profile constants, and then
635 the target "Enumeration" can use it by name (having the enum_name
636 field). This allows many fields which use the same enumeration to share
637 the definition dict.
638 """
639 result = tpi.DefinitionByIndex(self.utype)
640 if not result:
641 return [str(self.name), {}]
642
643 target, target_args = result
644
645 return "Enumeration", dict(
646 target=target, target_args=target_args, enum_name=self.Name)
647
650 """A Pointer object."""
651
653 target_index = int(self.u1.utype)
654 result = tpi.DefinitionByIndex(target_index)
655 if not result:
656 return [str(self.name), {}]
657
658 target, target_args = result
659
660 return ["Pointer", dict(
661 target=target,
662 target_args=target_args
663 )]
664
667 """A Function object."""
668
670 """We record the function arg prototype as well."""
671 args = []
672 for idx in tpi.Resolve(self.arglist).arg:
673 definition = tpi.DefinitionByIndex(idx)
674 if definition:
675 args.append(definition)
676
677 return "Function", dict(args=args)
678
681 """An array of the same object."""
682
684 result = tpi.DefinitionByIndex(self.elemtype)
685 if not result:
686 return [str(self.name), {}]
687
688 target, target_args = result
689 if target == "<unnamed-tag>":
690 target = "<unnamed-%s>" % self.elemtype
691
692
693
694
695
696 definition = ["Array", dict(
697 target=target, target_args=target_args,
698 size=int(self.value_),
699 )]
700
701 tpi.RegisterFixUp(definition)
702
703 return definition
704
707 """A member in a struct (or class)."""
708
710 """Returns a tuple of target, target_args for the member."""
711 return tpi.DefinitionByIndex(self.m("index"))
712
715 """The file header of a PDB file."""
716
718 """The full page list is a double indexed array."""
719 result = []
720 for idx in self.adIndexPages:
721 for page_number in self.obj_profile.Array(
722 offset=idx * self.dPageBytes, vm=self.obj_vm,
723 target="unsigned int", count=self.dPageBytes / 4):
724 result.append(int(page_number))
725 if len(result) >= self.root_pages:
726 return result
727
728 return result
729
732 """The root stream contains information about all other streams."""
733
735 """Read all the streams in the file."""
736 offset_of_index_list = self.obj_offset + self.obj_size
737 page_size = self.obj_context["page_size"]
738
739 for stream_size in self.adStreamBytes:
740 if stream_size == 0xffffffff:
741 stream_size = 0
742
743 page_list = self.obj_profile.Array(
744 offset=offset_of_index_list, vm=self.obj_vm,
745 count=Pages(stream_size, page_size),
746 target="unsigned int")
747
748 offset_of_index_list += page_list.obj_size
749
750 yield StreamBasedAddressSpace(
751 base=self.obj_vm.base, page_size=page_size,
752 session=self.obj_profile.session, pages=page_list)
753
759
767
768
769 -class DBI(obj.Struct):
771 DBIHdr = self.DBIHdr
772
773
774 header_offset = (self.obj_offset +
775 DBIHdr.obj_size +
776 DBIHdr.cbGpModi +
777 DBIHdr.cbSC +
778 DBIHdr.cbSecMap +
779 DBIHdr.cbFileInfo +
780 DBIHdr.cbTSMap +
781 DBIHdr.cbECInfo)
782
783 return self.obj_profile.DbgHdr(header_offset, vm=self.obj_vm)
784
785
786 -class PDBProfile(basic.Profile32Bits, basic.BasicClasses):
787 """A profile to parse Microsoft PDB files.
788
789 Note that this is built on top of the mspdb profile which exists in the
790 profile repository, as generated from the code here:
791
792 http://undocumented.rawol.com/win_pdbx.zip
793
794 Do not directly instantiate this. Just do:
795
796 profile = session.LoadProfile("mspdb")
797 """
798
800 super(PDBProfile, self).__init__(**kwargs)
801 self.add_overlay(mspdb_overlays)
802 self.add_classes({
803 "_PDB_HEADER_700": _PDB_HEADER_700,
804 "_PDB_ROOT_700": _PDB_ROOT_700,
805 "_lfClass": lfClass, "_lfArray": lfArray,
806 "_lfMember": lfMember, "_lfPointer": lfPointer,
807 "_lfProc": lfProc, "_lfEnum": lfEnum,
808 "_lfModifier": lfModifier, "_lfUnion": lfUnion,
809 "_lfBitfield": lfBitfield, "_lfEnumerate": lfEnumerate,
810 "_lfNestType": lfNestType, "DBIExHeaders": DBIExHeaders,
811 "DBI": DBI
812 })
813
816 """Parses a Microsoft PDB file."""
817
818
819
820
# Maps the name of a _TYPE_ENUM_e value to its vtype definition, given as
# [target, target_args]. DefinitionByIndex() consults this table for type
# indexes below 0x700.
TYPE_ENUM_TO_VTYPE = {
    "T_32PINT4": ["Pointer", dict(target="long")],
    "T_32PLONG": ["Pointer", dict(target="long")],
    "T_32PQUAD": ["Pointer", dict(target="long long")],
    "T_32PRCHAR": ["Pointer", dict(target="unsigned char")],
    "T_32PREAL32": ["Pointer", dict(target="Void")],
    "T_32PREAL64": ["Pointer", dict(target="Void")],
    "T_32PSHORT": ["Pointer", dict(target="short")],
    "T_32PUCHAR": ["Pointer", dict(target="unsigned char")],
    "T_32PUINT4": ["Pointer", dict(target="unsigned int")],
    "T_32PULONG": ["Pointer", dict(target="unsigned long")],
    "T_32PUQUAD": ["Pointer", dict(target="unsigned long long")],
    "T_32PUSHORT": ["Pointer", dict(target="unsigned short")],
    "T_32PVOID": ["Pointer", dict(target="Void")],
    "T_32PWCHAR": ["Pointer", dict(target="UnicodeString")],
    "T_64PLONG": ["Pointer", dict(target="long")],
    "T_64PQUAD": ["Pointer", dict(target="long long")],
    "T_64PRCHAR": ["Pointer", dict(target="unsigned char")],
    "T_64PUCHAR": ["Pointer", dict(target="unsigned char")],
    # A pointer to wide characters. Keep this in line with T_32PWCHAR and
    # T_WCHAR, which both use UnicodeString (this used to point at the 8 bit
    # "String" type).
    "T_64PWCHAR": ["Pointer", dict(target="UnicodeString")],
    "T_64PULONG": ["Pointer", dict(target="unsigned long")],
    "T_64PUQUAD": ["Pointer", dict(target="unsigned long long")],
    "T_64PUSHORT": ["Pointer", dict(target="unsigned short")],
    "T_64PVOID": ["Pointer", dict(target="Void")],
    "T_BOOL08": ["unsigned char", {}],
    "T_CHAR": ["char", {}],
    "T_INT4": ["long", {}],
    "T_INT8": ["long long", {}],
    "T_LONG": ["long", {}],
    "T_QUAD": ["long long", {}],
    "T_RCHAR": ["unsigned char", {}],
    "T_REAL32": ["float", {}],
    "T_REAL64": ["double", {}],
    "T_REAL80": ["long double", {}],
    "T_SHORT": ["short", {}],
    "T_UCHAR": ["unsigned char", {}],
    "T_UINT4": ["unsigned long", {}],
    "T_ULONG": ["unsigned long", {}],
    "T_UQUAD": ["unsigned long long", {}],
    "T_USHORT": ["unsigned short", {}],
    "T_VOID": ["Void", {}],
    "T_WCHAR": ["UnicodeString", {}],
}
864
902
904 """Parse the PDB info stream."""
905
906 info = self.profile.Info(vm=self.root_stream_header.GetStream(1))
907 self.metadata = dict(
908 Version=int(info.Version),
909 Timestamp=str(info.TimeDateStamp),
910 GUID_AGE="%s%X" % (info.GUID.AsString, info.Age),
911 )
912
930
932 """Gather the PE sections of this executable."""
933 self.sections = []
934 stream = self.root_stream_header.GetStream(stream_id)
935 if stream is None:
936 return
937
938 for section in self.profile.ListArray(
939 maximum_size=stream.size,
940 target="IMAGE_SECTION_HEADER", vm=stream):
941 self.sections.append(section)
942
944 """Build an OMAP lookup table.
945
946 The OMAP is a translation between the original symbol's offset to the
947 final offset. When the linker builds the executable, it reorders the
948 original object files in the executable section. This translation table
949 tells us where the symbols end up.
950 """
951 self.omap = utils.SortedCollection(key=lambda x: x[0])
952 omap_stream = self.root_stream_header.GetStream(omap_stream_id)
953 if omap_stream is None:
954 return
955
956 omap_address_space = addrspace.BufferAddressSpace(
957 session=self.session,
958 data=omap_stream.read(0, omap_stream.size))
959
960 omap_array = self.profile.Array(
961 vm=omap_address_space,
962 count=omap_stream.size / self.profile.get_obj_size("_OMAP_DATA"),
963 max_count=omap_stream.size,
964 target="_OMAP_DATA")
965
966 for i, omap in enumerate(omap_array):
967 src = int(omap.rva)
968 dest = int(omap.rvaTo)
969
970 self.omap.insert((src, dest))
971 self.session.report_progress(
972 " Extracting OMAP Information %s%%",
973 lambda: i * 100 / omap_array.count)
974
976 """Parse the symbol records stream."""
977 stream = self.root_stream_header.GetStream(stream_id)
978 for container in self.profile.ListArray(target="_ALIGNSYM", vm=stream,
979 maximum_size=stream.size):
980
981 if container.reclen == 0:
982 break
983
984 symbol = container.value
985
986
987 if not symbol:
988 self.session.logging.warning(
989 "Unimplemented symbol %s" % container.rectyp)
990 continue
991
992 try:
993 name = str(symbol.name)
994 except AttributeError:
995
996 continue
997
998 translated_offset = offset = int(symbol.off)
999
1000
1001
1002 if self.sections:
1003
1004
1005 translated_offset = virtual_address = (
1006 offset + self.sections[symbol.seg - 1].VirtualAddress)
1007
1008
1009
1010 if self.omap:
1011
1012 try:
1013 from_offset, dest_offset = self.omap.find_le(
1014 virtual_address)
1015
1016 translated_offset = (
1017 virtual_address - from_offset + dest_offset)
1018
1019 except ValueError:
1020 pass
1021
1022 if symbol.pubsymflags.u1.fFunction:
1023 self.functions[name] = translated_offset
1024 else:
1025 self.constants[name] = translated_offset
1026
1027 self.session.report_progress(" Parsing Symbols %s", name)
1028
1030 """The TPI stream contains all the struct definitions."""
1031 self.lookup = {}
1032 tpi = self.profile._HDR(vm=self.root_stream_header.GetStream(2))
1033
1034
1035 for i, t in enumerate(tpi.types):
1036 self.session.report_progress(" Parsing Structs %(spinner)s")
1037
1038 self.lookup[tpi.tiMin + i] = t
1039 if not t:
1040 break
1041
1042
1043
1044 for value in self.lookup.values():
1045 if value.type_enum == "LF_ENUM":
1046 value.type.AddEnumeration(self)
1047
1049 self.enums[name] = enumeration
1050
1052 self.rev_enums[name] = enumeration
1053
1055 self.fixups.append(definition)
1056
1058 for key, value in self.lookup.iteritems():
1059
1060 if ((value.type_enum == "LF_STRUCTURE" or
1061 value.type_enum == "LF_UNION") and
1062 not value.type.property.fwdref):
1063
1064 struct_name = value.type.name
1065 if struct_name == "<unnamed-tag>":
1066 struct_name = "<unnamed-%s>" % key
1067
1068 struct_size = int(value.type.value_)
1069
1070 field_list = self.lookup[int(value.type.field)].type
1071 definition = [struct_size, {}]
1072
1073 for field in field_list.SubRecord:
1074 field_definition = field.value.Definition(self)
1075 if field_definition:
1076 if field_definition[0] == "<unnamed-tag>":
1077 field_definition[0] = (
1078 "<unnamed-%s>" % field.value.index)
1079
1080 definition[1][str(field.value.name)] = [
1081 int(field.value.value_), field_definition]
1082
1083 yield [struct_name, definition]
1084
1086 """Return the vtype definition of the item identified by idx."""
1087 result = None
1088 if idx < 0x700:
1089 type_name = self._TYPE_ENUM_e.get(idx)
1090
1091 result = self.TYPE_ENUM_TO_VTYPE.get(type_name)
1092 if result is None and type_name != "T_NOTYPE":
1093 self.session.logging.error("Unrecognized type %s\n", type_name)
1094
1095 else:
1096 try:
1097 result = self.lookup[idx].type.Definition(self)
1098 except AttributeError:
1099 pass
1100
1101 return result
1102
1104 try:
1105 return self.lookup[idx].type
1106 except KeyError:
1107 return obj.NoneObject("Index not known")
1108
1111
1112 - def __exit__(self, exc_type, exc_value, trace):
1114
1115
1116 -class ParsePDB(core.DirectoryDumperMixin, plugin.TypedProfileCommand,
1117 plugin.Command):
1118 """Parse the PDB streams."""
1119
1120 __name = "parse_pdb"
1121
1122 __args = [
1123 dict(name="pdb_filename", required=True, positional=True,
1124 help="The filename of the PDB file."),
1125
1126 dict(name="profile_class",
1127 help="The name of the profile implementation. "
1128 "Default name is derived from the pdb filename."),
1129
1130 dict(name="output_filename",
1131 help="The name of the file to store this profile. "),
1132
1133 dict(name="windows_version",
1134 help="The windows version (major.minor.revision) "
1135 "corresponding with this PDB. For example, Windows 7 "
1136 "should be given as 6.1"),
1137
1138 dict(name="concise", type="Boolean",
1139 help="Specify this to emit less detailed information."),
1140 ]
1141
1170
# The size in bytes of each native type, keyed by the type name.
NATIVE_TYPE_SIZE = dict((
    ("unsigned char", 1),
    ("unsigned int", 4),
    ("unsigned long", 4),
    ("unsigned long long", 8),
    ("unsigned short", 2),
    ("char", 1),
    ("int", 4),
    ("long", 4),
    ("long long", 8),
    ("short", 2),
))
1183
def PostProcessVTypes(self, vtypes):
    """Post process the vtypes to optimize some access members.

    Every definition in self.tpi.fixups is a [target, target_args] list which
    is modified in place:

    - An Array of UnicodeString becomes a single UnicodeString whose length
      is half the array size in bytes.
    - Any other Array carrying a "size" (in bytes) has it replaced by a
      "count" of elements, obtained by dividing by the element size.

    Args:
      vtypes: A dict mapping struct names to [size, members] definitions.
        It is used to look up the size of array elements which are structs.

    Returns:
      The same vtypes dict which was passed in.
    """
    arch = self.metadata.get("arch", "AMD64")

    for definition in self.tpi.fixups:
        target, target_args = definition
        if target != "Array":
            continue

        array_target = target_args.get("target")

        # An array of wide characters is really just a unicode string.
        if array_target == "UnicodeString":
            definition[0] = "UnicodeString"
            definition[1] = dict(
                length=target_args.get("size") // 2
            )
            continue

        if "size" not in target_args:
            continue

        # Work out the size of a single element of the array.
        target_size = self.NATIVE_TYPE_SIZE.get(array_target)
        if target_size is None:
            if array_target == "Pointer":
                target_size = 8 if arch == "AMD64" else 4
            else:
                target_definition = vtypes.get(array_target)
                if target_definition is None:
                    # We do not know the element type, so we can not work
                    # out the count. Leave the definition as it is.
                    continue

                target_size = target_definition[0]

        # A zero sized element can not be used to derive a count.
        if not target_size:
            continue

        # Replace the size in bytes with the number of elements. Floor
        # division keeps the count an integer under true division.
        target_args["count"] = target_args.pop("size") // target_size

    return vtypes
1221
1223 with self.tpi:
1224 vtypes = {}
1225
1226 for i, (struct_name, definition) in enumerate(self.tpi.Structs()):
1227 self.session.report_progress(
1228 " Exporting %s: %s", i, struct_name)
1229
1230 struct_name = str(struct_name)
1231 existing_definition = vtypes.get(struct_name)
1232 if existing_definition:
1233
1234 definition[1].update(existing_definition[1])
1235
1236 vtypes[struct_name] = definition
1237
1238 self.metadata.update(dict(
1239 ProfileClass=self.plugin_args.profile_class,
1240 Type="Profile",
1241 PDBFile=os.path.basename(self.plugin_args.pdb_filename),
1242 ))
1243
1244 self.metadata.update(self.tpi.metadata)
1245
1246
1247 demangler = pe_vtypes.Demangler(self.metadata)
1248 constants = {}
1249 for name, value in self.tpi.constants.iteritems():
1250 constants[demangler.DemangleName(name)] = value
1251
1252 functions = {}
1253 for name, value in self.tpi.functions.iteritems():
1254 functions[demangler.DemangleName(name)] = value
1255
1256 vtypes = self.PostProcessVTypes(vtypes)
1257
1258 result = {
1259 "$METADATA": self.metadata,
1260 "$STRUCTS": vtypes,
1261 "$ENUMS": self.tpi.enums,
1262 }
1263
1264 if not self.plugin_args.concise:
1265 result["$REVENUMS"] = self.tpi.rev_enums
1266 result["$CONSTANTS"] = constants
1267 result["$FUNCTIONS"] = functions
1268
1269 return result
1270
1281