Package rekall :: Package plugins :: Package linux :: Module heap_analysis
[frames] | no frames]

Source Code for Module rekall.plugins.linux.heap_analysis

   1  #  glibc heap analysis classes 
   2  # 
   3  #    Copyright (c) 2017, Frank Block, ERNW GmbH <fblock@ernw.de> 
   4  # 
   5  #       All rights reserved. 
   6  # 
   7  #       Redistribution and use in source and binary forms, with or without modification, 
   8  #       are permitted provided that the following conditions are met: 
   9  # 
  10  #       * Redistributions of source code must retain the above copyright notice, this 
  11  #         list of conditions and the following disclaimer. 
  12  #       * Redistributions in binary form must reproduce the above copyright notice, 
  13  #         this list of conditions and the following disclaimer in the documentation 
  14  #         and/or other materials provided with the distribution. 
  15  #       * The names of the contributors may not be used to endorse or promote products 
  16  #         derived from this software without specific prior written permission. 
  17  # 
  18  #       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  19  #       AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  20  #       IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  21  #       ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
  22  #       LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  23  #       DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
  24  #       SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
  25  #       CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
  26  #       OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
  27  #       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  28   
  29  """ 
  30  This module implements several classes, allowing the glibc heap analysis for a 
  31  given process. 
  32  """ 
  33   
  34  import re 
  35  import pdb 
  36  import struct 
  37  import traceback 
  38  import os 
  39  from numbers import Number 
  40  from rekall.plugins.overlays import basic 
  41  from rekall.plugins.linux import common 
  42  from rekall.plugins.linux import cpuinfo 
  43  from rekall.plugins import core 
  44  from rekall import scan 
  45  from rekall import obj 
  46   
  47  ############# 
  48   
  49  _PREV_INUSE = 0x1 
  50  _IS_MMAPPED = 0x2 
  51  _NON_MAIN_ARENA = 0x4 
  52  _SIZE_BITS = (_PREV_INUSE | _IS_MMAPPED | _NON_MAIN_ARENA) 
  53  # is set on HeapAnalysis instantiation 
  54  _MIN_LARGE_SIZE = None 
  55   
  56  # Probably more versions would work, especially when the corresponding vtype 
  57  # information are provided, but those are the versions we tested against 
  58  _SUPPORTED_GLIBC_VERSIONS = ['2.25', '2.24', '2.23', '2.22', '2.21', '2.20'] 
  59  _LIBC_REGEX = '(?:^|/)libc[^a-zA-Z][^/]*\\.so' 
60 61 62 -def get_vma_for_offset(vmas, offset):
63 """Returns a list with identifier and vm_area that given offset belongs to. 64 Expects the output from _get_vmas_for_task as argument. 65 """ 66 67 for vma in vmas: 68 if vma['vma'].vm_start <= offset < vma['vma'].vm_end: 69 return vma 70 71 return None
72
73 74 -def get_libc_filename(vmas):
75 """Returns the libc file name from the vma, where the _LIBC_REGEX matches. 76 """ 77 78 if vmas: 79 for vma in vmas: 80 if re.search(_LIBC_REGEX, vma['name'], re.IGNORECASE): 81 return vma['name']
82
83 84 -def get_libc_range(vmas):
85 """Returns the lowest and highest address for the libc vma. See also 86 get_mem_range_for_regex.""" 87 88 return get_mem_range_for_regex(vmas, _LIBC_REGEX)
89
90 91 -def get_mem_range_for_regex(vmas, regex):
92 """Returns the lowest and highest address of memory areas belonging to the 93 vm_areas, the given regex matches on. The result is given as a list, where 94 the lowest address is the first element. Expects the output from 95 _get_vmas_for_task as argument.""" 96 97 offsets = None 98 if vmas: 99 for vma in vmas: 100 if re.search(regex, vma['name'], re.IGNORECASE): 101 if not offsets: 102 offsets = [vma['vma'].vm_start] 103 offsets.append(vma['vma'].vm_end) 104 105 else: 106 offsets[1] = vma['vma'].vm_end 107 108 return offsets
109
110 111 112 -class HeapAnalysis(common.LinProcessFilter):
113 """Basic abstract class for linux heap analysis. 114 Mostly serves the main_arena. 115 """ 116 117 __abstract = True 118 119 _main_heap_identifier = "[heap]" 120 121 # used to mark vm_areas residing between the [heap] and the first file or 122 # stack object and all other vm_areas that have no file object for 123 # (mmapped regions can also reside somewhere beyond the typical heap/stack 124 # area) note that vmas with this identifier might be empty, old thread 125 # stacks or vm_areas belonging to e.g. mapped files. 126 # A pretty reliable source for heap vm_areas is the self.heap_vmas list as 127 # it contains vm_areas which are identified to most probably belong to a 128 # heap or mmapped region. 129 _heap_vma_identifier = "[heap-vma]" 130 _pot_mmapped_vma_identifier = "[pot-mmapped-vma]" 131 132 133 # is normally only automatically set when using a dummy arena or the chunk 134 # dumper, as in those cases all chunks are walked at least two times
136 """Sets _preserve_chunks to True. This forces all allocated chunk 137 functions to store chunks in lists, which highly increases the speed 138 of a second walk over those chunks. This feature can only be activated 139 if performance is set to 'fast'.""" 140 141 if not self._preserve_chunks and \ 142 self.session.GetParameter("performance") == "fast": 143 self.session.logging.warn( 144 "Chunk preservation has been activated (result from using " 145 "performance=fast). This might consume large amounts of memory" 146 " depending on the chunk count. If you are low on free memory " 147 "space (RAM), you might want to deactivate this feature by " 148 "not using the 'fast' option. The only downside is in some " 149 "cases a longer plugin runtime.") 150 151 self._preserve_chunks = True
152 153
154 - def _get_saved_stack_frame_pointers(self, task):
155 """Returns a list of dicts, containing the ebp,esp and pid values 156 for each thread.""" 157 158 if not task.mm: 159 return None 160 161 # To gather thread stacks, we examine the pt_regs struct for each 162 # thread and extract the saved stack frame pointers 163 thread_stack_offsets = [] 164 thread_group_offset = self.profile.get_obj_offset("task_struct", 165 "thread_group") 166 167 for thread_group in task.thread_group.walk_list("next"): 168 thread_task = self.profile.task_struct( 169 offset=thread_group.obj_offset - thread_group_offset, 170 vm=self.process_as) 171 172 pt_regs = self.profile.pt_regs( 173 offset=(thread_task.thread.sp0 - 174 self.profile.get_obj_size("pt_regs")), 175 vm=self.process_as) 176 177 thread_stack_offsets.append(dict(ebp=pt_regs.bp.v(), 178 esp=pt_regs.sp.v(), 179 pid=thread_task.pid.v())) 180 181 return thread_stack_offsets
182 183 184 # Basically the code from the proc_maps plugin but with thread specific 185 # enhancements
186 - def _get_vmas_for_task(self, task):
187 """Returns a list of lists, containing ["name", vm_area] pairs. """ 188 189 if not task.mm: 190 return None 191 192 result = [] 193 194 thread_stack_offsets = self._get_saved_stack_frame_pointers(task) 195 # The first pair contains the "main" thread and the mm start_stack 196 # value is more reliable for identifying the relevant memory region 197 # than the saved frame pointers 198 thread_stack_offsets[0]['start_stack'] = task.mm.start_stack 199 200 heap_area = False 201 for vma in task.mm.mmap.walk_list("vm_next"): 202 temp_vma = dict() 203 204 if vma.vm_file: 205 fname = task.get_path(vma.vm_file) 206 if heap_area: 207 heap_area = False 208 209 else: 210 fname = "" 211 if heap_area: 212 fname = self._heap_vma_identifier 213 214 else: 215 fname = self._pot_mmapped_vma_identifier 216 217 218 # main heap can have 3 or more vm_area_struct structs 219 if vma.vm_start <= task.mm.start_brk <= vma.vm_end or \ 220 (task.mm.start_brk <= vma.vm_start 221 < vma.vm_end <= task.mm.brk) or \ 222 vma.vm_start <= task.mm.brk <= vma.vm_end: 223 fname = self._main_heap_identifier 224 heap_area = True 225 226 else: 227 for offsets in thread_stack_offsets: 228 if (('start_stack' in offsets.keys() and 229 vma.vm_start <= offsets['start_stack'] 230 <= vma.vm_end) or 231 vma.vm_start <= offsets['ebp'] <= vma.vm_end or 232 vma.vm_start <= offsets['esp'] <= vma.vm_end): 233 234 fname = "[stack" 235 pid = offsets['pid'] 236 fname += "]" if task.pid == pid else \ 237 ":{:d}]".format(pid) 238 239 temp_vma['ebp'] = offsets['ebp'] 240 temp_vma['esp'] = offsets['esp'] 241 242 heap_area = False 243 244 temp_vma['name'] = fname 245 temp_vma['vma'] = vma 246 result.append(temp_vma) 247 248 return sorted(result, key=lambda vma: vma['vma'].vm_start)
249 250
251 - def _load_libc_profile(self):
252 """Loads the Libc profile for the current libc version.""" 253 254 # we try to gather version information from the mapped libc lib 255 libc_version_string = None 256 major_version = None 257 minor_version = None 258 match = None 259 libc_filename = get_libc_filename(self.vmas) 260 261 if libc_filename: 262 match = re.search(r'(\d+)\.(\d+)', libc_filename) 263 264 if match and len(match.groups()) == 2: 265 major_version = int(match.group(1)) 266 minor_version = int(match.group(2)) 267 libc_version_string = str(major_version) + str(minor_version) 268 269 self.session.logging.info("Trying to load profile for version {:s}" 270 " from the repository." 271 .format(libc_version_string)) 272 273 # TODO: dynamic selection of distribution specific profiles 274 dist = 'base' 275 libc_profile = self.session.LoadProfile( 276 "glibc/{:s}/{:s}/{:s}".format(dist, 277 self.profile.metadata("arch"), 278 libc_version_string)) 279 280 281 if not libc_profile: 282 # fallback: there seems to be no profile from the repository, 283 # so we try to load a profile internally 284 self.session.logging.info( 285 "Repository failed: Now using internal profiles.") 286 287 # Fallback 288 libc_version_string = '224' 289 290 if major_version == 2: 291 if minor_version >= 24: 292 libc_version_string = '224' 293 294 elif minor_version == 23: 295 libc_version_string = '223' 296 297 else: 298 libc_version_string = '220' 299 300 self.session.logging.info( 301 "Loading internal profile for version {:s}." 302 .format(libc_version_string)) 303 304 if self.session.profile.metadata("arch") == 'I386': 305 libc_profile = GlibcProfile32(version=libc_version_string, 306 session=self.session) 307 308 elif self.session.profile.metadata("arch") == 'AMD64': 309 libc_profile = GlibcProfile64(version=libc_version_string, 310 session=self.session) 311 312 if not libc_profile: 313 self.session.logging.error('Unable to load a libc profile.') 314 315 else: 316 self.profile.add_types(libc_profile.vtypes) 317 self.profile.add_constants(libc_profile.constants) 318 319 if all(x in self.profile.vtypes.keys() for x in 320 ['malloc_chunk', 'malloc_state', 321 'malloc_par', '_heap_info']): 322 self._libc_profile_success = True 323 324 self.profile.add_classes(dict(malloc_state=malloc_state, 325 _heap_info=_heap_info, 326 malloc_chunk=malloc_chunk)) 327 328 else: 329 self.session.logging.error('Error while loading libc profile.')
330 331
332 - def _check_and_report_chunksize(self, chunk, current_border):
333 """Checks whether or not the current chunk 334 - is bigger than the given border 335 - smaller than the minimum size a chunk is allowed to be 336 - 's address is aligned. 337 """ 338 339 if chunk.v() + chunk.chunksize() > current_border: 340 self.session.logging.warn( 341 "Chunk at offset 0x{:x} has a size larger than the current " 342 "memory region. This shouldn't be the case." 343 .format(chunk.v())) 344 345 return False 346 347 elif chunk.chunksize() < self._minsize: 348 if not self._check_and_report_chunk_for_being_swapped(chunk): 349 self.session.logging.warn( 350 "Chunk at offset 0x{:x} has a size smaller than MINSIZE, " 351 "which shouldn't be the case and indicates a problem." 352 .format(chunk.v())) 353 354 return False 355 356 elif not self._aligned_ok(chunk.chunksize()): 357 self.session.logging.warn( 358 "The size of chunk at offset 0x{:x} is not a multiple of " 359 "MALLOC_ALIGNMENT, which shouldn't be the case and indicates " 360 "a problem.".format(chunk.v())) 361 362 return False 363 364 return True
365 366 367 # TODO reliable verification via page table information
369 """Tests the size field of a given chunk for being 0. If this field 370 is null, it is a good indication that the corresponding memory region 371 has been swapped. The reason might however also be a calculation error 372 for the chunk's offset.""" 373 374 if chunk: 375 if chunk.get_size() == 0: 376 self.session.logging.warn( 377 "It seems like the memory page(s) belonging to the " 378 "chunk at offset 0x{:x} have been swapped. This will lead " 379 "to incorrect/incomplete results and more warnings/errors." 380 .format(chunk.v())) 381 return True 382 383 return False
384 385 386
387 - def _check_and_report_allocated_chunk( 388 self, arena, chunk, next_chunk, current_border):
389 """Checks if the given chunk should be in use (depending on the 390 PREV_INUSE bit of the next chunk), has a size > MINSIZE, is aligned, 391 whether or not it is part of any bin or fastbin in conjunction with 392 next_chunks PREV_INUSE bit and if next_chunks prev_size field has 393 same value as current chunk's size. This function is not intended to 394 be used for the "bottom chunks". It returns True if no error occurs 395 and if the given chunk is not part of bins or fastbins. 396 """ 397 398 error_base_string = ( 399 "Found a presumably {0} chunk at offset 0x{1:x} which is however " 400 "{2}part of the bins. This is unexpected and might either " 401 "indicate an error or possibly in seldom cases be the result " 402 "from a race condition.") 403 404 if not self._check_and_report_chunksize(chunk, current_border): 405 return False 406 407 if not self._aligned_ok(chunk.v()): 408 self.session.logging.warn( 409 "Chunk at offset 0x{:x} is not aligned. As chunks are normally" 410 " always aligned, this indicates a mistakenly chosen chunk and" 411 " probably results in wrong results.".format(chunk.v())) 412 413 return False 414 415 # current chunk is tested in _check_and_report_chunksize for 416 # being swapped 417 self._check_and_report_chunk_for_being_swapped(next_chunk) 418 419 if next_chunk.prev_inuse(): 420 # for chunks in fastbins, the prev_inuse bit is not unset, 421 # so we don't check that here 422 if chunk in arena.freed_chunks: 423 # freed chunks shouldn't be marked as in use 424 self.session.logging.warn( 425 error_base_string.format("allocated", chunk.v(), "") 426 ) 427 428 elif chunk not in arena.freed_fast_chunks: 429 return True 430 431 else: 432 # current chunk seems to be freed, hence its size should equal 433 # next chunk's prev_size 434 if chunk.chunksize() != next_chunk.get_prev_size(): 435 self.session.logging.warn( 436 "Chunk at offset 0x{:x} seems to be freed but its size " 437 "doesn't match the next chunks prev_size value." 438 .format(chunk.v())) 439 440 elif chunk in arena.freed_fast_chunks: 441 # fastbins normally have the prev_inuse bit set 442 self.session.logging.warn( 443 "Unexpected: Found fastbin-chunk at offset 0x{0:x} which " 444 "prev_inuse bit is unset. This shouldn't normally be the " 445 "case.".format(chunk.v())) 446 447 elif chunk not in arena.freed_chunks: 448 # chunk is not marked as in use, but neither part of any bin 449 # or fastbin 450 self.session.logging.warn( 451 error_base_string.format("freed", chunk.v(), "not ") 452 ) 453 454 455 return False
456 457
458 - def _allocated_chunks_for_mmapped_chunk(self, mmap_first_chunk):
459 """Returns all allocated chunks for the mmap region the given chunk 460 belongs to.""" 461 462 if not mmap_first_chunk: 463 self.session.logging.warn( 464 "_allocated_chunks_for_mmapped_chunk has been called with " 465 "invalid pointer.") 466 return 467 468 mmap_vma = get_vma_for_offset(self.vmas, mmap_first_chunk.v())['vma'] 469 current_border = mmap_vma.vm_end 470 471 # we can't check here for hitting the bottom, as mmapped regions can 472 # contain slack space but this test is in essence done in 473 # check_and_report_mmap_chunk 474 for curr_chunk in self.iterate_through_chunks(mmap_first_chunk, 475 current_border): 476 477 if self._check_and_report_mmapped_chunk(curr_chunk, mmap_vma) \ 478 and self._check_and_report_chunksize(curr_chunk, 479 current_border): 480 yield curr_chunk 481 482 else: 483 # As the checks for the last MMAPPED chunk reported an error, 484 # we are stopping walking the MMAPPED chunks for that vm_area. 485 break
486 487 488
489 - def get_all_mmapped_chunks(self):
490 """Returns all allocated MMAPPED chunks.""" 491 492 main_arena = self.get_main_arena() 493 if main_arena: 494 if main_arena.allocated_mmapped_chunks: 495 for chunk in main_arena.allocated_mmapped_chunks: 496 yield chunk 497 498 return 499 500 main_arena.allocated_mmapped_chunks = list() 501 502 for mmap_first_chunk in main_arena.mmapped_first_chunks: 503 for chunk in self._allocated_chunks_for_mmapped_chunk( 504 mmap_first_chunk): 505 main_arena.allocated_mmapped_chunks.append(chunk) 506 yield chunk
507 508 509 510 ######### code taken from malloc/malloc.c (glibc-2.23) 511 # origins from the MINSIZE definition
512 - def get_aligned_address(self, address, different_align_mask=None):
513 """Returns an aligned address or MINSIZE, if given MIN_CHUNK_SIZE as 514 argument.""" 515 516 if different_align_mask: 517 return (address + different_align_mask) & ~ different_align_mask 518 519 return (address + self._malloc_align_mask) & ~ self._malloc_align_mask
520
521 - def _aligned_ok(self, value):
522 """Returns True if the given address/size is aligned.""" 523 524 return (value & self._malloc_align_mask) == 0
525 526 # essentially the request2size macro code
527 - def get_aligned_size(self, size):
528 """Returns an aligned size. Originally used to align a user request 529 size.""" 530 531 if size + self._size_sz + self._malloc_align_mask < self._minsize: 532 return self._minsize & ~ self._malloc_align_mask 533 534 return ((size + self._size_sz + self._malloc_align_mask) 535 & ~ self._malloc_align_mask)
536 537 ########### 538
539 - def _check_mmap_alignment(self, address):
540 """Returns True if the given address is aligned according to the 541 minimum pagesize.""" 542 543 return (address & (self._min_pagesize - 1)) == 0
544 545
546 - def _get_page_aligned_address(self, address):
547 """Returns an address aligned to the internal pagesize. 548 The given address should be a number, not a chunk. 549 This function is primarily used in the context of MMAPPED chunks. 550 """ 551 552 return (address + self._min_pagesize - 1) & ~ (self._min_pagesize - 1)
553 554
555 - def _check_for_bottom_chunks(self, chunk, heap_end):
556 """Checks the current chunk for conditions normally only found on the 557 second last chunk of a heap, when there are more heaps following. 558 """ 559 560 if chunk.chunksize() <= self._minsize and \ 561 (chunk.v() + chunk.chunksize() + (self._size_sz * 2)) == \ 562 heap_end: 563 return True 564 565 return False
566 567
568 - def _allocated_chunks_for_thread_arena(self, arena):
569 """Returns all allocated chunks contained in all heaps for the given 570 arena, assuming the arena is not the main_arena.""" 571 572 if arena.is_main_arena: 573 self.session.logging.warn( 574 "Unexpected: This method has been called with the main_arena.") 575 # since main_arena doesn't contain heap_infos, we return here 576 return 577 578 if arena.allocated_chunks: 579 for chunk in arena.allocated_chunks: 580 yield chunk 581 582 return 583 584 elif self._preserve_chunks: 585 arena.allocated_chunks = list() 586 587 heap_count = len(arena.heaps) 588 589 for i in range(heap_count): 590 heap = arena.heaps[i] 591 current_border = heap.v() + heap.size 592 hit_heap_bottom = False 593 last_chunk = None 594 curr_chunk = None 595 596 for next_chunk in heap.first_chunk.next_chunk_generator(): 597 if not curr_chunk: 598 curr_chunk = next_chunk 599 continue 600 601 last_chunk = curr_chunk 602 603 if (curr_chunk.v() + curr_chunk.chunksize()) == current_border: 604 # we hit the top chunk 605 break 606 607 else: 608 is_in_use = next_chunk.prev_inuse() 609 610 # on multiple heaps, for all but the last heap, the old 611 # top chunk is divided in at least two chunks at the 612 # bottom, where the second last has a size of 613 # minimum 2 * SIZE_SZ and maximum MINSIZE the last chunk 614 # has a size of 2* SIZE_SZ while the size field is set 615 # to 0x1 (the PREV_INUSE bit is set) and the prev_size 616 # contains the second last chunks size 617 # (min: 2 * SIZE_SZ , max: MINSIZE) 618 # 619 # see the part for creating a new heap within the 620 # sysmalloc function in malloc/malloc.c 621 # For glibc-2.23 beginning with line 2417 622 # 623 # as this behavior is included since version 2.0.1 from 624 # 1997, it should be safe to rely on it for most glibc 625 # versions 626 if curr_chunk.chunksize() <= self._minsize \ 627 and (curr_chunk.v() + curr_chunk.chunksize() 628 + (self._size_sz * 2)) == current_border: 629 630 # The last condition also tests if there are further 631 # heaps following. 632 # - if not, the current chunk which is only 633 # size_sz * 2 bytes away from 634 # 635 # - heap border shouldn't normally exist 636 if next_chunk.chunksize() == 0 and is_in_use and \ 637 (next_chunk.get_prev_size() 638 == curr_chunk.chunksize()) and \ 639 i < (heap_count - 1): 640 # we probably hit the bottom of the current heap 641 # which should'nt be the last one 642 self.session.logging.info( 643 "We hit the expected two chunks at the bottom " 644 "of a heap. This is a good sign.") 645 hit_heap_bottom = True 646 647 curr_chunk.is_bottom_chunk = True 648 649 if self._preserve_chunks: 650 arena.allocated_chunks.append(curr_chunk) 651 652 yield curr_chunk 653 654 655 break 656 657 elif curr_chunk.chunksize() < self._minsize: 658 self.session.logging.warn( 659 "Unexpected: We hit a chunk at offset 0x{0:x} " 660 "with a size smaller than the default minimum " 661 "size for a chunk but which appears to be " 662 "not part of the typical end of a heap. 
This " 663 "might either indicate a fatal error, or " 664 "maybe a custom libc implementation/custom " 665 "compile time flags.".format(curr_chunk.v())) 666 667 else: 668 self.session.logging.warn( 669 "Unexpected: We hit a chunk at offset 0x{0:x} " 670 "which presumably should have been the second " 671 "last chunk of that heap, but some conditions " 672 "don't meet.".format(curr_chunk.v())) 673 674 if curr_chunk not in arena.freed_fast_chunks: 675 self._check_and_report_non_main_arena(curr_chunk, 676 is_in_use) 677 678 if self._preserve_chunks: 679 arena.allocated_chunks.append(curr_chunk) 680 681 yield curr_chunk 682 683 # normal chunk, not located at the bottom of the heap 684 else: 685 if self._check_and_report_allocated_chunk( 686 arena, curr_chunk, next_chunk, current_border): 687 688 self._check_and_report_non_main_arena(curr_chunk, 689 is_in_use) 690 691 if self._preserve_chunks: 692 arena.allocated_chunks.append(curr_chunk) 693 694 yield curr_chunk 695 696 697 698 curr_chunk = next_chunk 699 700 if not hit_heap_bottom and \ 701 (last_chunk.v() + last_chunk.chunksize()) < current_border: 702 self.session.logging.warn( 703 "Seems like we didn't hit the top chunk or the bottom of " 704 "the current heap at offset: 0x{0:x}".format(heap.v()))
705 706
708 """Returns all allocated chunks for the main_arena's heap. 709 mmap'ed regions are not included. 710 """ 711 712 arena = self.get_main_arena() 713 714 if arena.allocated_chunks: 715 for chunk in arena.allocated_chunks: 716 yield chunk 717 718 else: 719 current_border = 0 720 if self._preserve_chunks: 721 arena.allocated_chunks = list() 722 723 if arena.first_chunk and arena.first_chunk.chunksize() > 0: 724 # as the main heap can spread among multiple vm_areas, we take 725 # the system_mem value as the upper boundary 726 if arena.system_mem > 0: 727 current_border = arena.first_chunk.v() + arena.system_mem 728 729 # there have been rare scenarios, in which the system_mem 730 # value was 0 731 else: 732 self.session.logging.warn( 733 "Unexpected: system_mem value of main arena is <= 0. " 734 "We will calculate it with the top chunk. This will " 735 "lead to follow up warnings regarding size " 736 "inconsistencies.") 737 current_border = arena.top.v() + arena.top.chunksize() 738 739 last_chunk = None 740 curr_chunk = None 741 742 for next_chunk in arena.first_chunk.next_chunk_generator(): 743 last_chunk = curr_chunk 744 if not curr_chunk: 745 curr_chunk = next_chunk 746 continue 747 748 if (curr_chunk.v() + curr_chunk.chunksize()) \ 749 == current_border: 750 # reached top chunk 751 break 752 753 else: 754 if self._check_and_report_allocated_chunk( 755 arena, curr_chunk, next_chunk, current_border): 756 757 if self._preserve_chunks: 758 arena.allocated_chunks.append(curr_chunk) 759 760 yield curr_chunk 761 762 763 curr_chunk = next_chunk 764 765 if (last_chunk.v() + last_chunk.chunksize()) < current_border: 766 self.session.logging.warn("Seems like we didn't hit the " 767 "top chunk for main_arena.") 768 769 elif arena.first_chunk and arena.first_chunk.chunksize() == 0: 770 if not self._libc_offset: 771 self.session.logging.warn( 772 "The first main arena chunk seems to have a zero " 773 "size. As we didn't find a mapped libc module, the " 774 "reason might be a statically linked executable. " 775 "Please provide offset for the malloc_par struct " 776 "(symbol name is 'mp_'). Another reason might be " 777 "swapped memory pages.") 778 779 else: 780 self.session.logging.warn( 781 "Unexpected error: The first main arena chunk " 782 "seems to have a zero size. The reason might be " 783 "swapped memory pages. Walking the chunks is aborted.")
784 785 786 787
788 - def get_all_allocated_chunks_for_arena(self, arena):
789 """Returns all allocated chunks for a given arena. 790 This function is basically a wrapper around 791 _allocated_chunks_for_main_arena and allocated_chunks_for_thread_arena. 792 """ 793 794 if not arena: 795 self.session.logging.error( 796 "Error: allocated_chunks_for_arena called with an empty arena") 797 if self.session.GetParameter("debug"): 798 pdb.post_mortem() 799 800 return 801 802 if arena.freed_fast_chunks is None or arena.freed_chunks is None: 803 self.session.logging.error( 804 "Unexpected error: freed chunks seem to not be initialized.") 805 if self.session.GetParameter("debug"): 806 pdb.post_mortem() 807 808 return 809 810 if arena.is_main_arena: 811 for i in self._allocated_chunks_for_main_arena(): 812 yield i 813 814 else: 815 # not main_arena 816 for chunk in self._allocated_chunks_for_thread_arena(arena): 817 yield chunk
818 819 820 # at least the function depends on getting allocated chunks first and then 821 # freed chunks, so this order shouldn't be changed
822 - def get_all_chunks(self):
823 """Returns all chunks (allocated, freed and MMAPPED chunks).""" 824 825 for chunk in self.get_all_allocated_chunks(): 826 yield chunk 827 828 for freed_chunk in self.get_all_freed_chunks(): 829 yield freed_chunk
830 831 832
834 """Returns all allocated chunks belonging to the main arena (excludes 835 thread and MMAPPED chunks).""" 836 837 for chunk in self.get_all_allocated_chunks_for_arena( 838 self.get_main_arena()): 839 yield chunk
840 841
843 """Returns all allocated chunks which belong to a thread arena.""" 844 845 if self.get_main_arena(): 846 for arena in self.arenas: 847 if not arena.is_main_arena: 848 for chunk in self.get_all_allocated_chunks_for_arena( 849 arena): 850 yield chunk
851 852
853 - def get_all_allocated_chunks(self):
854 """Returns all allocated chunks, no matter to what arena they belong 855 or if they are MMAPPED or not.""" 856 857 if self.get_main_arena(): 858 for arena in self.arenas: 859 for chunk in self.get_all_allocated_chunks_for_arena(arena): 860 yield chunk 861 862 for chunk in self.get_all_mmapped_chunks(): 863 yield chunk
864 865 866
868 """Returns all freed fastbin chunks, no matter to what arena they 869 belong.""" 870 871 if self.get_main_arena(): 872 for arena in self.arenas: 873 for free_chunk in arena.freed_fast_chunks: 874 yield free_chunk
875 876
877 - def get_all_freed_bin_chunks(self):
878 """Returns all freed chunks, no matter to what arena they belong.""" 879 880 if self.get_main_arena(): 881 for arena in self.arenas: 882 for free_chunk in arena.freed_chunks: 883 yield free_chunk
884 885
886 - def get_all_freed_chunks(self):
887 """Returns all top chunks, freed chunks and freed fastbin chunks, 888 no matter to what arena they belong.""" 889 890 if self.get_main_arena(): 891 for freed_chunk in self.get_all_freed_fastbin_chunks(): 892 yield freed_chunk 893 894 for freed_chunk in self.get_all_freed_bin_chunks(): 895 yield freed_chunk 896 897 for arena in self.arenas: 898 if arena.top_chunk: 899 yield arena.top_chunk
900 901
902 - def _last_heap_for_vma(self, vma):
903 """Returns the last heap_info within the given vma.""" 904 905 heap_hit = None 906 907 if self.get_main_arena: 908 for arena in self.arenas: 909 for heap in arena.heaps: 910 if vma.vm_start <= heap.v() < vma.vm_end: 911 if not heap_hit or heap.v() > heap_hit.v(): 912 heap_hit = heap 913 914 return heap_hit
915 916 917 918
919 - def heap_for_ptr(self, ptr):
920 """Returns the heap from the internal heap lists, the given pointer 921 belongs to.""" 922 923 if self.get_main_arena: 924 ptr_offset = None 925 926 if isinstance(ptr, Number): 927 ptr_offset = ptr 928 929 else: 930 ptr_offset = ptr.v() 931 932 for arena in self.arenas: 933 for heap in arena.heaps: 934 if heap.v() <= ptr_offset < (heap.v() + heap.size): 935 return heap 936 937 return None
938 939 940 # We don't use the code from glibc for this function, as it depends on the 941 # HEAP_MAX_SIZE value and we might not have the correct value
942 - def _heap_for_ptr(self, ptr, vma=None, suppress_warning=False):
943 """Returns a new heap_info struct object within the memory region, the 944 given pointer belongs to. If the vm_area contains multiple heaps it 945 walks all heap_info structs until it finds the corresponding one. 946 """ 947 948 if self._libc_profile_success: 949 ptr_offset = None 950 951 if isinstance(ptr, Number): 952 ptr_offset = ptr 953 954 else: 955 ptr_offset = ptr.v() 956 957 if not vma: 958 vma = get_vma_for_offset(self.vmas, ptr_offset)['vma'] 959 960 if vma: 961 heap_info = self.profile._heap_info(offset=vma.vm_start, 962 vm=self.process_as) 963 964 # there might be at least two heaps in one vm_area 965 while heap_info.v() + heap_info.size < ptr_offset: 966 heap_info = self.profile._heap_info( 967 offset=heap_info.v() + heap_info.size, 968 vm=self.process_as) 969 970 if heap_info.ar_ptr not in self.arenas and not \ 971 suppress_warning: 972 self.session.logging.warn( 973 "The arena pointer of the heap_info struct gathered " 974 "from the given offset {0:x} does not seem to point " 975 "to any known arena. This either indicates a fatal " 976 "error which probably leads to unreliable results " 977 "or might be the result from using a pointer to a " 978 "MMAPPED region.".format(ptr_offset) 979 ) 980 981 return heap_info 982 983 else: 984 self.session.logging.warn( 985 "No vm_area found for the given pointer 0x{:x}." 986 .format(ptr_offset)) 987 else: 988 self.session.logging.error( 989 "Libc profile is not loaded, hence no struct or constant " 990 "information. Aborting") 991 992 return None
993
994 - def _get_number_of_cores(self):
995 """Returns the number of cpu cores for the current memory image.""" 996 997 return len(list(cpuinfo.CpuInfo(session=self.session).online_cpus()))
998 999
1000 - def _get_max_number_of_arenas(self):
1001 """Returns the maximum number of supported arenas. This value depends 1002 on the number of cpu cores.""" 1003 1004 cores = self._get_number_of_cores() 1005 return cores * (2 if self._size_sz == 4 else 8)
1006 1007
1008 - def _check_arenas(self, arena, deactivate_swap_check=False):
1009 """Iterates the next field of the malloc_state struct and checks if we 1010 end up at the same malloc_state after the maximum number of arenas for 1011 the current system. Checks also for arena structs being part of 1012 swapped memory pages.""" 1013 1014 # This function is only reliable, if we have the offset to mp_ 1015 if not self.mp_: 1016 # at least we test arena for being swapped 1017 if not deactivate_swap_check: 1018 self._check_and_report_arena_for_being_swapped(arena) 1019 1020 return None 1021 1022 # max arena value can be adjusted at runtime via mallopt func: 1023 # see malloc/malloc.c line 4753 and 1024 # http://man7.org/linux/man-pages/man3/mallopt.3.html 1025 # 1026 # or on startup via env vars (see also link) 1027 # if not, this member is 0 1028 arena_max = self.mp_.arena_max 1029 if arena_max > 0x100: 1030 self.session.logging.warn( 1031 "The maximum number of arenas, gathered from the malloc_par " 1032 "struct is unexpected high ({:d}). The reason might be a " 1033 "wrong mp_ offset and will in this case, most probably, lead " 1034 "to follow up errors.".format(arena_max)) 1035 1036 if arena_max == 0: 1037 # The maximum number of arenas is calculated with the macro 1038 # 'NARENAS_FROM_NCORES' - See malloc/arena.c 1039 arena_max = self._get_max_number_of_arenas() 1040 cores = self._get_number_of_cores() 1041 1042 # In the case of one core, there can be one more arena than 1043 # the result from 'NARENAS_FROM_NCORES' 1044 # See function 'arena_get2' in malloc/arena.c 1045 if cores == 1: 1046 arena_max += 1 1047 1048 if arena_max == 0: 1049 self.session.logging.warn( 1050 "The result for arena_max has been 0. This shouldn't be " 1051 "the case and has to be looked into.") 1052 1053 if not deactivate_swap_check: 1054 # as the following for loop will in this case not loop over 1055 # any arena, we check the current arena at least for being 1056 # swapped 1057 self._check_and_report_arena_for_being_swapped(arena) 1058 1059 curr_arena = arena 1060 for _ in range(arena_max): 1061 swap_check_result = self._check_and_report_arena_for_being_swapped( 1062 curr_arena) if not deactivate_swap_check else None 1063 1064 if swap_check_result is not True: 1065 curr_arena = curr_arena.next 1066 if arena == curr_arena: 1067 return True 1068 1069 else: 1070 break 1071 1072 return False
1073 1074 1075
1076 - def __init__(self, **kwargs):
1077 super(HeapAnalysis, self).__init__(**kwargs) 1078 1079 self._libc_profile_success = False 1080 self._libc_offset = None 1081 self.arenas = [] 1082 self.process_as = None 1083 1084 # all vmas belonging to the current task 1085 self.vmas = None 1086 1087 # only the vmas that we consider to belong to the current task's heap 1088 self.heap_vmas = None 1089 1090 self._size_sz = None 1091 self._malloc_alignment = None 1092 self._malloc_align_mask = None 1093 self._minsize = None 1094 1095 self.mp_ = None 1096 self.mp_offset = self.plugin_args.malloc_par 1097 self._mmapped_warnings = set() 1098 self._is_statically_linked = False 1099 self._first_chunk_distance = 0 1100 1101 self.task = None 1102 self.statistics = None 1103 self._mmap_slack_space = dict() 1104 self._heap_slack_space = dict() 1105 self._hidden_chunks = set() 1106 self._stack_vmas_and_offsets = None 1107 1108 self._preserve_chunks = False 1109 1110 self._min_pagesize = 4096 1111 1112 if self.session.profile.metadata("arch") == 'I386': 1113 self._size_sz = 4 1114 1115 elif self.session.profile.metadata("arch") == 'AMD64': 1116 self._size_sz = 8 1117 1118 self._initialize_malloc_alignment() 1119 1120 self._has_dummy_arena = False
1121 1122
1123 - def _initialize_malloc_alignment(self, malloc_alignment=None):
1124 """This function initializes variables that are in relation to 1125 MALLOC_ALIGNMENT.""" 1126 1127 # if not given as argument, we first try to load it from the profile 1128 if not malloc_alignment: 1129 malloc_alignment = self.profile.get_constant('MALLOC_ALIGNMENT') 1130 1131 ##### taken from malloc/malloc.c (glibc-2.23) 1132 # depending on glibc comment, malloc_alignment differs only on 1133 # powerpc32 from 2*SIZE_SZ 1134 self._malloc_alignment = malloc_alignment if malloc_alignment \ 1135 else self._size_sz * 2 1136 self._malloc_align_mask = self._malloc_alignment - 1 1137 1138 # MIN_LARGE_SIZE defines at which size the fd/bk_nextsize pointers 1139 # are used 1140 nsmallbins = self.profile.get_constant('NSMALLBINS') 1141 if not nsmallbins: 1142 nsmallbins = 64 1143 1144 smallbin_width = self._malloc_alignment 1145 smallbin_correction = 1 if self._malloc_alignment > 2 * self._size_sz \ 1146 else 0 1147 1148 global _MIN_LARGE_SIZE 1149 _MIN_LARGE_SIZE = ((nsmallbins - smallbin_correction) * smallbin_width)
1150 1151 ############################################# 1152 1153 1154 # Goes to the top chunk of a given arena, gets its heap_info offset and 1155 # follows all _heap_info.prev members until the last one (for the last 1156 # _heap_info, the prev field is 0x0
1157 - def _heaps_for_arena(self, arena):
1158 """Returns a sorted list of all heap_info structs for a given arena: 1159 [0] = first heap_info. 1160 This method is normally only called on initialization for a new task 1161 and further access to heaps is done via the heaps attribute of each 1162 arena.""" 1163 1164 heap_infos = list() 1165 1166 if arena.top_chunk: 1167 last_heap_info = self._heap_for_ptr(arena.top_chunk) 1168 1169 if not last_heap_info.ar_ptr.dereference() == arena: 1170 self.session.logging.error( 1171 "Unexpected error: current heap_info's arena pointer " 1172 "doesn't point to the expected arena. Maybe wrong " 1173 "profile or different cause.") 1174 1175 heap_infos = list(last_heap_info.walk_list('prev'))[::-1] 1176 1177 return heap_infos
1178 1179
1180 - def get_main_arena(self):
1181 """Returns the main_arena for the current task, which is the first 1182 arena in the arenas list. If the current instance is not intialized, 1183 it logs a warning.""" 1184 1185 if self.arenas: 1186 if self.arenas[0].is_main_arena: 1187 return self.arenas[0] 1188 1189 else: 1190 self.session.logging.warn( 1191 "First arena in the arenas list doesn't seem to be the " 1192 "main_arena.") 1193 1194 else: 1195 self.session.logging.warn( 1196 "There are no arenas. Maybe this instance has not been " 1197 "initialized for the current task. Try to initialize it via " 1198 "'init_for_task'.") 1199 1200 return None
1201 1202
1203 - def _initialize_arenas(self, main_arena):
1204 """Gathers all arenas, their heaps and sets main_arenas first chunk.""" 1205 1206 main_arena.is_main_arena = True 1207 1208 for arena in main_arena.walk_list('next'): 1209 self.arenas.append(arena) 1210 1211 if arena.is_main_arena: 1212 main_arena.mmapped_first_chunks = list() 1213 main_arena_range = get_mem_range_for_regex( 1214 self.vmas, re.escape(self._main_heap_identifier)) 1215 1216 if main_arena_range: 1217 main_arena.first_chunk = self.profile.malloc_chunk( 1218 main_arena_range[0] + self._first_chunk_distance, 1219 vm=self.process_as) 1220 1221 else: 1222 self.session.logging.warn( 1223 "The current process {:d} doesn't seem to have a main " 1224 "heap. There are multiple possible explanations for " 1225 "that: 1. The program uses another heap implementation" 1226 " (e.g. Mozilla products). 2. The process didn't touch" 1227 " the heap at all (didn't allocate any chunks within " 1228 "the main thread). 3. We were unable to correctly " 1229 "identify the main heap. One verification possibility " 1230 "is to check with the 'maps' plugin, whether or not " 1231 "this process seems to have a heap." 1232 .format(self.task.pid)) 1233 1234 else: 1235 arena.heaps = self._heaps_for_arena(arena)
1236 # in this implementation, thread arenas don't use the 1237 # first_chunk member, but their heaps keep them 1238 1239
1241 """Creates a dummy arena, initializes relevant variables and manually 1242 walks the main heap vma and adds all chunks to the allocated and freed 1243 chunks lists.""" 1244 1245 self._has_dummy_arena = True 1246 dummy_arena = self.profile.malloc_state() 1247 1248 self._initialize_arenas(dummy_arena) 1249 1250 main_arena_range = get_mem_range_for_regex( 1251 self.vmas, re.escape(self._main_heap_identifier)) 1252 1253 # There might be scenarios in which there is no main heap but only 1254 # mmapped chunks. In this case, main_arena_range is None. 1255 if main_arena_range: 1256 dummy_arena.system_mem = main_arena_range[1] - main_arena_range[0] 1257 1258 # we activate chunk preservation (if not prevented via cmdline 1259 # option), as we have to walk all chunks at this point anyways 1260 self.activate_chunk_preservation() 1261 1262 if self._preserve_chunks: 1263 dummy_arena.allocated_chunks = list() 1264 1265 curr_chunk = None 1266 # while there will be no freed chunk to gather, we still test for 1267 # it as we need to walk the chunks anyways to get to the top chunk 1268 for next_chunk in dummy_arena.first_chunk.next_chunk_generator(): 1269 if not curr_chunk: 1270 curr_chunk = next_chunk 1271 self._check_and_report_chunksize(curr_chunk, 1272 main_arena_range[1]) 1273 continue 1274 1275 if (curr_chunk.v() + curr_chunk.chunksize()) \ 1276 == main_arena_range[1] and curr_chunk.get_size() > 0x0: 1277 # we hit top chunk 1278 curr_chunk.is_top_chunk = True 1279 dummy_arena.top_chunk = curr_chunk 1280 1281 break 1282 1283 self._check_and_report_chunksize(next_chunk, 1284 main_arena_range[1]) 1285 is_in_use = next_chunk.prev_inuse() 1286 1287 if (curr_chunk.v() + curr_chunk.chunksize()) \ 1288 < main_arena_range[1] and not is_in_use: 1289 1290 curr_chunk.is_bin_chunk = True 1291 dummy_arena.freed_chunks.append(curr_chunk) 1292 1293 1294 elif self._preserve_chunks: 1295 dummy_arena.allocated_chunks.append(curr_chunk) 1296 1297 curr_chunk = next_chunk 1298 1299 1300 if dummy_arena.top_chunk: 1301 end = dummy_arena.top_chunk.v() \ 1302 + dummy_arena.top_chunk.chunksize() 1303 if dummy_arena.system_mem != end - main_arena_range[0]: 1304 self.session.logging.warn( 1305 "Unexpected mismatch: memory range for main heap " 1306 "is not equal to the range calculated with the top " 1307 "chunk. This is unexpected, indicates a problem and " 1308 "will most probably lead to unreliable results.")
1309 1310 1311 1312
1313 - def _mark_heap_vm_areas(self):
1314 """Marks all vm_areas containing known heap_info structs with 1315 '_heap_vma_identifier'. This flag is required by other functions. 1316 The marking process is normally done automatically in the function 1317 _get_vmas_for_task, but in the case where no offset for the main arena 1318 and no main heap is present, this step fails.""" 1319 1320 known_heaps = [heap for arenas in self.arenas for heap in arenas.heaps] 1321 1322 for heap in known_heaps: 1323 vma = get_vma_for_offset(self.vmas, heap.v()) 1324 if vma: 1325 vma['name'] = self._heap_vma_identifier
1326 1327
1328 - def _check_heap_consistency(self):
1329 """Searches manually for heap_info structs on every potential heap 1330 area memory region, which points to a known arena. If it finds one 1331 that is not part of the already known heaps, it prints a warning.""" 1332 1333 known_heaps = [heap for arenas in self.arenas for heap in arenas.heaps] 1334 temp_heaps = set() 1335 for vm_area in self.vmas: 1336 name = vm_area['name'] 1337 vma = vm_area['vma'] 1338 if name == self._heap_vma_identifier or \ 1339 name == self._pot_mmapped_vma_identifier: 1340 heap_info = self._heap_for_ptr(vma.vm_start, 1341 vma=vma, 1342 suppress_warning=True) 1343 1344 if heap_info.ar_ptr in self.arenas: 1345 1346 if heap_info not in known_heaps: 1347 temp_heaps.add(heap_info) 1348 1349 while heap_info.v() + heap_info.size < vma.vm_end \ 1350 and heap_info.ar_ptr in self.arenas: 1351 1352 heap_info = self.profile._heap_info( 1353 offset=heap_info.v() + heap_info.size, 1354 vm=self.process_as) 1355 1356 if heap_info.ar_ptr in self.arenas \ 1357 and heap_info not in known_heaps: 1358 temp_heaps.add(heap_info) 1359 1360 additional_heaps = set() 1361 for temp_heap_info in temp_heaps: 1362 for heap_info in temp_heap_info.walk_list('prev'): 1363 if heap_info not in known_heaps: 1364 additional_heaps.add(heap_info) 1365 1366 additional_heaps = additional_heaps.union(temp_heaps) 1367 1368 if additional_heaps: 1369 self.session.logging.warn( 1370 "We probably found at least one heap, which is not part of our" 1371 "internal list. This shouldn't be the case, indicates a " 1372 "problem and will lead to unreliable results. The offset(s) " 1373 "of the additional heap(s) is/are: " 1374 + ("0x{:x} " * len(additional_heaps)) 1375 .format(*[heap.v() for heap in additional_heaps]))
1376 1377
1379 """There are scenarios in which the last heap of an arena contains 1380 additional space which is not covered by the top chunk, leading to 1381 deviating results with the compare_vma_sizes_with_chunks function. 1382 This function tries to identify those areas and add their size to the 1383 _heap_slack_space attribute.""" 1384 1385 for arena in self.arenas: 1386 if not arena.is_main_arena: 1387 # there are scenarios in which one vma shares heap_infos from 1388 # different arenas so we gather here the last heap_info of a 1389 # given vma and test for slack space 1390 vma = get_vma_for_offset(self.vmas, arena.top_chunk.v())['vma'] 1391 heap = self._last_heap_for_vma(vma) 1392 1393 if heap.v() + heap.size < vma.vm_end: 1394 self._heap_slack_space[heap] = (vma.vm_end - (heap.v() 1395 + heap.size))
1396 1397 1398
1400 """Gathers the first chunk for each heap and sets it as first_chunk in 1401 the _heap_info class.""" 1402 1403 heap_offset = self.profile.get_obj_size('_heap_info') 1404 malloc_offset = self.profile.get_obj_size('malloc_state') 1405 1406 for arena in self.arenas: 1407 # main_arena has no associated _heap_info structs 1408 if arena.is_main_arena: 1409 continue 1410 1411 for heap in arena.heaps: 1412 first_chunk_offset = heap.v() + heap_offset 1413 1414 # only the first heap area contains also the malloc_state 1415 # the prev field for the first heap_info is 0x0 1416 if heap.prev == 0x0: 1417 first_chunk_offset += malloc_offset 1418 1419 # chunks are aligned, so in the case of non main_arenas, the 1420 # address after the heap_info (and malloc_state) is probably 1421 # not directly the first chunk but a few bytes after. So we 1422 # try to find the first non-zero size_sz bytes. 1423 # 1424 # To prevent looking in the middle of a 8 byte size from a 1425 # large chunk, we walk in steps of 8 bytes, as this is also 1426 # the minimal alignment (32 bit) 1427 first_chunk_offset = self.get_aligned_address( 1428 first_chunk_offset, different_align_mask=7) 1429 1430 expected_first_chunk_offset = self.get_aligned_address( 1431 first_chunk_offset) 1432 1433 for _ in range(8): 1434 temp = self.process_as.read(first_chunk_offset, 1435 self._size_sz) 1436 1437 temp = struct.unpack('I' if self._size_sz == 4 else 'Q', 1438 temp)[0] 1439 1440 # the first member of the malloc_chunk is the prev_size 1441 # field, which should be 0x0 for the first chunk and the 1442 # following member is size which should be > 0x0. 1443 if temp != 0x0: 1444 first_chunk_offset -= self._size_sz 1445 break 1446 1447 first_chunk_offset += self._size_sz 1448 1449 # Normally, the first chunk is exactly the aligned address 1450 # after the structs, but if we find it somewhere else, it is 1451 # an indicator for another libc version (e.g. differing 1452 # structs) that we don't have the correct vtypes for or 1453 # another MALLOC_ALIGNMENT value 1454 if first_chunk_offset != expected_first_chunk_offset: 1455 self.session.logging.warn( 1456 "We identified an unexpected address deviation, which " 1457 "indicates another glibc version than the one we are " 1458 "using or another value for MALLOC_ALIGNMENT. Verify " 1459 "which version is used and provide the debug " 1460 "information for that version. At the moment, " 1461 "officially only those versions are supported when " 1462 "not providing debug information for a specific " 1463 "version: {:s}" 1464 .format(', '.join(_SUPPORTED_GLIBC_VERSIONS))) 1465 1466 if self.session.profile.metadata("arch") == 'I386': 1467 self.session.logging.warn( 1468 "We just try for now to adjust the " 1469 "MALLOC_ALIGNMENT to 16 byte (instead of 8). This " 1470 "might solve the problem.") 1471 self._initialize_malloc_alignment(malloc_alignment=16) 1472 1473 heap.first_chunk = self.profile.malloc_chunk( 1474 offset=first_chunk_offset, vm=self.process_as) 1475 1476 if arena.top_chunk != heap.first_chunk: 1477 self._check_and_report_non_main_arena( 1478 heap.first_chunk, heap.first_chunk.is_in_use())
1479 1480 1481
1483 """Gathers the first chunk for each MMAPPED region and sets it on the 1484 main_arena. First chunks for MMAPPED regions are only kept in the 1485 main_arena, which is the first arena in the 'arenas' attribute of the 1486 current class.""" 1487 1488 # we first gather all vm_area offsets belonging to the main heap or 1489 # thread heaps 1490 heap_offsets = [] 1491 main_arena = self.get_main_arena() 1492 if main_arena.first_chunk: 1493 heap_offsets.append(main_arena.first_chunk.v()) 1494 1495 for arena in self.arenas: 1496 for heap in arena.heaps: 1497 heap_offsets.append(heap.v()) 1498 1499 # now we gather all vm_areas that do not contain a known 1500 # heap_info struct 1501 for vm_area in self.vmas: 1502 name = vm_area['name'] 1503 vma = vm_area['vma'] 1504 if (name == self._heap_vma_identifier 1505 or name == self._pot_mmapped_vma_identifier) \ 1506 and str(vma.vm_flags).startswith('rw') \ 1507 and vma.vm_start not in heap_offsets: 1508 1509 mmap_chunk = self.profile.malloc_chunk(offset=vma.vm_start, 1510 vm=self.process_as) 1511 1512 if self._check_and_report_mmapped_chunk(mmap_chunk, vma): 1513 main_arena.mmapped_first_chunks.append(mmap_chunk)
1514 1515
1516 - def _initialize_heap_vma_list(self):
1517 """Searches for vmas that are known to belong to the heap and adds 1518 them to the internal heap_vmas list.""" 1519 1520 self.heap_vmas = [] 1521 1522 for vma in self.vmas: 1523 if vma['name'] == self._main_heap_identifier: 1524 self.heap_vmas.append(vma) 1525 1526 for arena in self.arenas: 1527 if arena.is_main_arena: 1528 for mmap_chunk in arena.mmapped_first_chunks: 1529 vma = get_vma_for_offset(self.vmas, mmap_chunk.v()) 1530 if vma not in self.heap_vmas: 1531 self.heap_vmas.append(vma) 1532 1533 else: 1534 for heap in arena.heaps: 1535 vma = get_vma_for_offset(self.vmas, heap.v()) 1536 if vma not in self.heap_vmas: 1537 self.heap_vmas.append(vma)
1538 1539 1540
1541 - def _check_and_report_non_main_arena(self, chunk, chunk_in_use):
1542 """Checks the given chunk for the NON_MAIN_ARENA bit and prints a 1543 warning if not set. This functions should obviously only be used with 1544 chunks not belonging to main_arena but also not for MMAPPED chunks 1545 (they don't have the NON_MAIN_ARENA bit set).""" 1546 1547 if chunk_in_use and not chunk.non_main_arena(): 1548 self.session.logging.warn( 1549 "Unexpected error: The non main arena chunk at offset 0x{0:x} " 1550 "doesn't have the NON_MAIN_ARENA bit set.".format(chunk.v()))
1551 1552
1553 - def _log_mmapped_warning_messages(self, warning):
1554 1555 if not self.mp_: 1556 self.session.logging.warn(warning) 1557 1558 else: 1559 self._mmapped_warnings.add(warning)
1560 1561 1562 # As there might be multiple scenarios, in which a vm_area is mistakenly 1563 # treated as a mmapped region (see following warn messages for details), 1564 # we strictly test for prev_size to be 0x0 (normally always the case for 1565 # the first chunk in a memory region), the size to be != 0 and the mmapped 1566 # bit to be set
1567 - def _check_and_report_mmapped_chunk(self, mmap_chunk, mmap_vma):
1568 """Checks the given chunk for various MMAPPED chunk specific 1569 attributes. Depending on the results and the location of the chunk, 1570 a info or warning is printed.""" 1571 1572 base_string = ("Current MMAPPED chunk at offset 0x{0:x} " 1573 .format(mmap_chunk.v())) 1574 1575 zero_first_chunk_error_reasons = ( 1576 "As this chunk resides at the beginning of the vm_area, " 1577 "this fact might have multiple reasons: " 1578 "1. It is part of a MMAPPED region but there are not yet any " 1579 "allocated chunks. 2. The current vm_area is in fact the rest of " 1580 "a dead thread stack or belongs to a mapped file, which is not " 1581 "disginguishable from heap-vmas at the moment. " 1582 "3. There might be an unexpected error. " 1583 "In the first two cases, this warning can be considered harmless.") 1584 1585 zero_middle_chunk_error_reasons = ( 1586 "In the current case, this fact might have the following reasons: " 1587 "1. It is the result from an MMAPPED region, which doesn't use " 1588 "the whole space for its chunks (in this case harmless). " 1589 "2. The current data belongs to an MMAPPED region, which shares " 1590 "its vm_area with an mapped file or other data (also harmless). " 1591 "3. It results from an accidently chosen vm_area to be part of " 1592 "the heap (more specifically, to be an MMAPPED chunks region). " 1593 "This can happen with old thread stacks or vm_areas of mapped " 1594 "file and indicates an error and leads to wrong results. " 1595 "4. An unexpected error (might lead to unrealiable results).") 1596 1597 first_chunk_error_reasons = ( 1598 "As this chunk resides at the beginning of the vm_area, " 1599 "this fact might have the following reasons: " 1600 "1. The current vm_area is in fact the rest of a dead thread " 1601 "stack or belongs to a mapped file, which is not disginguishable " 1602 "from heap-vmas at the moment. " 1603 "2. There might be an unexpected error. " 1604 "In the first case, this warning can be considered harmless.") 1605 1606 middle_chunk_error_reasons = ( 1607 "In the current case, this fact might have the following reasons: " 1608 "1. The current data belongs to an MMAPPED region, which shares " 1609 "its vm_area with an mapped file or other data (in this case " 1610 "harmless). 2. It results from an accidently chosen vm_area to be " 1611 "part of the heap (more specifically, to be an MMAPPED chunks " 1612 "region). This can happen with old thread stacks or vm_areas of " 1613 "mapped file and indicates an error and leads to wrong results. " 1614 "3. An unexpected error (might lead to unrealiable results).") 1615 1616 1617 # as the size for mmapped chunks is at least pagesize, we expect them 1618 # to be >= 4096 1619 # see glibc_2.23 malloc/malloc.c lines 2315 - 2318 1620 if mmap_chunk.get_prev_size() != 0 or \ 1621 mmap_chunk.chunksize() < self._min_pagesize or \ 1622 mmap_chunk.chunksize() % self._min_pagesize != 0 or \ 1623 mmap_chunk.v() + mmap_chunk.chunksize() > mmap_vma.vm_end: 1624 1625 if mmap_chunk.get_prev_size() == 0 and mmap_chunk.get_size() == 0: 1626 base_string += "has zero size. " 1627 1628 if mmap_chunk.v() == mmap_vma.vm_start: 1629 1630 # it is possible that a vm_area is marked as rw and does 1631 # not contain a stack or heap or mmap region. 
we 1632 # identified this case only when no threads are active 1633 number_of_heap_vmas = 0 1634 for vma in self.vmas: 1635 if vma['name'] == self._heap_vma_identifier: 1636 number_of_heap_vmas += 1 1637 1638 if number_of_heap_vmas <= 1 and len(self.arenas) == 1 \ 1639 and not self._are_there_any_threads(): 1640 self.session.logging.info( 1641 base_string + "In this case, it seems " 1642 "to be the result from a process with no threads " 1643 "and a not yet used memory region, hence " 1644 "indicating nothing abnormal.") 1645 1646 else: 1647 self.session.logging.info( 1648 base_string + zero_first_chunk_error_reasons) 1649 1650 else: 1651 self._log_mmapped_warning_messages( 1652 base_string + zero_middle_chunk_error_reasons) 1653 self._mmap_slack_space[mmap_chunk] = (mmap_vma.vm_end - 1654 mmap_chunk.v()) 1655 1656 else: 1657 base_string += "has invalid values. " 1658 if mmap_chunk.v() == mmap_vma.vm_start: 1659 self.session.logging.info(base_string + 1660 first_chunk_error_reasons) 1661 1662 else: 1663 self._log_mmapped_warning_messages( 1664 base_string + middle_chunk_error_reasons) 1665 self._mmap_slack_space[mmap_chunk] = (mmap_vma.vm_end - 1666 mmap_chunk.v()) 1667 1668 1669 elif mmap_chunk.prev_inuse() or mmap_chunk.non_main_arena(): 1670 base_string += ("has either the prev_inuse or non_main_arena bit " 1671 "set, which is normally not the case for MMAPPED " 1672 "chunks.") 1673 1674 if mmap_chunk.v() == mmap_vma.vm_start: 1675 self.session.logging.info( 1676 base_string + first_chunk_error_reasons) 1677 1678 else: 1679 self._log_mmapped_warning_messages( 1680 base_string + middle_chunk_error_reasons) 1681 1682 self._mmap_slack_space[mmap_chunk] = (mmap_vma.vm_end 1683 - mmap_chunk.v()) 1684 1685 1686 elif not mmap_chunk.is_mmapped(): 1687 base_string += "doesn't have the is_mmapped bit set. " 1688 1689 if mmap_chunk.v() == mmap_vma.vm_start: 1690 self.session.logging.info( 1691 base_string + first_chunk_error_reasons) 1692 1693 else: 1694 self._log_mmapped_warning_messages( 1695 base_string + middle_chunk_error_reasons) 1696 1697 self._mmap_slack_space[mmap_chunk] = (mmap_vma.vm_end 1698 - mmap_chunk.v()) 1699 1700 elif not self._check_mmap_alignment(mmap_chunk.v()): 1701 self._log_mmapped_warning_messages( 1702 base_string + "is not aligned. As chunks are normally always " 1703 "aligned, this indicates a mistakenly chosen mmapped chunk " 1704 "and probably results in wrong results.") 1705 1706 # everything is ok 1707 else: 1708 return True 1709 1710 return False
1711 1712
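# A condensed, standalone sketch of the sanity conditions the check above
# applies to a supposedly MMAPPED chunk, written over plain integers instead
# of malloc_chunk objects (the helper name and signature are illustrative and
# not part of this module):
def looks_like_mmapped_chunk(prev_size, size, address, vma_end, pagesize=4096):
    chunksize = size & ~0x7                      # strip the three flag bits
    return (prev_size == 0                       # prev_size field is unused
            and chunksize >= pagesize            # at least one page ...
            and chunksize % pagesize == 0        # ... and a multiple of it
            and bool(size & 0x2)                 # IS_MMAPPED must be set
            and not size & 0x1                   # PREV_INUSE and
            and not size & 0x4                   # NON_MAIN_ARENA must not be set
            and address + chunksize <= vma_end)  # chunk must fit the vm_area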
1713 - def _are_there_any_threads(self):
1714         """This function searches for vmas containing the stack for a thread
1715         and returns True if it finds at least one."""
1716 
1717         # mm_users holds the number of mm_struct users. When a thread is
1718         # created, it gets a reference to the mm_struct and the counter is
1719         # increased: mm_users >= 2 means there are threads
1720         if self.task.mm.mm_users.counter >= 2:
1721             return True
1722 
1723         # if the first test fails, we still look for thread stack segments
1724         for vma in self.vmas:
1725             if vma['name'].startswith('[stack:'):
1726                 return True
1727 
1728         return False
1729 1730
1731 - def _get_max_fast_chunk_size(self):
1732         """Returns the maximum size for the data part of fast chunks.
1733         E.g. for 32-bit architectures, the max size is normally 64 bytes,
1734         but 4 bytes are used by the size field of the malloc_chunk struct,
1735         which leaves 60 bytes for the data part."""
1736 
1737         return 60 if self._size_sz == 4 else 120
1738 1739
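# The arithmetic behind the two constants above, assuming glibc's default
# MXFAST of 64 * SIZE_SZ / 4 (a glibc detail, not taken from this module):
def max_fast_data_size(size_sz):
    default_mxfast = 64 * size_sz // 4   # 64 bytes on 32 bit, 128 on 64 bit
    return default_mxfast - size_sz      # minus the size field -> 60 / 120

assert max_fast_data_size(4) == 60 and max_fast_data_size(8) == 120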
1740 - def iterate_through_chunks(self, first_chunk, mem_end, only_free=False, 1741 only_alloc=False):
1742         """This function iterates chunk after chunk until hitting mem_end.
1743         Tests for allocation status are not made via bins/fastbins but with
1744         chunk flags. Note: This function will not return the last chunk if
1745         only_free and/or only_alloc is set, as there is no PREV_INUSE bit
1746         which could be tested."""
1747 
1748         if not (only_free or only_alloc):
1749             for curr_chunk in first_chunk.next_chunk_generator():
1750                 if (curr_chunk.v() + curr_chunk.chunksize()) < mem_end:
1751                     yield curr_chunk
1752 
1753                 else:
1754                     yield curr_chunk
1755                     break
1756 
1757 
1758         else:
1759             curr_chunk = None
1760 
1761             for next_chunk in first_chunk.next_chunk_generator():
1762                 if not curr_chunk:
1763                     curr_chunk = next_chunk
1764                     continue
1765 
1766 
1767                 if (curr_chunk.v() + curr_chunk.chunksize()) < mem_end:
1768                     is_in_use = next_chunk.prev_inuse()
1769 
1770                     if only_free and not is_in_use or \
1771                             only_alloc and is_in_use:
1772                         yield curr_chunk
1773 
1774                 else:
1775                     # we hit the last/top chunk. As there is no following
1776                     # chunk, we can't examine the PREV_INUSE bit
1777                     break
1778 
1779                 curr_chunk = next_chunk
1780 1781 1782
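# A minimal sketch of the walking scheme above, over a flat list of
# (offset, raw_size) tuples instead of malloc_chunk objects: the allocation
# status of a chunk is stored in the PREV_INUSE bit (0x1) of the *following*
# chunk's size field, which is also why the last chunk cannot be classified.
def classify_chunks(chunks):
    """chunks: (offset, raw_size) tuples, ordered by offset."""
    for current, following in zip(chunks, chunks[1:]):
        allocated = bool(following[1] & 0x1)
        yield current, allocated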
1783 - def _offset_in_heap_range(self, offset):
1784         """Returns True if the given offset resides in a vma potentially
1785         belonging to the heap. This function is only used while carving for
1786         the main arena and hence cannot use the internal heap_vmas list,
1787         which is generated later on."""
1788 
1789         for vma in self.vmas:
1790             if vma['vma'].vm_start <= offset < vma['vma'].vm_end:
1791                 name = vma['name']
1792                 if name == self._main_heap_identifier \
1793                         or name == self._heap_vma_identifier:
1794                     return True
1795 
1796         return False
1797 1798 1799
1800 - def _carve_main_arena(self):
1801         """Calling this method assumes that we don't have debug information (in
1802         the sense of constant offsets for data structures) for the target libc
1803         implementation and do not know the location of the main_arena. If the
1804         current task contains threads however, we are able to get the location
1805         of the main_arena. If there are no threads, we are still able to locate
1806         the main_arena by following the fd/bk pointers in freed chunks.
1807         The last attempt is done by walking the chunks of the main heap until
1808         the top chunk is hit. As the main arena keeps a pointer to this chunk,
1809         we simply search all memory regions for pointers.
1810         This method returns either the main_arena or None."""
1811 
1812         if not self._libc_profile_success:
1813             self.session.logging.error("No libc profile with rudimentary "
1814                                        "struct information available.")
1815 
1816             return None
1817 
1818 
1819         libc_range = get_libc_range(self.vmas)
1820 
1821         if self._are_there_any_threads():
1822             self.session.logging.info(
1823                 "As there are threads, we try to gather the main_arena "
1824                 "via the _heap_info structs.")
1825 
1826         else:
1827             self.session.logging.info(
1828                 "We first try to gather the main_arena via dead thread "
1829                 "heaps, assuming there are any.")
1830 
1831         good_arenas = []
1832         # bad arenas don't loop with their next pointer within the maximum
1833         # number of arenas for the current number of cores and the architecture
1834         # see _check_arenas
1835         bad_arenas = []
1836 
1837         # first we try to find a heap_info struct whose ar_ptr points right
1838         # after itself. This is the case for the first vm_area containing the
1839         # first heap_info and the corresponding malloc_state struct
1840         for vma in self.vmas:
1841             if vma['name'] == self._heap_vma_identifier \
1842                     or vma['name'] == self._pot_mmapped_vma_identifier:
1843 
1844                 heap_info = self.profile._heap_info(offset=vma['vma'].vm_start,
1845                                                     vm=self.process_as)
1846 
1847                 # we try to find a heap_info struct which is followed by a
1848                 # malloc_state. The prev member of the first _heap_info struct
1849                 # (which is the one followed by the malloc_state struct) is 0x0
1850                 heap_info_size = self.profile.get_obj_size('_heap_info')
1851 
1852                 if vma['vma'].vm_start <= heap_info.ar_ptr.v() \
1853                         <= vma['vma'].vm_end:
1854                     heap_info_address = self.get_aligned_address(
1855                         heap_info_size + vma['vma'].vm_start)
1856 
1857                     if heap_info.ar_ptr.v() == heap_info_address \
1858                             and heap_info.prev.v() == 0x0:
1859 
1860                         arena = heap_info.ar_ptr
1861                         arena_consistency = self._check_arenas(
1862                             arena, deactivate_swap_check=True)
1863 
1864                         if arena_consistency is True or arena_consistency \
1865                                 is None:
1866                             good_arenas.append(arena)
1867 
1868                         else:
1869                             bad_arenas.append(arena)
1870 
1871 
1872         reached_bad_arenas = False
1873 
1874         # now we try to use the potential arenas to find the main_arena
1875         # located in the libc
1876         for arena_list in good_arenas, bad_arenas:
1877             for arena in arena_list:
1878                 for pot_main_arena in arena.walk_list('next'):
1879                     if libc_range and libc_range[0] <= pot_main_arena.v() \
1880                             <= libc_range[1] or not libc_range and \
1881                             not self._offset_in_heap_range(pot_main_arena.v()):
1882 
1883                         if reached_bad_arenas:
1884                             self.session.logging.warn(
1885                                 "The arena pointers for the gathered "
1886                                 "main_arena don't seem to loop. 
The reason " 1887 "might be wrong arena pointers and probably " 1888 "leads to unreliable results.") 1889 1890 else: 1891 self.session.logging.info( 1892 "We most probably found the main_arena via " 1893 "heap_info structs") 1894 1895 return pot_main_arena 1896 1897 reached_bad_arenas = True 1898 1899 self.session.logging.info( 1900 "It doesn't seem like the task with pid {0:d} has any threads, " 1901 "and as we don't have have the main arena offset, we now try to " 1902 "find freed chunks and with them the location of the main_arena." 1903 .format(self.task.pid)) 1904 1905 1906 # the previous method didn't work so we now try to gather the main 1907 # arena via freed chunks 1908 main_heap_range = get_mem_range_for_regex( 1909 self.vmas, re.escape(self._main_heap_identifier)) 1910 1911 if not main_heap_range: 1912 return None 1913 1914 first_chunk = self.profile.malloc_chunk( 1915 offset=main_heap_range[0] + self._first_chunk_distance, 1916 vm=self.process_as) 1917 1918 offset_to_top = self.profile.get_obj_offset("malloc_state", "top") 1919 1920 1921 # not used right here, but part of the next method of carving the 1922 # main arena 1923 last_freed_chunk = None 1924 1925 for free_chunk in self.iterate_through_chunks(first_chunk, 1926 main_heap_range[1], 1927 only_free=True): 1928 1929 last_freed_chunk = free_chunk 1930 1931 # we now try to follow the bk links to get to the main_arena 1932 for curr_free_chunk in free_chunk.walk_list('bk'): 1933 1934 if libc_range and libc_range[0] <= curr_free_chunk.v() \ 1935 <= libc_range[1] or not libc_range and \ 1936 not self._offset_in_heap_range(curr_free_chunk.v()): 1937 # we are now within the main_arena and try 1938 # to find the top chunk by going backwards 1939 1940 offset_to_binmap = self.profile.get_obj_offset( 1941 "malloc_state", "binmap") 1942 maximum_offset_to_top = offset_to_binmap - offset_to_top 1943 1944 curr_off = curr_free_chunk.v() 1945 fmt = 'I' if self._size_sz == 4 else 'Q' 1946 1947 # as between the bins and top are only pointers, walking in 1948 # size_sz steps should be no problem 1949 for i in range(0, maximum_offset_to_top, self._size_sz): 1950 temp = self.process_as.read(curr_off - i, 1951 self._size_sz) 1952 temp = struct.unpack(fmt, temp)[0] 1953 1954 if main_heap_range[0] <= temp <= main_heap_range[1]: 1955 pot_top = self.profile.malloc_chunk( 1956 offset=temp, vm=self.process_as) 1957 1958 if pot_top.v() + pot_top.chunksize() == \ 1959 main_heap_range[1]: 1960 # we hit top chunk 1961 1962 self.session.logging.info( 1963 "We found the main_arena via a freed " 1964 "chunk.") 1965 1966 return self.profile.malloc_state( 1967 offset=(curr_off - i) - offset_to_top, 1968 vm=self.process_as) 1969 1970 1971 # Ending up here means all previous methods were not able to find the 1972 # main arena. The last method we try at this point is to search for 1973 # pointers to the top chunk. At least the main_arena should have a 1974 # pointer to the top chunk 1975 # 1976 # TODO the way we do this (including the last method) is inefficient, 1977 # as for most cases, all chunks from the main heap are walked twice 1978 # => improve it! 
1979 1980 # we walk from the last freed chunk from the previous method or from 1981 # the first chunk until the top chunk 1982 if last_freed_chunk: 1983 first_chunk = last_freed_chunk 1984 1985 top_chunk = None 1986 for curr_chunk in self.iterate_through_chunks(first_chunk, 1987 main_heap_range[1]): 1988 top_chunk = curr_chunk 1989 1990 if top_chunk.v() + top_chunk.chunksize() == main_heap_range[1]: 1991 1992 # we most probably found our top chunk and now search for pointers 1993 # to it 1994 for hit in self.search_vmas_for_needle(pointers=[top_chunk.v()]): 1995 pot_main_arena = self.profile.malloc_state( 1996 offset=hit['hit'] - offset_to_top, vm=self.process_as) 1997 1998 if top_chunk == pot_main_arena.top and \ 1999 pot_main_arena.system_mem == \ 2000 (top_chunk.v() + top_chunk.chunksize() 2001 - main_heap_range[0]): 2002 2003 # as the 'thread arena carving' method didn't find an 2004 # arena, the 'next' field should point to itself 2005 if pot_main_arena.next == pot_main_arena: 2006 self.session.logging.info( 2007 "We found the main_arena via top chunk.") 2008 return pot_main_arena 2009 2010 else: 2011 arena_consistency = self._check_arenas( 2012 pot_main_arena, deactivate_swap_check=True) 2013 if arena_consistency is True or arena_consistency \ 2014 is None: 2015 self.session.logging.info( 2016 "We found the main_arena via top chunk.") 2017 return pot_main_arena 2018 2019 2020 # This will most probably only happen, if the page containing the main 2021 # arena has been swapped 2022 self.session.logging.warn( 2023 "We were not able to find the main arena for task {0:d} and since " 2024 "we have no debug information about its offset, we can't retrieve " 2025 "it directly.".format(self.task.pid)) 2026 2027 return None
2028 2029
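# A compact sketch of the last fallback above, over plain integers: the top
# chunk is the chunk whose end coincides with the end of the main heap, and
# every word pointing at it is a candidate for malloc_state.top, i.e. a
# candidate arena starts offsetof(malloc_state, 'top') bytes before the hit
# (both helper names below are illustrative, not part of this module):
def find_top_chunk(chunks, heap_end):
    """chunks: (offset, chunksize) pairs of the main heap."""
    for offset, size in chunks:
        if offset + size == heap_end:
            return offset
    return None

def candidate_main_arenas(pointer_hits, top_offset):
    """pointer_hits: addresses where a pointer to the top chunk was found."""
    return [hit - top_offset for hit in pointer_hits]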
2030 - def _reset(self):
2031 """Prepares the HeapAnalysis instance to work with a new process.""" 2032 2033 self._libc_profile_success = False 2034 self._libc_offset = None 2035 self.process_as = None 2036 self.arenas = [] 2037 self.vmas = None 2038 self.heap_vmas = None 2039 self.mp_ = None 2040 self._mmapped_warnings = set() 2041 self.task = None 2042 self.statistics = None 2043 self._mmap_slack_space = dict() 2044 self._heap_slack_space = dict() 2045 self._hidden_chunks = set() 2046 self._stack_vmas_and_offsets = None 2047 self._is_statically_linked = False 2048 self._has_dummy_arena = False 2049 self._first_chunk_distance = 0
2050 2051 2052 # TODO reliable verification via page table information
2053 - def _check_and_report_arena_for_being_swapped(self, arena):
2054         """Tests the fields of an arena for null bytes. If those fields are
2055         null, it is a good indication that the corresponding memory region has
2056         been swapped."""
2057 
2058         if arena:
2059             if arena.top.v() == arena.next.v() == arena.system_mem.v() == 0:
2060                 # arena has likely been swapped
2061                 self.session.logging.warn(
2062                     "Some crucial fields of the arena at offset 0x{:x} are "
2063                     "all null. The reason might be a wrong offset to the "
2064                     "main arena, a statically linked binary, a fundamental "
2065                     "error in this plugin, or (in most cases) swapped memory "
2066                     "pages. Either way, the results will most probably be "
2067                     "incorrect and incomplete.".format(arena.v()))
2068                 return True
2069 
2070         return False
2071 2072 2073 # TODO reliable verification via page table information
2074 - def _check_and_report_mp_for_being_swapped(self, malloc_par_struct):
2075         """Tests the fields of the malloc_par struct. If those fields are null,
2076         it is a good indication that the corresponding memory region has been
2077         swapped."""
2078 
2079         if malloc_par_struct:
2080             if malloc_par_struct.mmap_threshold.v() == 0:
2081                 # memory page belonging to malloc_par struct has likely
2082                 # been swapped
2083                 self.session.logging.warn(
2084                     "At least the mmap_threshold field of the malloc_par "
2085                     "struct at offset 0x{:x} is null. The reason might be a "
2086                     "wrong offset to the malloc_par struct, a statically "
2087                     "linked binary, a fundamental error in this plugin, or "
2088                     "(in most cases) swapped memory pages. Either way, the "
2089                     "MMAPPED chunk algorithms will not work perfectly and "
2090                     "hence, some chunks might be missing."
2091                     .format(malloc_par_struct.v()))
2092                 return True
2093 
2094         return False
2095 2096
2097 - def init_for_task(self, task):
2098 """initializes the process address space and malloc_par struct and 2099 calls initialize_*. Should be the first method to be called for each 2100 task. 2101 Returns True if everything seems to be gone fine.""" 2102 2103 self._reset() 2104 2105 # processes normally have an associated mm_struct/memory descriptor 2106 # if there is none, it is probably a kernel thread 2107 if task.mm: 2108 self.session.plugins.cc().SwitchProcessContext(task) 2109 2110 self.task = task 2111 self.vmas = self._get_vmas_for_task(task) 2112 2113 if self.vmas: 2114 self._load_libc_profile() 2115 2116 if self._libc_profile_success: 2117 2118 ###### taken from malloc/malloc.c (glibc-2.23) 2119 min_chunk_size = self.profile.get_obj_offset( 2120 "malloc_chunk", "fd_nextsize") 2121 self._minsize = self.get_aligned_address(min_chunk_size) 2122 ###### 2123 2124 self.process_as = task.get_process_address_space() 2125 2126 libc_range = get_libc_range(self.vmas) 2127 # we prepone setting the self._libc_offset as it is 2128 # required for _initialize_malloc_par 2129 if libc_range: 2130 self._libc_offset = libc_range[0] 2131 2132 self._initialize_malloc_par() 2133 2134 if not libc_range: 2135 # seems like a statically linked executable 2136 self.session.logging.warn( 2137 "Didn't find the libc filename in the vm_areas of " 2138 "the current process: {:d} - {:s} . This might " 2139 "lead to unreliable results or might be because " 2140 "the executable has been statically linked." 2141 .format(task.pid, repr(task.comm.v()))) 2142 2143 if self.mp_: 2144 # the beginning of the chunk area is pointed to by 2145 # mp_.sbrk_base 2146 self._is_statically_linked = True 2147 main_arena_range = get_mem_range_for_regex( 2148 self.vmas, 2149 re.escape(self._main_heap_identifier)) 2150 2151 self._first_chunk_distance = \ 2152 self.mp_.sbrk_base.v() - main_arena_range[0] 2153 2154 else: 2155 self.session.logging.info( 2156 "Found libc offset at: " + hex(self._libc_offset)) 2157 2158 pot_main_arena = None 2159 2160 if self.plugin_args.main_arena: 2161 main_arena_offset = self.plugin_args.main_arena 2162 else: 2163 main_arena_offset = self.profile.get_constant( 2164 'main_arena') 2165 2166 if main_arena_offset: 2167 if self._libc_offset: 2168 main_arena_offset += self._libc_offset 2169 2170 pot_main_arena = self.profile.malloc_state( 2171 offset=(main_arena_offset), profile=self.profile, 2172 vm=self.process_as) 2173 2174 else: 2175 self.session.logging.info( 2176 "As it seems like we don't have debug information " 2177 "for the main arena, we now try to retrieve the " 2178 "main_arena via some different techniques for pid " 2179 "{:d}.".format(self.task.pid)) 2180 pot_main_arena = self._carve_main_arena() 2181 2182 if pot_main_arena: 2183 if self._check_arenas(pot_main_arena) is False: 2184 self.session.logging.warn( 2185 "Arena pointers don't seem to loop within the " 2186 "expected range. Maybe the main_arena pointer " 2187 "is wrong. This might lead to unreliable " 2188 "results.") 2189 2190 # despite potential problems, we try to proceed 2191 self._initialize_arenas(pot_main_arena) 2192 2193 self._mark_heap_vm_areas() 2194 2195 self._check_heap_consistency() 2196 2197 self._initialize_heap_first_chunks() 2198 self._check_and_correct_empty_space_in_heaps() 2199 2200 else: 2201 # no main_arena could be found, so we simply walk 2202 # the main_heap for chunks 2203 self.session.logging.warn( 2204 "No main_arena could be found, so we simply try to" 2205 " walk the chunks in the main heap. 
Without the " 2206 "arena, fastbin chunks can't be recognized " 2207 "reliably, and hence are treated as allocated " 2208 "chunks. This is especially a problem on further " 2209 "analysis (e.g. dumping their content).") 2210 2211 self._initialize_dummy_main_arena() 2212 2213 self._initialize_mmapped_first_chunks() 2214 self._initialize_heap_vma_list() 2215 2216 self.activate_chunk_preservation() 2217 self.check_and_report_size_inconsistencies() 2218 2219 return True 2220 2221 else: 2222 self.session.logging.error( 2223 "Libc profile is not loaded, " 2224 "hence no struct or constant information. Aborting") 2225 2226 else: 2227 self.session.logging.warn( 2228 "No vm_areas could be extracted from current task (maybe " 2229 "kernel thread): {:s} (PID: {:d})" 2230 .format(repr(task.comm.v()), task.pid)) 2231 2232 else: 2233 self.session.logging.warn( 2234 "Current task seems to be a kernel thread. Skipping Task: " 2235 "{:s} (PID: {:d})".format(repr(task.comm.v()), task.pid)) 2236 2237 self._reset() 2238 2239 return False
2240 2241
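# A minimal usage sketch for the initialization above, following the pattern
# of the plugins further below ('analysis' is assumed to be an already
# instantiated HeapAnalysis-based plugin, 'tasks' an iterable of task
# structs):
def count_arenas_per_task(analysis, tasks):
    results = {}
    for task in tasks:
        if not task.mm:                  # skip kernel threads
            continue
        if analysis.init_for_task(task):
            analysis.calculate_statistics()
            results[task.pid] = analysis.statistics['number_of_arenas']
    return results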
2242 - def _walk_hidden_mmapped_chunks(self, hidden_chunk):
2243         """Helper function for carve_and_register_hidden_mmapped_chunks.
2244         Walks MMAPPED chunks beginning with hidden_chunk and registers them.
2245         """
2246         new_mmapped_chunks = []
2247 
2248         # verification steps are triggered
2249         # in allocated_chunks_for_mmapped_chunk
2250         if hidden_chunk:
2251             if hidden_chunk not in self.get_main_arena().mmapped_first_chunks:
2252                 for mmapped_chunk in self._allocated_chunks_for_mmapped_chunk(
2253                         hidden_chunk):
2254                     new_mmapped_chunks.append(mmapped_chunk)
2255 
2256         return new_mmapped_chunks
2257 2258 2259
2260 - def _carve_register_mmapped_chunks_hidden_behind_stack(self):
2261         """Tries to find hidden MMAPPED chunks behind stack segments."""
2262 
2263         # list of new mmapped chunks lists (first and following chunks)
2264         new_mmapped_chunks = []
2265         relevant_vmas = []
2266 
2267         for vma in self.vmas:
2268             if not re.search('^\[stack', vma['name']):
2269                 continue
2270 
2271             current_chunks = []
2272             last_ebp = self._ebp_unrolling(vma['ebp'], vma['vma'])
2273             search_start = last_ebp if last_ebp else vma['vma'].vm_start
2274 
2275             temp_chunk = self._search_first_hidden_mmapped_chunk(search_start,
2276                                                                  vma['vma'])
2277             current_chunks = self._walk_hidden_mmapped_chunks(temp_chunk)
2278 
2279             if current_chunks:
2280                 new_mmapped_chunks.append(current_chunks)
2281                 relevant_vmas.append([vma, current_chunks[0].v()])
2282 
2283             else:
2284                 relevant_vmas.append([vma, vma['vma'].vm_end])
2285 
2286         self._register_hidden_mmapped_chunks(new_mmapped_chunks)
2287         self._stack_vmas_and_offsets = relevant_vmas
2288 2289
2290 - def _search_stacks_for_mmap_pointers(self):
2291         """Helper function for carving hidden MMAPPED chunks.
2292         Searches the stack frames for pointers to identified hidden MMAPPED
2293         chunks and reports the findings. This function is supposed to be
2294         called when the identified MMAPPED chunk values (number and size) do
2295         not correspond with the malloc_par values."""
2296 
2297         mmapped_chunks = self.get_all_mmapped_chunks()
2298 
2299         if self._stack_vmas_and_offsets and mmapped_chunks:
2300             mmap_pointers = []
2301             chunk_data_offset = self.profile.get_obj_offset("malloc_chunk",
2302                                                             "fd")
2303             mmap_pointers += [x.v() + chunk_data_offset
2304                               for x in mmapped_chunks]
2305 
2306             found_pointers = set()
2307 
2308             for hit in self.search_vmas_for_needle(
2309                     pointers=mmap_pointers,
2310                     hidden_mmap_vmas=self._stack_vmas_and_offsets):
2311 
2312                 found_pointers.add(hit['needle'])
2313 
2314             if len(found_pointers) == len(mmap_pointers):
2315                 self.session.logging.warn(
2316                     "It was possible to find at least one pointer on the "
2317                     "stack for each of the {:d} identified MMAPPED chunks. "
2318                     "This is a good sign for the gathered chunks, but "
2319                     "probably means that there is at least one chunk missing."
2320                     .format(len(mmap_pointers)))
2321             else:
2322                 self.session.logging.warn(
2323                     "Found {:d} pointer(s) to MMAPPED chunks in stack "
2324                     "segments out of {:d} identified MMAPPED chunks. Each "
2325                     "identified \"MMAPPED chunk\" with no associated pointer "
2326                     "on the stack might have been mistakenly chosen."
2327                     .format(len(found_pointers), len(mmap_pointers)))
2328 2329 2330
2331 - def _register_hidden_mmapped_chunks(self, new_mmapped_chunks):
2332 """Helper function for carving hidden MMAPPED chunks. 2333 Registers the given hidden MMAPPED chunks internally. 2334 """ 2335 2336 if new_mmapped_chunks: 2337 2338 main_arena = self.get_main_arena() 2339 for chunks in new_mmapped_chunks: 2340 for chunk in chunks: 2341 if chunk not in main_arena.allocated_mmapped_chunks: 2342 main_arena.allocated_mmapped_chunks.append(chunk) 2343 2344 main_arena.mmapped_first_chunks.append(chunks[0]) 2345 self._hidden_chunks.add(chunk)
2346 2347
2348 - def _carve_and_register_hidden_mmapped_chunks_globally(self):
2349 """Tries to find hidden MMAPPED chunks in anonymous vmas.""" 2350 2351 # list of new mmapped chunks lists (first and following chunks) 2352 new_mmapped_chunks = [] 2353 2354 for vma in self.vmas: 2355 # we walk only over anonymous and stack related vmas (for the case, 2356 # the ebp_unrolling went wrong) 2357 if vma['name'] != self._pot_mmapped_vma_identifier \ 2358 and vma['name'] != self._heap_vma_identifier \ 2359 and not re.search('^\[stack', vma['name']): 2360 continue 2361 2362 temp_chunk = self._search_first_hidden_mmapped_chunk( 2363 vma['vma'].vm_start, vma['vma']) 2364 current_chunks = self._walk_hidden_mmapped_chunks(temp_chunk) 2365 2366 if current_chunks: 2367 new_mmapped_chunks.append(current_chunks) 2368 2369 self._register_hidden_mmapped_chunks(new_mmapped_chunks)
2370 2371 2372
2373 - def search_vmas_for_needle(self, search_string=None, search_regex=None, 2374 pointers=None, vmas=None, hidden_mmap_vmas=None, 2375 vma_regex=None):
2376 """Searches all vmas or only the given ones for the given pointer(s). 2377 pointers = a list of int pointers 2378 regex = a regex identifying relevant vm_areas 2379 Returns a list of hits 2380 """ 2381 2382 if search_string: 2383 scanner = scan.BaseScanner(profile=self.profile, 2384 session=self.session, 2385 address_space=self.process_as, 2386 checks=[('StringCheck', 2387 dict(needle=search_string))]) 2388 2389 elif search_regex: 2390 scanner = scan.BaseScanner(profile=self.profile, 2391 session=self.session, 2392 address_space=self.process_as, 2393 checks=[('RegexCheck', 2394 dict(regex=search_regex))]) 2395 2396 elif pointers: 2397 scanner = scan.PointerScanner(profile=self.profile, 2398 session=self.session, 2399 address_space=self.process_as, 2400 pointers=pointers) 2401 2402 else: 2403 return None 2404 2405 if not vmas or hidden_mmap_vmas: 2406 vmas = self.vmas 2407 2408 if hidden_mmap_vmas: 2409 vmas = hidden_mmap_vmas 2410 2411 result = [] 2412 2413 for vma in vmas: 2414 if vma_regex and not hidden_mmap_vmas: 2415 if not re.search(vma_regex, vma['name']): 2416 continue 2417 2418 start = vma[0]['vma'].vm_start if hidden_mmap_vmas \ 2419 else vma['vma'].vm_start 2420 2421 end = vma[1] if hidden_mmap_vmas else vma['vma'].vm_end 2422 length = end - start 2423 2424 for hit in scanner.scan(offset=start, maxlen=length): 2425 temp = dict() 2426 temp['vma'] = vma[0] if hidden_mmap_vmas else vma 2427 temp['hit'] = hit 2428 if pointers: 2429 pointer = self.process_as.read(hit, self._size_sz) 2430 pointer = struct.unpack('I' if self._size_sz == 4 2431 else 'Q', pointer)[0] 2432 temp['needle'] = pointer 2433 2434 elif search_string: 2435 temp['needle'] = search_string 2436 2437 elif search_regex: 2438 temp['needle'] = search_regex 2439 2440 result.append(temp) 2441 2442 return result
2443 2444
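# A small usage sketch for the scanner above (assuming 'analysis' has already
# been initialized via init_for_task): log every location in the mapped
# vm_areas that stores a pointer to the given address.
def report_pointers_to(analysis, address):
    for hit in analysis.search_vmas_for_needle(pointers=[address]) or []:
        analysis.session.logging.info(
            "pointer to 0x{:x} found at 0x{:x} in {:s}".format(
                hit['needle'], hit['hit'], hit['vma']['name']))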
2445 - def get_chunks_for_addresses(self, addresses, ignore_prevsize=False):
2446 """Returns the chunks located at the given addresses. 2447 The address can be at the beginning or somewhere in the middle of the 2448 chunk.""" 2449 2450 chunks = dict() 2451 last_chunk = None 2452 addresses = set(addresses) 2453 2454 # get all first chunk offsets (from all arenas/heapinfo structs; 2455 # MMAPPED chunks can be ignored). The first chunk of a memory region 2456 # has the prev_inuse bit set, but no previous chunk. 2457 first_chunk_offsets = set() 2458 for arena in self.arenas: 2459 if arena.is_main_arena: 2460 first_chunk_offsets.add(arena.first_chunk.v()) 2461 2462 for heapinfo in arena.heaps: 2463 first_chunk_offsets.add(heapinfo.first_chunk.v()) 2464 2465 2466 addresses_to_remove = set() 2467 2468 # get_all_chunks returns allocated chunks first, and then freed ones 2469 # so it doesn't screw up the 'last_chunk' functionality 2470 for chunk in self.get_all_chunks(): 2471 2472 # we already found hits for those, so we don't check for them 2473 # anymore 2474 if addresses_to_remove: 2475 addresses ^= addresses_to_remove 2476 addresses_to_remove = set() 2477 2478 for address in addresses: 2479 if chunk.v() <= address < chunk.v() + chunk.chunksize(): 2480 addresses_to_remove.add(address) 2481 2482 chunk_to_add = None 2483 2484 if not ignore_prevsize and \ 2485 chunk.v() not in first_chunk_offsets and \ 2486 chunk.v() <= address \ 2487 < chunk.v() + chunk.get_prev_size().obj_size and \ 2488 chunk.prev_inuse(): 2489 # hit is in prev_size field and PREV_INUSE is set, so 2490 # the last chunk is in use and hence the prev_size 2491 # field belongs to him 2492 # Note: MMAPPED chunks don't use next chunk's 2493 # prev_size field, as it is not guaranteed that an 2494 # MMAPPED chunk has a following MMAPPED chunk. As, 2495 # however, MMAPPED chunks don't use the PREV_INUSE bit, 2496 # we don't have to explicitly test for MMAPPED chunks 2497 # to exclude them 2498 2499 if chunk.is_freed_chunk(): 2500 # in this case, the 'last_chunk' method doesn't 2501 # work, as get_all_chunks does not walk the memory 2502 # chunk by chunk, but first returns all allocated 2503 # chunks from memory and then all freed chunks 2504 # gathered via the bins. So as a ugly workaround 2505 # (that could be improved in the future), we walk 2506 # the allocated chunks until we find this freed 2507 # chunk's previous chunk 2508 found_previous_chunk = False 2509 for allocated_chunk in \ 2510 self.get_all_allocated_chunks(): 2511 2512 if allocated_chunk.v() + \ 2513 allocated_chunk.chunksize() \ 2514 == chunk.v(): 2515 last_chunk = allocated_chunk 2516 found_previous_chunk = True 2517 2518 if not found_previous_chunk: 2519 self.session.logging.warn( 2520 "We didn't find a previous chunk for " 2521 "a freed chunk. This is unexpected and " 2522 "will lead to wrong results") 2523 2524 2525 if last_chunk: 2526 if last_chunk.v() + last_chunk.chunksize() \ 2527 == chunk.v(): 2528 chunk_to_add = last_chunk 2529 2530 else: 2531 self.session.logging.warn( 2532 "The current previous chunk for chunk at " 2533 "offset 0x{:x} does not seem to be its " 2534 "predecessor. 
This is unexpected at this "
2535                                     "point and might indicate a major "
2536                                     "problem.".format(chunk.v()))
2537 
2538                             else:
2539                                 self.session.logging.error(
2540                                     "Error: last_chunk shouldn't be None at "
2541                                     "this point.")
2542 
2543                                 if self.session.GetParameter("debug"):
2544                                     pdb.post_mortem()
2545 
2546 
2547                         else:
2548                             chunk_to_add = chunk
2549 
2550 
2551                         if chunk_to_add not in chunks.keys():
2552                             # in case multiple addresses match the same chunk:
2553                             chunks[chunk_to_add] = set()
2554 
2555                         chunks[chunk_to_add].add(address)
2556 
2557             last_chunk = chunk
2558 
2559         return chunks
2560 2561 2562 2563 # Note: Does not return chunks containing pointers to the prev_size field 2564 # of the first chunk of the main heap/ heap_info area; but this shouldn't 2565 # be the case anyways. For all other chunks, the prev_size field is treated 2566 # appropriately
2567 - def search_chunks_for_needle(self, search_string=None, search_regex=None, 2568 pointers=None, search_struct=False):
2569 """Searches all chunks for the given pointer(s) and returns the ones 2570 containing them. It only searches the data part of a chunk (e.g. 2571 not fd/bk fields for bin chunks). 2572 2573 pointers = a list of int pointers 2574 search_string/search_regex = a string or regex to search for in a chunk 2575 search_struct = if set to True, also fields like size and fd/bk for bin 2576 chunks are included 2577 """ 2578 2579 # as searching every chunk for data is inefficient, we first search all 2580 # vmas and correlate the hits with known chunks afterwards 2581 if pointers: 2582 hits = self.search_vmas_for_needle(pointers=pointers, 2583 vmas=self.heap_vmas) 2584 2585 elif search_string: 2586 hits = self.search_vmas_for_needle(search_string=search_string, 2587 vmas=self.heap_vmas) 2588 2589 elif search_regex: 2590 hits = self.search_vmas_for_needle(search_regex=search_regex, 2591 vmas=self.heap_vmas) 2592 2593 else: 2594 return None 2595 2596 result = dict() 2597 2598 # the result structure is: 2599 # { chunk_with_hit: { 2600 # { needle (string or pointer): { offsets: {offsets} }, needle2: ...} 2601 # } , more chunks }: 2602 2603 for chunk in self.get_all_chunks(): 2604 2605 start, length = chunk.start_and_length() 2606 end = start + length 2607 2608 if search_struct: 2609 start = chunk.size.obj_offset 2610 2611 for hit in hits: 2612 if start <= hit['hit'] < end: 2613 if chunk not in result.keys(): 2614 result[chunk] = {hit['needle']: {hit['hit']}} 2615 2616 else: 2617 if hit['needle'] not in result[chunk].keys(): 2618 result[chunk][hit['needle']] = {hit['hit']} 2619 2620 else: 2621 result[chunk][hit['needle']].add(hit['hit']) 2622 2623 return result
2624 2625 2626
2627 - def _ebp_unrolling(self, ebp, vma):
2628 """Helper function for carving hidden MMAPPED chunks. 2629 Tries to follow EBP pointers to the first one and returns its offset. 2630 """ 2631 2632 if not vma.vm_start <= ebp < vma.vm_end: 2633 return None 2634 2635 temp = ebp 2636 last_ebp = 0 2637 2638 # infinite loop protection, when dealing with arbitrary data instead of 2639 # real ebp pointers 2640 max_steps = 0x2000 2641 i = 0 2642 2643 while vma.vm_start <= temp < vma.vm_end and last_ebp != temp and \ 2644 i < max_steps: 2645 last_ebp = temp 2646 temp = (self.process_as.read(temp, self._size_sz)) 2647 temp = struct.unpack('I' if self._size_sz == 4 else 'Q', temp)[0] 2648 i += 1 2649 2650 return last_ebp
2651 2652
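# The frame pointer walk above in isolation, over a plain byte buffer that
# starts at stack_start (illustrative only; little-endian words of the given
# size are assumed):
import struct

def walk_saved_frame_pointers(stack, stack_start, ebp, word='<Q', word_size=8,
                              max_steps=0x2000):
    chain = []
    for _ in range(max_steps):
        if not stack_start <= ebp <= stack_start + len(stack) - word_size:
            break
        chain.append(ebp)
        next_ebp = struct.unpack_from(word, stack, ebp - stack_start)[0]
        if next_ebp == ebp:              # a self reference ends the chain
            break
        ebp = next_ebp
    return chain                         # last entry: oldest reachable frame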
2653 - def _search_first_hidden_mmapped_chunk(self, initial_address, vma):
2654 """Helper function for carving hidden MMAPPED chunks. 2655 This function searches from initial_address until vma.vm_end for a 2656 MMAPPED chunk and returns it if found.""" 2657 2658 # As mmapped regions are normally on pagesize boundaries (4096 or a 2659 # multiple of it) we only look at those offsets for a mmapped chunk 2660 offset = self._get_page_aligned_address(initial_address) 2661 2662 # as the minimum size for mmapped chunks is normally equal to pagesize 2663 # (4096 bytes), there should be at least 4096 bytes behind the current 2664 # position - see also comment in check_and_report_mmap_chunk 2665 distance = vma.vm_end - offset 2666 2667 while distance >= self._min_pagesize: 2668 2669 temp_chunk = self.profile.malloc_chunk(offset=offset, 2670 vm=self.process_as) 2671 2672 if temp_chunk.get_prev_size() == 0 and \ 2673 temp_chunk.chunksize() >= self._min_pagesize and \ 2674 temp_chunk.chunksize() % self._min_pagesize == 0 and \ 2675 temp_chunk.is_mmapped() and \ 2676 not temp_chunk.prev_inuse() and \ 2677 not temp_chunk.non_main_arena() and \ 2678 temp_chunk.v() + temp_chunk.chunksize() <= vma.vm_end: 2679 2680 return temp_chunk 2681 2682 else: 2683 offset += self._min_pagesize 2684 distance = vma.vm_end - offset
2685 2686
2687 - def calculate_statistics(self):
2688 """Sets the class attribute self.statistics with a dict containing 2689 e.g. number of allocated/freed/fastbin chunks, their sizes...""" 2690 2691 if not self.get_main_arena(): 2692 return 2693 2694 number_of_arenas = len(self.arenas) 2695 number_of_heaps = 0 2696 2697 number_of_bin_chunks = 0 2698 size_of_bin_chunks = 0 2699 number_of_fastbin_chunks = 0 2700 size_of_fastbin_chunks = 0 2701 number_of_top_chunks = 0 2702 size_of_top_chunks = 0 2703 2704 number_of_main_chunks = 0 2705 size_of_main_chunks = 0 2706 2707 # both bottom chunks are excluded here 2708 number_of_thread_chunks = 0 2709 size_of_thread_chunks = 0 2710 2711 number_of_bottom_chunks = 0 2712 size_of_bottom_chunks = 0 2713 2714 ##### mallinfo specific values #### 2715 # includes bin and top chunks, also for empty main arena 2716 mallinfo_number_of_free_chunks = 0 2717 2718 # the sum of the system_mem fields from all arenas 2719 non_mmapped_bytes = 0 2720 2721 # total_allocated_space is the sum of all allocated chunk sizes 2722 # _except_ mmapped chunks 2723 # includes also heap/arena struct sizes and bottom chunks 2724 total_allocated_space = 0 2725 2726 # includes top chunk and fastbins 2727 total_free_space = 0 2728 #################################### 2729 2730 2731 for arena in self.arenas: 2732 2733 non_mmapped_bytes += arena.system_mem 2734 2735 if arena.top_chunk: 2736 number_of_top_chunks += 1 2737 size_of_top_chunks += arena.top_chunk.chunksize() 2738 2739 # mallinfo always counts the top chunk for the main arena, even if 2740 # the main heap and hence the top chunk doesn't exist (in these 2741 # cases, the top chunk pointer points to the top member of the 2742 # malloc_state struct: to itself) 2743 elif arena.is_main_arena: 2744 mallinfo_number_of_free_chunks += 1 2745 2746 for chunk in arena.freed_fast_chunks: 2747 number_of_fastbin_chunks += 1 2748 size_of_fastbin_chunks += chunk.chunksize() 2749 2750 for chunk in arena.freed_chunks: 2751 number_of_bin_chunks += 1 2752 size_of_bin_chunks += chunk.chunksize() 2753 2754 if arena.is_main_arena: 2755 for chunk in self._allocated_chunks_for_main_arena(): 2756 number_of_main_chunks += 1 2757 size_of_main_chunks += chunk.chunksize() 2758 2759 else: 2760 for chunk in self._allocated_chunks_for_thread_arena(arena): 2761 2762 # The last bottom chunk has a size of 0 but in fact takes 2763 # 2 * size_sz. 
As it normally isn't returned by 2764 # allocated_chunks_for_thread_arena, and has a chunksize 2765 # of 0, we manually add it's size 2766 if chunk.is_bottom_chunk: 2767 number_of_bottom_chunks += 2 2768 size_of_bottom_chunks += chunk.chunksize() 2769 size_of_bottom_chunks += self._size_sz * 2 2770 2771 else: 2772 size_of_thread_chunks += chunk.chunksize() 2773 number_of_thread_chunks += 1 2774 2775 2776 # total_allocated_space includes also the allocated space from 2777 # heap_info and malloc_state structs (except for the 2778 # main_arena) 2779 for heap in arena.heaps: 2780 number_of_heaps += 1 2781 total_allocated_space += heap.first_chunk.v() - heap.v() 2782 2783 2784 ### mallinfo specific calculation 2785 total_free_space += size_of_top_chunks 2786 total_free_space += size_of_fastbin_chunks 2787 total_free_space += size_of_bin_chunks 2788 2789 mallinfo_number_of_free_chunks += number_of_bin_chunks 2790 mallinfo_number_of_free_chunks += number_of_top_chunks 2791 2792 total_allocated_space += size_of_main_chunks 2793 total_allocated_space += size_of_thread_chunks 2794 total_allocated_space += size_of_bottom_chunks 2795 ###################### 2796 2797 statistics = dict() 2798 statistics['number_of_arenas'] = number_of_arenas 2799 statistics['number_of_heaps'] = number_of_heaps 2800 statistics['number_of_bin_chunks'] = number_of_bin_chunks 2801 statistics['size_of_bin_chunks'] = size_of_bin_chunks 2802 statistics['number_of_fastbin_chunks'] = number_of_fastbin_chunks 2803 statistics['size_of_fastbin_chunks'] = size_of_fastbin_chunks 2804 statistics['number_of_top_chunks'] = number_of_top_chunks 2805 statistics['size_of_top_chunks'] = size_of_top_chunks 2806 statistics['number_of_main_chunks'] = number_of_main_chunks 2807 statistics['size_of_main_chunks'] = size_of_main_chunks 2808 statistics['number_of_thread_chunks'] = number_of_thread_chunks 2809 statistics['size_of_thread_chunks'] = size_of_thread_chunks 2810 statistics['number_of_bottom_chunks'] = number_of_bottom_chunks 2811 statistics['size_of_bottom_chunks'] = size_of_bottom_chunks 2812 2813 statistics['non_mmapped_bytes'] = non_mmapped_bytes 2814 statistics['total_allocated_space'] = total_allocated_space 2815 statistics['total_free_space'] = total_free_space 2816 statistics['mallinfo_number_of_free_chunks'] = \ 2817 mallinfo_number_of_free_chunks 2818 2819 self.statistics = statistics 2820 2821 self._calculate_mmapped_statistics()
2822 2823
2824 - def _calculate_mmapped_statistics(self):
2825 """Calculates number and size of MMAPPED chunks and sets those values 2826 for the statistics attribute. Is outsourced from calculate_statistics 2827 to be able to recalculate MMAPPED chunks statistics when hidden MMAPPED 2828 chunks have been found, without having to recalculate all statistics. 2829 """ 2830 2831 # This function shouldn't normally be called without having previously 2832 # called calculate_statistics 2833 if not self.statistics: 2834 return 2835 2836 number_of_mmapped_chunks = 0 2837 size_of_mmapped_chunks = 0 2838 2839 for chunk in self.get_all_mmapped_chunks(): 2840 number_of_mmapped_chunks += 1 2841 size_of_mmapped_chunks += chunk.chunksize() 2842 2843 self.statistics['number_of_mmapped_chunks'] = number_of_mmapped_chunks 2844 self.statistics['size_of_mmapped_chunks'] = size_of_mmapped_chunks
2845 2846
2847 - def _compare_vma_sizes_with_chunks(self):
2848 """This function calculates the size of all relevant vm_areas and 2849 compares the result with the size of all allocated and freed chunks. 2850 It returns True if both values are the same. 2851 """ 2852 2853 if not self.get_main_arena(): 2854 return None 2855 2856 vma_sum = 0 2857 for vma in self.heap_vmas: 2858 vma_sum += (vma['vma'].vm_end - vma['vma'].vm_start) 2859 2860 if not self.statistics: 2861 self.calculate_statistics() 2862 2863 chunk_sum = (self.statistics['total_allocated_space'] 2864 + self.statistics['total_free_space'] 2865 + self.statistics['size_of_mmapped_chunks']) 2866 2867 chunk_sum += sum(self._mmap_slack_space.values()) 2868 chunk_sum += sum(self._heap_slack_space.values()) 2869 2870 2871 vma_sum += sum([x.chunksize() for x in self._hidden_chunks]) 2872 # as we can't simply add the vm_area for the hidden chunks to the 2873 # vma_sum, as it contains also other data, we add the hidden chunks 2874 # and their slack space to the vma_sum 2875 # 2876 # _mmap_slack_space is filled with the "chunk" after the last mmapped 2877 # chunk which isn't really a chunk but only empty space. to get them, 2878 # we call next_chunk on any mmapped chunk, including the last one for 2879 # each memory segment (which is the relevant one) 2880 hidden_next_chunks = [x.next_chunk() for x in self._hidden_chunks] 2881 2882 # now we get only the slack space for the hidden chunks 2883 vma_sum += sum([y for x, y in self._mmap_slack_space.iteritems() 2884 if x in hidden_next_chunks]) 2885 2886 vma_sum -= self._first_chunk_distance 2887 2888 return chunk_sum == vma_sum
2889 2890
2891 - def check_and_report_size_inconsistencies(self):
2892 """Calls size comparison methods to verify the gathered chunks and 2893 prints warnings on any discrepancies.""" 2894 2895 if not self.statistics: 2896 self.calculate_statistics() 2897 2898 if self.compare_mmapped_chunks_with_mp_() is False: 2899 self.session.logging.info( 2900 "The values from the malloc_par struct don't correspond to " 2901 "our found MMAPPED chunks. This indicates we didn't find all " 2902 "MMAPPED chunks and that they probably hide somewhere in a " 2903 "vm_area. So we now try to carve them.") 2904 2905 self._carve_register_mmapped_chunks_hidden_behind_stack() 2906 self._calculate_mmapped_statistics() 2907 2908 if self.compare_mmapped_chunks_with_mp_() is False: 2909 self.session.logging.info( 2910 "Seems like we didn't find (all) MMAPPED chunks behind " 2911 "stack frames. We now search in all anonymous vm_areas " 2912 "for them, which might however lead to false positives.") 2913 2914 self._carve_and_register_hidden_mmapped_chunks_globally() 2915 self._calculate_mmapped_statistics() 2916 2917 if self.compare_mmapped_chunks_with_mp_() is False: 2918 self.session.logging.warn( 2919 "The calculated count and size of all MMAPPED chunks " 2920 "doesn't meet the values from the gathered malloc_par " 2921 "struct. We found {:d} MMAPPED chunks with a total " 2922 "size of {:d} and the malloc_par struct reports {:d} " 2923 "MMAPPED chunks with a total size of {:d}. This " 2924 "either results from an error in getting all chunks " 2925 "or in choosing the correct vm_areas. Either way, the " 2926 "MMAPPED results will be wrong." 2927 .format(self.statistics['number_of_mmapped_chunks'], 2928 self.statistics['size_of_mmapped_chunks'], 2929 self.mp_.n_mmaps, 2930 self.mp_.mmapped_mem)) 2931 2932 2933 self._search_stacks_for_mmap_pointers() 2934 2935 else: 2936 self.session.logging.info( 2937 "Seems like all missing MMAPPED chunks have been " 2938 "found.") 2939 2940 else: 2941 self.session.logging.info( 2942 "Seems like all missing MMAPPED chunks have been found.") 2943 2944 2945 2946 if self._compare_vma_sizes_with_chunks() is False: 2947 for warning in self._mmapped_warnings: 2948 self.session.logging.warn(warning) 2949 2950 self.session.logging.warn( 2951 "The calculated sum from all heap objects and chunks does not " 2952 "meet the sum from all heap relevant vm_areas. This either " 2953 "results from an error in getting all chunks or in choosing " 2954 "the relevant vm_areas. Either way, the results are most " 2955 "probably unreliable.") 2956 2957 self._compare_and_report_system_mem_sizes()
2958 2959
2960 - def _compare_and_report_system_mem_sizes(self):
2961         """Compares the identified vmas for main and thread heaps with their
2962         system_mem values and prints warnings on any discrepancies."""
2963 
2964         main_heap_size = 0
2965         size_all_vmas = 0
2966         mmapped_first_chunk_pointers = \
2967             [x.v() for x in self.get_main_arena().mmapped_first_chunks]
2968 
2969         relevant_vmas = [x for x in self.heap_vmas
2970                          if x['vma'].vm_start
2971                          not in mmapped_first_chunk_pointers]
2972 
2973         for vma in relevant_vmas:
2974             size = (vma['vma'].vm_end - vma['vma'].vm_start)
2975 
2976             if vma['name'] == self._main_heap_identifier:
2977                 # as the main heap can spread among multiple vm_areas, we add
2978                 # their sizes up
2979                 main_heap_size += size
2980 
2981             size_all_vmas += size
2982 
2983         if not self._has_dummy_arena:
2984             main_heap_size -= self._first_chunk_distance
2985 
2986         if self.get_main_arena().system_mem != main_heap_size:
2987             self.session.logging.warn(
2988                 "The size of the vm_area identified to belong to the main "
2989                 "arena does not have the same size as the system_mem value of "
2990                 "that arena. This shouldn't be the case and might indicate "
2991                 "that the wrong vm_area has been selected, hence leading "
2992                 "to wrong chunks in the output.")
2993 
2994         else:
2995             system_mem_size = 0
2996             for arena in self.arenas:
2997                 system_mem_size += arena.system_mem
2998 
2999             system_mem_size += sum(self._heap_slack_space.values())
3000 
3001             if not self._has_dummy_arena:
3002                 system_mem_size += self._first_chunk_distance
3003 
3004             if size_all_vmas != system_mem_size:
3005                 self.session.logging.warn(
3006                     "The size of at least one arena (its system_mem value) "
3007                     "does not have the same size as the corresponding "
3008                     "vm_areas. This shouldn't be the case and might indicate "
3009                     "that either some vm_areas are missing or that at least "
3010                     "one vm_area has been mistakenly chosen. This leads "
3011                     "either to missing or wrong chunks in the output.")
3012 3013
3014 - def _initialize_malloc_par(self):
3015         """Initializes the malloc_par struct."""
3016 
3017         mp_offset = None
3018 
3019         if self.mp_offset:
3020             mp_offset = self.mp_offset
3021 
3022         else:
3023             self.mp_offset = self.profile.get_constant('mp_')
3024             mp_offset = self.mp_offset
3025 
3026         if mp_offset:
3027             if self._libc_offset:
3028                 mp_offset += self._libc_offset
3029 
3030             self.mp_ = self.profile.malloc_par(offset=mp_offset,
3031                                                vm=self.process_as)
3032 
3033             self._check_and_report_mp_for_being_swapped(self.mp_)
3034 
3035         else:
3036             self.session.logging.warn(
3037                 "It seems like the debug information for the mp_ offset is "
3038                 "missing. This means some checks/verifications can't be done.")
3039 3040
3041 - def compare_mmapped_chunks_with_mp_(self):
3042 """Compares the calculated count and size of all MMAPPED chunks with 3043 the data from the malloc_par struct. 3044 Returns None on any errors, True if count and sizes match and 3045 otherwise False.""" 3046 3047 if not self.get_main_arena() or not self.mp_: 3048 return None 3049 3050 if not self.statistics: 3051 self.calculate_statistics() 3052 3053 if self.mp_.mmapped_mem == self.statistics['size_of_mmapped_chunks'] \ 3054 and self.mp_.n_mmaps \ 3055 == self.statistics['number_of_mmapped_chunks']: 3056 return True 3057 3058 return False
3059 3060 3061
3062 - def get_mallinfo_string(self):
3063 """Returns statistics according to the mallinfo struct except for 3064 keepcost and usmblks. 3065 See http://man7.org/linux/man-pages/man3/mallinfo.3.html 3066 """ 3067 3068 if not self.get_main_arena(): 3069 return None 3070 3071 if not self.statistics: 3072 self.calculate_statistics() 3073 3074 result = "" 3075 3076 result += ("Total non-mmapped bytes (arena): " 3077 + str(self.statistics['non_mmapped_bytes']) 3078 + "\n") 3079 result += ("# of free chunks (ordblks): " 3080 + str(self.statistics['mallinfo_number_of_free_chunks']) 3081 + "\n") 3082 result += ("# of free fastbin blocks (smblks): " 3083 + str(self.statistics['number_of_fastbin_chunks']) 3084 + "\n") 3085 result += ("# of mapped regions (hblks): " 3086 + str(self.statistics['number_of_mmapped_chunks']) 3087 + "\n") 3088 result += ("Bytes in mapped regions (hblkhd): " 3089 + str(self.statistics['size_of_mmapped_chunks']) 3090 + "\n") 3091 result += ("Free bytes held in fastbins (fsmblks): " 3092 + str(self.statistics['size_of_fastbin_chunks']) 3093 + "\n") 3094 result += ("Total allocated space (uordblks): " 3095 + str(self.statistics['total_allocated_space']) 3096 + "\n") 3097 result += ("Total free space (fordblks): " 3098 + str(self.statistics['total_free_space']) 3099 + "\n") 3100 3101 return result
3102 3103 3104 @classmethod
3105 - def is_active(cls, session):
3106 return session.profile.metadata("os") == 'linux'
3107 3108 3109 __args = [ 3110 dict(name='main_arena', type='IntParser', default=None, 3111 help=("The main_arena pointer either extracted from the " 3112 "statically linked ELF binary or from the libc library.")), 3113 dict(name='malloc_par', type='IntParser', default=None, 3114 help=("The malloc_par pointer either extracted from the " 3115 "linked ELF binary or from the libc library.")) 3116 ]
3117
3118 3119 -class HeapOverview(HeapAnalysis):
3120 """Tries to gather a list of all arenas/heaps and all allocated chunks.""" 3121 3122 __name = "heapinfo" 3123 3124 table_header = [ 3125 dict(name="pid", width=6), 3126 dict(name="arenas", width=6), 3127 dict(name="heap_infos", width=10), 3128 dict(name="non_mmapped_chunks", width=20), 3129 dict(name="non_mmapped_chunks_size", width=26), 3130 dict(name="mmapped_chunks", width=16), 3131 dict(name="mmapped_chunks_size", width=22), 3132 dict(name="freed_chunks", width=14), 3133 dict(name="freed_chunks_size", width=20) 3134 ] 3135
3136 - def collect(self):
3137 3138 for task in self.filter_processes(): 3139 if not task.mm: 3140 self.session.logging.warn("Analysis for Task {:d} aborted as " 3141 "it seems to be a kernel thread.\n" 3142 .format(task.pid)) 3143 continue 3144 3145 try: 3146 if self.init_for_task(task): 3147 3148 freed_chunks = self.statistics['number_of_bin_chunks'] 3149 freed_chunks += self.statistics['number_of_fastbin_chunks'] 3150 freed_size = self.statistics['size_of_bin_chunks'] 3151 freed_size += self.statistics['size_of_fastbin_chunks'] 3152 3153 non_mmapped_chunks = \ 3154 self.statistics['number_of_main_chunks'] 3155 non_mmapped_chunks += \ 3156 self.statistics['number_of_thread_chunks'] 3157 non_mmapped_size = self.statistics['size_of_main_chunks'] 3158 non_mmapped_size += \ 3159 self.statistics['size_of_thread_chunks'] 3160 3161 3162 yield(task.pid, 3163 self.statistics['number_of_arenas'], 3164 self.statistics['number_of_heaps'], 3165 non_mmapped_chunks, 3166 non_mmapped_size, 3167 self.statistics['number_of_mmapped_chunks'], 3168 self.statistics['size_of_mmapped_chunks'], 3169 freed_chunks, 3170 freed_size) 3171 3172 except: 3173 self.session.logging.warn("Analysis for Task {:d} failed.\n" 3174 .format(task.pid)) 3175 self.session.logging.warn(traceback.format_exc())
3176
3177 3178 3179 -class HeapObjects(HeapAnalysis):
3180 """Prints the structs of heap objects (such as allocated chunks, arenas, 3181 ...)""" 3182 3183 __name = "heapobjects" 3184 3185 __args = [ 3186 dict(name='print_allocated', type="Boolean", default=False, 3187 help="prints all allocated chunk structs"), 3188 dict(name='print_freed', type="Boolean", default=False, 3189 help="prints all freed chunk structs"), 3190 dict(name='print_mmapped', type="Boolean", default=False, 3191 help="prints all MMAPPED chunk structs"), 3192 dict(name='print_mallinfo', type="Boolean", default=False, 3193 help="prints statistic information, similar to glibc's mallinfo") 3194 ] 3195 3196 3197
3198 - def render(self, renderer):
3199 3200 for task in self.filter_processes(): 3201 if not task.mm: 3202 self.session.logging.warn("Object dumping aborted for Task " 3203 "{:d} as it seems to be a kernel " 3204 "thread.\n".format(task.pid)) 3205 continue 3206 3207 try: 3208 if self.init_for_task(task): 3209 # as printing requires walking allocated chunks, we prevent 3210 # walking the memory two times 3211 3212 3213 print_output_separator = '=' * 65 3214 format_string = "{0:s} {1:s} {0:s}" 3215 3216 renderer.write("\n") 3217 renderer.write( 3218 format_string 3219 .format('=' * 18, 'Arena and heap_info objects')) 3220 renderer.write("\n") 3221 3222 for arena in self.arenas: 3223 if arena.is_main_arena: 3224 renderer.write("Main_arena: ") 3225 renderer.write(arena) 3226 renderer.write("\n") 3227 3228 renderer.write("First chunk: ") 3229 renderer.write(arena.first_chunk) 3230 renderer.write("\n") 3231 3232 else: 3233 renderer.write("Thread arena: ") 3234 renderer.write(arena) 3235 renderer.write("\n") 3236 3237 3238 renderer.write("Top chunk: ") 3239 renderer.write(arena.top_chunk) 3240 renderer.write("\n") 3241 3242 3243 for heap in arena.heaps: 3244 renderer.write(heap) 3245 renderer.write("\n") 3246 renderer.write("First chunk: ") 3247 renderer.write(heap.first_chunk) 3248 renderer.write("\n") 3249 3250 renderer.write(print_output_separator) 3251 renderer.write("\n") 3252 3253 3254 if self.plugin_args.print_allocated: 3255 renderer.write("\n") 3256 renderer.write( 3257 format_string 3258 .format('=' * 18, 'Allocated Main Arena Chunks')) 3259 3260 renderer.write("\n") 3261 for chunk in self.get_all_allocated_main_chunks(): 3262 renderer.write(chunk) 3263 renderer.write("\n") 3264 3265 renderer.write(print_output_separator) 3266 renderer.write("\n") 3267 3268 renderer.write("\n") 3269 renderer.write( 3270 format_string 3271 .format('=' * 18, 'Allocated Thread Arena Chunks')) 3272 3273 renderer.write("\n") 3274 for chunk in self.get_all_allocated_thread_chunks(): 3275 renderer.write(chunk) 3276 renderer.write("\n") 3277 3278 renderer.write(print_output_separator) 3279 renderer.write("\n") 3280 3281 if self.plugin_args.print_mmapped or \ 3282 self.plugin_args.print_allocated: 3283 renderer.write("\n") 3284 renderer.write( 3285 format_string 3286 .format('=' * 18, 'MMAPPED Chunks')) 3287 3288 renderer.write("\n") 3289 for chunk in self.get_all_mmapped_chunks(): 3290 renderer.write(chunk) 3291 renderer.write("\n") 3292 3293 renderer.write(print_output_separator) 3294 renderer.write("\n") 3295 3296 3297 if self.plugin_args.print_freed: 3298 renderer.write("\n") 3299 renderer.write( 3300 format_string 3301 .format('=' * 18, 'Freed Chunks')) 3302 3303 renderer.write("\n") 3304 for chunk in self.get_all_freed_chunks(): 3305 renderer.write(chunk) 3306 renderer.write("\n") 3307 3308 renderer.write(print_output_separator) 3309 renderer.write("\n") 3310 3311 3312 if self.plugin_args.print_mallinfo: 3313 renderer.write("\n") 3314 renderer.write( 3315 format_string 3316 .format('=' * 18, 'Mallinfo Output')) 3317 3318 renderer.write("\n") 3319 renderer.write(self.get_mallinfo_string()) 3320 renderer.write("\n") 3321 renderer.write(print_output_separator) 3322 renderer.write("\n") 3323 3324 except: 3325 self.session.logging.warn( 3326 "Object dumping for Task {:d} failed.\n".format(task.pid)) 3327 self.session.logging.warn(traceback.format_exc())
3328
3329 3330 -class HeapChunkDumper(core.DirectoryDumperMixin, HeapAnalysis):
3331 """Dumps allocated/freed chunks from selected processes """ 3332 3333 __name = "heapdump" 3334 _filename_format_string = ("{:d}.{}-chunk_offset-0x{:0{:d}X}_size-{:d}" 3335 "_dumped-{:d}_stripped-{:d}.dmp") 3336 3337 3338 table_header = [ 3339 dict(name="pid", width=6), 3340 dict(name="allocated", width=12), 3341 dict(name="freed_bin", width=12), 3342 dict(name="freed_fastbin", width=14), 3343 dict(name="top_chunks", width=12) 3344 ] 3345 3346
3347 - def collect(self):
3348 3349 for task in self.filter_processes(): 3350 if not task.mm: 3351 continue 3352 3353 if self.init_for_task(task): 3354 3355 allocated_chunk_count = 0 3356 freed_fastbin_chunks = 0 3357 freed_bin_chunks = 0 3358 top_chunks = 0 3359 3360 for arena in self.arenas: 3361 if arena.top_chunk: 3362 top_chunks += 1 3363 self.dump_chunk_to_file(arena.top_chunk, 3364 arena.top_chunk.chunksize(), 3365 'top') 3366 3367 if arena.is_main_arena: 3368 for chunk in self.get_all_allocated_chunks_for_arena( 3369 arena): 3370 allocated_chunk_count += 1 3371 self.dump_chunk_to_file(chunk, 3372 chunk.chunksize(), 3373 'allocated-main') 3374 3375 else: 3376 for chunk in self.get_all_allocated_chunks_for_arena( 3377 arena): 3378 allocated_chunk_count += 1 3379 chunksize = chunk.chunksize() 3380 identifier = 'allocated-thread' 3381 3382 if chunk.is_bottom_chunk: 3383 chunksize -= self._size_sz 3384 identifier = 'bottom' 3385 3386 self.dump_chunk_to_file(chunk, 3387 chunksize, 3388 identifier) 3389 3390 for chunk in self.get_all_mmapped_chunks(): 3391 allocated_chunk_count += 1 3392 3393 3394 self.dump_chunk_to_file(chunk, 3395 chunk.chunksize(), 3396 'allocated-mmapped') 3397 3398 3399 # here we differentiate fastbin chunks from bin chunks, as 3400 # fastbin chunks only overwrite one dword size of data with a 3401 # pointer while bin chunks overwrite 2 3402 for freed_chunk in self.get_all_freed_fastbin_chunks(): 3403 freed_fastbin_chunks += 1 3404 self.dump_chunk_to_file(freed_chunk, 3405 freed_chunk.chunksize(), 3406 'freed-fastbin') 3407 3408 for freed_chunk in self.get_all_freed_bin_chunks(): 3409 freed_bin_chunks += 1 3410 self.dump_chunk_to_file(freed_chunk, 3411 freed_chunk.chunksize(), 3412 'freed-bin') 3413 3414 3415 yield dict(pid=task.pid, allocated=allocated_chunk_count, 3416 freed_bin=freed_bin_chunks, 3417 freed_fastbin=freed_fastbin_chunks, 3418 top_chunks=top_chunks)
3419 3420
3421 - def dump_chunk_to_file(self, chunk, chunksize, identifier):
3422 """Used as the wrapper to dump a given chunk to file.""" 3423 3424 fd_offset = self.profile.get_obj_offset("malloc_chunk", "fd") 3425 3426 try: 3427 data = chunk.to_string() 3428 start, _ = chunk.start_and_length() 3429 3430 filename = self._filename_format_string.format( 3431 self.task.pid, identifier, chunk.v(), self._size_sz * 2, 3432 chunksize, len(data), start - chunk.v() - fd_offset) 3433 3434 output_file = open(self.dump_dir + os.sep + filename, 'wb') 3435 output_file.write(data) 3436 3437 except: 3438 print traceback.format_exc() 3439 3440 finally: 3441 try: 3442 output_file.close() 3443 3444 except: 3445 pass
3446
3447 3448 # TODO: yara support 3449 -class HeapPointerSearch(HeapAnalysis):
3450 """Searches all chunks for the given string, regex or pointer(s).""" 3451 3452 __name = "heapsearch" 3453
3454 - def render(self, renderer):
3455 if not (self.plugin_args.pointers or self.plugin_args.string 3456 or self.plugin_args.regex or self.plugin_args.chunk_addresses): 3457 renderer.write("Specify something to search for.\n") 3458 3459 else: 3460 for task in self.filter_processes(): 3461 if not task.mm: 3462 continue 3463 3464 if self.init_for_task(task): 3465 3466 hits = dict() 3467 3468 if self.plugin_args.pointers: 3469 hits = self.search_chunks_for_needle( 3470 pointers=self.plugin_args.pointers, 3471 search_struct=self.plugin_args.search_struct) 3472 3473 3474 if self.plugin_args.string: 3475 temp_hits = self.search_chunks_for_needle( 3476 search_string=self.plugin_args.string, 3477 search_struct=self.plugin_args.search_struct) 3478 3479 for chunk, needles in temp_hits.iteritems(): 3480 if chunk in hits.keys(): 3481 hits[chunk].update(needles) 3482 3483 else: 3484 hits[chunk] = needles 3485 3486 3487 if self.plugin_args.regex: 3488 temp_hits = self.search_chunks_for_needle( 3489 search_regex=self.plugin_args.regex, 3490 search_struct=self.plugin_args.search_struct) 3491 3492 for chunk, needles in temp_hits.iteritems(): 3493 if chunk in hits.keys(): 3494 hits[chunk].update(needles) 3495 3496 else: 3497 hits[chunk] = needles 3498 3499 3500 3501 if self.plugin_args.chunk_addresses: 3502 # first we gather the chunks of interest 3503 base_chunks = self.get_chunks_for_addresses( 3504 self.plugin_args.chunk_addresses, 3505 ignore_prevsize=True) 3506 3507 if base_chunks: 3508 renderer.write( 3509 "\n\nWe found the following chunks for the " 3510 "given chunk_addresses (all other chunks will " 3511 "now be searched for pointers to them): \n\n") 3512 3513 else: 3514 renderer.write( 3515 "\n\nWe did not find any chunks for the given " 3516 "chunk_addresses.\n\n") 3517 3518 for base_chunk in base_chunks: 3519 renderer.write(base_chunk) 3520 renderer.write("\n\n") 3521 3522 start = base_chunk.v() 3523 if base_chunk.prev_inuse(): 3524 start += base_chunk.get_prev_size().obj_size 3525 3526 pointers = range( 3527 start, sum(base_chunk.start_and_length())) 3528 3529 # now, we search in all chunks for pointers to the 3530 # chunks of interest 3531 temp_hits = self.search_chunks_for_needle( 3532 pointers=pointers, 3533 search_struct=self.plugin_args.search_struct) 3534 3535 3536 # temp_hits: chunks that contain a pointer to one 3537 # of the chunks of interest and the pointer values 3538 for hit_chunk, data in temp_hits.iteritems(): 3539 if hit_chunk not in hits.keys(): 3540 hits[hit_chunk] = {base_chunk: data} 3541 3542 elif base_chunk not in hits[hit_chunk].keys(): 3543 hits[hit_chunk][base_chunk] = data 3544 3545 else: 3546 # Chunks as dict keys are not type safe. 3547 # E.g. if a chunk with the same base offset 3548 # as pointer exists as a key, a test such 3549 # as 'pointer in hits[hit_chunk].keys()' 3550 # will return True. 
3551                              # Hence, we use the following workaround 
3552                              # to update hits 
3553                              try: 
3554                                  hits[hit_chunk][base_chunk].update( 
3555                                      data) 
3556                              except KeyError: 
3557                                  hits[hit_chunk][base_chunk] = data 
3558   
3559                      if hits: 
3560                          renderer.write("{0:s} Search results {0:s}" 
3561                                         .format('=' * 18)) 
3562   
3563                          for chunk, needles in hits.iteritems(): 
3564                              renderer.write("\n\n") 
3565                              renderer.write("The chunk (0x{:X}) below contains:\n\n" 
3566                                             .format(chunk.v())) 
3567   
3568                              for needle, data in needles.iteritems(): 
3569                                  if isinstance(needle, malloc_chunk): 
3570                                      renderer.write( 
3571                                          "The following pointers at the given " 
3572                                          "offset(s) point to the chunk at offset " 
3573                                          "0x{:X}:\n".format(needle.v())) 
3574   
3575                                      renderer.write("Pointer Offset(s)\n") 
3576                                      renderer.write("----------------------\n") 
3577                                      for pointer, offsets in data.iteritems(): 
3578                                          renderer.write( 
3579                                              "0x{:X}: ".format(pointer) 
3580                                              + ', '.join(["0x{:X}".format(x) for x 
3581                                                           in offsets])) 
3582   
3583                                          renderer.write("\n") 
3584   
3585                                      renderer.write("\n") 
3586   
3587                                  else: 
3588                                      renderer.write( 
3589                                          "The following needle was found at the " 
3590                                          "given offset(s):\n") 
3591                                      renderer.write("Needle Offset(s)\n") 
3592                                      renderer.write("----------------------\n") 
3593   
3594                                      if len(data) <= 9: 
3595                                          renderer.write( 
3596                                              (hex(needle) if isinstance(needle, int) 
3597                                               else needle) + ": " 
3598                                              + ', '.join(["0x{:X}".format(x) for x 
3599                                                           in data])) 
3600   
3601                                          renderer.write("\n") 
3602                                      else: 
3603                                          renderer.write( 
3604                                              (hex(needle) if isinstance(needle, int) 
3605                                               else needle) + ": The needle has been " 
3606                                              + "found at {0:d} offsets.\n" 
3607                                              .format(len(data))) 
3608   
3609                                      renderer.write("\n") 
3610   
3611                              renderer.write("\n\n") 
3612                              renderer.write(chunk) 
3613                              renderer.write("\n\n{:s}\n".format('=' * 60)) 
3614   
3615   
3616      __args = [ 
3617          dict(name='pointers', type='ArrayIntParser', default=None, 
3618               help=("Prints chunks that contain exactly the given pointer(s). " 
3619                     "The pointer(s) can be given as (hexa)decimal numbers.")), 
3620          dict(name='chunk_addresses', type='ArrayIntParser', default=None, 
3621               help=("Expects one or more addresses belonging to chunks of " 
3622                     "interest, and prints all chunks having a pointer " 
3623                     "somewhere into the data part of those chunks.")), 
3624          dict(name='regex', type='str', default=None, 
3625               help=("Searches all chunks with the given regex and prints " 
3626                     "all hits.")), 
3627          dict(name='string', type='str', default=None, 
3628               help=("Searches all chunks for the given string and prints " 
3629                     "all hits.")), 
3630          dict(name='search_struct', type="Boolean", default=False, 
3631               help=("Includes the malloc_chunk struct fields in the search " 
3632                     "process, such as size and fd/bk for bin chunks (but not " 
3633                     "its own prev_size field). This is normally not desired " 
3634                     "and hence deactivated by default.")) 
3635      ] 
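    # Usage sketch (illustration only, not part of the module): assuming an
    # interactive Rekall `session` that has been set up for a Linux memory
    # image, the plugin defined above could be invoked roughly like this
    # (the needle values are placeholders):
    #
    #     session.RunPlugin("heapsearch", string="password")
    #     session.RunPlugin("heapsearch", pointers=[0x555555756010])
    #
    # Restricting the search to specific tasks follows the common process
    # filtering arguments inherited via HeapAnalysis.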
3636
3637 3638 3639 -class HeapReferenceSearch(HeapAnalysis):
3640 """Examines the data part of the given chunk for references to other 3641 chunks.""" 3642 3643 __name = "heaprefs" 3644
3645 - def CreateAllocationMap(self, start, length):
3646 """Creates colorful hex map for pointers in a chunk""" 3647 3648 address_map = core.AddressMap() 3649 3650 if self.session.profile.metadata("arch") == 'I386': 3651 int_string = 'I' 3652 3653 else: 3654 int_string = 'Q' 3655 3656 offset_and_pointers = dict() 3657 3658 3659 # walks the chunk of interest and gathers all potential chunk pointers 3660 # with their offset within the chunk 3661 for i in range(start, start+length, 4): 3662 temp = struct.unpack(int_string, 3663 self.process_as.read(i, self._size_sz))[0] 3664 if temp == 0: 3665 continue 3666 3667 elif temp in offset_and_pointers.keys(): 3668 offset_and_pointers[temp].append(i) 3669 3670 else: 3671 offset_and_pointers[temp] = [i] 3672 3673 3674 # gathers a list of chunks, referenced by the potential chunk pointers 3675 chunks = self.get_chunks_for_addresses(offset_and_pointers.keys()) 3676 3677 for chunk, pointers in chunks.iteritems(): 3678 for pointer_offset, offsets in offset_and_pointers.iteritems(): 3679 for pointer in pointers: 3680 if pointer == pointer_offset: 3681 for offset in offsets: 3682 address_map.AddRange( 3683 offset, 3684 offset+self._size_sz, 3685 'Pointer to chunk at offset: 0x{:X}' 3686 .format(chunk.v()), 3687 color_index=self._get_next_color_index( 3688 chunk.v())) 3689 3690 return address_map
3691 3692
3693 - def _get_next_color_index(self, pointer):
3694 """Returns color index values that are easy to read on command line.""" 3695 3696 if pointer not in self._color_index_dict.keys(): 3697 self._current_color_index += 1 3698 3699 while self._current_color_index in self._color_index_blacklist: 3700 self._current_color_index += 1 3701 3702 self._color_index_dict[pointer] = self._current_color_index 3703 3704 return self._current_color_index 3705 3706 return self._color_index_dict[pointer]
3707 3708
3709 - def render(self, renderer):
3710   
3711          for task in self.filter_processes(): 
3712              if not task.mm: 
3713                  continue 
3714   
3715              if self.init_for_task(task): 
3716   
3717                  # first we gather the chunks for the given address(es) 
3718                  chunks = self.get_chunks_for_addresses( 
3719                      self.plugin_args.chunk_addresses, ignore_prevsize=True) 
3720   
3721                  for chunk, pointers in chunks.iteritems(): 
3722                      renderer.write("\n\n") 
3723                      renderer.write( 
3724                          "Examining chunk at offset 0x{:X}, belonging to the " 
3725                          "given address(es): {:s}".format( 
3726                              chunk.v(), ', '.join([hex(x) for x in pointers]))) 
3727   
3728                      renderer.write("\n\n") 
3729                      start, length = chunk.start_and_length() 
3730   
3731                      if length % 4: 
3732                          self.session.logging.warn( 
3733                              "The chunk at offset 0x{:x} seems to have " 
3734                              "a length not divisible by 4. This is unexpected " 
3735                              "and indicates a fundamental error.".format(chunk.v())) 
3736   
3737                      dump = self.session.plugins.dump( 
3738                          offset=start, length=length, 
3739                          address_map=self.CreateAllocationMap(start, length), 
3740                          address_space=self.process_as) 
3741   
3742                      dump.render(renderer) 
3743 3744 3745 __args = [ 3746 dict(name='chunk_addresses', type='ArrayIntParser', default=None, 3747 help=("The address(es) belonging to chunks of interest. Those " 3748 "chunks are then examined for references to other chunks.")) 3749 ] 3750 3751
3752 - def __init__(self, **kwargs):
3753 super(HeapReferenceSearch, self).__init__(**kwargs) 3754 self._current_color_index = 0 3755 self._color_index_dict = dict() 3756 self._color_index_blacklist = [10, 16, 18, 19, 22, 24, 25, 34]
3757
3758 3759 3760 -class malloc_chunk(obj.Struct):
3761 """Extends the malloc_chunk class""" 3762
3763 - def __init__(self, **kwargs):
3764 super(malloc_chunk, self).__init__(**kwargs) 3765 self._prev_size = None 3766 self._prev_inuse = None 3767 self._non_main_arena = None 3768 self._is_mmapped = None 3769 self._size = None 3770 self._chunksize = None 3771 self.is_bottom_chunk = False 3772 self.is_fastbin_chunk = False 3773 self.is_bin_chunk = False 3774 self.is_top_chunk = False 3775 3776 # since glibc 2.25, size and prev_size have been renamed 3777 if hasattr(self, 'mchunk_size'): 3778 self.size = self.mchunk_size 3779 self._size = self.size 3780 3781 if hasattr(self, 'mchunk_prev_size'): 3782 self.prev_size = self.mchunk_prev_size 3783 self._prev_size = self.prev_size
3784   
3785   
3786      ###################### Performance related functions ###################### 
3787      ### As retrieving data from memory, such as the size field, is fairly 
3788      ### time consuming, these functions cache the retrieved values to 
3789      ### prevent repeated retrieval. 
3790      ### This saves a noticeable amount of time when dealing with thousands 
3791      ### of chunks. 
3792      ########################################################################### 
3793 - def get_size(self):
3794 """Returns the value of the size field, including potential bit flags. 3795 """ 3796 3797 if not self._size: 3798 self._size = self.size 3799 3800 return self._size
3801 3802
3803 - def get_prev_size(self):
3804 """Returns the value of the prev_size field.""" 3805 3806 if not self._prev_size: 3807 self._prev_size = self.prev_size 3808 3809 return self._prev_size
3810 3811 3812 ########################################################################### 3813 3814 ############ Taken from glibc-2.23/malloc/malloc.c ##################### 3815
3816 - def prev_inuse(self):
3817 """Returns True if this chunk has its PREV_INUSE bit set.""" 3818 3819 if not self._prev_inuse: 3820 self._prev_inuse = (self.get_size() & _PREV_INUSE) == _PREV_INUSE 3821 3822 return self._prev_inuse
3823 3824
3825 - def is_mmapped(self):
3826 """Returns True if the chunk has been obtained with mmap().""" 3827 3828 if not self._is_mmapped: 3829 self._is_mmapped = (self.get_size() & _IS_MMAPPED) == _IS_MMAPPED 3830 3831 return self._is_mmapped
3832 3833
3834 - def non_main_arena(self):
3835 """Returns True if current chunk does NOT belong to main_arena.""" 3836 3837 if not self._non_main_arena: 3838 self._non_main_arena = (self.get_size() & _NON_MAIN_ARENA) \ 3839 == _NON_MAIN_ARENA 3840 3841 return self._non_main_arena
3842 3843
3844 - def chunksize(self):
3845 """Returns the real size of a chunk, excluding bit flags.""" 3846 3847 # as it got called often, this little improvement can save several 3848 # seconds on a chunk count >100.000 3849 if not self._chunksize: 3850 self._chunksize = self.get_size() & ~ _SIZE_BITS 3851 3852 return self._chunksize
3853 3854 3855 ############################################################################### 3856
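    # Standalone sketch (illustration only, not part of the module): decoding
    # a raw size field value with the same module-level masks the accessors
    # above rely on; the value 0x95 is made up.
    #
    #     raw_size = 0x95
    #     real_size = raw_size & ~_SIZE_BITS            # 0x90
    #     prev_inuse = bool(raw_size & _PREV_INUSE)     # True
    #     mmapped = bool(raw_size & _IS_MMAPPED)        # False
    #     non_main = bool(raw_size & _NON_MAIN_ARENA)   # True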
3857 - def is_allocated_chunk(self):
3858 """Returns True if this chunk is not a bottom, small/large bin, fastbin 3859 or top chunk.""" 3860 3861 return not self.is_fastbin_chunk and not self.is_bin_chunk and \ 3862 not self.is_top_chunk and not self.is_bottom_chunk
3863 3864
3865 - def is_freed_chunk(self):
3866 """Returns True if this chunk is a small/large bin, fastbin or top 3867 chunk.""" 3868 3869 return self.is_fastbin_chunk or self.is_bin_chunk or self.is_top_chunk
3870   
3871   
3872      # TODO simplify this function 
3873 - def start_and_length(self):
3874 """Returns the relevant start offset and length for dumping.""" 3875 3876 # For allocated chunks, the fd pointer doesn't contain a pointer 3877 # but is the beginning of data 3878 data_offset = self.fd.obj_offset 3879 3880 # we are not testing whether or not the given length is in range of 3881 # the current chunk 3882 length = self.chunksize() 3883 3884 if self.is_bin_chunk: 3885 if self.chunksize() >= _MIN_LARGE_SIZE: 3886 data_offset = self.v() + self.obj_size 3887 3888 else: 3889 data_offset = self.fd_nextsize.obj_offset 3890 3891 # The data part of an allocated chunk reaches until the next 3892 # chunk's prev_size field. On freeing the current chunk, the next 3893 # chunk's prev_size field is overwritten with the size information 3894 # from this chunk and hence doesn't anymore contain useful data 3895 length -= self.prev_size.obj_size 3896 3897 elif self.is_fastbin_chunk: 3898 data_offset = self.bk.obj_offset 3899 3900 elif self.is_bottom_chunk: 3901 # bottom chunk specific subtraction to get the only part which 3902 # contains useful data 3903 length -= self.bk.obj_size 3904 3905 # this subtraction just compensates the addition done later on 3906 length -= self.prev_size.obj_size 3907 3908 elif self.is_top_chunk or self.is_mmapped(): 3909 # both chunks don't use the prev_size field of the next chunk 3910 length -= self.prev_size.obj_size 3911 3912 3913 # we first subtract the offset to the beginning of data 3914 length -= data_offset - self.v() 3915 3916 # and now add the size of the prev_size field, as the data part of any 3917 # chunk except bins, top, mmapped and bottom chunks reaches until the 3918 # next chunk's prev_size field 3919 length += self.prev_size.obj_size 3920 3921 return [data_offset, length]
3922 3923 3924
3925 - def to_string(self, length=None, offset=None):
3926 """Returns a string containing the data part of the given allocated 3927 chunk. 3928 The length parameter is intended only for printing shorter 3929 parts of the current chunk. 3930 The offset makes only sense in combination with the length parameter 3931 and starts from the beginning of the chunk, so an offset of 4 on a 3932 32 bit architecture starts on the size member.""" 3933 3934 data_offset = None 3935 size = None 3936 3937 if not length or not offset: 3938 start, leng = self.start_and_length() 3939 data_offset = self.v() + offset if offset else start 3940 size = length if length else leng 3941 3942 3943 if size <= 0: 3944 return "" 3945 3946 data = self.obj_vm.read(data_offset, size) 3947 3948 if not data: 3949 return obj.NoneObject("Unable to read {0} bytes from {1}", 3950 size, data_offset) 3951 3952 return data
3953 3954
3955 - def next_chunk(self):
3956 """Returns the following chunk.""" 3957 3958 return self.obj_profile.malloc_chunk(self.v() + self.chunksize(), 3959 vm=self.obj_vm)
3960
3961 - def is_in_use(self):
3962 """Returns true, if this chunk is in use: the next chunk's PREV_INUSE 3963 flag is set.""" 3964 3965 return self.next_chunk().prev_inuse()
3966 3967
3968 - def next_chunk_generator(self):
3969 """Returns all following chunks, beginning with the current.""" 3970 3971 yield self 3972 3973 next_chunk = self.next_chunk() 3974 3975 # We expect the last chunk to have null size field. 3976 # Further circumstances must be checked in calling functions. 3977 while next_chunk.get_size() != 0: 3978 yield next_chunk 3979 next_chunk = self.obj_profile.malloc_chunk( 3980 next_chunk.v() + next_chunk.chunksize(), vm=self.obj_vm) 3981 3982 # TODO at the moment we return the last chunk, as other functions 3983 # rely on it for tests; this should be changed in a future release 3984 yield next_chunk
3985
3986 3987 3988 -class _heap_info(obj.Struct):
3989 """Extends the heap_info class""" 3990
3991 - def __init__(self, **kwargs):
3992 super(_heap_info, self).__init__(**kwargs) 3993 self.first_chunk = None
3994
3995 3996 -class malloc_state(obj.Struct):
3997 """Extends the malloc_state class""" 3998
3999 - def __init__(self, **kwargs):
4000          super(malloc_state, self).__init__(**kwargs) 
4001   
4002          # only used on non main_arenas 
4003          self.heaps = list() 
4004   
4005          self.is_main_arena = False 
4006   
4007          # only used with main_arena 
4008          self.first_chunk = None 
4009   
4010          # only used with main_arena 
4011          self.mmapped_first_chunks = None 
4012   
4013          # result from walking the fastbinsY lists 
4014          self.freed_fast_chunks = list(self.get_freed_chunks_fastbins()) 
4015   
4016          # result from walking the bins lists 
4017          self.freed_chunks = list(self.get_freed_chunks_bins()) 
4018   
4019          # We generally use this variable instead of the struct field, to 
4020          # cover the scenario in which we don't find any main_arena and 
4021          # need to set up a dummy arena 
4022          self.top_chunk = None 
4023          if self.top: 
4024              self.top_chunk = self.top.dereference() 
4025              self.top_chunk.is_top_chunk = True 
4026   
4027          # Both allocated-chunks lists are used for performance improvements. 
4028          # On analysis systems with limited memory, using them is discouraged, 
4029          # as some processes can have more than 400,000 chunks, which results 
4030          # in e.g. 1.6 GB of memory usage. 
4031          self.allocated_chunks = None 
4032   
4033          # only used with main_arena 
4034          self.allocated_mmapped_chunks = None 
4035 4036
4037 - def get_freed_chunks_fastbins(self):
4038 """Returns all freed chunks referenced by the fastbins.""" 4039 4040 for fastbin in self.fastbinsY: 4041 for fast_chunk in fastbin.walk_list("fd"): 4042 fast_chunk.is_fastbin_chunk = True 4043 yield fast_chunk
4044 4045
4046 - def get_freed_chunks_bins(self):
4047 """Returns all freed chunks referenced by the bins.""" 4048 4049 # as every second pointer is the backwards pointer to the same list of 4050 # freed chunks, we simply skip ahead this second pointer 4051 for _bin in self.bins[::2]: 4052 for free_chunk in _bin.walk_list("fd"): 4053 if not self.v() <= free_chunk.v() \ 4054 < (self.v() + self.struct_size): 4055 4056 free_chunk.is_bin_chunk = True 4057 yield free_chunk
4058
4059 4060 4061 -class GlibcProfile32(basic.Profile32Bits, basic.BasicClasses):
4062 """Profile to parse basic Glibc structures.""" 4063 4064 __abstract = True 4065 4066 # types come from Glibc's malloc/malloc.c 4067 # for simplicity, we just don't use the new chunk size/prev_size names 4068 # coming with 2.25 4069 glibc_base_vtype_32 = { 4070 "malloc_chunk": [24, { 4071 "bk": [12, ["Pointer", { 4072 "target": "malloc_chunk", 4073 "target_args": None 4074 }]], 4075 "bk_nextsize": [20, ["Pointer", { 4076 "target": "malloc_chunk", 4077 "target_args": None 4078 }]], 4079 "fd": [8, ["Pointer", { 4080 "target": "malloc_chunk", 4081 "target_args": None 4082 }]], 4083 "fd_nextsize": [16, ["Pointer", { 4084 "target": "malloc_chunk", 4085 "target_args": None 4086 }]], 4087 "prev_size": [0, ["unsigned int"]], 4088 "size": [4, ["unsigned int"]] 4089 }], 4090 "_heap_info": [16, { 4091 "ar_ptr": [0, ["Pointer", { 4092 "target": "malloc_state", 4093 "target_args": None 4094 }]], 4095 "mprotect_size": [12, ["unsigned int"]], 4096 "pad": [16, ["Array", { 4097 "count": 0, 4098 "target": "char", 4099 "target_args": None 4100 }]], 4101 "prev": [4, ["Pointer", { 4102 "target": "_heap_info", 4103 "target_args": None 4104 }]], 4105 "size": [8, ["unsigned int"]] 4106 }] 4107 } 4108 4109 mp_224_vtype_32 = { 4110 "malloc_par": [48, { 4111 "arena_max": [16, ["unsigned int"]], 4112 "arena_test": [12, ["unsigned int"]], 4113 "max_mmapped_mem": [40, ["unsigned int"]], 4114 "max_n_mmaps": [28, ["int"]], 4115 "mmap_threshold": [8, ["unsigned int"]], 4116 "mmapped_mem": [36, ["unsigned int"]], 4117 "n_mmaps": [20, ["int"]], 4118 "n_mmaps_max": [24, ["int"]], 4119 "no_dyn_threshold": [32, ["int"]], 4120 "sbrk_base": [44, ["Pointer", { 4121 "target": "char", 4122 "target_args": None 4123 }]], 4124 "top_pad": [4, ["unsigned int"]], 4125 "trim_threshold": [0, ["long unsigned int"]] 4126 }] 4127 } 4128 4129 mp_220_vtype_32 = { 4130 "malloc_par": [52, { 4131 "arena_max": [16, ["unsigned int"]], 4132 "arena_test": [12, ["unsigned int"]], 4133 "max_mmapped_mem": [40, ["unsigned int"]], 4134 "max_n_mmaps": [28, ["int"]], 4135 "max_total_mem": [44, ["unsigned int"]], 4136 "mmap_threshold": [8, ["unsigned int"]], 4137 "mmapped_mem": [36, ["unsigned int"]], 4138 "n_mmaps": [20, ["int"]], 4139 "n_mmaps_max": [24, ["int"]], 4140 "no_dyn_threshold": [32, ["int"]], 4141 "sbrk_base": [48, ["Pointer", { 4142 "target": "char", 4143 "target_args": None 4144 }]], 4145 "top_pad": [4, ["unsigned int"]], 4146 "trim_threshold": [0, ["long unsigned int"]] 4147 }] 4148 } 4149 4150 ms_223_vtype_32 = { 4151 "malloc_state": [1108, { 4152 "attached_threads": [1096, ["unsigned int"]], 4153 "binmap": [1072, ["Array", { 4154 "count": 4, 4155 "target": "unsigned int", 4156 "target_args": None 4157 }]], 4158 "bins": [56, ["Array", { 4159 "count": 254, 4160 "target": "Pointer", 4161 "target_args": { 4162 "target": "malloc_chunk", 4163 "target_args": None 4164 } 4165 }]], 4166 "fastbinsY": [8, ["Array", { 4167 "count": 10, 4168 "target": "Pointer", 4169 "target_args": { 4170 "target": "malloc_chunk", 4171 "target_args": None 4172 } 4173 }]], 4174 "flags": [4, ["int"]], 4175 "last_remainder": [52, ["Pointer", { 4176 "target": "malloc_chunk", 4177 "target_args": None 4178 }]], 4179 "max_system_mem": [1104, ["unsigned int"]], 4180 "mutex": [0, ["int"]], 4181 "next": [1088, ["Pointer", { 4182 "target": "malloc_state", 4183 "target_args": None 4184 }]], 4185 "next_free": [1092, ["Pointer", { 4186 "target": "malloc_state", 4187 "target_args": None 4188 }]], 4189 "system_mem": [1100, ["unsigned int"]], 4190 "top": [48, ["Pointer", { 4191 
"target": "malloc_chunk", 4192 "target_args": None 4193 }]] 4194 }] 4195 } 4196 4197 4198 ms_220_vtype_32 = { 4199 "malloc_state": [1104, { 4200 "binmap": [1072, ["Array", { 4201 "count": 4, 4202 "target": "unsigned int", 4203 "target_args": None 4204 }]], 4205 "bins": [56, ["Array", { 4206 "count": 254, 4207 "target": "Pointer", 4208 "target_args": { 4209 "target": "malloc_chunk", 4210 "target_args": None 4211 } 4212 }]], 4213 "fastbinsY": [8, ["Array", { 4214 "count": 10, 4215 "target": "Pointer", 4216 "target_args": { 4217 "target": "malloc_chunk", 4218 "target_args": None 4219 } 4220 }]], 4221 "flags": [4, ["int"]], 4222 "last_remainder": [52, ["Pointer", { 4223 "target": "malloc_chunk", 4224 "target_args": None 4225 }]], 4226 "max_system_mem": [1100, ["unsigned int"]], 4227 "mutex": [0, ["int"]], 4228 "next": [1088, ["Pointer", { 4229 "target": "malloc_state", 4230 "target_args": None 4231 }]], 4232 "next_free": [1092, ["Pointer", { 4233 "target": "malloc_state", 4234 "target_args": None 4235 }]], 4236 "system_mem": [1096, ["unsigned int"]], 4237 "top": [48, ["Pointer", { 4238 "target": "malloc_chunk", 4239 "target_args": None 4240 }]] 4241 }] 4242 } 4243 4244 version_dict = { 4245 '220': [glibc_base_vtype_32, ms_220_vtype_32, mp_220_vtype_32], 4246 '223': [glibc_base_vtype_32, ms_223_vtype_32, mp_220_vtype_32], 4247 '224': [glibc_base_vtype_32, ms_223_vtype_32, mp_224_vtype_32] 4248 } 4249 4250
4251 - def __init__(self, version=None, **kwargs):
4252 super(GlibcProfile32, self).__init__(**kwargs) 4253 profile = dict() 4254 4255 # at the moment: either 2.24 (2.25 is similar to 2.24), 2.23 or < 2.23 4256 if version: 4257 try: 4258 self.session.logging.info( 4259 "We are using I386 glibc profile version {:s}" 4260 .format(version)) 4261 4262 for vtypes in self.version_dict[version]: 4263 profile.update(vtypes) 4264 4265 except KeyError: 4266 self.session.logging.warn( 4267 "The given version string: {:s} is not in our dict. " 4268 "This is unexpected.".format(version)) 4269 4270 if not profile: 4271 # the default profile to use 4272 self.session.logging.info( 4273 "We are using the I386 default glibc profile version 2.24") 4274 4275 for vtypes in self.version_dict['224']: 4276 profile.update(vtypes) 4277 4278 self.add_types(profile)
4279
4280 4281 -class GlibcProfile64(basic.ProfileLP64, basic.BasicClasses):
4282 """Profile to parse basic Glibc structures.""" 4283 4284 __abstract = True 4285 4286 # types come from Glibc's malloc/malloc.c 4287 glibc_base_vtype_64 = { 4288 "malloc_chunk": [48, { 4289 "bk": [24, ["Pointer", { 4290 "target": "malloc_chunk", 4291 "target_args": None 4292 }]], 4293 "bk_nextsize": [40, ["Pointer", { 4294 "target": "malloc_chunk", 4295 "target_args": None 4296 }]], 4297 "fd": [16, ["Pointer", { 4298 "target": "malloc_chunk", 4299 "target_args": None 4300 }]], 4301 "fd_nextsize": [32, ["Pointer", { 4302 "target": "malloc_chunk", 4303 "target_args": None 4304 }]], 4305 "prev_size": [0, ["long unsigned int"]], 4306 "size": [8, ["long unsigned int"]] 4307 }], 4308 "_heap_info": [32, { 4309 "ar_ptr": [0, ["Pointer", { 4310 "target": "malloc_state", 4311 "target_args": None 4312 }]], 4313 "mprotect_size": [24, ["long unsigned int"]], 4314 "pad": [32, ["Array", { 4315 "count": 0, 4316 "target": "char", 4317 "target_args": None 4318 }]], 4319 "prev": [8, ["Pointer", { 4320 "target": "_heap_info", 4321 "target_args": None 4322 }]], 4323 "size": [16, ["long unsigned int"]] 4324 }] 4325 } 4326 4327 mp_220_vtype_64 = { 4328 "malloc_par": [88, { 4329 "arena_max": [32, ["long unsigned int"]], 4330 "arena_test": [24, ["long unsigned int"]], 4331 "max_mmapped_mem": [64, ["long unsigned int"]], 4332 "max_n_mmaps": [48, ["int"]], 4333 "max_total_mem": [72, ["long unsigned int"]], 4334 "mmap_threshold": [16, ["long unsigned int"]], 4335 "mmapped_mem": [56, ["long unsigned int"]], 4336 "n_mmaps": [40, ["int"]], 4337 "n_mmaps_max": [44, ["int"]], 4338 "no_dyn_threshold": [52, ["int"]], 4339 "sbrk_base": [80, ["Pointer", { 4340 "target": "char", 4341 "target_args": None 4342 }]], 4343 "top_pad": [8, ["long unsigned int"]], 4344 "trim_threshold": [0, ["long unsigned int"]] 4345 }] 4346 } 4347 4348 mp_224_vtype_64 = { 4349 "malloc_par": [80, { 4350 "arena_max": [32, ["long unsigned int"]], 4351 "arena_test": [24, ["long unsigned int"]], 4352 "max_mmapped_mem": [64, ["long unsigned int"]], 4353 "max_n_mmaps": [48, ["int"]], 4354 "mmap_threshold": [16, ["long unsigned int"]], 4355 "mmapped_mem": [56, ["long unsigned int"]], 4356 "n_mmaps": [40, ["int"]], 4357 "n_mmaps_max": [44, ["int"]], 4358 "no_dyn_threshold": [52, ["int"]], 4359 "sbrk_base": [72, ["Pointer", { 4360 "target": "char", 4361 "target_args": None 4362 }]], 4363 "top_pad": [8, ["long unsigned int"]], 4364 "trim_threshold": [0, ["long unsigned int"]] 4365 }] 4366 } 4367 4368 ms_223_vtype_64 = { 4369 "malloc_state": [2192, { 4370 "mutex": [0, ["int"]], 4371 "flags": [4, ["int"]], 4372 "fastbinsY": [8, ["Array", { 4373 "count": 10, 4374 "target": "Pointer", 4375 "target_args": { 4376 "target": "malloc_chunk", 4377 "target_args": None 4378 } 4379 }]], 4380 "top": [88, ["Pointer", { 4381 "target": "malloc_chunk", 4382 "target_args": None 4383 }]], 4384 "last_remainder": [96, ["Pointer", { 4385 "target": "malloc_chunk", 4386 "target_args": None 4387 }]], 4388 "bins": [104, ["Array", { 4389 "count": 254, 4390 "target": "Pointer", 4391 "target_args": { 4392 "target": "malloc_chunk", 4393 "target_args": None 4394 } 4395 }]], 4396 "binmap": [2136, ["Array", { 4397 "count": 4, 4398 "target": "unsigned int", 4399 "target_args": None 4400 }]], 4401 "next": [2152, ["Pointer", { 4402 "target": "malloc_state", 4403 "target_args": None 4404 }]], 4405 "next_free": [2160, ["Pointer", { 4406 "target": "malloc_state", 4407 "target_args": None 4408 }]], 4409 "attached_threads": [2168, ["long unsigned int"]], 4410 "system_mem": [2176, ["long unsigned 
int"]], 4411 "max_system_mem": [2184, ["long unsigned int"]] 4412 }] 4413 } 4414 4415 ms_220_vtype_64 = { 4416 "malloc_state": [2184, { 4417 "mutex": [0, ["int"]], 4418 "flags": [4, ["int"]], 4419 "fastbinsY": [8, ["Array", { 4420 "count": 10, 4421 "target": "Pointer", 4422 "target_args": { 4423 "target": "malloc_chunk", 4424 "target_args": None 4425 } 4426 }]], 4427 "top": [88, ["Pointer", { 4428 "target": "malloc_chunk", 4429 "target_args": None 4430 }]], 4431 "last_remainder": [96, ["Pointer", { 4432 "target": "malloc_chunk", 4433 "target_args": None 4434 }]], 4435 "bins": [104, ["Array", { 4436 "count": 254, 4437 "target": "Pointer", 4438 "target_args": { 4439 "target": "malloc_chunk", 4440 "target_args": None 4441 } 4442 }]], 4443 "binmap": [2136, ["Array", { 4444 "count": 4, 4445 "target": "unsigned int", 4446 "target_args": None 4447 }]], 4448 "next": [2152, ["Pointer", { 4449 "target": "malloc_state", 4450 "target_args": None 4451 }]], 4452 "next_free": [2160, ["Pointer", { 4453 "target": "malloc_state", 4454 "target_args": None 4455 }]], 4456 "system_mem": [2168, ["long unsigned int"]], 4457 "max_system_mem": [2176, ["long unsigned int"]] 4458 }] 4459 } 4460 4461 version_dict = { 4462 '220': [glibc_base_vtype_64, ms_220_vtype_64, mp_220_vtype_64], 4463 '223': [glibc_base_vtype_64, ms_223_vtype_64, mp_220_vtype_64], 4464 '224': [glibc_base_vtype_64, ms_223_vtype_64, mp_224_vtype_64] 4465 } 4466 4467
4468 - def __init__(self, version=None, **kwargs):
4469          super(GlibcProfile64, self).__init__(**kwargs) 
4470          profile = dict()  # empty start lets the 2.24 fallback below apply 
4471   
4472          # at the moment: either 2.24 (2.25 is similar to 2.24), 2.23 or < 2.23 
4473          if version: 
4474              try: 
4475                  self.session.logging.info( 
4476                      "We are using AMD64 glibc profile version {:s}" 
4477                      .format(version)) 
4478   
4479                  for vtypes in self.version_dict[version]: 
4480                      profile.update(vtypes) 
4481   
4482              except KeyError: 
4483                  self.session.logging.warn( 
4484                      "The given version string: {:s} is not in our dict. " 
4485                      "This is unexpected.".format(version)) 
4486   
4487          if not profile: 
4488              # the default profile to use 
4489              self.session.logging.info( 
4490                  "We are using the AMD64 default glibc profile version 2.24") 
4491   
4492              for vtypes in self.version_dict['224']: 
4493                  profile.update(vtypes) 
4494   
4495          self.add_types(profile) 
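    # Usage sketch (illustration only; assumes a Rekall `session` object and
    # a process address space `process_as` obtained elsewhere, and that the
    # glibc version has already been normalized to the key format used in
    # version_dict above, e.g. '223' for glibc 2.23):
    #
    #     profile = GlibcProfile64(version='223', session=session)
    #     chunk = profile.malloc_chunk(offset=0x7f0000000000, vm=process_as)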
4496