Package rekall :: Module addrspace
[frames | no frames]

Source Code for Module rekall.addrspace

  1  # Rekall Memory Forensics 
  2  # Copyright (C) 2007,2008 Volatile Systems 
  3  # Copyright 2013 Google Inc. All Rights Reserved. 
  4  # 
  5  # Original Source: 
  6  # Copyright (C) 2004,2005,2006 4tphi Research 
  7  # Author: {npetroni,awalters}@4tphi.net (Nick Petroni and AAron Walters) 
  8  # 
  9  # This program is free software; you can redistribute it and/or modify 
 10  # it under the terms of the GNU General Public License as published by 
 11  # the Free Software Foundation; either version 2 of the License, or (at 
 12  # your option) any later version. 
 13  # 
 14  # This program is distributed in the hope that it will be useful, but 
 15  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 16  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 17  # General Public License for more details. 
 18  # 
 19  # You should have received a copy of the GNU General Public License 
 20  # along with this program; if not, write to the Free Software 
 21  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 22  # 
 23   
 24  """ 
 25  @author:       AAron Walters 
 26  @license:      GNU General Public License 2.0 or later 
 27  @contact:      awalters@volatilesystems.com 
 28  @organization: Volatile Systems 
 29   
 30     Alias for all address spaces 
 31   
 32  """ 
 33  from rekall_lib import registry 
 34  from rekall_lib import utils 
class Zeroer(object):
    """Hands out strings of NUL characters, reusing ones built earlier."""

    def __init__(self):
        # Previously built zero strings, keyed by their length.
        self.store = utils.FastStore(10, lock=True)

    def GetZeros(self, length):
        """Return a string made of `length` NUL characters.

        The string is taken from the store when one of this length is already
        there; otherwise it is built, placed in the store and returned.
        """
        store = self.store
        try:
            padding = store.Get(length)
        except KeyError:
            # Not in the store (yet): build it once and remember it.
            padding = "\x00" * length
            store.Put(length, padding)

        return padding
# One module-wide Zeroer, so that zero padding of a given length does not have
# to be rebuilt for every read.
ZEROER = Zeroer()
class TranslationLookasideBuffer(object):
    """A small cache of virtual page -> physical page translations.

    An address space can use this to avoid repeating a translation.
    """

    PAGE_SHIFT = 12
    PAGE_ALIGNMENT = (1 << PAGE_SHIFT) - 1
    PAGE_MASK = ~ PAGE_ALIGNMENT

    def __init__(self, max_size=10):
        self.page_cache = utils.FastStore(max_size)

    def Get(self, vaddr):
        """Return the cached physical address for the virtual address vaddr.

        Only page aligned virtual addresses are stored, so the lookup is made
        for the page containing vaddr and the offset inside the page is added
        back onto the stored physical address.

        Returns:
          The physical address, or None when the stored value is None (None is
          a valid cached value, it means no mapping exists).

        NOTE(review): a lookup miss propagates whatever page_cache.Get raises -
        presumably KeyError, as for the other FastStore user in this file.
        """
        cached_page = self.page_cache.Get(vaddr & self.PAGE_MASK)
        if cached_page is None:
            return None

        return cached_page + (vaddr & self.PAGE_ALIGNMENT)

    def Put(self, vaddr, paddr):
        """Remember paddr as the translation of the page aligned vaddr.

        Raises:
          TypeError: if vaddr is not page aligned.
        """
        misalignment = vaddr & self.PAGE_ALIGNMENT
        if misalignment:
            raise TypeError("TLB must only cache aligned virtual addresses.")

        self.page_cache.Put(vaddr, paddr)
83
class Run(object):
    """A container describing one range and where its content comes from."""
    __slots__ = ("start", "end", "address_space", "file_offset", "data")

    def __init__(self, start=None, end=None, address_space=None,
                 file_offset=None, data=None):
        self.start, self.end = start, end
        self.address_space = address_space
        self.file_offset = file_offset
        self.data = data

    @utils.safe_property
    def length(self):
        """The size of the run (end - start)."""
        return self.end - self.start

    @length.setter
    def length(self, value):
        # Setting the length moves the end; the start stays where it is.
        self.end = self.start + value

    def copy(self, **kw):
        """Return a new run like this one, with the fields in kw replaced."""
        fields = {
            "start": self.start,
            "end": self.end,
            "address_space": self.address_space,
            "file_offset": self.file_offset,
            "data": self.data,
        }
        fields.update(kw)

        return self.__class__(**fields)

    def __str__(self):
        # Runs with a file offset also show where they map to.
        if self.file_offset is not None:
            return u"<%#x, %#x> -> %#x @ %s" % (
                self.start, self.end, self.file_offset,
                self.address_space)

        return u"<%#x, %#x>" % (self.start, self.end)
121
class BaseAddressSpace(object):
    """This is the base class of all Address Spaces."""

    __metaclass__ = registry.MetaclassRegistry
    __abstract = True

    order = 10

    # This can be used to name the address space (e.g. process id etc).
    name = ""

    # Some useful metadata for address spaces.

    # This signifies that this address space normally operates on memory
    # images. This flag controls if this address space will participate in
    # address space autoselection for image detection. Note that it can not be
    # inherited but must be explicitly set.
    __image = False

    # This flag signifies whether this address space's contents are likely to
    # change between reads. If an address space is NOT volatile (this flag is
    # False) then reads from the same offset MUST always return the same bytes.
    volatile = False

    # This flag signifies whether this address space is for a virtual machine.
    virtualized = False

    def __init__(self, base=None, session=None, profile=None, **_):
        """Constructor.

        Any additional keyword arguments are accepted and ignored.

        Args:
          base: A base address space to stack on top of (i.e. delegate to it for
             satisfying read requests).

          session: An optional session object. When omitted, the session of
             the base address space is used.

          profile: An optional profile to use for parsing the address space
             (e.g. needed for hibernation, crash etc.)

        Raises:
          RuntimeError: if no session was given and none could be taken from
             the base address space.
        """
        if session is None and base is not None:
            session = base.session

        self.base = base
        # Note that this is a truthiness test: a base which defines __len__
        # and is empty does not pass its volatile flag on.
        if base:
            self.volatile = self.base.volatile

        self.profile = profile
        self.session = session
        if session is None:
            raise RuntimeError("Session must be provided.")

    def as_assert(self, assertion, error=None):
        """Duplicate for the assert command (so that optimizations don't disable
        them)

        It had to be called as_assert, since assert is a keyword

        Raises:
          ASAssertionError: if assertion is false.
        """
        if not assertion:
            raise ASAssertionError(
                error or "Instantiation failed for unspecified reason")

    def describe(self, addr):
        """Return a string describing an address."""
        return "%#x" % addr

    def read(self, unused_addr, length):
        """Should be overridden by derived classes.

        This default returns zeros for any address.

        Raises:
          IOError: if length is larger than the session's "buffer_size"
             parameter.
        """
        if length > self.session.GetParameter("buffer_size"):
            raise IOError("Too much data to read.")

        return ZEROER.GetZeros(length)

    def get_mappings(self, start=0, end=2**64):
        """Generates a sequence of Run() objects.

        Each Run object describes a single range transformation from this
        address space to another address space at a potentially different
        file_offset.

        Runs are assumed to not overlap and are generated in increasing order.

        This default implementation has no mappings and returns an empty list.

        Args:
          start: The suggested start address we are interested in. This function
              may omit runs that lie entirely below this start address. Note:
              Runs are not adjusted to begin at the start address - it may be
              possible that this method returns a run which starts earlier than
              the specified start address.

          end: The suggested end address we are interested in (unused here).
        """
        _ = start
        _ = end
        return []

    def end(self):
        """Return the end of the last mapped run, or 0 without any runs."""
        runs = list(self.get_mappings())
        if runs:
            last_run = runs[-1]
            return last_run.end
        return 0

    def get_address_ranges(self, start=0, end=0xfffffffffffff):
        """Generates the runs which fall between start and end.

        Note that start and end are here specified in the virtual address
        space. More importantly this does not say anything about the pages in
        the physical address space - just because pages in the virtual address
        space are contiguous does not mean they are also contiguous in the
        physical address space.

        Yields:
          Run objects describing merged virtual address ranges. NOTE: These runs
          do not have file_offset or address_space members since the file_offset
          is not the same across the entire range and therefore it does not make
          sense to directly read the base address space - If you want to do
          this, use merge_base_ranges() instead.
        """
        last_voffset = last_voffset_end = 0

        for run in self.get_mappings(start=start, end=end):
            # No more runs apply.
            if run.start > end:
                break

            if run.start < start:
                # We dont care about the file_offset here since it will be
                # dropped later.
                run = run.copy(start=start)

            # This can take some time as we enumerate all the address ranges.
            self.session.report_progress(
                "%(name)s: Merging Address Ranges %(offset)#x %(spinner)s",
                offset=run.start, name=self.name)

            # Never report anything beyond the requested end, regardless of
            # whether this run extends the current range or starts a new one.
            run_end = min(run.end, end)

            # Extend the last range if this range starts at the end of the last
            # one.
            if run.start == last_voffset_end:
                last_voffset_end = run_end

            else:
                # Emit the last range
                if last_voffset_end > last_voffset:
                    yield Run(start=last_voffset,
                              end=last_voffset_end)

                # Reset the contiguous range.
                last_voffset = run.start
                last_voffset_end = run_end

        if last_voffset_end > last_voffset:
            yield Run(start=last_voffset, end=last_voffset_end)

    def merge_base_ranges(self, start=0, end=0xfffffffffffff):
        """Generates merged address ranges from get_mappings().

        This method is subtly different from get_address_ranges in that runs are
        contiguous in the base address space, hence the yielded runs have a
        valid file_offset member. Callers can safely issue read operations to
        the address space.

        Yields:
          runs which are contiguous in the base address space. This function
            is designed to produce ranges more optimized for reducing the number
            of read operations from the underlying base address space.

        """
        contiguous_voffset = 0
        contiguous_voffset_end = 0
        contiguous_poffset = 0
        last_run_length = 0
        last_as = None

        for run in self.get_mappings(start=start, end=end):
            # No more runs apply.
            if end and run.start > end:
                break

            if run.start < start:
                # Trim the part below start. A run without a file_offset keeps
                # it unset (it is treated as 0 further down) rather than
                # failing on None arithmetic.
                trimmed_offset = run.file_offset
                if trimmed_offset is not None:
                    trimmed_offset = trimmed_offset + start - run.start

                run = run.copy(start=start, file_offset=trimmed_offset)

            # This can take some time as we enumerate all the address ranges.
            self.session.report_progress(
                "%(name)s: Merging Address Ranges %(offset)#x %(spinner)s",
                offset=run.start, name=self.name)

            # Try to join up adjacent pages as much as possible.
            if (run.start == contiguous_voffset_end and
                    run.file_offset == contiguous_poffset + last_run_length and
                    run.address_space is last_as):
                contiguous_voffset_end = min(run.end, end)
                last_run_length = contiguous_voffset_end - contiguous_voffset
                last_as = run.address_space

            else:
                if last_run_length > 0:
                    yield Run(start=contiguous_voffset,
                              end=contiguous_voffset_end,
                              address_space=last_as,
                              file_offset=contiguous_poffset)

                # Reset the contiguous range.
                contiguous_voffset = run.start
                contiguous_voffset_end = min(run.end, end)
                contiguous_poffset = run.file_offset or 0
                last_run_length = contiguous_voffset_end - contiguous_voffset
                last_as = run.address_space

        if last_run_length > 0:
            yield Run(start=contiguous_voffset,
                      end=contiguous_voffset_end,
                      address_space=last_as,
                      file_offset=contiguous_poffset)

    def is_valid_address(self, _addr):
        """Tell us if the address is valid (always True in this base class)."""
        return True

    def write(self, addr, buf):
        """Write to the address space, if writable.

        The default behavior is to delegate the write to the base address space.
        If an address space has no base then this function will throw an
        IOError. Address spaces that actually implement writing should override.

        Raises:
            IOError if there is no base address space. Subclasses may raise
            under additional circumstances.

        Arguments:
            addr: The address to write at, as understood by this AS (i.e.
                a virtual address for virtual address spaces, physical for
                physical).
            buf: The data to write - most commonly a basestring instance.

        Returns:
            Number of bytes written.
        """
        if not self.base:
            raise IOError("No base address space set on %r." % self)

        return self.base.write(self.vtop(addr), buf)

    def vtop(self, addr):
        """Return the physical address of this virtual address."""
        # For physical address spaces, this is a noop.
        return addr

    def vtop_run(self, addr):
        """Returns a Run object describing where addr can be read from."""
        return Run(start=addr,
                   end=addr,
                   address_space=self,
                   file_offset=addr)

    @classmethod
    def metadata(cls, name, default=None):
        """Obtain metadata about this address space.

        Metadata is kept in class private attributes (such as __image), which
        Python stores under a mangled name, so it is only found when set on
        cls itself and is not inherited.

        Args:
          name: The metadata name without the leading underscores.
          default: Returned when cls does not set this metadata.
        """
        # Name mangling drops the leading underscores of the class name before
        # prefixing a single one, so do the same here.
        mangled = "_%s__%s" % (cls.__name__.lstrip("_"), name)
        return getattr(cls, mangled, default)

    def __unicode__(self):
        return self.__class__.__name__

    def __str__(self):
        return utils.SmartStr(self)

    def __repr__(self):
        return "<%s @ %#x %s>" % (
            self.__class__.__name__, hash(self), self.name)

    def __eq__(self, other):
        """Address spaces are equal if other is of our class with an equal base."""
        return (isinstance(other, self.__class__) and
                self.base == other.base)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so without this two address
        # spaces could compare both equal and unequal.
        return not self.__eq__(other)

    def get_file_address_space(self, filename):
        """Implement this to return an address space for filename."""

    def get_mapped_offset(self, filename, offset):
        """Implement this if we can map files into this address space."""

    def ConfigureSession(self, session_obj):
        """Implement this method if you need to configure the session."""

    def close(self):
        """Nothing to release in the base class."""
        pass
408
class BufferAddressSpace(BaseAddressSpace):
    """Specialized address space for internal use.

    Provides transparent reads through to a string buffer, so that profile
    types can be instantiated on top of strings.

    The buffer's first character lives at virtual address base_offset.
    """
    __image = False

    @utils.safe_property
    def writable(self):
        """Buffer AS is always writable, no matter what the session says."""
        return True

    def __init__(self, base_offset=0, data='', **kwargs):
        """Constructor.

        Args:
          base_offset: The virtual address of the start of the buffer.
          data: The buffer content.
          **kwargs: Passed on to the base class constructor.
        """
        super(BufferAddressSpace, self).__init__(**kwargs)
        self.fname = "Buffer"
        self.data = data
        self.base_offset = base_offset

    def assign_buffer(self, data, base_offset=0):
        """Replace the buffer content and the address it starts at."""
        self.base_offset = base_offset
        self.data = data

    def is_valid_address(self, addr):
        """Tell if addr lies within the buffer (its end address included)."""
        return not (addr < self.base_offset or addr > self.base_offset +
                    len(self.data))

    def read(self, addr, length):
        """Read length characters from addr, zero padded past the buffer end."""
        offset = addr - self.base_offset
        data = self.data[offset: offset + length]
        return data + ZEROER.GetZeros(length - len(data))

    def write(self, addr, data):
        """Overwrite the buffer content at virtual address addr with data.

        The address is interpreted relative to base_offset, the same way
        read() and get_buffer_offset() interpret it.

        Raises:
          ValueError: if addr lies before the start or beyond the end of the
            buffer.

        Returns:
          Number of characters written.
        """
        offset = addr - self.base_offset
        if offset < 0 or offset > len(self.data):
            raise ValueError(
                "Cannot write to offset %d of buffer with size %d." %
                (addr, len(self.data)))
        self.data = self.data[:offset] + data + self.data[offset + len(data):]
        return len(data)

    def get_mappings(self, start=None, end=2**64):
        """Yields a single run covering the buffer.

        The run is only produced when the end of the buffer lies above start
        and below end. A start of None is treated as 0.
        """
        if start is None:
            start = 0

        # end() is a method on this class: it must be called, both for the
        # comparison and for the value placed in the run.
        buffer_end = self.end()
        if buffer_end > start and buffer_end < end:
            yield Run(start=self.base_offset,
                      end=buffer_end,
                      file_offset=self.base_offset,
                      address_space=self)

    def get_buffer_offset(self, offset):
        """Returns the offset in self.data for the virtual offset."""
        return offset - self.base_offset

    def __repr__(self):
        return "<%s @ %#x %s [%#X-%#X]>" % (
            self.__class__.__name__, hash(self), self.name,
            self.base_offset, self.end())

    def __len__(self):
        return len(self.data)

    def end(self):
        """Return the end address of the buffer."""
        return self.base_offset + len(self.data)
472
class CachingAddressSpaceMixIn(object):
    """Mix-in which serves reads from a cache of CHUNK_SIZE sized chunks.

    The class this is mixed into provides the actual reading, which is reached
    through cached_read_partial().
    """
    # The size of chunks we cache. This should be large enough to make file
    # reads efficient.
    CHUNK_SIZE = 32 * 1024
    CACHE_SIZE = 10

    def __init__(self, **kwargs):
        super(CachingAddressSpaceMixIn, self).__init__(**kwargs)
        # Chunks which were read before, keyed by chunk number.
        self._cache = utils.FastStore(self.CACHE_SIZE)

    def read(self, addr, length):
        """Read up to length characters from addr.

        The data is gathered with repeated partial reads and may be shorter
        than length if a partial read comes back empty.
        """
        addr, length = int(addr), int(length)

        # Collect the pieces and join them once, rather than growing a string.
        parts = []
        while length > 0:
            data = self.read_partial(addr, length)
            if not data:
                break

            parts.append(data)
            length -= len(data)
            addr += len(data)

        return "".join(parts)

    def cached_read_partial(self, addr, length):
        """Implement this to allow the caching mixin to cache these reads."""
        # By default call the next read in the inheritance tree.
        return super(CachingAddressSpaceMixIn, self).read(addr, length)

    def read_partial(self, addr, length):
        """Read from addr as much as one chunk allows, up to length.

        Returns:
          The data read, which may be shorter than length. None when addr is
          None.
        """
        if addr is None:
            return None

        # Floor division: the chunk number is used as the cache key and to
        # compute the chunk address, so it has to remain an integer.
        chunk_number = addr // self.CHUNK_SIZE
        chunk_offset = addr % self.CHUNK_SIZE

        # Do not cache large reads but still pad them to CHUNK_SIZE.
        if chunk_offset == 0 and length > self.CHUNK_SIZE:
            # Deliberately do a short read to avoid copying.
            to_read = length - length % self.CHUNK_SIZE
            return self.cached_read_partial(addr, to_read)

        available_length = min(length, self.CHUNK_SIZE - chunk_offset)

        try:
            data = self._cache.Get(chunk_number)
        except KeyError:
            # Just read the data from the real class.
            data = self.cached_read_partial(
                chunk_number * self.CHUNK_SIZE, self.CHUNK_SIZE)

            self._cache.Put(chunk_number, data)

        return data[chunk_offset:chunk_offset + available_length]
529
class PagedReader(BaseAddressSpace):
    """An address space which reads in page size.

    This automatically takes care of splitting a large read into smaller reads.
    Each page is translated with vtop() and then read from (or written to) the
    base address space.
    """
    PAGE_SIZE = 0x1000
    PAGE_MASK = ~(PAGE_SIZE - 1)
    __abstract = True

    def _read_chunk(self, vaddr, length):
        """Read bytes from a virtual address.

        Args:
          vaddr: A virtual address to read from.
          length: The number of bytes to read.

        Returns:
          As many bytes as can be read within this page. Zeros are returned
          for a page which vtop() can not translate.
        """
        to_read = min(length, self.PAGE_SIZE - (vaddr % self.PAGE_SIZE))
        paddr = self.vtop(vaddr)
        if paddr is None:
            return ZEROER.GetZeros(to_read)

        return self.base.read(paddr, to_read)

    def _write_chunk(self, vaddr, buf):
        """Write as much of buf as fits into the page containing vaddr.

        Returns:
          The number of bytes written, as reported by the base address space.
          0 when there is nothing to write or the page can not be translated.
        """
        to_write = min(len(buf), self.PAGE_SIZE - (vaddr % self.PAGE_SIZE))
        if not to_write:
            return 0

        paddr = self.vtop(vaddr)
        # Only None means "not mapped" - physical address 0 is a perfectly
        # valid place to write to (_read_chunk makes the same distinction).
        if paddr is None:
            return 0

        return self.base.write(paddr, buf[:to_write])

    def write(self, addr, buf):
        """Write buf at addr, one page at a time.

        Returns:
          The number of bytes written. This is less than len(buf) if a page
          could not be written.
        """
        available = len(buf)
        written = 0

        while available > written:
            chunk_len = self._write_chunk(addr + written, buf[written:])
            if not chunk_len:
                break
            written += chunk_len

        return written

    def read(self, addr, length):
        """Read 'length' bytes from the virtual address 'addr'.

        Raises:
          IOError: if length is larger than the session's "buffer_size"
            parameter.
        """
        if length > self.session.GetParameter("buffer_size"):
            raise IOError("Too much data to read.")

        addr, length = int(addr), int(length)

        # Collect the pages and join them once, rather than growing a string.
        chunks = []

        while length > 0:
            buf = self._read_chunk(addr, length)
            if not buf:
                break

            chunks.append(buf)
            addr += len(buf)
            length -= len(buf)

        return ''.join(chunks)

    def is_valid_address(self, addr):
        """Tell if addr translates to an address which is valid in the base."""
        paddr = self.vtop(addr)
        return paddr is not None and self.base.is_valid_address(paddr)
603
class RunBasedAddressSpace(PagedReader):
    """An address space whose mapping is given by a collection of runs.

    Ranges of this address space are delegated to other address spaces which
    are "mapped" into it. Each run is added with add_run() and records:

    - Virtual Address - where the range starts in this address space.

    - Physical Address - the matching address in the delegate address space.

    - Length - the size of the mapped range.

    - Address space - the delegate address space to read for this range. The
      physical address above is an address in this delegate.
    """

    # Holds the runs; set to a utils.RangedCollection of Run objects by the
    # constructor.
    runs = None
    __abstract = True

    def __init__(self, **kwargs):
        super(RunBasedAddressSpace, self).__init__(**kwargs)
        self.runs = utils.RangedCollection()

    def add_run(self, virt_addr, file_address, file_len, address_space=None,
                data=None):
        """Map file_len bytes at virt_addr to file_address in address_space.

        The base address space is used when address_space is not given.
        """
        target_as = self.base if address_space is None else address_space
        range_end = virt_addr + file_len

        new_run = Run(start=virt_addr,
                      end=range_end,
                      address_space=target_as,
                      file_offset=file_address,
                      data=data)
        self.runs.insert(virt_addr, range_end, new_run)

    def _read_chunk(self, addr, length):
        """Read at most length bytes from addr without crossing a run edge."""
        range_start, range_end, run = self.runs.get_containing_range(addr)

        if range_start is not None:
            # Inside a run: let its address space read up to the run's end.
            readable = min(range_end - addr, length)
            source_offset = run.file_offset + addr - range_start

            return run.address_space.read(source_offset, readable)

        # Outside of every run: hand back zeros up to where the next run
        # begins (or for the whole request if no run follows).
        next_start = self.runs.get_next_range_start(addr)
        if next_start is None:
            next_start = addr + length

        return ZEROER.GetZeros(min(next_start - addr, length))

    def _write_chunk(self, addr, buf):
        """Write at most len(buf) bytes at addr without crossing a run edge.

        Returns the number of bytes dealt with. Bytes which fall between runs
        are not written anywhere but are still counted.
        """
        size = len(buf)
        range_start, range_end, run = self.runs.get_containing_range(addr)

        if range_start is not None:
            # Inside a run: let its address space write up to the run's end.
            writable = min(range_end - addr, size)
            target_offset = run.file_offset + addr - range_start

            return run.address_space.write(target_offset, buf[:writable])

        # Outside of every run: skip ahead to where the next run begins.
        next_start = self.runs.get_next_range_start(addr)
        if next_start is None:
            next_start = addr + size

        return min(next_start - addr, size)

    def vtop_run(self, addr):
        """Return a Run from addr to the end of its run, or None if unmapped."""
        range_start, _, run = self.runs.get_containing_range(addr)
        if range_start is None:
            return None

        return Run(start=addr,
                   end=run.end,
                   address_space=run.address_space,
                   file_offset=run.file_offset + addr - run.start)

    def vtop(self, addr):
        """Return the address in the delegate address space for addr.

        The result says little without also knowing which address space it
        refers to (see vtop_run() for that). None is returned when addr is not
        inside any run.
        """
        range_start, range_end, run = self.runs.get_containing_range(addr)
        if range_start is None:
            return None

        if addr < range_end:
            return run.file_offset + addr - range_start

        return None

    def is_valid_address(self, addr):
        """Tell if addr lies inside one of the runs."""
        translated = self.vtop(addr)
        return translated is not None

    def get_mappings(self, start=0, end=2**64):
        """Yields the Run objects which may apply between start and end.

        Runs ending before start are skipped; iteration stops at the first run
        which begins after end.
        """
        for _, _, run in self.runs:
            if run.start > end and not start > run.end:
                return

            if not start > run.end:
                yield run
722
class Error(Exception):
    """Base class of the address space errors defined in this module."""
726
class ASAssertionError(Error, IOError, AssertionError):
    """Raised when an address space can not be instantiated.

    It can be caught as an Error, an IOError or an AssertionError.
    """
730
class AddrSpaceError(Error):
    """Address Space Exception.

    Raised when an AS decides to not be instantiated; it is used in the voting
    algorithm. The reasons given by each driver are collected on the exception
    and listed in its string form.
    """

    def __init__(self):
        Error.__init__(self, "No suitable address space mapping found")
        # (driver, reason) pairs, in the order they were appended.
        self.reasons = []

    def append_reason(self, driver, reason):
        """Record why driver could not be used."""
        entry = (driver, reason)
        self.reasons.append(entry)

    def __str__(self):
        # The plain message, followed by one line for each recorded reason.
        pieces = [Error.__str__(self) + "\nTried to open image as:\n"]
        pieces.extend(
            " {0}: {1}\n".format(driver, reason)
            for driver, reason in self.reasons)

        return "".join(pieces)
752