Package rekall :: Module scan
[frames] | no frames]

Source Code for Module rekall.scan

  1  # Rekall Memory Forensics 
  2  # 
  3  # Copyright 2013 Google Inc. All Rights Reserved. 
  4  # 
  5  # This program is free software; you can redistribute it and/or modify 
  6  # it under the terms of the GNU General Public License as published by 
  7  # the Free Software Foundation; either version 2 of the License, or (at 
  8  # your option) any later version. 
  9  # 
 10  # This program is distributed in the hope that it will be useful, but 
 11  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  # General Public License for more details. 
 14  # 
 15  # You should have received a copy of the GNU General Public License 
 16  # along with this program; if not, write to the Free Software 
 17  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 18  # 
 19  # 
 20   
 21  __author__ = "Michael Cohen <scudette@gmail.com>" 
 22  import re 
 23   
 24  import acora 
 25   
 26  from rekall import addrspace 
 27  from rekall import constants 
 28  from rekall_lib import registry 
 29   
 30   
class ScannerCheck(object):
    """Base class for every check a scanner can run against an address space.

    The central entry point is ``check(buffer_as, offset)``, which returns a
    truthy value when the condition holds at ``offset`` and a falsy value
    otherwise.  Subclasses may additionally override ``skip`` so the scanner
    can jump over data that cannot match.
    """

    # Subclasses are looked up by name through ``ScannerCheck.classes`` (see
    # BaseScanner.build_constraints), which the registry metaclass provides.
    __metaclass__ = registry.MetaclassRegistry
    __abstract = True

    def __init__(self, profile=None, address_space=None, session=None,
                 **_kwargs):
        """Remember the context this check operates in.

        Args:
          profile: The profile that this scanner check should use.
          address_space: The address space being scanned.
          session: The owning session.
          **_kwargs: Accepted and ignored.
        """
        self.session = session
        self.address_space = address_space
        self.profile = profile

    def object_offset(self, offset):
        """Return ``offset`` unchanged."""
        return offset

    def check(self, buffer_as, offset):
        """Report whether the needle is present at ``offset``.

        Arguments:
          buffer_as: An address space object holding the chunk of data that
            can be inspected for the needle.
          offset: The offset in the address space to check.

        Returns:
          Always False in this base implementation.
        """
        del buffer_as, offset  # Unused by the base class.
        return False

    def skip(self, buffer_as, offset):
        """Tell the scanner how many bytes it may jump over.

        Override this to speed up scanning: return the distance from
        ``offset`` to the next position worth checking.  The scanner takes
        the maximum over all checks so that every check still gets a chance
        to pass.

        Args:
          buffer_as: A BufferAddressSpace instance wrapping
            self.address_space, containing a copy of the data at the
            specified offset.
          offset: The offset in the address space to check.

        Returns:
          Number of bytes to be skipped; always 0 in this base
          implementation.
        """
        del buffer_as, offset  # Unused by the base class.
        return 0
87 88
class MultiStringFinderCheck(ScannerCheck):
    """A scanner checker for multiple strings.

    All needles are compiled into a single acora search engine.  The first
    time a new buffer is seen, every hit in it is located up front and kept
    in ``self.hits``; ``check`` and ``skip`` then just consume that list.
    """

    def __init__(self, needles=None, **kwargs):
        """Init.

        Args:
          needles: A list of strings we search for.
          **kwargs: passthrough.

        Raises:
          RuntimeError: No needles provided, or a needle is longer than 50.
        """
        super(MultiStringFinderCheck, self).__init__(**kwargs)

        # It is an error to not provide something to search for and Acora
        # will raise later.
        if not needles:
            raise RuntimeError("No needles provided to search.")

        # Passing large patterns to the acora module will cause huge memory
        # consumption.
        if max(len(x) for x in needles) > 50:
            raise RuntimeError("Pattern too large to search with ahocorasic.")

        tree = acora.AcoraBuilder(*needles)
        self.engine = tree.build()

        # Base offset of the buffer that self.hits was computed for.
        self.base_offset = None

        # Pending (string, buffer-relative offset) hits, sorted so that the
        # lowest offset is at the END of the list and can be popped cheaply.
        self.hits = None

    def check(self, buffer_as, offset):
        """Return the needle found exactly at ``offset``, or False.

        Args:
          buffer_as: The buffer address space currently being scanned.
          offset: Absolute offset (in the scanned address space) to test.

        Returns:
          The matching needle when a hit starts at ``offset``, else False.
        """
        # A different base offset means we have not generated hits for this
        # buffer yet.
        if buffer_as.base_offset != self.base_offset:
            self.hits = sorted(self.engine.findall(buffer_as.data),
                               key=lambda x: x[1], reverse=True)
            self.base_offset = buffer_as.base_offset

        data_offset = offset - buffer_as.base_offset
        while self.hits:
            # Use a separate name so the ``offset`` argument is not
            # clobbered by the hit's buffer-relative offset.
            string, hit_offset = self.hits[-1]
            if hit_offset == data_offset:
                # This hit is being reported, remove it.
                self.hits.pop()
                return string

            if hit_offset < data_offset:
                # We skipped over this hit, remove it and look at the
                # remaining hits.
                self.hits.pop()
            else:
                # The next hit lies beyond the requested offset.
                return False

        return False

    def skip(self, buffer_as, offset):
        """Return the distance from ``offset`` to the next pending hit.

        Normally the scanner calls ``check`` first and ``skip`` immediately
        after, so ``self.hits`` has already been populated for this buffer.
        Offsets passed to ``check``/``skip`` are assumed to be nondecreasing.

        Args:
          buffer_as: The buffer address space currently being scanned.
          offset: Absolute offset (in the scanned address space).

        Returns:
          Bytes to skip to reach the next hit, or the distance to the end of
          the buffer when no hits remain.
        """
        data_offset = offset - buffer_as.base_offset
        while self.hits:
            _, hit_offset = self.hits[-1]
            if hit_offset < data_offset:
                self.hits.pop()
            else:
                return hit_offset - data_offset

        # No more hits in this buffer, skip it.  ``offset`` is still the
        # caller's absolute offset here; the original code had overwritten it
        # with a buffer-relative hit offset whenever a stale hit was popped.
        return buffer_as.end() - offset
158 159
class StringCheck(ScannerCheck):
    """Checks for a single string."""
    maxlen = 100
    needle = None
    needle_offset = None

    def __init__(self, needle=None, needle_offset=0, **kwargs):
        """Store the needle and where, relative to a candidate, it must sit.

        Args:
          needle: The string to look for.
          needle_offset: Added to the candidate offset before comparing.
          **kwargs: Passed through to ScannerCheck.
        """
        super(StringCheck, self).__init__(**kwargs)
        self.needle_offset = needle_offset
        self.needle = needle

    def _needle_position(self, buffer_as, offset):
        """Translate ``offset`` into the buffer index the needle must be at."""
        return buffer_as.get_buffer_offset(offset) + self.needle_offset

    def check(self, buffer_as, offset):
        """Return the needle if it is present for ``offset``, else None."""
        position = self._needle_position(buffer_as, offset)

        # startswith() with a start index compares in place, so the buffer
        # does not need to be copied by slicing.
        matched = buffer_as.data.startswith(self.needle, position)
        return self.needle if matched else None

    def skip(self, buffer_as, offset):
        """Return the distance to the next occurrence of the needle.

        When the needle does not occur again in the buffer, the distance to
        the end of the buffer is returned so the whole region is skipped.
        """
        position = self._needle_position(buffer_as, offset)

        # Search strictly after the current position.
        found = buffer_as.data.find(self.needle, position + 1)
        if found < 0:
            return buffer_as.end() - offset

        return found - position
186 187
class RegexCheck(ScannerCheck):
    """Matches a regular expression at the offset; this can be quite slow."""
    maxlen = 100

    def __init__(self, regex=None, **kwargs):
        """Compile ``regex`` once so it can be reused for every offset.

        Args:
          regex: The pattern, handed to ``re.compile``.
          **kwargs: Passed through to ScannerCheck.
        """
        super(RegexCheck, self).__init__(**kwargs)
        self.regex = re.compile(regex)

    def check(self, buffer_as, offset):
        """Return True when the pattern matches starting at ``offset``."""
        start = buffer_as.get_buffer_offset(offset)
        return self.regex.match(buffer_as.data, start) is not None
201 202
class _Padding(object):
    """Placeholder for a gap of ``length`` bytes inside a fragment list."""

    def __init__(self, length):
        # Number of bytes this placeholder stands for.
        self.length = length
207 208
class _BufferFragments(object):
    """Accumulates data chunks and padding that together form one buffer."""

    def __init__(self, base_offset):
        """Start an empty buffer whose first byte lives at ``base_offset``."""
        # Data chunks interleaved with _Padding placeholders, in order.
        self._fragments = []
        self.base_offset = base_offset
        # Bytes accounted for so far (data plus non-leading padding).
        self.total_length = 0

    def pad(self, length):
        """Account for ``length`` bytes that have no backing data.

        Padding before any data simply moves the buffer's base offset
        forward; padding after data is recorded as a placeholder.
        """
        if not self._fragments:
            self.base_offset += length
        else:
            self._fragments.append(_Padding(length))
            self.total_length += length

    def append(self, data):
        """Add a chunk of real data to the end of the buffer."""
        self._fragments.append(data)
        self.total_length += len(data)

    def materialize(self):
        """Remove padding from the end and materialize any padding.

        Returns:
          The joined buffer contents, with interior padding expanded via
          ``addrspace.ZEROER.GetZeros``.
        """
        # Remove the padding from the end.
        while self._fragments and isinstance(self._fragments[-1], _Padding):
            self._fragments.pop()

        # Expand the remaining padding and join it all together.  Iterating
        # the list directly avoids the Python 2-only xrange() index loop.
        return "".join(
            addrspace.ZEROER.GetZeros(item.length)
            if isinstance(item, _Padding) else item
            for item in self._fragments)
249 250
class BufferASGenerator(object):
    """Iterator producing contiguous buffers read from an address space.

    Each call to ``next`` fills the single shared ``buffer_as`` with up to
    ``buffer_size`` bytes gathered from the runs reported by
    ``address_space.merge_base_ranges`` and returns it.
    """

    def __init__(self, session, address_space, start, end,
                 buffer_size=constants.SCAN_BLOCKSIZE,
                 overlap_length=0):
        """Prepare to read the range ``start``..``end``.

        Args:
          session: Session handed to the internal BufferAddressSpace.
          address_space: The address space to read from.
          start: First offset to read.
          end: Offset at which reading stops.
          buffer_size: Target size of each returned buffer.
          overlap_length: Stored on the instance; not otherwise used by the
            code in this class.
        """
        self.start = start
        self.end = end
        self._generator = address_space.merge_base_ranges(start=start, end=end)
        self.buffer_as = addrspace.BufferAddressSpace(session=session)
        self.buffer_size = buffer_size
        self.readptr = start
        self.overlap_length = overlap_length
        self.overlap = ""
        self.current_run = None
        self.finished = False

    def __iter__(self):
        return self

    def __next__(self):
        """Python 3 protocol."""
        return self.next()

    def next(self):
        """Get the next buffer address space from the generator.

        Returns:
          ``self.buffer_as``, reassigned to the newly collected data.

        Raises:
          StopIteration: when there is nothing more to read.
        """
        # Where we are currently reading from; committed to self.readptr
        # only once a buffer has actually been produced.
        cursor = self.readptr

        # Collects the pieces that make up this buffer.
        fragments = _BufferFragments(cursor)

        if self.current_run is None:
            # An exhausted generator raises StopIteration here, which stops
            # us too.
            self.current_run = next(self._generator)

        # Keep going until the buffer is full.
        while fragments.total_length < self.buffer_size:
            if cursor >= self.end:
                raise StopIteration

            run = self.current_run

            # Case 1: the run begins after the cursor.  Pad up to the start
            # of the run, then fall through to case 2.
            #
            #   ^__pad____  |~~~~~~~~|
            #   |           current run
            #   cursor
            gap = run.start - cursor
            if gap > 0:
                if fragments.total_length > 0:
                    # Never pad beyond the room left in this buffer.
                    gap = min(gap,
                              self.buffer_size - fragments.total_length)

                fragments.pad(gap)
                cursor += gap

            # Case 2: the cursor is inside the run.  Read from it and append
            # to the fragments.
            if run.start <= cursor < run.end:
                file_pos = run.file_offset + (cursor - run.start)

                # Read up to the room left in the buffer or the end of this
                # run, whichever is smaller.
                wanted = min(self.buffer_size - fragments.total_length,
                             run.end - cursor)

                fragments.append(run.address_space.read(file_pos, wanted))
                cursor += wanted

            # Case 3: the cursor is at or past the end of the run.  Fetch the
            # next run and go around again.
            if run.end <= cursor:
                try:
                    self.current_run = next(self._generator)
                except StopIteration:
                    self.finished = True

                    # Leave the loop to return the last buffer.
                    break

        # Leading padding has been folded into base_offset; trailing padding
        # is dropped by materialize().
        base_offset = fragments.base_offset
        data = fragments.materialize()

        # No more real ranges and nothing collected: we are done.
        if self.finished and not data:
            raise StopIteration

        self.buffer_as.assign_buffer(data, base_offset=base_offset)
        self.readptr = cursor
        return self.buffer_as
351 352
class BaseScanner(object):
    """Base class for all scanners."""

    __metaclass__ = registry.MetaclassRegistry

    progress_message = "Scanning 0x%(offset)08X with %(name)s"

    # Sequence of (check class name, kwargs dict) pairs; turned into check
    # instances by build_constraints().
    checks = ()

    overlap = 1024

    def __init__(self, profile=None, address_space=None, window_size=8,
                 session=None, checks=None):
        """The base scanner.

        Args:
          profile: The profile to use for this scan.  When omitted, the
            session's profile is used if the session has one.
          address_space: The address space we use for scanning.  Defaults to
            the session's default address space.
          window_size: The size of the overlap window between each buffer
            read.  Stored on the instance.
          session: The session; falls back to ``address_space.session``.
          checks: Optional replacement for the class-level ``checks``.
        """
        self.session = session or address_space.session
        self.address_space = address_space or self.session.default_address_space
        self.window_size = window_size

        # Built lazily on the first scan so checks can still be changed
        # after construction.
        self.constraints = None

        if profile is None and self.session.HasParameter("profile"):
            profile = self.session.profile
        self.profile = profile

        self.max_length = None
        self.base_offset = None
        self.scan_buffer_offset = None
        self.buffer_as = addrspace.BufferAddressSpace(session=self.session)

        if checks is not None:
            self.checks = checks

    def build_constraints(self):
        """Instantiate every entry of ``self.checks`` into a check object."""
        self.constraints = []
        for check_name, check_kwargs in self.checks:
            check_cls = ScannerCheck.classes[check_name]
            self.constraints.append(check_cls(
                profile=self.profile, address_space=self.address_space,
                session=self.session, **check_kwargs))

        self.skippers = [c for c in self.constraints if hasattr(c, "skip")]
        self.hits = None

    def check_addr(self, offset, buffer_as=None):
        """Check an address.

        Runs every constraint against the offset, stopping at the first one
        that does not match.

        Args:
          offset: The offset to test (in self.address_space).
          buffer_as: The buffer address space handed to each check.

        Returns:
          None if the offset is not a hit, otherwise the offset itself.
        """
        for constraint in self.constraints:
            # Bail out on the first constraint that rejects this offset.
            if not constraint.check(buffer_as, offset):
                return None

        return offset

    def skip(self, buffer_as, offset):
        """Skip uninteresting regions.

        By default the scanner advances one byte at a time.  Checks that
        implement ``skip`` can promise that nothing before a given distance
        will match, so there is no point trying the data in between; the
        largest such distance wins.  This optimization is useful to really
        speed things up.

        Returns:
          The number of bytes to advance, never less than 1.
        """
        distances = [s.skip(buffer_as, offset) for s in self.skippers]
        return max([1] + distances)

    def scan(self, offset=0, maxlen=None, end=None):
        """Scan the region from offset for maxlen.

        Args:
          offset: The starting offset in our current address space to scan.
          maxlen: The maximum length to scan.  Only consulted when ``end``
            is not given.
          end: The offset at which to stop scanning.

        Yields:
          Offsets where all the constraints are satisfied.

        Raises:
          IOError: neither ``end`` nor ``maxlen`` was supplied.
        """
        if end is None:
            if maxlen is None:
                raise IOError("Range end must be specified.")

            end = int(offset) + int(maxlen)

        # Build the constraints late so they can be added after scanner
        # construction.
        if self.constraints is None:
            self.build_constraints()

        # Remember the last reported hit so that the same hit is not
        # reported more than once when using an overlap.
        last_reported_hit = -1

        buffers = BufferASGenerator(
            self.session, self.address_space, offset, end)
        for buffer_as in buffers:
            self.session.report_progress(
                "Scanning buffer %#x->%#x (%#x)",
                buffer_as.base_offset, buffer_as.end(),
                buffer_as.end() - buffer_as.base_offset)

            # Walk through the buffer we just received.
            scan_offset = buffer_as.base_offset
            while scan_offset < buffer_as.end():
                hit = self.check_addr(scan_offset, buffer_as=buffer_as)

                # Drop matches at or before the last reported position.
                if hit is not None and scan_offset > last_reported_hit:
                    last_reported_hit = scan_offset
                    yield hit

                # Advance as far as the skippers allow, but by no more than
                # the length of the buffer.
                step = self.skip(buffer_as, scan_offset)
                scan_offset += min(len(buffer_as), step)
488 489
class FastStructScanner(BaseScanner):
    """This scanner looks for a struct in memory.

    Arguments:
      expected_values:
        Provide a list/tuple of dicts mapping member names to their
        expected values. Each dict in the list you provide will correspond
        to a struct at the same index in an array. If you're only looking
        for a single struct, pass a list with only one dict in it.
      type_name: Name of the type to scan for.
    """

    type_name = None
    prototype = None
    expected_values = None

    def __init__(self, type_name=None, expected_values=None, *args, **kwargs):
        """Build one StringCheck per expected struct member.

        Args:
          type_name: Name of the type to scan for.
          expected_values: Sequence of dicts, one per array element, mapping
            member names to the values they must hold.
          *args: Passed through to BaseScanner.
          **kwargs: Passed through to BaseScanner.
        """
        super(FastStructScanner, self).__init__(*args, **kwargs)
        self.type_name = type_name
        self.expected_values = expected_values

        # A zero-filled scratch instance of the struct, used to find out how
        # each expected value is laid out in bytes.
        self.prototype = self.profile.Object(
            type_name=type_name, vm=addrspace.BufferAddressSpace(
                session=self.session,
                data="\x00" * self.profile.get_obj_size(type_name)))

        # Always work on a private list.  Extending an existing list in
        # place would modify the class-level ``checks`` (or the caller's
        # list) and leak these checks into other scanner instances.
        self.checks = list(self.checks) if self.checks else []

        for array_idx, struct_members in enumerate(self.expected_values):
            self.checks.extend(self.build_checks(array_idx, struct_members))

    def build_checks(self, array_idx, struct_members):
        """Yield a StringCheck spec for each member of one array element.

        Args:
          array_idx: Index of the struct within the array being matched.
          struct_members: Dict mapping member name to its expected value.

        Yields:
          ("StringCheck", kwargs) tuples suitable for ``self.checks``.
        """
        array_offset = array_idx * self.prototype.obj_size

        # items() rather than the Python 2-only iteritems().
        for member, expected_value in struct_members.items():
            # Write the value into the prototype, then read back the raw
            # bytes and the member's offset within the struct.
            self.prototype.SetMember(member, expected_value)
            member_obj = self.prototype.m(member)
            expected_bytes = member_obj.GetData()
            rel_offset = member_obj.obj_offset
            yield ("StringCheck", dict(needle=expected_bytes,
                                       needle_offset=rel_offset + array_offset))
533 534
class MultiStringScanner(BaseScanner):
    """A scanner for multiple strings at once."""

    # Override with the needles to check for.
    needles = []

    def __init__(self, needles=None, **kwargs):
        """Create the scanner and the single check it delegates to.

        Args:
          needles: The strings to search for.  When omitted, the class-level
            ``needles`` attribute is used.
          **kwargs: Passed through to BaseScanner.
        """
        super(MultiStringScanner, self).__init__(**kwargs)
        if needles is not None:
            self.needles = needles

        # For large patterns acora seems to use huge amount of memory and
        # CPU. Therefore when there is only a single pattern (common case)
        # use the normal StringCheck instead.
        #
        # The decision is made on self.needles, not on the ``needles``
        # argument: the argument is None when a subclass supplies its
        # needles through the class attribute, and len(None) would raise.
        if len(self.needles) == 1:
            self.check = StringCheck(
                profile=self.profile, address_space=self.address_space,
                session=self.session, needle=self.needles[0])
        else:
            self.check = MultiStringFinderCheck(
                profile=self.profile, address_space=self.address_space,
                session=self.session, needles=self.needles)

    def check_addr(self, offset, buffer_as=None):
        """Return ``(offset, needle)`` when a needle is found at ``offset``.

        Returns None when the check reports no match.
        """
        # Ask the check if this offset is possible.
        val = self.check.check(buffer_as, offset)
        if val:
            return offset, val

        return None

    def skip(self, buffer_as, offset):
        """Delegate the skip distance to the underlying check."""
        return self.check.skip(buffer_as, offset)
566 567
class PointerScanner(BaseScanner):
    """Scan for a bunch of pointers at the same time.

    This scanner takes advantage of the fact that usually the most
    significant bytes of a group of pointers is the same. This common part is
    scanned for first, thereby taking advantage of the scanner skippers.
    """

    def __init__(self, pointers=None, **kwargs):
        """Creates the Pointer Scanner.

        Args:
          pointers: A list of Pointer objects, or simply memory addresses.
            This scanner finds direct references to these addresses in
            memory.
          **kwargs: Passed through to BaseScanner.
        """
        super(PointerScanner, self).__init__(**kwargs)

        profile = self.session.profile

        # The size of a pointer depends on the profile.
        self.address_size = profile.get_obj_size("address")

        # One needle per pointer: the address encoded the way the current
        # profile stores it.
        self.needles = []
        for address in pointers:
            encoded = profile.address()
            encoded.write(address)
            self.needles.append(encoded.obj_vm.read(0, encoded.obj_size))

        # Search for all the encoded pointers with a single check.
        self.checks = [
            ("MultiStringFinderCheck", {"needles": self.needles}),
        ]
600 601
class ScannerGroup(BaseScanner):
    """Runs a bunch of scanners in one pass over the image."""

    def __init__(self, scanners=None, **kwargs):
        """Create a new scanner group.

        Args:
          scanners: A dict of BaseScanner instances. Keys will be used to
            refer to the scanner, while the value is the scanner instance.
            When omitted the group starts out empty.
          **kwargs: Passed through to BaseScanner.
        """
        super(ScannerGroup, self).__init__(**kwargs)

        # The default of None used to crash on ``.values()``; treat it as an
        # empty group.  A caller-supplied dict is kept as-is (even when
        # empty) so later additions to it remain visible here.
        self.scanners = {} if scanners is None else scanners

        # Make every member scan the same address space as the group.
        for scanner in self.scanners.values():
            scanner.address_space = self.address_space

        # A dict intended to hold hits per scanner.  Nothing in this class
        # writes to it.
        self.result = {}

    def scan(self, offset=0, maxlen=None):
        """Run every scanner over consecutive blocks of the address space.

        Args:
          offset: The starting offset.
          maxlen: How many bytes to cover.  When falsy, the profile's
            "MaxPointer" constant is used.

        Yields:
          ``(name, hit)`` pairs, where ``name`` is the scanner's key in
          ``self.scanners``.
        """
        available_length = maxlen or self.session.profile.get_constant(
            "MaxPointer")

        while available_length > 0:
            # Read one block plus the overlap, but never past the requested
            # length.
            to_read = min(constants.SCAN_BLOCKSIZE + self.overlap,
                          available_length)

            # Now feed all the scanners from the same address space.
            for name, scanner in self.scanners.items():
                for hit in scanner.scan(offset=offset, maxlen=to_read):
                    yield name, hit

            # Move to the next scan block.
            offset += constants.SCAN_BLOCKSIZE
            available_length -= constants.SCAN_BLOCKSIZE
637 638
class DiscontigScannerGroup(ScannerGroup):
    """A scanner group which works over a virtual address space."""

    def scan(self, offset=0, maxlen=None):
        """Scan each range reported by the address space in turn.

        Args:
          offset: The starting offset.
          maxlen: How many bytes to cover.  When falsy, the profile's
            "MaxPointer" constant is used.

        Yields:
          Whatever ScannerGroup.scan yields for each range.
        """
        if not maxlen:
            maxlen = self.session.profile.get_constant("MaxPointer")

        ranges = self.address_space.get_address_ranges(
            offset, offset + maxlen)

        # Only the start and the length of each range are needed here.
        for (range_start, _, range_length) in ranges:
            group_scan = super(DiscontigScannerGroup, self).scan(
                range_start, maxlen=range_length)
            for match in group_scan:
                yield match
650 651
class DebugChecker(ScannerCheck):
    """A check that breaks into the debugger when a condition is met.

    Insert this check inside the check stack and we will break into the
    debugger when all the conditions below us are met.
    """

    def check(self, buffer_as, offset):
        """Drop into pdb, then report a match so later checks still run."""
        # The arguments are deliberately left in scope (not deleted) so they
        # can be inspected from the debugger prompt.
        _ = (buffer_as, offset)

        import pdb
        pdb.set_trace()
        return True
663