Package rekall :: Module io_manager
[frames | no frames]

Source Code for Module rekall.io_manager

  1  # Rekall Memory Forensics 
  2  # Copyright 2013 Google Inc. All Rights Reserved. 
  3  # 
  4  # Author: Michael Cohen scudette@google.com 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or (at 
  9  # your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 19  # 
 20   
 21  """IO Abstraction for Rekall. 
 22   
 23  Since Rekall is a library it should never directly access files: it may be 
 24  running on an environment which has no filesystem access for example, or the 
 25  files may be stored in an unusual way. 
 26   
 27  In order to ensure that the file storage mechanism does not need to be hardcoded 
 28  in each module, Rekall has an abstracted filesystem access mechanism implemented 
 29  through the IO Manager. 
 30   
 31  The session object should contain an instance of the IOManager() class at the 
 32  io_manager attribute, which will be used to create new files, or read from 
 33  existing files. 
 34  """ 
 35   
 36  __author__ = "Michael Cohen <scudette@google.com>" 
 37   
 38  import StringIO 
 39  import gzip 
 40  import json 
 41  import time 
 42  import os 
 43  import shutil 
 44  import urllib2 
 45  import urlparse 
 46  import zipfile 
 47   
 48  from rekall import constants 
 49  from rekall import obj 
 50  from rekall_lib import registry 
 51  from rekall_lib import utils 
 52   
 53  # The maximum size of a single data object we support. This represent the 
 54  # maximum amount of data we are prepared to read into memory at once. 
 55  MAX_DATA_SIZE = 100000000 
class IOManagerError(IOError):
    """Generic failure reported by an IO manager.

    Derives from IOError so that callers catching IOError also catch it.
    """
60
class EncodeError(IOError):
    """Signals that data could not be encoded for storage in an IO manager."""
64
class DecodeError(IOError):
    """Signals that data read from an IO manager could not be decoded."""
68
class IOManager(object):
    """The baseclass for abstracted IO implementations.

    The IO manager classes are responsible for managing access to profiles. A
    profile is a JSON dict which is named using a standard notation. For
    example, the profile for a certain NT kernel is:

    nt/GUID/BF9E190359784C2D8796CF5537B238B42

    The IO manager may actually store the profile file using some other scheme,
    but that internal scheme is private to itself.
    """

    __metaclass__ = registry.MetaclassRegistry
    __abstract = True

    # Factory() tries the registered implementations in ascending order.
    order = 100

    # Options which control how data is stored. They are consumed by
    # StoreData()/_StoreData()/Create() and must never reach json.dumps().
    _STORAGE_OPTIONS = ("raw", "uncompressed")

    def __init__(self, urn=None, mode="r", session=None, pretty_print=True,
                 version=constants.PROFILE_REPOSITORY_VERSION):
        """Initialize the IOManager.

        Args:

          urn: The path to the IO manager. This might contain a scheme or
               protocol specific to a certain IO manager implementation.

          mode: Can be "r" or "w".

          session: The session object.

          pretty_print: If set, Encoder() serializes with utils.PPrint()
            instead of json.dumps(). The original author notes that the sorted
            output ends up being more compressible in reality.

          version: The required version of the repository. The IOManager is
            free to implement arbitrary storage for different versions if
            required. Versioning the repository allows us to update the
            repository file format transparently without affecting older
            Rekall versions.

        Raises:
          RuntimeError: If no session was provided.
        """
        self.mode = mode
        self.urn = urn
        self.version = version

        # NOTE(review): the equality test (rather than "is None") is kept on
        # purpose - presumably so that None-like placeholder objects which
        # compare equal to None are rejected too. Confirm before changing.
        if session == None:
            raise RuntimeError("Session must be set")

        self.session = session
        self.pretty_print = pretty_print
        self._inventory = None
        self.location = ""
        self._dirty = False

    @utils.safe_property
    def inventory(self):
        """The inventory dict, loaded lazily from the "inventory" member."""
        if self._inventory is None:
            self._inventory = self.GetData("inventory", default={})

        return self._inventory

    def ValidateInventory(self):
        """Check that the inventory carries the expected $METADATA section.

        Returns:
          True if the inventory looks like an Inventory profile. Otherwise a
          warning is logged, an empty skeleton inventory is installed (only if
          nothing usable was loaded) and False is returned.
        """
        try:
            metadata = self.inventory.get("$METADATA")
            if (metadata.get("ProfileClass") == "Inventory"
                    and metadata.get("Type") == "Inventory"):
                return True
        except (AttributeError, IndexError, ValueError):
            pass

        self.session.logging.warn(
            'Inventory for repository "%s" seems malformed. Are you behind a '
            'captive portal or proxy? If this is a custom repository, did you '
            'forget to create an inventory? You must use the '
            'tools/profiles/build_profile_repo.py tool with the --inventory '
            'flag.', self.location or self.urn)

        # If the profile didn't validate, we still fix it so subsequent calls
        # won't generate additional errors. StoreData and FlushInventory also
        # rely on this behaviour.
        if not self._inventory:
            self._inventory = {
                "$METADATA": dict(
                    Type="Inventory",
                    ProfileClass="Inventory"),
                "$INVENTORY": {},
            }

        return False

    def CheckInventory(self, path):
        """Checks the validity of the inventory and if the path exists in it.

        The inventory is a json object at the root of the repository which lists
        all the profiles in this repository. It allows us to determine quickly
        if a profile exists in this repository.
        """
        if self.ValidateInventory():
            # A valid $METADATA section does not guarantee that $INVENTORY is
            # present - treat a missing section as an empty one.
            return path in (self.inventory.get("$INVENTORY") or {})

        return False

    def Metadata(self, path):
        """Returns metadata about a path."""
        inventory = self.inventory.get("$INVENTORY", {})
        return inventory.get(path, {})

    def SetMetadata(self, name, options):
        """Merge options into the metadata recorded for name.

        NOTE(review): nothing here marks the inventory as dirty, so the
        FlushInventory() call below only writes if an earlier store left the
        dirty flag set - confirm whether that is intended.
        """
        existing_options = self.Metadata(name)
        existing_options.update(options)
        self.inventory.setdefault("$INVENTORY", {})[name] = existing_options
        self.FlushInventory()

    def FlushInventory(self):
        """Write the inventory to the storage (no-op unless it is dirty)."""
        if not self._dirty:
            return

        self.inventory.setdefault("$METADATA", dict(
            Type="Inventory",
            ProfileClass="Inventory"))
        self.inventory.setdefault("$INVENTORY", dict())

        self.StoreData("inventory", self.inventory)
        self._dirty = False

    def ListFiles(self):
        """Returns an iterable over all the files in this container."""
        return []

    def Create(self, name, **options):
        """Creates a new file in the container.

        Returns a file like object which should support the context manager
        protocol. If the file already exists in the container, overwrite it.

        For example:

        with self.session.io_manager.Create("foobar") as fd:
           fd.Write("hello world")

        Args:
          name: The name of the new file.
        """

    def Destroy(self, name):
        """Destroys the file/directory at name's path."""

    def Open(self, name):
        """Opens a container member for reading.

        This should return a file like object which provides read access to
        container members.

        Raises:
          IOManagerError: If the file is not found.
        """

    def Encoder(self, data, **options):
        """Serialize data into the string which gets stored.

        Args:
          data: The object to serialize.
          **options: If "raw" is true the data is only converted with
            utils.SmartStr(). Storage-only options are dropped; everything
            else is forwarded to json.dumps() when pretty_print is off.
        """
        if options.get("raw"):
            return utils.SmartStr(data)

        if self.pretty_print:
            return utils.PPrint(data)

        # Options like "uncompressed" are addressed to the storage layer;
        # json.dumps() would reject them with a TypeError.
        json_options = dict(
            (key, value) for key, value in options.items()
            if key not in self._STORAGE_OPTIONS)

        return json.dumps(data, sort_keys=True, **json_options)

    def Decoder(self, raw):
        """Parse a stored JSON string back into a python object."""
        return json.loads(raw)

    def GetData(self, name, raw=False, default=None):
        """Get the data object stored at container member.

        This returns an arbitrary python object which is stored in the named
        container member. For example, normally a dict or list. This function
        wraps the Open() method above and add deserialization to retrieve the
        actual object.

        Args:
          name: The name to retrieve the data under.
          raw: If specified we do not parse the data, simply return it as is.
          default: Returned when the member can not be opened or decoded. When
            not given, an obj.NoneObject() is used.

        Returns:
          The decoded object (or the raw data), or default on any failure. At
          most MAX_DATA_SIZE is read from the member.
        """
        if default is None:
            default = obj.NoneObject()

        try:
            fd = self.Open(name)
            try:
                data = fd.read(MAX_DATA_SIZE)
            finally:
                # Everything we need is in memory now - do not leak the
                # underlying file/socket handle.
                close = getattr(fd, "close", None)
                if close is not None:
                    close()

            if raw:
                return data

            return self.Decoder(data)

        except IOError:
            return default

        except Exception as e:
            self.session.logging.error(
                "Cannot parse profile %s because of decoding error '%s'.",
                name, e)
            return default

    def StoreData(self, name, data, **options):
        """Stores the data in the named container member.

        This serializes the data and stores it in the named member. Not all
        types of data are serializable, so this may raise. For example, when
        using JSON to store the data, arbitrary python objects may not be used.

        Args:
          name: The name under which the data will be stored.
          data: The data to store.

        Common options:
          raw: If true we write the data directly without encoding to json. In
            this case data should be a string.
          uncompressed: File will not be compressed (default gzip compression).
        """
        try:
            to_write = self.Encoder(data, **options)
        except EncodeError:
            self.session.logging.error("Unable to serialize %s", name)
            return

        self._StoreData(name, to_write, **options)

        # Update the inventory. The flush must stay inside this branch:
        # FlushInventory() itself calls StoreData("inventory", ...), so
        # flushing while storing the inventory would recurse forever.
        if name != "inventory":
            self.inventory.setdefault("$INVENTORY", {})[name] = dict(
                LastModified=time.time())

            self.FlushInventory()

    def _StoreData(self, name, to_write, **options):
        """Write the already encoded string and mark the inventory dirty."""
        with self.Create(name, **options) as fd:
            fd.write(to_write)

        self._dirty = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass
314
class DirectoryIOManager(IOManager):
    """An IOManager which stores everything in files.

    We prefer to store the profile file as a gzip compressed file within a
    versioned directory. For example the profile:

    nt/GUID/BF9E190359784C2D8796CF5537B238B42

    will be stored in:

    $urn/nt/GUID/BF9E190359784C2D8796CF5537B238B42.gz

    Where $urn is the path where the DirectoryIOManager was initialized with.
    """

    # Any paths beginning with these prefixes will not be included in the
    # inventory.
    EXCLUDED_PATH_PREFIX = []

    def __init__(self, urn=None, **kwargs):
        """Initialize the manager on the directory named by urn.

        Args:
          urn: Path of the directory. Environment variables are expanded and
            the path is made absolute and normalized.
          **kwargs: Passed to IOManager.__init__().

        Raises:
          IOManagerError: If urn is empty or is not a directory (in "w" mode
            we first try to create it).
        """
        super(DirectoryIOManager, self).__init__(urn=urn, **kwargs)

        # Fail with an IOError subclass (which Factory() knows how to skip)
        # instead of letting os.path choke on None.
        if not urn:
            raise IOManagerError("Please specify a dump directory.")

        self.location = self.dump_dir = os.path.normpath(os.path.abspath(
            os.path.expandvars(urn)))
        if not self.version:
            self.version = ""

        self.check_dump_dir(self.dump_dir)
        self.canonical_name = os.path.basename(self.dump_dir)

    @utils.safe_property
    def inventory(self):
        """The inventory; rebuilt from the directory tree if none is stored."""
        # In DirectoryIOManager the inventory reflects the directory structure.
        if self._inventory is None:
            self._inventory = self.GetData("inventory", default={})
            if not self._inventory:
                self._inventory = self.RebuildInventory()

        return self._inventory

    def RebuildInventory(self):
        """Build a fresh inventory dict from the files found on disk."""
        result = {
            "$METADATA": dict(
                Type="Inventory",
                ProfileClass="Inventory"),
            "$INVENTORY": {},
        }
        for member in self.ListFiles():
            if not self._is_excluded_member(member):
                result["$INVENTORY"][member] = self.Metadata(member)

        return result

    def _is_excluded_member(self, member):
        """True if member starts with one of EXCLUDED_PATH_PREFIX."""
        return any(member.startswith(prefix)
                   for prefix in self.EXCLUDED_PATH_PREFIX)

    def CheckInventory(self, path):
        """Checks the validity of the inventory and if the path exists in it.

        Unlike the base class, membership is decided by looking at the
        filesystem: the member exists if either the plain file or its ".gz"
        variant is readable.
        """
        if self.ValidateInventory():
            path = self.GetAbsolutePathName(path)
            return os.access(path, os.R_OK) or os.access(path + ".gz", os.R_OK)
        return False

    def Metadata(self, path):
        """Returns dict(LastModified=mtime) for a member, or {} if unknown.

        The ".gz" variant is preferred. Directories and missing files yield an
        empty dict.
        """
        path = self.GetAbsolutePathName(path)
        try:
            try:
                st = os.stat(path + ".gz")
            except OSError:
                if os.path.isdir(path):
                    return {}

                st = os.stat(path)

            return dict(LastModified=st.st_mtime)
        except OSError:
            return {}

    def check_dump_dir(self, dump_dir=None):
        """Make sure dump_dir is a usable directory.

        In "w" mode the directory is created if needed.

        Raises:
          IOManagerError: If dump_dir is empty or not a directory.
        """
        if not dump_dir:
            raise IOManagerError("Please specify a dump directory.")

        if self.mode == "w":
            self.EnsureDirectoryExists(dump_dir)

        if not os.path.isdir(dump_dir):
            raise IOManagerError("%s is not a directory" % dump_dir)

    def GetAbsolutePathName(self, name):
        """Map a member name to its absolute path inside the container.

        Raises:
          IOManagerError: If the resulting path escapes the dump directory.
        """
        path = os.path.normpath(
            os.path.join(self.dump_dir, self.version, name))

        # Compare against the directory *with* a trailing separator: a bare
        # prefix test would accept siblings such as "/repo-evil" for "/repo".
        container = os.path.join(self.dump_dir, "")
        if path != self.dump_dir and not path.startswith(container):
            raise IOManagerError("Path name is outside container.")

        return path

    def EnsureDirectoryExists(self, dirname):
        """Best effort creation of dirname (and its parents)."""
        try:
            os.makedirs(dirname)
        except OSError:
            # Deliberately ignored (e.g. the directory already exists);
            # callers notice real problems when they try to use it.
            pass

    def ListFiles(self):
        """Yield the name of every file below the versioned directory.

        A trailing ".gz" is stripped and names are relative to the versioned
        top level directory.
        """
        top_level = os.path.normpath(
            os.path.join(self.dump_dir, self.version))
        for root, _, files in os.walk(top_level):
            for f in files:
                path = os.path.normpath(os.path.join(root, f))

                if path.endswith(".gz"):
                    path = path[:-3]

                # Return paths relative to the top level. relpath() is used
                # rather than slicing by length, which chopped the first
                # character when the version is empty (the joined top level
                # then already ends with a separator).
                yield os.path.relpath(path, top_level)

    def Create(self, name, **options):
        """Open the gzip file backing name for writing.

        Args:
          name: The member name.
          **options: Accepted for compatibility with IOManager.Create();
            currently ignored.
        """
        _ = options
        path = self.GetAbsolutePathName(name)
        self.EnsureDirectoryExists(os.path.dirname(path))
        return gzip.open(path + ".gz", "wb")

    def Destroy(self, name):
        """Recursively remove the directory stored under name.

        This uses shutil.rmtree() on the member's path, so errors raised by
        it propagate to the caller.
        """
        path = self.GetAbsolutePathName(name)
        return shutil.rmtree(path)

    def Open(self, name):
        """Open a member for reading.

        The uncompressed file is tried first, then the ".gz" variant. An
        IOError from the second attempt propagates to the caller.
        """
        path = self.GetAbsolutePathName(name)
        try:
            result = open(path, "rb")
        except IOError:
            result = gzip.open(path + ".gz")

        self.session.logging.debug("Opened local file %s", result.name)
        return result

    def _StoreData(self, name, to_write, **options):
        """Write the encoded string to disk and mark the inventory dirty.

        Options:
          uncompressed: If true the data is written as is to the member's
            path, otherwise it is gzip compressed into "<path>.gz".
        """
        path = self.GetAbsolutePathName(name)
        self.EnsureDirectoryExists(os.path.dirname(path))

        # If we are asked to write uncompressed files we do.
        if options.get("uncompressed"):
            with open(path, "wb") as out_fd:
                out_fd.write(to_write)
            self._dirty = True
            return

        # Someone else may be trying to open the file right now. Since the
        # files are expected to be fairly small it is ok to compress into
        # memory first, so the file on disk is produced by a single write()
        # call. This narrows the window in which a reader sees a partial
        # file, but it is not a true atomic replace.
        fd = StringIO.StringIO()
        with gzip.GzipFile(mode="wb", fileobj=fd) as gzip_fd:
            gzip_fd.write(to_write)

        with open(path + ".gz", "wb") as out_fd:
            out_fd.write(fd.getvalue())

        self._dirty = True

    def __str__(self):
        return "Directory:%s" % self.dump_dir
482
483 484 # pylint: disable=protected-access 485 486 -class SelfClosingFile(StringIO.StringIO):
487 - def __init__(self, name, manager):
488 self.name = name 489 self.manager = manager 490 StringIO.StringIO.__init__(self)
491
492 - def __enter__(self):
493 return self
494
495 - def __exit__(self, exc_type, exc_value, traceback):
496 if exc_type is None: 497 self.Close() 498 else: 499 self.manager._Cancel(self.name)
500
501 - def Close(self):
502 self.manager._Write(self.name, self.getvalue())
503
class ZipFileManager(IOManager):
    """An IO Manager which stores files in a zip archive."""

    # Lower than the IOManager default, so Factory() tries this class earlier.
    order = 50

    def __init__(self, urn=None, fd=None, **kwargs):
        """Open the zip archive named by urn, or wrapped around fd.

        Args:
          urn: Path of the zip file. Required unless fd is given.
          fd: An already open file like object holding the archive.
          **kwargs: Passed to IOManager.__init__().

        Raises:
          IOManagerError: If neither urn nor fd is given, if a new archive
            ("w" mode) does not have a zip extension, or if the archive can
            not be read as a zip file.
        """
        super(ZipFileManager, self).__init__(urn=urn, **kwargs)
        if fd is None:
            # Raise an IOError subclass so Factory() can move on to the next
            # implementation instead of crashing on None.lower().
            if urn is None:
                raise IOManagerError(
                    "Either a urn or a file object must be specified.")

            if self.mode == "w" and not urn.lower().endswith("zip"):
                raise IOManagerError(
                    "Zip files must have the .zip extensions.")

        self.fd = fd

        # Give these a defined value even when only fd was passed; Open() and
        # __str__() read them.
        self.zip = None
        self.file_name = None
        self.canonical_name = None
        if urn is not None:
            self.location = self.file_name = os.path.normpath(
                os.path.abspath(urn))
            self.canonical_name = os.path.splitext(os.path.basename(urn))[0]

        # The set of outstanding writers. When all outstanding writers have been
        # closed we can flush the ZipFile.
        self._outstanding_writers = set()

        self._OpenZipFile()

    @utils.safe_property
    def inventory(self):
        """We do not really need an inventory for zip files.

        We return a fake one which maps each archive member to the date_time
        recorded for it in the zip file.
        """
        result = {}
        for zipinfo in self.zip.filelist:
            result[zipinfo.filename] = zipinfo.date_time

        return {
            "$INVENTORY": result
        }

    def FlushInventory(self):
        """Nothing to do: the inventory is derived from the archive itself."""
        pass

    def _OpenZipFile(self, mode=None):
        """(Re)open the archive.

        Args:
          mode: Overrides the mode the manager was created with.

        Raises:
          IOManagerError: If the archive is not a valid zip file.
        """
        try:
            if self.fd is None:
                self.zip = zipfile.ZipFile(
                    self.file_name, mode=mode or self.mode,
                    compression=zipfile.ZIP_DEFLATED)

            else:
                # For "r" and "a" this matches the mode the manager was
                # created with; it additionally lets a "w" archive on a file
                # object be created and later reopened for appending.
                self.zip = zipfile.ZipFile(self.fd, mode=mode or self.mode)

        except zipfile.BadZipfile:
            raise IOManagerError("Unable to read zipfile.")

    def ListFiles(self):
        """Returns the names of all archive members."""
        return self.zip.namelist()

    def _Cancel(self, name):
        """Forget an outstanding writer without writing its data."""
        # discard(): the same name may have been registered more than once.
        self._outstanding_writers.discard(name)

    def _Write(self, name, data):
        """Add a member to the archive; flush once no writer is pending."""
        self.zip.writestr(name, data)
        self._outstanding_writers.discard(name)
        if not self._outstanding_writers:
            self.zip.close()

            # Reopen the zip file so we may add new members.
            self._OpenZipFile(mode="a")

    def Create(self, name, **options):
        """Return a buffer whose content becomes member name when closed.

        Args:
          name: The member name.
          **options: Accepted for compatibility with IOManager.Create();
            currently ignored.

        Raises:
          IOManagerError: If the container is not opened for writing.
        """
        _ = options
        if self.mode not in ["w", "a"]:
            raise IOManagerError("Container not opened for writing.")

        result = SelfClosingFile(name, self)
        self._outstanding_writers.add(name)
        return result

    def Destroy(self, name):
        """Not supported for zip archives - always raises IOManagerError."""
        _ = name
        raise IOManagerError(
            "Removing a file from zipfile is not supported. Use a different "
            "IOManager subclass.")

    def Open(self, name):
        """Open an archive member for reading.

        Raises:
          IOManagerError: If the container is not readable or has no such
            member.
        """
        if self.mode not in ["r", "a"]:
            raise IOManagerError("Container not opened for reading.")
        if self.zip is None:
            self._OpenZipFile()

        try:
            return self.zip.open(name)
        except KeyError as e:
            raise IOManagerError(e)

    def __enter__(self):
        # The manager counts as a writer of its own, which delays flushing
        # until the with block is left.
        self._outstanding_writers.add(self)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._outstanding_writers.discard(self)
        if exc_type is None and not self._outstanding_writers:
            self.zip.close()
            if self.mode in ["w", "a"]:
                self._OpenZipFile(mode="a")

    def Close(self):
        """Close the underlying zip archive."""
        self.zip.close()

    def __str__(self):
        return "ZipFile:%s" % self.file_name
618
class URLManager(IOManager):
    """Supports opening profile repositories hosted over the web."""

    def __init__(self, urn=None, mode="r", **kwargs):
        """Initialize a read only manager on an http(s) URL.

        Raises:
          IOManagerError: If mode is not "r" or the URL scheme is neither
            http nor https.
        """
        super(URLManager, self).__init__(urn=urn, mode=mode, **kwargs)
        if mode != "r":
            raise IOManagerError("%s supports only reading." %
                                 self.__class__.__name__)

        self.url = urlparse.urlparse(urn)
        if self.url.scheme not in ("http", "https"):
            raise IOManagerError("%s supports only http protocol." %
                                 self.__class__.__name__)

    def Create(self, name, **options):
        """Not supported - always raises IOManagerError."""
        _ = name, options
        raise IOManagerError("Write support to http is not supported.")

    def Destroy(self, name):
        """Not supported - always raises IOManagerError."""
        _ = name
        raise IOManagerError("Write support to http is not supported.")

    def _GetURL(self, name):
        """Build the URL of a member: <base path>/<version>/<name>."""
        url = self.url._replace(path="%s/%s/%s" % (
            self.url.path, self.version, name))
        return urlparse.urlunparse(url)

    def Open(self, name):
        """Fetch a member over http.

        Returns:
          A file like object. For the ".gz" variant this is a GzipFile over
          an in-memory copy (at most MAX_DATA_SIZE bytes) of the response;
          for the fallback it is the urllib2 response itself.
        """
        url = self._GetURL(name)

        try:
            # Rekall repositories always use gzip to compress the files - so
            # first try with the .gz extension.
            fd = urllib2.urlopen(url + ".gz", timeout=10)
        except urllib2.HTTPError:
            # Try to load the file without the .gz extension.
            fd = urllib2.urlopen(url, timeout=10)
            self.session.logging.debug("Opened url %s", url)
            return fd

        self.session.logging.debug("Opened url %s.gz", url)
        try:
            compressed = fd.read(MAX_DATA_SIZE)
        finally:
            # The payload is buffered in memory; release the connection.
            fd.close()

        return gzip.GzipFile(fileobj=StringIO.StringIO(compressed))

    def __str__(self):
        return "URL:%s" % self.urn
664
def Factory(urn, mode="r", session=None, **kwargs):
    """Return the first IOManager implementation able to handle urn.

    Every class registered on IOManager is tried in ascending "order". A class
    signals that it can not handle the urn by raising an IOError from its
    constructor.

    Raises:
      IOManagerError: If no registered class accepted the urn.
    """
    candidates = sorted(IOManager.classes.values(), key=lambda x: x.order)
    for manager_cls in candidates:
        try:
            return manager_cls(urn=urn, mode=mode, session=session, **kwargs)
        except IOError:
            continue

    raise IOManagerError(
        "Unable to find any managers which can work on %s" % urn)
676