1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21  """IO Abstraction for Rekall. 
 22   
 23  Since Rekall is a library it should never directly access files: it may be 
 24  running on an environment which has no filesystem access for example, or the 
 25  files may be stored in an unusual way. 
 26   
 27  In order to ensure that the file storage mechanism does not need to be hardcoded 
 28  in each module, Rekall has an abstracted filesystem access mechanism implemented 
 29  through the IO Manager. 
 30   
 31  The session object should contain an instance of the IOManager() class at the 
 32  io_manager attribute, which will be used to create new files, or read from 
 33  existing files. 
 34  """ 
 35   
 36  __author__ = "Michael Cohen <scudette@google.com>" 
 37   
 38  import StringIO 
 39  import gzip 
 40  import json 
 41  import time 
 42  import os 
 43  import shutil 
 44  import urllib2 
 45  import urlparse 
 46  import zipfile 
 47   
 48  from rekall import constants 
 49  from rekall import obj 
 50  from rekall_lib import registry 
 51  from rekall_lib import utils 
 52   
 53   
 54   
# Upper bound, in bytes, passed to read() when loading a container member.
MAX_DATA_SIZE = 100000000
 59      """An IOError from the IO Manager.""" 
  60   
 63      """Raised when unable to encode to the IO Manager.""" 
  64   
    """Raised when unable to decode from the IO Manager."""
  68   
 71      """The baseclass for abstracted IO implementations. 
 72   
 73      The IO manager classes are responsible for managing access to profiles. A 
 74      profile is a JSON dict which is named using a standard notation. For 
 75      example, the profile for a certain NT kernel is: 
 76   
 77      nt/GUID/BF9E190359784C2D8796CF5537B238B42 
 78   
 79      The IO manager may actually store the profile file using some other scheme, 
 80      but that internal scheme is private to itself. 
 81      """ 
 82   
 83      __metaclass__ = registry.MetaclassRegistry 
 84      __abstract = True 
 85   
 86      order = 100 
 87   
 90          """Initialize the IOManager. 
 91   
 92          Args: 
 93   
 94            urn: The path to the IO manager. This might contain a scheme or 
 95                 protocol specific to a certain IO manager implementation. 
 96   
 97            mode: Can be "r" or "w". 
 98   
 99            session: The session object. 
100   
101            pretty_print: If specified we dump sorted yaml data - this ends up 
102            being more compressible in reality. 
103   
104            version: The required version of the repository. The IOManager is free 
105                 to implement arbitrary storage for different versions if 
106                 required. Versioning the repository allows us to update the 
107                 repository file format transparently without affecting older 
108                 Rekall versions. 
109   
110          """ 
111          self.mode = mode 
112          self.urn = urn 
113          self.version = version 
114          if session == None: 
115              raise RuntimeError("Session must be set") 
116   
117          self.session = session 
118          self.pretty_print = pretty_print 
119          self._inventory = None 
120          self.location = "" 
121          self._dirty = False 
 122   
123      @utils.safe_property 
125          if self._inventory is None: 
126              self._inventory = self.GetData("inventory", default={}) 
127   
128          return self._inventory 
 129   
131          try: 
132              metadata = self.inventory.get("$METADATA") 
133              if (metadata.get("ProfileClass") == "Inventory" 
134                      and metadata.get("Type") == "Inventory"): 
135                  return True 
136          except (AttributeError, IndexError, ValueError): 
137              pass 
138   
139          self.session.logging.warn( 
140              'Inventory for repository "%s" seems malformed. Are you behind a ' 
141              'captive portal or proxy? If this is a custom repository, did you ' 
142              'forget to create an inventory? You must use the ' 
143              'tools/profiles/build_profile_repo.py tool with the --inventory ' 
144              'flag.', self.location or self.urn) 
145   
146           
147           
148           
149          if not self._inventory: 
150              self._inventory = { 
151                  "$METADATA": dict( 
152                      Type="Inventory", 
153                      ProfileClass="Inventory"), 
154                  "$INVENTORY": {}, 
155              } 
156   
157          return False 
 158   
160          """Checks the validity of the inventory and if the path exists in it. 
161   
162          The inventory is a json object at the root of the repository which lists 
163          all the profiles in this repository. It allows us to determine quickly 
164          if a profile exists in this repository. 
165          """ 
166          if self.ValidateInventory(): 
167              return path in self.inventory.get("$INVENTORY") 
168   
169          return False 
 170   
175   
181   
183          """Write the inventory to the storage.""" 
184          if not self._dirty: 
185              return 
186   
187          self.inventory.setdefault("$METADATA", dict( 
188              Type="Inventory", 
189              ProfileClass="Inventory")) 
190          self.inventory.setdefault("$INVENTORY", dict()) 
191   
192          self.StoreData("inventory", self.inventory) 
193          self._dirty = False 
 194   
196          """Returns a generator over all the files in this container.""" 
197          return [] 
 198   
199 -    def Create(self, name, **options): 
 200          """Creates a new file in the container. 
201   
202          Returns a file like object which should support the context manager 
203          protocol. If the file already exists in the container, overwrite it. 
204   
205          For example: 
206   
207          with self.session.io_manager.Create("foobar") as fd: 
208             fd.Write("hello world") 
209   
210          Args: 
211            name: The name of the new file. 
212          """ 
 213   
215          """Destroys the file/directory at name's path.""" 
 216   
217 -    def Open(self, name): 
 218          """Opens a container member for reading. 
219   
220          This should return a file like object which provides read access to 
221          container members. 
222   
223          Raises: 
224            IOManagerError: If the file is not found. 
225          """ 
 226   
227 -    def Encoder(self, data, **options): 
 228          if options.get("raw"): 
229              return utils.SmartStr(data) 
230   
231          if self.pretty_print: 
232              return utils.PPrint(data) 
233   
234          return json.dumps(data, sort_keys=True, **options) 
 235   
237          return json.loads(raw) 
 238   
239 -    def GetData(self, name, raw=False, default=None): 
 240          """Get the data object stored at container member. 
241   
242          This returns an arbitrary python object which is stored in the named 
243          container member. For example, normally a dict or list. This function 
244          wraps the Open() method above and add deserialization to retrieve the 
245          actual object. 
246   
247          Returns None if the file is not found. 
248   
249          Args: 
250            name: The name to retrieve the data under. 
251            raw: If specified we do not parse the data, simply return it as is. 
252          """ 
253          if default is None: 
254              default = obj.NoneObject() 
255   
256          try: 
257              fd = self.Open(name) 
258              data = fd.read(MAX_DATA_SIZE) 
259              if raw: 
260                  return data 
261   
262              return self.Decoder(data) 
263   
264          except IOError: 
265              return default 
266   
267          except Exception as e: 
268              self.session.logging.error( 
269                  "Cannot parse profile %s because of decoding error '%s'.", 
270                  name, e) 
271              return default 
 272   
274          """Stores the data in the named container member. 
275   
276          This serializes the data and stores it in the named member. Not all 
277          types of data are serializable, so this may raise. For example, when 
278          using JSON to store the data, arbitrary python objects may not be used. 
279   
280          Args: 
281            name: The name under which the data will be stored. 
282            data: The data to store. 
283   
284          Common options: 
285            raw: If true we write the data directly without encoding to json. In 
286              this case data should be a string. 
287            uncompressed: File will not be compressed (default gzip compression). 
288          """ 
289          try: 
290              to_write = self.Encoder(data, **options) 
291          except EncodeError: 
292              self.session.logging.error("Unable to serialize %s", name) 
293              return 
294   
295          self._StoreData(name, to_write, **options) 
296   
297           
298          if name != "inventory": 
299              self.inventory.setdefault("$INVENTORY", {})[name] = dict( 
300                  LastModified=time.time()) 
301   
302              self.FlushInventory() 
 303   
308   
311   
312 -    def __exit__(self, exc_type, exc_value, traceback): 
  314   
317      """An IOManager which stores everything in files. 
318   
319      We prefer to store the profile file as a gzip compressed file within a 
320      versioned directory. For example the profile: 
321   
322      nt/GUID/BF9E190359784C2D8796CF5537B238B42 
323   
324      will be stored in: 
325   
326      $urn/nt/GUID/BF9E190359784C2D8796CF5537B238B42.gz 
327   
328      Where $urn is the path where the DirectoryIOManager was initialized with. 
329      """ 
330   
331       
332       
333      EXCLUDED_PATH_PREFIX = [] 
334   
335 -    def __init__(self, urn=None, **kwargs): 
 344   
345      @utils.safe_property 
347           
348          if self._inventory is None: 
349              self._inventory = self.GetData("inventory", default={}) 
350              if not self._inventory: 
351                  self._inventory = self.RebuildInventory() 
352   
353          return self._inventory 
 354   
356          """Rebuild the inventory file.""" 
357          result = { 
358              "$METADATA": dict( 
359                  Type="Inventory", 
360                  ProfileClass="Inventory"), 
361              "$INVENTORY": {}, 
362          } 
363          for member in self.ListFiles(): 
364              if not self._is_excluded_member(member): 
365                  result["$INVENTORY"][member] = self.Metadata(member) 
366   
367          return result 
 368   
373   
375          """Checks the validity of the inventory and if the path exists in it. 
376   
377          The inventory is a json object at the root of the repository which lists 
378          all the profiles in this repository. It allows us to determine quickly 
379          if a profile exists in this repository. 
380          """ 
381          if self.ValidateInventory(): 
382              path = self.GetAbsolutePathName(path) 
383              return os.access(path, os.R_OK) or os.access(path + ".gz", os.R_OK) 
384          return False 
 385   
400   
410   
419   
421          try: 
422              os.makedirs(dirname) 
423          except OSError: 
424              pass 
 425   
427          top_level = os.path.join(self.dump_dir, self.version) 
428          for root, _, files in os.walk(top_level): 
429              for f in files: 
430                  path = os.path.normpath(os.path.join(root, f)) 
431   
432                  if path.endswith(".gz"): 
433                      path = path[:-3] 
434   
435                   
436                  yield path[len(top_level) + 1:] 
 437   
442   
446   
447 -    def Open(self, name): 
 456   
479   
481          return "Directory:%s" % self.dump_dir 
  482   
488          self.name = name 
489          self.manager = manager 
490          StringIO.StringIO.__init__(self) 
 491   
494   
495 -    def __exit__(self, exc_type, exc_value, traceback): 
 496          if exc_type is None: 
497              self.Close() 
498          else: 
499              self.manager._Cancel(self.name) 
 500   
506      """An IO Manager which stores files in a zip archive.""" 
507   
508      order = 50 
509   
510 -    def __init__(self, urn=None, fd=None, **kwargs): 
 511          super(ZipFileManager, self).__init__(**kwargs) 
512          if fd is None and not urn.lower().endswith("zip"): 
513              if self.mode == "w": 
514                  raise IOManagerError( 
515                      "Zip files must have the .zip extensions.") 
516   
517          self.fd = fd 
518          if urn is not None: 
519              self.location = self.file_name = os.path.normpath( 
520                  os.path.abspath(urn)) 
521              self.canonical_name = os.path.splitext(os.path.basename(urn))[0] 
522   
523          self._OpenZipFile() 
524   
525           
526           
527          self._outstanding_writers = set() 
 528   
529      @utils.safe_property 
531          """We do not really need an inventory for zip files. 
532   
533          We return a fake one based on the zip file's modification time. 
534          """ 
535          result = {} 
536          for zipinfo in self.zip.filelist: 
537              result[zipinfo.filename] = zipinfo.date_time 
538   
539          return { 
540              "$INVENTORY": result 
541          } 
 542   
545   
547          try: 
548              if self.fd is None: 
549                  self.zip = zipfile.ZipFile( 
550                      self.file_name, mode=mode or self.mode, 
551                      compression=zipfile.ZIP_DEFLATED) 
552   
553              elif self.mode == "r": 
554                  self.zip = zipfile.ZipFile(self.fd, mode="r") 
555   
556              elif self.mode == "a": 
557                  self.zip = zipfile.ZipFile(self.fd, mode="a") 
558   
559          except zipfile.BadZipfile: 
560              raise IOManagerError("Unable to read zipfile.") 
 561   
563          return self.zip.namelist() 
 564   
566          self._outstanding_writers.remove(name) 
 567   
568 -    def _Write(self, name, data): 
 569          self.zip.writestr(name, data) 
570          self._outstanding_writers.remove(name) 
571          if not self._outstanding_writers: 
572              self.zip.close() 
573   
574               
575              self._OpenZipFile(mode="a") 
 576   
584   
586          _ = name 
587          raise IOManagerError( 
588              "Removing a file from zipfile is not supported. Use a different " 
589              "IOManager subclass.") 
 590   
591 -    def Open(self, name): 
 592          if self.mode not in ["r", "a"]: 
593              raise IOManagerError("Container not opened for reading.") 
594          if self.zip is None: 
595              self._OpenZipFile() 
596   
597          try: 
598              return self.zip.open(name) 
599          except KeyError as e: 
600              raise IOManagerError(e) 
 601   
603          self._outstanding_writers.add(self) 
604          return self 
 605   
606 -    def __exit__(self, exc_type, exc_value, traceback): 
 607          self._outstanding_writers.remove(self) 
608          if exc_type is None and not self._outstanding_writers: 
609              self.zip.close() 
610              if self.mode in ["w", "a"]: 
611                  self._OpenZipFile(mode="a") 
 612   
615   
617          return "ZipFile:%s" % self.file_name 
  618   
621      """Supports opening profile repositories hosted over the web.""" 
622   
623 -    def __init__(self, urn=None, mode="r", **kwargs): 
 624          super(URLManager, self).__init__(urn=urn, mode=mode, **kwargs) 
625          if mode != "r": 
626              raise IOManagerError("%s supports only reading." % 
627                                   self.__class__.__name__) 
628   
629          self.url = urlparse.urlparse(urn) 
630          if self.url.scheme not in ("http", "https"): 
631              raise IOManagerError("%s supports only http protocol." % 
632                                   self.__class__.__name__) 
 633   
637   
641   
643          url = self.url._replace(path="%s/%s/%s" % ( 
644              self.url.path, self.version, name)) 
645          return urlparse.urlunparse(url) 
 646   
647 -    def Open(self, name): 
 648          url = self._GetURL(name) 
649   
650          try: 
651               
652               
653              fd = urllib2.urlopen(url + ".gz", timeout=10) 
654              self.session.logging.debug("Opened url %s.gz" % url) 
655              return gzip.GzipFile( 
656                  fileobj=StringIO.StringIO(fd.read(MAX_DATA_SIZE))) 
657          except urllib2.HTTPError: 
658               
659              self.session.logging.debug("Opened url %s" % url) 
660              return urllib2.urlopen(url, timeout=10) 
 661   
663          return "URL:%s" % self.urn 
  664   
665   
def Factory(urn, mode="r", session=None, **kwargs):
    """Return the first registered IOManager which accepts the urn.

    The registered implementations are tried in ascending "order". An
    implementation signals that it can not handle the urn by raising an
    IOError from its constructor, in which case the next one is tried.

    Raises:
      IOManagerError: If no implementation could be instantiated.
    """
    candidates = sorted(
        IOManager.classes.values(), key=lambda impl: impl.order)

    for candidate in candidates:
        try:
            return candidate(urn=urn, mode=mode, session=session, **kwargs)
        except IOError:
            continue

    raise IOManagerError(
        "Unable to find any managers which can work on %s" % urn)
 676