1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """IO Abstraction for Rekall.
22
Since Rekall is a library, it should never access files directly: it may be
running in an environment which has no filesystem access, for example, or the
files may be stored in an unusual way.
26
27 In order to ensure that the file storage mechanism does not need to be hardcoded
28 in each module, Rekall has an abstracted filesystem access mechanism implemented
29 through the IO Manager.
30
31 The session object should contain an instance of the IOManager() class at the
32 io_manager attribute, which will be used to create new files, or read from
33 existing files.
34 """
35
36 __author__ = "Michael Cohen <scudette@google.com>"
37
38 import StringIO
39 import gzip
40 import json
41 import time
42 import os
43 import shutil
44 import urllib2
45 import urlparse
46 import zipfile
47
48 from rekall import constants
49 from rekall import obj
50 from rekall_lib import registry
51 from rekall_lib import utils
52
53
54
55 MAX_DATA_SIZE = 100000000
59 """An IOError from the IO Manager."""
60
63 """Raised when unable to encode to the IO Manager."""
64
67 """Raised when unable to decode to the IO Manager."""
68
71 """The baseclass for abstracted IO implementations.
72
73 The IO manager classes are responsible for managing access to profiles. A
74 profile is a JSON dict which is named using a standard notation. For
75 example, the profile for a certain NT kernel is:
76
77 nt/GUID/BF9E190359784C2D8796CF5537B238B42
78
79 The IO manager may actually store the profile file using some other scheme,
80 but that internal scheme is private to itself.
81 """
82
83 __metaclass__ = registry.MetaclassRegistry
84 __abstract = True
85
86 order = 100
87
90 """Initialize the IOManager.
91
92 Args:
93
94 urn: The path to the IO manager. This might contain a scheme or
95 protocol specific to a certain IO manager implementation.
96
97 mode: Can be "r" or "w".
98
99 session: The session object.
100
101 pretty_print: If specified we dump sorted yaml data - this ends up
102 being more compressible in reality.
103
104 version: The required version of the repository. The IOManager is free
105 to implement arbitrary storage for different versions if
106 required. Versioning the repository allows us to update the
107 repository file format transparently without affecting older
108 Rekall versions.
109
110 """
111 self.mode = mode
112 self.urn = urn
113 self.version = version
114 if session == None:
115 raise RuntimeError("Session must be set")
116
117 self.session = session
118 self.pretty_print = pretty_print
119 self._inventory = None
120 self.location = ""
121 self._dirty = False
122
123 @utils.safe_property
125 if self._inventory is None:
126 self._inventory = self.GetData("inventory", default={})
127
128 return self._inventory
129
131 try:
132 metadata = self.inventory.get("$METADATA")
133 if (metadata.get("ProfileClass") == "Inventory"
134 and metadata.get("Type") == "Inventory"):
135 return True
136 except (AttributeError, IndexError, ValueError):
137 pass
138
139 self.session.logging.warn(
140 'Inventory for repository "%s" seems malformed. Are you behind a '
141 'captive portal or proxy? If this is a custom repository, did you '
142 'forget to create an inventory? You must use the '
143 'tools/profiles/build_profile_repo.py tool with the --inventory '
144 'flag.', self.location or self.urn)
145
146
147
148
149 if not self._inventory:
150 self._inventory = {
151 "$METADATA": dict(
152 Type="Inventory",
153 ProfileClass="Inventory"),
154 "$INVENTORY": {},
155 }
156
157 return False
158
160 """Checks the validity of the inventory and if the path exists in it.
161
162 The inventory is a json object at the root of the repository which lists
163 all the profiles in this repository. It allows us to determine quickly
164 if a profile exists in this repository.
165 """
166 if self.ValidateInventory():
167 return path in self.inventory.get("$INVENTORY")
168
169 return False
170
175
181
183 """Write the inventory to the storage."""
184 if not self._dirty:
185 return
186
187 self.inventory.setdefault("$METADATA", dict(
188 Type="Inventory",
189 ProfileClass="Inventory"))
190 self.inventory.setdefault("$INVENTORY", dict())
191
192 self.StoreData("inventory", self.inventory)
193 self._dirty = False
194
196 """Returns a generator over all the files in this container."""
197 return []
198
def Create(self, name, **options):
    """Creates a new file in the container.

    Returns a file like object which should support the context manager
    protocol. If the file already exists in the container, overwrite it.

    The base class body consists only of this docstring, so calling it
    directly returns None; concrete IO managers are expected to override it.

    For example:

    with self.session.io_manager.Create("foobar") as fd:
        fd.Write("hello world")

    Args:
      name: The name of the new file.
      **options: Implementation specific keyword options (presumably the
        same options accepted by StoreData() - confirm against callers).
    """
213
215 """Destroys the file/directory at name's path."""
216
def Open(self, name):
    """Opens a container member for reading.

    This should return a file like object which provides read access to
    container members.

    The base class body consists only of this docstring, so calling it
    directly returns None; concrete IO managers are expected to override it.

    Args:
      name: The name of the container member to open.

    Raises:
      IOManagerError: If the file is not found.
    """
226
def Encoder(self, data, **options):
    """Serialize data into the string written to a container member.

    Args:
      data: The object to serialize.
      **options: The options given to StoreData(). "raw" selects
        pass-through encoding. "raw" and "uncompressed" only concern the
        IO manager and are never forwarded to the JSON encoder; any other
        option is passed on to json.dumps().

    Returns:
      The encoded data: utils.SmartStr(data) when raw is set,
      utils.PPrint(data) when self.pretty_print is set, otherwise a JSON
      string with sorted keys.
    """
    if options.get("raw"):
        return utils.SmartStr(data)

    if self.pretty_print:
        return utils.PPrint(data)

    # json.dumps() rejects keywords it does not know, so strip the storage
    # options first. Otherwise e.g. raw=False or uncompressed=True would
    # raise a TypeError here instead of producing JSON.
    json_options = {
        key: value for key, value in options.items()
        if key not in ("raw", "uncompressed")
    }

    return json.dumps(data, sort_keys=True, **json_options)
235
237 return json.loads(raw)
238
def GetData(self, name, raw=False, default=None):
    """Get the data object stored at container member.

    This returns an arbitrary python object which is stored in the named
    container member. For example, normally a dict or list. This function
    wraps the Open() method above and adds deserialization to retrieve the
    actual object.

    Args:
      name: The name to retrieve the data under.
      raw: If specified we do not parse the data, simply return it as is.
      default: The value returned when the member can not be read or
        decoded. When None, an obj.NoneObject() is returned instead.

    Returns:
      The decoded object (or the undecoded data if raw is set). If Open()
      or the read raises IOError the default is returned silently; any
      other failure is logged and the default is returned.
    """
    if default is None:
        default = obj.NoneObject()

    try:
        fd = self.Open(name)
        try:
            # At most MAX_DATA_SIZE bytes are read from the member.
            data = fd.read(MAX_DATA_SIZE)
        finally:
            # The data is fully buffered now; release the handle rather
            # than leaking it until garbage collection.
            fd.close()

        if raw:
            return data

        return self.Decoder(data)

    except IOError:
        return default

    except Exception as e:
        self.session.logging.error(
            "Cannot parse profile %s because of decoding error '%s'.",
            name, e)
        return default
272
274 """Stores the data in the named container member.
275
276 This serializes the data and stores it in the named member. Not all
277 types of data are serializable, so this may raise. For example, when
278 using JSON to store the data, arbitrary python objects may not be used.
279
280 Args:
281 name: The name under which the data will be stored.
282 data: The data to store.
283
284 Common options:
285 raw: If true we write the data directly without encoding to json. In
286 this case data should be a string.
287 uncompressed: File will not be compressed (default gzip compression).
288 """
289 try:
290 to_write = self.Encoder(data, **options)
291 except EncodeError:
292 self.session.logging.error("Unable to serialize %s", name)
293 return
294
295 self._StoreData(name, to_write, **options)
296
297
298 if name != "inventory":
299 self.inventory.setdefault("$INVENTORY", {})[name] = dict(
300 LastModified=time.time())
301
302 self.FlushInventory()
303
308
311
312 - def __exit__(self, exc_type, exc_value, traceback):
314
317 """An IOManager which stores everything in files.
318
319 We prefer to store the profile file as a gzip compressed file within a
320 versioned directory. For example the profile:
321
322 nt/GUID/BF9E190359784C2D8796CF5537B238B42
323
324 will be stored in:
325
326 $urn/nt/GUID/BF9E190359784C2D8796CF5537B238B42.gz
327
Where $urn is the path that the DirectoryIOManager was initialized with.
329 """
330
331
332
333 EXCLUDED_PATH_PREFIX = []
334
335 - def __init__(self, urn=None, **kwargs):
344
345 @utils.safe_property
347
348 if self._inventory is None:
349 self._inventory = self.GetData("inventory", default={})
350 if not self._inventory:
351 self._inventory = self.RebuildInventory()
352
353 return self._inventory
354
356 """Rebuild the inventory file."""
357 result = {
358 "$METADATA": dict(
359 Type="Inventory",
360 ProfileClass="Inventory"),
361 "$INVENTORY": {},
362 }
363 for member in self.ListFiles():
364 if not self._is_excluded_member(member):
365 result["$INVENTORY"][member] = self.Metadata(member)
366
367 return result
368
373
375 """Checks the validity of the inventory and if the path exists in it.
376
377 The inventory is a json object at the root of the repository which lists
378 all the profiles in this repository. It allows us to determine quickly
379 if a profile exists in this repository.
380 """
381 if self.ValidateInventory():
382 path = self.GetAbsolutePathName(path)
383 return os.access(path, os.R_OK) or os.access(path + ".gz", os.R_OK)
384 return False
385
400
410
419
421 try:
422 os.makedirs(dirname)
423 except OSError:
424 pass
425
427 top_level = os.path.join(self.dump_dir, self.version)
428 for root, _, files in os.walk(top_level):
429 for f in files:
430 path = os.path.normpath(os.path.join(root, f))
431
432 if path.endswith(".gz"):
433 path = path[:-3]
434
435
436 yield path[len(top_level) + 1:]
437
442
446
447 - def Open(self, name):
456
479
481 return "Directory:%s" % self.dump_dir
482
488 self.name = name
489 self.manager = manager
490 StringIO.StringIO.__init__(self)
491
494
def __exit__(self, exc_type, exc_value, traceback):
    """Finish the with-block: Close() on success, cancel on error.

    When the block raised, the owning manager is asked to _Cancel() this
    file's name. The exception is not suppressed (None is returned).
    """
    if exc_type is not None:
        self.manager._Cancel(self.name)
        return

    self.Close()
500
503
506 """An IO Manager which stores files in a zip archive."""
507
508 order = 50
509
def __init__(self, urn=None, fd=None, **kwargs):
    """Open a zip archive from a path or from an already open file object.

    Args:
      urn: Path of the zip file. Required when fd is not given.
      fd: Optional file like object containing the archive. When given,
        the archive is opened from it instead of from urn.
      **kwargs: Passed through to the base class initializer.

    Raises:
      IOManagerError: If neither urn nor fd is given, if the manager is
        opened for writing on a path that does not end in "zip", or if the
        archive can not be opened.
    """
    super(ZipFileManager, self).__init__(**kwargs)
    if fd is None:
        # Without a file object the path is the only way to locate the
        # archive. Fail with the error type Factory() knows how to handle
        # instead of an AttributeError on None below.
        if urn is None:
            raise IOManagerError(
                "Either a urn or a file object is required to open a "
                "zip file.")

        if not urn.lower().endswith("zip") and self.mode == "w":
            raise IOManagerError(
                "Zip files must have the .zip extensions.")

    self.fd = fd
    if urn is not None:
        self.location = self.file_name = os.path.normpath(
            os.path.abspath(urn))
        self.canonical_name = os.path.splitext(os.path.basename(urn))[0]

    self._OpenZipFile()

    # Writers that have not finished yet. _Write() and __exit__() only
    # close (and reopen) the archive once this set is empty.
    self._outstanding_writers = set()
528
529 @utils.safe_property
531 """We do not really need an inventory for zip files.
532
533 We return a fake one based on the zip file's modification time.
534 """
535 result = {}
536 for zipinfo in self.zip.filelist:
537 result[zipinfo.filename] = zipinfo.date_time
538
539 return {
540 "$INVENTORY": result
541 }
542
545
547 try:
548 if self.fd is None:
549 self.zip = zipfile.ZipFile(
550 self.file_name, mode=mode or self.mode,
551 compression=zipfile.ZIP_DEFLATED)
552
553 elif self.mode == "r":
554 self.zip = zipfile.ZipFile(self.fd, mode="r")
555
556 elif self.mode == "a":
557 self.zip = zipfile.ZipFile(self.fd, mode="a")
558
559 except zipfile.BadZipfile:
560 raise IOManagerError("Unable to read zipfile.")
561
563 return self.zip.namelist()
564
566 self._outstanding_writers.remove(name)
567
def _Write(self, name, data):
    """Store data as archive member `name` and retire its pending writer.

    Args:
      name: The member name; it must currently be present in
        self._outstanding_writers (set.remove raises KeyError otherwise).
      data: The content handed to ZipFile.writestr().
    """
    self.zip.writestr(name, data)
    self._outstanding_writers.remove(name)
    if not self._outstanding_writers:
        # No writer is pending any more, so closing is safe. Closing makes
        # zipfile finalize the archive on disk.
        self.zip.close()

        # Reopen the archive in append mode so further members may be added.
        # NOTE(review): indentation was lost in the reviewed source; the
        # reopen is assumed to belong to this branch - confirm.
        self._OpenZipFile(mode="a")
576
584
586 _ = name
587 raise IOManagerError(
588 "Removing a file from zipfile is not supported. Use a different "
589 "IOManager subclass.")
590
def Open(self, name):
    """Return a readable file like object for the archive member `name`.

    The archive is (re)opened first if no zip handle is currently held.

    Raises:
      IOManagerError: If the manager's mode is neither "r" nor "a", or if
        the archive has no member called `name`.
    """
    readable = self.mode in ["r", "a"]
    if not readable:
        raise IOManagerError("Container not opened for reading.")

    if self.zip is None:
        self._OpenZipFile()

    try:
        member = self.zip.open(name)
    except KeyError as e:
        # zipfile signals a missing member with KeyError.
        raise IOManagerError(e)

    return member
601
603 self._outstanding_writers.add(self)
604 return self
605
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the manager's with-block and finalize the archive when idle.

    The archive is only closed when the block finished without an
    exception and no other writer is still outstanding. The exception, if
    any, is not suppressed.
    """
    # Unregister this manager as a writer (presumably registered by
    # __enter__ - confirm).
    self._outstanding_writers.remove(self)
    if exc_type is None and not self._outstanding_writers:
        self.zip.close()
        # For writable managers reopen in append mode so further members
        # may still be added.
        # NOTE(review): indentation was lost in the reviewed source; this
        # check is assumed to be nested under the close branch - confirm.
        if self.mode in ["w", "a"]:
            self._OpenZipFile(mode="a")
612
615
617 return "ZipFile:%s" % self.file_name
618
621 """Supports opening profile repositories hosted over the web."""
622
def __init__(self, urn=None, mode="r", **kwargs):
    """Set up a read only manager for a repository served over http(s).

    Args:
      urn: The base URL of the repository.
      mode: Must be "r".
      **kwargs: Passed through to the base class initializer.

    Raises:
      IOManagerError: If mode is not "r" or the URL scheme is neither
        http nor https.
    """
    super(URLManager, self).__init__(urn=urn, mode=mode, **kwargs)
    manager_name = self.__class__.__name__

    if mode != "r":
        raise IOManagerError("%s supports only reading." % manager_name)

    parsed_url = urlparse.urlparse(urn)
    self.url = parsed_url
    if parsed_url.scheme not in ("http", "https"):
        raise IOManagerError(
            "%s supports only http protocol." % manager_name)
633
637
641
643 url = self.url._replace(path="%s/%s/%s" % (
644 self.url.path, self.version, name))
645 return urlparse.urlunparse(url)
646
def Open(self, name):
    """Fetch the member `name` from the remote repository.

    The gzip compressed variant (url + ".gz") is tried first; on an HTTP
    error the plain URL is fetched instead.

    Args:
      name: The name of the member to fetch.

    Returns:
      A file like object: a GzipFile over the downloaded (at most
      MAX_DATA_SIZE bytes of) compressed data, or the open HTTP response
      of the uncompressed URL.
    """
    url = self._GetURL(name)

    try:
        # Try the .gz extension first.
        fd = urllib2.urlopen(url + ".gz", timeout=10)
        try:
            self.session.logging.debug("Opened url %s.gz" % url)
            compressed = fd.read(MAX_DATA_SIZE)
        finally:
            # The payload is buffered in memory; release the connection
            # rather than leaking the response object.
            fd.close()

        return gzip.GzipFile(fileobj=StringIO.StringIO(compressed))
    except urllib2.HTTPError:
        # Try to load the file without the .gz extension. Log only after
        # the request succeeded so the message is never misleading.
        fd = urllib2.urlopen(url, timeout=10)
        self.session.logging.debug("Opened url %s" % url)
        return fd
661
663 return "URL:%s" % self.urn
664
665
def Factory(urn, mode="r", session=None, **kwargs):
    """Return the first IOManager implementation able to handle urn.

    Every class in IOManager.classes is tried in ascending `order`; a
    class rejects the urn by raising IOError from its constructor.

    Raises:
      IOManagerError: If every registered class rejected the urn.
    """
    candidates = sorted(
        IOManager.classes.values(),
        key=lambda manager_cls: manager_cls.order)

    for manager_cls in candidates:
        try:
            manager = manager_cls(
                urn=urn, mode=mode, session=session, **kwargs)
        except IOError:
            # This implementation can not handle the urn - try the next.
            continue
        else:
            return manager

    raise IOManagerError(
        "Unable to find any managers which can work on %s" % urn)
676