Package rekall :: Package plugins :: Package common :: Package efilter_plugins :: Module search
[frames] | no frames]

Source Code for Module rekall.plugins.common.efilter_plugins.search

   1  # Rekall Memory Forensics 
   2  # Copyright 2014 Google Inc. All Rights Reserved. 
   3  # 
   4  # This program is free software; you can redistribute it and/or modify 
   5  # it under the terms of the GNU General Public License as published by 
   6  # the Free Software Foundation; either version 2 of the License, or (at 
   7  # your option) any later version. 
   8  # 
   9  # This program is distributed in the hope that it will be useful, but 
  10  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
  12  # General Public License for more details. 
  13  # 
  14  # You should have received a copy of the GNU General Public License 
  15  # along with this program; if not, write to the Free Software 
  16  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
  17  # 
  18   
  19  """Rekall's search function. 
  20   
  21  The following queries should work and not break. 
  22   
  23  1) On Windows with --live API 
  24   
  25  * regex match on array of strings - case insensitive. 
  26   
  27  select proc, proc.environ from pslist() where proc.environ.TMP =~ "temp" 
  28   
  29  2) Format using the hex() method, using as to name columns. 
  30   
  31  select hex(VAD.start) as start, hex(VAD.end) as end, Protect from vad(proc_regex: "rekal") 
  32   
  33  3) Autoselect column names - second column can not clash with first 
  34  column name (should be hex, column 1). 
  35   
  36  select hex(VAD.start), hex(VAD.end), Protect from vad(proc_regex: "rekal") 
  37   
  38  4) Timestamp user function - requires a session to be passed (returns UnixTimeStamp). 
  39   
  40  select timestamp(proc.create_time) from pslist() 
  41   
  42  5) Yarascan with sub query 
  43   
  44  select * from file_yara(paths: (select path.filename from glob("c:\windows\*.exe")).filename, yara_expression: "rule r1 {strings: $a = \"Microsoft\" wide condition: any of them}") 
  45   
  46  6) Parameter interpolations: 
  47   
  48  a =  "select * from file_yara(paths: ( select path.filename from glob({0})).filename, yara_expression: {1})" 
  49   
  50  plugins.search(a, query_parameters=[r"c:\windows\*.exe", "rule r1 {strings: $a = \"Microsoft\" wide condition: any of them}"]) 
  51   
  52  7) WMI integration + unknown field: 
  53   
  54   select Result.Name, Result.SessionId, Result.foo from wmi("select * from Win32_Process") 
  55   
  56   select Result.Name, Result.BootDevice from wmi("select * from Win32_OperatingSystem") 
  57   
  58  8) Describe WMI dynamic query 
  59   
  60  describe wmi, dict(query="select * from Win32_Process") 
  61   
  62  9) Substitute a single string 
  63   
  64  select sub("Microsoft", "MS", Result.Name) from wmi("select * from Win32_OperatingSystem") 
  65   
  66  10) Substitute an array 
  67  select sub("rekal", "REKALL", proc.cmdline) from pslist() 
  68   
  69  """ 
  70   
  71  __author__ = "Adam Sindelar <adamsh@google.com>" 
  72  import itertools 
  73  import re 
  74  import six 
  75   
  76  from efilter import api 
  77  from efilter import ast 
  78  from efilter import errors 
  79  from efilter import protocol 
  80  from efilter import query as q 
  81   
  82  from efilter.ext import row_tuple 
  83   
  84  from efilter.transforms import asdottysql 
  85  from efilter.transforms import solve 
  86  from efilter.transforms import infer_type 
  87   
  88  from efilter.protocols import applicative 
  89  from efilter.protocols import associative 
  90  from efilter.protocols import repeated 
  91  from efilter.protocols import structured 
  92   
  93  from rekall import obj 
  94  from rekall import plugin 
  95  from rekall import testlib 
  96  from rekall.plugins.response import common 
  97  from rekall.plugins.overlays import basic 
  98  from rekall.plugins.common.efilter_plugins import helpers 
  99  from rekall.ui import identity as identity_renderer 
 100  from rekall_lib import utils 
class TestWhichPlugin(testlib.SimpleTestCase):
    """Test-case parameters for the ``which_plugin`` plugin."""

    PLUGIN = "which_plugin"

    # 'commandline' is a %-style template; its %(struct)s placeholder names
    # the 'struct' entry below.
    PARAMETERS = {
        "commandline": "which_plugin %(struct)s",
        "struct": "proc",
    }
109
class TestCollect(testlib.SimpleTestCase):
    """Test-case parameters for the ``collect`` plugin."""

    PLUGIN = "collect"

    # 'commandline' is a %-style template; its %(struct)s placeholder names
    # the 'struct' entry below.
    PARAMETERS = {
        "commandline": "collect %(struct)s",
        "struct": "proc",
    }
117
class TestExplain(testlib.SimpleTestCase):
    """Test-case parameters for the ``explain`` plugin."""

    PLUGIN = "explain"

    # The query is interpolated with %r, so it reaches the command line as a
    # quoted Python string literal.
    PARAMETERS = {
        "commandline": "explain %(query)r",
        "query": "select * from pslist() where (proc.pid == 1)",
    }
125
class TestSearch(testlib.SimpleTestCase):
    """Test-case parameters for the ``search`` plugin."""

    PLUGIN = "search"

    # The query is interpolated with %r, so it reaches the command line as a
    # quoted Python string literal.
    PARAMETERS = {
        "commandline": "search %(query)r",
        "query": "select * from pslist() where (proc.pid == 1)",
    }
133
class TestLookup(testlib.SimpleTestCase):
    """Test-case parameters for the ``lookup`` plugin."""

    PLUGIN = "lookup"

    # Both placeholders are interpolated with %r and passed positionally on
    # the command line.
    PARAMETERS = {
        "commandline": "lookup %(constant)r %(type_name)r",
        "constant": "_PE_state",
        "type_name": "PE_state",
    }
142
class FindPlugins(plugin.TypedProfileCommand, plugin.ProfileCommand):
    """Find which plugin(s) are available to produce the desired output."""

    name = "which_plugin"

    type_name = None
    producers_only = False

    __args = [
        dict(name="type_name", required=True, positional=True,
             help="The name of the type we're looking for. "
             "E.g.: 'proc' will find psxview, pslist, etc."),

        dict(name="producers_only", required=False, type="Boolean",
             help="Only include producers: plugins that output "
             "only this struct and have no side effects.")
    ]

    def collect(self):
        """Yield an instance of every active plugin that emits 'type_name'.

        A plugin matches when one of the entries in its table header's
        'types_in_output' is either a class whose __name__ equals the
        requested type name, or is equal to the requested type name itself.
        Each matching plugin is yielded exactly once.

        Yields:
            Plugin instances bound to the current session.
        """
        wanted = self.plugin_args.type_name

        if self.plugin_args.producers_only:
            pertinent_cls = plugin.Producer
        else:
            pertinent_cls = plugin.TypedProfileCommand

        # six.itervalues keeps the lazy iteration on Python 2 and also works
        # on Python 3, where dict.itervalues() does not exist.
        for plugin_cls in six.itervalues(plugin.Command.classes):
            if not plugin_cls.is_active(self.session):
                continue

            if not issubclass(plugin_cls, pertinent_cls):
                continue

            table_header = plugin_cls.table_header
            if not table_header:
                continue

            if isinstance(table_header, list):
                table_header = plugin.PluginHeader(*table_header)

            try:
                for t in table_header.types_in_output:
                    if ((isinstance(t, type) and wanted == t.__name__)
                            or wanted == t):
                        yield plugin_cls(session=self.session)
                        # One match is enough; do not report the same plugin
                        # again if a later entry matches too.
                        break
            except plugin.Error:
                # We were unable to instantiate this plugin to figure out
                # what it wants to emit. We did our best so move on.
                continue

    def render(self, renderer):
        """Render one table row per matching plugin."""
        renderer.table_header([
            dict(name="plugin", type="Plugin", style="compact", width=30)
        ])

        for command in self.collect():
            renderer.table_row(command)
199
class Collect(plugin.TypedProfileCommand, plugin.ProfileCommand):
    """Collect instances of struct of type 'type_name'.

    This plugin will find all other plugins that produce 'type_name' and merge
    all their output. For example, running collect 'proc' will give you a
    rudimentary psxview.

    This plugin is mostly used by other plugins, like netstat and psxview.
    """

    name = "collect"

    type_name = None

    __args = [
        dict(name="type_name", required=True, positional=True,
             help="The type (struct) to collect.")
    ]

    @classmethod
    def GetPrototype(cls, session):
        """Instantiate with suitable default arguments."""
        return cls(None, session=session)

    def collect(self):
        """Merge the output of every producer of 'type_name'.

        Results are de-duplicated by their 'indices' attribute. The first
        instance seen for a given key is kept, and the name of every producer
        that emitted it is accumulated in its 'obj_producers' set.

        Returns:
            An iterator over the merged result objects.
        """
        type_name = self.plugin_args.type_name
        which = self.session.plugins.which_plugin(
            type_name=type_name,
            producers_only=True)

        results = {}
        for producer in which.collect():
            # We know the producer plugin implements 'produce' because
            # 'which_plugin' guarantees it.
            # Log the requested type from plugin_args; the class attribute
            # 'type_name' is only a None placeholder.
            self.session.logging.debug("Producing %s from producer %r",
                                       type_name, producer)
            for result in producer.produce():
                previous = results.get(result.indices)
                if previous:
                    previous.obj_producers.add(producer.name)
                else:
                    result.obj_producers = set([producer.name])
                    results[result.indices] = result

        # Works on Python 2 and 3, unlike dict.itervalues().
        return six.itervalues(results)

    def render(self, renderer):
        """Render each collected object alongside the producers that saw it."""
        renderer.table_header([
            dict(name=self.plugin_args.type_name,
                 type=self.plugin_args.type_name),
            dict(name="producers")
        ])

        for result in self.collect():
            renderer.table_row(result, result.obj_producers)
255
class Lookup(plugin.TypedProfileCommand, plugin.ProfileCommand):
    """Lookup a global in the profile.

    The user names a global constant and the plugin emits the object the
    session's address resolver returns for it.
    """

    name = "lookup"

    __args = [
        {
            "name": "constant",
            "required": True,
            "positional": True,
            "help": "The constant to look up in the profile.",
        },
        {
            "name": "target",
            "positional": True,
            "default": None,
            "help": "The type of the constant.",
        },
        {
            "name": "target_args",
            "positional": True,
            "default": None,
            "help": "The target args",
        },
    ]

    table_header = [
        {"name": "field"},
    ]

    def collect(self):
        """Yield a single row holding the resolved constant object."""
        requested = self.plugin_args
        resolver = self.session.address_resolver
        constant_object = resolver.get_constant_object(
            requested.constant,
            target=requested.target,
            target_args=requested.target_args)

        yield {"field": constant_object}
284
class CommandWrapper(object):
    """Wraps a plugin and its output for the purpose of EFILTER searches.

    This is a helper class for the Search plugin. It lets us pretend that
    plugins are functions to be called from inside EFILTER queries, and also
    takes care of running the plugin and saving its output and headers.

    Members:
        plugin_cls: The type of the Command subclass.
        plugin_obj: The plugin instance created by the first apply() call.
        rows: Output of running the plugin.
        columns: How 'rows' are structured (the plugin's table header, or the
            columns captured by the identity renderer).
        session: The session the plugin runs in.
    """
    plugin_cls = None
    plugin_obj = None

    rows = None
    columns = None

    session = None

    # Once the CommandWrapper is run, this will be set to the arguments that
    # were used. You cannot apply the same CommandWrapper twice with different
    # args. If you need to do that, create two instances of CommandWrapper.
    _applied_args = None

    def __init__(self, plugin_cls, session):
        self.plugin_cls = plugin_cls
        self.session = session

    def __repr__(self):
        return "<CommandWrapper: %r>" % (self.plugin_cls.__name__)

    # IApplicative

    def apply(self, args, kwargs):
        """Instantiate the plugin with given args and run it.

        This caches the output of the plugin. Subsequently, rows and columns
        will be populated.

        The CommandWrapper must not be applied twice with different
        arguments - each instance represents a unique application.

        Arguments:
            args, kwargs: Arguments to the plugin. A "vars" keyword, if
                present, is dropped before the plugin sees it. The caller's
                dict is not modified.

        Returns:
            The (cached) rows produced by the plugin.

        Raises:
            ValueError: if called again with different arguments.
        """
        # Normalise the arguments BEFORE comparing against the cache.
        # Otherwise a repeated, identical call that carries "vars" would never
        # match the stored (already stripped) kwargs and would raise.
        kwargs = kwargs.copy()
        kwargs.pop("vars", None)

        if self._applied_args is not None:
            # Called before. Return what we cached.
            if self._applied_args != (args, kwargs):
                raise ValueError(
                    "%r was previously called with %r but is now being called"
                    " with %r. This should never happen."
                    % (self, self._applied_args, (args, kwargs)))

            return self.rows

        self._applied_args = (args, kwargs)

        # First time - instantiate the plugin with arguments.
        plugin_curry = getattr(self.session.plugins, self.plugin_cls.name)
        self.plugin_obj = plugin_curry(session=self.session,
                                       *args, **kwargs)

        output_header = getattr(self.plugin_cls, "table_header", None)
        collector = getattr(self.plugin_obj, "collect_as_dicts", None)

        if callable(collector) and output_header is not None:
            # The plugin supports the collect API and declares its output ahead
            # of time. This is the ideal case.
            self.columns = output_header
            self.rows = repeated.lazy(collector)
        else:
            # We don't know enough about the plugin to do the easy thing. We
            # need to create a shim renderer that will cache the plugin output
            # and then use that.
            renderer = identity_renderer.IdentityRenderer(session=self.session)
            with renderer.start():
                self.session.RunPlugin(self.plugin_cls.name, format=renderer,
                                       *args, **kwargs)

            # The identity renderer will now contain the plugin output and
            # columns.
            self.columns = renderer.columns
            self.rows = repeated.repeated(*list(renderer.rows))

        return self.rows

    def reflect_runtime_return(self):
        """Return the return type* of this CommandWrapper.

        This actually returns a dummy instance (prototype) of the plugin this
        CommandWrapper wraps. EFILTER allows use of stand-in objects for type
        inference. We make heavy use of prototypes to represent Rekall's
        profile-dependent type system.

        Returns:
            The plugin prototype, or None if the plugin class raises
            NotImplementedError from GetPrototype.
        """
        # Does this plugin implement the reflection helper?
        try:
            return self.plugin_cls.GetPrototype(session=self.session)
        except NotImplementedError:
            # GetPrototype is not overridden and the default implementation
            # didn't work.
            return None


# Implementing the IApplicative protocol will let EFILTER call the
# CommandWrapper as though it were a function. This registration runs once,
# at import time; CommandWrapper supplies 'apply' and
# 'reflect_runtime_return' itself.
applicative.IApplicative.implicit_static(CommandWrapper)
class EfilterPlugin(plugin.TypedProfileCommand, plugin.Command):

    """Abstract base class for plugins that do something with queries.

    Provides implementations of the basic EFILTER protocols for selecting and
    inspecting the output of plugins. Search and Explain extend this.
    """
    __abstract = True

    query = None  # The Query instance we're working with.
    query_source = None  # The source of the query, passed by the user.
    query_error = None  # An exception, if any, caused when parsing the query.

    __args = [
        dict(name="query", required=True, positional=True,
             help="The dotty/EFILTER query to run."),

        dict(name="query_parameters", type="ArrayString", positional=True,
             help="Positional parameters for parametrized queries."),
    ]

    def __init__(self, *args, **kwargs):
        """Parse the query and build the function scopes.

        Raises:
            plugin.PluginError: if the query cannot be parsed or the scopes
                cannot be built.
        """
        super(EfilterPlugin, self).__init__(*args, **kwargs)

        try:
            self.query = q.Query(self.plugin_args.query,
                                 params=self.plugin_args.query_parameters)
            self.scopes = self._get_scopes()
        except errors.EfilterError as error:
            raise plugin.PluginError("Could not parse your query %r: %s." % (
                self.plugin_args.query, error))

        except Exception:
            # I am using a broad except here to make sure we always display a
            # friendly error message. EFILTER will usually raise a friendly
            # error, but we might get a non-EfilterError exception if the user
            # gets creative (e.g. passing a custom object as query, instead of a
            # string).
            raise plugin.PluginError("Could not parse your query %r." % (
                self.plugin_args.query,))

    def _get_scopes(self):
        """Builds the scopes for this query.

        Returns:
            A copy of helpers.EFILTER_SCOPES extended with the session-bound
            'timestamp' and 'file' user functions.
        """
        scopes = helpers.EFILTER_SCOPES.copy()

        # six.integer_types is (int, long) on Python 2 and (int,) on Python 3,
        # so this is [float, int, long] on py2 without referencing the
        # py2-only builtin 'long' directly.
        scopes["timestamp"] = api.user_func(
            lambda x, **_: basic.UnixTimeStamp(value=x, session=self.session),
            arg_types=[float] + list(six.integer_types))

        # This function is used to indicate that the string represents
        # a filename. This will cause the agent to upload it if the
        # user requested uploading files.
        # > select file(path.filename.name).filename.name from glob("/*")
        # six.text_type is 'unicode' on Python 2 and 'str' on Python 3.
        scopes["file"] = api.user_func(
            lambda x: common.FileInformation(session=self.session, filename=x),
            arg_types=[six.text_type, str])
        return scopes

    # IStructured implementation for EFILTER:
    def resolve(self, name):
        """Find and return a CommandWrapper for the plugin 'name'.

        Functions registered in self.scopes take precedence over plugins.

        Raises:
            KeyError: if 'name' is neither a scope function nor an active
                plugin.
        """
        function = self.scopes.get(name)
        if function:
            return function

        meta = self.session.plugins.plugin_db.GetActivePlugin(name)
        # NOTE(review): '!= None' (rather than 'is not None') looks
        # deliberate - GetActivePlugin may return a None-like sentinel that
        # only compares equal to None. Confirm before changing.
        if meta != None:
            wrapper = CommandWrapper(meta.plugin_cls, self.session)
            return wrapper

        raise KeyError("No plugin named %r." % name)

    def getmembers_runtime(self):
        """Get all available plugins and scope function names."""
        result = dir(self.session.plugins)
        result += self.scopes.keys()

        return frozenset(result)

    def reflect_runtime_member(self, name):
        """Find the type* of 'name', which is a plugin.

        * This returns a CommandWrapper which allows plugins to be called from
        EFILTER queries as functions. EFILTER allows the use of stand-in objects
        as proxies for actual types, so we make heavy use of plugin and struct
        prototypes to represent Rekall's profile-dependent type system.
        """
        cls = self.session.plugins.plugin_db.GetActivePlugin(name).plugin_cls
        return CommandWrapper(cls, self.session)

    # Plugin methods:

    def render_error(self, renderer):
        """Render the query parsing error in a user-friendly manner.

        When the stored error exposes a position, the offending span of the
        query is highlighted; otherwise the plain query source is shown.
        """
        renderer.section("Query Error")

        try:
            start = self.query_error.adjusted_start
            end = self.query_error.adjusted_end
            source = self.query_error.source
            text = self.query_error.text
        except AttributeError:
            # Maybe query_error isn't a subclass of EfilterError. Let's be
            # careful.
            start = None
            end = None
            source = self.query_source
            text = str(self.query_error)

        if start is not None and end is not None:
            renderer.format(
                "EFILTER error ({}) {} at position {}-{} in query:\n{}\n\n",
                type(self.query_error).__name__, repr(text), start, end,
                utils.AttributedString(
                    source,
                    [dict(start=start, end=end, fg="RED", bold=True)]))
        else:
            renderer.format(
                "EFILTER error ({}) {} in query:\n{}\n",
                type(self.query_error).__name__, repr(text), source)

    def render(self, renderer):
        """Subclasses must implement rendering."""
        raise NotImplementedError()


# Register EfilterPlugin with EFILTER's IStructured protocol. The class
# itself provides resolve, getmembers_runtime and reflect_runtime_member.
# This runs once, at import time.
structured.IStructured.implicit_dynamic(EfilterPlugin)
class Search(EfilterPlugin):
    r"""
    Searches and recombines output of other plugins.

    Search allows you to use the EFILTER search engine to filter, transform
    and combine output of most Rekall plugins. The most common use for this
    is running IOCs.

    ## Some examples

    * Find the process with pid 1:

      ```
      select * from pslist() where proc.pid == 1
      ```

    * Sort lsof output by file descriptor:

      ```
      select * from lsof() order by fd
      ```

    * Filter and sort through lsof in one step:

      ```
      select * from lsof() where proc.name =~ "rekall" order by fd
      ```

    * Is there any proc with PID 1, that has a TCPv6 connection and
      isn't a dead process?

      ```
      search("(any lsof where (proc.pid == 1 and fileproc.human_type == 'TCPv6'))
             and not (any dead_procs where (proc.pid == 1))")
      ```

      Note: "ANY" is just a short hand for "SELECT ANY FROM" which does what
      it sounds like, and returns True or False depending on whether the
      query has any results.

    You will probably need to use the *describe* plugin to help
    discover the exact column structure.


    * regex match on array of strings - case insensitive.

      ```
      (Windows)
      select proc, proc.environ from pslist() where
        proc.environ.TMP =~ "temp"

      (Linux)
      select proc, proc.environ from pslist() where
        proc.environ.PATH =~ "home"
      ```

    * Format using the hex() method, using *as* to name columns.

      ```
      (Windows)
      select hex(VAD.start) as start, hex(VAD.end) as end,
            Protect from vad(proc_regex: "rekal")

      (Linux)
      select hex(start) as start, hex(end) as end, filename
            from maps(proc_regex: "rekall")
      ```

    * Autoselect column names - second column can not clash with first
      column name (should be hex, column 1).

      ```
      (Windows)
      select hex(VAD.start), hex(VAD.end), Protect
            from vad(proc_regex: "rekal")

      (Linux)
      select hex(start), hex(end), filename from maps(proc_regex: "rekall")
      ```

    * Timestamp user function

      ```
      select proc, timestamp(proc.create_time) from pslist()
      ```

    * Yarascan with sub query

      ```
      select * from file_yara(
         paths: (
           select path.filename from glob(
               "c:\windows\*.exe")).filename,
         yara_expression: "rule r1 {strings: $a = \"Microsoft\" wide condition: any of them}")
      ```

      On Linux:
      ```
      select * from file_yara(
         paths: (
           select path.filename from glob(
               "/home/*/.ssh/*")).filename,
         yara_expression: "rule r1 {strings: $a = \"ssh-rsa\" condition: any of them}")
      ```

    * Parameter interpolations:

      ```
      a = "select * from file_yara(paths: ( select path.filename from glob({0})).filename, yara_expression: {1})"

      search a, [r"c:\windows\*.exe",
           "rule r1 {strings: $a = \"Microsoft\" wide condition: any of them}"]
      ```

    * WMI integration + unknown field:

      ```
      select Result.Name, Result.SessionId, Result.foo
           from wmi("select * from Win32_Process")

      select Result.Name, Result.BootDevice
           from wmi("select * from Win32_OperatingSystem")
      ```

    * Describe WMI dynamic query

      ```
      describe wmi, dict(query="select * from Win32_Process")
      ```

    * Substitute a single string

      ```
      select sub("Microsoft", "MS", Result.Name)
             from wmi("select * from Win32_OperatingSystem")
      ```

    * Substitute an array

      ```
      select sub("rekal", "REKALL", proc.cmdline) from pslist()
      ```
    """
    name = "search"

    __args = [
        dict(name="silent", default=False, type="Boolean",
             help="Queries should fail silently."),
    ]

    def collect(self):
        """Return the search results without displaying them.

        Returns:
            The values of the query solver's result, or None if the query
            failed and the 'silent' flag was set.

        Raises:
            EfilterError unless 'silent' flag was set.
        """
        try:
            result = self.solve()
            return repeated.getvalues(result)
        except errors.EfilterError:
            if self.plugin_args.silent:
                return None

            raise

    def solve(self):
        """Return the search results exactly as EFILTER returns them.

        Returns:
            Depends on the query. A falsy value is replaced with an empty
            list.

        Raises:
            EfilterError if anything goes wrong.
        """
        return solve.solve(self.query, self).value or []

    @utils.safe_property
    def first_result(self):
        """Get only the first search result.

        This is useful when we need to find a concrete structure for some other
        purpose, such as finding a concrete allocator zone when writing a
        'dump_zone' plugin.

        Returns None when there are no results, and also when collect()
        returned something that cannot be iterated (e.g. None in silent mode).
        """
        try:
            for result in self.collect():
                return result
        except (TypeError, ValueError):
            return None

    def _render_plugin_output(self, renderer, table_header, rows):
        """Used to render search results if they come from a plugin.

        Raises:
            ValueError: if a column spec in 'table_header' has no name.
        """
        columns = []
        for column in table_header or []:
            column_name = column.get("name")
            # Validate before recording the column.
            if column_name is None:
                raise ValueError(
                    "Column spec %r is missing a name. Full header was: %r." %
                    (column, table_header))

            columns.append(column_name)

        try:
            for row in rows:
                renderer.table_row(*[row.get(key) for key in columns])
        except errors.EfilterError as error:
            # Because 'rows' could be a lazy iterator it's possible that an
            # exception will get raised while output is already being rendered.
            self.query_error = error
            return self.render_error(renderer)

    def _render_dicts(self, renderer, rows):
        """Used to render search results if they are basic dicts."""
        try:
            for row in rows:
                # six.itervalues works on Python 2 and 3, unlike
                # dict.itervalues().
                renderer.table_row(*six.itervalues(row))
        except errors.EfilterError as error:
            self.query_error = error
            return self.render_error(renderer)

    def _render_whatever_i_guess(self, renderer, rows):
        """Used to render search results if we don't know WTF they are.

        Raises:
            ValueError: if a row is a CommandWrapper, i.e. the query named a
                plugin without calling it.
        """
        try:
            for row in rows:
                if isinstance(row, CommandWrapper):
                    raise ValueError(
                        "%(plugin)r is a Rekall plugin and must be called as a"
                        " function. Try '%(name)s()' instead of '%(name)s'."
                        % dict(plugin=row.plugin_cls, name=row.plugin_cls.name))
                renderer.table_row(row)
        except errors.EfilterError as error:
            self.query_error = error
            return self.render_error(renderer)

    def render(self, renderer):
        """Run the query and render its results as a table."""
        # Do we have a query?
        if not self.query:
            return self.render_error(renderer)

        # Figure out what the header should look like.
        # Can we infer the type?

        # For example for select statements the type will be
        # associative.IAssociative because they return a dict like result.
        try:
            t = infer_type.infer_type(self.query, self)
        except Exception:
            # Type inference is best effort; fall back to inspecting the rows.
            t = None

        if isinstance(t, CommandWrapper):
            raise RuntimeError(
                "%r is a plugin and must be called as a function. Try '%s()'"
                " instead of '%s'"
                % (t.plugin_cls, t.plugin_cls.name, t.plugin_cls.name))

        # Get the data we're rendering.
        try:
            rows = self.collect() or []
        except errors.EfilterError as error:
            self.query_error = error
            return self.render_error(renderer)

        # If the query returns the output of a plugin then we have to render
        # the same columns as the plugin. If the plugin declares its columns
        # then that's easy. Otherwise we have to try and get the columns from
        # cache.
        # e.g. select * from pslist()
        if isinstance(t, plugin.Command):
            output_header = getattr(t, "table_header", None)
            if output_header is None:
                raise plugin.PluginError(
                    "Query is using plugin %s which is not typed." % t.name)

            renderer.table_header(output_header)
            return self._render_plugin_output(renderer, output_header, rows)

        # For queries which name a list of columns we need to get the first row
        # to know which columns will be output. Surely efilter can provide this
        # from the AST? This seems like a hack because if the first row the
        # plugin produces does not include all the columns we will miss them.
        # It is also buggy because if the plugin does not produce any rows we
        # can not know if the query is correct or not. For example "select XXXX
        # from plugin()" can not raise an unknown column XXXX if the plugin does
        # not produce at least one row.
        remaining_rows = iter(rows)
        try:
            first_row = next(remaining_rows)
        except StopIteration:
            renderer.format("No results.")
            return

        # Put the peeked row back in front of the rest.
        all_rows = itertools.chain((first_row,), remaining_rows)

        # If we have some output but don't know what it is we can try to use
        # dict keys as columns.
        if isinstance(first_row, row_tuple.RowTuple):
            columns = [dict(name=x)
                       for x in structured.getmembers(first_row)]
            renderer.table_header(columns, auto_widths=True)
            return self._render_plugin_output(renderer, columns, all_rows)

        # Sigh. Give up, and render whatever you got, I guess.
        renderer.table_header([dict(name="result")])
        return self._render_whatever_i_guess(renderer, all_rows)
832
class Explain(EfilterPlugin):
    """Prints various information about a query.

    Explains how a query was parsed and how it will be interpreted. It also
    runs a full type inferencer, to attempt to determine the output of the
    query once it's executed.

    The Explain plugin can analyse a strict superset of expressions that
    are valid in the Search plugin. It supports:

     - Any search query that can be passed to Search.
     - Expressions asking about types and members of profile types
       (like structs).
    """

    name = "explain"

    # As long as this is True, the input is a valid search query and will be
    # analysed in the output. This may become False if we realize the input
    # is not a valid search query, but instead asking about something like the
    # structure of a native type.
    input_is_regular_query = True

    def reflect_runtime_member(self, name):
        """Reflect what Search reflects, and also struct types."""
        result = super(Explain, self).reflect_runtime_member(name)

        if not result or result == protocol.AnyType:
            result = self.session.profile.GetPrototype(name)
            if result and result != protocol.AnyType:
                # We found something that makes this not a query (aka a struct).
                self.input_is_regular_query = False

        return result

    def getmembers_runtime(self):
        """Reflect what Search reflects, and also struct types."""
        result = super(Explain, self).getmembers_runtime()

        # six.iterkeys works on Python 2 and 3, unlike dict.iterkeys().
        return set(result) | set(six.iterkeys(self.session.profile.vtypes))

    def recurse_expr(self, expr, depth):
        """Walk 'expr' depth-first, yielding (node, depth) pairs.

        'expr' itself is yielded at 'depth'. Each child is yielded at
        depth + 1, grandchildren at depth + 2, and so on. Nodes that are not
        ast.Expression instances are yielded but not descended into.
        """
        yield expr, depth

        if not isinstance(expr, ast.Expression):
            return

        for child in expr.children:
            # Use distinct names for the yielded pair. Rebinding 'depth' here
            # would make every later sibling start from the depth of the
            # previous sibling's last descendant.
            for sub_expr, sub_depth in self.recurse_expr(child, depth + 1):
                yield sub_expr, sub_depth

    def _render_node(self, query, node, renderer, depth=1):
        """Render an AST node and recurse.

        Emits one row for 'node' with its inferred type and the query source
        with the node's span highlighted, then one row per child. Expression
        children recurse; any other child is rendered as a leaf.
        """
        t = infer_type.infer_type(node, self)

        try:
            name = "(%s) <%s>" % (t.__name__, type(node).__name__)
        except AttributeError:
            # The inferred type is an instance (e.g. a prototype), not a class.
            name = "(%r) <%s>" % (t, type(node).__name__)

        renderer.table_row(
            name,
            utils.AttributedString(
                str(query),
                [dict(start=node.start, end=node.end, fg="RED", bold=True)]
            ),
            depth=depth
        )

        for child in node.children:
            if isinstance(child, ast.Expression):
                self._render_node(node=child, renderer=renderer, query=query,
                                  depth=depth + 1)
            else:
                renderer.table_row(
                    "(%s) <leaf: %r>" % (type(child).__name__, child),
                    None,
                    depth=depth + 1
                )

    def render(self, renderer):
        """Render the type analysis followed by the query analysis."""
        # Do we have a query?
        if not self.query:
            return self.render_error(renderer)

        # render_output_analysis must run before render_query_analysis
        # because it decides whether the input is a regular query.
        self.render_output_analysis(renderer)
        self.render_query_analysis(renderer)

    def render_output_analysis(self, renderer):
        """Render analysis of the expression's return type and its members."""
        output_type = infer_type.infer_type(self.query, self)

        renderer.section("Type Analysis", width=140)
        renderer.table_header([
            dict(name="name", type="TreeNode", max_depth=2, width=60),
            dict(name="type", width=40)
        ])

        renderer.table_row(self.query.source,
                           repr(output_type),
                           depth=1)

        try:
            for member in structured.getmembers(output_type):
                # Infer each member's type by building "(query)[member]".
                subq = "(%s)[%r]" % (self.query.source, member)
                subtype = infer_type.infer_type(q.Query(subq), self)
                if isinstance(subtype, type):
                    subtype = subtype.__name__
                else:
                    subtype = repr(subtype)

                renderer.table_row(subq, subtype, depth=2)
        except (NotImplementedError, TypeError, AttributeError):
            # The output type does not expose members; nothing more to show.
            pass

    def render_query_analysis(self, renderer):
        """Render query analysis if the input is a regular query.

        A non-regular query could be the user asking us to explain (e.g.) a
        struct.
        """
        if not self.input_is_regular_query:
            return

        original_query = self.query.source
        canonical_query = asdottysql.asdottysql(self.query)

        renderer.section("Query Analysis", width=140)
        self.render_query(renderer, self.query)

        # Only show the canonical form when it differs from the user's input.
        if canonical_query != original_query:
            renderer.section("Query Analysis (Using canonical syntax)",
                             width=140)
            self.render_query(renderer, q.Query(canonical_query))

    def render_query(self, renderer, query):
        """Render a single query object's analysis."""
        renderer.table_header([
            dict(name="expression", type="TreeNode", max_depth=15, width=40),
            dict(name="query", width=100, nowrap=True),
        ])

        self._render_node(query, query.root, renderer)


# Below we implement various EFILTER protocols for various Rekall types.
# All of these registrations run once, at import time.


# Implement IApplicative for Command to get reflection APIs.
applicative.IApplicative.implement(
    for_type=plugin.Command,
    implementations={
        # Calling a Command instantiates it with the given arguments and
        # returns the result of its collect() method.
        applicative.apply:
            lambda x, *args, **kwargs: x(*args, **kwargs).collect(),

        # Plugins "return" themselves, as far as the type inference cares.
        applicative.reflect_runtime_return: lambda command: command
    }
)


# TypedProfileCommands can reflect a lot about their output columns.
# The 'resolve' function will never actually be called on TypeProfileCommand,
# because we treat plugins as tables, not rows. 'resolve' will instead be
# passed the rowdicts.
structured.IStructured.implement(
    for_type=plugin.TypedProfileCommand,
    implementations={
        structured.resolve: lambda _, __: None,  # This should not happen.
        # A column's type comes from the plugin's get_column_type().
        structured.reflect_runtime_member:
            lambda c, name: c.get_column_type(name),
        # Member names are the column names of the plugin's table header.
        structured.getmembers_runtime: lambda c: c.table_header.all_names
    }
)


# We support IAssociative (plugin[column]) using the same accessors as
# IStructured (plugin.column). We're easy-going like that.
# As with IStructured, the 'select' function doesn't get called on the
# plugin itself, which is why we don't provide a real implementation.
associative.IAssociative.implement(
    for_type=plugin.TypedProfileCommand,
    implementations={
        associative.select: lambda _, __: None,  # This should not happen.
        associative.reflect_runtime_key:
            lambda c, name: c.get_column_type(name),
        associative.getkeys_runtime: lambda c: c.table_header.all_names
    }
)


# Implement IAssociative for Structs because why not. This lets us do
# struct[key] as well as struct.key.
associative.IAssociative.implement(
    for_type=obj.Struct,
    implementations={
        # struct[key] is served by plain attribute lookup.
        associative.select: getattr,
        # Key reflection is delegated to the IStructured protocol functions.
        associative.reflect_runtime_key: structured.reflect_runtime_member,
        associative.getkeys_runtime: structured.getmembers_runtime
    }
)
def Struct_getmembers_runtime(item):
    """Return the set of member names available on a struct.

    The names yielded by ``item.getproperties()`` are combined with the
    ``obj_offset``, ``obj_type`` and ``obj_name`` attribute names.
    """
    builtin_names = ("obj_offset", "obj_type", "obj_name")
    members = {prop_name for prop_name, _ in item.getproperties()}
    return members.union(builtin_names)
# Support struct.member. A member the struct does not have resolves to a
# NoneObject, which lets us gracefully dereference structs whose fields are
# missing depending on the profile.
structured.IStructured.implement(
    for_type=obj.Struct,
    implementations={
        structured.resolve: (
            lambda struct, name: getattr(struct, name, obj.NoneObject(""))),
        structured.reflect_runtime_member: (
            lambda struct, name: type(getattr(struct, name, None))),
        structured.getmembers_runtime: Struct_getmembers_runtime,
    }
)

# Resolving any member of a NoneObject gives back that same NoneObject, so we
# can recurse into it without raising errors.
structured.IStructured.implement(
    for_type=obj.NoneObject,
    implementations={
        structured.resolve: lambda none_obj, _: none_obj,
    }
)

# Support flags.member.
structured.IStructured.implement(
    for_type=basic.Flags,
    implementations={
        structured.resolve: getattr,
        structured.reflect_runtime_member: (
            lambda flags, name: type(getattr(flags, name, None))),
        structured.getmembers_runtime: lambda flags: list(flags.maskmap),
    }
)

# Support indexing into Arrays.
associative.IAssociative.implement(
    for_type=obj.Array,
    implementations={
        associative.select: lambda array, index: array[index],
    }
)


# Support some_array.some_member, which is useful for accessing properties.
structured.IStructured.implement(
    for_type=obj.Array,
    implementations={
        structured.resolve: getattr,
    }
)
def select_Pointer(ptr, key):
    """Delegate a key lookup to the target of the pointer, if any.

    Args:
      ptr: The pointer to look through; it is dereferenced via ptr.deref().
      key: The key to select on the dereferenced object.

    Returns:
      The result of associative.select() on the dereferenced object, or None
      (after logging a warning) when the dereferenced object is falsy.
    """
    target_obj = ptr.deref()
    if target_obj:
        return associative.select(target_obj, key)

    # Logger.warn() is a deprecated alias of Logger.warning().
    # NOTE(review): assumes session.logging is a standard logging.Logger --
    # confirm.
    ptr.session.logging.warning(
        "Attempting to access key %r of a void pointer %r.", key, ptr)
    return None
# Key lookups on a Pointer (Pointer[key]) are delegated to select_Pointer.
associative.IAssociative.implement(
    for_type=obj.Pointer,
    implementations={associative.select: select_Pointer}
)
def resolve_Pointer(ptr, member):
    """Delegate a member lookup to the target of the pointer, if any.

    Args:
      ptr: The pointer to look through; it is dereferenced via ptr.deref().
      member: The member name to resolve on the dereferenced object.

    Returns:
      The result of structured.resolve() on the dereferenced object, or None
      (after logging a warning) when the dereferenced object is falsy.
    """
    target_obj = ptr.deref()
    if target_obj:
        return structured.resolve(target_obj, member)

    # Logger.warn() is a deprecated alias of Logger.warning().
    # NOTE(review): assumes session.logging is a standard logging.Logger --
    # confirm.
    ptr.session.logging.warning(
        "Attempting to access member %r of a void pointer %r.", member, ptr)
    return None
# Member lookups on a Pointer (Pointer.member) are delegated to
# resolve_Pointer.
structured.IStructured.implement(
    for_type=obj.Pointer,
    implementations={structured.resolve: resolve_Pointer}
)

# An AttributeDict behaves like a dict, but accessing it via an attribute
# does not raise - it just returns None. Plugins that may return arbitrary
# columns can return an AttributeDict, and Efilter can then reference those
# columns with the "." operator. A field that does not exist simply leaves
# None in the column.
structured.IStructured.implement(
    for_type=utils.AttributeDict,
    implementations={
        structured.resolve: lambda attr_dict, name: attr_dict.get(name),
        structured.getmembers_runtime: lambda attr_dict: attr_dict.keys(),
    }
)

# A SlottedObject offers similar functionality to AttributeDict but is much
# faster, so it is preferred.
structured.IStructured.implement(
    for_type=utils.SlottedObject,
    implementations={
        structured.resolve: (
            lambda slotted, name: getattr(slotted, name, None)),
        structured.getmembers_runtime: lambda slotted: slotted.__slots__,
    }
)