Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/IPython/core/guarded_eval.py: 34%
301 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:05 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:05 +0000
1from typing import (
2 Any,
3 Callable,
4 Dict,
5 Set,
6 Sequence,
7 Tuple,
8 NamedTuple,
9 Type,
10 Literal,
11 Union,
12 TYPE_CHECKING,
13)
14import ast
15import builtins
16import collections
17import operator
18import sys
19from functools import cached_property
20from dataclasses import dataclass, field
21from types import MethodDescriptorType, ModuleType
23from IPython.utils.docs import GENERATING_DOCUMENTATION
24from IPython.utils.decorators import undoc
27if TYPE_CHECKING or GENERATING_DOCUMENTATION:
28 from typing_extensions import Protocol
29else:
30 # do not require on runtime
31 Protocol = object # requires Python >=3.8
@undoc
class HasGetItem(Protocol):
    """Typing stub for objects that define ``__getitem__``."""

    def __getitem__(self, key) -> None: ...
@undoc
class InstancesHaveGetItem(Protocol):
    """Typing stub for callables (e.g. classes) that produce `HasGetItem` objects."""

    def __call__(self, *args, **kwargs) -> HasGetItem: ...
@undoc
class HasGetAttr(Protocol):
    """Typing stub for objects that explicitly define ``__getattr__``."""

    def __getattr__(self, key) -> None: ...
@undoc
class DoesNotHaveGetAttr(Protocol):
    """Typing stub for objects that do not define ``__getattr__``."""


# By default `__getattr__` is not explicitly implemented on most objects
MayHaveGetattr = Union[HasGetAttr, DoesNotHaveGetAttr]
61def _unbind_method(func: Callable) -> Union[Callable, None]:
62 """Get unbound method for given bound method.
64 Returns None if cannot get unbound method, or method is already unbound.
65 """
66 owner = getattr(func, "__self__", None)
67 owner_class = type(owner)
68 name = getattr(func, "__name__", None)
69 instance_dict_overrides = getattr(owner, "__dict__", None)
70 if (
71 owner is not None
72 and name
73 and (
74 not instance_dict_overrides
75 or (instance_dict_overrides and name not in instance_dict_overrides)
76 )
77 ):
78 return getattr(owner_class, name)
79 return None
@undoc
@dataclass
class EvaluationPolicy:
    """Definition of evaluation policy.

    Each ``allow_*`` flag enables one category of operation; all of them
    default to ``False``. ``allowed_calls`` lists callables that may be
    called even when ``allow_any_calls`` is off.
    """

    allow_locals_access: bool = False
    allow_globals_access: bool = False
    allow_item_access: bool = False
    allow_attr_access: bool = False
    allow_builtins_access: bool = False
    allow_all_operations: bool = False
    allow_any_calls: bool = False
    allowed_calls: Set[Callable] = field(default_factory=set)

    def can_get_item(self, value, item):
        """Return whether ``value[item]`` may be evaluated."""
        return self.allow_item_access

    def can_get_attr(self, value, attr):
        """Return whether ``value.attr`` may be evaluated."""
        return self.allow_attr_access

    def can_operate(self, dunders: Tuple[str, ...], a, b=None):
        """Return whether the operation described by ``dunders`` is allowed."""
        if self.allow_all_operations:
            return True
        return False

    def can_call(self, func):
        """Return whether ``func`` may be called.

        A call is allowed when any call is allowed, when ``func`` itself is
        in ``allowed_calls``, or when the unbound counterpart of a bound
        method is in ``allowed_calls``.
        """
        if self.allow_any_calls:
            return True

        if func in self.allowed_calls:
            return True

        owner_method = _unbind_method(func)

        if owner_method and owner_method in self.allowed_calls:
            return True

        return False
119def _get_external(module_name: str, access_path: Sequence[str]):
120 """Get value from external module given a dotted access path.
122 Raises:
123 * `KeyError` if module is removed not found, and
124 * `AttributeError` if acess path does not match an exported object
125 """
126 member_type = sys.modules[module_name]
127 for attr in access_path:
128 member_type = getattr(member_type, attr)
129 return member_type
132def _has_original_dunder_external(
133 value,
134 module_name: str,
135 access_path: Sequence[str],
136 method_name: str,
137):
138 if module_name not in sys.modules:
139 # LBYLB as it is faster
140 return False
141 try:
142 member_type = _get_external(module_name, access_path)
143 value_type = type(value)
144 if type(value) == member_type:
145 return True
146 if method_name == "__getattribute__":
147 # we have to short-circuit here due to an unresolved issue in
148 # `isinstance` implementation: https://bugs.python.org/issue32683
149 return False
150 if isinstance(value, member_type):
151 method = getattr(value_type, method_name, None)
152 member_method = getattr(member_type, method_name, None)
153 if member_method == method:
154 return True
155 except (AttributeError, KeyError):
156 return False
159def _has_original_dunder(
160 value, allowed_types, allowed_methods, allowed_external, method_name
161):
162 # note: Python ignores `__getattr__`/`__getitem__` on instances,
163 # we only need to check at class level
164 value_type = type(value)
166 # strict type check passes → no need to check method
167 if value_type in allowed_types:
168 return True
170 method = getattr(value_type, method_name, None)
172 if method is None:
173 return None
175 if method in allowed_methods:
176 return True
178 for module_name, *access_path in allowed_external:
179 if _has_original_dunder_external(value, module_name, access_path, method_name):
180 return True
182 return False
@undoc
@dataclass
class SelectivePolicy(EvaluationPolicy):
    """Policy that allows item access, attribute access and operations only
    for allow-listed types, or for types that still use the allow-listed
    implementation of the relevant dunder method.

    The ``*_external`` sets hold tuples of a module name followed by an
    attribute access path; they are only consulted if that module is already
    present in `sys.modules`.
    """

    allowed_getitem: Set[InstancesHaveGetItem] = field(default_factory=set)
    allowed_getitem_external: Set[Tuple[str, ...]] = field(default_factory=set)

    allowed_getattr: Set[MayHaveGetattr] = field(default_factory=set)
    allowed_getattr_external: Set[Tuple[str, ...]] = field(default_factory=set)

    allowed_operations: Set = field(default_factory=set)
    allowed_operations_external: Set[Tuple[str, ...]] = field(default_factory=set)

    # Per-dunder cache of the allow-listed method implementations.
    _operation_methods_cache: Dict[str, Set[Callable]] = field(
        default_factory=dict, init=False
    )

    def can_get_attr(self, value, attr):
        """Return whether ``value.attr`` may be evaluated.

        Both ``__getattribute__`` and ``__getattr__`` of the class must be
        unmodified (a missing ``__getattr__`` is fine). If ``attr`` is a
        property on the class, it is additionally required that the class is
        allow-listed or that the property is the very same object as the one
        on an allow-listed external class.
        """
        has_original_attribute = _has_original_dunder(
            value,
            allowed_types=self.allowed_getattr,
            allowed_methods=self._getattribute_methods,
            allowed_external=self.allowed_getattr_external,
            method_name="__getattribute__",
        )
        has_original_attr = _has_original_dunder(
            value,
            allowed_types=self.allowed_getattr,
            allowed_methods=self._getattr_methods,
            allowed_external=self.allowed_getattr_external,
            method_name="__getattr__",
        )

        # Many objects do not have `__getattr__`, this is fine.
        if has_original_attr is None and has_original_attribute:
            accept = True
        else:
            # Accept objects without modifications to `__getattr__` and `__getattribute__`
            accept = has_original_attr and has_original_attribute

        if not accept:
            return False

        # We still need to check for overridden properties.
        value_class = type(value)
        if not hasattr(value_class, attr):
            return True

        class_attr_val = getattr(value_class, attr)
        if not isinstance(class_attr_val, property):
            return True

        # Properties in allowed types are ok (although we do not include any
        # properties in our default allow list currently).
        if value_class in self.allowed_getattr:
            return True  # pragma: no cover

        # Properties in subclasses of allowed types may be ok if not changed.
        # Every external entry is consulted: the allow-list is a set, so
        # deciding on the first entry alone would make the answer depend on
        # iteration order.
        for module_name, *access_path in self.allowed_getattr_external:
            try:
                external_class = _get_external(module_name, access_path)
                external_class_attr_val = getattr(external_class, attr)
            except (KeyError, AttributeError):
                continue  # pragma: no cover
            # Identity check: the property must be the unchanged object.
            if class_attr_val is external_class_attr_val:
                return True

        return False

    def can_get_item(self, value, item):
        """Allow accessing `__getitem__` of allow-listed instances unless it was modified."""
        return _has_original_dunder(
            value,
            allowed_types=self.allowed_getitem,
            allowed_methods=self._getitem_methods,
            allowed_external=self.allowed_getitem_external,
            method_name="__getitem__",
        )

    def can_operate(self, dunders: Tuple[str, ...], a, b=None):
        """Return whether every dunder in ``dunders`` is original on each operand.

        ``b`` is only checked when it is not ``None``.
        """
        objects = [a]
        if b is not None:
            objects.append(b)
        return all(
            _has_original_dunder(
                obj,
                allowed_types=self.allowed_operations,
                allowed_methods=self._operator_dunder_methods(dunder),
                allowed_external=self.allowed_operations_external,
                method_name=dunder,
            )
            for dunder in dunders
            for obj in objects
        )

    def _operator_dunder_methods(self, dunder: str) -> Set[Callable]:
        """Return (and cache) implementations of ``dunder`` on allowed operation types."""
        if dunder not in self._operation_methods_cache:
            self._operation_methods_cache[dunder] = self._safe_get_methods(
                self.allowed_operations, dunder
            )
        return self._operation_methods_cache[dunder]

    @cached_property
    def _getitem_methods(self) -> Set[Callable]:
        return self._safe_get_methods(self.allowed_getitem, "__getitem__")

    @cached_property
    def _getattr_methods(self) -> Set[Callable]:
        return self._safe_get_methods(self.allowed_getattr, "__getattr__")

    @cached_property
    def _getattribute_methods(self) -> Set[Callable]:
        return self._safe_get_methods(self.allowed_getattr, "__getattribute__")

    def _safe_get_methods(self, classes, name) -> Set[Callable]:
        """Collect the truthy ``name`` attributes of ``classes``, skipping missing ones."""
        return {
            method
            for class_ in classes
            for method in [getattr(class_, name, None)]
            if method
        }
311class _DummyNamedTuple(NamedTuple):
312 """Used internally to retrieve methods of named tuple instance."""
class EvaluationContext(NamedTuple):
    """Namespaces and settings used for a guarded evaluation."""

    #: Local namespace
    locals: dict
    #: Global namespace
    globals: dict
    #: Evaluation policy identifier
    evaluation: Literal[
        "forbidden", "minimal", "limited", "unsafe", "dangerous"
    ] = "forbidden"
    #: Whether the evaluation of code takes place inside of a subscript.
    #: Useful for evaluating ``:-1, 'col'`` in ``df[:-1, 'col']``.
    in_subscript: bool = False
class _IdentitySubscript:
    """Subscriptable helper whose ``obj[key]`` evaluates to ``key`` itself."""

    def __getitem__(self, key):
        return key


# Shared instance, and the name under which it is exposed to evaluated code.
IDENTITY_SUBSCRIPT = _IdentitySubscript()
SUBSCRIPT_MARKER = "__SUBSCRIPT_SENTINEL__"
class GuardRejection(Exception):
    """Raised when the guard refuses to evaluate something."""
def guarded_eval(code: str, context: EvaluationContext):
    """Evaluate provided code in the evaluation context.

    Dispatch depends on ``context.evaluation``:

    - ``forbidden``: nothing is evaluated, `GuardRejection` is raised;
    - ``dangerous``: the code is passed to the standard :func:`eval`;
    - anything else: the code is parsed and handed to :func:`eval_node`.

    When ``context.in_subscript`` is set, ``code`` is treated as the content
    of a subscript; an empty ``code`` then evaluates to an empty tuple.
    """
    if context.evaluation == "forbidden":
        raise GuardRejection("Forbidden mode")

    # note: not using `ast.literal_eval` as it does not implement
    # getitem at all, for example it fails on simple `[0][1]`

    if context.in_subscript:
        # slice syntax (`:`) is only valid inside of subscripts, so the code
        # is wrapped into a subscript of a marker object which hands the key
        # back unchanged; the result is then the evaluated key itself
        if not code:
            return tuple()
        subscript_locals = context.locals.copy()
        subscript_locals[SUBSCRIPT_MARKER] = IDENTITY_SUBSCRIPT
        code = "".join((SUBSCRIPT_MARKER, "[", code, "]"))
        context = EvaluationContext(
            **{**context._asdict(), "locals": subscript_locals}
        )

    if context.evaluation == "dangerous":
        # unrestricted evaluation, by explicit request of the context
        return eval(code, context.globals, context.locals)

    return eval_node(ast.parse(code, mode="eval"), context)
# Binary operator AST node type → dunder(s) implementing it on the left operand.
BINARY_OP_DUNDERS: Dict[Type[ast.operator], Tuple[str]] = {
    # arithmetic
    ast.Add: ("__add__",),
    ast.Sub: ("__sub__",),
    ast.Mult: ("__mul__",),
    ast.Div: ("__truediv__",),
    ast.FloorDiv: ("__floordiv__",),
    ast.Mod: ("__mod__",),
    ast.Pow: ("__pow__",),
    # bitwise
    ast.LShift: ("__lshift__",),
    ast.RShift: ("__rshift__",),
    ast.BitOr: ("__or__",),
    ast.BitXor: ("__xor__",),
    ast.BitAnd: ("__and__",),
    # matrix multiplication
    ast.MatMult: ("__matmul__",),
}
# Comparison operator AST node type → dunders that must be checked for it;
# the first entry names the function that performs the comparison.
COMP_OP_DUNDERS: Dict[Type[ast.cmpop], Tuple[str, ...]] = {
    ast.Eq: ("__eq__",),
    ast.NotEq: ("__ne__", "__eq__"),
    ast.Lt: ("__lt__", "__gt__"),
    ast.LtE: ("__le__", "__ge__"),
    ast.Gt: ("__gt__", "__lt__"),
    ast.GtE: ("__ge__", "__le__"),
    ast.In: ("__contains__",),
    # Note: ast.Is, ast.IsNot, ast.NotIn are handled specially
}
# Unary operator AST node type → dunders that must be checked for it.
UNARY_OP_DUNDERS: Dict[Type[ast.unaryop], Tuple[str, ...]] = {
    ast.USub: ("__neg__",),
    ast.UAdd: ("__pos__",),
    # we have to check both __inv__ and __invert__!
    ast.Invert: ("__invert__", "__inv__"),
    ast.Not: ("__not__",),
}
418def _find_dunder(node_op, dunders) -> Union[Tuple[str, ...], None]:
419 dunder = None
420 for op, candidate_dunder in dunders.items():
421 if isinstance(node_op, op):
422 dunder = candidate_dunder
423 return dunder
def eval_node(node: Union[ast.AST, None], context: EvaluationContext):
    """Evaluate AST node in provided context.

    Applies evaluation restrictions defined in the context. Currently does not support evaluation of functions with keyword arguments.

    Does not evaluate actions that always have side effects:

    - class definitions (``class sth: ...``)
    - function definitions (``def sth: ...``)
    - variable assignments (``x = 1``)
    - augmented assignments (``x += 1``)
    - deletions (``del x``)

    Does not evaluate operations which do not return values:

    - assertions (``assert x``)
    - pass (``pass``)
    - imports (``import x``)
    - control flow:

        - conditionals (``if x:``) except for ternary IfExp (``a if x else b``)
        - loops (``for`` and ``while``)
        - exception handling

    The purpose of this function is to guard against unwanted side-effects;
    it does not give guarantees on protection from malicious code execution.

    Raises `GuardRejection` when the policy selected by
    ``context.evaluation`` forbids an operation, `NameError` when a name
    cannot be resolved although namespace access is allowed, and
    `ValueError` for node types that are not handled. ``None`` as ``node``
    evaluates to ``None``.
    """
    policy = EVALUATION_POLICIES[context.evaluation]
    if node is None:
        return None
    if isinstance(node, ast.Expression):
        return eval_node(node.body, context)
    if isinstance(node, ast.BinOp):
        left = eval_node(node.left, context)
        right = eval_node(node.right, context)
        dunders = _find_dunder(node.op, BINARY_OP_DUNDERS)
        if dunders:
            if policy.can_operate(dunders, left, right):
                # NOTE(review): only the left operand's dunder is invoked;
                # reflected methods of the right operand are never tried.
                return getattr(left, dunders[0])(right)
            else:
                raise GuardRejection(
                    f"Operation (`{dunders}`) for",
                    type(left),
                    f"not allowed in {context.evaluation} mode",
                )
    if isinstance(node, ast.Compare):
        left = eval_node(node.left, context)
        all_true = True
        for op, right in zip(node.ops, node.comparators):
            right = eval_node(right, context)
            # Negation belongs to a single operator; it has to be reset for
            # each link of a chained comparison, otherwise one `not in` or
            # `is not` would invert every operator that follows it.
            negate = False
            dunder = None
            dunders = _find_dunder(op, COMP_OP_DUNDERS)
            if not dunders:
                if isinstance(op, ast.NotIn):
                    dunders = COMP_OP_DUNDERS[ast.In]
                    negate = True
                if isinstance(op, ast.Is):
                    dunder = "is_"
                if isinstance(op, ast.IsNot):
                    dunder = "is_"
                    negate = True
            if not dunder and dunders:
                dunder = dunders[0]
            if dunder:
                # containment is tested on the right-hand side container
                a, b = (right, left) if dunder == "__contains__" else (left, right)
                # identity checks need no permission from the policy
                if dunder == "is_" or (dunders and policy.can_operate(dunders, a, b)):
                    result = getattr(operator, dunder)(a, b)
                    if negate:
                        result = not result
                    if not result:
                        all_true = False
                    left = right
                else:
                    raise GuardRejection(
                        f"Comparison (`{dunder}`) for",
                        type(left),
                        f"not allowed in {context.evaluation} mode",
                    )
            else:
                raise ValueError(
                    f"Comparison `{dunder}` not supported"
                )  # pragma: no cover
        return all_true
    if isinstance(node, ast.Constant):
        return node.value
    if isinstance(node, ast.Tuple):
        return tuple(eval_node(e, context) for e in node.elts)
    if isinstance(node, ast.List):
        return [eval_node(e, context) for e in node.elts]
    if isinstance(node, ast.Set):
        return {eval_node(e, context) for e in node.elts}
    if isinstance(node, ast.Dict):
        return dict(
            zip(
                [eval_node(k, context) for k in node.keys],
                [eval_node(v, context) for v in node.values],
            )
        )
    if isinstance(node, ast.Slice):
        return slice(
            eval_node(node.lower, context),
            eval_node(node.upper, context),
            eval_node(node.step, context),
        )
    if isinstance(node, ast.UnaryOp):
        value = eval_node(node.operand, context)
        dunders = _find_dunder(node.op, UNARY_OP_DUNDERS)
        if dunders:
            if policy.can_operate(dunders, value):
                if isinstance(node.op, ast.Not):
                    # Objects have no `__not__` method to call, so once the
                    # policy has agreed the negation is computed directly.
                    return operator.not_(value)
                return getattr(value, dunders[0])()
            else:
                raise GuardRejection(
                    f"Operation (`{dunders}`) for",
                    type(value),
                    f"not allowed in {context.evaluation} mode",
                )
    if isinstance(node, ast.Subscript):
        value = eval_node(node.value, context)
        slice_ = eval_node(node.slice, context)
        if policy.can_get_item(value, slice_):
            return value[slice_]
        raise GuardRejection(
            "Subscript access (`__getitem__`) for",
            type(value),  # not joined to avoid calling `repr`
            f" not allowed in {context.evaluation} mode",
        )
    if isinstance(node, ast.Name):
        # lookup order: locals, then globals, then builtins
        if policy.allow_locals_access and node.id in context.locals:
            return context.locals[node.id]
        if policy.allow_globals_access and node.id in context.globals:
            return context.globals[node.id]
        if policy.allow_builtins_access and hasattr(builtins, node.id):
            # note: do not use __builtins__, it is implementation detail of cPython
            return getattr(builtins, node.id)
        if not policy.allow_globals_access and not policy.allow_locals_access:
            raise GuardRejection(
                f"Namespace access not allowed in {context.evaluation} mode"
            )
        else:
            raise NameError(f"{node.id} not found in locals, globals, nor builtins")
    if isinstance(node, ast.Attribute):
        value = eval_node(node.value, context)
        if policy.can_get_attr(value, node.attr):
            return getattr(value, node.attr)
        raise GuardRejection(
            "Attribute access (`__getattr__`) for",
            type(value),  # not joined to avoid calling `repr`
            f"not allowed in {context.evaluation} mode",
        )
    if isinstance(node, ast.IfExp):
        test = eval_node(node.test, context)
        if test:
            return eval_node(node.body, context)
        else:
            return eval_node(node.orelse, context)
    if isinstance(node, ast.Call):
        func = eval_node(node.func, context)
        # calls with keyword arguments are always rejected
        if policy.can_call(func) and not node.keywords:
            args = [eval_node(arg, context) for arg in node.args]
            return func(*args)
        raise GuardRejection(
            "Call for",
            func,  # not joined to avoid calling `repr`
            f"not allowed in {context.evaluation} mode",
        )
    raise ValueError("Unhandled node", ast.dump(node))
# External types whose `__getitem__` is allowed, given as tuples of
# module name followed by the attribute access path within that module.
SUPPORTED_EXTERNAL_GETITEM = {
    ("numpy", "ndarray"),
    ("numpy", "void"),
    ("pandas", "DataFrame"),
    ("pandas", "Series"),
    ("pandas", "core", "indexing", "_iLocIndexer"),
    ("pandas", "core", "indexing", "_LocIndexer"),
}
# Built-in and standard library types for which subscript access is allowed.
BUILTIN_GETITEM: Set[InstancesHaveGetItem] = {
    # builtins
    dict,
    str,  # type: ignore[arg-type]
    bytes,  # type: ignore[arg-type]
    list,
    tuple,
    # collections
    collections.defaultdict,
    collections.deque,
    collections.OrderedDict,
    collections.ChainMap,
    collections.UserDict,
    collections.UserList,
    collections.UserString,  # type: ignore[arg-type]
    # helpers defined in this module
    _DummyNamedTuple,
    _IdentitySubscript,
}
623def _list_methods(cls, source=None):
624 """For use on immutable objects or with methods returning a copy"""
625 return [getattr(cls, k) for k in (source if source else dir(cls))]
# Names of methods considered non-mutating for dict-like and list-like types.
dict_non_mutating_methods = ("copy", "keys", "values", "items")
list_non_mutating_methods = ("copy", "index", "count")
# Attribute names shared by `set` and `frozenset`.
set_non_mutating_methods = set(dir(set)) & set(dir(frozenset))


# Concrete type of the object returned by `dict.keys()`.
dict_keys: Type[collections.abc.KeysView] = type({}.keys())

NUMERICS = {int, float, complex}
# Callables that may be called: constructors of built-in and standard
# library types together with selected methods of those types.
ALLOWED_CALLS = {
    # builtins
    bytes,
    *_list_methods(bytes),
    dict,
    *_list_methods(dict, dict_non_mutating_methods),
    dict_keys.isdisjoint,
    list,
    *_list_methods(list, list_non_mutating_methods),
    set,
    *_list_methods(set, set_non_mutating_methods),
    frozenset,
    *_list_methods(frozenset),
    range,
    str,
    *_list_methods(str),
    tuple,
    *_list_methods(tuple),
    # numbers
    *NUMERICS,
    *[method for numeric_cls in NUMERICS for method in _list_methods(numeric_cls)],
    # collections
    collections.deque,
    *_list_methods(collections.deque, list_non_mutating_methods),
    collections.defaultdict,
    *_list_methods(collections.defaultdict, dict_non_mutating_methods),
    collections.OrderedDict,
    *_list_methods(collections.OrderedDict, dict_non_mutating_methods),
    collections.UserDict,
    *_list_methods(collections.UserDict, dict_non_mutating_methods),
    collections.UserList,
    *_list_methods(collections.UserList, list_non_mutating_methods),
    collections.UserString,
    *_list_methods(collections.UserString, dir(str)),
    collections.Counter,
    *_list_methods(collections.Counter, dict_non_mutating_methods),
    collections.Counter.elements,
    collections.Counter.most_common,
}
# Types for which attribute access is allowed: everything that allows
# subscript access plus the types listed below.
BUILTIN_GETATTR: Set[MayHaveGetattr] = {
    *BUILTIN_GETITEM,
    set,
    frozenset,
    object,
    type,  # `type` handles a lot of generic cases, e.g. numbers as in `int.real`.
    *NUMERICS,
    dict_keys,
    MethodDescriptorType,
    ModuleType,
}


# Operations are allowed on the same types as attribute access.
BUILTIN_OPERATIONS = {*BUILTIN_GETATTR}
# Policy objects keyed by the evaluation mode name they implement.
EVALUATION_POLICIES = {
    # builtins only: no namespaces, no item/attribute access, no calls
    "minimal": EvaluationPolicy(
        allow_builtins_access=True,
        allow_locals_access=False,
        allow_globals_access=False,
        allow_item_access=False,
        allow_attr_access=False,
        allowed_calls=set(),
        allow_any_calls=False,
        allow_all_operations=False,
    ),
    # namespaces are readable; everything else goes through allow-lists
    "limited": SelectivePolicy(
        allowed_getitem=BUILTIN_GETITEM,
        allowed_getitem_external=SUPPORTED_EXTERNAL_GETITEM,
        allowed_getattr=BUILTIN_GETATTR,
        allowed_getattr_external={
            # pandas Series/Frame implements custom `__getattr__`
            ("pandas", "DataFrame"),
            ("pandas", "Series"),
        },
        allowed_operations=BUILTIN_OPERATIONS,
        allow_builtins_access=True,
        allow_locals_access=True,
        allow_globals_access=True,
        allowed_calls=ALLOWED_CALLS,
    ),
    # every flag of the base policy is switched on
    "unsafe": EvaluationPolicy(
        allow_builtins_access=True,
        allow_locals_access=True,
        allow_globals_access=True,
        allow_attr_access=True,
        allow_item_access=True,
        allow_any_calls=True,
        allow_all_operations=True,
    ),
}
# Names exported by `from ... import *`.
__all__ = [
    "guarded_eval",
    "eval_node",
    "GuardRejection",
    "EvaluationContext",
    "_unbind_method",
]