1"""
2Top level ``eval`` module.
3"""
4from __future__ import annotations
5
6import tokenize
7from typing import TYPE_CHECKING
8import warnings
9
10from pandas.util._exceptions import find_stack_level
11from pandas.util._validators import validate_bool_kwarg
12
13from pandas.core.dtypes.common import is_extension_array_dtype
14
15from pandas.core.computation.engines import ENGINES
16from pandas.core.computation.expr import (
17 PARSERS,
18 Expr,
19)
20from pandas.core.computation.parsing import tokenize_string
21from pandas.core.computation.scope import ensure_scope
22from pandas.core.generic import NDFrame
23
24from pandas.io.formats.printing import pprint_thing
25
26if TYPE_CHECKING:
27 from pandas.core.computation.ops import BinOp
28
29
def _check_engine(engine: str | None) -> str:
    """
    Validate the requested engine name, resolving the default if needed.

    Parameters
    ----------
    engine : str or None
        Name of the engine to validate. ``None`` selects ``"numexpr"`` when
        ``USE_NUMEXPR`` is truthy and ``"python"`` otherwise.

    Returns
    -------
    str
        The validated (and possibly defaulted) engine name.

    Raises
    ------
    KeyError
        * If the name is not a key of ``ENGINES``.
    ImportError
        * If ``"numexpr"`` was selected but ``NUMEXPR_INSTALLED`` is falsy.
    """
    from pandas.core.computation.check import NUMEXPR_INSTALLED
    from pandas.core.computation.expressions import USE_NUMEXPR

    # Resolve the default before any validation takes place.
    if engine is None:
        chosen = "numexpr" if USE_NUMEXPR else "python"
    else:
        chosen = engine

    if chosen not in ENGINES:
        known = list(ENGINES.keys())
        raise KeyError(f"Invalid engine '{chosen}' passed, valid engines are {known}")

    # TODO: validate this in a more general way (thinking of future engines
    # that won't necessarily be import-able)
    # Could potentially be done on engine instantiation
    if chosen == "numexpr":
        if not NUMEXPR_INSTALLED:
            raise ImportError(
                "'numexpr' is not installed or an unsupported version. Cannot use "
                "engine='numexpr' for query/eval if 'numexpr' is not installed"
            )

    return chosen
73
74
def _check_parser(parser: str) -> None:
    """
    Make sure a valid parser is passed.

    Parameters
    ----------
    parser : str
        Name of the parser to validate; must be a key of ``PARSERS``.

    Raises
    ------
    KeyError
        * If an invalid parser is passed
    """
    if parser not in PARSERS:
        # Render a plain list (as ``_check_engine`` does) rather than the
        # ``dict_keys([...])`` repr of the keys view.
        valid_parsers = list(PARSERS.keys())
        raise KeyError(
            f"Invalid parser '{parser}' passed, valid parsers are {valid_parsers}"
        )
92
93
def _check_resolvers(resolvers):
    """
    Make sure every resolver supports item lookup.

    Parameters
    ----------
    resolvers : iterable or None
        Objects to validate. ``None`` is accepted and skips validation.

    Raises
    ------
    TypeError
        * If any resolver has no ``__getitem__`` attribute.
    """
    if resolvers is None:
        return

    for candidate in resolvers:
        if hasattr(candidate, "__getitem__"):
            continue
        type_name = type(candidate).__name__
        raise TypeError(
            f"Resolver of type '{type_name}' does not implement the __getitem__ method"
        )
103
104
def _check_expression(expr):
    """
    Reject an empty expression.

    Parameters
    ----------
    expr : object
        The expression (normally a string) to validate. The check is a plain
        truthiness test, so any falsy value is rejected.

    Raises
    ------
    ValueError
        * If ``expr`` is empty (falsy).
    """
    if expr:
        return
    raise ValueError("expr cannot be an empty string")
121
122
def _convert_expression(expr) -> str:
    """
    Turn an object into a non-empty expression string.

    The object is rendered with ``pprint_thing`` and the rendered text is then
    validated with ``_check_expression``. This is used to convert operators to
    their string representation for recursive calls to :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to render as an expression.

    Returns
    -------
    str
        The rendered text of ``expr``.

    Raises
    ------
    ValueError
        * If the rendered expression is empty.
    """
    rendered = pprint_thing(expr)
    # Validate the rendered text, not the original object.
    _check_expression(rendered)
    return rendered
150
151
def _check_for_locals(expr: str, stack_level: int, parser: str):
    """
    Reject the ``@`` prefix where it is not supported.

    The expression is only tokenized when the call is at stack level 0 or the
    parser is not ``"pandas"``; otherwise nothing is checked.

    Parameters
    ----------
    expr : str
        The expression to scan for ``@`` operator tokens.
    stack_level : int
        Stack level of the call; ``0`` is treated as a top-level call.
    parser : str
        Name of the parser in use.

    Raises
    ------
    SyntaxError
        * If ``expr`` contains an ``@`` operator token and the check applies.
    """
    top_level_call = stack_level == 0
    foreign_parser = parser != "pandas"

    if not (top_level_call or foreign_parser):
        return

    # The parser restriction takes precedence over the top-level one.
    if foreign_parser:
        msg = "The '@' prefix is only supported by the pandas parser"
    else:
        msg = (
            "The '@' prefix is not allowed in top-level eval calls.\n"
            "please refer to your variables by name without the '@' prefix."
        )

    has_at_prefix = any(
        toknum == tokenize.OP and tokval == "@"
        for toknum, tokval in tokenize_string(expr)
    )
    if has_at_prefix:
        raise SyntaxError(msg)
168
169
def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level: int = 0,
    target=None,
    inplace: bool = False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'} or None, default None

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'`` : This engine evaluates pandas objects using
          numexpr for large speed ups in complex expressions with large frames.
        - ``'python'`` : Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.
    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes. The sequence itself is never modified, although
        a resolver that already holds an assigned name has that entry updated.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all of them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
            of a frame.
    DataFrame.eval : Evaluate a string describing operations on
            DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        # one expression per non-blank line
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # Fall back to the python engine when the result or any operand has
        # an extension array dtype; the fallback persists for later lines.
        if engine == "numexpr" and (
            is_extension_array_dtype(parsed_expr.terms.return_type)
            or getattr(parsed_expr.terms, "operand_types", None) is not None
            and any(
                is_extension_array_dtype(elem)
                for elem in parsed_expr.terms.operand_types
            )
        ):
            warnings.warn(
                "Engine has switched to 'python' because numexpr does not support "
                "extension array dtypes. Please set your engine to python manually.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
            engine = "python"

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            if inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    if inplace and isinstance(target, NDFrame):
                        target.loc[:, assigner] = ret
                    else:
                        target[assigner] = ret
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    # Build a new tuple instead of using ``+=``: on a
                    # caller-supplied list that would extend it in place.
                    resolvers = tuple(resolvers) + ({assigner: ret},)

            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    if inplace is False:
        return target if target_modified else ret