Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/computation/eval.py: 19%

1"""

2Top level ``eval`` module.

3"""

4from __future__ import annotations

6import tokenize

7from typing import TYPE_CHECKING

8import warnings

10from pandas.util._exceptions import find_stack_level

11from pandas.util._validators import validate_bool_kwarg

13from pandas.core.dtypes.common import is_extension_array_dtype

15from pandas.core.computation.engines import ENGINES

16from pandas.core.computation.expr import (

17 PARSERS,

18 Expr,

19)

20from pandas.core.computation.parsing import tokenize_string

21from pandas.core.computation.scope import ensure_scope

22from pandas.core.generic import NDFrame

24from pandas.io.formats.printing import pprint_thing

26if TYPE_CHECKING:

27 from pandas.core.computation.ops import BinOp

30def _check_engine(engine: str | None) -> str:

31 """

32 Make sure a valid engine is passed.

34 Parameters

35 ----------

36 engine : str

37 String to validate.

39 Raises

40 ------

41 KeyError

42 * If an invalid engine is passed.

43 ImportError

44 * If numexpr was requested but doesn't exist.

46 Returns

47 -------

48 str

49 Engine name.

50 """

51 from pandas.core.computation.check import NUMEXPR_INSTALLED

52 from pandas.core.computation.expressions import USE_NUMEXPR

54 if engine is None:

55 engine = "numexpr" if USE_NUMEXPR else "python"

57 if engine not in ENGINES:

58 valid_engines = list(ENGINES.keys())

59 raise KeyError(

60 f"Invalid engine '{engine}' passed, valid engines are {valid_engines}"

61 )

63 # TODO: validate this in a more general way (thinking of future engines

64 # that won't necessarily be import-able)

65 # Could potentially be done on engine instantiation

66 if engine == "numexpr" and not NUMEXPR_INSTALLED:

67 raise ImportError(

68 "'numexpr' is not installed or an unsupported version. Cannot use "

69 "engine='numexpr' for query/eval if 'numexpr' is not installed"

70 )

72 return engine

75def _check_parser(parser: str):

76 """

77 Make sure a valid parser is passed.

79 Parameters

80 ----------

81 parser : str

83 Raises

84 ------

85 KeyError

86 * If an invalid parser is passed

87 """

88 if parser not in PARSERS:

89 raise KeyError(

90 f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}"

91 )

94def _check_resolvers(resolvers):

95 if resolvers is not None:

96 for resolver in resolvers:

97 if not hasattr(resolver, "__getitem__"):

98 name = type(resolver).__name__

99 raise TypeError(

100 f"Resolver of type '{name}' does not "

101 "implement the __getitem__ method"

102 )

103

104

105def _check_expression(expr):

106 """

107 Make sure an expression is not an empty string

108

109 Parameters

110 ----------

111 expr : object

112 An object that can be converted to a string

113

114 Raises

115 ------

116 ValueError

117 * If expr is an empty string

118 """

119 if not expr:

120 raise ValueError("expr cannot be an empty string")

121

122

def _convert_expression(expr) -> str:
    """
    Turn an object into a non-empty expression string.

    The object is rendered with ``pprint_thing`` and the rendered text is
    then passed through ``_check_expression``. This is used to convert
    operators to their string representation for recursive calls to
    :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of ``expr``.

    Raises
    ------
    ValueError
        * If the rendered expression is empty.
    """
    rendered = pprint_thing(expr)
    # Validate the rendered text, not the input object.
    _check_expression(rendered)
    return rendered

150

151

152def _check_for_locals(expr: str, stack_level: int, parser: str):

153 at_top_of_stack = stack_level == 0

154 not_pandas_parser = parser != "pandas"

155

156 if not_pandas_parser:

157 msg = "The '@' prefix is only supported by the pandas parser"

158 elif at_top_of_stack:

159 msg = (

160 "The '@' prefix is not allowed in top-level eval calls.\n"

161 "please refer to your variables by name without the '@' prefix."

162 )

163

164 if at_top_of_stack or not_pandas_parser:

165 for toknum, tokval in tokenize_string(expr):

166 if toknum == tokenize.OP and tokval == "@":

167 raise SyntaxError(msg)

168

169

def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level: int = 0,
    target=None,
    inplace: bool = False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'} or None, default None

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'`` : This engine evaluates pandas objects using
          numexpr for large speed ups in complex expressions with large frames.
        - ``'python'`` : Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.
    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes. The sequence itself is never extended in place;
        an existing resolver that already holds an assigned name is updated
        with the new value.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
            of a frame.
    DataFrame.eval : Evaluate a string describing operations on
            DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        # one expression per non-blank line
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        # NOTE(review): ``level + 1`` presumably accounts for this function's
        # own frame, so this call should stay directly in ``eval`` -- confirm
        # against ``ensure_scope`` before refactoring.
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # ``and`` binds tighter than ``or``: fall back when the return type is
        # an extension dtype, or when operand types exist and any of them is.
        if engine == "numexpr" and (
            is_extension_array_dtype(parsed_expr.terms.return_type)
            or getattr(parsed_expr.terms, "operand_types", None) is not None
            and any(
                is_extension_array_dtype(elem)
                for elem in parsed_expr.terms.operand_types
            )
        ):
            warnings.warn(
                "Engine has switched to 'python' because numexpr does not support "
                "extension array dtypes. Please set your engine to python manually.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
            # the switch also applies to every remaining expression
            engine = "python"

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            if inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    if inplace and isinstance(target, NDFrame):
                        target.loc[:, assigner] = ret
                    else:
                        target[assigner] = ret
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    # Rebind to a new tuple rather than using ``+=``, which
                    # would extend a caller-supplied list in place.
                    resolvers = (*resolvers, {assigner: ret})

            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    # (any other value of ``inplace`` falls through and returns None)
    if inplace is False:
        return target if target_modified else ret