Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/computation/eval.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

111 statements  

1""" 

2Top level ``eval`` module. 

3""" 

4from __future__ import annotations 

5 

6import tokenize 

7from typing import TYPE_CHECKING 

8import warnings 

9 

10from pandas.util._exceptions import find_stack_level 

11from pandas.util._validators import validate_bool_kwarg 

12 

13from pandas.core.dtypes.common import is_extension_array_dtype 

14 

15from pandas.core.computation.engines import ENGINES 

16from pandas.core.computation.expr import ( 

17 PARSERS, 

18 Expr, 

19) 

20from pandas.core.computation.parsing import tokenize_string 

21from pandas.core.computation.scope import ensure_scope 

22from pandas.core.generic import NDFrame 

23 

24from pandas.io.formats.printing import pprint_thing 

25 

26if TYPE_CHECKING: 

27 from pandas.core.computation.ops import BinOp 

28 

29 

30def _check_engine(engine: str | None) -> str: 

31 """ 

32 Make sure a valid engine is passed. 

33 

34 Parameters 

35 ---------- 

36 engine : str 

37 String to validate. 

38 

39 Raises 

40 ------ 

41 KeyError 

42 * If an invalid engine is passed. 

43 ImportError 

44 * If numexpr was requested but doesn't exist. 

45 

46 Returns 

47 ------- 

48 str 

49 Engine name. 

50 """ 

51 from pandas.core.computation.check import NUMEXPR_INSTALLED 

52 from pandas.core.computation.expressions import USE_NUMEXPR 

53 

54 if engine is None: 

55 engine = "numexpr" if USE_NUMEXPR else "python" 

56 

57 if engine not in ENGINES: 

58 valid_engines = list(ENGINES.keys()) 

59 raise KeyError( 

60 f"Invalid engine '{engine}' passed, valid engines are {valid_engines}" 

61 ) 

62 

63 # TODO: validate this in a more general way (thinking of future engines 

64 # that won't necessarily be import-able) 

65 # Could potentially be done on engine instantiation 

66 if engine == "numexpr" and not NUMEXPR_INSTALLED: 

67 raise ImportError( 

68 "'numexpr' is not installed or an unsupported version. Cannot use " 

69 "engine='numexpr' for query/eval if 'numexpr' is not installed" 

70 ) 

71 

72 return engine 

73 

74 

75def _check_parser(parser: str): 

76 """ 

77 Make sure a valid parser is passed. 

78 

79 Parameters 

80 ---------- 

81 parser : str 

82 

83 Raises 

84 ------ 

85 KeyError 

86 * If an invalid parser is passed 

87 """ 

88 if parser not in PARSERS: 

89 raise KeyError( 

90 f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}" 

91 ) 

92 

93 

94def _check_resolvers(resolvers): 

95 if resolvers is not None: 

96 for resolver in resolvers: 

97 if not hasattr(resolver, "__getitem__"): 

98 name = type(resolver).__name__ 

99 raise TypeError( 

100 f"Resolver of type '{name}' does not " 

101 "implement the __getitem__ method" 

102 ) 

103 

104 

105def _check_expression(expr): 

106 """ 

107 Make sure an expression is not an empty string 

108 

109 Parameters 

110 ---------- 

111 expr : object 

112 An object that can be converted to a string 

113 

114 Raises 

115 ------ 

116 ValueError 

117 * If expr is an empty string 

118 """ 

119 if not expr: 

120 raise ValueError("expr cannot be an empty string") 

121 

122 

def _convert_expression(expr) -> str:
    """
    Turn an object into a non-empty expression string.

    The object is rendered with ``pprint_thing`` and the result is passed
    through ``_check_expression`` before being returned. ``eval`` uses this
    to normalise each expression (string or operator object) before parsing.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of ``expr``.

    Raises
    ------
    ValueError
        * If the rendered expression is empty.
    """
    rendered = pprint_thing(expr)
    # Validation happens on the rendered text, not on the original object.
    _check_expression(rendered)
    return rendered

150 

151 

152def _check_for_locals(expr: str, stack_level: int, parser: str): 

153 at_top_of_stack = stack_level == 0 

154 not_pandas_parser = parser != "pandas" 

155 

156 if not_pandas_parser: 

157 msg = "The '@' prefix is only supported by the pandas parser" 

158 elif at_top_of_stack: 

159 msg = ( 

160 "The '@' prefix is not allowed in top-level eval calls.\n" 

161 "please refer to your variables by name without the '@' prefix." 

162 ) 

163 

164 if at_top_of_stack or not_pandas_parser: 

165 for toknum, tokval in tokenize_string(expr): 

166 if toknum == tokenize.OP and tokval == "@": 

167 raise SyntaxError(msg) 

168 

169 

def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level: int = 0,
    target=None,
    inplace: bool = False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
        A string spanning several lines is split into one expression per
        non-empty line.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'} or None, default None

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'`` : This engine evaluates pandas objects using
          numexpr for large speed ups in complex expressions with large frames.
        - ``'python'`` : Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.
    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all of them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
            of a frame.
    DataFrame.eval : Evaluate a string describing operations on
            DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        # one expression per non-blank line, surrounding whitespace removed
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    # validate the arguments once, before any expression is evaluated
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        # The scope is rebuilt on every iteration with the current values of
        # ``resolvers`` and ``target``, both of which may have been rebound by
        # an earlier iteration of this loop.
        # NOTE(review): ``level + 1`` presumably accounts for this function's
        # own frame -- confirm against ``ensure_scope``.
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # Switch away from numexpr when the expression's return type or any
        # operand type is an extension array dtype. ``engine`` is rebound, so
        # the switch also applies to every later expression in this call.
        if engine == "numexpr" and (
            is_extension_array_dtype(parsed_expr.terms.return_type)
            or getattr(parsed_expr.terms, "operand_types", None) is not None
            and any(
                is_extension_array_dtype(elem)
                for elem in parsed_expr.terms.operand_types
            )
        ):
            warnings.warn(
                "Engine has switched to 'python' because numexpr does not support "
                "extension array dtypes. Please set your engine to python manually.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
            engine = "python"

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        # An expression without an assignment is only acceptable as a single
        # expression evaluated with ``inplace=False``.
        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            if inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    if inplace and isinstance(target, NDFrame):
                        target.loc[:, assigner] = ret
                    else:
                        target[assigner] = ret
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            # Make the freshly assigned value resolvable by name for the
            # expressions that follow.
            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    # no existing resolver holds this name: append a new one
                    resolvers += ({assigner: ret},)

            # the value now lives in ``target``; it is not returned separately
            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    if inplace is False:
        return target if target_modified else ret