1"""
2Module for scope operations
3"""
4from __future__ import annotations
5
6from collections import ChainMap
7import datetime
8import inspect
9from io import StringIO
10import itertools
11import pprint
12import struct
13import sys
14from typing import TypeVar
15
16import numpy as np
17
18from pandas._libs.tslibs import Timestamp
19from pandas.errors import UndefinedVariableError
20
# Generic key / value type parameters used to subscript ``ChainMap`` in the
# ``DeepChainMap`` class definition and its method annotations.
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")
23
24
25# https://docs.python.org/3/library/collections.html#chainmap-examples-and-recipes
class DeepChainMap(ChainMap[_KT, _VT]):
    """
    ChainMap whose writes and deletions reach into the inner mappings.

    A plain ``ChainMap`` only ever writes to / deletes from ``maps[0]``.
    Here the first mapping (in ``maps`` order) that already holds the key is
    the one that gets modified.

    Every mapping in the chain has to be mutable for this to work.
    """

    def __setitem__(self, key: _KT, value: _VT) -> None:
        """
        Assign ``value`` to ``key`` in the first mapping that contains ``key``.

        When no mapping contains ``key`` the assignment goes to ``maps[0]``.
        """
        owner = next(
            (candidate for candidate in self.maps if key in candidate),
            self.maps[0],
        )
        owner[key] = value

    def __delitem__(self, key: _KT) -> None:
        """
        Delete ``key`` from the first mapping that contains it.

        Raises
        ------
        KeyError
            If `key` doesn't exist in any of the mappings.
        """
        owner = next(
            (candidate for candidate in self.maps if key in candidate), None
        )
        if owner is None:
            raise KeyError(key)
        del owner[key]
52
53
def ensure_scope(
    level: int, global_dict=None, local_dict=None, resolvers=(), target=None
) -> Scope:
    """
    Create a ``Scope``, forwarding every argument and bumping ``level`` by one.

    Parameters
    ----------
    level : int
        Stack depth requested by the caller; ``level + 1`` is handed to
        ``Scope`` (presumably to account for this function's own frame).
    global_dict, local_dict, resolvers, target
        Passed through to ``Scope`` unchanged.

    Returns
    -------
    Scope
    """
    # NOTE: Scope is constructed directly in this frame; routing the call
    # through another helper would change the stack depth Scope observes.
    forwarded = {
        "global_dict": global_dict,
        "local_dict": local_dict,
        "resolvers": resolvers,
        "target": target,
    }
    return Scope(level + 1, **forwarded)
65
66
67def _replacer(x) -> str:
68 """
69 Replace a number with its hexadecimal representation. Used to tag
70 temporary variables with their calling scope's id.
71 """
72 # get the hex repr of the binary char and remove 0x and pad by pad_size
73 # zeros
74 try:
75 hexin = ord(x)
76 except TypeError:
77 # bytes literals masquerade as ints when iterating in py3
78 hexin = x
79
80 return hex(hexin)
81
82
def _raw_hex_id(obj) -> str:
    """
    Return a hexadecimal string derived from ``id(obj)``.

    ``id(obj)`` is packed as a native pointer (``"@P"``) and each resulting
    byte is converted with ``_replacer``; the per-byte strings (each carrying
    its own ``0x`` prefix) are concatenated in order.
    """
    # treat the id as a pointer, since that is what ``id`` really returns
    pointer_bytes = struct.pack("@P", id(obj))
    return "".join(map(_replacer, pointer_bytes))
88
89
# Names that every ``Scope`` starts out with: ``Scope.__init__`` seeds its
# scope chain with a shallow copy of this dict, so the names below resolve
# even when they are absent from the supplied/captured globals and locals.
DEFAULT_GLOBALS = {
    "Timestamp": Timestamp,
    "datetime": datetime.datetime,
    "True": True,
    "False": False,
    "list": list,
    "tuple": tuple,
    # both spellings map to the same numpy infinity
    "inf": np.inf,
    "Inf": np.inf,
}
100
101
102def _get_pretty_string(obj) -> str:
103 """
104 Return a prettier version of obj.
105
106 Parameters
107 ----------
108 obj : object
109 Object to pretty print
110
111 Returns
112 -------
113 str
114 Pretty print object repr
115 """
116 sio = StringIO()
117 pprint.pprint(obj, stream=sio)
118 return sio.getvalue()
119
120
class Scope:
    """
    Object to hold scope, with a few bells to deal with some custom syntax
    and contexts added by pandas.

    Parameters
    ----------
    level : int
        Number of stack frames to go back (relative to the caller of
        ``Scope``) when capturing globals/locals.
    global_dict : dict or None, optional, default None
        Globals to use; when None the captured frame's ``f_globals`` is used.
    local_dict : dict or Scope or None, optional, default None
        Locals to use; when None the captured frame's ``f_locals`` is used.
        When a ``Scope`` is given, its scope, target and resolvers are
        carried over instead.
    resolvers : list-like or None, optional, default ()
        Extra mappings consulted for non-local names. ``None`` is treated
        like an empty sequence.
    target : object

    Attributes
    ----------
    level : int
    scope : DeepChainMap
    target : object
    resolvers : DeepChainMap
    temps : dict
    """

    __slots__ = ["level", "scope", "target", "resolvers", "temps"]
    level: int
    scope: DeepChainMap
    resolvers: DeepChainMap
    temps: dict

    def __init__(
        self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None
    ) -> None:
        # +1 so that the frame lookup below skips this ``__init__`` frame
        self.level = level + 1

        # shallow copy because we don't want to keep filling this up with what
        # was there before if there are multiple calls to Scope/_ensure_scope
        self.scope = DeepChainMap(DEFAULT_GLOBALS.copy())
        self.target = target

        if isinstance(local_dict, Scope):
            self.scope.update(local_dict.scope)
            if local_dict.target is not None:
                self.target = local_dict.target
            self._update(local_dict.level)

        # NOTE: must be called directly from ``__init__``; moving it into a
        # helper would shift the frame that ``self.level`` points at.
        frame = sys._getframe(self.level)

        try:
            # shallow copy here because we don't want to replace what's in
            # scope when we align terms (alignment accesses the underlying
            # numpy array of pandas objects)
            scope_global = self.scope.new_child(
                (global_dict if global_dict is not None else frame.f_globals).copy()
            )
            self.scope = DeepChainMap(scope_global)
            if not isinstance(local_dict, Scope):
                scope_local = self.scope.new_child(
                    (local_dict if local_dict is not None else frame.f_locals).copy()
                )
                self.scope = DeepChainMap(scope_local)
        finally:
            # drop the frame reference to avoid keeping the frame alive
            del frame

        # Normalise into a fresh tuple: this accepts ``None`` and guarantees
        # that a caller-supplied list is never extended in place.
        resolver_maps = () if resolvers is None else tuple(resolvers)

        # assumes that resolvers are going from outermost scope to inner
        if isinstance(local_dict, Scope):
            resolver_maps += tuple(local_dict.resolvers.maps)
        self.resolvers = DeepChainMap(*resolver_maps)
        self.temps = {}

    def __repr__(self) -> str:
        scope_keys = _get_pretty_string(list(self.scope.keys()))
        res_keys = _get_pretty_string(list(self.resolvers.keys()))
        return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})"

    @property
    def has_resolvers(self) -> bool:
        """
        Return whether we have any extra scope.

        For example, DataFrames pass Their columns as resolvers during calls to
        ``DataFrame.eval()`` and ``DataFrame.query()``.

        Returns
        -------
        hr : bool
            True when the resolver chain holds at least one key.
        """
        return bool(len(self.resolvers))

    def resolve(self, key: str, is_local: bool):
        """
        Resolve a variable name in a possibly local context.

        Parameters
        ----------
        key : str
            A variable name
        is_local : bool
            Flag indicating whether the variable is local or not (prefixed with
            the '@' symbol)

        Returns
        -------
        value : object
            The value of a particular variable

        Raises
        ------
        UndefinedVariableError
            If ``key`` is found neither in the mapping selected by
            ``is_local`` / ``has_resolvers`` nor in the temporaries.
        """
        try:
            # only look for locals in outer scope
            if is_local:
                return self.scope[key]

            # not a local variable so check in resolvers if we have them
            if self.has_resolvers:
                return self.resolvers[key]

            # if we're here that means that we have no locals and we also have
            # no resolvers
            assert not is_local and not self.has_resolvers
            return self.scope[key]
        except KeyError:
            try:
                # last ditch effort we look in temporaries
                # these are created when parsing indexing expressions
                # e.g., df[df > 0]
                return self.temps[key]
            except KeyError as err:
                raise UndefinedVariableError(key, is_local) from err

    def swapkey(self, old_key: str, new_key: str, new_value=None) -> None:
        """
        Replace a variable name, with a potentially new value.

        The first mapping (resolvers, then scope, then temporaries) that
        contains ``old_key`` gets ``new_key`` set to ``new_value``. If no
        mapping contains ``old_key`` nothing happens.

        Parameters
        ----------
        old_key : str
            Current variable name to replace
        new_key : str
            New variable name to replace `old_key` with
        new_value : object
            Value to be replaced along with the possible renaming
        """
        # Always build a *new* list: appending ``self.temps`` directly to
        # ``self.scope.maps`` would permanently grow the scope chain on every
        # call made without resolvers.
        if self.has_resolvers:
            maps = [*self.resolvers.maps, *self.scope.maps]
        else:
            maps = list(self.scope.maps)

        maps.append(self.temps)

        for mapping in maps:
            if old_key in mapping:
                mapping[new_key] = new_value
                return

    def _get_vars(self, stack, scopes: list[str]) -> None:
        """
        Get specifically scoped variables from a list of stack frames.

        Each selected frame dictionary is pushed as a new child onto
        ``self.scope``.

        Parameters
        ----------
        stack : list
            A list of stack frames as returned by ``inspect.stack()``
        scopes : sequence of strings
            A sequence containing valid stack frame attribute names that
            evaluate to a dictionary. For example, ('locals', 'globals')
        """
        variables = itertools.product(scopes, stack)
        for scope, (frame, _, _, _, _, _) in variables:
            try:
                d = getattr(frame, f"f_{scope}")
                self.scope = DeepChainMap(self.scope.new_child(d))
            finally:
                # won't remove it, but DECREF it
                # in Py3 this probably isn't necessary since frame won't be
                # scope after the loop
                del frame

    def _update(self, level: int) -> None:
        """
        Update the current scope by going back `level` levels.

        Parameters
        ----------
        level : int
        """
        sl = level + 1

        # add sl frames to the scope starting with the
        # most distant and overwriting with more current
        # makes sure that we can capture variable scope
        stack = inspect.stack()

        try:
            self._get_vars(stack[:sl], scopes=["locals"])
        finally:
            # empty the list and drop the name so no frame references linger
            del stack[:], stack

    def add_tmp(self, value) -> str:
        """
        Add a temporary variable to the scope.

        Parameters
        ----------
        value : object
            An arbitrary object to be assigned to a temporary variable.

        Returns
        -------
        str
            The name of the temporary variable created.
        """
        # the name combines the value's type, the current number of
        # temporaries and the hex id of this scope
        name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}"

        # add to inner most scope
        assert name not in self.temps
        self.temps[name] = value
        assert name in self.temps

        return name

    @property
    def ntemps(self) -> int:
        """The number of temporary variables in this scope"""
        return len(self.temps)

    @property
    def full_scope(self) -> DeepChainMap:
        """
        Return the full scope for use with passing to engines transparently
        as a mapping.

        Lookup order is temporaries, then resolvers, then scope.

        Returns
        -------
        vars : DeepChainMap
            All variables in this scope.
        """
        maps = [self.temps] + self.resolvers.maps + self.scope.maps
        return DeepChainMap(*maps)