1"""
2Module for scope operations
3"""
4from __future__ import annotations
5
6import datetime
7import inspect
8from io import StringIO
9import itertools
10import pprint
11import struct
12import sys
13from typing import (
14 ChainMap,
15 TypeVar,
16)
17
18import numpy as np
19
20from pandas._libs.tslibs import Timestamp
21from pandas.errors import UndefinedVariableError
22
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")


# https://docs.python.org/3/library/collections.html#chainmap-examples-and-recipes
class DeepChainMap(ChainMap[_KT, _VT]):
    """
    ChainMap whose writes and deletes reach into the inner mappings.

    Assignment and deletion act on the first mapping in ``maps`` that
    already holds the key, rather than only ever touching ``maps[0]``.
    All chained mappings therefore have to be mutable.
    """

    def __setitem__(self, key: _KT, value: _VT) -> None:
        # Overwrite the key where it already lives; brand-new keys land in
        # the front-most mapping.
        owner = next((m for m in self.maps if key in m), self.maps[0])
        owner[key] = value

    def __delitem__(self, key: _KT) -> None:
        """
        Remove `key` from the first mapping that contains it.

        Raises
        ------
        KeyError
            If `key` doesn't exist.
        """
        owner = next((m for m in self.maps if key in m), None)
        if owner is None:
            raise KeyError(key)
        del owner[key]
54
55
def ensure_scope(
    level: int, global_dict=None, local_dict=None, resolvers=(), target=None
) -> Scope:
    """
    Build a ``Scope``, compensating for this wrapper's own stack frame.

    ``level`` is bumped by one before being handed to ``Scope``; every other
    argument is forwarded unchanged.
    """
    forwarded = {
        "global_dict": global_dict,
        "local_dict": local_dict,
        "resolvers": resolvers,
        "target": target,
    }
    return Scope(level + 1, **forwarded)
67
68
69def _replacer(x) -> str:
70 """
71 Replace a number with its hexadecimal representation. Used to tag
72 temporary variables with their calling scope's id.
73 """
74 # get the hex repr of the binary char and remove 0x and pad by pad_size
75 # zeros
76 try:
77 hexin = ord(x)
78 except TypeError:
79 # bytes literals masquerade as ints when iterating in py3
80 hexin = x
81
82 return hex(hexin)
83
84
def _raw_hex_id(obj) -> str:
    """
    Return a hexadecimal tag derived from ``id(obj)``.

    ``id(obj)`` is packed as a native pointer (``"@P"``) and every byte of
    the packed value is rendered with ``_replacer``; the pieces are
    concatenated as-is (each keeps its ``0x`` prefix, nothing is padded).
    """
    # id() is interpreted as a pointer, since that is what it really returns
    pointer_bytes = struct.pack("@P", id(obj))
    return "".join(map(_replacer, pointer_bytes))
90
91
# Names pre-seeded into the base mapping of every ``Scope``.
DEFAULT_GLOBALS = {
    # date/time constructors
    "Timestamp": Timestamp,
    "datetime": datetime.datetime,
    # boolean literals
    "True": True,
    "False": False,
    # container constructors
    "list": list,
    "tuple": tuple,
    # both spellings of infinity map to the same numpy value
    **dict.fromkeys(("inf", "Inf"), np.inf),
}
102
103
104def _get_pretty_string(obj) -> str:
105 """
106 Return a prettier version of obj.
107
108 Parameters
109 ----------
110 obj : object
111 Object to pretty print
112
113 Returns
114 -------
115 str
116 Pretty print object repr
117 """
118 sio = StringIO()
119 pprint.pprint(obj, stream=sio)
120 return sio.getvalue()
121
122
class Scope:
    """
    Object to hold scope, with a few bells to deal with some custom syntax
    and contexts added by pandas.

    Parameters
    ----------
    level : int
        How many frames above the caller of ``Scope(...)`` to look when
        `global_dict` / `local_dict` are not supplied.
    global_dict : dict or None, optional, default None
        Mapping used as the globals layer; the selected frame's globals are
        used when None.
    local_dict : dict or Scope or None, optional, default None
        Mapping used as the locals layer; the selected frame's locals are
        used when None. When a ``Scope`` is passed, its scope, target and
        resolvers are carried over instead.
    resolvers : list-like or None, optional, default None
        Mappings consulted for non-local names. Never mutated.
    target : object

    Attributes
    ----------
    level : int
        The `level` argument plus one.
    scope : DeepChainMap
    resolvers : DeepChainMap
    target : object
    temps : dict
    """

    __slots__ = ["level", "scope", "target", "resolvers", "temps"]
    level: int
    scope: DeepChainMap
    resolvers: DeepChainMap
    temps: dict

    def __init__(
        self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None
    ) -> None:
        # +1 skips this ``__init__`` frame when calling ``sys._getframe``
        self.level = level + 1

        # shallow copy because we don't want to keep filling this up with what
        # was there before if there are multiple calls to Scope/_ensure_scope
        self.scope = DeepChainMap(DEFAULT_GLOBALS.copy())
        self.target = target

        if isinstance(local_dict, Scope):
            self.scope.update(local_dict.scope)
            if local_dict.target is not None:
                self.target = local_dict.target
            self._update(local_dict.level)

        frame = sys._getframe(self.level)

        try:
            # shallow copy here because we don't want to replace what's in
            # scope when we align terms (alignment accesses the underlying
            # numpy array of pandas objects)
            scope_global = self.scope.new_child(
                (global_dict if global_dict is not None else frame.f_globals).copy()
            )
            self.scope = DeepChainMap(scope_global)
            if not isinstance(local_dict, Scope):
                scope_local = self.scope.new_child(
                    (local_dict if local_dict is not None else frame.f_locals).copy()
                )
                self.scope = DeepChainMap(scope_local)
        finally:
            # drop the frame reference as soon as we are done with it
            del frame

        # Normalise to a fresh tuple: ``None`` is documented as acceptable,
        # and an in-place ``+=`` on a caller-supplied list would otherwise
        # silently extend the caller's object.
        resolvers = () if resolvers is None else tuple(resolvers)

        # assumes that resolvers are going from outermost scope to inner
        if isinstance(local_dict, Scope):
            resolvers += tuple(local_dict.resolvers.maps)
        self.resolvers = DeepChainMap(*resolvers)
        self.temps = {}

    def __repr__(self) -> str:
        scope_keys = _get_pretty_string(list(self.scope.keys()))
        res_keys = _get_pretty_string(list(self.resolvers.keys()))
        return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})"

    @property
    def has_resolvers(self) -> bool:
        """
        Return whether we have any extra scope.

        For example, DataFrames pass their columns as resolvers during calls to
        ``DataFrame.eval()`` and ``DataFrame.query()``.

        Returns
        -------
        hr : bool
            True when ``self.resolvers`` contains at least one key.
        """
        return bool(len(self.resolvers))

    def resolve(self, key: str, is_local: bool):
        """
        Resolve a variable name in a possibly local context.

        Parameters
        ----------
        key : str
            A variable name
        is_local : bool
            Flag indicating whether the variable is local or not (prefixed with
            the '@' symbol)

        Returns
        -------
        value : object
            The value of a particular variable

        Raises
        ------
        UndefinedVariableError
            If `key` is found neither in the selected mapping nor in the
            temporaries.
        """
        try:
            # only look for locals in outer scope
            if is_local:
                return self.scope[key]

            # not a local variable so check in resolvers if we have them
            if self.has_resolvers:
                return self.resolvers[key]

            # if we're here that means that we have no locals and we also have
            # no resolvers
            assert not is_local and not self.has_resolvers
            return self.scope[key]
        except KeyError:
            try:
                # last ditch effort we look in temporaries
                # these are created when parsing indexing expressions
                # e.g., df[df > 0]
                return self.temps[key]
            except KeyError as err:
                raise UndefinedVariableError(key, is_local) from err

    def swapkey(self, old_key: str, new_key: str, new_value=None) -> None:
        """
        Replace a variable name, with a potentially new value.

        The resolvers (when there are any), then the scope, then the
        temporaries are searched in that order. `new_key` is stored in the
        first mapping that contains `old_key`; `old_key` itself is left in
        place. Nothing happens when `old_key` is not found.

        Parameters
        ----------
        old_key : str
            Current variable name to replace
        new_key : str
            New variable name to replace `old_key` with
        new_value : object
            Value to be replaced along with the possible renaming
        """
        # Always search a *new* list: ``self.scope.maps`` is the live list
        # backing the chain, so appending ``self.temps`` to it directly would
        # permanently grow the scope on every call.
        if self.has_resolvers:
            maps = self.resolvers.maps + self.scope.maps
        else:
            maps = list(self.scope.maps)

        maps.append(self.temps)

        for mapping in maps:
            if old_key in mapping:
                mapping[new_key] = new_value
                return

    def _get_vars(self, stack, scopes: list[str]) -> None:
        """
        Get specifically scoped variables from a list of stack frames.

        Parameters
        ----------
        stack : list
            A list of stack frames as returned by ``inspect.stack()``
        scopes : sequence of strings
            A sequence containing valid stack frame attribute names that
            evaluate to a dictionary. For example, ('locals', 'globals')
        """
        variables = itertools.product(scopes, stack)
        for scope, (frame, _, _, _, _, _) in variables:
            try:
                # e.g. ``frame.f_locals``; pushed in front of the current chain
                d = getattr(frame, f"f_{scope}")
                self.scope = DeepChainMap(self.scope.new_child(d))
            finally:
                # won't remove it, but DECREF it
                # in Py3 this probably isn't necessary since frame won't be
                # scope after the loop
                del frame

    def _update(self, level: int) -> None:
        """
        Update the current scope by going back `level` levels.

        The locals of the first ``level + 1`` entries of ``inspect.stack()``
        (the first entry being this method's own frame) are layered onto
        ``self.scope``.

        Parameters
        ----------
        level : int
        """
        sl = level + 1

        # NOTE(review): frames are visited innermost-first and each one is
        # pushed in front of the chain, so outer frames end up shadowing
        # inner ones -- confirm this ordering is intended.
        stack = inspect.stack()

        try:
            self._get_vars(stack[:sl], scopes=["locals"])
        finally:
            # release the frame references held by the stack list
            del stack[:], stack

    def add_tmp(self, value) -> str:
        """
        Add a temporary variable to the scope.

        Parameters
        ----------
        value : object
            An arbitrary object to be assigned to a temporary variable.

        Returns
        -------
        str
            The name of the temporary variable created.
        """
        # name = <type name>_<current number of temps>_<hex tag of this Scope>
        name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}"

        # temporaries live in their own dict, separate from scope/resolvers
        assert name not in self.temps
        self.temps[name] = value
        assert name in self.temps

        # ``ntemps`` is derived from ``len(self.temps)``, so the counter used
        # in the next generated name has already advanced
        return name

    @property
    def ntemps(self) -> int:
        """The number of temporary variables in this scope"""
        return len(self.temps)

    @property
    def full_scope(self) -> DeepChainMap:
        """
        Return the full scope for use with passing to engines transparently
        as a mapping.

        Lookup order is temporaries, then resolvers, then scope. A new chain
        is built on every access.

        Returns
        -------
        vars : DeepChainMap
            All variables in this scope.
        """
        maps = [self.temps] + self.resolvers.maps + self.scope.maps
        return DeepChainMap(*maps)