Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/conditions.py: 13%
197 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Utility functions and classes for supporting query conditions.
3Classes:
5`CompiledCondition`
6 Container for a compiled condition.
8Functions:
10`compile_condition`
11 Compile a condition and extract usable index conditions.
12`call_on_recarr`
13 Evaluate a function over a structured array.
15"""
17import re
18import numexpr as ne
20from .utilsextension import get_nested_field
21from .utils import lazyattr
_no_matching_opcode = re.compile(r"[^a-z]([a-z]+)_([a-z]+)[^a-z]")
# E.g. "gt" and "bfc" from "couldn't find matching opcode for 'gt_bfc'".


def _unsupported_operation_error(exception):
    """Make the "no matching opcode" Numexpr `exception` more clear.

    The first argument of `exception` is searched for an opcode of the
    form ``<operation>_<typecodes>``.  When one is found, a new exception
    of the same class is returned whose message names the operation and
    the kinds of its operands.

    When the message cannot be parsed (no arguments, a non-string first
    argument, or no opcode in the text), `exception` itself is returned
    unchanged, so that the caller re-raises the original error instead of
    tripping over a secondary failure in this helper.

    """

    message = exception.args[0] if exception.args else None
    match = (_no_matching_opcode.search(message)
             if isinstance(message, str) else None)
    if match is None:
        # Nothing we know how to improve: keep the original error intact.
        return exception

    op, types = match.groups()
    # The first type code is skipped; presumably it is the result type of
    # the opcode rather than an operand type -- TODO confirm with Numexpr.
    operand_kinds = ', '.join(
        ne.necompiler.typecode_to_kind[t] for t in types[1:])
    newmessage = "unsupported operand types for *%s*: " % op
    return exception.__class__(newmessage + operand_kinds)
def _check_indexable_cmp(getidxcmp):
    """Decorate `getidxcmp` to check the returned indexable comparison.

    This does some extra checking that Numexpr would perform later on
    the comparison if it was compiled within a complete condition.

    The wrapper calls `getidxcmp` and, when the first item of its result
    is not ``None``, type-compiles the AST of the expression node.  A
    ``NotImplementedError`` raised by that step is converted with
    `_unsupported_operation_error` and re-raised, chained to the original
    error.  The result of `getidxcmp` is returned untouched otherwise.

    """

    # Imported here so that this block does not depend on the module's
    # import list.
    import functools

    @functools.wraps(getidxcmp)  # keeps __name__, __doc__ and friends
    def newfunc(exprnode, indexedcols):
        result = getidxcmp(exprnode, indexedcols)
        if result[0] is not None:
            try:
                ne.necompiler.typeCompileAst(
                    ne.necompiler.expressionToAST(exprnode))
            except NotImplementedError as nie:
                # Try to make this Numexpr error less cryptic, but keep
                # the original one reachable through ``__cause__``.
                raise _unsupported_operation_error(nie) from nie
        return result

    return newfunc
@_check_indexable_cmp
def _get_indexable_cmp(exprnode, indexedcols):
    """Extract the indexable comparison expressed by `exprnode`, if any.

    The result is always a 3-tuple:

    * ``(name, op, limit)`` when `exprnode` compares a variable whose
      name is in `indexedcols` against a constant or another variable
      (in either order; the operator is mirrored when the indexed
      variable is on the right).  A variable limit is given as a
      one-element tuple holding its name instead of a value.
    * ``(name, 'eq', True)`` / ``(name, 'eq', False)`` for an indexed
      boolean variable and for its direct negation, respectively.
    * ``(child, 'invert', None)`` for the negation of any other boolean
      node; the caller decides later whether ``child`` is indexable.
    * ``(None, None, None)`` otherwise.

    """

    nothing = (None, None, None)
    # Operator to use once the two operands of a comparison are swapped.
    mirrored = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ge': 'le', 'gt': 'lt'}

    def indexed_bool(node):
        if node.astType != 'variable':
            return False
        if node.astKind != 'bool':
            return False
        return node.value in indexedcols

    def match(column, limit, op):
        name, bound = column.value, limit.value
        if column.astType != 'variable' or name not in indexedcols:
            return None
        if limit.astType == 'variable':
            # Variables are reported by name, wrapped in a tuple.
            return (name, op, (bound, ))
        if limit.astType == 'constant':
            return (name, op, bound)
        return None

    # A bare indexed boolean column stands for ``column == True``.
    if indexed_bool(exprnode):
        return (exprnode.value, 'eq', True)

    is_op = exprnode.astType == 'op'
    if is_op and exprnode.value == 'invert':
        inner = exprnode.children[0]
        # ``~column`` on an indexed boolean stands for ``column == False``.
        if indexed_bool(inner):
            return (inner.value, 'eq', False)
        # Any other negated boolean is handed back as ``~inner``.
        if inner.astKind == "bool":
            return (inner, 'invert', None)

    # From here on only plain comparisons can be indexable.
    if not is_op or exprnode.value not in mirrored:
        return nothing

    op = exprnode.value
    lhs, rhs = exprnode.children
    # Try ``variable <op> limit`` first, then the mirrored reading.
    return match(lhs, rhs, op) or match(rhs, lhs, mirrored[op]) or nothing
def _equiv_expr_node(x, y):
    """Tell whether `x` and `y` are equivalent expression nodes.

    Plain ``==`` cannot be used for this because ExpressionNode
    overrides it to build a new ExpressionNode.  Two non-node values are
    compared with ``==``; two nodes are equivalent when they have the
    same type, value, kind and number of children, and their children
    are pairwise equivalent.  A node is never equivalent to a non-node.

    """
    node_class = ne.expressions.ExpressionNode
    x_is_node = isinstance(x, node_class)
    y_is_node = isinstance(y, node_class)

    if not x_is_node and not y_is_node:
        return x == y
    if type(x) is not type(y) or not (x_is_node and y_is_node):
        return False

    same_header = not (x.value != y.value
                       or x.astKind != y.astKind
                       or len(x.children) != len(y.children))
    if not same_header:
        return False

    return all(_equiv_expr_node(xchild, ychild)
               for xchild, ychild in zip(x.children, y.children))
def _get_idx_expr_recurse(exprnode, indexedcols, idxexprs, strexpr):
    """Here lives the actual implementation of the get_idx_expr() wrapper.

    'idxexprs' is a list of expressions in the form ``(var, (ops),
    (limits))``.  'strexpr' is the indexable expression in string format,
    wrapped in a one-element list.  These parameters will be received
    empty (i.e. [], ['']) for the first time and are mutated in place
    during the different recursive calls.

    The return value has one of three shapes:

    * ``[expr]``: `exprnode` (after removing leading negations) is a
      single indexable comparison.
    * ``(idxexprs, strexpr)``: `exprnode` is an ``and``/``or`` whose
      usable parts have been added to the received accumulators.
    * ``([], [''])``: `exprnode` is not indexable.

    """

    not_indexable = ([], [''])
    op_conv = {
        'and': '&',
        'or': '|',
        'not': '~',
    }
    # Comparison equivalent to the negation of each key.  There is no
    # entry for 'eq' because its negation ('!=') is not indexable.
    negcmp = {
        'lt': 'ge',
        'le': 'gt',
        'ge': 'lt',
        'gt': 'le',
    }

    def fix_invert(idxcmp, exprnode, indexedcols):
        invert = False
        # Loop until all leading negations have been dealt with
        while idxcmp[1] == "invert":
            invert ^= True
            # The information about the negated node is in first position
            exprnode = idxcmp[0]
            idxcmp = _get_indexable_cmp(exprnode, indexedcols)
        return idxcmp, exprnode, invert

    # Indexable variable-constant comparison.
    idxcmp = _get_indexable_cmp(exprnode, indexedcols)
    idxcmp, exprnode, invert = fix_invert(idxcmp, exprnode, indexedcols)
    if idxcmp[0]:
        var, op, value = idxcmp
        if invert:
            if op == 'eq' and isinstance(value, bool):
                # ``var`` is compared against a boolean.  Flip its value.
                # A real ``bool`` is required: ``1 in [True, False]`` is
                # true, so a membership test would also rewrite
                # ``~(c == 1)`` as ``c == 0``.
                value = not value
            elif op in negcmp:
                op = negcmp[op]
            else:
                # Negated equality against a non-boolean limit is a
                # ``!=`` comparison, which cannot use an index.
                return not_indexable
        return [(var, (op,), (value,))]

    # For now negations of complex expressions will be not supported as
    # forming part of an indexable condition.  This might be supported in
    # the future.
    if invert:
        return not_indexable

    # Only conjunctions and disjunctions of comparisons are considered
    # for the moment.
    if exprnode.astType != 'op' or exprnode.value not in ['and', 'or']:
        return not_indexable

    left, right = exprnode.children
    # Get the expression at left
    lcolvar, lop, llim = _get_indexable_cmp(left, indexedcols)
    # Get the expression at right
    rcolvar, rop, rlim = _get_indexable_cmp(right, indexedcols)

    # Use conjunction of indexable VC comparisons like
    # ``(a <[=] x) & (x <[=] b)`` or ``(a >[=] x) & (x >[=] b)``
    # as ``a <[=] x <[=] b``, for the moment.
    op = exprnode.value
    if (lcolvar is not None and rcolvar is not None
            and _equiv_expr_node(lcolvar, rcolvar) and op == 'and'):
        if lop in ['gt', 'ge'] and rop in ['lt', 'le']:  # l <= x <= r
            expr = (lcolvar, (lop, rop), (llim, rlim))
            return [expr]
        if lop in ['lt', 'le'] and rop in ['gt', 'ge']:  # l >= x >= r
            expr = (rcolvar, (rop, lop), (rlim, llim))
            return [expr]

    # Recursively get the expressions at the left and the right.
    # NOTE(review): both calls share ``idxexprs``/``strexpr``, so a
    # sub-expression that is finally reported as not indexable may already
    # have appended entries to them -- confirm this cannot leak into the
    # result of an enclosing ``and``.
    lexpr = _get_idx_expr_recurse(left, indexedcols, idxexprs, strexpr)
    rexpr = _get_idx_expr_recurse(right, indexedcols, idxexprs, strexpr)

    def add_expr(expr, idxexprs, strexpr):
        """Add a single expression to the list."""

        # Tuples come from a deeper and/or level, which has already
        # updated the accumulators; only single expressions are added.
        if isinstance(expr, list):
            # expr is a single expression
            idxexprs.append(expr[0])
            lenexprs = len(idxexprs)
            # Mutate the strexpr string
            if lenexprs == 1:
                strexpr[:] = ["e0"]
            else:
                strexpr[:] = [
                    "(%s %s e%d)" % (strexpr[0], op_conv[op], lenexprs - 1)]

    # Add expressions to the indexable list when they are and'ed, or
    # they are both indexable.
    if lexpr != not_indexable and (op == "and" or rexpr != not_indexable):
        add_expr(lexpr, idxexprs, strexpr)
        if rexpr != not_indexable:
            add_expr(rexpr, idxexprs, strexpr)
        return (idxexprs, strexpr)
    if rexpr != not_indexable and op == "and":
        add_expr(rexpr, idxexprs, strexpr)
        return (idxexprs, strexpr)

    # Can not use indexed column.
    return not_indexable
def _get_idx_expr(expr, indexedcols):
    """Extract an indexable expression out of `expr`.

    The expression node `expr` is searched for variable-constant
    comparisons that involve variables named in `indexedcols`.  The
    search itself is done by `_get_idx_expr_recurse`, which is started
    here with empty accumulators and whose result is returned as is.

    For combined conditions that result is a tuple of (idxexprs,
    strexpr) where 'idxexprs' is a list of expressions in the form
    ``(var, (ops), (limits))`` and 'strexpr' is the indexable expression
    in string format.

    Expressions such as ``0 < c1 <= 1`` do not work as expected.

    Right now only some of the *indexable comparisons* are considered:

    * ``a <[=] x``, ``a == x`` and ``a >[=] x``
    * ``(a <[=] x) & (y <[=] b)`` and ``(a == x) | (b == y)``
    * ``~(~c_bool)``, ``~~c_bool`` and ``~(~c_bool) & (c_extra != 2)``

    (where ``a``, ``b`` and ``c_bool`` are indexed columns, but
    ``c_extra`` is not)

    Particularly, the ``!=`` operator and negations of complex boolean
    expressions are *not considered* as valid candidates:

    * ``a != 1`` and ``c_bool != False``
    * ``~((a > 0) & (c_bool))``

    """

    # Fresh accumulators for every call: the recursion mutates them.
    collected = []
    rendered = ['']
    return _get_idx_expr_recurse(expr, indexedcols, collected, rendered)
class CompiledCondition:
    """Container for a compiled condition."""

    def __init__(self, func, params, idxexprs, strexpr, **kwargs):
        self.function = func
        """The compiled function object corresponding to this condition."""
        self.parameters = params
        """A list of parameter names for this condition."""
        self.index_expressions = idxexprs
        """A list of expressions in the form ``(var, (ops), (limits))``."""
        self.string_expression = strexpr
        """The indexable expression in string format."""
        self.kwargs = kwargs
        """NumExpr kwargs (used to pass ex_uses_vml to numexpr)"""

    @lazyattr
    def index_variables(self):
        """The columns participating in the index expression."""

        # The variable is the first item of every index expression.
        return frozenset(expr[0] for expr in self.index_expressions)

    def __repr__(self):
        fields = (self.index_expressions, self.string_expression,
                  self.index_variables)
        return "idxexprs: %s\nstrexpr: %s\nidxvars: %s" % fields

    def with_replaced_vars(self, condvars):
        """Return a copy with index limit variables replaced by values.

        Limits given as tuples are variable references: the name in
        their first item is looked up in the `condvars` mapping and the
        value found is converted with its ``tolist()`` method.  Any
        other limit is kept as it is.  This instance is not modified; a
        new `CompiledCondition` sharing the same function, parameters,
        string expression and keyword arguments is returned.
        """

        def resolve(limit):
            if not isinstance(limit, tuple):
                return limit  # already a value
            # Look the variable up and convert it back to Python.
            return condvars[limit[0]].tolist()

        replaced = [
            (var, ops, tuple(resolve(limit) for limit in limits))
            for var, ops, limits in self.index_expressions]

        return CompiledCondition(
            self.function, self.parameters, replaced,
            self.string_expression, **self.kwargs)
def _get_variable_names(expression):
    """Return the list of variable names in the Numexpr `expression`.

    The expression tree is walked without recursion.  Nodes whose
    ``astType`` is ``'variable'`` contribute their ``value``; any other
    node having a ``children`` attribute has those children visited in
    turn.  Every name appears once in the result, in no particular order.

    """

    found = []
    pending = [expression]
    while pending:
        current = pending.pop()
        if current.astType == 'variable':
            found.append(current.value)
            continue
        # Nodes without children simply add nothing to visit.
        pending.extend(getattr(current, 'children', ()))
    return list(set(found))  # remove repeated names
def compile_condition(condition, typemap, indexedcols):
    """Compile a condition and extract usable index conditions.

    Looks for variable-constant comparisons in the `condition` string
    involving the indexed columns whose variable names appear in
    `indexedcols`.  The part of `condition` having usable indexes is
    returned as a compiled condition in a `CompiledCondition` container.

    Expressions such as '0 < c1 <= 1' do not work as expected.  The
    Numexpr types of *all* variables must be given in the `typemap`
    mapping.  The ``function`` of the resulting `CompiledCondition`
    instance is a Numexpr function object, and the ``parameters`` list
    indicates the order of its parameters.

    A ``TypeError`` is raised when the condition is not of boolean kind.
    A ``NotImplementedError`` coming from Numexpr while compiling is
    passed through `_unsupported_operation_error` and re-raised, chained
    to the original error.

    """

    # Get the expression tree and extract index conditions.
    expr = ne.necompiler.stringToExpression(condition, typemap, {})
    if expr.astKind != 'bool':
        raise TypeError("condition ``%s`` does not have a boolean type"
                        % (condition,))
    idxexprs = _get_idx_expr(expr, indexedcols)
    # Post-process the answer
    if isinstance(idxexprs, list):
        # Simple expression: a single comparison, referred to as ``e0``.
        strexpr = ['e0']
    else:
        # Complex expression
        idxexprs, strexpr = idxexprs
    # Get rid of the unnecessary list wrapper for strexpr
    strexpr = strexpr[0]

    # Get the variable names used in the condition.
    # At the same time, build its signature.
    varnames = _get_variable_names(expr)
    signature = [(var, typemap[var]) for var in varnames]
    try:
        func = ne.necompiler.NumExpr(expr, signature)
    except NotImplementedError as nie:
        # Try to make this Numexpr error less cryptic, keeping the
        # original one available as the cause.
        raise _unsupported_operation_error(nie) from nie

    _, ex_uses_vml = ne.necompiler.getExprNames(condition, {})
    kwargs = {'ex_uses_vml': ex_uses_vml}

    # The parameters follow the same order as the signature.
    params = varnames
    return CompiledCondition(func, params, idxexprs, strexpr, **kwargs)
def call_on_recarr(func, params, recarr, param2arg=None, **kwargs):
    """Call `func` with `params` over `recarr`.

    Every item of `params` is turned into one positional argument.  The
    `param2arg` function, when specified, is used to get an argument
    given a parameter name; otherwise, the parameter itself is used as
    an argument.  An argument having a ``pathname`` attribute is taken
    for a column and replaced by the field of `recarr` found at that
    path.  Extra keyword arguments are handed to `func` unchanged, and
    the value `func` returns is the result.

    """

    def resolve(param):
        value = param2arg(param) if param2arg else param
        if hasattr(value, 'pathname'):  # looks like a column
            return get_nested_field(recarr, value.pathname)
        return value

    # All arguments are resolved before `func` is invoked.
    return func(*[resolve(param) for param in params], **kwargs)