Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/conditions.py: 13%

197 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Utility functions and classes for supporting query conditions. 

2 

3Classes: 

4 

5`CompiledCondition` 

6 Container for a compiled condition. 

7 

8Functions: 

9 

10`compile_condition` 

11 Compile a condition and extract usable index conditions. 

12`call_on_recarr` 

13 Evaluate a function over a structured array. 

14 

15""" 

16 

17import re 

18import numexpr as ne 

19 

20from .utilsextension import get_nested_field 

21from .utils import lazyattr 

22 

23 

24_no_matching_opcode = re.compile(r"[^a-z]([a-z]+)_([a-z]+)[^a-z]") 

25# E.g. "gt" and "bfc" from "couldn't find matching opcode for 'gt_bfc'". 

26 

27 

28def _unsupported_operation_error(exception): 

29 """Make the \"no matching opcode\" Numexpr `exception` more clear. 

30 

31 A new exception of the same kind is returned. 

32 

33 """ 

34 

35 message = exception.args[0] 

36 op, types = _no_matching_opcode.search(message).groups() 

37 newmessage = "unsupported operand types for *%s*: " % op 

38 newmessage += ', '.join( 

39 ne.necompiler.typecode_to_kind[t] for t in types[1:]) 

40 return exception.__class__(newmessage) 

41 

42 

43def _check_indexable_cmp(getidxcmp): 

44 """Decorate `getidxcmp` to check the returned indexable comparison. 

45 

46 This does some extra checking that Numexpr would perform later on 

47 the comparison if it was compiled within a complete condition. 

48 

49 """ 

50 

51 def newfunc(exprnode, indexedcols): 

52 result = getidxcmp(exprnode, indexedcols) 

53 if result[0] is not None: 

54 try: 

55 ne.necompiler.typeCompileAst( 

56 ne.necompiler.expressionToAST(exprnode)) 

57 except NotImplementedError as nie: 

58 # Try to make this Numexpr error less cryptic. 

59 raise _unsupported_operation_error(nie) 

60 return result 

61 newfunc.__name__ = getidxcmp.__name__ 

62 newfunc.__doc__ = getidxcmp.__doc__ 

63 return newfunc 

64 

65 

@_check_indexable_cmp
def _get_indexable_cmp(exprnode, indexedcols):
    """Get the indexable variable-constant comparison in `exprnode`.

    The result is always a 3-tuple:

    * ``(variable, operation, constant)`` when `exprnode` compares a
      variable found in `indexedcols` with a constant, in either order
      (the operation is mirrored when the variable is on the right).
      When the other operand is itself a variable, a 1-tuple holding its
      name is given in place of the constant value.
    * ``(variable, 'eq', True)`` for a bare boolean variable in
      `indexedcols`, and ``(variable, 'eq', False)`` for its negation.
    * ``(child, 'invert', None)`` for the negation of any other boolean
      expression node ``child``; its indexability is decided later on.
    * ``(None, None, None)`` otherwise.
    """

    nothing = (None, None, None)
    # Operation to use once the operands of a comparison are swapped.
    mirrored = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ge': 'le', 'gt': 'lt'}

    def is_indexed_boolean(node):
        if node.astType != 'variable':
            return False
        if node.astKind != 'bool':
            return False
        return node.value in indexedcols

    def match_side(column, other, op):
        name, limit = column.value, other.value
        if column.astType != 'variable' or name not in indexedcols:
            return None
        if other.astType == 'variable':
            # A variable limit is signalled by wrapping its name in a tuple.
            return (name, op, (limit, ))
        if other.astType == 'constant':
            return (name, op, limit)
        return None

    # Boolean variables are indexable by themselves.
    if is_indexed_boolean(exprnode):
        return (exprnode.value, 'eq', True)

    if exprnode.astType == 'op' and exprnode.value == 'invert':
        negated = exprnode.children[0]
        # Negations of boolean variables are indexable too.
        if is_indexed_boolean(negated):
            return (negated.value, 'eq', False)
        # A negation of an expression is handed back as ``~child``.
        if negated.astKind == "bool":
            return (negated, 'invert', None)

    # From here on, only comparison operations are indexable.
    if exprnode.astType != 'op' or exprnode.value not in mirrored:
        return nothing
    cmpop = exprnode.value

    # Try variable-constant first, then constant-variable.
    left, right = exprnode.children
    return (match_side(left, right, cmpop)
            or match_side(right, left, mirrored[cmpop])
            or nothing)

130 

131 

def _equiv_expr_node(x, y):
    """Return whether two ExpressionNodes are equivalent.

    A plain ``x == y`` cannot be used for this because '==' is overridden
    on ExpressionNode to build a new ExpressionNode.  Two nodes are
    equivalent when they have the same type, value, kind and number of
    children, and their children are pairwise equivalent.  When neither
    argument is an ExpressionNode, the result of ``x == y`` is returned.

    """
    node_class = ne.expressions.ExpressionNode
    x_is_node = isinstance(x, node_class)
    y_is_node = isinstance(y, node_class)

    if not x_is_node and not y_is_node:
        return x == y
    if type(x) is not type(y) or not (x_is_node and y_is_node):
        return False
    if (x.value != y.value
            or x.astKind != y.astKind
            or len(x.children) != len(y.children)):
        return False
    return all(_equiv_expr_node(xchild, ychild)
               for xchild, ychild in zip(x.children, y.children))

153 

154 

def _get_idx_expr_recurse(exprnode, indexedcols, idxexprs, strexpr):
    """Here lives the actual implementation of the get_idx_expr() wrapper.

    'idxexprs' is a list of expressions in the form ``(var, (ops),
    (limits))``.  'strexpr' is the indexable expression in string format,
    wrapped in a one-element list so that it can be mutated in place.
    These parameters will be received empty (i.e. [], ['']) for the
    first time and populated during the different recursive calls.

    The return value takes one of three shapes:

    * a one-element list ``[expr]`` when 'exprnode' is a single indexable
      comparison (or a two-sided range over one variable);
    * the tuple ``(idxexprs, strexpr)`` (the very same objects received)
      when 'exprnode' is a conjunction or disjunction with indexable
      parts;
    * the tuple ``([], [''])`` when 'exprnode' is not indexable.

    """

    not_indexable = ([], [''])
    op_conv = {
        'and': '&',
        'or': '|',
        'not': '~',
    }
    # Comparison equivalent to the negation of each ordering comparison.
    # There is no entry for 'eq' because ``~(x == c)`` means ``x != c``,
    # which is not an indexable comparison.
    negcmp = {
        'lt': 'ge',
        'le': 'gt',
        'ge': 'lt',
        'gt': 'le',
    }

    def fix_invert(idxcmp, exprnode, indexedcols):
        invert = False
        # Loop until all leading negations have been dealt with
        while idxcmp[1] == "invert":
            invert ^= True
            # The information about the negated node is in first position
            exprnode = idxcmp[0]
            idxcmp = _get_indexable_cmp(exprnode, indexedcols)
        return idxcmp, exprnode, invert

    # Indexable variable-constant comparison.
    idxcmp = _get_indexable_cmp(exprnode, indexedcols)
    idxcmp, exprnode, invert = fix_invert(idxcmp, exprnode, indexedcols)
    if idxcmp[0]:
        if invert:
            var, op, value = idxcmp
            if op == 'eq' and isinstance(value, bool):
                # ``var`` is compared with a boolean.  Flip its value.
                # An explicit type check is needed here: ``1 in [True,
                # False]`` is true, so a membership test would rewrite
                # ``~(col == 1)`` as ``col == 0``.
                value = not value
            elif op in negcmp:
                op = negcmp[op]
            else:
                # Negated equality against a non-boolean limit has no
                # indexable counterpart.
                return not_indexable
            expr = (var, (op,), (value,))
        else:
            expr = (idxcmp[0], (idxcmp[1],), (idxcmp[2],))
        return [expr]

    # For now negations of complex expressions will be not supported as
    # forming part of an indexable condition.  This might be supported in
    # the future.
    if invert:
        return not_indexable

    # Only conjunctions and disjunctions of comparisons are considered
    # for the moment.
    if exprnode.astType != 'op' or exprnode.value not in ['and', 'or']:
        return not_indexable

    left, right = exprnode.children
    # Get the expression at left
    lcolvar, lop, llim = _get_indexable_cmp(left, indexedcols)
    # Get the expression at right
    rcolvar, rop, rlim = _get_indexable_cmp(right, indexedcols)

    # Use conjunction of indexable VC comparisons like
    # ``(a <[=] x) & (x <[=] b)`` or ``(a >[=] x) & (x >[=] b)``
    # as ``a <[=] x <[=] b``, for the moment.
    op = exprnode.value
    if (lcolvar is not None and rcolvar is not None
            and _equiv_expr_node(lcolvar, rcolvar) and op == 'and'):
        if lop in ['gt', 'ge'] and rop in ['lt', 'le']:  # l <= x <= r
            expr = (lcolvar, (lop, rop), (llim, rlim))
            return [expr]
        if lop in ['lt', 'le'] and rop in ['gt', 'ge']:  # l >= x >= r
            expr = (rcolvar, (rop, lop), (rlim, llim))
            return [expr]

    # Recursively get the expressions at the left and the right
    lexpr = _get_idx_expr_recurse(left, indexedcols, idxexprs, strexpr)
    rexpr = _get_idx_expr_recurse(right, indexedcols, idxexprs, strexpr)

    def add_expr(expr, idxexprs, strexpr):
        """Add a single expression to the list."""

        # Only one-element lists are new, single expressions.  A tuple
        # result means the recursive call already updated the shared
        # 'idxexprs' and 'strexpr' in place.
        if isinstance(expr, list):
            # expr is a single expression
            idxexprs.append(expr[0])
            lenexprs = len(idxexprs)
            # Mutate the strexpr string
            if lenexprs == 1:
                strexpr[:] = ["e0"]
            else:
                strexpr[:] = [
                    "(%s %s e%d)" % (strexpr[0], op_conv[op], lenexprs - 1)]

    # Add expressions to the indexable list when they are and'ed, or
    # they are both indexable.
    if lexpr != not_indexable and (op == "and" or rexpr != not_indexable):
        add_expr(lexpr, idxexprs, strexpr)
        if rexpr != not_indexable:
            add_expr(rexpr, idxexprs, strexpr)
        return (idxexprs, strexpr)
    if rexpr != not_indexable and op == "and":
        add_expr(rexpr, idxexprs, strexpr)
        return (idxexprs, strexpr)

    # Can not use indexed column.
    return not_indexable

269 

270 

def _get_idx_expr(expr, indexedcols):
    """Extract an indexable expression out of `expr`.

    Looks for variable-constant comparisons in the expression node
    `expr` involving variables in `indexedcols`.  The search itself is
    done by `_get_idx_expr_recurse()`, started here with an empty list of
    index expressions and an empty string expression; its result is
    returned as is.  For compound conditions that is a tuple of
    (idxexprs, strexpr) where 'idxexprs' is a list of expressions in the
    form ``(var, (ops), (limits))`` and 'strexpr' is the indexable
    expression in string format.

    Expressions such as ``0 < c1 <= 1`` do not work as expected.

    Right now only some of the *indexable comparisons* are considered:

    * ``a <[=] x``, ``a == x`` and ``a >[=] x``
    * ``(a <[=] x) & (y <[=] b)`` and ``(a == x) | (b == y)``
    * ``~(~c_bool)``, ``~~c_bool`` and ``~(~c_bool) & (c_extra != 2)``

    (where ``a``, ``b`` and ``c_bool`` are indexed columns, but
    ``c_extra`` is not)

    Particularly, the ``!=`` operator and negations of complex boolean
    expressions are *not considered* as valid candidates:

    * ``a != 1`` and ``c_bool != False``
    * ``~((a > 0) & (c_bool))``

    """

    collected, rendered = [], ['']
    return _get_idx_expr_recurse(expr, indexedcols, collected, rendered)

301 

302 

class CompiledCondition:
    """Container for a compiled condition."""

    @lazyattr
    def index_variables(self):
        """The columns participating in the index expression."""

        # The variable is the first item of every index expression.
        return frozenset(expr[0] for expr in self.index_expressions)

    def __init__(self, func, params, idxexprs, strexpr, **kwargs):
        self.function = func
        """The compiled function object corresponding to this condition."""
        self.parameters = params
        """A list of parameter names for this condition."""
        self.index_expressions = idxexprs
        """A list of expressions in the form ``(var, (ops), (limits))``."""
        self.string_expression = strexpr
        """The indexable expression in string format."""
        self.kwargs = kwargs
        """NumExpr kwargs (used to pass ex_uses_vml to numexpr)"""

    def __repr__(self):
        shown = (self.index_expressions, self.string_expression,
                 self.index_variables)
        return "idxexprs: %s\nstrexpr: %s\nidxvars: %s" % shown

    def with_replaced_vars(self, condvars):
        """Return a copy with index limit variables replaced by values.

        Limits given as a tuple name a variable: its value is looked up
        in the `condvars` mapping and converted to a Python object with
        its ``tolist()`` method.  Other limits are kept as they are.
        This condition is not modified; a new `CompiledCondition` with
        the same function, parameters, string expression and keyword
        arguments is returned.
        """

        def resolve(limit):
            if isinstance(limit, tuple):  # variable
                return condvars[limit[0]].tolist()
            return limit

        replaced = [
            (var, ops, tuple(resolve(limit) for limit in limits))
            for var, ops, limits in self.index_expressions
        ]
        # Create a new container for the converted values
        return CompiledCondition(
            self.function, self.parameters, replaced,
            self.string_expression, **self.kwargs)

360 

361 

362def _get_variable_names(expression): 

363 """Return the list of variable names in the Numexpr `expression`.""" 

364 

365 names = [] 

366 stack = [expression] 

367 while stack: 

368 node = stack.pop() 

369 if node.astType == 'variable': 

370 names.append(node.value) 

371 elif hasattr(node, 'children'): 

372 stack.extend(node.children) 

373 return list(set(names)) # remove repeated names 

374 

375 

def compile_condition(condition, typemap, indexedcols):
    """Compile a condition and extract usable index conditions.

    Looks for variable-constant comparisons in the `condition` string
    involving the indexed columns whose variable names appear in
    `indexedcols`.  The part of `condition` having usable indexes is
    returned as a compiled condition in a `CompiledCondition` container.

    Expressions such as '0 < c1 <= 1' do not work as expected.  The
    Numexpr types of *all* variables must be given in the `typemap`
    mapping.  The ``function`` of the resulting `CompiledCondition`
    instance is a Numexpr function object, and the ``parameters`` list
    indicates the order of its parameters.

    A ``TypeError`` is raised when `condition` is not of boolean type.

    """

    # Get the expression tree and make sure it is a boolean one.
    expr = ne.necompiler.stringToExpression(condition, typemap, {})
    if expr.astKind != 'bool':
        raise TypeError("condition ``%s`` does not have a boolean type"
                        % condition)

    # Extract the index conditions and normalize the answer.
    found = _get_idx_expr(expr, indexedcols)
    if isinstance(found, list):
        # Simple expression: a lone comparison, always named ``e0``.
        idxexprs, strexpr = found, 'e0'
    else:
        # Complex expression: drop the list wrapper around the string.
        idxexprs, wrapped = found
        strexpr = wrapped[0]

    # The variable names give both the signature of the compiled function
    # and the order of its parameters.
    varnames = _get_variable_names(expr)
    signature = [(name, typemap[name]) for name in varnames]
    try:
        # See the comments in `numexpr.evaluate()` for the
        # reasons of inserting copy operators for unaligned,
        # *unidimensional* arrays.
        func = ne.necompiler.NumExpr(expr, signature)
    except NotImplementedError as nie:
        # Try to make this Numexpr error less cryptic.
        raise _unsupported_operation_error(nie)

    _, ex_uses_vml = ne.necompiler.getExprNames(condition, {})

    return CompiledCondition(
        func, varnames, idxexprs, strexpr, ex_uses_vml=ex_uses_vml)

427 

428 

def call_on_recarr(func, params, recarr, param2arg=None, **kwargs):
    """Call `func` with `params` over `recarr`.

    Each parameter is turned into an argument with the `param2arg`
    function when one is given; otherwise the parameter itself is the
    argument.  An argument that has a ``pathname`` attribute is taken to
    be a column, and the field of `recarr` at that path is passed in its
    place.  Any extra keyword arguments are forwarded to `func`, whose
    result is returned.

    """

    def resolve(param):
        arg = param2arg(param) if param2arg else param
        if hasattr(arg, 'pathname'):  # looks like a column
            return get_nested_field(recarr, arg.pathname)
        return arg

    return func(*[resolve(param) for param in params], **kwargs)