1###################################################################
2# Numexpr - Fast numerical array expression evaluator for NumPy.
3#
4# License: MIT
5# Author: See AUTHORS.txt
6#
7# See LICENSE.txt and LICENSES/*.txt for details about copyright and
8# rights to use.
9####################################################################
10
11from typing import Optional, Dict
12import __future__
13import sys
14import os
15import threading
16import re
17
18import numpy
19
20is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE
21from numexpr import interpreter, expressions, use_vml
22from numexpr.utils import CacheDict, ContextDict
23
# Declare a double type that does not exist in Python space
double = numpy.double

# Fixed-width NumPy integer types used as the "int" and "long" kinds.
int_ = numpy.int32
long_ = numpy.int64

# One-character typecodes <-> kind names.  Note that both the 'bytes' and
# the 'str' kind map onto the single typecode 's'.
typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float', 'd': 'double',
                    'c': 'complex', 'n': 'none', 's': 'str'}
kind_to_typecode = {'bool': 'b', 'int': 'i', 'long': 'l', 'float': 'f', 'double': 'd',
                    'complex': 'c', 'bytes': 's', 'str': 's', 'none': 'n'}
# Python/NumPy types -> typecode, used to build input signatures.
type_to_typecode = {bool: 'b', int_: 'i', long_: 'l', float: 'f',
                    double: 'd', complex: 'c', bytes: 's', str: 's'}
# Re-exported lookup tables owned by the expressions module.
type_to_kind = expressions.type_to_kind
kind_to_type = expressions.kind_to_type
default_type = kind_to_type[expressions.default_kind]
# Every kind name known to kind_to_typecode, in declaration order.
scalar_constant_kinds = list(kind_to_typecode.keys())
41
# VML functions that are implemented in numexpr
vml_functions = [
    # The first three are handled in interp_body.cpp
    "div",
    "inv",
    "pow",
    # Keep the rest of this list in sync with the ones listed in functions.hpp
    "sqrt",
    "sin", "cos", "tan",
    "arcsin", "arccos", "arctan",
    "sinh", "cosh", "tanh",
    "arcsinh", "arccosh", "arctanh",
    "log", "log1p", "log10",
    "exp", "expm1",
    "absolute",
    "conjugate",
    "arctan2",
    "fmod",
    "ceil",
    "floor",
]
73
74
class ASTNode():
    """Abstract Syntax Tree node.

    Members:

    astType      -- type of node (op, constant, variable, raw, or alias)
    astKind      -- the type of the result (bool, float, etc.)
    value        -- value associated with this node.
                    An opcode, numerical value, a variable name, etc.
                    For an 'alias' node this is the node being aliased.
    children     -- the children below this node
    reg          -- the register assigned to the result for this node.

    Equality and hashing look through 'alias' nodes, so an alias compares
    and hashes like the node stored in its ``value``.
    """
    # Attributes that take part in equality comparison.
    cmpnames = ['astType', 'astKind', 'value', 'children']

    def __init__(self, astType='generic', astKind='unknown', value=None, children=()):
        self.astType = astType
        self.astKind = astKind
        self.value = value
        self.children = tuple(children)
        self.reg = None

    def __eq__(self, other):
        """Return True if both nodes (after resolving aliases) match on
        every attribute listed in ``cmpnames``.

        Comparing against anything that is not an ASTNode yields False.
        """
        # Check the type *before* touching ``other.astType``; otherwise a
        # comparison such as ``node == 5`` raises AttributeError.
        if not isinstance(other, ASTNode):
            return False
        if self.astType == 'alias':
            self = self.value
        if other.astType == 'alias':
            other = other.value
        # The alias target itself must be a node as well.
        if not isinstance(other, ASTNode):
            return False
        for name in self.cmpnames:
            if getattr(self, name) != getattr(other, name):
                return False
        return True

    def __lt__(self, other):
        """Order 'constant' nodes; raise TypeError for any other astType."""
        # Let Python produce its regular "not supported" TypeError when
        # compared against a foreign object.
        if not isinstance(other, ASTNode):
            return NotImplemented
        # RAM: this is a fix for issue #88 whereby sorting on constants
        # that may be of astKind == 'complex' but type(self.value) == int or float
        # Here we let NumPy sort as it will cast data properly for comparison
        # when the Python built-ins will raise an error.
        if self.astType == 'constant':
            if self.astKind == other.astKind:
                return numpy.array(self.value) < numpy.array(other.value)
            return self.astKind < other.astKind
        else:
            raise TypeError('Sorting not implemented for astType: %s' % self.astType)

    def __hash__(self):
        # Hash consistently with __eq__: an alias hashes like its target.
        if self.astType == 'alias':
            self = self.value
        return hash((self.astType, self.astKind, self.value, self.children))

    def __str__(self):
        return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind,
                                            self.value, self.children, self.reg)

    def __repr__(self):
        return '<AST object at %s>' % id(self)

    def key(self):
        """Return the (astType, astKind, value, children) tuple of this node."""
        return (self.astType, self.astKind, self.value, self.children)

    def typecode(self):
        """Return the one-character typecode for this node's astKind."""
        return kind_to_typecode[self.astKind]

    def postorderWalk(self):
        """Yield every node of the subtree, children before their parent."""
        for c in self.children:
            for w in c.postorderWalk():
                yield w
        yield self

    def allOf(self, *astTypes):
        """Yield, in post-order, the nodes whose astType is in *astTypes*."""
        astTypes = set(astTypes)
        for w in self.postorderWalk():
            if w.astType in astTypes:
                yield w
149
150
def expressionToAST(ex):
    """Recursively mirror an expressions.ExpressionNode tree as ASTNode's.

    ExpressionNode overrides many methods so that it behaves like a
    number; the plain ASTNode copy is what the compiler works on.
    """
    node_type, node_kind, node_value = ex.astType, ex.astKind, ex.value
    converted_children = [expressionToAST(child) for child in ex.children]
    return ASTNode(node_type, node_kind, node_value, converted_children)
160
161
def sigPerms(s):
    """Yield every signature reachable from *s* by upcasting its typecodes.

    Each numeric code may stay as it is or be promoted to any later code
    in the chain 'bilfdc'.  An 's' is kept as is, and the first code that
    is neither numeric nor 's' ends the expansion: the remaining signature
    is yielded verbatim.
    """
    upcast_chain = 'bilfdc'
    if not s:
        yield ''
        return

    head, tail = s[0], s[1:]
    if head in upcast_chain:
        candidates = upcast_chain[upcast_chain.index(head):]
    elif head == 's':
        # numbers shall not be cast to strings
        candidates = 's'
    else:
        yield s
        return

    for code in candidates:
        for rest in sigPerms(tail):
            yield code + rest
179
180
def typeCompileAst(ast):
    """Return a typed copy of the AST.

    For every 'op' node the opcode (or, failing that, the function) whose
    signature matches the result type and some upcast of the argument
    types is looked up, and "cast" ops are inserted where an argument has
    to be promoted.  Nodes of any other type are copied unchanged, with
    their children converted recursively.

    Raises NotImplementedError when neither an opcode nor a function
    matches.
    """
    if ast.astType != 'op':
        return ASTNode(ast.astType, ast.astKind, ast.value,
                       [typeCompileAst(c) for c in ast.children])

    children = list(ast.children)
    retsig = ast.typecode()
    basesig = ''.join(child.typecode() for child in children)
    stem = ast.value + '_' + retsig

    # Find some operation that will work on an acceptable casting of args.
    for sig in sigPerms(basesig):
        value = (stem + sig).encode('ascii')
        if value in interpreter.opcodes:
            break
    else:
        # No direct opcode: fall back to the function table.  The function
        # code is passed along as an extra 'raw' child.
        for sig in sigPerms(basesig):
            funcname = (stem + sig).encode('ascii')
            if funcname in interpreter.funccodes:
                value = ('func_%sn' % (retsig + sig)).encode('ascii')
                children.append(
                    ASTNode('raw', 'none', interpreter.funccodes[funcname]))
                break
        else:
            raise NotImplementedError(
                "couldn't find matching opcode for '%s'"
                % (stem + basesig))

    # First just cast constants, then cast variables if necessary:
    for i, (have, want) in enumerate(zip(basesig, sig)):
        if have == want:
            continue
        kind = typecode_to_kind[want]
        child = children[i]
        if child.astType == 'constant':
            # Constants are simply re-typed in place.
            children[i] = ASTNode('constant', kind, child.value)
        else:
            children[i] = ASTNode('op', kind, "cast", [child])

    return ASTNode(ast.astType, ast.astKind, value,
                   [typeCompileAst(c) for c in children])
222
223
class Register():
    """Abstraction for a register in the VM.

    Members:
    node          -- the AST node this corresponds to
    temporary     -- True if this isn't an input or output
    immediate     -- not a register, but an immediate value
    n             -- the physical register number.
                     None if no number assigned yet.
    """

    def __init__(self, astnode, temporary=False):
        self.node = astnode
        self.temporary = temporary
        self.immediate = False
        self.n = None

    def __str__(self):
        label = 'Temporary' if self.temporary else 'Register'
        node = self.node
        return '%s(%s, %s, %s)' % (label, node.astType, node.astKind, self.n)

    def __repr__(self):
        # The repr is deliberately the same text as str().
        return self.__str__()
251
252
class Immediate(Register):
    """An immediate (integer) operand that stands in for a register.

    Behaves like a non-temporary Register whose ``immediate`` flag is set.
    """

    def __init__(self, astnode):
        super().__init__(astnode)
        self.immediate = True

    def __str__(self):
        # The immediate's payload is the value of the wrapped node.
        return 'Immediate(%d)' % (self.node.value,)
264
265
266_flow_pat = r'[\;\[\:]'
267_dunder_pat = r'(^|[^\w])__[\w]+__($|[^\w])'
268_attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|\d*j)\b)'
269_blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}')
270
def stringToExpression(s, types, context, sanitize: bool=True):
    """Turn the expression string *s* into a tree of ExpressionNode's.

    *types* maps variable names to types (names not listed get
    ``default_type``) and *context* is installed as the current
    expression context while the string is evaluated.  With *sanitize*
    set, the string is first checked against the blacklist and a
    ValueError is raised on a hit.  A TypeError is raised when the
    evaluated result is neither a constant nor an ExpressionNode.
    """
    # sanitize the string for obvious attack vectors that NumExpr cannot
    # parse into its homebrew AST. This is to protect the call to `eval` below.
    # We forbid `;`, `:`. `[` and `__`, and attribute access via '.'.
    # We cannot ban `.real` or `.imag` however...
    # We also cannot ban `.\d*j`, where `\d*` is some digits (or none), e.g. 1.5j, 1.j
    if sanitize:
        stripped = re.sub(r'\s+', '', s)
        # Single-quoted string literals are excluded from the check.
        unquoted = re.sub(r'(\'[^\']*\')', '', stripped)
        if _blacklist_re.search(unquoted) is not None:
            raise ValueError(f'Expression {s} has forbidden control characters.')

    old_ctx = expressions._context.get_current_context()
    try:
        expressions._context.set_new_context(context)
        # first compile to a code object to determine the names
        flags = __future__.division.compiler_flag if context.get('truediv', False) else 0
        c = compile(s, '<expr>', 'eval', flags)

        # make VariableNode's for the names
        literal_names = {"None": None, "True": True, "False": False}
        names = {}
        for name in c.co_names:
            if name in literal_names:
                names[name] = literal_names[name]
            else:
                var_type = types.get(name, default_type)
                names[name] = expressions.VariableNode(name, type_to_kind[var_type])
        # Function names take precedence over variables of the same name.
        names.update(expressions.functions)

        # now build the expression
        ex = eval(c, names)

        if expressions.isConstant(ex):
            ex = expressions.ConstantNode(ex, expressions.getKind(ex))
        elif not isinstance(ex, expressions.ExpressionNode):
            raise TypeError("unsupported expression type: %s" % type(ex))
    finally:
        # Always restore the context that was active on entry.
        expressions._context.set_new_context(old_ctx)
    return ex
318
319
def isReduction(ast):
    """Return True if the node's (bytes) value names a sum, prod, min or
    max opcode.
    """
    reduction_prefixes = (b'sum_', b'prod_', b'min_', b'max_')
    return ast.value.startswith(reduction_prefixes)
323
324
def getInputOrder(ast, input_order=None):
    """
    Return the 'variable' nodes of the expression in input order.

    Without *input_order* the variables are sorted by name.  When it is
    given (and non-empty) it must name exactly the variables found in the
    expression, otherwise a ValueError is raised.  If a name occurs more
    than once, the last node seen for it is the one returned.
    """
    by_name = {node.value: node for node in ast.allOf('variable')}
    found_names = set(by_name.keys())

    if not input_order:
        ordered_names = sorted(found_names)
    elif found_names == set(input_order):
        ordered_names = input_order
    else:
        raise ValueError(
            "input names (%s) don't match those found in expression (%s)"
            % (input_order, found_names))

    return [by_name[name] for name in ordered_names]
346
347
def convertConstantToKind(x, kind):
    """Convert the constant *x* to the runtime type used for *kind*."""
    # Exception for 'float' types that will return the NumPy float32 type
    if kind == 'float':
        return numpy.float32(x)
    # Text constants are stored as ASCII bytes.
    if isinstance(x, str):
        return x.encode('ascii')
    target_type = kind_to_type[kind]
    return target_type(x)
355
356
def getConstants(ast):
    """
    Return ``(constants_order, constants)`` for the expression: the sorted
    'constant' nodes (one per distinct register) and their values
    converted to the matching kind.

    RAM: implemented magic method __lt__ for ASTNode to fix issues
    #88 and #209. The following test code works now, as does the test suite.

        import numexpr as ne
        a = 1 + 3j; b = 5.0
        ne.evaluate('a*2 + 15j - b')
    """
    registers = {node.reg for node in ast.allOf("constant")}
    constants_order = sorted(reg.node for reg in registers)
    constants = [convertConstantToKind(node.value, node.astKind)
                 for node in constants_order]
    return constants_order, constants
371
372
def sortNodesByOrder(nodes, order):
    """Sort *nodes* by the position of their value in *order*.

    *order* is a sequence of 3-tuples whose middle element is matched
    against ``node.value``.
    """
    position_of = {name: index for index, (_, name, _) in enumerate(order)}
    decorated = sorted((position_of[node.value], node) for node in nodes)
    return [node for _, node in decorated]
380
381
def assignLeafRegisters(inodes, registerMaker):
    """
    Give every leaf node a register, sharing one register among all
    leaves that have the same key().
    """
    registers_by_key = {}
    for leaf in inodes:
        leaf_key = leaf.key()
        if leaf_key not in registers_by_key:
            # First leaf with this key: make its register exactly once.
            registers_by_key[leaf_key] = registerMaker(leaf)
        leaf.reg = registers_by_key[leaf_key]
393
394
def assignBranchRegisters(inodes, registerMaker):
    """
    Give each branch node its own, freshly made temporary register.
    """
    for branch in inodes:
        branch.reg = registerMaker(branch, temporary=True)
401
402
def collapseDuplicateSubtrees(ast):
    """
    Common subexpression elimination.

    Every 'op' node equal to one seen earlier in the walk is turned, in
    place, into a childless 'alias' node whose value is the earlier node.
    Returns the list of nodes that were converted.
    """
    first_seen = {}
    aliases = []
    for node in ast.allOf('op'):
        if node in first_seen:
            original = first_seen[node]
            node.astType = 'alias'
            node.value = original
            node.children = ()
            aliases.append(node)
        else:
            first_seen[node] = node

    # Set values and registers so optimizeTemporariesAllocation
    # doesn't get confused
    for alias in aliases:
        # Follow alias chains until a real node is reached.
        while alias.value.astType == 'alias':
            alias.value = alias.value.value
    return aliases
424
425
def optimizeTemporariesAllocation(ast):
    """
    Attempt to minimize the number of temporaries needed, by reusing old ones.

    Walks the nodes that own a temporary register in post-order.  Once the
    last user of a temporary has been processed, that register is put in a
    per-kind pool, and a later node of the same kind takes a register from
    the pool instead of keeping its own.  Node registers are updated in
    place; nothing is returned.
    """
    nodes = [n for n in ast.postorderWalk() if n.reg.temporary]
    # For each temporary register: the set of nodes still reading it.
    users_of = {n.reg: set() for n in nodes}

    # The root also counts as a user of its children, even when its own
    # register is not a temporary.
    if nodes and nodes[-1] is not ast:
        nodes_to_check = nodes + [ast]
    else:
        nodes_to_check = nodes
    for n in nodes_to_check:
        for c in n.children:
            if c.reg.temporary:
                users_of[c.reg].add(n)

    # Pool of released registers, keyed by kind.
    unused = {kind: set() for kind in scalar_constant_kinds}
    for n in nodes:
        for c in n.children:
            reg = c.reg
            if reg.temporary:
                users = users_of[reg]
                users.discard(n)
                if not users:
                    # n was the last reader: the register is free again.
                    unused[reg.node.astKind].add(reg)
        if unused[n.astKind]:
            reg = unused[n.astKind].pop()
            # The recycled register inherits the users of the one it replaces.
            users_of[reg] = users_of[n.reg]
            n.reg = reg
457
458
def setOrderedRegisterNumbers(order, start):
    """
    Number the registers of the nodes in *order* consecutively from
    *start* and return the first number that is still free.
    """
    for number, node in enumerate(order, start):
        node.reg.n = number
    return start + len(order)
466
467
def setRegisterNumbersForTemporaries(ast, start):
    """
    Assign register numbers for temporary registers, keeping track of
    aliases and handling immediate operands.

    Returns ``(next_free_number, signature)``, where the signature holds
    one typecode per register numbered here.
    """
    assigned = 0
    typecodes = []
    alias_nodes = []
    for node in ast.postorderWalk():
        if node.astType == 'alias':
            # Remember the alias and carry on with the node it refers to.
            alias_nodes.append(node)
            node = node.value
        reg = node.reg
        if reg.immediate:
            # An immediate carries its value where a register number would go.
            reg.n = node.value
            continue
        if reg.n is None:
            reg.n = start + assigned
            assigned += 1
            typecodes.append(reg.node.typecode())
    # Aliases share the register of their target.
    for alias in alias_nodes:
        alias.reg = alias.value.reg
    return start + assigned, ''.join(typecodes)
491
492
def convertASTtoThreeAddrForm(ast):
    """
    Convert an AST to a three address form.

    Three address form is (op, reg1, reg2, reg3), where reg1 is the
    destination of the result of the instruction.  One tuple is produced
    per 'op' node, followed by the registers of all of its children.

    I suppose this should be called three register form, but three
    address form is found in compiler theory.
    """
    program = []
    for node in ast.allOf('op'):
        operand_regs = tuple(child.reg for child in node.children)
        program.append((node.value, node.reg) + operand_regs)
    return program
505
506
def compileThreeAddrForm(program):
    """
    Given a three address form of the program, compile it to a byte string
    that the VM understands.

    Every instruction becomes one or more 4-byte groups: opcode, store
    register and two argument registers.  Missing registers are written
    as 0xff, and arguments beyond the second are carried by additional
    ``noop`` groups of up to three registers each.

    Raises ValueError for a negative register number.
    """

    def nToChr(reg):
        # A missing operand is encoded as the byte 0xff.
        if reg is None:
            return b'\xff'
        if reg.n < 0:
            raise ValueError("negative value for register number %s" % reg.n)
        return bytes([reg.n])

    def quadrupleToString(opcode, store, a1=None, a2=None):
        # Emit the opcode number as a raw byte.  Going through
        # ``chr(...).encode('ascii')`` only works for numbers up to 127,
        # whereas a single byte can hold anything in range(256).
        cop = bytes([interpreter.opcodes[opcode]])
        return cop + nToChr(store) + nToChr(a1) + nToChr(a2)

    def toString(args):
        # Pad short instructions up to (opcode, store, a1, a2).
        while len(args) < 4:
            args += (None,)
        opcode, store, a1, a2 = args[:4]
        chunks = [quadrupleToString(opcode, store, a1, a2)]
        args = args[4:]
        # Any remaining operands ride along on noop instructions.
        while args:
            chunks.append(quadrupleToString(b'noop', *args[:3]))
            args = args[3:]
        return b''.join(chunks)

    return b''.join([toString(t) for t in program])
543
544
# Recognised context options as (name, allowed values, default) triples.
context_info = [
    (
        'optimization',
        ('none', 'moderate', 'aggressive'),
        'aggressive',
    ),
    (
        'truediv',
        (False, True, 'auto'),
        'auto',
    ),
]
549
550
def getContext(kwargs, _frame_depth=1):
    """Build the context dict from the keyword arguments in *kwargs*.

    Each option listed in ``context_info`` is taken from *kwargs* or set
    to its default.  A ValueError is raised for a value that is not
    allowed and for any keyword that is not a known option.  A 'truediv'
    of 'auto' is resolved to a bool by looking for ``division`` from
    ``__future__`` in the globals of the frame ``_frame_depth + 1``
    levels up.  *kwargs* itself is left untouched.
    """
    remaining = kwargs.copy()
    context = {}
    for name, allowed, default in context_info:
        value = remaining.pop(name, default)
        if value not in allowed:
            raise ValueError("'%s' must be one of %s" % (name, allowed))
        context[name] = value

    # Whatever is left over was not a recognised option.
    if remaining:
        raise ValueError("Unknown keyword argument '%s'" % remaining.popitem()[0])

    if context['truediv'] == 'auto':
        caller_globals = sys._getframe(_frame_depth + 1).f_globals
        context['truediv'] = caller_globals.get('division', None) == __future__.division

    return context
568
569
def precompile(ex, signature=(), context=None, sanitize: bool=True):
    """
    Compile the expression to an intermediate form.

    Parameters
    ----------
    ex: str or expression tree
        The expression; a string is first parsed with stringToExpression.
    signature: sequence of (name, type) pairs
        Order and types of the input variables.
    context: dict, optional
        Context handed to stringToExpression.  Defaults to an empty dict.
    sanitize: bool
        Forwarded to stringToExpression.

    Returns
    -------
    tuple
        ``(threeAddrProgram, signature, tempsig, constants, input_names)``
        where ``signature`` and ``tempsig`` are typecode strings for the
        inputs and the temporaries respectively.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if context is None:
        context = {}

    types = dict(signature)
    input_order = [name for (name, type_) in signature]

    if isinstance(ex, str):
        ex = stringToExpression(ex, types, context, sanitize)

    # the AST is like the expression, but the node objects don't have
    # any odd interpretations

    ast = expressionToAST(ex)

    # A bare variable or constant is wrapped in a 'copy' op so that the
    # root of the tree is always an operation.
    if ex.astType != 'op':
        ast = ASTNode('op', value='copy', astKind=ex.astKind, children=(ast,))

    ast = typeCompileAst(ast)

    aliases = collapseDuplicateSubtrees(ast)

    assignLeafRegisters(ast.allOf('raw'), Immediate)
    assignLeafRegisters(ast.allOf('variable', 'constant'), Register)
    assignBranchRegisters(ast.allOf('op'), Register)

    # assign registers for aliases
    for a in aliases:
        a.reg = a.value.reg

    input_order = getInputOrder(ast, input_order)
    constants_order, constants = getConstants(ast)

    # The result register of a reduction is marked non-temporary before
    # the temporaries are optimized.
    if isReduction(ast):
        ast.reg.temporary = False

    optimizeTemporariesAllocation(ast)

    # The root's register is the output and always gets number 0.
    ast.reg.temporary = False
    r_output = 0
    ast.reg.n = 0

    # Register numbers run: output, inputs, constants, temporaries.
    r_inputs = r_output + 1
    r_constants = setOrderedRegisterNumbers(input_order, r_inputs)
    r_temps = setOrderedRegisterNumbers(constants_order, r_constants)
    r_end, tempsig = setRegisterNumbersForTemporaries(ast, r_temps)

    threeAddrProgram = convertASTtoThreeAddrForm(ast)
    input_names = tuple([a.value for a in input_order])
    signature = ''.join(type_to_typecode[types.get(x, default_type)]
                        for x in input_names)
    return threeAddrProgram, signature, tempsig, constants, input_names
622
623
def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs):
    """
    Compile an expression built using E.<variable> variables to a function.

    ex can also be specified as a string "2*a+3*b".

    The order of the input variables and their types can be specified using the
    signature parameter, which is a list of (name, type) pairs.

    Any other keyword argument is treated as a context option (see
    getContext).

    Returns a `NumExpr` object containing the compiled function.
    """
    # NOTE: the original remark here says that when 'truediv' was already
    # resolved to True or False (presumably by an outer caller such as
    # validate() -- confirm), this frame depth is wrong (it should be 2)
    # but it does not matter, because it is then never used.
    context = getContext(kwargs, _frame_depth=1)
    compiled = precompile(ex, signature, context, sanitize=sanitize)
    threeAddrProgram, inputsig, tempsig, constants, input_names = compiled
    program = compileThreeAddrForm(threeAddrProgram)
    return interpreter.NumExpr(inputsig.encode('ascii'),
                               tempsig.encode('ascii'),
                               program, constants, input_names)
646
647
def disassemble(nex):
    """
    Given a NumExpr object, return a list which is the program disassembled.

    Each entry is a list starting with the opcode name, followed by its
    decoded arguments and padded with None to at least four items.
    """
    # Opcode number -> opcode name.
    rev_opcodes = {interpreter.opcodes[op]: op for op in interpreter.opcodes}
    # Register layout: 0 is the output, then inputs, constants, temporaries.
    r_constants = 1 + len(nex.signature)
    r_temps = r_constants + len(nex.constants)

    def parseOp(op):
        # Split b'name_sig' at the last underscore; sig is '' if absent.
        parts = op.rsplit(b'_', 1)
        if len(parts) == 1:
            return parts[0], ''
        return parts[0], parts[1]

    def getArg(pc, offset):
        # From the fourth argument on, one extra byte is skipped.
        position = pc + offset if offset < 4 else pc + offset + 1
        arg = nex.program[position]
        _, sig = parseOp(rev_opcodes.get(nex.program[pc]))
        try:
            code = sig[offset - 1]
        except IndexError:
            return None

        code = bytes([code])

        if arg == 255:
            return None
        if code == b'n':
            # 'n' arguments are returned as the raw number.
            return arg
        if arg == 0:
            return b'r0'
        if arg < r_constants:
            return ('r%d[%s]' % (arg, nex.input_names[arg - 1])).encode('ascii')
        if arg < r_temps:
            return ('c%d[%s]' % (arg, nex.constants[arg - r_constants])).encode('ascii')
        return ('t%d' % (arg,)).encode('ascii')

    source = []
    for pc in range(0, len(nex.program), 4):
        op = rev_opcodes.get(nex.program[pc])
        _, sig = parseOp(op)
        parsed = [op] + [getArg(pc, 1 + i) for i in range(len(sig))]
        parsed.extend([None] * (4 - len(parsed)))
        source.append(parsed)
    return source
697
698
def getType(a):
    """Map the dtype of array *a* onto the type NumExpr computes with.

    Raises ValueError for Unicode arrays and for any other unsupported
    dtype.
    """
    kind = a.dtype.kind
    itemsize = a.dtype.itemsize

    if kind == 'b':
        return bool
    if kind in 'iu':
        # ``long`` is for integers of more than 32 bits; an unsigned
        # 32-bit value needs it as well, as an ``int`` is not enough.
        if itemsize > 4 or (kind == 'u' and itemsize == 4):
            return long_
        return int_
    if kind == 'f':
        # ``double`` is for floats of more than 32 bits
        return double if itemsize > 4 else float
    if kind == 'c':
        return complex
    if kind == 'S':
        return bytes
    if kind == 'U':
        raise ValueError('NumExpr 2 does not support Unicode as a dtype.')
    raise ValueError("unknown type %s" % a.dtype.name)
720
721
def getExprNames(text, context, sanitize: bool=True):
    """Return ``(names, ex_uses_vml)`` for the expression string *text*.

    ``names`` are the variable names in sorted order; ``ex_uses_vml`` is
    True only if VML is in use and the expression contains an operation
    listed in ``vml_functions``.
    """
    ex = stringToExpression(text, {}, context, sanitize)
    ast = expressionToAST(ex)
    input_order = getInputOrder(ast, None)

    # try to figure out if vml operations are used by expression
    if use_vml:
        ex_uses_vml = any(node.astType == 'op' and node.value in vml_functions
                          for node in ast.postorderWalk())
    else:
        ex_uses_vml = False

    return [a.value for a in input_order], ex_uses_vml
738
739
def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):
    """
    Look up each name and return the values as a list of NumPy arrays.

    A name is searched in *local_dict* first and then in *global_dict*;
    a dictionary that is not given is taken from the frame
    ``_frame_depth`` levels up the call stack.  A name found in neither
    raises KeyError.
    """
    call_frame = sys._getframe(_frame_depth)

    # Only a locals dict that we fetched from the frame ourselves may be
    # cleared afterwards.
    clear_local_dict = local_dict is None
    if clear_local_dict:
        local_dict = call_frame.f_locals
    try:
        frame_globals = call_frame.f_globals
        if global_dict is None:
            global_dict = frame_globals

        # If `call_frame` is the top frame of the interpreter we can't clear its
        # `local_dict`, because it is actually the `global_dict`.
        clear_local_dict = clear_local_dict and frame_globals is not local_dict

        arguments = []
        for name in names:
            try:
                value = local_dict[name]
            except KeyError:
                value = global_dict[name]
            arguments.append(numpy.asarray(value))
    finally:
        # If we generated local_dict via an explicit reference to f_locals,
        # clear the dict to prevent creating extra ref counts in the caller's scope
        # See https://github.com/pydata/numexpr/issues/310
        if clear_local_dict and hasattr(local_dict, 'clear'):
            local_dict.clear()

    return arguments
774
775
# Dictionaries for caching variable names and compiled expressions
_names_cache = CacheDict(256)      # expression key -> (names, ex_uses_vml)
_numexpr_cache = CacheDict(256)    # expression key + signature -> compiled expression
# What the latest successful validate() stored, for use by re_evaluate().
_numexpr_last = ContextDict()
# Held by re_evaluate() while the compiled expression runs.
evaluate_lock = threading.Lock()
781
def validate(ex: str,
             local_dict: Optional[Dict] = None,
             global_dict: Optional[Dict] = None,
             out: numpy.ndarray = None,
             order: str = 'K',
             casting: str = 'safe',
             _frame_depth: int = 2,
             sanitize: Optional[bool] = None,
             **kwargs) -> Optional[Exception]:
    r"""
    Validate a NumExpr expression with the given `local_dict` or `locals()`.

    The expression is compiled (or fetched from the cache) and stored,
    together with its operand names and evaluation options, as the "last"
    expression.  Returns `None` on success and the Exception object if one
    occurs; the exception is returned, not raised.  After a successful
    `validate()` you can proceed directly to call `re_evaluate()`.

    Parameters
    ----------
    ex: str
        a string forming an expression, like "2*a+3*b". The values for "a"
        and "b" will by default be taken from the calling function's frame
        (through use of sys._getframe()). Alternatively, they can be specified
        using the 'local_dict' or 'global_dict' arguments.

    local_dict: dictionary, optional
        A dictionary that replaces the local operands in current frame.

    global_dict: dictionary, optional
        A dictionary that replaces the global operands in current frame.

    out: NumPy array, optional
        An existing array where the outcome is going to be stored. Care is
        required so that this array has the same shape and type than the
        actual outcome of the computation. Useful for avoiding unnecessary
        new array allocations.

    order: {'C', 'F', 'A', or 'K'}, optional
        Controls the iteration order for operands. 'C' means C order, 'F'
        means Fortran order, 'A' means 'F' order if all the arrays are
        Fortran contiguous, 'C' order otherwise, and 'K' means as close to
        the order the array elements appear in memory as possible. For
        efficient computations, typically 'K'eep order (the default) is
        desired.

    casting: {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur when making a copy or
        buffering. Setting this to 'unsafe' is not recommended, as it can
        adversely affect accumulations.

          * 'no' means the data types should not be cast at all.
          * 'equiv' means only byte-order changes are allowed.
          * 'safe' means only casts which can preserve values are allowed.
          * 'same_kind' means only safe casts or casts within a kind,
            like float64 to float32, are allowed.
          * 'unsafe' means any data conversions may be done.

    sanitize: Optional[bool]
        Both `validate` and by extension `evaluate` call `eval(ex)`, which is
        potentially dangerous on unsanitized inputs. As such, NumExpr by default
        performs simple sanitization, banning the character ':;[', the
        dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'.

        Using `None` defaults to `True` unless the environment variable
        `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`.
        Nominally this can be set via `os.environ` before `import numexpr`.

    _frame_depth: int
        The calling frame depth. Unless you are a NumExpr developer you should
        not set this value.

    **kwargs
        Context options, checked by `getContext` ('optimization' and
        'truediv').
    """
    try:
        if not isinstance(ex, str):
            raise ValueError("must specify expression as a string")

        # Resolve the default for `sanitize` from the environment.
        if sanitize is None:
            env_setting = os.environ.get('NUMEXPR_SANITIZE')
            sanitize = True if env_setting is None else bool(int(env_setting))

        # Get the names for this expression.  getContext() and
        # getArguments() inspect the call stack, so both have to be called
        # from this frame directly.
        context = getContext(kwargs)
        expr_key = (ex, tuple(sorted(context.items())))
        if expr_key not in _names_cache:
            _names_cache[expr_key] = getExprNames(ex, context, sanitize=sanitize)
        names, ex_uses_vml = _names_cache[expr_key]
        arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth)

        # Create a signature
        signature = [(name, getType(arg)) for (name, arg) in
                     zip(names, arguments)]

        # Look up numexpr if possible.
        numexpr_key = expr_key + (tuple(signature),)
        try:
            compiled_ex = _numexpr_cache[numexpr_key]
        except KeyError:
            compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context)

        # Remember everything re_evaluate() needs to run the expression.
        run_kwargs = {'out': out, 'order': order, 'casting': casting,
                      'ex_uses_vml': ex_uses_vml}
        _numexpr_last.set(ex=compiled_ex, argnames=names, kwargs=run_kwargs)
    except Exception as e:
        return e
    return None
892
def evaluate(ex: str,
             local_dict: Optional[Dict] = None,
             global_dict: Optional[Dict] = None,
             out: numpy.ndarray = None,
             order: str = 'K',
             casting: str = 'safe',
             sanitize: Optional[bool] = None,
             _frame_depth: int = 3,
             **kwargs) -> numpy.ndarray:
    r"""
    Evaluate a simple array expression element-wise using the virtual machine.

    The expression is first passed to `validate`; any exception that
    `validate` returns is raised here, otherwise the result of
    `re_evaluate` is returned.

    Parameters
    ----------
    ex: str
        a string forming an expression, like "2*a+3*b". The values for "a"
        and "b" will by default be taken from the calling function's frame
        (through use of sys._getframe()). Alternatively, they can be specified
        using the 'local_dict' or 'global_dict' arguments.

    local_dict: dictionary, optional
        A dictionary that replaces the local operands in current frame.

    global_dict: dictionary, optional
        A dictionary that replaces the global operands in current frame.

    out: NumPy array, optional
        An existing array where the outcome is going to be stored. Care is
        required so that this array has the same shape and type than the
        actual outcome of the computation. Useful for avoiding unnecessary
        new array allocations.

    order: {'C', 'F', 'A', or 'K'}, optional
        Controls the iteration order for operands. 'C' means C order, 'F'
        means Fortran order, 'A' means 'F' order if all the arrays are
        Fortran contiguous, 'C' order otherwise, and 'K' means as close to
        the order the array elements appear in memory as possible. For
        efficient computations, typically 'K'eep order (the default) is
        desired.

    casting: {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur when making a copy or
        buffering. Setting this to 'unsafe' is not recommended, as it can
        adversely affect accumulations.

          * 'no' means the data types should not be cast at all.
          * 'equiv' means only byte-order changes are allowed.
          * 'safe' means only casts which can preserve values are allowed.
          * 'same_kind' means only safe casts or casts within a kind,
            like float64 to float32, are allowed.
          * 'unsafe' means any data conversions may be done.

    sanitize: bool
        Both `validate` and by extension `evaluate` call `eval(ex)`, which is
        potentially dangerous on unsanitized inputs. As such, NumExpr by default
        performs simple sanitization, banning the character ':;[', the
        dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'.

        Using `None` defaults to `True` unless the environment variable
        `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`.
        Nominally this can be set via `os.environ` before `import numexpr`.

    _frame_depth: int
        The calling frame depth. Unless you are a NumExpr developer you should
        not set this value.

    Note
    ----
    Both `validate` and by extension `evaluate` call `eval(ex)`, which is
    potentially dangerous on unsanitized inputs. As such, NumExpr does some
    sanitization, banning the character ':;[', the dunder '__', and attribute
    access to all but '.real' and '.imag' of complex numbers.
    """
    # validate() and re_evaluate() are each called with the same
    # `_frame_depth`, so that the `sys._getframe()` call in `getArguments`
    # resolves relative to this function in both cases.
    error = validate(ex, local_dict=local_dict, global_dict=global_dict,
                     out=out, order=order, casting=casting,
                     _frame_depth=_frame_depth, sanitize=sanitize, **kwargs)
    if error is not None:
        raise error
    return re_evaluate(local_dict=local_dict, global_dict=global_dict, _frame_depth=_frame_depth)
976
def re_evaluate(local_dict: Optional[Dict] = None,
                global_dict: Optional[Dict] = None,
                _frame_depth: int=2) -> numpy.ndarray:
    """
    Re-evaluate the previous executed array expression without any check.

    This is meant for accelerating loops that are re-evaluating the same
    expression repeatedly without changing anything else than the operands.
    If unsure, use evaluate() which is safer.

    Raises RuntimeError if no expression has been stored by an earlier
    `validate` or `evaluate` call.

    Parameters
    ----------
    local_dict: dictionary, optional
        A dictionary that replaces the local operands in current frame.
    global_dict: dictionary, optional
        A dictionary that replaces the global operands in current frame.
    _frame_depth: int
        The calling frame depth. Unless you are a NumExpr developer you should
        not set this value.
    """
    try:
        last_compiled = _numexpr_last['ex']
    except KeyError:
        raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`")
    operand_names = _numexpr_last['argnames']
    # Must be called from this frame: getArguments() walks the call stack.
    operands = getArguments(operand_names, local_dict, global_dict, _frame_depth=_frame_depth)
    run_kwargs = _numexpr_last['kwargs']
    # The compiled expression runs while holding the module-wide lock.
    with evaluate_lock:
        return last_compiled(*operands, **run_kwargs)