1###################################################################
2# Numexpr - Fast numerical array expression evaluator for NumPy.
3#
4# License: MIT
5# Author: See AUTHORS.txt
6#
7# See LICENSE.txt and LICENSES/*.txt for details about copyright and
8# rights to use.
9####################################################################
10
11from typing import Optional, Dict
12import __future__
13import sys
14import os
15import threading
16import re
17
18import numpy
19
# DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE
is_cpu_amd_intel = False
21from numexpr import interpreter, expressions, use_vml
22from numexpr.utils import CacheDict
23
# Declare a double type that does not exist in Python space.
double = numpy.double

# Fixed-width NumPy integer types used for the 'int' and 'long' kinds.
int_ = numpy.int32
long_ = numpy.int64

# One-letter typecode -> kind name.
typecode_to_kind = {
    'b': 'bool',
    'i': 'int',
    'l': 'long',
    'f': 'float',
    'd': 'double',
    'c': 'complex',
    'n': 'none',
    's': 'str',
}

# Kind name -> one-letter typecode.  Both 'bytes' and 'str' map to 's'.
kind_to_typecode = {
    'bool': 'b',
    'int': 'i',
    'long': 'l',
    'float': 'f',
    'double': 'd',
    'complex': 'c',
    'bytes': 's',
    'str': 's',
    'none': 'n',
}

# Python / NumPy type object -> one-letter typecode.
type_to_typecode = {
    bool: 'b',
    int_: 'i',
    long_: 'l',
    float: 'f',
    double: 'd',
    complex: 'c',
    bytes: 's',
    str: 's',
}
# Re-export the kind tables that live in the `expressions` module.
kind_to_type = expressions.kind_to_type
type_to_kind = expressions.type_to_kind
# Type used for variables whose type is not given in a signature.
default_type = kind_to_type[expressions.default_kind]
# Every kind name that has a typecode, in table order.
scalar_constant_kinds = list(kind_to_typecode)
41
# VML functions that are implemented in numexpr
vml_functions = [
    # The first three come from interp_body.cpp.
    "div", "inv", "pow",
    # Keep the rest of this list in sync with the ones listed in functions.hpp
    "sqrt",
    "sin", "cos", "tan",
    "arcsin", "arccos", "arctan",
    "sinh", "cosh", "tanh",
    "arcsinh", "arccosh", "arctanh",
    "log", "log1p", "log10",
    "exp", "expm1",
    "absolute", "conjugate",
    "arctan2", "fmod",
    "ceil", "floor",
]
73
74
class ASTNode():
    """Abstract Syntax Tree node.

    Members:

    astType      -- type of node (op, constant, variable, raw, or alias)
    astKind      -- the type of the result (bool, float, etc.)
    value        -- value associated with this node.
                    An opcode, numerical value, a variable name, etc.
    children     -- the children below this node
    reg          -- the register assigned to the result for this node.

    Equality and hashing look through 'alias' nodes: an alias compares and
    hashes like the node stored in its ``value``.
    """
    # Attributes compared by __eq__ (``reg`` is deliberately not included).
    cmpnames = ['astType', 'astKind', 'value', 'children']

    def __init__(self, astType='generic', astKind='unknown', value=None, children=()):
        self.astType = astType
        self.astKind = astKind
        self.value = value
        self.children = tuple(children)
        self.reg = None

    def __eq__(self, other):
        """Return True when both nodes (after alias resolution) match on
        every attribute in ``cmpnames``; anything that is not an ASTNode
        compares unequal.
        """
        # Check the type before touching ``other.astType`` so that comparing
        # against an arbitrary object returns False instead of raising.
        if not isinstance(other, ASTNode):
            return False
        if self.astType == 'alias':
            self = self.value
        if other.astType == 'alias':
            other = other.value
        if not isinstance(other, ASTNode):
            return False
        for name in self.cmpnames:
            if getattr(self, name) != getattr(other, name):
                return False
        return True

    def __lt__(self, other):
        """Order constant nodes; raise TypeError for any other astType."""
        # RAM: this is a fix for issue #88 whereby sorting on constants
        # that may be of astKind == 'complex' but type(self.value) == int or float
        # Here we let NumPy sort as it will cast data properly for comparison
        # when the Python built-ins will raise an error.
        if self.astType == 'constant':
            if self.astKind == other.astKind:
                return numpy.array(self.value) < numpy.array(other.value)
            return self.astKind < other.astKind
        else:
            raise TypeError('Sorting not implemented for astType: %s'%self.astType)

    def __hash__(self):
        # Hash consistently with __eq__: an alias hashes like its target.
        if self.astType == 'alias':
            self = self.value
        return hash((self.astType, self.astKind, self.value, self.children))

    def __str__(self):
        return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind,
                                            self.value, self.children, self.reg)

    def __repr__(self):
        return '<AST object at %s>' % id(self)

    def key(self):
        """Return the (astType, astKind, value, children) tuple."""
        return (self.astType, self.astKind, self.value, self.children)

    def typecode(self):
        """Return the one-letter typecode for this node's astKind."""
        return kind_to_typecode[self.astKind]

    def postorderWalk(self):
        """Yield every node of the subtree, children before their parent."""
        for c in self.children:
            yield from c.postorderWalk()
        yield self

    def allOf(self, *astTypes):
        """Yield, in post-order, the nodes whose astType is in ``astTypes``."""
        astTypes = set(astTypes)
        for w in self.postorderWalk():
            if w.astType in astTypes:
                yield w
149
150
def expressionToAST(ex):
    """Recursively mirror an expressions.ExpressionNode tree as ASTNode's.

    ExpressionNode overrides many methods to act like a number, so the
    compiler works on this plain copy instead.
    """
    converted_children = []
    for child in ex.children:
        converted_children.append(expressionToAST(child))
    return ASTNode(ex.astType, ex.astKind, ex.value, converted_children)
160
161
def sigPerms(s):
    """Yield every signature obtainable from `s` by upcasting its typecodes.

    Numeric codes may be replaced by themselves or any later code in
    'bilfdc'.  An 's' stays 's' (numbers shall not be cast to strings).
    When any other code is met, the remainder of the signature starting at
    that code is yielded unchanged.
    """
    if not s:
        yield ''
        return

    upcast_order = 'bilfdc'
    head, tail = s[0], s[1:]
    if head in upcast_order:
        candidates = upcast_order[upcast_order.index(head):]
    elif head == 's':
        candidates = 's'
    else:
        yield s
        return

    for first in candidates:
        for rest in sigPerms(tail):
            yield first + rest
179
180
def typeCompileAst(ast):
    """Return a copy of `ast` with typed opcodes and explicit casts.

    For every 'op' node the opcode name is resolved against
    ``interpreter.opcodes`` and then ``interpreter.funccodes``, trying each
    upcast of the operand signature produced by ``sigPerms``.  Operands whose
    typecode differs from the chosen signature are re-typed (constants) or
    wrapped in a "cast" op (everything else).  Raises NotImplementedError
    when no opcode or function matches.
    """
    if ast.astType != 'op':
        # Non-op nodes keep their value; only the children are processed.
        return ASTNode(ast.astType, ast.astKind, ast.value,
                       [typeCompileAst(child) for child in ast.children])

    operands = list(ast.children)
    return_code = ast.typecode()
    operand_codes = ''.join(child.typecode() for child in ast.children)
    name_prefix = ast.value + '_' + return_code

    value = None
    chosen_sig = None
    # First preference: a native opcode for some acceptable casting of args.
    for candidate in sigPerms(operand_codes):
        encoded = (name_prefix + candidate).encode('ascii')
        if encoded in interpreter.opcodes:
            value, chosen_sig = encoded, candidate
            break

    if chosen_sig is None:
        # Otherwise look for a function; its code is passed as an extra
        # 'raw' operand to a generic func_* opcode.
        for candidate in sigPerms(operand_codes):
            funcname = (name_prefix + candidate).encode('ascii')
            if funcname in interpreter.funccodes:
                value = ('func_%sn' % (return_code + candidate)).encode('ascii')
                operands.append(ASTNode('raw', 'none',
                                        interpreter.funccodes[funcname]))
                chosen_sig = candidate
                break
        if chosen_sig is None:
            raise NotImplementedError(
                "couldn't find matching opcode for '%s'"
                % (ast.value + '_' + return_code + operand_codes))

    # First just cast constants, then cast variables if necessary:
    for index, (have, want) in enumerate(zip(operand_codes, chosen_sig)):
        if have == want:
            continue
        kind = typecode_to_kind[want]
        operand = operands[index]
        if operand.astType == 'constant':
            operands[index] = ASTNode('constant', kind, operand.value)
        else:
            operands[index] = ASTNode('op', kind, "cast", [operand])

    return ASTNode(ast.astType, ast.astKind, value,
                   [typeCompileAst(child) for child in operands])
222
223
class Register():
    """A register of the virtual machine.

    Members:
    node          -- the AST node this corresponds to
    temporary     -- True if this isn't an input or output
    immediate     -- not a register, but an immediate value
    n             -- the physical register number.
                     None if no number assigned yet.
    """

    def __init__(self, astnode, temporary=False):
        self.n = None
        self.immediate = False
        self.temporary = temporary
        self.node = astnode

    def __str__(self):
        label = 'Temporary' if self.temporary else 'Register'
        node = self.node
        return '%s(%s, %s, %s)' % (label, node.astType, node.astKind, self.n)

    def __repr__(self):
        # Dispatch through the instance so subclasses' __str__ is honoured.
        return self.__str__()
251
252
class Immediate(Register):
    """An immediate (integer) operand used in place of a register."""

    def __init__(self, astnode):
        super().__init__(astnode)
        self.immediate = True

    def __str__(self):
        value = self.node.value
        return 'Immediate(%d)' % (value,)
264
265
266_flow_pat = r'[\;\[\:]'
267_dunder_pat = r'__[\w]+__'
268_attr_pat = r'\.\b(?!(real|imag|[eE]?[+-]?\d+)\b)'
269_blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}')
270
def stringToExpression(s, types, context, sanitize: bool=True):
    """Parse the expression string `s` into a tree of ExpressionNode's.

    `types` maps variable names to types (missing names get `default_type`)
    and `context` is installed as the current expressions context while the
    string is evaluated; the previous context is always restored.  When
    `sanitize` is true the string is first checked against the blacklist and
    a ValueError is raised on a match.  A TypeError is raised if evaluation
    yields neither a constant nor an ExpressionNode.
    """
    # Reject obvious attack vectors that NumExpr cannot parse into its
    # homebrew AST; this protects the call to `eval` below.  Forbidden are
    # `;`, `:`, `[`, dunder names and attribute access via '.', with
    # `.real` and `.imag` as the exceptions.
    if sanitize:
        compact = re.sub(r'\s+', '', s)
        if _blacklist_re.search(compact) is not None:
            raise ValueError(f'Expression {s} has forbidden control characters.')

    previous_context = expressions._context.get_current_context()
    try:
        expressions._context.set_new_context(context)
        # Compile first so the referenced names can be read from the code
        # object.
        flags = __future__.division.compiler_flag if context.get('truediv', False) else 0
        code = compile(s, '<expr>', 'eval', flags)

        # Bind each name either to its literal value or to a VariableNode.
        literal_values = {"None": None, "True": True, "False": False}
        namespace = {}
        for name in code.co_names:
            if name in literal_values:
                namespace[name] = literal_values[name]
            else:
                declared_type = types.get(name, default_type)
                namespace[name] = expressions.VariableNode(name, type_to_kind[declared_type])
        namespace.update(expressions.functions)

        # Evaluating the code object builds the expression tree.
        ex = eval(code, namespace)

        if expressions.isConstant(ex):
            ex = expressions.ConstantNode(ex, expressions.getKind(ex))
        elif not isinstance(ex, expressions.ExpressionNode):
            raise TypeError("unsupported expression type: %s" % type(ex))
    finally:
        expressions._context.set_new_context(previous_context)
    return ex
316
317
def isReduction(ast):
    """Return True if the node's opcode starts with a reduction prefix."""
    reduction_prefixes = (b'sum_', b'prod_', b'min_', b'max_')
    return ast.value.startswith(reduction_prefixes)
321
322
def getInputOrder(ast, input_order=None):
    """Return the expression's variable nodes in input order.

    With a non-empty `input_order` the nodes follow that order, and a
    ValueError is raised if its names differ from the names found in the
    expression.  Otherwise the nodes are sorted by variable name.
    """
    # One node per distinct name (the last occurrence wins).
    variables = {node.value: node for node in ast.allOf('variable')}
    variable_names = set(variables.keys())

    if not input_order:
        ordered_names = sorted(variable_names)
    elif variable_names == set(input_order):
        ordered_names = input_order
    else:
        raise ValueError(
            "input names (%s) don't match those found in expression (%s)"
            % (input_order, variable_names))

    return [variables[name] for name in ordered_names]
344
345
def convertConstantToKind(x, kind):
    """Convert the constant `x` to the type associated with `kind`.

    The 'float' kind is special-cased to NumPy's float32, and `str` values
    are encoded to ASCII bytes; anything else goes through `kind_to_type`.
    """
    if kind == 'float':
        return numpy.float32(x)
    if isinstance(x, str):
        return x.encode('ascii')
    converter = kind_to_type[kind]
    return converter(x)
353
354
def getConstants(ast):
    """Return ``(constants_order, constants)`` for the constants in `ast`.

    `constants_order` holds one node per distinct constant register, sorted
    with ``ASTNode.__lt__``; `constants` holds the matching converted values.

    RAM: the magic method __lt__ on ASTNode was implemented to fix issues
    #88 and #209.  The following test code works now, as does the test suite.

        import numexpr as ne
        a = 1 + 3j; b = 5.0
        ne.evaluate('a*2 + 15j - b')
    """
    constant_registers = {node.reg for node in ast.allOf("constant")}
    constants_order = sorted(register.node for register in constant_registers)
    constants = []
    for node in constants_order:
        constants.append(convertConstantToKind(node.value, node.astKind))
    return constants_order, constants
369
370
def sortNodesByOrder(nodes, order):
    """Return `nodes` sorted by the position of their value in `order`.

    `order` is a sequence of 3-tuples; the middle element of each tuple is
    matched against ``node.value``.
    """
    position_of = {name: index
                   for index, (_first, name, _last) in enumerate(order)}
    decorated = sorted((position_of[node.value], node) for node in nodes)
    return [node for _position, node in decorated]
378
379
def assignLeafRegisters(inodes, registerMaker):
    """Give every leaf node a register, sharing one register among nodes
    that have the same ``key()``.
    """
    registers_by_key = {}
    for leaf in inodes:
        leaf_key = leaf.key()
        if leaf_key not in registers_by_key:
            registers_by_key[leaf_key] = registerMaker(leaf)
        leaf.reg = registers_by_key[leaf_key]
391
392
def assignBranchRegisters(inodes, registerMaker):
    """Create a fresh temporary register for each branch node."""
    for branch in inodes:
        branch.reg = registerMaker(branch, temporary=True)
399
400
def collapseDuplicateSubtrees(ast):
    """Common subexpression elimination.

    Each 'op' node equal to one seen earlier in the walk is turned, in
    place, into an 'alias' node whose value is the earlier node and whose
    children are dropped.  Returns the list of nodes that became aliases.
    """
    first_seen = {}
    aliases = []
    for node in ast.allOf('op'):
        if node not in first_seen:
            first_seen[node] = node
            continue
        original = first_seen[node]
        node.astType = 'alias'
        node.value = original
        node.children = ()
        aliases.append(node)

    # Make every alias point directly at a non-alias node, so that
    # optimizeTemporariesAllocation doesn't get confused.
    for alias in aliases:
        target = alias.value
        while target.astType == 'alias':
            target = target.value
        alias.value = target
    return aliases
422
423
def optimizeTemporariesAllocation(ast):
    """
    Attempt to minimize the number of temporaries needed, by reusing old ones.

    Walks the nodes that own a temporary register in post-order.  Once every
    user of a child's temporary has been processed, that register is put in
    a per-kind pool; a node whose kind has a pooled register takes it over
    instead of keeping its own.  Nodes are modified in place (``n.reg``).
    """
    nodes = [n for n in ast.postorderWalk() if n.reg.temporary]
    # register -> set of nodes that still read it
    users_of = {n.reg: set() for n in nodes}

    # The root may not own a temporary itself but can still consume some.
    if nodes and nodes[-1] is not ast:
        nodes_to_check = nodes + [ast]
    else:
        nodes_to_check = nodes
    for n in nodes_to_check:
        for c in n.children:
            if c.reg.temporary:
                users_of[c.reg].add(n)

    # kind -> registers of that kind that are free to be reused
    unused = {kind: set() for kind in scalar_constant_kinds}
    for n in nodes:
        for c in n.children:
            reg = c.reg
            if reg.temporary:
                users = users_of[reg]
                users.discard(n)
                if not users:
                    unused[reg.node.astKind].add(reg)
        if unused[n.astKind]:
            reg = unused[n.astKind].pop()
            # The reused register inherits the users of the one it replaces.
            users_of[reg] = users_of[n.reg]
            n.reg = reg
455
456
def setOrderedRegisterNumbers(order, start):
    """Number the registers of the nodes in `order` consecutively from
    `start` and return the first number after the last one used.
    """
    for number, node in enumerate(order, start):
        node.reg.n = number
    return start + len(order)
464
465
def setRegisterNumbersForTemporaries(ast, start):
    """Number every register that is still unnumbered, starting at `start`.

    Alias nodes are resolved to their target and end up sharing its
    register; immediate operands get their node value as number.  Returns
    ``(next_free_number, signature)`` where `signature` is the concatenated
    typecodes of the registers numbered here.
    """
    assigned = 0
    typecodes = []
    alias_nodes = []
    for node in ast.postorderWalk():
        if node.astType == 'alias':
            alias_nodes.append(node)
            node = node.value
        reg = node.reg
        if reg.immediate:
            reg.n = node.value
            continue
        if reg.n is None:
            reg.n = start + assigned
            assigned += 1
            typecodes.append(reg.node.typecode())
    for alias in alias_nodes:
        alias.reg = alias.value.reg
    return start + assigned, ''.join(typecodes)
489
490
def convertASTtoThreeAddrForm(ast):
    """Flatten the 'op' nodes of `ast` into three address form.

    Each instruction is a tuple ``(op, reg1, reg2, reg3, ...)`` where reg1
    is the destination of the result, followed by one register per child.
    ("Three register form" would be more accurate, but "three address form"
    is the name found in compiler theory.)
    """
    program = []
    for node in ast.allOf('op'):
        operand_regs = (child.reg for child in node.children)
        program.append((node.value, node.reg, *operand_regs))
    return program
503
504
def compileThreeAddrForm(program):
    """
    Given a three address form of the program, compile it to the byte string
    that the VM understands.

    Every instruction is emitted as one or more 4-byte groups: the opcode
    number followed by three register numbers, with 0xff standing for "no
    register".  Instructions with more than three registers continue in
    additional groups that use the ``noop`` opcode.

    Raises ValueError for a negative register number, and (from ``bytes``)
    for an opcode or register number above 255.
    """

    def nToChr(reg):
        # A missing operand is encoded as 0xff.
        if reg is None:
            return b'\xff'
        elif reg.n < 0:
            raise ValueError("negative value for register number %s" % reg.n)
        else:
            return bytes([reg.n])

    def quadrupleToString(opcode, store, a1=None, a2=None):
        # bytes([n]) handles the whole 0..255 range, whereas encoding
        # chr(n) as ASCII fails for opcode numbers >= 128.
        cop = bytes([interpreter.opcodes[opcode]])
        return cop + nToChr(store) + nToChr(a1) + nToChr(a2)

    def toString(args):
        # Pad short instructions up to (opcode, store, a1, a2).
        while len(args) < 4:
            args += (None,)
        opcode, store, a1, a2 = args[:4]
        chunks = [quadrupleToString(opcode, store, a1, a2)]
        # Remaining operands travel three at a time in 'noop' groups.
        args = args[4:]
        while args:
            chunks.append(quadrupleToString(b'noop', *args[:3]))
            args = args[3:]
        return b''.join(chunks)

    return b''.join(toString(t) for t in program)
541
542
# Recognised context options as (name, allowed values, default) triples.
context_info = [
    (
        'optimization',
        ('none', 'moderate', 'aggressive'),
        'aggressive',
    ),
    (
        'truediv',
        (False, True, 'auto'),
        'auto',
    ),
]
547
548
def getContext(kwargs, _frame_depth=1):
    """Build the compilation context dictionary from keyword arguments.

    Each option listed in `context_info` is taken from `kwargs` (or its
    default) and validated; a ValueError is raised for a disallowed value or
    for any keyword that is not a known option.  A 'truediv' of 'auto' is
    resolved to a bool by checking whether the globals of the frame
    ``_frame_depth + 1`` levels up bind ``division`` to
    ``__future__.division``.  `kwargs` itself is not modified.
    """
    remaining = kwargs.copy()
    context = {}
    for name, allowed, default in context_info:
        value = remaining.pop(name, default)
        if value not in allowed:
            raise ValueError("'%s' must be one of %s" % (name, allowed))
        context[name] = value

    if remaining:
        raise ValueError("Unknown keyword argument '%s'" % remaining.popitem()[0])

    if context['truediv'] == 'auto':
        # Must be called from this function's own frame: the depth is
        # counted relative to getContext.
        caller_globals = sys._getframe(_frame_depth + 1).f_globals
        context['truediv'] = caller_globals.get('division', None) == __future__.division

    return context
566
567
def precompile(ex, signature=(), context=None, sanitize: bool=True):
    """
    Compile the expression to an intermediate form.

    Parameters
    ----------
    ex: an expression string or an already built expression tree.
    signature: sequence of (name, type) pairs giving input order and types.
    context: compilation context dictionary; an empty one is used when
        omitted.
    sanitize: passed to `stringToExpression` when `ex` is a string.

    Returns
    -------
    ``(threeAddrProgram, signature, tempsig, constants, input_names)`` where
    `signature` and `tempsig` are typecode strings for the inputs and the
    temporaries.
    """
    # Avoid sharing one mutable default dictionary between calls.
    if context is None:
        context = {}

    types = dict(signature)
    input_order = [name for (name, type_) in signature]

    if isinstance(ex, str):
        ex = stringToExpression(ex, types, context, sanitize)

    # the AST is like the expression, but the node objects don't have
    # any odd interpretations

    ast = expressionToAST(ex)

    # A bare variable/constant is wrapped in a 'copy' op so that the root
    # is always an operation.
    if ex.astType != 'op':
        ast = ASTNode('op', value='copy', astKind=ex.astKind, children=(ast,))

    ast = typeCompileAst(ast)

    aliases = collapseDuplicateSubtrees(ast)

    assignLeafRegisters(ast.allOf('raw'), Immediate)
    assignLeafRegisters(ast.allOf('variable', 'constant'), Register)
    assignBranchRegisters(ast.allOf('op'), Register)

    # assign registers for aliases
    for a in aliases:
        a.reg = a.value.reg

    input_order = getInputOrder(ast, input_order)
    constants_order, constants = getConstants(ast)

    if isReduction(ast):
        ast.reg.temporary = False

    optimizeTemporariesAllocation(ast)

    # Register layout: 0 is the output, followed by the inputs, the
    # constants and finally the temporaries.
    ast.reg.temporary = False
    r_output = 0
    ast.reg.n = 0

    r_inputs = r_output + 1
    r_constants = setOrderedRegisterNumbers(input_order, r_inputs)
    r_temps = setOrderedRegisterNumbers(constants_order, r_constants)
    r_end, tempsig = setRegisterNumbersForTemporaries(ast, r_temps)

    threeAddrProgram = convertASTtoThreeAddrForm(ast)
    input_names = tuple([a.value for a in input_order])
    signature = ''.join(type_to_typecode[types.get(x, default_type)]
                        for x in input_names)
    return threeAddrProgram, signature, tempsig, constants, input_names
620
621
def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs):
    """
    Compile an expression built using E.<variable> variables to a function.

    ex can also be specified as a string "2*a+3*b".

    The order of the input variables and their types can be specified using the
    signature parameter, which is a list of (name, type) pairs.

    Remaining keyword arguments are context options (see `getContext`).

    Returns a `NumExpr` object containing the compiled function.
    """
    # When this is reached through validate(), 'truediv' has already been
    # translated to True or False, so getContext never looks at the caller
    # frame and the depth given here (which would have to be 2 in that case)
    # does not matter.
    context = getContext(kwargs, _frame_depth=1)
    compiled = precompile(ex, signature, context, sanitize=sanitize)
    threeAddrProgram, inputsig, tempsig, constants, input_names = compiled
    program = compileThreeAddrForm(threeAddrProgram)
    encoded_inputsig = inputsig.encode('ascii')
    encoded_tempsig = tempsig.encode('ascii')
    return interpreter.NumExpr(encoded_inputsig, encoded_tempsig,
                               program, constants, input_names)
644
645
def disassemble(nex):
    """
    Given a NumExpr object, return a list which is the program disassembled.

    Each entry is a list ``[op, arg1, arg2, ...]`` padded with None to at
    least four items, one entry per 4-byte group of ``nex.program``.
    """
    # opcode number -> opcode name
    rev_opcodes = {number: name for name, number in interpreter.opcodes.items()}
    # Register numbering: 0 is the output, then inputs, constants, temps.
    r_constants = 1 + len(nex.signature)
    r_temps = r_constants + len(nex.constants)

    def parseOp(op):
        # Split b'name_sig' on the last underscore; the signature is ''
        # when there is no underscore.
        pieces = op.rsplit(b'_', 1)
        if len(pieces) == 1:
            pieces.append('')
        return pieces[0], pieces[1]

    def getArg(pc, offset):
        position = pc + offset if offset < 4 else pc + offset + 1
        arg = nex.program[position]
        _, sig = parseOp(rev_opcodes.get(nex.program[pc]))
        try:
            code = sig[offset - 1]
        except IndexError:
            return None

        code = bytes([code])

        if arg == 255:
            return None
        if code == b'n':
            # 'n' operands are raw numbers, not registers.
            return arg
        if arg == 0:
            return b'r0'
        if arg < r_constants:
            return ('r%d[%s]' % (arg, nex.input_names[arg - 1])).encode('ascii')
        if arg < r_temps:
            return ('c%d[%s]' % (arg, nex.constants[arg - r_constants])).encode('ascii')
        return ('t%d' % (arg,)).encode('ascii')

    source = []
    for pc in range(0, len(nex.program), 4):
        op = rev_opcodes.get(nex.program[pc])
        _, sig = parseOp(op)
        parsed = [op]
        parsed.extend(getArg(pc, index) for index in range(1, len(sig) + 1))
        while len(parsed) < 4:
            parsed.append(None)
        source.append(parsed)
    return source
695
696
def getType(a):
    """Map the dtype of array `a` to the type object NumExpr uses for it.

    Raises ValueError for Unicode arrays and for any other unsupported
    dtype kind.
    """
    kind = a.dtype.kind
    itemsize = a.dtype.itemsize

    if kind == 'b':
        return bool
    if kind in 'iu':
        # ``long`` is for integers of more than 32 bits; an unsigned 32-bit
        # value needs it as well, since an ``int`` is not enough.
        needs_long = itemsize > 4 or (kind == 'u' and itemsize == 4)
        return long_ if needs_long else int_
    if kind == 'f':
        # ``double`` is for floats of more than 32 bits
        return double if itemsize > 4 else float
    if kind == 'c':
        return complex
    if kind == 'S':
        return bytes
    if kind == 'U':
        raise ValueError('NumExpr 2 does not support Unicode as a dtype.')
    raise ValueError("unknown type %s" % a.dtype.name)
718
719
def getExprNames(text, context, sanitize: bool=True):
    """Return ``(names, ex_uses_vml)`` for the expression string `text`.

    `names` are the variable names sorted alphabetically; `ex_uses_vml`
    tells whether VML is enabled and the expression contains an op listed in
    `vml_functions`.
    """
    expression = stringToExpression(text, {}, context, sanitize)
    tree = expressionToAST(expression)
    variables = getInputOrder(tree, None)

    # try to figure out if vml operations are used by expression
    if use_vml:
        ex_uses_vml = any(
            node.astType == 'op' and node.value in vml_functions
            for node in tree.postorderWalk())
    else:
        ex_uses_vml = False

    return [variable.value for variable in variables], ex_uses_vml
736
737
def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):
    """
    Get the arguments based on the names.

    Each name is looked up in `local_dict` first and `global_dict` second
    and converted with ``numpy.asarray``.  A dictionary left as None is
    taken from the frame `_frame_depth` levels above this function
    (``f_locals`` / ``f_globals``).  A name found in neither dictionary
    raises KeyError.

    Returns the list of arrays, in the order of `names`.
    """
    call_frame = sys._getframe(_frame_depth)

    clear_local_dict = False
    if local_dict is None:
        local_dict = call_frame.f_locals
        clear_local_dict = True
    try:
        frame_globals = call_frame.f_globals
        if global_dict is None:
            global_dict = frame_globals

        # If `call_frame` is the top frame of the interpreter we can't clear its
        # `local_dict`, because it is actually the `global_dict`.
        clear_local_dict = clear_local_dict and frame_globals is not local_dict

        arguments = []
        for name in names:
            try:
                a = local_dict[name]
            except KeyError:
                a = global_dict[name]
            arguments.append(numpy.asarray(a))
    finally:
        # If we generated local_dict via an explicit reference to f_locals,
        # clear the dict to prevent creating extra ref counts in the caller's scope
        # See https://github.com/pydata/numexpr/issues/310
        # Only do so when the mapping actually offers `clear()`: not every
        # `f_locals` object does (e.g. the frame-locals proxy of newer
        # Python versions).
        if clear_local_dict and hasattr(local_dict, 'clear'):
            local_dict.clear()

    return arguments
772
773
# Lock held while a compiled expression is being run.
evaluate_lock = threading.Lock()
# Dictionaries for caching variable names and compiled expressions
_names_cache = CacheDict(256)
_numexpr_cache = CacheDict(256)
# Compiled expression, argument names and call kwargs of the last
# successful validate(); consumed by re_evaluate().
_numexpr_last = {}
779
780# MAYBE: decorate this function to add attributes instead of having the
781# _numexpr_last dictionary?
def validate(ex: str,
             local_dict: Optional[Dict] = None,
             global_dict: Optional[Dict] = None,
             out: numpy.ndarray = None,
             order: str = 'K',
             casting: str = 'safe',
             _frame_depth: int = 2,
             sanitize: Optional[bool] = None,
             **kwargs) -> Optional[Exception]:
    r"""
    Check and compile a NumExpr expression against the given `local_dict`
    or the caller's `locals()`, without running it.

    On success `None` is returned and the compiled expression, its argument
    names and the call options are remembered, so that `re_evaluate()` can be
    called directly afterwards.  If anything goes wrong the exception object
    is returned instead of being raised.

    Parameters
    ----------
    ex: str
        a string forming an expression, like "2*a+3*b". The values for "a"
        and "b" will by default be taken from the calling function's frame
        (through use of sys._getframe()). Alternatively, they can be specified
        using the 'local_dict' or 'global_dict' arguments.

    local_dict: dictionary, optional
        A dictionary that replaces the local operands in current frame.

    global_dict: dictionary, optional
        A dictionary that replaces the global operands in current frame.

    out: NumPy array, optional
        An existing array where the outcome is going to be stored.  Care is
        required so that this array has the same shape and type as the
        actual outcome of the computation.  Useful for avoiding unnecessary
        new array allocations.

    order: {'C', 'F', 'A', or 'K'}, optional
        Controls the iteration order for operands. 'C' means C order, 'F'
        means Fortran order, 'A' means 'F' order if all the arrays are
        Fortran contiguous, 'C' order otherwise, and 'K' means as close to
        the order the array elements appear in memory as possible.  For
        efficient computations, typically 'K'eep order (the default) is
        desired.

    casting: {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur when making a copy or
        buffering.  Setting this to 'unsafe' is not recommended, as it can
        adversely affect accumulations.

          * 'no' means the data types should not be cast at all.
          * 'equiv' means only byte-order changes are allowed.
          * 'safe' means only casts which can preserve values are allowed.
          * 'same_kind' means only safe casts or casts within a kind,
            like float64 to float32, are allowed.
          * 'unsafe' means any data conversions may be done.

    sanitize: Optional[bool]
        Both `validate` and by extension `evaluate` call `eval(ex)`, which is
        potentially dangerous on unsanitized inputs. As such, NumExpr by default
        performs simple sanitization, banning the character ':;[', the
        dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'.

        Using `None` defaults to `True` unless the environment variable
        `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`.
        Nominally this can be set via `os.environ` before `import numexpr`.

    _frame_depth: int
        The calling frame depth. Unless you are a NumExpr developer you should
        not set this value.

    Any further keyword arguments are context options (see `getContext`).
    """
    global _numexpr_last

    try:
        if not isinstance(ex, str):
            raise ValueError("must specify expression as a string")

        if sanitize is None:
            # The environment variable decides when the caller did not.
            env_setting = os.environ.get('NUMEXPR_SANITIZE')
            sanitize = True if env_setting is None else bool(int(env_setting))

        # Get the names for this expression
        context = getContext(kwargs)
        expr_key = (ex, tuple(sorted(context.items())))
        if expr_key not in _names_cache:
            _names_cache[expr_key] = getExprNames(ex, context, sanitize=sanitize)
        names, ex_uses_vml = _names_cache[expr_key]
        # Must be called from this frame: `_frame_depth` is relative to it.
        arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth)

        # Create a signature
        signature = [(name, getType(argument))
                     for name, argument in zip(names, arguments)]

        # Look up numexpr if possible.
        numexpr_key = expr_key + (tuple(signature),)
        try:
            compiled_ex = _numexpr_cache[numexpr_key]
        except KeyError:
            compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context)

        call_kwargs = {'out': out, 'order': order, 'casting': casting,
                       'ex_uses_vml': ex_uses_vml}
        _numexpr_last = {'ex': compiled_ex, 'argnames': names,
                         'kwargs': call_kwargs}
    except Exception as e:
        return e
    return None
892
def evaluate(ex: str,
             local_dict: Optional[Dict] = None,
             global_dict: Optional[Dict] = None,
             out: numpy.ndarray = None,
             order: str = 'K',
             casting: str = 'safe',
             sanitize: Optional[bool] = None,
             _frame_depth: int = 3,
             **kwargs) -> numpy.ndarray:
    r"""
    Evaluate a simple array expression element-wise using the virtual machine.

    The expression is first passed to `validate()`; any exception reported
    there is raised, otherwise the compiled expression is run through
    `re_evaluate()` with the same `local_dict` and `global_dict`.

    Parameters
    ----------
    ex: str
        a string forming an expression, like "2*a+3*b". The values for "a"
        and "b" will by default be taken from the calling function's frame
        (through use of sys._getframe()). Alternatively, they can be specified
        using the 'local_dict' or 'global_dict' arguments.

    local_dict: dictionary, optional
        A dictionary that replaces the local operands in current frame.

    global_dict: dictionary, optional
        A dictionary that replaces the global operands in current frame.

    out: NumPy array, optional
        An existing array where the outcome is going to be stored.  Care is
        required so that this array has the same shape and type as the
        actual outcome of the computation.  Useful for avoiding unnecessary
        new array allocations.

    order: {'C', 'F', 'A', or 'K'}, optional
        Controls the iteration order for operands. 'C' means C order, 'F'
        means Fortran order, 'A' means 'F' order if all the arrays are
        Fortran contiguous, 'C' order otherwise, and 'K' means as close to
        the order the array elements appear in memory as possible.  For
        efficient computations, typically 'K'eep order (the default) is
        desired.

    casting: {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur when making a copy or
        buffering.  Setting this to 'unsafe' is not recommended, as it can
        adversely affect accumulations.

          * 'no' means the data types should not be cast at all.
          * 'equiv' means only byte-order changes are allowed.
          * 'safe' means only casts which can preserve values are allowed.
          * 'same_kind' means only safe casts or casts within a kind,
            like float64 to float32, are allowed.
          * 'unsafe' means any data conversions may be done.

    sanitize: bool
        Both `validate` and by extension `evaluate` call `eval(ex)`, which is
        potentially dangerous on unsanitized inputs. As such, NumExpr by default
        performs simple sanitization, banning the character ':;[', the
        dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'.

        Using `None` defaults to `True` unless the environment variable
        `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`.
        Nominally this can be set via `os.environ` before `import numexpr`.

    _frame_depth: int
        The calling frame depth. Unless you are a NumExpr developer you should
        not set this value.

    Note
    ----
    Both `validate` and by extension `evaluate` call `eval(ex)`, which is
    potentially dangerous on unsanitized inputs. As such, NumExpr does some
    sanitization, banning the character ':;[', the dunder '__', and attribute
    access to all but '.real' and '.imag' on complex numbers.
    """
    # `validate` and `re_evaluate` both resolve operands with
    # `sys._getframe`, so the same `_frame_depth` (counted from
    # `getArguments` through this function up to the caller) is handed to
    # each of them.
    e = validate(ex, local_dict=local_dict, global_dict=global_dict,
                 out=out, order=order, casting=casting,
                 _frame_depth=_frame_depth, sanitize=sanitize, **kwargs)
    if e is None:
        # Forward `global_dict` too, so operands supplied only through it
        # are found again when the arguments are re-collected.
        return re_evaluate(local_dict=local_dict, global_dict=global_dict,
                           _frame_depth=_frame_depth)
    else:
        raise e

def re_evaluate(local_dict: Optional[Dict] = None,
                _frame_depth: int=2,
                global_dict: Optional[Dict] = None) -> numpy.ndarray:
    """
    Re-evaluate the previous executed array expression without any check.

    This is meant for accelerating loops that are re-evaluating the same
    expression repeatedly without changing anything else than the operands.
    If unsure, use evaluate() which is safer.

    Parameters
    ----------
    local_dict: dictionary, optional
        A dictionary that replaces the local operands in current frame.
    _frame_depth: int
        The calling frame depth. Unless you are a NumExpr developer you should
        not set this value.
    global_dict: dictionary, optional
        A dictionary that replaces the global operands in current frame.

    Raises
    ------
    RuntimeError
        If no expression has been validated or evaluated before.
    """
    global _numexpr_last

    try:
        compiled_ex = _numexpr_last['ex']
    except KeyError:
        raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`")
    argnames = _numexpr_last['argnames']
    args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth)
    kwargs = _numexpr_last['kwargs']
    # Only one compiled expression is run at a time.
    with evaluate_lock:
        return compiled_ex(*args, **kwargs)