1"""
2Functions for arithmetic and comparison operations on NumPy arrays and
3ExtensionArrays.
4"""
5from __future__ import annotations
6
7import datetime
8from functools import partial
9import operator
10from typing import (
11 TYPE_CHECKING,
12 Any,
13)
14import warnings
15
16import numpy as np
17
18from pandas._libs import (
19 NaT,
20 Timedelta,
21 Timestamp,
22 lib,
23 ops as libops,
24)
25from pandas._libs.tslibs import (
26 BaseOffset,
27 get_supported_dtype,
28 is_supported_dtype,
29 is_unitless,
30)
31from pandas.util._exceptions import find_stack_level
32
33from pandas.core.dtypes.cast import (
34 construct_1d_object_array_from_listlike,
35 find_common_type,
36)
37from pandas.core.dtypes.common import (
38 ensure_object,
39 is_bool_dtype,
40 is_list_like,
41 is_numeric_v_string_like,
42 is_object_dtype,
43 is_scalar,
44)
45from pandas.core.dtypes.generic import (
46 ABCExtensionArray,
47 ABCIndex,
48 ABCSeries,
49)
50from pandas.core.dtypes.missing import (
51 isna,
52 notna,
53)
54
55from pandas.core import roperator
56from pandas.core.computation import expressions
57from pandas.core.construction import ensure_wrapped_if_datetimelike
58from pandas.core.ops import missing
59from pandas.core.ops.dispatch import should_extension_dispatch
60from pandas.core.ops.invalid import invalid_comparison
61
62if TYPE_CHECKING:
63 from pandas._typing import (
64 ArrayLike,
65 Shape,
66 )
67
68# -----------------------------------------------------------------------------
69# Masking NA values and fallbacks for operations numpy does not support
70
71
def fill_binop(left, right, fill_value):
    """
    Fill entries that are null on exactly one side of a binary operation.

    If a non-None fill_value is given, replace null entries in left and right
    with this value, but only in positions where _one_ of left/right is null,
    not both.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object
        Replacement value.  ``None`` disables filling and both inputs are
        returned unchanged.

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    The inputs are never modified in place.  An operand is copied only when
    at least one of its entries is actually replaced; when nothing needs to
    be filled on a side, the object that was passed in is returned as-is.
    """
    if fill_value is None:
        return left, right

    left_mask = isna(left)
    right_mask = isna(right)

    # one but not both
    mask = left_mask ^ right_mask

    # Positions that will really be overwritten on each side.  Checking these
    # (rather than "has any NA at all") avoids copying an operand whose NAs
    # all coincide with NAs on the other side.
    fill_left = left_mask & mask
    fill_right = right_mask & mask

    if fill_left.any():
        left = left.copy()
        left[fill_left] = fill_value

    if fill_right.any():
        right = right.copy()
        right[fill_right] = fill_value

    return left, right
111
112
def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare the object-dtype array ``x`` against ``y`` element-wise.

    Parameters
    ----------
    op : comparison operator
    x : np.ndarray
    y : list, np.ndarray, Series, Index or scalar
        Lists are converted to a 1D object array.  Array-likes are cast to
        object dtype and must have the same shape as ``x``.  Anything else
        is compared as a scalar.

    Returns
    -------
    np.ndarray
        Comparison result reshaped to ``x.shape``.

    Raises
    ------
    ValueError
        If ``y`` is array-like and its shape differs from ``x.shape``.
    """
    if isinstance(y, list):
        # e.g. test_tuple_categories
        y = construct_1d_object_array_from_listlike(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # Scalar comparison against every element of x.
        flat = libops.scalar_compare(x.ravel(), y, op)
        return flat.reshape(x.shape)

    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)

    if isinstance(y, (ABCSeries, ABCIndex)):
        # Unwrap to the underlying array before comparing.
        y = y._values

    if x.shape != y.shape:
        raise ValueError("Shapes must match", x.shape, y.shape)

    flat = libops.vec_compare(x.ravel(), y.ravel(), op)
    return flat.reshape(x.shape)
131
132
def _masked_arith_op(x: np.ndarray, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
        Any other type raises TypeError.
    op : binary operator

    Returns
    -------
    np.ndarray
        Array with the same shape as ``x``.  Positions excluded by the mask
        are set to ``np.nan``.

    Raises
    ------
    ValueError
        If ``y`` is an ndarray and ``len(x) != len(y)``.
    TypeError
        If ``y`` is neither an ndarray nor a scalar.
    """
    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()

    if isinstance(y, np.ndarray):
        # The result buffer must be able to hold values from either operand.
        dtype = find_common_type([x.dtype, y.dtype])
        result = np.empty(x.size, dtype=dtype)

        if len(x) != len(y):
            raise ValueError(x.shape, y.shape)
        ymask = notna(y)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        # we would get int64 dtype, see GH#19956
        yrav = y.ravel()
        # Only operate where both operands are non-null.
        mask = notna(xrav) & ymask.ravel()

        # See GH#5284, GH#5035, GH#19448 for historical reference
        if mask.any():
            result[mask] = op(xrav[mask], yrav[mask])

    else:
        if not is_scalar(y):
            raise TypeError(
                f"Cannot broadcast np.ndarray with operand of type { type(y) }"
            )

        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1. So we have to unmask those.
        # NOTE(review): `pow` below is the builtin; an `operator.pow` argument
        # would not satisfy this identity check -- confirm which one callers
        # pass.
        # NOTE(review): the first branch compares `x` (not `xrav`) against 1,
        # so for 2D `x` the condition and `mask` have different shapes --
        # confirm this branch is only reached with 1D input.
        # NOTE(review): positions switched to False here are skipped by the
        # computation and overwritten with NaN by the putmask below -- confirm
        # that this is the intended meaning of "unmask".
        if op is pow:
            mask = np.where(x == 1, False, mask)
        elif op is roperator.rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            result[mask] = op(xrav[mask], y)

    # `result` came from np.empty, so every position that was not computed
    # still holds arbitrary data; overwrite those with NaN.
    np.putmask(result, ~mask, np.nan)
    result = result.reshape(x.shape)  # 2D compat
    return result
187
188
def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False):
    """
    Evaluate ``op(left, right)``, retrying on non-null entries if it fails.

    The operation is first attempted directly (through
    ``expressions.evaluate`` unless ``right`` is a string).  If that raises
    TypeError for a non-comparison op and one of the operands is object
    dtype, the operation is retried via ``_masked_arith_op``.

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
        Excludes DataFrame, Series, Index, ExtensionArray.
    op : binary operator
    is_cmp : bool, default False
        Whether this is a comparison operation.

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    # Strings can never go through numexpr, so call the operator directly.
    evaluator = op if isinstance(right, str) else partial(expressions.evaluate, op)

    try:
        result = evaluator(left, right)
    except TypeError:
        # Only object-dtype arithmetic gets the masked retry.  Comparisons are
        # excluded because the retry would handle complex numbers
        # incorrectly, see GH#32047.
        if is_cmp or not (
            left.dtype == object or getattr(right, "dtype", None) == object
        ):
            raise
        result = _masked_arith_op(left, right, op)

    not_elementwise = is_cmp and (is_scalar(result) or result is NotImplemented)
    if not_elementwise:
        # numpy returned a scalar instead of operating element-wise
        # e.g. numeric array vs str
        # TODO: can remove this after dropping some future numpy version?
        return invalid_comparison(left, right, op)

    return missing.dispatch_fill_zeros(op, left, right, result)
238
239
def arithmetic_op(left: ArrayLike, right: Any, op):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index.  Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    # NB: We assume that extract_array and ensure_wrapped_if_datetimelike
    # have already been called on `left` and `right`,
    # and `maybe_prepare_scalar_for_op` has already been called on `right`
    # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
    # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)

    if (
        should_extension_dispatch(left, right)
        or isinstance(right, (Timedelta, BaseOffset, Timestamp))
        or right is NaT
    ):
        # Let the operands' own implementation handle it.
        # Timedelta/Timestamp and other custom scalars are included in the check
        # because numexpr will fail on it, see GH#31457
        return op(left, right)

    # Plain ndarray path.
    # TODO we should handle EAs consistently and move this check before the
    # dispatch above (https://github.com/pandas-dev/pandas/issues/41165)
    # error: Argument 2 to "_bool_arith_check" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
    _bool_arith_check(op, left, right)  # type: ignore[arg-type]

    # error: Argument 1 to "_na_arithmetic_op" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
    return _na_arithmetic_op(left, right, op)  # type: ignore[arg-type]
286
287
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is a list, ndarray or ExtensionArray whose length differs
        from that of ``left``.
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = lib.item_from_zerodim(ensure_wrapped_if_datetimelike(right))

    if isinstance(rvalues, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        # to a tuple that represents a single entry, see test_compare_tuple_strs
        rvalues = np.asarray(rvalues)

    # TODO: make this treatment consistent across ops and classes.
    # We are not catching all listlikes here (e.g. frozenset, tuple)
    # The ambiguous case is object-dtype.  See GH#27803
    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)) and len(lvalues) != len(
        rvalues
    ):
        raise ValueError(
            "Lengths must match to compare", lvalues.shape, rvalues.shape
        )

    if should_extension_dispatch(lvalues, rvalues) or (
        (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)
        and lvalues.dtype != object
    ):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if is_scalar(rvalues) and isna(rvalues):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None: only `!=` is all-True,
        # every other comparison is all-False.
        return np.full(lvalues.shape, op is operator.ne, dtype=bool)

    if is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    if lvalues.dtype == object or isinstance(rvalues, str):
        return comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    return _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)
350
351
def na_logical_op(x: np.ndarray, y, op):
    """
    Apply the logical operator ``op`` to ``x`` and ``y``.

    The operator is first applied directly.  If that raises TypeError, the
    operation is retried through the object-dtype helpers in ``libops``.

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Result reshaped to ``x.shape``.

    Raises
    ------
    TypeError
        If the scalar fallback fails as well.
    """
    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or bool)
        result = op(x, y)
    except TypeError:
        # Retry inside the handler so that any error raised by the fallback
        # stays chained to the original TypeError.
        result = _na_logical_op_fallback(x, y, op)

    return result.reshape(x.shape)


def _na_logical_op_fallback(x: np.ndarray, y, op):
    """Evaluate ``op`` via the ``libops`` object-dtype binop helpers."""
    if isinstance(y, np.ndarray):
        # bool-bool dtype operations should be OK, should not get here
        assert x.dtype.kind != "b" or y.dtype.kind != "b"
        x_obj = ensure_object(x)
        y_obj = ensure_object(y)
        return libops.vec_binop(x_obj.ravel(), y_obj.ravel(), op)

    # let null fall thru
    assert lib.is_scalar(y)
    scalar = y if isna(y) else bool(y)
    try:
        return libops.scalar_binop(x, scalar, op)
    except (
        TypeError,
        ValueError,
        AttributeError,
        OverflowError,
        NotImplementedError,
    ) as err:
        # The reported type is that of the scalar actually passed to
        # scalar_binop, i.e. after the bool() coercion above.
        typ = type(scalar).__name__
        raise TypeError(
            f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
            f"and scalar of type [{typ}]"
        ) from err
391
392
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Dtype-less list-likes on the right (e.g. list, tuple) trigger a
    FutureWarning and are converted to a 1D object array before operating.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def fill_bool(x, left=None):
        # Replace missing values in `x` with False and, where appropriate,
        # cast the result to bool.
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in "cfO":
            # dtypes that can hold NA
            mask = isna(x)
            if mask.any():
                # astype returns a new array, so the caller's array is not
                # modified by the assignment below.
                x = x.astype(object)
                x[mask] = False

        if left is None or left.dtype.kind == "b":
            x = x.astype(bool)
        return x

    # Unbox 0-dim arrays so they are treated as scalars below.
    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        warnings.warn(
            "Logical ops (and, or, xor) between Pandas objects and dtype-less "
            "sequences (e.g. list, tuple) are deprecated and will raise in a "
            "future version. Wrap the object in a Series, Index, or np.array "
            "before operating instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        res_values = op(lvalues, rvalues)

    else:
        if isinstance(rvalues, np.ndarray):
            is_other_int_dtype = rvalues.dtype.kind in "iu"
            if not is_other_int_dtype:
                # Non-integer arrays get their missing values replaced by
                # False; the cast to bool only happens when `lvalues` is bool.
                rvalues = fill_bool(rvalues, lvalues)

        else:
            # i.e. scalar
            is_other_int_dtype = lib.is_integer(rvalues)

        res_values = na_logical_op(lvalues, rvalues, op)

        # For int vs int `^`, `|`, `&` are bitwise operators and return
        # integer dtypes.  Otherwise these are boolean ops
        if not (left.dtype.kind in "iu" and is_other_int_dtype):
            res_values = fill_bool(res_values)

    return res_values
462
463
def get_array_op(op):
    """
    Return a binary array operation corresponding to the given operator op.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.

    Returns
    -------
    functools.partial
        ``comparison_op``, ``logical_op`` or ``arithmetic_op`` with ``op``
        bound as a keyword.  ``op`` itself is returned when it is already a
        partial or is named ``arith_op``.

    Raises
    ------
    NotImplementedError
        If the operator name is not recognized.
    """
    if isinstance(op, partial):
        # We get here via dispatch_to_series in DataFrame case
        # e.g. test_rolling_consistency_var_debiasing_factors
        return op

    # Normalize e.g. "__radd__" / "rand_" / "or_" to "add" / "and" / "or".
    op_name = op.__name__.strip("_").lstrip("r")
    if op_name == "arith_op":
        # Reached via DataFrame._combine_frame i.e. flex methods
        # e.g. test_df_add_flex_filled_mixed_dtypes
        return op

    if op_name in ("eq", "ne", "lt", "le", "gt", "ge"):
        array_func = comparison_op
    elif op_name in ("and", "or", "xor", "rand", "ror", "rxor"):
        array_func = logical_op
    elif op_name in (
        "add",
        "sub",
        "mul",
        "truediv",
        "floordiv",
        "mod",
        "divmod",
        "pow",
    ):
        array_func = arithmetic_op
    else:
        raise NotImplementedError(op_name)

    return partial(array_func, op=op)
505
506
507def maybe_prepare_scalar_for_op(obj, shape: Shape):
508 """
509 Cast non-pandas objects to pandas types to unify behavior of arithmetic
510 and comparison operations.
511
512 Parameters
513 ----------
514 obj: object
515 shape : tuple[int]
516
517 Returns
518 -------
519 out : object
520
521 Notes
522 -----
523 Be careful to call this *after* determining the `name` attribute to be
524 attached to the result of the arithmetic operation.
525 """
526 if type(obj) is datetime.timedelta:
527 # GH#22390 cast up to Timedelta to rely on Timedelta
528 # implementation; otherwise operation against numeric-dtype
529 # raises TypeError
530 return Timedelta(obj)
531 elif type(obj) is datetime.datetime:
532 # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above
533 return Timestamp(obj)
534 elif isinstance(obj, np.datetime64):
535 # GH#28080 numpy casts integer-dtype to datetime64 when doing
536 # array[int] + datetime64, which we do not allow
537 if isna(obj):
538 from pandas.core.arrays import DatetimeArray
539
540 # Avoid possible ambiguities with pd.NaT
541 # GH 52295
542 if is_unitless(obj.dtype):
543 obj = obj.astype("datetime64[ns]")
544 elif not is_supported_dtype(obj.dtype):
545 new_dtype = get_supported_dtype(obj.dtype)
546 obj = obj.astype(new_dtype)
547 right = np.broadcast_to(obj, shape)
548 return DatetimeArray._simple_new(right, dtype=right.dtype)
549
550 return Timestamp(obj)
551
552 elif isinstance(obj, np.timedelta64):
553 if isna(obj):
554 from pandas.core.arrays import TimedeltaArray
555
556 # wrapping timedelta64("NaT") in Timedelta returns NaT,
557 # which would incorrectly be treated as a datetime-NaT, so
558 # we broadcast and wrap in a TimedeltaArray
559 # GH 52295
560 if is_unitless(obj.dtype):
561 obj = obj.astype("timedelta64[ns]")
562 elif not is_supported_dtype(obj.dtype):
563 new_dtype = get_supported_dtype(obj.dtype)
564 obj = obj.astype(new_dtype)
565 right = np.broadcast_to(obj, shape)
566 return TimedeltaArray._simple_new(right, dtype=right.dtype)
567
568 # In particular non-nanosecond timedelta64 needs to be cast to
569 # nanoseconds, or else we get undesired behavior like
570 # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
571 return Timedelta(obj)
572
573 # We want NumPy numeric scalars to behave like Python scalars
574 # post NEP 50
575 elif isinstance(obj, np.integer):
576 return int(obj)
577
578 elif isinstance(obj, np.floating):
579 return float(obj)
580
581 return obj
582
583
# Operators for which ``_bool_arith_check`` raises NotImplementedError when
# the left array has bool dtype and the right operand is boolean as well:
# true division, floor division and power, plus their reversed variants.
_BOOL_OP_NOT_ALLOWED = {
    operator.truediv,
    roperator.rtruediv,
    operator.floordiv,
    roperator.rfloordiv,
    operator.pow,
    roperator.rpow,
}
592
593
def _bool_arith_check(op, a: np.ndarray, b):
    """
    Reject arithmetic that pandas does not define between booleans.

    In contrast to numpy, pandas raises an error for certain operations
    with booleans.

    Parameters
    ----------
    op : binary operator
    a : np.ndarray
    b : object
        Right operand; array-like or scalar.

    Raises
    ------
    NotImplementedError
        If ``op`` is in ``_BOOL_OP_NOT_ALLOWED``, ``a`` has bool dtype and
        ``b`` is a bool scalar or has bool dtype.
    """
    if op not in _BOOL_OP_NOT_ALLOWED:
        return
    if a.dtype.kind != "b":
        return

    if is_bool_dtype(b) or lib.is_bool(b):
        # Report the plain operator name, e.g. "rtruediv" -> "truediv".
        op_name = op.__name__.strip("_").lstrip("r")
        raise NotImplementedError(
            f"operator '{op_name}' not implemented for bool dtypes"
        )