1"""
2Methods that can be shared by many array-like classes or subclasses:
3 Series
4 Index
5 ExtensionArray
6"""
7from __future__ import annotations
8
9import operator
10from typing import Any
11
12import numpy as np
13
14from pandas._libs import lib
15from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
16
17from pandas.core.dtypes.generic import ABCNDFrame
18
19from pandas.core import roperator
20from pandas.core.construction import extract_array
21from pandas.core.ops.common import unpack_zerodim_and_defer
22
# Maps a NumPy ufunc's ``__name__`` to the name of the reduction method that
# ``dispatch_reduction_ufunc`` looks up on the object when it handles a
# ``ufunc.reduce(...)`` call. Ufuncs not listed here are not dispatched.
REDUCTION_ALIASES = {
    "maximum": "max",
    "minimum": "min",
    "add": "sum",
    "multiply": "prod",
}
29
30
class OpsMixin:
    """
    Mixin wiring Python's operator dunders to three overridable hooks.

    Every comparison dunder forwards to ``_cmp_method``, every logical dunder
    to ``_logical_method`` and every arithmetic dunder to ``_arith_method``,
    passing ``other`` and the matching function from ``operator`` /
    ``roperator`` (or the builtin ``divmod``).  The default hooks return
    ``NotImplemented``; subclasses are expected to override them.

    Each dunder is wrapped with ``unpack_zerodim_and_defer``, which receives
    the dunder's own name.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        """Hook for comparison operators; the default declines the operation."""
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        """Hook for logical operators; the default declines the operation."""
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        """Hook for arithmetic operators; the default declines the operation."""
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both columns names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The name passed to the decorator previously lacked the trailing "__";
    # it now matches the method name like every other dunder in this class.
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)
247
248
249# -----------------------------------------------------------------------------
250# Helpers to implement __array_ufunc__
251
252
def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Compatibility with numpy ufuncs.

    Shared implementation of ``__array_ufunc__``. The steps, in order, are:

    1. Normalize ``out1``/``out2`` keywords into a single ``out`` tuple.
    2. Try ``maybe_dispatch_ufunc_to_dunder_op``; return its result unless it
       is ``NotImplemented``.
    3. Return ``NotImplemented`` if any input has a higher
       ``__array_priority__`` than ``self``, or defines its own
       ``__array_ufunc__`` and is not one of ``self._HANDLED_TYPES``.
    4. When more than one input is an ``NDFrame``, reindex all of them to the
       union of their axes.
    5. Evaluate the ufunc (via the ``out`` helper, the reduction helper, or
       directly on the underlying data) and re-wrap the result.

    Parameters
    ----------
    self : object
        The object whose ``__array_ufunc__`` is being serviced. This function
        reads ``ndim``, ``axes``, ``_AXIS_ORDERS``, ``_HANDLED_TYPES``,
        ``__array_priority__``, ``_constructor`` and ``_constructor_from_mgr``
        from it.
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the ufunc method being invoked, e.g. ``"__call__"``,
        ``"reduce"`` or ``"outer"``.
    *inputs : Any
        Positional inputs to the ufunc.
    **kwargs : Any
        Keyword arguments to the ufunc (``out``, ``where``, ``axis``, ...).

    Returns
    -------
    object
        The re-wrapped ufunc result (a tuple of results when
        ``ufunc.nout > 1``), a scalar, an unwrapped result when its ``ndim``
        differs from ``self.ndim``, or ``NotImplemented`` when deferring to
        another input.

    Raises
    ------
    NotImplementedError
        If several alignable inputs mix ``DataFrame`` and ``Series``, or if
        ``method == "outer"`` yields a result whose ``ndim`` differs from
        ``self.ndim``.

    See Also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    # Local imports; NOTE(review): presumably to avoid circular imports at
    # module import time -- confirm.
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import (
        ArrayManager,
        BlockManager,
    )

    cls = type(self)

    # Fold ``out1=..., out2=...`` into ``out=(out1, out2)`` so that the rest
    # of the function only has to look at "out".
    kwargs = _standardize_out_kwarg(**kwargs)

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    # Inputs whose ``__array_ufunc__`` is one of these never cause a deferral.
    no_defer = (
        np.ndarray.__array_ufunc__,
        cls.__array_ufunc__,
    )

    for item in inputs:
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        set_types = set(types)
        if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
            # We currently don't handle ufunc(DataFrame, Series)
            # well. Previously this raised an internal ValueError. We might
            # support it someday, so raise a NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        axes = self.axes
        for obj in alignable[1:]:
            # this relies on the fact that we aren't handling mixed
            # series / frame ufuncs.
            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
                axes[i] = ax1.union(ax2)

        # Reindex every NDFrame input to the unioned axes; other inputs are
        # passed through untouched.
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    if self.ndim == 1:
        # The result keeps a name only when all inputs that have a ``name``
        # attribute agree on a single value.
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        name = names[0] if len(set(names)) == 1 else None
        reconstruct_kwargs = {"name": name}
    else:
        reconstruct_kwargs = {}

    def reconstruct(result):
        # Re-wrap a ufunc result, element by element for multi-output ufuncs.
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_reconstruct(x) for x in result)

        return _reconstruct(result)

    def _reconstruct(result):
        # Re-wrap a single ufunc output using self's constructor.
        if lib.is_scalar(result):
            return result

        if result.ndim != self.ndim:
            # Dimensionality changed, so the saved axes no longer apply:
            # hand back the raw result ("outer" is explicitly unsupported).
            if method == "outer":
                raise NotImplementedError
            return result
        if isinstance(result, (BlockManager, ArrayManager)):
            # we went through BlockManager.apply e.g. np.sqrt
            result = self._constructor_from_mgr(result, axes=result.axes)
        else:
            # we converted an array, lost our axes
            result = self._constructor(
                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            result = result.__finalize__(self)
        return result

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        return reconstruct(result)

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if result is not NotImplemented:
            return result

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    # and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Just give up on preserving types in the complex case.
        # In theory we could preserve them in these cases:
        # * nout>1 is doable if BlockManager.apply took nout and
        #   returned a Tuple[BlockManager].
        # * len(inputs) > 1 is doable when we know that we have
        #   aligned blocks / dtypes.

        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # Note: we can't use default_array_ufunc here bc reindexing means
        # that `self` may not be among `inputs`
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    else:
        # ufunc(dataframe)
        if method == "__call__" and not kwargs:
            # for np.<ufunc>(..) calls
            # kwargs cannot necessarily be handled block-by-block, so only
            # take this path if there are no kwargs
            mgr = inputs[0]._mgr
            result = mgr.apply(getattr(ufunc, method))
        else:
            # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
            # Those can have an axis keyword and thus can't be called block-by-block
            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
            # e.g. np.negative (only one reached), with "where" and "out" in kwargs

    result = reconstruct(result)
    return result
416
417
418def _standardize_out_kwarg(**kwargs) -> dict:
419 """
420 If kwargs contain "out1" and "out2", replace that with a tuple "out"
421
422 np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
423 `out1=out1, out2=out2)`
424 """
425 if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
426 out1 = kwargs.pop("out1")
427 out2 = kwargs.pop("out2")
428 out = (out1, out2)
429 kwargs["out"] = out
430 return kwargs
431
432
def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Evaluate a ufunc without its `out` keyword, then write into `out`.

    The "out" and "where" keywords are stripped before calling the ufunc;
    the computed values are then stored into the given `out` target(s),
    masked by `where` when one was passed.

    Returns the `out` object (unwrapped from a 1-tuple for single-output
    ufuncs), or NotImplemented if the ufunc call returned that.

    Raises NotImplementedError when the shape of `out` (tuple vs. non-tuple,
    or tuple length) does not match the ufunc's result.
    """
    # Note: we assume _standardize_out_kwarg has already been called.
    target = kwargs.pop("out")
    mask = kwargs.pop("where", None)

    computed = getattr(ufunc, method)(*inputs, **kwargs)
    if computed is NotImplemented:
        return NotImplemented

    target_is_tuple = isinstance(target, tuple)

    if isinstance(computed, tuple):
        # i.e. np.divmod, np.modf, np.frexp
        if not (target_is_tuple and len(target) == len(computed)):
            raise NotImplementedError

        for destination, values in zip(target, computed):
            _assign_where(destination, values, mask)
        return target

    if target_is_tuple:
        # A single-output ufunc only accepts a 1-tuple for `out`.
        if len(target) != 1:
            raise NotImplementedError
        target = target[0]

    _assign_where(target, computed, mask)
    return target
466
467
468def _assign_where(out, result, where) -> None:
469 """
470 Set a ufunc result into 'out', masking with a 'where' argument if necessary.
471 """
472 if where is None:
473 # no 'where' arg passed to ufunc
474 out[:] = result
475 else:
476 np.putmask(out, where, result)
477
478
def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply the ufunc as if `self` did not define __array_ufunc__.

    Every occurrence of `self` among `inputs` is replaced by
    ``np.asarray(self)`` before the requested ufunc method is invoked.

    Notes
    -----
    `self` must be among `inputs`; otherwise NotImplementedError is raised.
    """
    converted = []
    found_self = False
    for operand in inputs:
        if operand is self:
            found_self = True
            operand = np.asarray(operand)
        converted.append(operand)

    if not found_self:
        raise NotImplementedError

    ufunc_method = getattr(ufunc, method)
    return ufunc_method(*converted, **kwargs)
493
494
def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Route ``ufunc.reduce(self)`` to the matching reduction method on self.

    The ufunc name is translated through REDUCTION_ALIASES (e.g. "add" ->
    "sum") and the resulting method is called with ``skipna=False``.
    NotImplemented is returned when `self` is not the sole input, the ufunc
    has no alias, or `self` lacks the aliased method.
    """
    assert method == "reduce"

    sole_input_is_self = len(inputs) == 1 and inputs[0] is self
    if not sole_input_is_self:
        return NotImplemented

    reduction_name = REDUCTION_ALIASES.get(ufunc.__name__)
    if reduction_name is None:
        return NotImplemented

    # NB: we are assuming that min/max represent minimum/maximum methods,
    #  which would not be accurate for e.g. Timestamp.min
    if not hasattr(self, reduction_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # For DataFrame reductions we don't want the default axis=0
        # Note: np.min is not a ufunc, but uses array_function_dispatch,
        #  so calls DataFrame.min (without ever getting here) with the np.min
        #  default of axis=None, which DataFrame.min catches and changes to axis=0.
        # np.minimum.reduce(df) gets here bc axis is not in kwargs,
        #  so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
        kwargs.setdefault("axis", 0)

    # By default, numpy's reductions do not skip NaNs, so we have to
    #  pass skipna=False
    reducer = getattr(self, reduction_name)
    return reducer(skipna=False, **kwargs)