1"""
2Methods that can be shared by many array-like classes or subclasses:
3 Series
4 Index
5 ExtensionArray
6"""
7from __future__ import annotations
8
9import operator
10from typing import Any
11
12import numpy as np
13
14from pandas._libs import lib
15from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
16
17from pandas.core.dtypes.generic import ABCNDFrame
18
19from pandas.core import roperator
20from pandas.core.construction import extract_array
21from pandas.core.ops.common import unpack_zerodim_and_defer
22
# Maps the ``__name__`` of a NumPy ufunc to the name of the reduction method
# that ``dispatch_reduction_ufunc`` looks up on the object when handling
# ``<ufunc>.reduce(obj)``.  Ufuncs whose name is not a key here are not
# dispatched by that function (it returns NotImplemented for them).
REDUCTION_ALIASES: dict[str, str] = {
    "maximum": "max",  # np.maximum.reduce(obj) -> obj.max(...)
    "minimum": "min",  # np.minimum.reduce(obj) -> obj.min(...)
    "add": "sum",  # np.add.reduce(obj) -> obj.sum(...)
    "multiply": "prod",  # np.multiply.reduce(obj) -> obj.prod(...)
}
29
30
class OpsMixin:
    """
    Mixin that routes Python's operator dunders to three overridable hooks.

    Every comparison dunder forwards to ``_cmp_method``, every logical dunder
    to ``_logical_method`` and every arithmetic dunder to ``_arith_method``,
    passing ``other`` together with the matching function from ``operator``
    (or ``pandas.core.roperator`` for the reflected variants).

    The default hooks return ``NotImplemented``; subclasses override the
    hook(s) for the operator families they support.

    Each dunder is wrapped with ``unpack_zerodim_and_defer`` (defined in
    ``pandas.core.ops.common``, not shown in this module), which is given the
    name of the dunder it decorates.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        """Hook for comparison ops; returns NotImplemented unless overridden."""
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        """Hook for logical ops; returns NotImplemented unless overridden."""
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        """Hook for arithmetic ops; returns NotImplemented unless overridden."""
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both columns names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The decorator is given the full dunder name, like every other method
    # in this class (the trailing underscores used to be missing here).
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        # ``operator`` has no divmod; the builtin is passed as the op.
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)
247
248
249# -----------------------------------------------------------------------------
250# Helpers to implement __array_ufunc__
251
252
def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Compatibility with numpy ufuncs.

    The steps, in order:

    1. Fold ``out1``/``out2`` keywords into a single ``out`` tuple.
    2. Try ``maybe_dispatch_ufunc_to_dunder_op``; if it returns anything
       other than ``NotImplemented`` that value is returned as-is.
    3. Return ``NotImplemented`` if any input has a higher
       ``__array_priority__`` than ``self``, or defines an
       ``__array_ufunc__`` that is neither ndarray's nor ``type(self)``'s
       while not being an instance of ``self._HANDLED_TYPES``.
    4. If more than one input is an ``NDFrame``, union their axes and
       reindex every ``NDFrame`` input to the combined axes.
    5. Handle an ``out`` keyword via ``dispatch_ufunc_with_out`` and
       ``method == "reduce"`` via ``dispatch_reduction_ufunc``.
    6. Otherwise apply the ufunc to the underlying arrays (or to the
       manager, for a plain call on a single 2D input without kwargs) and
       rebuild the result with ``self._constructor``.

    Parameters
    ----------
    self : object
        The object whose ``__array_ufunc__`` is being implemented. The code
        reads ``ndim``, ``axes``, ``_AXIS_ORDERS``, ``_HANDLED_TYPES``,
        ``__array_priority__``, ``_constructor`` and ``__finalize__`` from it.
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the ufunc method being invoked, e.g. ``"__call__"``,
        ``"reduce"`` or ``"outer"``.
    *inputs : Any
        Positional inputs of the ufunc call.
    **kwargs : Any
        Keyword arguments of the ufunc call.

    Returns
    -------
    object
        ``NotImplemented`` when deferring to another input; otherwise the
        ufunc result. Scalars, and results whose ``ndim`` differs from
        ``self.ndim``, are returned without being re-wrapped. For ufuncs
        with ``nout > 1`` a tuple is returned.

    Raises
    ------
    NotImplementedError
        If the inputs mix DataFrame and Series, or if ``method == "outer"``
        produces a result whose ``ndim`` differs from ``self.ndim``.

    See Also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    # Imported inside the function rather than at module level.
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import BlockManager

    cls = type(self)

    # out1=..., out2=...  ->  out=(out1, out2)
    kwargs = _standardize_out_kwarg(**kwargs)

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    # Inputs whose __array_ufunc__ is one of these never cause a deferral.
    no_defer = (
        np.ndarray.__array_ufunc__,
        cls.__array_ufunc__,
    )

    for item in inputs:
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        set_types = set(types)
        if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
            # We currently don't handle ufunc(DataFrame, Series)
            # well. Previously this raised an internal ValueError. We might
            # support it someday, so raise a NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        # The list in `axes` is updated in place below; this presumes
        # `self.axes` hands back a fresh list on each access -- TODO confirm.
        axes = self.axes
        for obj in alignable[1:]:
            # this relies on the fact that we aren't handling mixed
            # series / frame ufuncs.
            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
                axes[i] = ax1.union(ax2)

        # Keyword arguments (axis name -> combined axis) used both for
        # reindexing the inputs here and for rebuilding the result later.
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    if self.ndim == 1:
        # Keep the name only when every input that has a `name` attribute
        # agrees on it; otherwise the result is unnamed.
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        name = names[0] if len(set(names)) == 1 else None
        reconstruct_kwargs = {"name": name}
    else:
        reconstruct_kwargs = {}

    def reconstruct(result):
        # Re-wrap the raw ufunc output; multi-output ufuncs are re-wrapped
        # element by element and returned as a tuple.
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_reconstruct(x) for x in result)

        return _reconstruct(result)

    def _reconstruct(result):
        # Re-wrap one output with self._constructor where that makes sense.
        if lib.is_scalar(result):
            return result

        if result.ndim != self.ndim:
            # The dimensionality changed, so the axes no longer apply.
            if method == "outer":
                raise NotImplementedError
            return result
        if isinstance(result, BlockManager):
            # we went through BlockManager.apply e.g. np.sqrt
            result = self._constructor(result, **reconstruct_kwargs, copy=False)
        else:
            # we converted an array, lost our axes
            result = self._constructor(
                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            result = result.__finalize__(self)
        return result

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        return reconstruct(result)

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if result is not NotImplemented:
            return result
        # NotImplemented here means "not dispatched"; fall through to the
        # generic paths below.

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    #  and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Just give up on preserving types in the complex case.
        # In theory we could preserve them in these cases:
        # * nout>1 is doable if BlockManager.apply took nout and
        #   returned a Tuple[BlockManager].
        # * len(inputs) > 1 is doable when we know that we have
        #   aligned blocks / dtypes.

        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # Note: we can't use default_array_ufunc here bc reindexing means
        #  that `self` may not be among `inputs`
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    else:
        # ufunc(dataframe)
        if method == "__call__" and not kwargs:
            # for np.<ufunc>(..) calls
            # kwargs cannot necessarily be handled block-by-block, so only
            # take this path if there are no kwargs
            mgr = inputs[0]._mgr
            result = mgr.apply(getattr(ufunc, method))
        else:
            # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
            # Those can have an axis keyword and thus can't be called block-by-block
            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
            # e.g. np.negative (only one reached), with "where" and "out" in kwargs

    result = reconstruct(result)
    return result
413
414
415def _standardize_out_kwarg(**kwargs) -> dict:
416 """
417 If kwargs contain "out1" and "out2", replace that with a tuple "out"
418
419 np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
420 `out1=out1, out2=out2)`
421 """
422 if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
423 out1 = kwargs.pop("out1")
424 out2 = kwargs.pop("out2")
425 out = (out1, out2)
426 kwargs["out"] = out
427 return kwargs
428
429
def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Evaluate the ufunc without its `out` keyword, then write the result
    into the object(s) that were passed as `out`.

    A `where` keyword, if given, is also withheld from the ufunc call and
    is applied as a mask when the result is written.

    Returns the `out` target(s), or NotImplemented if the ufunc call
    returned NotImplemented. Raises NotImplementedError when the number of
    `out` targets does not match the number of ufunc outputs.
    """
    # Note: we assume _standardize_out_kwarg has already been called.
    target = kwargs.pop("out")
    mask = kwargs.pop("where", None)

    computed = getattr(ufunc, method)(*inputs, **kwargs)
    if computed is NotImplemented:
        return NotImplemented

    if not isinstance(computed, tuple):
        # Single output: accept a bare target or a 1-tuple wrapping it.
        if isinstance(target, tuple):
            if len(target) != 1:
                raise NotImplementedError
            target = target[0]
        _assign_where(target, computed, mask)
        return target

    # Multiple outputs, i.e. np.divmod, np.modf, np.frexp
    if not (isinstance(target, tuple) and len(target) == len(computed)):
        raise NotImplementedError

    for destination, values in zip(target, computed):
        _assign_where(destination, values, mask)

    return target
463
464
465def _assign_where(out, result, where) -> None:
466 """
467 Set a ufunc result into 'out', masking with a 'where' argument if necessary.
468 """
469 if where is None:
470 # no 'where' arg passed to ufunc
471 out[:] = result
472 else:
473 np.putmask(out, where, result)
474
475
def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply the ufunc as if ``__array_ufunc__`` were not defined on `self`.

    Every input that is `self` (by identity) is replaced with
    ``np.asarray(self)`` before the ufunc method is invoked.

    Notes
    -----
    We are assuming that `self` is among `inputs`; NotImplementedError is
    raised when it is not.
    """
    saw_self = False
    converted = []
    for arg in inputs:
        if arg is self:
            saw_self = True
            converted.append(np.asarray(arg))
        else:
            converted.append(arg)

    if not saw_self:
        raise NotImplementedError

    return getattr(ufunc, method)(*converted, **kwargs)
490
491
def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Route ``<ufunc>.reduce(self)`` to the matching reduction method of `self`.

    Returns NotImplemented when `self` is not the sole input, when the ufunc
    has no entry in REDUCTION_ALIASES, or when `self` lacks the aliased
    method. Otherwise returns whatever that method returns when called with
    ``skipna=False`` plus the (possibly augmented) keyword arguments.
    """
    assert method == "reduce"

    if not (len(inputs) == 1 and inputs[0] is self):
        return NotImplemented

    # NB: we are assuming that min/max represent minimum/maximum methods,
    #  which would not be accurate for e.g. Timestamp.min
    reducer_name = REDUCTION_ALIASES.get(ufunc.__name__)
    if reducer_name is None or not hasattr(self, reducer_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # For DataFrame reductions we don't want the default axis=0
        # Note: np.min is not a ufunc, but uses array_function_dispatch,
        #  so calls DataFrame.min (without ever getting here) with the np.min
        #  default of axis=None, which DataFrame.min catches and changes to axis=0.
        # np.minimum.reduce(df) gets here bc axis is not in kwargs,
        #  so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
        kwargs.setdefault("axis", 0)

    # By default, numpy's reductions do not skip NaNs, so we have to
    #  pass skipna=False
    return getattr(self, reducer_name)(skipna=False, **kwargs)