1"""
2Arithmetic operations for PandasObjects
3
4This is not a public API.
5"""
6from __future__ import annotations
7
8import operator
9from typing import (
10 TYPE_CHECKING,
11 cast,
12)
13
14import numpy as np
15
16from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
17from pandas._typing import (
18 Axis,
19 AxisInt,
20 Level,
21)
22from pandas.util._decorators import Appender
23
24from pandas.core.dtypes.common import (
25 is_array_like,
26 is_list_like,
27)
28from pandas.core.dtypes.generic import (
29 ABCDataFrame,
30 ABCSeries,
31)
32from pandas.core.dtypes.missing import isna
33
34from pandas.core import (
35 algorithms,
36 roperator,
37)
38from pandas.core.ops.array_ops import (
39 arithmetic_op,
40 comp_method_OBJECT_ARRAY,
41 comparison_op,
42 get_array_op,
43 logical_op,
44 maybe_prepare_scalar_for_op,
45)
46from pandas.core.ops.common import (
47 get_op_result_name,
48 unpack_zerodim_and_defer,
49)
50from pandas.core.ops.docstrings import (
51 _flex_comp_doc_FRAME,
52 _op_descriptions,
53 make_flex_doc,
54)
55from pandas.core.ops.invalid import invalid_comparison
56from pandas.core.ops.mask_ops import (
57 kleene_and,
58 kleene_or,
59 kleene_xor,
60)
61from pandas.core.ops.methods import add_flex_arithmetic_methods
62from pandas.core.roperator import (
63 radd,
64 rand_,
65 rdiv,
66 rdivmod,
67 rfloordiv,
68 rmod,
69 rmul,
70 ror_,
71 rpow,
72 rsub,
73 rtruediv,
74 rxor,
75)
76
77if TYPE_CHECKING:
78 from pandas import (
79 DataFrame,
80 Series,
81 )
82
83# -----------------------------------------------------------------------------
84# constants
# Names of the binary arithmetic operations: every forward name together with
# its reflected counterpart (the same name prefixed with "r").
ARITHMETIC_BINOPS: set[str] = {
    prefix + forward_name
    for forward_name in (
        "add",
        "sub",
        "mul",
        "pow",
        "mod",
        "floordiv",
        "truediv",
        "divmod",
    )
    for prefix in ("", "r")
}


# Names of the comparison operations.
COMPARISON_BINOPS: set[str] = {
    "eq",
    "ne",
    "lt",
    "gt",
    "le",
    "ge",
}
106
107
108# -----------------------------------------------------------------------------
109# Masking NA values and fallbacks for operations numpy does not support
110
111
def fill_binop(left, right, fill_value):
    """
    Substitute ``fill_value`` for nulls that appear on exactly one side.

    A position that is null in both ``left`` and ``right`` is left untouched;
    a position that is null in only one of them receives ``fill_value`` on
    that side.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object
        Replacement value. ``None`` disables filling entirely.

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    An operand is copied only when ``fill_value`` is not None and that
    operand contains at least one null; otherwise the input object itself
    is returned.
    """
    if fill_value is None:
        return left, right

    left_na = isna(left)
    right_na = isna(right)

    # True where exactly one of the two operands is null
    exactly_one_na = left_na ^ right_na

    if left_na.any():
        # Only pay for a copy when this side actually has nulls
        left = left.copy()
        left[left_na & exactly_one_na] = fill_value

    if right_na.any():
        # Only pay for a copy when this side actually has nulls
        right = right.copy()
        right[right_na & exactly_one_na] = fill_value

    return left, right
151
152
153# -----------------------------------------------------------------------------
154# Series
155
156
def align_method_SERIES(left: Series, right, align_asobject: bool = False):
    """
    Align ``left`` with ``right`` when ``right`` is a Series with another index.

    Parameters
    ----------
    left : Series
    right : Any
        Returned unchanged unless it is a Series whose index differs from
        ``left.index``.
    align_asobject : bool, default False
        If True, both operands are cast to object dtype before aligning.

    Returns
    -------
    left : Series
    right : Any
    """
    # ToDo: Different from align_method_FRAME, list, tuple and ndarray
    # are not coerced here
    # because Series has inconsistencies described in #13637

    # Nothing to do for non-Series operands or already-matching indexes;
    # this also avoids repeated alignment.
    if not isinstance(right, ABCSeries) or left.index.equals(right.index):
        return left, right

    if align_asobject:
        # to keep original value's dtype for bool ops
        left = left.astype(object)
        right = right.astype(object)

    left, right = left.align(right, copy=False)
    return left, right
174
175
def flex_method_SERIES(op):
    """
    Build the flex-method wrapper around a binary operator for Series.

    Parameters
    ----------
    op : callable
        Binary operator. Its ``__name__`` with leading/trailing underscores
        stripped becomes the wrapper's name and selects its docstring.

    Returns
    -------
    callable
        Function with signature
        ``(self, other, level=None, fill_value=None, axis=0)``.
    """
    name = op.__name__.strip("_")
    doc = make_flex_doc(name, "series")

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis: Axis = 0):
        # axis is only validated here; it is not used any further
        if axis is not None:
            self._get_axis_number(axis)

        res_name = get_op_result_name(self, other)

        if isinstance(other, ABCSeries):
            return self._binop(other, op, level=level, fill_value=fill_value)

        if isinstance(other, (np.ndarray, list, tuple)):
            if len(other) != len(self):
                raise ValueError("Lengths must be equal")
            # Wrap the raw sequence in a Series sharing our index
            wrapped = self._constructor(other, self.index)
            result = self._binop(wrapped, op, level=level, fill_value=fill_value)
            result.name = res_name
            return result

        # Any other operand: fill our own nulls first, then apply the
        # operator directly
        lhs = self if fill_value is None else self.fillna(fill_value)
        return op(lhs, other)

    flex_wrapper.__name__ = name
    return flex_wrapper
205
206
207# -----------------------------------------------------------------------------
208# DataFrame
209
210
def align_method_FRAME(
    left, right, axis, flex: bool | None = False, level: Level = None
):
    """
    Convert rhs to meet lhs dims if input is list, tuple or np.ndarray.

    Parameters
    ----------
    left : DataFrame
    right : Any
    axis : int, str, or None
    flex : bool or None, default False
        Whether this is a flex op, in which case we reindex.
        None indicates not to check for alignment of a DataFrame ``right``.
    level : int or level name, default None

    Returns
    -------
    left : DataFrame
    right : Any

    Raises
    ------
    ValueError
        If ``right`` is an ndarray or list-like whose length/shape cannot be
        matched to ``left``, has more than 2 dimensions, or is a list-like
        containing array-likes; or if ``flex`` is falsy and the operands are
        not already aligned.
    """

    def explicit_dtype(values):
        # pass dtype to avoid doing inference, which would break consistency
        # with Index/Series ops. We can't pass values.dtype unconditionally
        # as that would break on e.g. datetime64[h] ndarray, so only object
        # dtype is forwarded explicitly.
        if getattr(values, "dtype", None) == object:
            return object
        return None

    def to_series(right):
        msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"

        dtype = explicit_dtype(right)

        # Pick the axis of ``left`` that the 1D values must line up with
        if axis is not None and left._get_axis_name(axis) == "index":
            target = left.index
        else:
            target = left.columns

        if len(target) != len(right):
            raise ValueError(msg.format(req_len=len(target), given_len=len(right)))
        return left._constructor_sliced(right, index=target, dtype=dtype)

    if isinstance(right, np.ndarray):
        if right.ndim == 1:
            right = to_series(right)

        elif right.ndim == 2:
            # We need to pass dtype=object to retain object dtype
            # otherwise we lose consistency with Index and array ops
            dtype = explicit_dtype(right)

            if right.shape == left.shape:
                right = left._constructor(
                    right, index=left.index, columns=left.columns, dtype=dtype
                )

            elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
                # Broadcast across columns
                right = np.broadcast_to(right, left.shape)
                right = left._constructor(
                    right, index=left.index, columns=left.columns, dtype=dtype
                )

            elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
                # Broadcast along rows
                right = to_series(right[0, :])

            else:
                raise ValueError(
                    "Unable to coerce to DataFrame, shape "
                    f"must be {left.shape}: given {right.shape}"
                )

        elif right.ndim > 2:
            raise ValueError(
                "Unable to coerce to Series/DataFrame, "
                f"dimension must be <= 2: {right.shape}"
            )

    elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
        # GH 36702. Raise when attempting arithmetic with list of array-like.
        # Report the type of the element that is actually array-like rather
        # than right[0]: the first element may be a plain scalar, and
        # ``right`` is not guaranteed to support positional indexing.
        for element in right:
            if is_array_like(element):
                raise ValueError(
                    f"Unable to coerce list of {type(element)} to Series/DataFrame"
                )
        # GH17901
        right = to_series(right)

    if flex is not None and isinstance(right, ABCDataFrame):
        if not left._indexed_same(right):
            if flex:
                left, right = left.align(right, join="outer", level=level, copy=False)
            else:
                raise ValueError(
                    "Can only compare identically-labeled (both index and columns) "
                    "DataFrame objects"
                )
    elif isinstance(right, ABCSeries):
        # axis=1 is default for DataFrame-with-Series op
        axis = left._get_axis_number(axis) if axis is not None else 1

        if not flex:
            if not left.axes[axis].equals(right.index):
                raise ValueError(
                    "Operands are not aligned. Do "
                    "`left, right = left.align(right, axis=1, copy=False)` "
                    "before operating."
                )

        left, right = left.align(
            right, join="outer", axis=axis, level=level, copy=False
        )
        right = _maybe_align_series_as_frame(left, right, axis)

    return left, right
335
336
def should_reindex_frame_op(
    left: DataFrame, right, op, axis: int, fill_value, level
) -> bool:
    """
    Decide whether a DataFrame-with-DataFrame op should take the reindex path.

    Returns True only when ``right`` is a DataFrame, ``op`` is not
    ``pow``/``rpow``, ``fill_value`` and ``level`` are both None, ``axis`` is
    1, and the unique column labels of the two frames overlap partially
    (at least one shared label, but the two label sets are not identical).
    """
    assert isinstance(left, ABCDataFrame)

    if op is operator.pow or op is roperator.rpow:
        # GH#32685 pow has special semantics for operating with null values
        return False

    if not isinstance(right, ABCDataFrame):
        return False

    # TODO: any other cases we should handle here?
    plain_column_op = fill_value is None and level is None and axis == 1
    if not plain_column_op:
        return False

    # Intersection is always unique so we have to check the unique columns
    left_labels = left.columns.unique()
    right_labels = right.columns.unique()
    n_shared = len(left_labels.intersection(right_labels))

    if n_shared == 0:
        # TODO: is there a shortcut available when there is no overlap?
        return False

    # Reindexing is needed only if some label is missing from one side
    return n_shared != len(left_labels) or n_shared != len(right_labels)
366
367
def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame:
    """
    For DataFrame-with-DataFrame operations that require reindexing,
    operate only on shared columns, then reindex.

    Parameters
    ----------
    left : DataFrame
    right : DataFrame
    op : binary operator

    Returns
    -------
    DataFrame
        Result of ``op`` on the shared columns, reindexed to the outer join
        of both frames' columns.
    """
    # GH#31623, only operate on shared columns
    _, lcols, rcols = left.columns.join(
        right.columns, how="inner", level=None, return_indexers=True
    )

    # An indexer can come back as None when that side needs no selection
    # (for example when both frames have identical columns). None is not a
    # valid positional indexer, so use the frame as-is in that case.
    new_left = left if lcols is None else left.iloc[:, lcols]
    new_right = right if rcols is None else right.iloc[:, rcols]
    result = op(new_left, new_right)

    # Do the join on the columns instead of using align_method_FRAME
    # to avoid constructing two potentially large/sparse DataFrames
    join_columns, _, _ = left.columns.join(
        right.columns, how="outer", level=None, return_indexers=True
    )

    if result.columns.has_duplicates:
        # Avoid reindexing with a duplicate axis.
        # https://github.com/pandas-dev/pandas/issues/35194
        indexer, _ = result.columns.get_indexer_non_unique(join_columns)
        indexer = algorithms.unique1d(indexer)
        result = result._reindex_with_indexers(
            {1: [join_columns, indexer]}, allow_dups=True
        )
    else:
        result = result.reindex(join_columns, axis=1)

    return result
410
411
def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt):
    """
    If the Series operand is not EA-dtype, we can broadcast to 2D and operate
    blockwise.

    Parameters
    ----------
    frame : DataFrame
        Supplies the target shape, index and columns.
    series : Series
        Operand to broadcast.
    axis : int
        0 lays the Series values out as a column repeated across columns;
        any other value lays them out as a row repeated down the rows.

    Returns
    -------
    DataFrame or Series
        A frame shaped and labelled like ``frame``, or ``series`` unchanged
        when its values are not an ndarray and are not
        datetime64[ns]/timedelta64[ns].
    """
    rvalues = series._values
    if not isinstance(rvalues, np.ndarray):
        # TODO(EA2D): no need to special-case with 2D EAs
        if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
            # We can losslessly+cheaply cast to ndarray
            rvalues = np.asarray(rvalues)
        else:
            return series

    if axis == 0:
        rvalues = rvalues.reshape(-1, 1)
    else:
        rvalues = rvalues.reshape(1, -1)

    rvalues = np.broadcast_to(rvalues, frame.shape)
    # Build through frame._constructor (as align_method_FRAME does) rather
    # than type(frame), so DataFrame subclasses control how the result is
    # constructed. Pass dtype to avoid doing inference.
    return frame._constructor(
        rvalues, index=frame.index, columns=frame.columns, dtype=rvalues.dtype
    )
436
437
def flex_arith_method_FRAME(op):
    """
    Build the flex arithmetic method for DataFrame around ``op``.

    Parameters
    ----------
    op : callable
        Binary operator. Its ``__name__`` with leading/trailing underscores
        stripped becomes the method name and selects its docstring.

    Returns
    -------
    callable
        Function with signature
        ``(self, other, axis="columns", level=None, fill_value=None)``.
    """
    op_name = op.__name__.strip("_")

    na_op = get_array_op(op)
    doc = make_flex_doc(op_name, "dataframe")

    @Appender(doc)
    def f(self, other, axis: Axis = "columns", level=None, fill_value=None):
        # axis=None falls back to 1
        axis = cast(int, 1 if axis is None else self._get_axis_number(axis))

        if should_reindex_frame_op(self, other, op, axis, fill_value, level):
            return frame_arith_method_with_reindex(self, other, op)

        if fill_value is not None and isinstance(other, ABCSeries):
            # TODO: We could allow this in cases where we end up going
            # through the DataFrame path
            raise NotImplementedError(f"fill_value {fill_value} not supported.")

        prepared = maybe_prepare_scalar_for_op(other, self.shape)
        lhs, rhs = align_method_FRAME(self, prepared, axis, flex=True, level=level)

        if isinstance(rhs, ABCDataFrame):
            # Another DataFrame
            new_data = lhs._combine_frame(rhs, na_op, fill_value)
        elif isinstance(rhs, ABCSeries):
            new_data = lhs._dispatch_frame_op(rhs, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                lhs = lhs.fillna(fill_value)
            new_data = lhs._dispatch_frame_op(rhs, op)

        return lhs._construct_result(new_data)

    f.__name__ = op_name

    return f
478
479
def flex_comp_method_FRAME(op):
    """
    Build the flex comparison method for DataFrame around ``op``.

    Parameters
    ----------
    op : callable
        Comparison operator. Its ``__name__`` with leading/trailing
        underscores stripped becomes the method name and selects the
        description used in its docstring.

    Returns
    -------
    callable
        Function with signature ``(self, other, axis="columns", level=None)``.
    """
    op_name = op.__name__.strip("_")
    description = _op_descriptions[op_name]["desc"]
    doc = _flex_comp_doc_FRAME.format(op_name=op_name, desc=description)

    @Appender(doc)
    def f(self, other, axis: Axis = "columns", level=None):
        # axis=None falls back to 1
        axis = 1 if axis is None else self._get_axis_number(axis)

        lhs, rhs = align_method_FRAME(self, other, axis, flex=True, level=level)

        new_data = lhs._dispatch_frame_op(rhs, op, axis=axis)
        return lhs._construct_result(new_data)

    f.__name__ = op_name

    return f
499
500
# Names exported by ``from pandas.core.ops import *``. Besides the helpers
# defined in this module, this re-exports names imported at the top of the
# file from other modules (e.g. ``arithmetic_op``, ``kleene_and``,
# ``maybe_dispatch_ufunc_to_dunder_op`` and the reflected operators such as
# ``radd``).
__all__ = [
    "add_flex_arithmetic_methods",
    "align_method_FRAME",
    "align_method_SERIES",
    "ARITHMETIC_BINOPS",
    "arithmetic_op",
    "COMPARISON_BINOPS",
    "comparison_op",
    "comp_method_OBJECT_ARRAY",
    "fill_binop",
    "flex_arith_method_FRAME",
    "flex_comp_method_FRAME",
    "flex_method_SERIES",
    "frame_arith_method_with_reindex",
    "invalid_comparison",
    "kleene_and",
    "kleene_or",
    "kleene_xor",
    "logical_op",
    "maybe_dispatch_ufunc_to_dunder_op",
    "radd",
    "rand_",
    "rdiv",
    "rdivmod",
    "rfloordiv",
    "rmod",
    "rmul",
    "ror_",
    "rpow",
    "rsub",
    "rtruediv",
    "rxor",
    "should_reindex_frame_op",
    "unpack_zerodim_and_defer",
]