Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/ops/array_ops.py: 37%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

208 statements  

1""" 

2Functions for arithmetic and comparison operations on NumPy arrays and 

3ExtensionArrays. 

4""" 

5from __future__ import annotations 

6 

7import datetime 

8from functools import partial 

9import operator 

10from typing import ( 

11 TYPE_CHECKING, 

12 Any, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 NaT, 

20 Timedelta, 

21 Timestamp, 

22 lib, 

23 ops as libops, 

24) 

25from pandas._libs.tslibs import ( 

26 BaseOffset, 

27 get_supported_dtype, 

28 is_supported_dtype, 

29 is_unitless, 

30) 

31from pandas.util._exceptions import find_stack_level 

32 

33from pandas.core.dtypes.cast import ( 

34 construct_1d_object_array_from_listlike, 

35 find_common_type, 

36) 

37from pandas.core.dtypes.common import ( 

38 ensure_object, 

39 is_bool_dtype, 

40 is_list_like, 

41 is_numeric_v_string_like, 

42 is_object_dtype, 

43 is_scalar, 

44) 

45from pandas.core.dtypes.generic import ( 

46 ABCExtensionArray, 

47 ABCIndex, 

48 ABCSeries, 

49) 

50from pandas.core.dtypes.missing import ( 

51 isna, 

52 notna, 

53) 

54 

55from pandas.core import roperator 

56from pandas.core.computation import expressions 

57from pandas.core.construction import ensure_wrapped_if_datetimelike 

58from pandas.core.ops import missing 

59from pandas.core.ops.dispatch import should_extension_dispatch 

60from pandas.core.ops.invalid import invalid_comparison 

61 

62if TYPE_CHECKING: 

63 from pandas._typing import ( 

64 ArrayLike, 

65 Shape, 

66 ) 

67 

68# ----------------------------------------------------------------------------- 

69# Masking NA values and fallbacks for operations numpy does not support 

70 

71 

def fill_binop(left, right, fill_value):
    """
    Substitute ``fill_value`` for nulls that appear on exactly one side.

    A position is filled only when one of ``left``/``right`` is null there
    and the other is not; positions where both are null are left untouched.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object
        Replacement value. ``None`` disables filling entirely.

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    An operand is copied only when ``fill_value`` is not None and that
    operand contains at least one null; otherwise it is returned as-is.
    """
    if fill_value is None:
        # Nothing to fill with: hand both operands back untouched.
        return left, right

    left_na = isna(left)
    right_na = isna(right)

    # True where exactly one of the two operands is null.
    exactly_one_na = left_na ^ right_na

    if left_na.any():
        # Copy so the caller's array is never mutated.
        left = left.copy()
        left[left_na & exactly_one_na] = fill_value

    if right_na.any():
        # Copy so the caller's array is never mutated.
        right = right.copy()
        right[right_na & exactly_one_na] = fill_value

    return left, right

111 

112 

def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare the array ``x`` against ``y`` element-wise using ``op``.

    Parameters
    ----------
    op : comparison operator
    x : np.ndarray
        Flattened with ``ravel()`` before being handed to the libops helpers.
    y : list, np.ndarray, Series, Index or scalar
        Lists are converted to a 1D object array; array-likes that are not
        object dtype are cast to object; anything else is treated as a scalar.

    Returns
    -------
    np.ndarray
        Result of the comparison, reshaped to ``x.shape``.

    Raises
    ------
    ValueError
        If ``y`` is array-like and its shape differs from ``x.shape``.
    """
    if isinstance(y, list):
        # e.g. test_tuple_categories
        y = construct_1d_object_array_from_listlike(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # Anything that is not array-like is compared as a single scalar.
        flat = libops.scalar_compare(x.ravel(), y, op)
        return flat.reshape(x.shape)

    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)

    if isinstance(y, (ABCSeries, ABCIndex)):
        # Unwrap to the underlying array before comparing.
        y = y._values

    if x.shape != y.shape:
        raise ValueError("Shapes must match", x.shape, y.shape)

    flat = libops.vec_compare(x.ravel(), y.ravel(), op)
    return flat.reshape(x.shape)

131 

132 

133def _masked_arith_op(x: np.ndarray, y, op): 

134 """ 

135 If the given arithmetic operation fails, attempt it again on 

136 only the non-null elements of the input array(s). 

137 

138 Parameters 

139 ---------- 

140 x : np.ndarray 

141 y : np.ndarray, Series, Index 

142 op : binary operator 

143 """ 

144 # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes 

145 # the logic valid for both Series and DataFrame ops. 

146 xrav = x.ravel() 

147 

148 if isinstance(y, np.ndarray): 

149 dtype = find_common_type([x.dtype, y.dtype]) 

150 result = np.empty(x.size, dtype=dtype) 

151 

152 if len(x) != len(y): 

153 raise ValueError(x.shape, y.shape) 

154 ymask = notna(y) 

155 

156 # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex 

157 # we would get int64 dtype, see GH#19956 

158 yrav = y.ravel() 

159 mask = notna(xrav) & ymask.ravel() 

160 

161 # See GH#5284, GH#5035, GH#19448 for historical reference 

162 if mask.any(): 

163 result[mask] = op(xrav[mask], yrav[mask]) 

164 

165 else: 

166 if not is_scalar(y): 

167 raise TypeError( 

168 f"Cannot broadcast np.ndarray with operand of type { type(y) }" 

169 ) 

170 

171 # mask is only meaningful for x 

172 result = np.empty(x.size, dtype=x.dtype) 

173 mask = notna(xrav) 

174 

175 # 1 ** np.nan is 1. So we have to unmask those. 

176 if op is pow: 

177 mask = np.where(x == 1, False, mask) 

178 elif op is roperator.rpow: 

179 mask = np.where(y == 1, False, mask) 

180 

181 if mask.any(): 

182 result[mask] = op(xrav[mask], y) 

183 

184 np.putmask(result, ~mask, np.nan) 

185 result = result.reshape(x.shape) # 2D compat 

186 return result 

187 

188 

189def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False): 

190 """ 

191 Return the result of evaluating op on the passed in values. 

192 

193 If native types are not compatible, try coercion to object dtype. 

194 

195 Parameters 

196 ---------- 

197 left : np.ndarray 

198 right : np.ndarray or scalar 

199 Excludes DataFrame, Series, Index, ExtensionArray. 

200 is_cmp : bool, default False 

201 If this a comparison operation. 

202 

203 Returns 

204 ------- 

205 array-like 

206 

207 Raises 

208 ------ 

209 TypeError : invalid operation 

210 """ 

211 if isinstance(right, str): 

212 # can never use numexpr 

213 func = op 

214 else: 

215 func = partial(expressions.evaluate, op) 

216 

217 try: 

218 result = func(left, right) 

219 except TypeError: 

220 if not is_cmp and ( 

221 left.dtype == object or getattr(right, "dtype", None) == object 

222 ): 

223 # For object dtype, fallback to a masked operation (only operating 

224 # on the non-missing values) 

225 # Don't do this for comparisons, as that will handle complex numbers 

226 # incorrectly, see GH#32047 

227 result = _masked_arith_op(left, right, op) 

228 else: 

229 raise 

230 

231 if is_cmp and (is_scalar(result) or result is NotImplemented): 

232 # numpy returned a scalar instead of operating element-wise 

233 # e.g. numeric array vs str 

234 # TODO: can remove this after dropping some future numpy version? 

235 return invalid_comparison(left, right, op) 

236 

237 return missing.dispatch_fill_zeros(op, left, right, result) 

238 

239 

def arithmetic_op(left: ArrayLike, right: Any, op):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index. Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    # NB: We assume that extract_array and ensure_wrapped_if_datetimelike
    #  have already been called on `left` and `right`,
    #  and `maybe_prepare_scalar_for_op` has already been called on `right`
    # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
    # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)

    # Timedelta/Timestamp and other custom scalars are included in the check
    # because numexpr will fail on it, see GH#31457
    call_op_directly = (
        should_extension_dispatch(left, right)
        or isinstance(right, (Timedelta, BaseOffset, Timestamp))
        or right is NaT
    )
    if call_op_directly:
        return op(left, right)

    # TODO we should handle EAs consistently and move this check before the if/else
    # (https://github.com/pandas-dev/pandas/issues/41165)
    # error: Argument 2 to "_bool_arith_check" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
    _bool_arith_check(op, left, right)  # type: ignore[arg-type]

    # error: Argument 1 to "_na_arithmetic_op" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
    return _na_arithmetic_op(left, right, op)  # type: ignore[arg-type]

286 

287 

def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is an ndarray/ExtensionArray (or a list, which is
        converted to one) whose length differs from that of ``left``.
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = lib.item_from_zerodim(ensure_wrapped_if_datetimelike(right))

    if isinstance(rvalues, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        #  to a tuple that represents a single entry, see test_compare_tuple_strs
        rvalues = np.asarray(rvalues)

    # TODO: make this treatment consistent across ops and classes.
    #  We are not catching all listlikes here (e.g. frozenset, tuple)
    #  The ambiguous case is object-dtype.  See GH#27803
    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)) and len(lvalues) != len(
        rvalues
    ):
        raise ValueError(
            "Lengths must match to compare", lvalues.shape, rvalues.shape
        )

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    datetimelike_scalar = (
        isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT
    )
    if datetimelike_scalar and lvalues.dtype != object:
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if is_scalar(rvalues) and isna(rvalues):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None: every element compares
        # unequal, so only `!=` is all-True.
        return np.full(lvalues.shape, op is operator.ne, dtype=bool)

    if is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    if lvalues.dtype == object or isinstance(rvalues, str):
        return comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    return _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)

350 

351 

def na_logical_op(x: np.ndarray, y, op):
    """
    Apply the logical operator ``op`` to ``x`` and ``y``.

    ``op(x, y)`` is tried first. If that raises TypeError, the operation is
    retried through the libops helpers: element-wise on object-cast arrays
    when ``y`` is an ndarray, otherwise against ``y`` as a scalar.

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Reshaped to ``x.shape``.

    Raises
    ------
    TypeError
        If the scalar fallback cannot be performed.
    """
    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or bool)
        result = op(x, y)
    except TypeError:
        if not isinstance(y, np.ndarray):
            # let null fall thru
            assert lib.is_scalar(y)
            if not isna(y):
                y = bool(y)
            try:
                result = libops.scalar_binop(x, y, op)
            except (
                TypeError,
                ValueError,
                AttributeError,
                OverflowError,
                NotImplementedError,
            ) as err:
                scalar_type_name = type(y).__name__
                raise TypeError(
                    f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
                    f"and scalar of type [{scalar_type_name}]"
                ) from err
        else:
            # bool-bool dtype operations should be OK, should not get here
            assert not (x.dtype.kind == "b" and y.dtype.kind == "b")
            x = ensure_object(x)
            y = ensure_object(y)
            result = libops.vec_binop(x.ravel(), y.ravel(), op)

    return result.reshape(x.shape)

391 

392 

def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray

    Warns
    -----
    FutureWarning
        If ``right`` is list-like but has no ``dtype`` attribute
        (e.g. list, tuple).
    """

    def fill_bool(x, left=None):
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in "cfO":
            # dtypes that can hold NA: treat missing entries as False
            na_positions = isna(x)
            if na_positions.any():
                x = x.astype(object)
                x[na_positions] = False

        if left is None or left.dtype.kind == "b":
            x = x.astype(bool)
        return x

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        warnings.warn(
            "Logical ops (and, or, xor) between Pandas objects and dtype-less "
            "sequences (e.g. list, tuple) are deprecated and will raise in a "
            "future version. Wrap the object in a Series, Index, or np.array "
            "before operating instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if isinstance(rvalues, np.ndarray):
        other_is_int = rvalues.dtype.kind in "iu"
        if not other_is_int:
            rvalues = fill_bool(rvalues, lvalues)
    else:
        # i.e. scalar
        other_is_int = lib.is_integer(rvalues)

    res_values = na_logical_op(lvalues, rvalues, op)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    #   integer dtypes.  Otherwise these are boolean ops
    if left.dtype.kind in "iu" and other_is_int:
        return res_values
    return fill_bool(res_values)

462 

463 

def get_array_op(op):
    """
    Map the operator ``op`` to the array-level function that implements it.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.

    Returns
    -------
    functools.partial
        ``comparison_op``, ``logical_op`` or ``arithmetic_op`` with ``op``
        bound as a keyword. ``op`` itself is returned unchanged when it is
        already a ``partial`` or is named ``arith_op``.

    Raises
    ------
    NotImplementedError
        If the normalized operator name is not recognized.
    """
    if isinstance(op, partial):
        # We get here via dispatch_to_series in DataFrame case
        #  e.g. test_rolling_consistency_var_debiasing_factors
        return op

    # Drop surrounding underscores and any leading "r" so that reversed
    # variants share a name with their forward counterpart.
    op_name = op.__name__.strip("_").lstrip("r")
    if op_name == "arith_op":
        # Reached via DataFrame._combine_frame i.e. flex methods
        #  e.g. test_df_add_flex_filled_mixed_dtypes
        return op

    dispatch_table = (
        (("eq", "ne", "lt", "le", "gt", "ge"), comparison_op),
        (("and", "or", "xor", "rand", "ror", "rxor"), logical_op),
        (
            (
                "add",
                "sub",
                "mul",
                "truediv",
                "floordiv",
                "mod",
                "divmod",
                "pow",
            ),
            arithmetic_op,
        ),
    )
    for names, array_func in dispatch_table:
        if op_name in names:
            return partial(array_func, op=op)

    raise NotImplementedError(op_name)

505 

506 

507def maybe_prepare_scalar_for_op(obj, shape: Shape): 

508 """ 

509 Cast non-pandas objects to pandas types to unify behavior of arithmetic 

510 and comparison operations. 

511 

512 Parameters 

513 ---------- 

514 obj: object 

515 shape : tuple[int] 

516 

517 Returns 

518 ------- 

519 out : object 

520 

521 Notes 

522 ----- 

523 Be careful to call this *after* determining the `name` attribute to be 

524 attached to the result of the arithmetic operation. 

525 """ 

526 if type(obj) is datetime.timedelta: 

527 # GH#22390 cast up to Timedelta to rely on Timedelta 

528 # implementation; otherwise operation against numeric-dtype 

529 # raises TypeError 

530 return Timedelta(obj) 

531 elif type(obj) is datetime.datetime: 

532 # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above 

533 return Timestamp(obj) 

534 elif isinstance(obj, np.datetime64): 

535 # GH#28080 numpy casts integer-dtype to datetime64 when doing 

536 # array[int] + datetime64, which we do not allow 

537 if isna(obj): 

538 from pandas.core.arrays import DatetimeArray 

539 

540 # Avoid possible ambiguities with pd.NaT 

541 # GH 52295 

542 if is_unitless(obj.dtype): 

543 obj = obj.astype("datetime64[ns]") 

544 elif not is_supported_dtype(obj.dtype): 

545 new_dtype = get_supported_dtype(obj.dtype) 

546 obj = obj.astype(new_dtype) 

547 right = np.broadcast_to(obj, shape) 

548 return DatetimeArray._simple_new(right, dtype=right.dtype) 

549 

550 return Timestamp(obj) 

551 

552 elif isinstance(obj, np.timedelta64): 

553 if isna(obj): 

554 from pandas.core.arrays import TimedeltaArray 

555 

556 # wrapping timedelta64("NaT") in Timedelta returns NaT, 

557 # which would incorrectly be treated as a datetime-NaT, so 

558 # we broadcast and wrap in a TimedeltaArray 

559 # GH 52295 

560 if is_unitless(obj.dtype): 

561 obj = obj.astype("timedelta64[ns]") 

562 elif not is_supported_dtype(obj.dtype): 

563 new_dtype = get_supported_dtype(obj.dtype) 

564 obj = obj.astype(new_dtype) 

565 right = np.broadcast_to(obj, shape) 

566 return TimedeltaArray._simple_new(right, dtype=right.dtype) 

567 

568 # In particular non-nanosecond timedelta64 needs to be cast to 

569 # nanoseconds, or else we get undesired behavior like 

570 # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') 

571 return Timedelta(obj) 

572 

573 # We want NumPy numeric scalars to behave like Python scalars 

574 # post NEP 50 

575 elif isinstance(obj, np.integer): 

576 return int(obj) 

577 

578 elif isinstance(obj, np.floating): 

579 return float(obj) 

580 

581 return obj 

582 

583 

584_BOOL_OP_NOT_ALLOWED = { 

585 operator.truediv, 

586 roperator.rtruediv, 

587 operator.floordiv, 

588 roperator.rfloordiv, 

589 operator.pow, 

590 roperator.rpow, 

591} 

592 

593 

594def _bool_arith_check(op, a: np.ndarray, b): 

595 """ 

596 In contrast to numpy, pandas raises an error for certain operations 

597 with booleans. 

598 """ 

599 if op in _BOOL_OP_NOT_ALLOWED: 

600 if a.dtype.kind == "b" and (is_bool_dtype(b) or lib.is_bool(b)): 

601 op_name = op.__name__.strip("_").lstrip("r") 

602 raise NotImplementedError( 

603 f"operator '{op_name}' not implemented for bool dtypes" 

604 )