Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/ops/__init__.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

184 statements  

1""" 

2Arithmetic operations for PandasObjects 

3 

4This is not a public API. 

5""" 

6from __future__ import annotations 

7 

8import operator 

9from typing import ( 

10 TYPE_CHECKING, 

11 cast, 

12) 

13 

14import numpy as np 

15 

16from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op 

17from pandas._typing import ( 

18 Axis, 

19 AxisInt, 

20 Level, 

21) 

22from pandas.util._decorators import Appender 

23 

24from pandas.core.dtypes.common import ( 

25 is_array_like, 

26 is_list_like, 

27) 

28from pandas.core.dtypes.generic import ( 

29 ABCDataFrame, 

30 ABCSeries, 

31) 

32from pandas.core.dtypes.missing import isna 

33 

34from pandas.core import ( 

35 algorithms, 

36 roperator, 

37) 

38from pandas.core.ops.array_ops import ( 

39 arithmetic_op, 

40 comp_method_OBJECT_ARRAY, 

41 comparison_op, 

42 get_array_op, 

43 logical_op, 

44 maybe_prepare_scalar_for_op, 

45) 

46from pandas.core.ops.common import ( 

47 get_op_result_name, 

48 unpack_zerodim_and_defer, 

49) 

50from pandas.core.ops.docstrings import ( 

51 _flex_comp_doc_FRAME, 

52 _op_descriptions, 

53 make_flex_doc, 

54) 

55from pandas.core.ops.invalid import invalid_comparison 

56from pandas.core.ops.mask_ops import ( 

57 kleene_and, 

58 kleene_or, 

59 kleene_xor, 

60) 

61from pandas.core.ops.methods import add_flex_arithmetic_methods 

62from pandas.core.roperator import ( 

63 radd, 

64 rand_, 

65 rdiv, 

66 rdivmod, 

67 rfloordiv, 

68 rmod, 

69 rmul, 

70 ror_, 

71 rpow, 

72 rsub, 

73 rtruediv, 

74 rxor, 

75) 

76 

77if TYPE_CHECKING: 

78 from pandas import ( 

79 DataFrame, 

80 Series, 

81 ) 

82 

83# ----------------------------------------------------------------------------- 

84# constants 

# Names of the flex arithmetic operations: the eight forward operations
# followed by their reflected ("r"-prefixed) counterparts.
ARITHMETIC_BINOPS: set[str] = {
    # forward
    "add",
    "sub",
    "mul",
    "pow",
    "mod",
    "floordiv",
    "truediv",
    "divmod",
    # reflected
    "radd",
    "rsub",
    "rmul",
    "rpow",
    "rmod",
    "rfloordiv",
    "rtruediv",
    "rdivmod",
}


# Names of the six rich-comparison operations.
COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"}

106 

107 

108# ----------------------------------------------------------------------------- 

109# Masking NA values and fallbacks for operations numpy does not support 

110 

111 

def fill_binop(left, right, fill_value):
    """
    Fill one-sided missing values in a pair of operands.

    If a non-None fill_value is given, replace null entries in left and right
    with this value, but only in positions where _one_ of left/right is null,
    not both.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    The inputs are never modified in place. An operand is copied only when
    it has at least one position that actually gets filled; otherwise the
    object that was passed in is returned unchanged.
    """
    if fill_value is None:
        return left, right

    left_mask = isna(left)
    right_mask = isna(right)

    # one but not both
    one_sided = left_mask ^ right_mask
    fill_left = left_mask & one_sided
    fill_right = right_mask & one_sided

    # Avoid making a copy if we can: positions that are null on both sides
    # are left alone, so they must not trigger a copy either.
    if fill_left.any():
        left = left.copy()
        left[fill_left] = fill_value

    if fill_right.any():
        right = right.copy()
        right[fill_right] = fill_value

    return left, right

151 

152 

153# ----------------------------------------------------------------------------- 

154# Series 

155 

156 

def align_method_SERIES(left: Series, right, align_asobject: bool = False):
    """
    Align lhs and rhs Series.

    Parameters
    ----------
    left : Series
    right : Any
        Only aligned when it is a Series; anything else is passed through.
    align_asobject : bool, default False
        If True, cast both operands to object dtype before aligning.

    Returns
    -------
    left : Series
    right : Any
    """
    # ToDo: Different from align_method_FRAME, list, tuple and ndarray
    # are not coerced here
    # because Series has inconsistencies described in #13637

    # avoid repeated alignment: nothing to do for a non-Series operand or
    # for two Series whose indexes already compare equal
    if not isinstance(right, ABCSeries) or left.index.equals(right.index):
        return left, right

    if align_asobject:
        # to keep original value's dtype for bool ops
        left = left.astype(object)
        right = right.astype(object)

    left, right = left.align(right, copy=False)
    return left, right

174 

175 

def flex_method_SERIES(op):
    """
    Build the flex (method-form) wrapper of a binary operator for Series.

    Parameters
    ----------
    op : callable
        Binary operator; its ``__name__`` with surrounding underscores
        stripped becomes the name of the returned method.

    Returns
    -------
    callable
        Function with signature
        ``(self, other, level=None, fill_value=None, axis=0)``.
    """
    name = op.__name__.strip("_")
    doc = make_flex_doc(name, "series")

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis: Axis = 0):
        # validate axis; the resolved number itself is not used
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, ABCSeries):
            return self._binop(other, op, level=level, fill_value=fill_value)

        if isinstance(other, (np.ndarray, list, tuple)):
            if len(other) != len(self):
                raise ValueError("Lengths must be equal")
            # Work out the result name from the raw operand, before ``other``
            # is rebound to a Series constructed on self's index.
            res_name = get_op_result_name(self, other)
            other = self._constructor(other, self.index)
            result = self._binop(other, op, level=level, fill_value=fill_value)
            result.name = res_name
            return result

        # any other operand: fill self first, then apply the operator directly
        if fill_value is not None:
            self = self.fillna(fill_value)
        return op(self, other)

    flex_wrapper.__name__ = name
    return flex_wrapper

205 

206 

207# ----------------------------------------------------------------------------- 

208# DataFrame 

209 

210 

def align_method_FRAME(
    left, right, axis, flex: bool | None = False, level: Level = None
):
    """
    Convert rhs to meet lhs dims if input is list, tuple or np.ndarray.

    Parameters
    ----------
    left : DataFrame
    right : Any
    axis : int, str, or None
    flex : bool or None, default False
        Whether this is a flex op, in which case we reindex.
        None indicates not to check for alignment.
    level : int or level name, default None

    Returns
    -------
    left : DataFrame
    right : Any

    Raises
    ------
    ValueError
        If ``right`` is an ndarray or list-like whose length/shape cannot be
        matched to ``left``, if it is a list-like containing array-likes, if
        it is an ndarray with more than two dimensions, or if ``flex`` is
        falsy and the operands are not already aligned.
    """

    def object_dtype_or_none(values):
        # pass dtype to avoid doing inference, which would break consistency
        # with Index/Series ops. We can't pass values.dtype unconditionally
        # as that would break on e.g. datetime64[h] ndarray, so only object
        # dtype is forwarded.
        if getattr(values, "dtype", None) == object:
            return object
        return None

    def to_series(right):
        msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"

        dtype = object_dtype_or_none(right)

        # label the values with whichever axis of ``left`` the op runs along
        if axis is not None and left._get_axis_name(axis) == "index":
            labels = left.index
        else:
            labels = left.columns

        if len(labels) != len(right):
            raise ValueError(msg.format(req_len=len(labels), given_len=len(right)))
        return left._constructor_sliced(right, index=labels, dtype=dtype)

    if isinstance(right, np.ndarray):
        if right.ndim == 1:
            right = to_series(right)

        elif right.ndim == 2:
            # We need to pass dtype=right.dtype to retain object dtype
            # otherwise we lose consistency with Index and array ops
            dtype = object_dtype_or_none(right)

            if right.shape == left.shape:
                right = left._constructor(
                    right, index=left.index, columns=left.columns, dtype=dtype
                )

            elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
                # Broadcast across columns
                right = np.broadcast_to(right, left.shape)
                right = left._constructor(
                    right, index=left.index, columns=left.columns, dtype=dtype
                )

            elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
                # Broadcast along rows
                right = to_series(right[0, :])

            else:
                raise ValueError(
                    "Unable to coerce to DataFrame, shape "
                    f"must be {left.shape}: given {right.shape}"
                )

        elif right.ndim > 2:
            raise ValueError(
                "Unable to coerce to Series/DataFrame, "
                f"dimension must be <= 2: {right.shape}"
            )

    elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
        # GH 36702. Raise when attempting arithmetic with list of array-like.
        if any(is_array_like(el) for el in right):
            raise ValueError(
                f"Unable to coerce list of {type(right[0])} to Series/DataFrame"
            )
        # GH17901
        right = to_series(right)

    if flex is not None and isinstance(right, ABCDataFrame):
        if not left._indexed_same(right):
            if flex:
                left, right = left.align(right, join="outer", level=level, copy=False)
            else:
                raise ValueError(
                    "Can only compare identically-labeled (both index and columns) "
                    "DataFrame objects"
                )
    elif isinstance(right, ABCSeries):
        # axis=1 is default for DataFrame-with-Series op
        axis = left._get_axis_number(axis) if axis is not None else 1

        if not flex:
            if not left.axes[axis].equals(right.index):
                raise ValueError(
                    "Operands are not aligned. Do "
                    "`left, right = left.align(right, axis=1, copy=False)` "
                    "before operating."
                )

        left, right = left.align(
            right, join="outer", axis=axis, level=level, copy=False
        )
        right = _maybe_align_series_as_frame(left, right, axis)

    return left, right

335 

336 

def should_reindex_frame_op(
    left: DataFrame, right, op, axis: int, fill_value, level
) -> bool:
    """
    Check if this is an operation between DataFrames that will need to reindex.

    Parameters
    ----------
    left : DataFrame
    right : Any
    op : binary operator
    axis : int
    fill_value : object
    level : object

    Returns
    -------
    bool
        True only when ``right`` is a DataFrame, ``op`` is not pow/rpow,
        ``fill_value`` and ``level`` are None, ``axis`` is 1, and the two
        frames share some but not all of their unique column labels.
    """
    assert isinstance(left, ABCDataFrame)

    if op is operator.pow or op is roperator.rpow:
        # GH#32685 pow has special semantics for operating with null values
        return False

    if not isinstance(right, ABCDataFrame):
        return False

    if fill_value is None and level is None and axis == 1:
        # TODO: any other cases we should handle here?

        # Intersection is always unique so we have to check the unique columns
        left_uniques = left.columns.unique()
        right_uniques = right.columns.unique()
        cols = left_uniques.intersection(right_uniques)

        n_shared = len(cols)
        fully_shared = n_shared == len(left_uniques) and n_shared == len(
            right_uniques
        )
        if n_shared and not fully_shared:
            # TODO: is there a shortcut available when len(cols) == 0?
            return True

    return False

366 

367 

def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame:
    """
    For DataFrame-with-DataFrame operations that require reindexing,
    operate only on shared columns, then reindex.

    Parameters
    ----------
    left : DataFrame
    right : DataFrame
    op : binary operator

    Returns
    -------
    DataFrame
    """
    # GH#31623, only operate on shared columns
    cols, lcols, rcols = left.columns.join(
        right.columns, how="inner", level=None, return_indexers=True
    )

    # Index.join may hand back None instead of an indexer for a side that
    # needs no reordering; positional indexing with None would raise, so use
    # that frame as-is.
    new_left = left if lcols is None else left.iloc[:, lcols]
    new_right = right if rcols is None else right.iloc[:, rcols]
    result = op(new_left, new_right)

    # Do the join on the columns instead of using align_method_FRAME
    # to avoid constructing two potentially large/sparse DataFrames.
    # Only the joined labels are needed here, so no indexers are requested.
    join_columns = left.columns.join(
        right.columns, how="outer", level=None, return_indexers=False
    )

    if result.columns.has_duplicates:
        # Avoid reindexing with a duplicate axis.
        # https://github.com/pandas-dev/pandas/issues/35194
        indexer, _ = result.columns.get_indexer_non_unique(join_columns)
        indexer = algorithms.unique1d(indexer)
        result = result._reindex_with_indexers(
            {1: [join_columns, indexer]}, allow_dups=True
        )
    else:
        result = result.reindex(join_columns, axis=1)

    return result

410 

411 

def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt):
    """
    If the Series operand is not EA-dtype, we can broadcast to 2D and operate
    blockwise.

    Parameters
    ----------
    frame : DataFrame
    series : Series
    axis : int
        0 lays the Series values out as a column, anything else as a row,
        before broadcasting to ``frame.shape``.

    Returns
    -------
    DataFrame or Series
        A frame labelled like ``frame`` holding the broadcast values, or
        ``series`` itself when its values are neither an ndarray nor
        datetime64[ns]/timedelta64[ns].
    """
    rvalues = series._values
    if not isinstance(rvalues, np.ndarray):
        # TODO(EA2D): no need to special-case with 2D EAs
        if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
            # We can losslessly+cheaply cast to ndarray
            rvalues = np.asarray(rvalues)
        else:
            return series

    if axis == 0:
        rvalues = rvalues.reshape(-1, 1)
    else:
        rvalues = rvalues.reshape(1, -1)

    rvalues = np.broadcast_to(rvalues, frame.shape)
    # pass dtype to avoid doing inference.
    # Build the result through ``frame._constructor`` rather than
    # ``type(frame)`` so a DataFrame subclass's constructor hook is used,
    # as align_method_FRAME does with ``left._constructor``.
    return frame._constructor(
        rvalues, index=frame.index, columns=frame.columns, dtype=rvalues.dtype
    )

436 

437 

def flex_arith_method_FRAME(op):
    """
    Build the flex (method-form) arithmetic wrapper of ``op`` for DataFrame.

    Parameters
    ----------
    op : callable
        Binary operator; its ``__name__`` with surrounding underscores
        stripped becomes the name of the returned method.

    Returns
    -------
    callable
        Function with signature
        ``(self, other, axis="columns", level=None, fill_value=None)``.
    """
    op_name = op.__name__.strip("_")

    na_op = get_array_op(op)
    doc = make_flex_doc(op_name, "dataframe")

    @Appender(doc)
    def f(self, other, axis: Axis = "columns", level=None, fill_value=None):
        # axis=None falls back to 1 (columns)
        axis = self._get_axis_number(axis) if axis is not None else 1
        axis = cast(int, axis)

        if should_reindex_frame_op(self, other, op, axis, fill_value, level):
            return frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            # through the DataFrame path
            raise NotImplementedError(f"fill_value {fill_value} not supported.")

        other = maybe_prepare_scalar_for_op(other, self.shape)
        self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)

        elif isinstance(other, ABCSeries):
            new_data = self._dispatch_frame_op(other, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)

            new_data = self._dispatch_frame_op(other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f

478 

479 

def flex_comp_method_FRAME(op):
    """
    Build the flex (method-form) comparison wrapper of ``op`` for DataFrame.

    Parameters
    ----------
    op : callable
        Comparison operator; its ``__name__`` with surrounding underscores
        stripped becomes the name of the returned method and selects the
        description used in its docstring.

    Returns
    -------
    callable
        Function with signature ``(self, other, axis="columns", level=None)``.
    """
    op_name = op.__name__.strip("_")

    doc = _flex_comp_doc_FRAME.format(
        op_name=op_name, desc=_op_descriptions[op_name]["desc"]
    )

    @Appender(doc)
    def f(self, other, axis: Axis = "columns", level=None):
        # axis=None falls back to 1 (columns)
        axis = self._get_axis_number(axis) if axis is not None else 1

        # flex=True: misaligned operands are reindexed rather than rejected
        self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

        new_data = self._dispatch_frame_op(other, op, axis=axis)
        return self._construct_result(new_data)

    f.__name__ = op_name

    return f

499 

500 

# Names exposed by ``from pandas.core.ops import *``: the helpers defined in
# this module plus the operator functions re-exported from its submodules.
__all__ = [
    "add_flex_arithmetic_methods",
    "align_method_FRAME",
    "align_method_SERIES",
    "ARITHMETIC_BINOPS",
    "arithmetic_op",
    "COMPARISON_BINOPS",
    "comparison_op",
    "comp_method_OBJECT_ARRAY",
    "fill_binop",
    "flex_arith_method_FRAME",
    "flex_comp_method_FRAME",
    "flex_method_SERIES",
    "frame_arith_method_with_reindex",
    "invalid_comparison",
    "kleene_and",
    "kleene_or",
    "kleene_xor",
    "logical_op",
    "maybe_dispatch_ufunc_to_dunder_op",
    "radd",
    "rand_",
    "rdiv",
    "rdivmod",
    "rfloordiv",
    "rmod",
    "rmul",
    "ror_",
    "rpow",
    "rsub",
    "rtruediv",
    "rxor",
    "should_reindex_frame_op",
    "unpack_zerodim_and_defer",
]