Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arraylike.py: 41%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

220 statements  

1""" 

2Methods that can be shared by many array-like classes or subclasses: 

3 Series 

4 Index 

5 ExtensionArray 

6""" 

7from __future__ import annotations 

8 

9import operator 

10from typing import Any 

11 

12import numpy as np 

13 

14from pandas._libs import lib 

15from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op 

16 

17from pandas.core.dtypes.generic import ABCNDFrame 

18 

19from pandas.core import roperator 

20from pandas.core.construction import extract_array 

21from pandas.core.ops.common import unpack_zerodim_and_defer 

22 

# Keyed by a numpy ufunc's ``__name__``; the value is the name of the method
# that ``dispatch_reduction_ufunc`` looks up on the object being reduced.
REDUCTION_ALIASES = dict(
    maximum="max",
    minimum="min",
    add="sum",
    multiply="prod",
)

29 

30 

class OpsMixin:
    """
    Mixin that wires Python's operator dunder methods to three hook methods.

    Every comparison dunder forwards to ``_cmp_method``, every logical dunder
    to ``_logical_method`` and every arithmetic dunder to ``_arith_method``,
    passing ``other`` together with the function that implements the operator
    (from ``operator``, ``roperator`` or the builtin ``divmod``).  The hooks
    defined here return ``NotImplemented``; subclasses override them to
    provide real behavior.

    Each dunder is wrapped with ``unpack_zerodim_and_defer``, which receives
    the dunder's own name.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        """Hook for comparison operators; returns NotImplemented by default."""
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        """Hook for logical operators; returns NotImplemented by default."""
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        """Hook for arithmetic operators; returns NotImplemented by default."""
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both columns names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        # the builtin ``divmod`` is itself the operator function here
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)

247 

248 

249# ----------------------------------------------------------------------------- 

250# Helpers to implement __array_ufunc__ 

251 

252 

def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Shared implementation of ``__array_ufunc__`` for pandas objects.

    The work happens in this order:

    1. ``out1``/``out2`` keywords are folded into a single ``out``.
    2. The call is offered to ``maybe_dispatch_ufunc_to_dunder_op``; anything
       other than ``NotImplemented`` is returned as-is.
    3. ``NotImplemented`` is returned if any input has a larger
       ``__array_priority__`` than ``self``, or brings its own
       ``__array_ufunc__`` and is not one of ``self._HANDLED_TYPES``.
    4. When more than one input is an ``NDFrame``, all of them are reindexed
       to the union of their axes.
    5. The ufunc is evaluated (via ``out`` handling, a reduction method, plain
       arrays, or the manager) and the result is wrapped back into a pandas
       object where its dimensionality matches ``self``.

    Parameters
    ----------
    self : object
        The object whose ``__array_ufunc__`` was invoked.
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the ufunc method, e.g. ``"__call__"`` or ``"reduce"``.
    *inputs : Any
        Positional inputs of the ufunc call.
    **kwargs : Any
        Keyword arguments of the ufunc call.

    Raises
    ------
    NotImplementedError
        If the inputs mix DataFrame and Series, or if ``method`` is
        ``"outer"`` and the result's ``ndim`` differs from ``self.ndim``.

    See also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import (
        ArrayManager,
        BlockManager,
    )

    cls = type(self)

    kwargs = _standardize_out_kwarg(**kwargs)

    # Binary operators go through our own dunder methods first.
    dispatched = maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dispatched is not NotImplemented:
        return dispatched

    # ``__array_ufunc__`` implementations we never step aside for.
    no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)

    for item in inputs:
        outranks_self = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        foreign_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if outranks_self or foreign_ufunc:
            return NotImplemented

    # Collect the inputs that take part in alignment.
    types = tuple(map(type, inputs))
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # No ufunc currently takes more than two inputs, so in practice this
        # is x1.index | x2.index; it is written for arbitrary *args anyway.
        distinct_types = set(types)
        if len(distinct_types) > 1 and {DataFrame, Series} <= distinct_types:
            # ufunc(DataFrame, Series) is not handled well. It used to fail
            # with an internal ValueError; NotImplementedError leaves room to
            # support it later.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        axes = self.axes
        for other in alignable[1:]:
            # Pairing axes positionally is only valid because mixed
            # Series/DataFrame inputs were rejected above.
            for pos, (mine, theirs) in enumerate(zip(axes, other.axes)):
                axes[pos] = mine.union(theirs)

        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    # 1-D results keep a name only when every named input agrees on it.
    reconstruct_kwargs = {}
    if self.ndim == 1:
        names = [x.name for x in inputs if hasattr(x, "name")]
        reconstruct_kwargs["name"] = names[0] if len(set(names)) == 1 else None

    def _rebuild_one(res):
        # Wrap a single ufunc output back into a pandas object if possible.
        if lib.is_scalar(res):
            return res

        if res.ndim != self.ndim:
            if method == "outer":
                raise NotImplementedError
            return res

        if isinstance(res, (BlockManager, ArrayManager)):
            # came through BlockManager.apply, e.g. np.sqrt
            res = self._constructor_from_mgr(res, axes=res.axes)
        else:
            # a bare array lost its axes; put them back
            res = self._constructor(
                res, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            res = res.__finalize__(self)
        return res

    def _rebuild(res):
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_rebuild_one(part) for part in res)
        return _rebuild_one(res)

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        return _rebuild(
            dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        )

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        reduced = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if reduced is not NotImplemented:
            return reduced

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    # and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Type preservation is abandoned in the complex case, although:
        #  * nout>1 would be doable if BlockManager.apply took nout and
        #    returned a Tuple[BlockManager].
        #  * len(inputs) > 1 would be doable once blocks / dtypes are known
        #    to be aligned.
        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # default_array_ufunc cannot be used here: after reindexing, `self`
        # may no longer be among `inputs`
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif method == "__call__" and not kwargs:
        # ufunc(dataframe) as a plain np.<ufunc>(..) call. kwargs cannot
        # necessarily be handled block-by-block, hence the `not kwargs`.
        result = inputs[0]._mgr.apply(getattr(ufunc, method))
    else:
        # ufunc(dataframe) through a specific ufunc method
        # (eg np.<ufunc>.accumulate(..)); those can take an axis keyword and
        # therefore can't be called block-by-block.
        # e.g. np.negative (only one reached), with "where" and "out" in kwargs
        result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)

    return _rebuild(result)

416 

417 

418def _standardize_out_kwarg(**kwargs) -> dict: 

419 """ 

420 If kwargs contain "out1" and "out2", replace that with a tuple "out" 

421 

422 np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or 

423 `out1=out1, out2=out2)` 

424 """ 

425 if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs: 

426 out1 = kwargs.pop("out1") 

427 out2 = kwargs.pop("out2") 

428 out = (out1, out2) 

429 kwargs["out"] = out 

430 return kwargs 

431 

432 

def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Evaluate a ufunc call that carries an `out` keyword.

    The ufunc is invoked without `out` (and without `where`); its result is
    then written into the supplied `out` object(s), masked by `where` when
    one was given.

    Returns
    -------
    The `out` object that was written to (a tuple for multi-output ufuncs),
    or NotImplemented if the ufunc call itself returned NotImplemented.

    Raises
    ------
    NotImplementedError
        If the number of `out` targets does not fit the ufunc's result.
    """

    # Note: we assume _standardize_out_kwarg has already been called.
    out = kwargs.pop("out")
    where = kwargs.pop("where", None)

    result = getattr(ufunc, method)(*inputs, **kwargs)
    if result is NotImplemented:
        return NotImplemented

    if isinstance(result, tuple):
        # i.e. np.divmod, np.modf, np.frexp
        if not (isinstance(out, tuple) and len(out) == len(result)):
            raise NotImplementedError

        for target, values in zip(out, result):
            _assign_where(target, values, where)
        return out

    # Single result: accept a bare target or a one-element tuple.
    if isinstance(out, tuple):
        if len(out) != 1:
            raise NotImplementedError
        out = out[0]

    _assign_where(out, result, where)
    return out

466 

467 

468def _assign_where(out, result, where) -> None: 

469 """ 

470 Set a ufunc result into 'out', masking with a 'where' argument if necessary. 

471 """ 

472 if where is None: 

473 # no 'where' arg passed to ufunc 

474 out[:] = result 

475 else: 

476 np.putmask(out, where, result) 

477 

478 

def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply the ufunc as if `self` did not define __array_ufunc__.

    Every occurrence of `self` in `inputs` is replaced by ``np.asarray(self)``
    before the ufunc method is called.

    Notes
    -----
    We are assuming that `self` is among `inputs`.

    Raises
    ------
    NotImplementedError
        If `self` is not one of `inputs`.
    """
    if all(arg is not self for arg in inputs):
        raise NotImplementedError

    converted = (np.asarray(arg) if arg is self else arg for arg in inputs)
    return getattr(ufunc, method)(*converted, **kwargs)

493 

494 

def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Route ``ufunc.reduce(self)`` to the matching reduction method on self.

    Returns NotImplemented unless `self` is the single input, the ufunc's
    name is listed in REDUCTION_ALIASES, and `self` has an attribute with
    the aliased method name.  Otherwise that method is called with
    ``skipna=False`` plus the received keywords.
    """
    assert method == "reduce"

    # Only the plain one-argument form on `self` is handled.
    if not (len(inputs) == 1 and inputs[0] is self):
        return NotImplemented

    ufunc_name = ufunc.__name__
    if ufunc_name not in REDUCTION_ALIASES:
        return NotImplemented
    method_name = REDUCTION_ALIASES[ufunc_name]

    # NB: we are assuming that min/max represent minimum/maximum methods,
    #  which would not be accurate for e.g. Timestamp.min
    if not hasattr(self, method_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # np.min is not a ufunc; it goes through array_function_dispatch and
        # calls DataFrame.min directly (never reaching this function) with
        # the np.min default of axis=None, which DataFrame.min turns into
        # axis=0.  np.minimum.reduce(df) does get here, without an axis in
        # kwargs, so fill in axis=0 to match the behavior of
        # np.minimum.reduce(df.values).
        kwargs.setdefault("axis", 0)

    # By default, numpy's reductions do not skip NaNs, so we have to
    # pass skipna=False
    reducer = getattr(self, method_name)
    return reducer(skipna=False, **kwargs)