Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arraylike.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

220 statements  

1""" 

2Methods that can be shared by many array-like classes or subclasses: 

3 Series 

4 Index 

5 ExtensionArray 

6""" 

7from __future__ import annotations 

8 

9import operator 

10from typing import Any 

11 

12import numpy as np 

13 

14from pandas._libs import lib 

15from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op 

16 

17from pandas.core.dtypes.generic import ABCNDFrame 

18 

19from pandas.core import roperator 

20from pandas.core.construction import extract_array 

21from pandas.core.ops.common import unpack_zerodim_and_defer 

22 

# Maps the ``__name__`` of a NumPy ufunc to the name of the reduction method
# that ``dispatch_reduction_ufunc`` calls for ``ufunc.reduce(obj)``.
REDUCTION_ALIASES = {
    "maximum": "max",
    "minimum": "min",
    "add": "sum",
    "multiply": "prod",
}

29 

30 

class OpsMixin:
    """
    Mixin wiring Python's operator dunders to three overridable hooks.

    Every comparison dunder forwards to ``_cmp_method``, every logical dunder
    to ``_logical_method`` and every arithmetic dunder to ``_arith_method``,
    passing the other operand and the operator function to apply. The hooks
    defined here return ``NotImplemented``; subclasses override them.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        """Hook for comparison operators; returns NotImplemented here."""
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        """Hook for logical operators; returns NotImplemented here."""
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        """Hook for arithmetic operators; returns NotImplemented here."""
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both column names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The name passed to the decorator now carries the trailing underscores,
    # matching the method name like every other dunder in this class.
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)

247 

248 

249# ----------------------------------------------------------------------------- 

250# Helpers to implement __array_ufunc__ 

251 

252 

def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Apply a NumPy ufunc to ``self`` and re-wrap the result as ``self``'s type.

    The steps, in order: normalise the ``out`` keyword, try the dunder-op
    dispatch, defer to other inputs when required, align NDFrame inputs on
    the union of their axes, run the ufunc on the unwrapped values, and
    rebuild the result through ``self._constructor``.

    Parameters
    ----------
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the ufunc method to call, e.g. ``"__call__"`` or ``"reduce"``.
    *inputs
        Positional inputs of the ufunc call.
    **kwargs
        Keyword arguments of the ufunc call.

    Returns
    -------
    object
        The re-wrapped result (a tuple of them when ``ufunc.nout > 1``), a
        scalar, or ``NotImplemented`` when another input should handle the
        call.

    Raises
    ------
    NotImplementedError
        For mixed DataFrame and Series inputs, or when ``method == "outer"``
        yields a result whose ``ndim`` differs from ``self.ndim``.

    See Also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import BlockManager

    cls = type(self)

    kwargs = _standardize_out_kwarg(**kwargs)

    # Binary ops are routed through our own dunder methods first.
    dispatched = maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dispatched is not NotImplemented:
        return dispatched

    # __array_ufunc__ implementations that never make us defer.
    no_defer = (
        np.ndarray.__array_ufunc__,
        cls.__array_ufunc__,
    )

    for candidate in inputs:
        outranks_self = (
            hasattr(candidate, "__array_priority__")
            and candidate.__array_priority__ > self.__array_priority__
        )
        overrides_ufunc = (
            hasattr(candidate, "__array_ufunc__")
            and type(candidate).__array_ufunc__ not in no_defer
            and not isinstance(candidate, self._HANDLED_TYPES)
        )
        if outranks_self or overrides_ufunc:
            return NotImplemented

    # Align all NDFrame inputs.
    input_types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, input_types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # No ufunc currently takes more than two inputs, so this amounts to
        # x1.index | x2.index, but it is written to cope with *args.
        distinct_types = set(input_types)
        if len(distinct_types) > 1 and {DataFrame, Series}.issubset(distinct_types):
            # ufunc(DataFrame, Series) is not handled well. It used to raise
            # an internal ValueError; it may be supported someday, hence
            # NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        axes = self.axes
        for other in alignable[1:]:
            # Relies on mixed Series / DataFrame inputs having been rejected.
            for pos, (own_ax, other_ax) in enumerate(zip(axes, other.axes)):
                axes[pos] = own_ax.union(other_ax)

        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, input_types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    reconstruct_kwargs = {}
    if self.ndim == 1:
        # Keep the name only when every named input agrees on it.
        names = [x.name for x in inputs if hasattr(x, "name")]
        reconstruct_kwargs["name"] = names[0] if len(set(names)) == 1 else None

    def _rebuild_one(values):
        # Scalars pass through untouched.
        if lib.is_scalar(values):
            return values

        if values.ndim != self.ndim:
            if method == "outer":
                raise NotImplementedError
            return values

        if isinstance(values, BlockManager):
            # Came through BlockManager.apply, e.g. np.sqrt
            rebuilt = self._constructor(values, **reconstruct_kwargs, copy=False)
        else:
            # A bare array lost its axes; put them back.
            rebuilt = self._constructor(
                values, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            rebuilt = rebuilt.__finalize__(self)
        return rebuilt

    def _rebuild(raw):
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_rebuild_one(part) for part in raw)
        return _rebuild_one(raw)

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        raw = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        return _rebuild(raw)

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        reduced = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if reduced is not NotImplemented:
            return reduced

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    # and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Give up on preserving types in the complex case. In theory:
        #  * nout>1 is doable if BlockManager.apply took nout and
        #    returned a Tuple[BlockManager].
        #  * len(inputs) > 1 is doable when we know that we have
        #    aligned blocks / dtypes.
        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # Note: default_array_ufunc cannot be used here because reindexing
        # means that `self` may not be among `inputs`.
        raw = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        raw = getattr(ufunc, method)(*inputs, **kwargs)
    elif method == "__call__" and not kwargs:
        # ufunc(dataframe) via np.<ufunc>(..): kwargs cannot necessarily be
        # handled block-by-block, so only take this path without kwargs.
        mgr = inputs[0]._mgr
        raw = mgr.apply(getattr(ufunc, method))
    else:
        # Other ufunc methods (e.g. np.<ufunc>.accumulate(..)) can take an
        # axis keyword and thus can't be called block-by-block.
        # e.g. np.negative (only one reached), with "where" and "out" in kwargs
        raw = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)

    return _rebuild(raw)

413 

414 

def _standardize_out_kwarg(**kwargs) -> dict:
    """
    Fold separate ``out1`` / ``out2`` keywords into a single ``out`` tuple.

    np.divmod, np.modf and np.frexp accept either ``out=(out1, out2)`` or
    ``out1=out1, out2=out2``. When both ``out1`` and ``out2`` are present and
    ``out`` is not, they are removed and replaced by ``out=(out1, out2)``.
    Otherwise the keywords are returned unchanged.
    """
    has_split_outputs = "out1" in kwargs and "out2" in kwargs
    if has_split_outputs and "out" not in kwargs:
        first = kwargs.pop("out1")
        second = kwargs.pop("out2")
        kwargs["out"] = (first, second)
    return kwargs

428 

429 

def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Evaluate the ufunc without ``out``, then write the result into ``out``.

    The ``out`` and ``where`` keywords are stripped before the call;
    ``where`` is applied only when storing the result. Returns ``out`` (or
    ``NotImplemented`` if the ufunc call returned that). Raises
    ``NotImplementedError`` when the shape of ``out`` does not match the
    number of results.
    """
    # Note: _standardize_out_kwarg is assumed to have run already.
    out = kwargs.pop("out")
    where = kwargs.pop("where", None)

    computed = getattr(ufunc, method)(*inputs, **kwargs)
    if computed is NotImplemented:
        return NotImplemented

    if isinstance(computed, tuple):
        # i.e. np.divmod, np.modf, np.frexp
        if not (isinstance(out, tuple) and len(out) == len(computed)):
            raise NotImplementedError

        for target, values in zip(out, computed):
            _assign_where(target, values, where)
        return out

    if isinstance(out, tuple):
        # A single result only fits a one-element ``out`` tuple.
        if len(out) != 1:
            raise NotImplementedError
        out = out[0]

    _assign_where(out, computed, where)
    return out

463 

464 

def _assign_where(out, result, where) -> None:
    """
    Store a ufunc result in ``out``, honouring an optional ``where`` mask.

    With ``where=None`` the whole of ``out`` is overwritten; otherwise
    ``np.putmask`` writes only where the mask is true.
    """
    if where is not None:
        np.putmask(out, where, result)
        return
    # no 'where' arg passed to ufunc
    out[:] = result

474 

475 

def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Fallback to the behavior we would get if we did not define __array_ufunc__.

    Every occurrence of ``self`` among ``inputs`` is replaced by
    ``np.asarray(self)`` before the ufunc method is called.

    Raises
    ------
    NotImplementedError
        If ``self`` is not among ``inputs``.
    """
    if all(x is not self for x in inputs):
        raise NotImplementedError

    converted = [np.asarray(x) if x is self else x for x in inputs]
    return getattr(ufunc, method)(*converted, **kwargs)

490 

491 

def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Dispatch ufunc reductions to self's reduction methods.

    Returns ``NotImplemented`` unless ``self`` is the only input, the ufunc
    name has an entry in ``REDUCTION_ALIASES``, and ``self`` has the aliased
    method. Otherwise that method is called with ``skipna=False`` plus the
    given keywords.
    """
    assert method == "reduce"

    only_input_is_self = len(inputs) == 1 and inputs[0] is self
    if not only_input_is_self:
        return NotImplemented

    method_name = REDUCTION_ALIASES.get(ufunc.__name__)
    # NB: we are assuming that min/max represent minimum/maximum methods,
    #  which would not be accurate for e.g. Timestamp.min
    if method_name is None or not hasattr(self, method_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # For DataFrame reductions we don't want the default axis=0.
        # Note: np.min is not a ufunc, but uses array_function_dispatch,
        #  so calls DataFrame.min (without ever getting here) with the np.min
        #  default of axis=None, which DataFrame.min catches and changes to
        #  axis=0. np.minimum.reduce(df) gets here because axis is not in
        #  kwargs, so we set axis=0 to match the behavior of
        #  np.minimum.reduce(df.values).
        kwargs.setdefault("axis", 0)

    # By default, numpy's reductions do not skip NaNs, so we have to
    #  pass skipna=False
    return getattr(self, method_name)(skipna=False, **kwargs)

522 # so we set axis=0 to match the behaviorof np.minimum.reduce(df.values) 

523 kwargs["axis"] = 0 

524 

525 # By default, numpy's reductions do not skip NaNs, so we have to 

526 # pass skipna=False 

527 return getattr(self, method_name)(skipna=False, **kwargs)