Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arraylike.py: 41%

1"""

2Methods that can be shared by many array-like classes or subclasses:

3 Series

4 Index

5 ExtensionArray

6"""

7from __future__ import annotations

9import operator

10from typing import Any

12import numpy as np

14from pandas._libs import lib

15from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

17from pandas.core.dtypes.generic import ABCNDFrame

19from pandas.core import roperator

20from pandas.core.construction import extract_array

21from pandas.core.ops.common import unpack_zerodim_and_defer

# Maps a NumPy ufunc name (``ufunc.__name__``) to the name of the reduction
# method that ``dispatch_reduction_ufunc`` looks up on the object when
# handling ``ufunc.reduce``.
REDUCTION_ALIASES = {
    "maximum": "max",
    "minimum": "min",
    "add": "sum",
    "multiply": "prod",
}

class OpsMixin:
    """
    Mixin that routes Python's operator dunder methods to three hooks.

    Every comparison, logical and arithmetic dunder defined here forwards to
    ``_cmp_method``, ``_logical_method`` or ``_arith_method`` respectively,
    passing the other operand and the operator callable to apply.  The hooks
    defined on this class return ``NotImplemented``; subclasses override them
    to provide real behavior.

    Each dunder is wrapped with ``unpack_zerodim_and_defer``, which is always
    given the full name of the dunder it decorates.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        # Hook for ==, !=, <, <=, >, >=; ``op`` is the ``operator`` function.
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        # Hook for &, |, ^ and their reflected forms.
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        # Hook for +, -, *, /, //, %, divmod, ** and their reflected forms.
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both columns names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The decorator receives the full dunder name, like every other method
    # here (this one previously lacked the trailing underscores).
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)

247

248

249# -----------------------------------------------------------------------------

250# Helpers to implement __array_ufunc__

251

252

def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Apply a NumPy ufunc to ``self``, re-wrapping the result where possible.

    The steps, in order: normalize the ``out`` keyword, try the operator
    dunder methods, defer (``NotImplemented``) to inputs that outrank us or
    bring their own ``__array_ufunc__``, align multiple NDFrame inputs on the
    union of their axes, run the ufunc, and rebuild a pandas object from the
    raw result.

    See also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import (
        ArrayManager,
        BlockManager,
    )

    kwargs = _standardize_out_kwarg(**kwargs)

    # Binary ops are routed through our own dunder methods first.
    dispatched = maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dispatched is not NotImplemented:
        return dispatched

    # __array_ufunc__ implementations that never cause us to defer.
    own_impls = (np.ndarray.__array_ufunc__, type(self).__array_ufunc__)

    for arg in inputs:
        outranks_us = (
            hasattr(arg, "__array_priority__")
            and arg.__array_priority__ > self.__array_priority__
        )
        foreign_ufunc = (
            hasattr(arg, "__array_ufunc__")
            and type(arg).__array_ufunc__ not in own_impls
            and not isinstance(arg, self._HANDLED_TYPES)
        )
        if outranks_us or foreign_ufunc:
            return NotImplemented

    # Work out which inputs are NDFrames and therefore need aligning.
    input_types = tuple(type(arg) for arg in inputs)
    frames = [
        arg for arg, typ in zip(inputs, input_types) if issubclass(typ, NDFrame)
    ]

    if len(frames) <= 1:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
    else:
        # Alignment is needed.  No ufunc currently takes more than two
        # inputs, so in practice this is x1.index | x2.index, but the loop
        # is written for an arbitrary number of arguments.
        distinct_types = set(input_types)
        if len(distinct_types) > 1 and {DataFrame, Series}.issubset(distinct_types):
            # ufunc(DataFrame, Series) is not handled well.  This used to
            # surface as an internal ValueError; it may be supported one
            # day, hence NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )

        merged_axes = self.axes
        for other in frames[1:]:
            # Pairing axes positionally is only valid because mixed
            # series / frame ufuncs were rejected above.
            for pos, (left, right) in enumerate(zip(merged_axes, other.axes)):
                merged_axes[pos] = left.union(right)

        reconstruct_axes = dict(zip(self._AXIS_ORDERS, merged_axes))
        inputs = tuple(
            arg.reindex(**reconstruct_axes) if issubclass(typ, NDFrame) else arg
            for arg, typ in zip(inputs, input_types)
        )

    reconstruct_kwargs = {}
    if self.ndim == 1:
        # Keep the name only when every named input agrees on it.
        names = [arg.name for arg in inputs if hasattr(arg, "name")]
        reconstruct_kwargs["name"] = names[0] if len(set(names)) == 1 else None

    def _wrap_one(obj):
        # Rebuild a pandas object from one raw ufunc output.
        if lib.is_scalar(obj):
            return obj

        if obj.ndim != self.ndim:
            if method == "outer":
                raise NotImplementedError
            return obj

        if isinstance(obj, (BlockManager, ArrayManager)):
            # came through BlockManager.apply, e.g. np.sqrt
            wrapped = self._constructor_from_mgr(obj, axes=obj.axes)
        else:
            # went through a plain array, so the axes have to be restored
            wrapped = self._constructor(
                obj, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )

        # TODO: once __finalize__ supports multiple values, pass ``frames``
        # instead of ``self`` so that `np.add(a, b)` considers attrs from
        # both a and b when they are NDFrames.
        if len(frames) == 1:
            wrapped = wrapped.__finalize__(self)
        return wrapped

    def _wrap(raw):
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_wrap_one(part) for part in raw)
        return _wrap_one(raw)

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        return _wrap(dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs))

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        reduced = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if reduced is not NotImplemented:
            return reduced

    # We can still arrive here with an `axis` kwarg (np.maximum.accumulate)
    # or with `dtype` / `keepdims` (np.ptp).

    if self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(arg, extract_numpy=True) for arg in inputs)
        raw = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Give up on preserving types in the complex case, although in
        # theory it could be done:
        # * nout > 1 would need BlockManager.apply to take nout and return
        #   a Tuple[BlockManager].
        # * len(inputs) > 1 would need aligned blocks / dtypes.
        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(arg) for arg in inputs)
        # default_array_ufunc is unusable here: after reindexing, `self`
        # may no longer be among `inputs`.
        raw = getattr(ufunc, method)(*inputs, **kwargs)
    elif method == "__call__" and not kwargs:
        # ufunc(dataframe) via np.<ufunc>(..).  kwargs cannot necessarily
        # be handled block-by-block, so this path requires there be none.
        raw = inputs[0]._mgr.apply(getattr(ufunc, method))
    else:
        # ufunc(dataframe) via a specific method such as
        # np.<ufunc>.accumulate(..); these may carry an axis keyword and so
        # cannot be applied block-by-block.
        # e.g. np.negative (only one reached), with "where" and "out" in kwargs
        raw = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)

    return _wrap(raw)

416

417

418def _standardize_out_kwarg(**kwargs) -> dict:

419 """

420 If kwargs contain "out1" and "out2", replace that with a tuple "out"

421

422 np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or

423 `out1=out1, out2=out2)`

424 """

425 if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:

426 out1 = kwargs.pop("out1")

427 out2 = kwargs.pop("out2")

428 out = (out1, out2)

429 kwargs["out"] = out

430 return kwargs

431

432

def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Evaluate the ufunc without its `out` keyword, then store the result
    into the supplied `out`.

    Returns the `out` object(s) after assignment, or ``NotImplemented`` when
    the ufunc call itself returns that.  Raises ``NotImplementedError`` when
    the shape of `out` does not match the number of results.
    """
    # Note: _standardize_out_kwarg is assumed to have run already.
    out = kwargs.pop("out")
    where = kwargs.pop("where", None)

    computed = getattr(ufunc, method)(*inputs, **kwargs)
    if computed is NotImplemented:
        return NotImplemented

    if not isinstance(computed, tuple):
        # Single result: accept a bare `out` or a one-element tuple.
        if isinstance(out, tuple):
            if len(out) != 1:
                raise NotImplementedError
            out = out[0]
        _assign_where(out, computed, where)
        return out

    # Multiple results, i.e. np.divmod, np.modf, np.frexp: `out` must be a
    # tuple with one target per result.
    if not isinstance(out, tuple) or len(out) != len(computed):
        raise NotImplementedError

    for target, value in zip(out, computed):
        _assign_where(target, value, where)
    return out

466

467

468def _assign_where(out, result, where) -> None:

469 """

470 Set a ufunc result into 'out', masking with a 'where' argument if necessary.

471 """

472 if where is None:

473 # no 'where' arg passed to ufunc

474 out[:] = result

475 else:

476 np.putmask(out, where, result)

477

478

def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Behave as if __array_ufunc__ were not defined: convert `self` to an
    ndarray and call the ufunc method on the resulting inputs.

    Notes
    -----
    `self` must be among `inputs`; otherwise NotImplementedError is raised.
    """
    found_self = False
    converted = []
    for arg in inputs:
        if arg is self:
            found_self = True
            converted.append(np.asarray(arg))
        else:
            converted.append(arg)

    if not found_self:
        raise NotImplementedError

    return getattr(ufunc, method)(*converted, **kwargs)

493

494

def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Route ``ufunc.reduce(self)`` to the matching reduction method on self.

    Returns ``NotImplemented`` when `self` is not the sole input, when the
    ufunc has no entry in REDUCTION_ALIASES, or when `self` lacks the
    aliased method.
    """
    assert method == "reduce"

    sole_input_is_self = len(inputs) == 1 and inputs[0] is self
    if not sole_input_is_self:
        return NotImplemented

    ufunc_name = ufunc.__name__
    if ufunc_name not in REDUCTION_ALIASES:
        return NotImplemented
    method_name = REDUCTION_ALIASES[ufunc_name]

    # NB: this assumes min/max are the minimum/maximum reductions, which
    # would be wrong for e.g. Timestamp.min
    if not hasattr(self, method_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # np.min is not a ufunc; it goes through array_function_dispatch and
        # calls DataFrame.min (never reaching this function) with np.min's
        # default axis=None, which DataFrame.min turns into axis=0.
        # np.minimum.reduce(df) does land here with no axis in kwargs, so
        # axis=0 is filled in to match the behavior of
        # np.minimum.reduce(df.values).
        kwargs.setdefault("axis", 0)

    # numpy reductions do not skip NaNs by default, hence skipna=False
    return getattr(self, method_name)(skipna=False, **kwargs)