Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arraylike.py: 35%

1"""

2Methods that can be shared by many array-like classes or subclasses:

3 Series

4 Index

5 ExtensionArray

6"""

7from __future__ import annotations

9import operator

10from typing import Any

12import numpy as np

14from pandas._libs import lib

15from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

17from pandas.core.dtypes.generic import ABCNDFrame

19from pandas.core import roperator

20from pandas.core.construction import extract_array

21from pandas.core.ops.common import unpack_zerodim_and_defer

# Maps a numpy ufunc's ``__name__`` to the name of the reduction method that
# ``dispatch_reduction_ufunc`` looks up on the object when handling
# ``ufunc.reduce``; ufuncs not listed here are not dispatched.
REDUCTION_ALIASES = {
    "maximum": "max",
    "minimum": "min",
    "add": "sum",
    "multiply": "prod",
}

class OpsMixin:
    """
    Mixin that routes operator dunder methods to three overridable hooks.

    Comparison operators call ``_cmp_method``, the logical operators call
    ``_logical_method`` and the arithmetic operators call ``_arith_method``,
    each time passing the other operand and the callable implementing the
    operation.  The hooks defined here return ``NotImplemented``; subclasses
    override them to provide real behaviour.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        """Hook for comparison operators; returns NotImplemented by default."""
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        """Hook for logical operators; returns NotImplemented by default."""
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        """Hook for arithmetic operators; returns NotImplemented by default."""
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both columns names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The name handed to the decorator is the full dunder name, matching
    # every other operator in this class (it previously lacked the trailing
    # underscores).
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)

247

248

249# -----------------------------------------------------------------------------

250# Helpers to implement __array_ufunc__

251

252

def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Compatibility with numpy ufuncs.

    Shared implementation of the ``__array_ufunc__`` protocol.  In order, it:
    normalises the ``out`` keyword, tries the operator dunder methods,
    decides whether to defer to another input, aligns the NDFrame inputs,
    applies the ufunc, and rebuilds a result of ``self``'s type.

    Parameters
    ----------
    self : object
        The object the ufunc is being applied on behalf of.  This function
        reads ``axes``, ``ndim``, ``_AXIS_ORDERS``, ``_HANDLED_TYPES``,
        ``__array_priority__`` and ``_constructor`` from it.
        NOTE(review): presumably a Series or DataFrame -- confirm with callers.
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the ufunc method being invoked, e.g. ``"__call__"``,
        ``"reduce"`` or ``"outer"``.
    *inputs : Any
        Positional inputs of the ufunc call.
    **kwargs : Any
        Keyword arguments of the ufunc call.

    Returns
    -------
    object
        ``NotImplemented`` when another input should handle the call;
        otherwise the (possibly reconstructed) result of the ufunc.

    Raises
    ------
    NotImplementedError
        If DataFrame and Series inputs are mixed among several alignable
        inputs, or if ``method == "outer"`` yields a result whose ``ndim``
        differs from ``self.ndim``.

    See Also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    # Imported inside the function rather than at module level.
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import BlockManager

    cls = type(self)

    # Collapse out1=/out2= into a single out=(out1, out2) keyword.
    kwargs = _standardize_out_kwarg(**kwargs)

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    # Inputs whose __array_ufunc__ is one of these never force a deferral.
    no_defer = (
        np.ndarray.__array_ufunc__,
        cls.__array_ufunc__,
    )

    for item in inputs:
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        set_types = set(types)
        if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
            # We currently don't handle ufunc(DataFrame, Series)
            # well. Previously this raised an internal ValueError. We might
            # support it someday, so raise a NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        axes = self.axes
        for obj in alignable[1:]:
            # this relies on the fact that we aren't handling mixed
            # series / frame ufuncs.
            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
                axes[i] = ax1.union(ax2)

        # Reindex every NDFrame input onto the unioned axes; other inputs
        # are passed through unchanged.
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    if self.ndim == 1:
        # Keep a name only when all inputs that have one agree on it.
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        name = names[0] if len(set(names)) == 1 else None
        reconstruct_kwargs = {"name": name}
    else:
        reconstruct_kwargs = {}

    def reconstruct(result):
        # Rebuild each output separately for multi-output ufuncs.
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_reconstruct(x) for x in result)

        return _reconstruct(result)

    def _reconstruct(result):
        # Scalars are returned as-is.
        if lib.is_scalar(result):
            return result

        # A result of different dimensionality is returned unwrapped,
        # except for "outer" which is rejected.
        if result.ndim != self.ndim:
            if method == "outer":
                raise NotImplementedError
            return result
        if isinstance(result, BlockManager):
            # we went through BlockManager.apply e.g. np.sqrt
            result = self._constructor(result, **reconstruct_kwargs, copy=False)
        else:
            # we converted an array, lost our axes
            result = self._constructor(
                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            result = result.__finalize__(self)
        return result

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        return reconstruct(result)

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if result is not NotImplemented:
            return result

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    # and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Just give up on preserving types in the complex case.
        # In theory we could preserve them for them.
        # * nout>1 is doable if BlockManager.apply took nout and
        #   returned a Tuple[BlockManager].
        # * len(inputs) > 1 is doable when we know that we have
        #   aligned blocks / dtypes.

        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # Note: we can't use default_array_ufunc here bc reindexing means
        # that `self` may not be among `inputs`
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    else:
        # ufunc(dataframe)
        if method == "__call__" and not kwargs:
            # for np.<ufunc>(..) calls
            # kwargs cannot necessarily be handled block-by-block, so only
            # take this path if there are no kwargs
            mgr = inputs[0]._mgr
            result = mgr.apply(getattr(ufunc, method))
        else:
            # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
            # Those can have an axis keyword and thus can't be called block-by-block
            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
            # e.g. np.negative (only one reached), with "where" and "out" in kwargs

    result = reconstruct(result)
    return result

413

414

def _standardize_out_kwarg(**kwargs) -> dict:
    """
    Fold separate "out1"/"out2" keywords into a single tuple-valued "out".

    np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
    `out1=out1, out2=out2`.  The merge only happens when both "out1" and
    "out2" are present and "out" is not; otherwise the keywords are
    returned untouched.
    """
    has_split_outputs = "out1" in kwargs and "out2" in kwargs
    if has_split_outputs and "out" not in kwargs:
        first = kwargs.pop("out1")
        second = kwargs.pop("out2")
        kwargs["out"] = (first, second)
    return kwargs

428

429

def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Evaluate the ufunc without its `out` keyword, then write the result
    into the supplied `out` object(s) and return them.

    A "where" keyword, if given, is also withheld from the ufunc call and
    used instead as the mask when storing the result.

    Raises NotImplementedError when the shape of `out` (tuple vs. single
    object, or tuple length) does not match the shape of the result.
    """
    # Note: we assume _standardize_out_kwarg has already been called.
    target = kwargs.pop("out")
    mask = kwargs.pop("where", None)

    computed = getattr(ufunc, method)(*inputs, **kwargs)
    if computed is NotImplemented:
        return NotImplemented

    target_is_tuple = isinstance(target, tuple)

    if isinstance(computed, tuple):
        # i.e. np.divmod, np.modf, np.frexp
        if not target_is_tuple or len(target) != len(computed):
            raise NotImplementedError

        for dest, values in zip(target, computed):
            _assign_where(dest, values, mask)
        return target

    if target_is_tuple:
        # A single result can only go into a one-element `out` tuple.
        if len(target) != 1:
            raise NotImplementedError
        target = target[0]

    _assign_where(target, computed, mask)
    return target

463

464

def _assign_where(out, result, where) -> None:
    """
    Store a ufunc result in 'out', honouring an optional 'where' mask.

    With a mask, the result is written through np.putmask; without one
    (where is None) the whole of 'out' is overwritten via slice assignment.
    """
    if where is not None:
        np.putmask(out, where, result)
        return

    # no 'where' arg passed to ufunc
    out[:] = result

474

475

def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply the ufunc as if __array_ufunc__ were not defined on `self`.

    Every occurrence of `self` among `inputs` is replaced by
    ``np.asarray(self)`` before the ufunc method is invoked.

    Notes
    -----
    We are assuming that `self` is among `inputs`; NotImplementedError is
    raised when it is not.
    """
    found_self = False
    converted = []
    for arg in inputs:
        if arg is self:
            found_self = True
            converted.append(np.asarray(arg))
        else:
            converted.append(arg)

    if not found_self:
        raise NotImplementedError

    ufunc_method = getattr(ufunc, method)
    return ufunc_method(*converted, **kwargs)

490

491

def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Route a ``ufunc.reduce`` call to the matching reduction method of `self`.

    Returns NotImplemented when `self` is not the sole input, when the ufunc
    has no entry in REDUCTION_ALIASES, or when `self` lacks the aliased
    method.
    """
    assert method == "reduce"

    sole_input_is_self = len(inputs) == 1 and inputs[0] is self
    if not sole_input_is_self:
        return NotImplemented

    try:
        reducer_name = REDUCTION_ALIASES[ufunc.__name__]
    except KeyError:
        return NotImplemented

    # NB: we are assuming that min/max represent minimum/maximum methods,
    #  which would not be accurate for e.g. Timestamp.min
    if not hasattr(self, reducer_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # For DataFrame reductions we don't want the default axis=0
        # Note: np.min is not a ufunc, but uses array_function_dispatch,
        #  so calls DataFrame.min (without ever getting here) with the np.min
        #  default of axis=None, which DataFrame.min catches and changes to axis=0.
        # np.minimum.reduce(df) gets here bc axis is not in kwargs,
        #  so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
        kwargs.setdefault("axis", 0)

    # By default, numpy's reductions do not skip NaNs, so we have to
    #  pass skipna=False
    reducer = getattr(self, reducer_name)
    return reducer(skipna=False, **kwargs)