1from __future__ import annotations
2
3import functools
4import itertools
5import operator
6from typing import (
7 Any,
8 Callable,
9 cast,
10)
11import warnings
12
13import numpy as np
14
15from pandas._config import get_option
16
17from pandas._libs import (
18 NaT,
19 NaTType,
20 iNaT,
21 lib,
22)
23from pandas._typing import (
24 ArrayLike,
25 AxisInt,
26 CorrelationMethod,
27 Dtype,
28 DtypeObj,
29 F,
30 Scalar,
31 Shape,
32 npt,
33)
34from pandas.compat._optional import import_optional_dependency
35from pandas.util._exceptions import find_stack_level
36
37from pandas.core.dtypes.common import (
38 is_any_int_dtype,
39 is_bool_dtype,
40 is_complex,
41 is_datetime64_any_dtype,
42 is_float,
43 is_float_dtype,
44 is_integer,
45 is_integer_dtype,
46 is_numeric_dtype,
47 is_object_dtype,
48 is_scalar,
49 is_timedelta64_dtype,
50 needs_i8_conversion,
51 pandas_dtype,
52)
53from pandas.core.dtypes.dtypes import PeriodDtype
54from pandas.core.dtypes.missing import (
55 isna,
56 na_value_for_dtype,
57 notna,
58)
59
60from pandas.core.construction import extract_array
61
# Optional bottleneck dependency: warn (don't raise) when it is missing,
# since every nanop below has a pure-numpy fallback implementation.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
_USE_BOTTLENECK = False
65
66
def set_use_bottleneck(v: bool = True) -> None:
    """Globally enable/disable bottleneck acceleration.

    This is a no-op when bottleneck is not installed.
    """
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        return
    _USE_BOTTLENECK = v
72
73
# Honor the user's "compute.use_bottleneck" option at import time.
set_use_bottleneck(get_option("compute.use_bottleneck"))
75
76
class disallow:
    """
    Decorator that raises TypeError when the wrapped reduction is called
    with any argument whose dtype is one of the disallowed dtypes.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        # normalize to numpy scalar types so `issubclass` checks work below
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        # True if obj carries a dtype that matches any disallowed type
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            # scan positional and keyword arguments alike
            obj_iter = itertools.chain(args, kwargs.values())
            if any(self.check(obj) for obj in obj_iter):
                # strip the "nan" prefix for a friendlier error message
                f_name = f.__name__.replace("nan", "")
                raise TypeError(
                    f"reduction operation '{f_name}' not allowed for this dtype"
                )
            try:
                with np.errstate(invalid="ignore"):
                    return f(*args, **kwargs)
            except ValueError as e:
                # we want to transform an object array
                # ValueError message to the more typical TypeError
                # e.g. this is normally a disallowed function on
                # object arrays that contain strings
                if is_object_dtype(args[0]):
                    raise TypeError(e) from e
                raise

        return cast(F, _f)
107
108
class bottleneck_switch:
    """
    Decorator that dispatches to the same-named bottleneck function when
    bottleneck is installed, enabled, and safe for the values' dtype;
    otherwise it calls the decorated (numpy-based) implementation.
    """

    def __init__(self, name=None, **kwargs) -> None:
        # name: bottleneck function name; defaults to the wrapped function's
        # __name__.  kwargs: default keyword arguments injected into calls.
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bottleneck not installed, or it lacks this function
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: AxisInt | None = None,
            skipna: bool = True,
            **kwds,
        ):
            if len(self.kwargs) > 0:
                # inject defaults without overriding caller-provided kwargs
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    # a mask was supplied: bottleneck cannot use it
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)
163
164
def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
    """Return True if bottleneck may safely run `name` on this dtype."""
    # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
    if is_object_dtype(dtype) or needs_i8_conversion(dtype):
        return False

    # GH 42878
    # Bottleneck uses naive summation leading to O(n) loss of precision
    # unlike numpy which implements pairwise summation, which has O(log(n)) loss
    # crossref: https://github.com/pydata/bottleneck/issues/379

    # GH 15507
    # bottleneck does not properly upcast during the sum
    # so can overflow

    # GH 9422
    # further we also want to preserve NaN when all elements
    # are NaN, unlike bottleneck/numpy which consider this
    # to be 0
    return name not in ["nansum", "nanprod", "nanmean"]
183
184
185def _has_infs(result) -> bool:
186 if isinstance(result, np.ndarray):
187 if result.dtype in ("f8", "f4"):
188 # Note: outside of an nanops-specific test, we always have
189 # result.ndim == 1, so there is no risk of this ravel making a copy.
190 return lib.has_infs(result.ravel("K"))
191 try:
192 return np.isinf(result).any()
193 except (TypeError, NotImplementedError):
194 # if it doesn't support infs, then it can't have infs
195 return False
196
197
def _get_fill_value(
    dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
):
    """
    Return the fill value to use for `dtype`.

    An explicitly passed `fill_value` wins; otherwise NaN/inf is used for
    NaN-capable dtypes and integer sentinels (i8max/iNaT) for the rest.
    """
    if fill_value is not None:
        return fill_value

    if not _na_ok_dtype(dtype):
        # i8-backed dtype: need integer sentinels rather than NaN/inf
        if fill_value_typ == "+inf":
            # need the max int here
            return lib.i8max
        return iNaT

    # NaN-capable dtype
    if fill_value_typ is None:
        return np.nan
    return np.inf if fill_value_typ == "+inf" else -np.inf
218
219
def _maybe_get_mask(
    values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
) -> npt.NDArray[np.bool_] | None:
    """
    Compute a NA-mask only when one is actually needed.

    A precomputed mask is returned unchanged.  Boolean and integer arrays
    cannot hold NaNs, so no mask is ever required for them and None is
    returned.  Otherwise a fresh isna() mask is computed when skipna is
    True, or when the dtype is i8-backed (datetime64/timedelta64), where a
    mask must exist to locate NaT values.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    mask : Optional[ndarray]
        nan-mask if known

    Returns
    -------
    Optional[np.ndarray[bool]]
    """
    if mask is not None:
        return mask

    if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
        # Boolean data cannot contain nulls, so signal via mask being None
        return None

    if skipna or needs_i8_conversion(values.dtype):
        return isna(values)

    return None
262
263
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below
    assert is_scalar(fill_value)
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]

    # compute the mask BEFORE the i8 view below, so NaT positions are found
    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                # dtype can hold fill_value directly: fill a copy in place
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value
351
352
def _na_ok_dtype(dtype: DtypeObj) -> bool:
    """Return True if `dtype` can directly hold NaN."""
    # i8-backed (datetime64/timedelta64) and integer dtypes cannot hold NaN
    return not (needs_i8_conversion(dtype) or issubclass(dtype.type, np.integer))
357
358
def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    Wrap our results if needed: cast i8/float reduction results back to
    the original datetime64/timedelta64 dtype (or NaT) when `dtype` is
    datetimelike; other dtypes pass through unchanged.
    """
    if result is NaT:
        pass

    elif is_datetime64_any_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            # a scalar equal to the fill sentinel means "all-NA input"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns").astype(dtype)
            else:
                result = np.int64(result).view(dtype)
                # retain original unit
                result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result
398
399
def _datetimelike_compat(func: F) -> F:
    """
    If we have datetime64 or timedelta64 values, ensure we have a correct
    mask before calling the wrapped function, then cast back afterwards.
    """

    @functools.wraps(func)
    def new_func(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
        **kwargs,
    ):
        orig_values = values

        datetimelike = values.dtype.kind in ["m", "M"]
        if datetimelike and mask is None:
            # compute the NaT mask up front, before any i8 view inside func
            mask = isna(values)

        result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if datetimelike:
            # cast the numeric result back to the original datetimelike dtype
            result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)
            if not skipna:
                assert mask is not None  # checked above
                # skipna=False: any NaT in the reduced axis poisons the result
                result = _mask_datetimelike_result(result, axis, mask, orig_values)

        return result

    return cast(F, new_func)
432
433
def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray:
    """
    Return the missing value for `values`.

    Parameters
    ----------
    values : ndarray
    axis : int or None
        axis for the reduction, required if values.ndim > 1.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, returns a scalar of the correct missing type.
        For 2-D values, returns a 1-D array where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype("float64")
    fill_value = na_value_for_dtype(values.dtype)

    # 1-D input or full reduction yields a single missing scalar
    if values.ndim == 1 or axis is None:
        return fill_value

    # otherwise an array of missing values with the reduced axis dropped
    result_shape = values.shape[:axis] + values.shape[axis + 1 :]
    return np.full(result_shape, fill_value, dtype=values.dtype)
463
464
def maybe_operate_rowwise(func: F) -> F:
    """
    NumPy operations on C-contiguous ndarrays with axis=1 can be
    very slow if axis 1 >> axis 0.
    Operate row-by-row and concatenate the results.
    """

    @functools.wraps(func)
    def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs):
        if (
            axis == 1
            and values.ndim == 2
            and values.flags["C_CONTIGUOUS"]
            # only takes this path for wide arrays (long dataframes), for threshold see
            # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
            and (values.shape[1] / 1000) > values.shape[0]
            and values.dtype != object
            and values.dtype != bool
        ):
            # iterate the 2-D array as a list of 1-D rows
            arrs = list(values)
            if kwargs.get("mask") is not None:
                # pass each row of the mask along with its row of values
                mask = kwargs.pop("mask")
                results = [
                    func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs))
                ]
            else:
                results = [func(x, **kwargs) for x in arrs]
            return np.array(results)

        # fall through to the ordinary whole-array reduction
        return func(values, axis=axis, **kwargs)

    return cast(F, newfunc)
497
498
def nanany(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2])
    >>> nanops.nanany(s)
    True

    >>> from pandas.core import nanops
    >>> s = pd.Series([np.nan])
    >>> nanops.nanany(s)
    False
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype) and dtype.kind != "m":
        # GH#34479: truthiness of datetime64 values is being deprecated
        warnings.warn(
            "'any' with datetime64 dtypes is deprecated and will raise in a "
            "future version. Use (obj != pd.Timestamp(0)).any() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # NA entries are replaced with False so they can never make this True
    values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask)

    if is_object_dtype(values):
        # np.any on object arrays need not return booleans (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.any(axis)  # type: ignore[return-value]
552
553
def nanall(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if all elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanall(s)
    True

    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 0])
    >>> nanops.nanall(s)
    False
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype) and dtype.kind != "m":
        # GH#34479: truthiness of datetime64 values is being deprecated
        warnings.warn(
            "'all' with datetime64 dtypes is deprecated and will raise in a "
            "future version. Use (obj != pd.Timestamp(0)).all() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # NA entries are replaced with True so they can never make this False
    values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask)

    if is_object_dtype(values):
        # np.all on object arrays need not return booleans (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.all(axis)  # type: ignore[return-value]
607
608
@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        Minimum number of non-NA values required for a non-NA result.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s)
    3.0
    """
    # NA entries are filled with 0 so they do not affect the sum
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_sum = dtype_max
    if is_float_dtype(dtype):
        # keep the input's float precision
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        # accumulate the i8 view in float64; the _datetimelike_compat
        # decorator wraps the result back to timedelta64
        dtype_sum = np.dtype(np.float64)

    the_sum = values.sum(axis, dtype=dtype_sum)
    # enforce min_count: entries with too few observations become NA
    the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)

    return the_sum
656
657
def _mask_datetimelike_result(
    result: np.ndarray | np.datetime64 | np.timedelta64,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_],
    orig_values: np.ndarray,
) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
    """
    For skipna=False reductions: set the result to NaT wherever the
    reduced-over data contained any NaT.
    """
    if isinstance(result, np.ndarray):
        # we need to apply the mask
        result = result.astype("i8").view(orig_values.dtype)
        axis_mask = mask.any(axis=axis)
        # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
        # datetime64, timedelta64]")
        result[axis_mask] = iNaT  # type: ignore[index]
    else:
        # scalar result: any missing input makes the whole reduction NaT
        if mask.any():
            return np.int64(iNaT).view(orig_values.dtype)
    return result
675
676
@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    # NA entries are filled with 0 so they do not affect the sum
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_sum = dtype_max
    dtype_count = np.dtype(np.float64)

    # not using needs_i8_conversion because that includes period
    if dtype.kind in ["m", "M"]:
        # datetimelike: sum the i8 view in float64
        dtype_sum = np.dtype(np.float64)
    elif is_integer_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
    elif is_float_dtype(dtype):
        # keep the input's float precision for both sum and count
        dtype_sum = dtype
        dtype_count = dtype

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        # array result: entries with zero observations become NaN
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
    else:
        # scalar result
        the_mean = the_sum / count if count > 0 else np.nan

    return the_mean
741
742
@bottleneck_switch()
def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None):
    """
    Compute the median along an axis ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s)
    2.0
    """

    def get_median(x, _mask=None):
        # median of a 1-D slice; NaN when skipna=False and any NA is present
        if _mask is None:
            _mask = notna(x)
        else:
            _mask = ~_mask
        if not skipna and not _mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings(
                "ignore", "All-NaN slice encountered", RuntimeWarning
            )
            res = np.nanmedian(x[_mask])
        return res

    values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask, fill_value=0)
    if not is_float_dtype(values.dtype):
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
        if mask is not None:
            # astype produced a fresh float array, safe to write NaNs in place
            values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:
        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings(
                        "ignore", "All-NaN slice encountered", RuntimeWarning
                    )
                    res = np.nanmedian(values, axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            # np.float64 (not the np.float_ alias, removed in NumPy 2.0)
            res = get_empty_reduction_result(values.shape, axis, np.float64, np.nan)

    else:
        # otherwise return a scalar value
        res = get_median(values, mask) if notempty else np.nan
    return _wrap_results(res, dtype)
822
823
def get_empty_reduction_result(
    shape: tuple[int, ...],
    axis: AxisInt,
    dtype: np.dtype | type[np.floating],
    fill_value: Any,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray.

    Parameters
    ----------
    shape : Tuple[int]
    axis : int
    dtype : np.dtype
    fill_value : Any

    Returns
    -------
    np.ndarray
    """
    shp = np.array(shape)
    dims = np.arange(len(shape))
    # drop the reduced axis from the shape, then fill the remainder
    result_shape = shp[dims != axis]
    return np.full(result_shape, fill_value, dtype=dtype)
849
850
def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: AxisInt | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
        count minus ddof; NaN where there are too few observations.
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    # denominator for the variance: observations minus degrees of freedom
    d = count - dtype.type(ddof)

    # always return NaN, never inf
    if is_scalar(count):
        if count <= ddof:
            # too few observations for this ddof
            count = np.nan
            d = np.nan
    else:
        # count is not narrowed by is_scalar check
        count = cast(np.ndarray, count)
        # NOTE: deliberately shadows the `mask` parameter with the
        # "insufficient observations" mask from here on
        mask = count <= ddof
        if mask.any():
            np.putmask(d, mask, np.nan)
            np.putmask(count, mask, np.nan)
    return count, d
896
897
@bottleneck_switch(ddof=1)
def nanstd(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s)
    1.0
    """
    if values.dtype == "M8[ns]":
        # std of datetimes is a timedelta, so compute on the m8 view
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask, _, _, _ = _get_values(values, skipna, mask=mask)

    result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
    # re-wrap the float result as timedelta64 when the input was datetimelike
    return _wrap_results(result, orig_dtype)
942
943
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = extract_array(values, extract_numpy=True)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(dtype):
        # integers cannot hold NaN; astype copies, so the NaN assignment
        # below does not mutate the caller's array
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values.dtype):
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        # make the mean broadcastable against values for the second pass
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries must not contribute squared deviations either
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)
    return result
1018
1019
@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")

    if not skipna and mask is not None and mask.any():
        # with skipna disabled, any missing value poisons the result
        return np.nan

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    # sem = sqrt(var) / sqrt(n)
    return np.sqrt(var) / np.sqrt(count)
1071
1072
def _nanminmax(meth, fill_value_typ):
    """Build a NaN-aware min/max reduction; meth is "min" or "max"."""

    @bottleneck_switch(name=f"nan{meth}")
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> Dtype:
        # NA entries are filled with +/-inf so they never win the comparison
        values, mask, dtype, dtype_max, fill_value = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )

        if (axis is not None and values.shape[axis] == 0) or values.size == 0:
            # empty reduction: numpy would raise, so produce NaN(s) directly
            try:
                result = getattr(values, meth)(axis, dtype=dtype_max)
                result.fill(np.nan)
            except (AttributeError, TypeError, ValueError):
                result = np.nan
        else:
            result = getattr(values, meth)(axis)

        result = _maybe_null_out(result, axis, mask, values.shape)
        return result

    return reduction
1100
1101
# NaN-skipping min/max built from the shared reduction factory above;
# the fill value is the identity that can never win the comparison.
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")
1104
1105
@disallow("O")
def nanargmax(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> from pandas.core import nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7., nan],
           [ 9., 10., nan]])
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    # always fill NAs (with -inf) so they can never be the argmax;
    # the skipna flag is honored by _maybe_arg_null_out below
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    # error: Need type annotation for 'result'
    result = values.argmax(axis)  # type: ignore[var-annotated]
    result = _maybe_arg_null_out(result, axis, mask, skipna)
    return result
1150
1151
1152@disallow("O")
1153def nanargmin(
1154 values: np.ndarray,
1155 *,
1156 axis: AxisInt | None = None,
1157 skipna: bool = True,
1158 mask: npt.NDArray[np.bool_] | None = None,
1159) -> int | np.ndarray:
1160 """
1161 Parameters
1162 ----------
1163 values : ndarray
1164 axis : int, optional
1165 skipna : bool, default True
1166 mask : ndarray[bool], optional
1167 nan-mask if known
1168
1169 Returns
1170 -------
1171 result : int or ndarray[int]
1172 The index/indices of min value in specified axis or -1 in the NA case
1173
1174 Examples
1175 --------
1176 >>> from pandas.core import nanops
1177 >>> arr = np.array([1, 2, 3, np.nan, 4])
1178 >>> nanops.nanargmin(arr)
1179 0
1180
1181 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
1182 >>> arr[2:, 0] = np.nan
1183 >>> arr
1184 array([[ 0., 1., 2.],
1185 [ 3., 4., 5.],
1186 [nan, 7., 8.],
1187 [nan, 10., 11.]])
1188 >>> nanops.nanargmin(arr, axis=1)
1189 array([0, 0, 1, 1])
1190 """
1191 values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask)
1192 # error: Need type annotation for 'result'
1193 result = values.argmin(axis) # type: ignore[var-annotated]
1194 result = _maybe_arg_null_out(result, axis, mask, skipna)
1195 return result
1196
1197
1198@disallow("M8", "m8")
1199@maybe_operate_rowwise
1200def nanskew(
1201 values: np.ndarray,
1202 *,
1203 axis: AxisInt | None = None,
1204 skipna: bool = True,
1205 mask: npt.NDArray[np.bool_] | None = None,
1206) -> float:
1207 """
1208 Compute the sample skewness.
1209
1210 The statistic computed here is the adjusted Fisher-Pearson standardized
1211 moment coefficient G1. The algorithm computes this coefficient directly
1212 from the second and third central moment.
1213
1214 Parameters
1215 ----------
1216 values : ndarray
1217 axis : int, optional
1218 skipna : bool, default True
1219 mask : ndarray[bool], optional
1220 nan-mask if known
1221
1222 Returns
1223 -------
1224 result : float64
1225 Unless input is a float array, in which case use the same
1226 precision as the input array.
1227
1228 Examples
1229 --------
1230 >>> from pandas.core import nanops
1231 >>> s = pd.Series([1, np.nan, 1, 2])
1232 >>> nanops.nanskew(s)
1233 1.7320508075688787
1234 """
1235 # error: Incompatible types in assignment (expression has type "Union[Any,
1236 # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
1237 values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
1238 mask = _maybe_get_mask(values, skipna, mask)
1239 if not is_float_dtype(values.dtype):
1240 values = values.astype("f8")
1241 count = _get_counts(values.shape, mask, axis)
1242 else:
1243 count = _get_counts(values.shape, mask, axis, dtype=values.dtype)
1244
1245 if skipna and mask is not None:
1246 values = values.copy()
1247 np.putmask(values, mask, 0)
1248 elif not skipna and mask is not None and mask.any():
1249 return np.nan
1250
1251 mean = values.sum(axis, dtype=np.float64) / count
1252 if axis is not None:
1253 mean = np.expand_dims(mean, axis)
1254
1255 adjusted = values - mean
1256 if skipna and mask is not None:
1257 np.putmask(adjusted, mask, 0)
1258 adjusted2 = adjusted**2
1259 adjusted3 = adjusted2 * adjusted
1260 m2 = adjusted2.sum(axis, dtype=np.float64)
1261 m3 = adjusted3.sum(axis, dtype=np.float64)
1262
1263 # floating point error
1264 #
1265 # #18044 in _libs/windows.pyx calc_skew follow this behavior
1266 # to fix the fperr to treat m2 <1e-14 as zero
1267 m2 = _zero_out_fperr(m2)
1268 m3 = _zero_out_fperr(m3)
1269
1270 with np.errstate(invalid="ignore", divide="ignore"):
1271 result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)
1272
1273 dtype = values.dtype
1274 if is_float_dtype(dtype):
1275 result = result.astype(dtype, copy=False)
1276
1277 if isinstance(result, np.ndarray):
1278 result = np.where(m2 == 0, 0, result)
1279 result[count < 3] = np.nan
1280 else:
1281 result = 0 if m2 == 0 else result
1282 if count < 3:
1283 return np.nan
1284
1285 return result
1286
1287
1288@disallow("M8", "m8")
1289@maybe_operate_rowwise
1290def nankurt(
1291 values: np.ndarray,
1292 *,
1293 axis: AxisInt | None = None,
1294 skipna: bool = True,
1295 mask: npt.NDArray[np.bool_] | None = None,
1296) -> float:
1297 """
1298 Compute the sample excess kurtosis
1299
1300 The statistic computed here is the adjusted Fisher-Pearson standardized
1301 moment coefficient G2, computed directly from the second and fourth
1302 central moment.
1303
1304 Parameters
1305 ----------
1306 values : ndarray
1307 axis : int, optional
1308 skipna : bool, default True
1309 mask : ndarray[bool], optional
1310 nan-mask if known
1311
1312 Returns
1313 -------
1314 result : float64
1315 Unless input is a float array, in which case use the same
1316 precision as the input array.
1317
1318 Examples
1319 --------
1320 >>> from pandas.core import nanops
1321 >>> s = pd.Series([1, np.nan, 1, 3, 2])
1322 >>> nanops.nankurt(s)
1323 -1.2892561983471076
1324 """
1325 # error: Incompatible types in assignment (expression has type "Union[Any,
1326 # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
1327 values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
1328 mask = _maybe_get_mask(values, skipna, mask)
1329 if not is_float_dtype(values.dtype):
1330 values = values.astype("f8")
1331 count = _get_counts(values.shape, mask, axis)
1332 else:
1333 count = _get_counts(values.shape, mask, axis, dtype=values.dtype)
1334
1335 if skipna and mask is not None:
1336 values = values.copy()
1337 np.putmask(values, mask, 0)
1338 elif not skipna and mask is not None and mask.any():
1339 return np.nan
1340
1341 mean = values.sum(axis, dtype=np.float64) / count
1342 if axis is not None:
1343 mean = np.expand_dims(mean, axis)
1344
1345 adjusted = values - mean
1346 if skipna and mask is not None:
1347 np.putmask(adjusted, mask, 0)
1348 adjusted2 = adjusted**2
1349 adjusted4 = adjusted2**2
1350 m2 = adjusted2.sum(axis, dtype=np.float64)
1351 m4 = adjusted4.sum(axis, dtype=np.float64)
1352
1353 with np.errstate(invalid="ignore", divide="ignore"):
1354 adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
1355 numerator = count * (count + 1) * (count - 1) * m4
1356 denominator = (count - 2) * (count - 3) * m2**2
1357
1358 # floating point error
1359 #
1360 # #18044 in _libs/windows.pyx calc_kurt follow this behavior
1361 # to fix the fperr to treat denom <1e-14 as zero
1362 numerator = _zero_out_fperr(numerator)
1363 denominator = _zero_out_fperr(denominator)
1364
1365 if not isinstance(denominator, np.ndarray):
1366 # if ``denom`` is a scalar, check these corner cases first before
1367 # doing division
1368 if count < 4:
1369 return np.nan
1370 if denominator == 0:
1371 return 0
1372
1373 with np.errstate(invalid="ignore", divide="ignore"):
1374 result = numerator / denominator - adj
1375
1376 dtype = values.dtype
1377 if is_float_dtype(dtype):
1378 result = result.astype(dtype, copy=False)
1379
1380 if isinstance(result, np.ndarray):
1381 result = np.where(denominator == 0, 0, result)
1382 result[count < 4] = np.nan
1383
1384 return result
1385
1386
1387@disallow("M8", "m8")
1388@maybe_operate_rowwise
1389def nanprod(
1390 values: np.ndarray,
1391 *,
1392 axis: AxisInt | None = None,
1393 skipna: bool = True,
1394 min_count: int = 0,
1395 mask: npt.NDArray[np.bool_] | None = None,
1396) -> float:
1397 """
1398 Parameters
1399 ----------
1400 values : ndarray[dtype]
1401 axis : int, optional
1402 skipna : bool, default True
1403 min_count: int, default 0
1404 mask : ndarray[bool], optional
1405 nan-mask if known
1406
1407 Returns
1408 -------
1409 Dtype
1410 The product of all elements on a given axis. ( NaNs are treated as 1)
1411
1412 Examples
1413 --------
1414 >>> from pandas.core import nanops
1415 >>> s = pd.Series([1, 2, 3, np.nan])
1416 >>> nanops.nanprod(s)
1417 6.0
1418 """
1419 mask = _maybe_get_mask(values, skipna, mask)
1420
1421 if skipna and mask is not None:
1422 values = values.copy()
1423 values[mask] = 1
1424 result = values.prod(axis)
1425 # error: Incompatible return value type (got "Union[ndarray, float]", expected
1426 # "float")
1427 return _maybe_null_out( # type: ignore[return-value]
1428 result, axis, mask, values.shape, min_count=min_count
1429 )
1430
1431
1432def _maybe_arg_null_out(
1433 result: np.ndarray,
1434 axis: AxisInt | None,
1435 mask: npt.NDArray[np.bool_] | None,
1436 skipna: bool,
1437) -> np.ndarray | int:
1438 # helper function for nanargmin/nanargmax
1439 if mask is None:
1440 return result
1441
1442 if axis is None or not getattr(result, "ndim", False):
1443 if skipna:
1444 if mask.all():
1445 return -1
1446 else:
1447 if mask.any():
1448 return -1
1449 else:
1450 if skipna:
1451 na_mask = mask.all(axis)
1452 else:
1453 na_mask = mask.any(axis)
1454 if na_mask.any():
1455 result[na_mask] = -1
1456 return result
1457
1458
1459def _get_counts(
1460 values_shape: Shape,
1461 mask: npt.NDArray[np.bool_] | None,
1462 axis: AxisInt | None,
1463 dtype: np.dtype = np.dtype(np.float64),
1464) -> float | np.ndarray:
1465 """
1466 Get the count of non-null values along an axis
1467
1468 Parameters
1469 ----------
1470 values_shape : tuple of int
1471 shape tuple from values ndarray, used if mask is None
1472 mask : Optional[ndarray[bool]]
1473 locations in values that should be considered missing
1474 axis : Optional[int]
1475 axis to count along
1476 dtype : type, optional
1477 type to use for count
1478
1479 Returns
1480 -------
1481 count : scalar or array
1482 """
1483 if axis is None:
1484 if mask is not None:
1485 n = mask.size - mask.sum()
1486 else:
1487 n = np.prod(values_shape)
1488 return dtype.type(n)
1489
1490 if mask is not None:
1491 count = mask.shape[axis] - mask.sum(axis)
1492 else:
1493 count = values_shape[axis]
1494
1495 if is_scalar(count):
1496 return dtype.type(count)
1497 return count.astype(dtype, copy=False)
1498
1499
1500def _maybe_null_out(
1501 result: np.ndarray | float | NaTType,
1502 axis: AxisInt | None,
1503 mask: npt.NDArray[np.bool_] | None,
1504 shape: tuple[int, ...],
1505 min_count: int = 1,
1506) -> np.ndarray | float | NaTType:
1507 """
1508 Returns
1509 -------
1510 Dtype
1511 The product of all elements on a given axis. ( NaNs are treated as 1)
1512 """
1513 if mask is None and min_count == 0:
1514 # nothing to check; short-circuit
1515 return result
1516
1517 if axis is not None and isinstance(result, np.ndarray):
1518 if mask is not None:
1519 null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
1520 else:
1521 # we have no nulls, kept mask=None in _maybe_get_mask
1522 below_count = shape[axis] - min_count < 0
1523 new_shape = shape[:axis] + shape[axis + 1 :]
1524 null_mask = np.broadcast_to(below_count, new_shape)
1525
1526 if np.any(null_mask):
1527 if is_numeric_dtype(result):
1528 if np.iscomplexobj(result):
1529 result = result.astype("c16")
1530 elif not is_float_dtype(result):
1531 result = result.astype("f8", copy=False)
1532 result[null_mask] = np.nan
1533 else:
1534 # GH12941, use None to auto cast null
1535 result[null_mask] = None
1536 elif result is not NaT:
1537 if check_below_min_count(shape, mask, min_count):
1538 result_dtype = getattr(result, "dtype", None)
1539 if is_float_dtype(result_dtype):
1540 # error: Item "None" of "Optional[Any]" has no attribute "type"
1541 result = result_dtype.type("nan") # type: ignore[union-attr]
1542 else:
1543 result = np.nan
1544
1545 return result
1546
1547
1548def check_below_min_count(
1549 shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
1550) -> bool:
1551 """
1552 Check for the `min_count` keyword. Returns True if below `min_count` (when
1553 missing value should be returned from the reduction).
1554
1555 Parameters
1556 ----------
1557 shape : tuple
1558 The shape of the values (`values.shape`).
1559 mask : ndarray[bool] or None
1560 Boolean numpy array (typically of same shape as `shape`) or None.
1561 min_count : int
1562 Keyword passed through from sum/prod call.
1563
1564 Returns
1565 -------
1566 bool
1567 """
1568 if min_count > 0:
1569 if mask is None:
1570 # no missing values, only check size
1571 non_nulls = np.prod(shape)
1572 else:
1573 non_nulls = mask.size - mask.sum()
1574 if non_nulls < min_count:
1575 return True
1576 return False
1577
1578
1579def _zero_out_fperr(arg):
1580 # #18044 reference this behavior to fix rolling skew/kurt issue
1581 if isinstance(arg, np.ndarray):
1582 with np.errstate(invalid="ignore"):
1583 return np.where(np.abs(arg) < 1e-14, 0, arg)
1584 else:
1585 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
1586
1587
1588@disallow("M8", "m8")
1589def nancorr(
1590 a: np.ndarray,
1591 b: np.ndarray,
1592 *,
1593 method: CorrelationMethod = "pearson",
1594 min_periods: int | None = None,
1595) -> float:
1596 """
1597 a, b: ndarrays
1598 """
1599 if len(a) != len(b):
1600 raise AssertionError("Operands to nancorr must have same size")
1601
1602 if min_periods is None:
1603 min_periods = 1
1604
1605 valid = notna(a) & notna(b)
1606 if not valid.all():
1607 a = a[valid]
1608 b = b[valid]
1609
1610 if len(a) < min_periods:
1611 return np.nan
1612
1613 f = get_corr_func(method)
1614 return f(a, b)
1615
1616
def get_corr_func(
    method: CorrelationMethod,
) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Resolve a correlation ``method`` name (or user callable) to a function
    of two ndarrays returning a float.

    Raises ValueError for unrecognized method names.
    """
    if callable(method):
        # User-supplied correlation function: use as-is.
        return method

    if method == "kendall":
        # Lazy import: scipy is an optional dependency.
        from scipy.stats import kendalltau

        def func(a, b):
            return kendalltau(a, b)[0]

        return func
    if method == "spearman":
        from scipy.stats import spearmanr

        def func(a, b):
            return spearmanr(a, b)[0]

        return func
    if method == "pearson":

        def func(a, b):
            return np.corrcoef(a, b)[0, 1]

        return func

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )
1647
1648
1649@disallow("M8", "m8")
1650def nancov(
1651 a: np.ndarray,
1652 b: np.ndarray,
1653 *,
1654 min_periods: int | None = None,
1655 ddof: int | None = 1,
1656) -> float:
1657 if len(a) != len(b):
1658 raise AssertionError("Operands to nancov must have same size")
1659
1660 if min_periods is None:
1661 min_periods = 1
1662
1663 valid = notna(a) & notna(b)
1664 if not valid.all():
1665 a = a[valid]
1666 b = b[valid]
1667
1668 if len(a) < min_periods:
1669 return np.nan
1670
1671 return np.cov(a, b, ddof=ddof)[0, 1]
1672
1673
1674def _ensure_numeric(x):
1675 if isinstance(x, np.ndarray):
1676 if is_integer_dtype(x) or is_bool_dtype(x):
1677 x = x.astype(np.float64)
1678 elif is_object_dtype(x):
1679 try:
1680 x = x.astype(np.complex128)
1681 except (TypeError, ValueError):
1682 try:
1683 x = x.astype(np.float64)
1684 except ValueError as err:
1685 # GH#29941 we get here with object arrays containing strs
1686 raise TypeError(f"Could not convert {x} to numeric") from err
1687 else:
1688 if not np.any(np.imag(x)):
1689 x = x.real
1690 elif not (is_float(x) or is_integer(x) or is_complex(x)):
1691 try:
1692 x = float(x)
1693 except (TypeError, ValueError):
1694 # e.g. "1+1j" or "foo"
1695 try:
1696 x = complex(x)
1697 except ValueError as err:
1698 # e.g. "foo"
1699 raise TypeError(f"Could not convert {x} to numeric") from err
1700 return x
1701
1702
1703# NA-friendly array comparisons
1704
1705
def make_nancomp(op):
    """
    Wrap a binary comparison ``op`` so that positions where either operand
    is NA yield NaN instead of a plain boolean (the boolean result is
    upcast to object dtype to hold the NaN).
    """

    def f(x, y):
        combined_mask = isna(x) | isna(y)

        with np.errstate(all="ignore"):
            result = op(x, y)

        if combined_mask.any():
            if is_bool_dtype(result):
                # Object dtype is needed to store NaN alongside booleans.
                result = result.astype("O")
            np.putmask(result, combined_mask, np.nan)

        return result

    return f
1723
1724
# NA-propagating versions of the standard comparisons: where either operand
# is NA, the result holds NaN (object dtype) rather than a plain boolean.
nangt = make_nancomp(operator.gt)
nange = make_nancomp(operator.ge)
nanlt = make_nancomp(operator.lt)
nanle = make_nancomp(operator.le)
naneq = make_nancomp(operator.eq)
nanne = make_nancomp(operator.ne)
1731
1732
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # Neutral fill used during accumulation, and the NA marker restored after.
    fill_for_accum, fill_for_na = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # This should go through ea interface
    assert values.dtype.kind not in ["m", "M"]

    # We will be applying this function to block values
    if not skipna or issubclass(values.dtype.type, (np.integer, np.bool_)):
        # Integer/bool arrays cannot hold NA, and with skipna=False any NA
        # propagates naturally through the accumulation.
        return accum_func(values, axis=0)

    vals = values.copy()
    na_positions = isna(vals)
    vals[na_positions] = fill_for_accum
    result = accum_func(vals, axis=0)
    # Restore NA at the positions that were masked out.
    result[na_positions] = fill_for_na
    return result