1from __future__ import annotations
2
3import functools
4import itertools
5from typing import (
6 Any,
7 Callable,
8 cast,
9)
10import warnings
11
12import numpy as np
13
14from pandas._config import get_option
15
16from pandas._libs import (
17 NaT,
18 NaTType,
19 iNaT,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 AxisInt,
25 CorrelationMethod,
26 Dtype,
27 DtypeObj,
28 F,
29 Scalar,
30 Shape,
31 npt,
32)
33from pandas.compat._optional import import_optional_dependency
34from pandas.util._exceptions import find_stack_level
35
36from pandas.core.dtypes.common import (
37 is_complex,
38 is_float,
39 is_float_dtype,
40 is_integer,
41 is_numeric_dtype,
42 is_object_dtype,
43 needs_i8_conversion,
44 pandas_dtype,
45)
46from pandas.core.dtypes.missing import (
47 isna,
48 na_value_for_dtype,
49 notna,
50)
51
# bottleneck is an optional accelerator for nan-reductions; warn (don't
# fail) when it is missing so pandas still works without it.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
# Runtime on/off switch; flipped by set_use_bottleneck() from config.
_USE_BOTTLENECK = False
55
56
def set_use_bottleneck(v: bool = True) -> None:
    """Enable or disable bottleneck acceleration (no-op when not installed)."""
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        return
    _USE_BOTTLENECK = v
62
63
64set_use_bottleneck(get_option("compute.use_bottleneck"))
65
66
class disallow:
    """
    Decorator rejecting reductions over the given dtypes.

    Any positional or keyword argument whose ``dtype`` matches one of the
    disallowed dtypes raises TypeError before the wrapped function runs.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        """Return True if ``obj`` has a dtype in the disallowed set."""
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            for obj in itertools.chain(args, kwargs.values()):
                if self.check(obj):
                    f_name = f.__name__.replace("nan", "")
                    raise TypeError(
                        f"reduction operation '{f_name}' not allowed for this dtype"
                    )
            try:
                return f(*args, **kwargs)
            except ValueError as err:
                # we want to transform an object array
                # ValueError message to the more typical TypeError
                # e.g. this is normally a disallowed function on
                # object arrays that contain strings
                if is_object_dtype(args[0]):
                    raise TypeError(err) from err
                raise

        return cast(F, _f)
96
97
class bottleneck_switch:
    """
    Decorator factory dispatching to a bottleneck kernel when possible.

    Wraps a pandas ``alt`` implementation; at call time the bottleneck
    function of the same name (or ``name`` if given) is used instead when
    bottleneck is enabled, ``skipna`` is True and the dtype is supported
    (see _bn_ok_dtype).  Extra ``kwargs`` are merged into each call's
    keywords as defaults.
    """

    def __init__(self, name=None, **kwargs) -> None:
        # name: bottleneck function name override; defaults to alt.__name__
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bottleneck not installed, or it lacks this reduction
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: AxisInt | None = None,
            skipna: bool = True,
            **kwds,
        ):
            if len(self.kwargs) > 0:
                # fill in decorator-level defaults without clobbering
                # explicitly passed keywords
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)
152
153
def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
    """Return True if bottleneck may be used for reduction ``name`` on ``dtype``."""
    # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
    if dtype == object or needs_i8_conversion(dtype):
        return False

    # GH 42878
    # Bottleneck uses naive summation leading to O(n) loss of precision
    # unlike numpy which implements pairwise summation, which has O(log(n)) loss
    # crossref: https://github.com/pydata/bottleneck/issues/379

    # GH 15507
    # bottleneck does not properly upcast during the sum
    # so can overflow

    # GH 9422
    # further we also want to preserve NaN when all elements
    # are NaN, unlike bottleneck/numpy which consider this
    # to be 0
    return name not in ["nansum", "nanprod", "nanmean"]
172
173
def _has_infs(result) -> bool:
    """Return True if ``result`` (scalar or ndarray) contains +/-inf."""
    if isinstance(result, np.ndarray) and result.dtype in ("f8", "f4"):
        # Note: outside of an nanops-specific test, we always have
        # result.ndim == 1, so there is no risk of this ravel making a copy.
        return lib.has_infs(result.ravel("K"))
    try:
        return np.isinf(result).any()
    except (TypeError, NotImplementedError):
        # if it doesn't support infs, then it can't have infs
        return False
185
186
187def _get_fill_value(
188 dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
189):
190 """return the correct fill value for the dtype of the values"""
191 if fill_value is not None:
192 return fill_value
193 if _na_ok_dtype(dtype):
194 if fill_value_typ is None:
195 return np.nan
196 else:
197 if fill_value_typ == "+inf":
198 return np.inf
199 else:
200 return -np.inf
201 else:
202 if fill_value_typ == "+inf":
203 # need the max int here
204 return lib.i8max
205 else:
206 return iNaT
207
208
209def _maybe_get_mask(
210 values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
211) -> npt.NDArray[np.bool_] | None:
212 """
213 Compute a mask if and only if necessary.
214
215 This function will compute a mask iff it is necessary. Otherwise,
216 return the provided mask (potentially None) when a mask does not need to be
217 computed.
218
219 A mask is never necessary if the values array is of boolean or integer
220 dtypes, as these are incapable of storing NaNs. If passing a NaN-capable
221 dtype that is interpretable as either boolean or integer data (eg,
222 timedelta64), a mask must be provided.
223
224 If the skipna parameter is False, a new mask will not be computed.
225
226 The mask is computed using isna() by default. Setting invert=True selects
227 notna() as the masking function.
228
229 Parameters
230 ----------
231 values : ndarray
232 input array to potentially compute mask for
233 skipna : bool
234 boolean for whether NaNs should be skipped
235 mask : Optional[ndarray]
236 nan-mask if known
237
238 Returns
239 -------
240 Optional[np.ndarray[bool]]
241 """
242 if mask is None:
243 if values.dtype.kind in "biu":
244 # Boolean data cannot contain nulls, so signal via mask being None
245 return None
246
247 if skipna or values.dtype.kind in "mM":
248 mask = isna(values)
249
250 return mask
251
252
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None]:
    """
    Utility to get the values view and NaN-mask, filling NAs when requested.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if values.dtype.kind in "mM":
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    if skipna and (mask is not None):
        # get our fill value (in case we need to provide an alternative
        # dtype for it)
        fill_value = _get_fill_value(
            dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
        )

        if fill_value is not None:
            if mask.any():
                if datetimelike or _na_ok_dtype(dtype):
                    # fill_value fits the dtype; copy, then fill in place
                    values = values.copy()
                    np.putmask(values, mask, fill_value)
                else:
                    # np.where will promote if needed
                    values = np.where(~mask, values, fill_value)

    return values, mask
322
323
def _get_dtype_max(dtype: np.dtype) -> np.dtype:
    """Map ``dtype`` to a platform-independent accumulator dtype."""
    kind = dtype.kind
    if kind in "bi":
        return np.dtype(np.int64)
    if kind == "u":
        return np.dtype(np.uint64)
    if kind == "f":
        return np.dtype(np.float64)
    # other kinds (datetimelike, object, ...) are kept unchanged
    return dtype
334
335
def _na_ok_dtype(dtype: DtypeObj) -> bool:
    """Return True if ``dtype`` can natively hold NaN (datetimelike uses iNaT)."""
    return not (needs_i8_conversion(dtype) or issubclass(dtype.type, np.integer))
340
341
def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    Cast a reduction result back to the original datetimelike dtype.

    ``fill_value`` is the sentinel the reduction used for missing values
    (e.g. iNaT); a scalar result equal to it is converted to NaT.
    Non-datetimelike dtypes pass through unchanged.
    """
    if result is NaT:
        pass

    elif dtype.kind == "M":
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns").astype(dtype)
            else:
                result = np.int64(result).view(dtype)
            # retain original unit
            result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif dtype.kind == "m":
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result
381
382
383def _datetimelike_compat(func: F) -> F:
384 """
385 If we have datetime64 or timedelta64 values, ensure we have a correct
386 mask before calling the wrapped function, then cast back afterwards.
387 """
388
389 @functools.wraps(func)
390 def new_func(
391 values: np.ndarray,
392 *,
393 axis: AxisInt | None = None,
394 skipna: bool = True,
395 mask: npt.NDArray[np.bool_] | None = None,
396 **kwargs,
397 ):
398 orig_values = values
399
400 datetimelike = values.dtype.kind in "mM"
401 if datetimelike and mask is None:
402 mask = isna(values)
403
404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
405
406 if datetimelike:
407 result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)
408 if not skipna:
409 assert mask is not None # checked above
410 result = _mask_datetimelike_result(result, axis, mask, orig_values)
411
412 return result
413
414 return cast(F, new_func)
415
416
417def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray:
418 """
419 Return the missing value for `values`.
420
421 Parameters
422 ----------
423 values : ndarray
424 axis : int or None
425 axis for the reduction, required if values.ndim > 1.
426
427 Returns
428 -------
429 result : scalar or ndarray
430 For 1-D values, returns a scalar of the correct missing type.
431 For 2-D values, returns a 1-D array where each element is missing.
432 """
433 # we either return np.nan or pd.NaT
434 if values.dtype.kind in "iufcb":
435 values = values.astype("float64")
436 fill_value = na_value_for_dtype(values.dtype)
437
438 if values.ndim == 1:
439 return fill_value
440 elif axis is None:
441 return fill_value
442 else:
443 result_shape = values.shape[:axis] + values.shape[axis + 1 :]
444
445 return np.full(result_shape, fill_value, dtype=values.dtype)
446
447
448def maybe_operate_rowwise(func: F) -> F:
449 """
450 NumPy operations on C-contiguous ndarrays with axis=1 can be
451 very slow if axis 1 >> axis 0.
452 Operate row-by-row and concatenate the results.
453 """
454
455 @functools.wraps(func)
456 def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs):
457 if (
458 axis == 1
459 and values.ndim == 2
460 and values.flags["C_CONTIGUOUS"]
461 # only takes this path for wide arrays (long dataframes), for threshold see
462 # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
463 and (values.shape[1] / 1000) > values.shape[0]
464 and values.dtype != object
465 and values.dtype != bool
466 ):
467 arrs = list(values)
468 if kwargs.get("mask") is not None:
469 mask = kwargs.pop("mask")
470 results = [
471 func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs))
472 ]
473 else:
474 results = [func(x, **kwargs) for x in arrs]
475 return np.array(results)
476
477 return func(values, axis=axis, **kwargs)
478
479 return cast(F, newfunc)
480
481
482def nanany(
483 values: np.ndarray,
484 *,
485 axis: AxisInt | None = None,
486 skipna: bool = True,
487 mask: npt.NDArray[np.bool_] | None = None,
488) -> bool:
489 """
490 Check if any elements along an axis evaluate to True.
491
492 Parameters
493 ----------
494 values : ndarray
495 axis : int, optional
496 skipna : bool, default True
497 mask : ndarray[bool], optional
498 nan-mask if known
499
500 Returns
501 -------
502 result : bool
503
504 Examples
505 --------
506 >>> from pandas.core import nanops
507 >>> s = pd.Series([1, 2])
508 >>> nanops.nanany(s.values)
509 True
510
511 >>> from pandas.core import nanops
512 >>> s = pd.Series([np.nan])
513 >>> nanops.nanany(s.values)
514 False
515 """
516 if values.dtype.kind in "iub" and mask is None:
517 # GH#26032 fastpath
518 # error: Incompatible return value type (got "Union[bool_, ndarray]",
519 # expected "bool")
520 return values.any(axis) # type: ignore[return-value]
521
522 if values.dtype.kind == "M":
523 # GH#34479
524 warnings.warn(
525 "'any' with datetime64 dtypes is deprecated and will raise in a "
526 "future version. Use (obj != pd.Timestamp(0)).any() instead.",
527 FutureWarning,
528 stacklevel=find_stack_level(),
529 )
530
531 values, _ = _get_values(values, skipna, fill_value=False, mask=mask)
532
533 # For object type, any won't necessarily return
534 # boolean values (numpy/numpy#4352)
535 if values.dtype == object:
536 values = values.astype(bool)
537
538 # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
539 # "bool")
540 return values.any(axis) # type: ignore[return-value]
541
542
543def nanall(
544 values: np.ndarray,
545 *,
546 axis: AxisInt | None = None,
547 skipna: bool = True,
548 mask: npt.NDArray[np.bool_] | None = None,
549) -> bool:
550 """
551 Check if all elements along an axis evaluate to True.
552
553 Parameters
554 ----------
555 values : ndarray
556 axis : int, optional
557 skipna : bool, default True
558 mask : ndarray[bool], optional
559 nan-mask if known
560
561 Returns
562 -------
563 result : bool
564
565 Examples
566 --------
567 >>> from pandas.core import nanops
568 >>> s = pd.Series([1, 2, np.nan])
569 >>> nanops.nanall(s.values)
570 True
571
572 >>> from pandas.core import nanops
573 >>> s = pd.Series([1, 0])
574 >>> nanops.nanall(s.values)
575 False
576 """
577 if values.dtype.kind in "iub" and mask is None:
578 # GH#26032 fastpath
579 # error: Incompatible return value type (got "Union[bool_, ndarray]",
580 # expected "bool")
581 return values.all(axis) # type: ignore[return-value]
582
583 if values.dtype.kind == "M":
584 # GH#34479
585 warnings.warn(
586 "'all' with datetime64 dtypes is deprecated and will raise in a "
587 "future version. Use (obj != pd.Timestamp(0)).all() instead.",
588 FutureWarning,
589 stacklevel=find_stack_level(),
590 )
591
592 values, _ = _get_values(values, skipna, fill_value=True, mask=mask)
593
594 # For object type, all won't necessarily return
595 # boolean values (numpy/numpy#4352)
596 if values.dtype == object:
597 values = values.astype(bool)
598
599 # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
600 # "bool")
601 return values.all(axis) # type: ignore[return-value]
602
603
@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        minimum number of non-NA values required for a non-NA result
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s.values)
    3.0
    """
    orig_dtype = values.dtype
    values, mask = _get_values(values, skipna, fill_value=0, mask=mask)

    # accumulator dtype: floats keep their precision, timedeltas sum in
    # float64, ints/bools upcast via _get_dtype_max to avoid overflow
    if orig_dtype.kind == "f":
        dtype_sum = orig_dtype
    elif orig_dtype.kind == "m":
        dtype_sum = np.dtype(np.float64)
    else:
        dtype_sum = _get_dtype_max(orig_dtype)

    the_sum = values.sum(axis, dtype=dtype_sum)
    return _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
650
651
652def _mask_datetimelike_result(
653 result: np.ndarray | np.datetime64 | np.timedelta64,
654 axis: AxisInt | None,
655 mask: npt.NDArray[np.bool_],
656 orig_values: np.ndarray,
657) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
658 if isinstance(result, np.ndarray):
659 # we need to apply the mask
660 result = result.astype("i8").view(orig_values.dtype)
661 axis_mask = mask.any(axis=axis)
662 # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
663 # datetime64, timedelta64]")
664 result[axis_mask] = iNaT # type: ignore[index]
665 else:
666 if mask.any():
667 return np.int64(iNaT).view(orig_values.dtype)
668 return result
669
670
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s.values)
    1.5
    """
    dtype = values.dtype
    values, mask = _get_values(values, skipna, fill_value=0, mask=mask)

    # accumulator dtypes (not using needs_i8_conversion because that
    # includes period): datetimelike and integer sums go through float64;
    # floats keep their own precision for both sum and count
    if dtype.kind in "mMiu":
        dtype_sum = np.dtype(np.float64)
        dtype_count = np.dtype(np.float64)
    elif dtype.kind == "f":
        dtype_sum = dtype
        dtype_count = dtype
    else:
        dtype_sum = _get_dtype_max(dtype)
        dtype_count = np.dtype(np.float64)

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
        return the_mean

    return the_sum / count if count > 0 else np.nan
734
735
@bottleneck_switch()
def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None):
    """
    Compute the median along the given axis while ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s.values)
    2.0
    """
    # for floats without mask, the data already uses NaN as missing value
    # indicator, and `mask` will be calculated from that below -> in those
    # cases we never need to set NaN to the masked values
    using_nan_sentinel = values.dtype.kind == "f" and mask is None

    def get_median(x, _mask=None):
        # median of a 1-D slice; _mask here marks VALID entries
        if _mask is None:
            _mask = notna(x)
        else:
            _mask = ~_mask
        if not skipna and not _mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings(
                "ignore", "All-NaN slice encountered", RuntimeWarning
            )
            res = np.nanmedian(x[_mask])
        return res

    dtype = values.dtype
    values, mask = _get_values(values, skipna, mask=mask, fill_value=None)
    if values.dtype.kind != "f":
        if values.dtype == object:
            # GH#34671 avoid casting strings to numeric
            inferred = lib.infer_dtype(values)
            if inferred in ["string", "mixed"]:
                raise TypeError(f"Cannot convert {values} to numeric")
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
    if not using_nan_sentinel and mask is not None:
        # masked entries must become NaN so np.nanmedian skips them
        if not values.flags.writeable:
            values = values.copy()
        values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:
        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings(
                        "ignore", "All-NaN slice encountered", RuntimeWarning
                    )
                    if (values.shape[1] == 1 and axis == 0) or (
                        values.shape[0] == 1 and axis == 1
                    ):
                        # GH52788: fastpath when squeezable, nanmedian for 2D array slow
                        res = np.nanmedian(np.squeeze(values), keepdims=True)
                    else:
                        res = np.nanmedian(values, axis=axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            res = _get_empty_reduction_result(values.shape, axis)

    else:
        # otherwise return a scalar value
        res = get_median(values, mask) if notempty else np.nan
    return _wrap_results(res, dtype)
833
834
def _get_empty_reduction_result(
    shape: Shape,
    axis: AxisInt,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray: an all-NaN float64
    array shaped like ``shape`` with ``axis`` removed.

    Parameters
    ----------
    shape : Tuple[int, ...]
    axis : int

    Returns
    -------
    np.ndarray
    """
    out_shape = tuple(dim for i, dim in enumerate(shape) if i != axis)
    return np.full(out_shape, np.nan, dtype=np.float64)
856
857
def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: AxisInt | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    d = count - dtype.type(ddof)

    # degrees of freedom <= 0 must yield NaN, never inf
    if is_float(count):
        if count <= ddof:
            # error: Incompatible types in assignment (expression has type
            # "float", variable has type "Union[floating[Any], ndarray[Any,
            # dtype[floating[Any]]]]")
            count = np.nan  # type: ignore[assignment]
            d = np.nan
        return count, d

    # count is not narrowed by is_float check
    count = cast(np.ndarray, count)
    too_few = count <= ddof
    if too_few.any():
        np.putmask(d, too_few, np.nan)
        np.putmask(count, too_few, np.nan)
    return count, d
906
907
@bottleneck_switch(ddof=1)
def nanstd(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s.values)
    1.0
    """
    # std of datetimes is a timedelta: compute on the m8 view
    if values.dtype == "M8[ns]":
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask = _get_values(values, skipna, mask=mask)

    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)
    return _wrap_results(np.sqrt(var), orig_dtype)
952
953
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s.values)
    1.0
    """
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if dtype.kind in "iu":
        # integers cannot hold NaN; work in float64 so masking applies
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if values.dtype.kind == "f":
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries so they contribute nothing to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries must not contribute to the squared deviations
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if dtype.kind == "f":
        result = result.astype(dtype, copy=False)
    return result
1027
1028
@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s.values)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if values.dtype.kind != "f":
        values = values.astype("f8")

    if not skipna and mask is not None and mask.any():
        # any NA with skipna=False poisons the result
        return np.nan

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    sd = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
    return sd / np.sqrt(count)
1080
1081
def _nanminmax(meth, fill_value_typ):
    """
    Build nanmin/nanmax: reduce with the ``meth`` ndarray method after
    filling NAs with the +/-inf sentinel given by ``fill_value_typ``.
    """

    @bottleneck_switch(name=f"nan{meth}")
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ):
        if values.size == 0:
            # empty reduction: NA of the appropriate type/shape
            return _na_for_min_count(values, axis)

        values, mask = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )
        result = getattr(values, meth)(axis)
        return _maybe_null_out(result, axis, mask, values.shape)

    return reduction
1103
1104
# nanmin fills NAs with +inf so they never win the reduction; nanmax uses -inf.
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")
1107
1108
def nanargmax(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index (or indices along ``axis``) of the maximum value, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> from pandas.core import nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7., nan],
           [ 9., 10., nan]])
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    # fill NAs with -inf so they can never be the argmax
    values, mask = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    result = values.argmax(axis)
    # error: Argument 1 to "_maybe_arg_null_out" has incompatible type "Any |
    # signedinteger[Any]"; expected "ndarray[Any, Any]"
    return _maybe_arg_null_out(result, axis, mask, skipna)  # type: ignore[arg-type]
1153
1154
def nanargmin(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index of the minimum value along an axis, NA-aware.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of min value in specified axis or -1 in the NA case

    Examples
    --------
    >>> from pandas.core import nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmin(arr)
    0

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 0] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [nan,  7.,  8.],
           [nan, 10., 11.]])
    >>> nanops.nanargmin(arr, axis=1)
    array([0, 0, 1, 1])
    """
    # NA slots are filled with +inf so they can never be selected by argmin.
    filled, mask = _get_values(values, True, fill_value_typ="+inf", mask=mask)
    idx = filled.argmin(axis)
    # error: Argument 1 to "_maybe_arg_null_out" has incompatible type "Any |
    # signedinteger[Any]"; expected "ndarray[Any, Any]"
    return _maybe_arg_null_out(idx, axis, mask, skipna)  # type: ignore[arg-type]
1199
1200
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanskew(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s.values)
    1.7320508075688787
    """
    mask = _maybe_get_mask(values, skipna, mask)
    # Non-float input is upcast to float64; counts then use the matching dtype.
    if values.dtype.kind != "f":
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NA positions on a copy so they do not contribute to sums.
        values = values.copy()
        np.putmask(values, mask, 0)
    elif not skipna and mask is not None and mask.any():
        # With skipna=False, any NA makes the whole result NA.
        return np.nan

    with np.errstate(invalid="ignore", divide="ignore"):
        mean = values.sum(axis, dtype=np.float64) / count
        if axis is not None:
            mean = np.expand_dims(mean, axis)

        adjusted = values - mean
        if skipna and mask is not None:
            # Re-zero NA slots: subtracting the mean made them -mean.
            np.putmask(adjusted, mask, 0)
        adjusted2 = adjusted**2
        adjusted3 = adjusted2 * adjusted
        # Second and third central moments.
        m2 = adjusted2.sum(axis, dtype=np.float64)
        m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if dtype.kind == "f":
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Array case: zero variance -> 0 skew; fewer than 3 obs -> NaN.
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
    else:
        # Scalar case mirrors the array handling above.
        result = dtype.type(0) if m2 == 0 else result
        if count < 3:
            return np.nan

    return result
1287
1288
@disallow("M8", "m8")
@maybe_operate_rowwise
def nankurt(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s.values)
    -1.2892561983471076
    """
    mask = _maybe_get_mask(values, skipna, mask)
    # Non-float input is upcast to float64; counts then use the matching dtype.
    if values.dtype.kind != "f":
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NA positions on a copy so they do not contribute to sums.
        values = values.copy()
        np.putmask(values, mask, 0)
    elif not skipna and mask is not None and mask.any():
        # With skipna=False, any NA makes the whole result NA.
        return np.nan

    with np.errstate(invalid="ignore", divide="ignore"):
        mean = values.sum(axis, dtype=np.float64) / count
        if axis is not None:
            mean = np.expand_dims(mean, axis)

        adjusted = values - mean
        if skipna and mask is not None:
            # Re-zero NA slots: subtracting the mean made them -mean.
            np.putmask(adjusted, mask, 0)
        adjusted2 = adjusted**2
        adjusted4 = adjusted2**2
        # Second and fourth central moments.
        m2 = adjusted2.sum(axis, dtype=np.float64)
        m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numerator = count * (count + 1) * (count - 1) * m4
        denominator = (count - 2) * (count - 3) * m2**2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follow this behavior
    # to fix the fperr to treat denom <1e-14 as zero
    numerator = _zero_out_fperr(numerator)
    denominator = _zero_out_fperr(denominator)

    if not isinstance(denominator, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            # G2 needs at least 4 observations.
            return np.nan
        if denominator == 0:
            # Zero variance -> zero excess kurtosis, preserving dtype.
            return values.dtype.type(0)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numerator / denominator - adj

    dtype = values.dtype
    if dtype.kind == "f":
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Array case: zero-variance slots -> 0, fewer than 4 obs -> NaN.
        result = np.where(denominator == 0, 0, result)
        result[count < 4] = np.nan

    return result
1384
1385
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanprod(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Product of array values along an axis, with NAs treated as 1.

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    Dtype
        The product of all elements on a given axis. ( NaNs are treated as 1)

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, 3, np.nan])
    >>> nanops.nanprod(s.values)
    6.0
    """
    mask = _maybe_get_mask(values, skipna, mask)

    if skipna and mask is not None:
        # Fill NA slots with the multiplicative identity on a copy.
        values = values.copy()
        values[mask] = 1

    product = values.prod(axis)
    # error: Incompatible return value type (got "Union[ndarray, float]", expected
    # "float")
    return _maybe_null_out(  # type: ignore[return-value]
        product, axis, mask, values.shape, min_count=min_count
    )
1429
1430
1431def _maybe_arg_null_out(
1432 result: np.ndarray,
1433 axis: AxisInt | None,
1434 mask: npt.NDArray[np.bool_] | None,
1435 skipna: bool,
1436) -> np.ndarray | int:
1437 # helper function for nanargmin/nanargmax
1438 if mask is None:
1439 return result
1440
1441 if axis is None or not getattr(result, "ndim", False):
1442 if skipna:
1443 if mask.all():
1444 return -1
1445 else:
1446 if mask.any():
1447 return -1
1448 else:
1449 if skipna:
1450 na_mask = mask.all(axis)
1451 else:
1452 na_mask = mask.any(axis)
1453 if na_mask.any():
1454 result[na_mask] = -1
1455 return result
1456
1457
1458def _get_counts(
1459 values_shape: Shape,
1460 mask: npt.NDArray[np.bool_] | None,
1461 axis: AxisInt | None,
1462 dtype: np.dtype[np.floating] = np.dtype(np.float64),
1463) -> np.floating | npt.NDArray[np.floating]:
1464 """
1465 Get the count of non-null values along an axis
1466
1467 Parameters
1468 ----------
1469 values_shape : tuple of int
1470 shape tuple from values ndarray, used if mask is None
1471 mask : Optional[ndarray[bool]]
1472 locations in values that should be considered missing
1473 axis : Optional[int]
1474 axis to count along
1475 dtype : type, optional
1476 type to use for count
1477
1478 Returns
1479 -------
1480 count : scalar or array
1481 """
1482 if axis is None:
1483 if mask is not None:
1484 n = mask.size - mask.sum()
1485 else:
1486 n = np.prod(values_shape)
1487 return dtype.type(n)
1488
1489 if mask is not None:
1490 count = mask.shape[axis] - mask.sum(axis)
1491 else:
1492 count = values_shape[axis]
1493
1494 if is_integer(count):
1495 return dtype.type(count)
1496 return count.astype(dtype, copy=False)
1497
1498
def _maybe_null_out(
    result: np.ndarray | float | NaTType,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_] | None,
    shape: tuple[int, ...],
    min_count: int = 1,
) -> np.ndarray | float | NaTType:
    """
    Set result entries to NA where fewer than ``min_count`` non-null values
    contributed along ``axis``.
    """
    if mask is None and min_count == 0:
        # No NAs and no minimum requirement; nothing can be nulled out.
        return result

    if axis is not None and isinstance(result, np.ndarray):
        if mask is None:
            # mask=None means no nulls; every slot shares the same count.
            short = shape[axis] - min_count < 0
            reduced_shape = shape[:axis] + shape[axis + 1 :]
            null_mask = np.broadcast_to(short, reduced_shape)
        else:
            null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0

        if np.any(null_mask):
            if not is_numeric_dtype(result):
                # GH12941, use None to auto cast null
                result[null_mask] = None
            else:
                # Cast so the result dtype can actually hold NaN.
                if np.iscomplexobj(result):
                    result = result.astype("c16")
                elif not is_float_dtype(result):
                    result = result.astype("f8", copy=False)
                result[null_mask] = np.nan
    elif result is not NaT:
        if check_below_min_count(shape, mask, min_count):
            result_dtype = getattr(result, "dtype", None)
            if is_float_dtype(result_dtype):
                # Preserve float precision when producing the NA scalar.
                # error: Item "None" of "Optional[Any]" has no attribute "type"
                result = result_dtype.type("nan")  # type: ignore[union-attr]
            else:
                result = np.nan

    return result
1545
1546
def check_below_min_count(
    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
    """
    Check for the `min_count` keyword. Returns True if below `min_count` (when
    missing value should be returned from the reduction).

    Parameters
    ----------
    shape : tuple
        The shape of the values (`values.shape`).
    mask : ndarray[bool] or None
        Boolean numpy array (typically of same shape as `shape`) or None.
    min_count : int
        Keyword passed through from sum/prod call.

    Returns
    -------
    bool
    """
    if min_count <= 0:
        # No minimum requested -> never below it.
        return False
    # mask=None means there are no missing values; only the size matters.
    non_nulls = np.prod(shape) if mask is None else mask.size - mask.sum()
    return bool(non_nulls < min_count)
1576
1577
1578def _zero_out_fperr(arg):
1579 # #18044 reference this behavior to fix rolling skew/kurt issue
1580 if isinstance(arg, np.ndarray):
1581 return np.where(np.abs(arg) < 1e-14, 0, arg)
1582 else:
1583 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
1584
1585
@disallow("M8", "m8")
def nancorr(
    a: np.ndarray,
    b: np.ndarray,
    *,
    method: CorrelationMethod = "pearson",
    min_periods: int | None = None,
) -> float:
    """
    Correlation of two 1-D ndarrays, dropping pairwise-missing entries.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancorr must have same size")

    min_periods = 1 if min_periods is None else min_periods

    # Keep only positions where both operands are non-null.
    valid = notna(a) & notna(b)
    if not valid.all():
        a = a[valid]
        b = b[valid]

    if len(a) < min_periods:
        return np.nan

    corr_func = get_corr_func(method)
    return corr_func(_ensure_numeric(a), _ensure_numeric(b))
1616
1617
def get_corr_func(
    method: CorrelationMethod,
) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Map a correlation ``method`` name (or a user callable) to a two-argument
    function returning the correlation coefficient.
    """
    if method == "kendall":
        from scipy.stats import kendalltau

        # kendalltau returns (statistic, pvalue); keep the statistic.
        return lambda a, b: kendalltau(a, b)[0]
    if method == "spearman":
        from scipy.stats import spearmanr

        return lambda a, b: spearmanr(a, b)[0]
    if method == "pearson":
        return lambda a, b: np.corrcoef(a, b)[0, 1]
    if callable(method):
        return method

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )
1648
1649
@disallow("M8", "m8")
def nancov(
    a: np.ndarray,
    b: np.ndarray,
    *,
    min_periods: int | None = None,
    ddof: int | None = 1,
) -> float:
    """
    Covariance of two 1-D ndarrays, dropping pairwise-missing entries.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancov must have same size")

    if min_periods is None:
        min_periods = 1

    # Keep only positions where both operands are non-null.
    valid = notna(a) & notna(b)
    if not valid.all():
        a, b = a[valid], b[valid]

    if len(a) < min_periods:
        return np.nan

    return np.cov(_ensure_numeric(a), _ensure_numeric(b), ddof=ddof)[0, 1]
1676
1677
1678def _ensure_numeric(x):
1679 if isinstance(x, np.ndarray):
1680 if x.dtype.kind in "biu":
1681 x = x.astype(np.float64)
1682 elif x.dtype == object:
1683 inferred = lib.infer_dtype(x)
1684 if inferred in ["string", "mixed"]:
1685 # GH#44008, GH#36703 avoid casting e.g. strings to numeric
1686 raise TypeError(f"Could not convert {x} to numeric")
1687 try:
1688 x = x.astype(np.complex128)
1689 except (TypeError, ValueError):
1690 try:
1691 x = x.astype(np.float64)
1692 except ValueError as err:
1693 # GH#29941 we get here with object arrays containing strs
1694 raise TypeError(f"Could not convert {x} to numeric") from err
1695 else:
1696 if not np.any(np.imag(x)):
1697 x = x.real
1698 elif not (is_float(x) or is_integer(x) or is_complex(x)):
1699 if isinstance(x, str):
1700 # GH#44008, GH#36703 avoid casting e.g. strings to numeric
1701 raise TypeError(f"Could not convert string '{x}' to numeric")
1702 try:
1703 x = float(x)
1704 except (TypeError, ValueError):
1705 # e.g. "1+1j" or "foo"
1706 try:
1707 x = complex(x)
1708 except ValueError as err:
1709 # e.g. "foo"
1710 raise TypeError(f"Could not convert {x} to numeric") from err
1711 return x
1712
1713
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # Identity element for the accumulation, and the value restored at NA slots.
    fill_value, na_value = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # This should go through ea interface
    assert values.dtype.kind not in "mM"

    # Integer/bool dtypes cannot hold NaN, so there is nothing to skip.
    if not skipna or issubclass(values.dtype.type, (np.integer, np.bool_)):
        return accum_func(values, axis=0)

    vals = values.copy()
    mask = isna(vals)
    vals[mask] = fill_value
    result = accum_func(vals, axis=0)
    # Restore NA markers at the originally-missing positions.
    result[mask] = na_value
    return result