Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/nanops.py: 18%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

622 statements  

1from __future__ import annotations 

2 

3import functools 

4import itertools 

5from typing import ( 

6 Any, 

7 Callable, 

8 cast, 

9) 

10import warnings 

11 

12import numpy as np 

13 

14from pandas._config import get_option 

15 

16from pandas._libs import ( 

17 NaT, 

18 NaTType, 

19 iNaT, 

20 lib, 

21) 

22from pandas._typing import ( 

23 ArrayLike, 

24 AxisInt, 

25 CorrelationMethod, 

26 Dtype, 

27 DtypeObj, 

28 F, 

29 Scalar, 

30 Shape, 

31 npt, 

32) 

33from pandas.compat._optional import import_optional_dependency 

34from pandas.util._exceptions import find_stack_level 

35 

36from pandas.core.dtypes.common import ( 

37 is_complex, 

38 is_float, 

39 is_float_dtype, 

40 is_integer, 

41 is_numeric_dtype, 

42 is_object_dtype, 

43 needs_i8_conversion, 

44 pandas_dtype, 

45) 

46from pandas.core.dtypes.missing import ( 

47 isna, 

48 na_value_for_dtype, 

49 notna, 

50) 

51 

52bn = import_optional_dependency("bottleneck", errors="warn") 

53_BOTTLENECK_INSTALLED = bn is not None 

54_USE_BOTTLENECK = False 

55 

56 

57def set_use_bottleneck(v: bool = True) -> None: 

58 # set/unset to use bottleneck 

59 global _USE_BOTTLENECK 

60 if _BOTTLENECK_INSTALLED: 

61 _USE_BOTTLENECK = v 

62 

63 

64set_use_bottleneck(get_option("compute.use_bottleneck")) 

65 

66 

67class disallow: 

68 def __init__(self, *dtypes: Dtype) -> None: 

69 super().__init__() 

70 self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) 

71 

72 def check(self, obj) -> bool: 

73 return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) 

74 

75 def __call__(self, f: F) -> F: 

76 @functools.wraps(f) 

77 def _f(*args, **kwargs): 

78 obj_iter = itertools.chain(args, kwargs.values()) 

79 if any(self.check(obj) for obj in obj_iter): 

80 f_name = f.__name__.replace("nan", "") 

81 raise TypeError( 

82 f"reduction operation '{f_name}' not allowed for this dtype" 

83 ) 

84 try: 

85 return f(*args, **kwargs) 

86 except ValueError as e: 

87 # we want to transform an object array 

88 # ValueError message to the more typical TypeError 

89 # e.g. this is normally a disallowed function on 

90 # object arrays that contain strings 

91 if is_object_dtype(args[0]): 

92 raise TypeError(e) from e 

93 raise 

94 

95 return cast(F, _f) 

96 

97 

98class bottleneck_switch: 

99 def __init__(self, name=None, **kwargs) -> None: 

100 self.name = name 

101 self.kwargs = kwargs 

102 

103 def __call__(self, alt: F) -> F: 

104 bn_name = self.name or alt.__name__ 

105 

106 try: 

107 bn_func = getattr(bn, bn_name) 

108 except (AttributeError, NameError): # pragma: no cover 

109 bn_func = None 

110 

111 @functools.wraps(alt) 

112 def f( 

113 values: np.ndarray, 

114 *, 

115 axis: AxisInt | None = None, 

116 skipna: bool = True, 

117 **kwds, 

118 ): 

119 if len(self.kwargs) > 0: 

120 for k, v in self.kwargs.items(): 

121 if k not in kwds: 

122 kwds[k] = v 

123 

124 if values.size == 0 and kwds.get("min_count") is None: 

125 # We are empty, returning NA for our type 

126 # Only applies for the default `min_count` of None 

127 # since that affects how empty arrays are handled. 

128 # TODO(GH-18976) update all the nanops methods to 

129 # correctly handle empty inputs and remove this check. 

130 # It *may* just be `var` 

131 return _na_for_min_count(values, axis) 

132 

133 if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name): 

134 if kwds.get("mask", None) is None: 

135 # `mask` is not recognised by bottleneck, would raise 

136 # TypeError if called 

137 kwds.pop("mask", None) 

138 result = bn_func(values, axis=axis, **kwds) 

139 

140 # prefer to treat inf/-inf as NA, but must compute the func 

141 # twice :( 

142 if _has_infs(result): 

143 result = alt(values, axis=axis, skipna=skipna, **kwds) 

144 else: 

145 result = alt(values, axis=axis, skipna=skipna, **kwds) 

146 else: 

147 result = alt(values, axis=axis, skipna=skipna, **kwds) 

148 

149 return result 

150 

151 return cast(F, f) 

152 

153 

154def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: 

155 # Bottleneck chokes on datetime64, PeriodDtype (or and EA) 

156 if dtype != object and not needs_i8_conversion(dtype): 

157 # GH 42878 

158 # Bottleneck uses naive summation leading to O(n) loss of precision 

159 # unlike numpy which implements pairwise summation, which has O(log(n)) loss 

160 # crossref: https://github.com/pydata/bottleneck/issues/379 

161 

162 # GH 15507 

163 # bottleneck does not properly upcast during the sum 

164 # so can overflow 

165 

166 # GH 9422 

167 # further we also want to preserve NaN when all elements 

168 # are NaN, unlike bottleneck/numpy which consider this 

169 # to be 0 

170 return name not in ["nansum", "nanprod", "nanmean"] 

171 return False 

172 

173 

174def _has_infs(result) -> bool: 

175 if isinstance(result, np.ndarray): 

176 if result.dtype in ("f8", "f4"): 

177 # Note: outside of an nanops-specific test, we always have 

178 # result.ndim == 1, so there is no risk of this ravel making a copy. 

179 return lib.has_infs(result.ravel("K")) 

180 try: 

181 return np.isinf(result).any() 

182 except (TypeError, NotImplementedError): 

183 # if it doesn't support infs, then it can't have infs 

184 return False 

185 

186 

187def _get_fill_value( 

188 dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None 

189): 

190 """return the correct fill value for the dtype of the values""" 

191 if fill_value is not None: 

192 return fill_value 

193 if _na_ok_dtype(dtype): 

194 if fill_value_typ is None: 

195 return np.nan 

196 else: 

197 if fill_value_typ == "+inf": 

198 return np.inf 

199 else: 

200 return -np.inf 

201 else: 

202 if fill_value_typ == "+inf": 

203 # need the max int here 

204 return lib.i8max 

205 else: 

206 return iNaT 

207 

208 

209def _maybe_get_mask( 

210 values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None 

211) -> npt.NDArray[np.bool_] | None: 

212 """ 

213 Compute a mask if and only if necessary. 

214 

215 This function will compute a mask iff it is necessary. Otherwise, 

216 return the provided mask (potentially None) when a mask does not need to be 

217 computed. 

218 

219 A mask is never necessary if the values array is of boolean or integer 

220 dtypes, as these are incapable of storing NaNs. If passing a NaN-capable 

221 dtype that is interpretable as either boolean or integer data (eg, 

222 timedelta64), a mask must be provided. 

223 

224 If the skipna parameter is False, a new mask will not be computed. 

225 

226 The mask is computed using isna() by default. Setting invert=True selects 

227 notna() as the masking function. 

228 

229 Parameters 

230 ---------- 

231 values : ndarray 

232 input array to potentially compute mask for 

233 skipna : bool 

234 boolean for whether NaNs should be skipped 

235 mask : Optional[ndarray] 

236 nan-mask if known 

237 

238 Returns 

239 ------- 

240 Optional[np.ndarray[bool]] 

241 """ 

242 if mask is None: 

243 if values.dtype.kind in "biu": 

244 # Boolean data cannot contain nulls, so signal via mask being None 

245 return None 

246 

247 if skipna or values.dtype.kind in "mM": 

248 mask = isna(values) 

249 

250 return mask 

251 

252 

253def _get_values( 

254 values: np.ndarray, 

255 skipna: bool, 

256 fill_value: Any = None, 

257 fill_value_typ: str | None = None, 

258 mask: npt.NDArray[np.bool_] | None = None, 

259) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None]: 

260 """ 

261 Utility to get the values view, mask, dtype, dtype_max, and fill_value. 

262 

263 If both mask and fill_value/fill_value_typ are not None and skipna is True, 

264 the values array will be copied. 

265 

266 For input arrays of boolean or integer dtypes, copies will only occur if a 

267 precomputed mask, a fill_value/fill_value_typ, and skipna=True are 

268 provided. 

269 

270 Parameters 

271 ---------- 

272 values : ndarray 

273 input array to potentially compute mask for 

274 skipna : bool 

275 boolean for whether NaNs should be skipped 

276 fill_value : Any 

277 value to fill NaNs with 

278 fill_value_typ : str 

279 Set to '+inf' or '-inf' to handle dtype-specific infinities 

280 mask : Optional[np.ndarray[bool]] 

281 nan-mask if known 

282 

283 Returns 

284 ------- 

285 values : ndarray 

286 Potential copy of input value array 

287 mask : Optional[ndarray[bool]] 

288 Mask for values, if deemed necessary to compute 

289 """ 

290 # In _get_values is only called from within nanops, and in all cases 

291 # with scalar fill_value. This guarantee is important for the 

292 # np.where call below 

293 

294 mask = _maybe_get_mask(values, skipna, mask) 

295 

296 dtype = values.dtype 

297 

298 datetimelike = False 

299 if values.dtype.kind in "mM": 

300 # changing timedelta64/datetime64 to int64 needs to happen after 

301 # finding `mask` above 

302 values = np.asarray(values.view("i8")) 

303 datetimelike = True 

304 

305 if skipna and (mask is not None): 

306 # get our fill value (in case we need to provide an alternative 

307 # dtype for it) 

308 fill_value = _get_fill_value( 

309 dtype, fill_value=fill_value, fill_value_typ=fill_value_typ 

310 ) 

311 

312 if fill_value is not None: 

313 if mask.any(): 

314 if datetimelike or _na_ok_dtype(dtype): 

315 values = values.copy() 

316 np.putmask(values, mask, fill_value) 

317 else: 

318 # np.where will promote if needed 

319 values = np.where(~mask, values, fill_value) 

320 

321 return values, mask 

322 

323 

324def _get_dtype_max(dtype: np.dtype) -> np.dtype: 

325 # return a platform independent precision dtype 

326 dtype_max = dtype 

327 if dtype.kind in "bi": 

328 dtype_max = np.dtype(np.int64) 

329 elif dtype.kind == "u": 

330 dtype_max = np.dtype(np.uint64) 

331 elif dtype.kind == "f": 

332 dtype_max = np.dtype(np.float64) 

333 return dtype_max 

334 

335 

336def _na_ok_dtype(dtype: DtypeObj) -> bool: 

337 if needs_i8_conversion(dtype): 

338 return False 

339 return not issubclass(dtype.type, np.integer) 

340 

341 

342def _wrap_results(result, dtype: np.dtype, fill_value=None): 

343 """wrap our results if needed""" 

344 if result is NaT: 

345 pass 

346 

347 elif dtype.kind == "M": 

348 if fill_value is None: 

349 # GH#24293 

350 fill_value = iNaT 

351 if not isinstance(result, np.ndarray): 

352 assert not isna(fill_value), "Expected non-null fill_value" 

353 if result == fill_value: 

354 result = np.nan 

355 

356 if isna(result): 

357 result = np.datetime64("NaT", "ns").astype(dtype) 

358 else: 

359 result = np.int64(result).view(dtype) 

360 # retain original unit 

361 result = result.astype(dtype, copy=False) 

362 else: 

363 # If we have float dtype, taking a view will give the wrong result 

364 result = result.astype(dtype) 

365 elif dtype.kind == "m": 

366 if not isinstance(result, np.ndarray): 

367 if result == fill_value or np.isnan(result): 

368 result = np.timedelta64("NaT").astype(dtype) 

369 

370 elif np.fabs(result) > lib.i8max: 

371 # raise if we have a timedelta64[ns] which is too large 

372 raise ValueError("overflow in timedelta operation") 

373 else: 

374 # return a timedelta64 with the original unit 

375 result = np.int64(result).astype(dtype, copy=False) 

376 

377 else: 

378 result = result.astype("m8[ns]").view(dtype) 

379 

380 return result 

381 

382 

383def _datetimelike_compat(func: F) -> F: 

384 """ 

385 If we have datetime64 or timedelta64 values, ensure we have a correct 

386 mask before calling the wrapped function, then cast back afterwards. 

387 """ 

388 

389 @functools.wraps(func) 

390 def new_func( 

391 values: np.ndarray, 

392 *, 

393 axis: AxisInt | None = None, 

394 skipna: bool = True, 

395 mask: npt.NDArray[np.bool_] | None = None, 

396 **kwargs, 

397 ): 

398 orig_values = values 

399 

400 datetimelike = values.dtype.kind in "mM" 

401 if datetimelike and mask is None: 

402 mask = isna(values) 

403 

404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs) 

405 

406 if datetimelike: 

407 result = _wrap_results(result, orig_values.dtype, fill_value=iNaT) 

408 if not skipna: 

409 assert mask is not None # checked above 

410 result = _mask_datetimelike_result(result, axis, mask, orig_values) 

411 

412 return result 

413 

414 return cast(F, new_func) 

415 

416 

417def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray: 

418 """ 

419 Return the missing value for `values`. 

420 

421 Parameters 

422 ---------- 

423 values : ndarray 

424 axis : int or None 

425 axis for the reduction, required if values.ndim > 1. 

426 

427 Returns 

428 ------- 

429 result : scalar or ndarray 

430 For 1-D values, returns a scalar of the correct missing type. 

431 For 2-D values, returns a 1-D array where each element is missing. 

432 """ 

433 # we either return np.nan or pd.NaT 

434 if values.dtype.kind in "iufcb": 

435 values = values.astype("float64") 

436 fill_value = na_value_for_dtype(values.dtype) 

437 

438 if values.ndim == 1: 

439 return fill_value 

440 elif axis is None: 

441 return fill_value 

442 else: 

443 result_shape = values.shape[:axis] + values.shape[axis + 1 :] 

444 

445 return np.full(result_shape, fill_value, dtype=values.dtype) 

446 

447 

448def maybe_operate_rowwise(func: F) -> F: 

449 """ 

450 NumPy operations on C-contiguous ndarrays with axis=1 can be 

451 very slow if axis 1 >> axis 0. 

452 Operate row-by-row and concatenate the results. 

453 """ 

454 

455 @functools.wraps(func) 

456 def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs): 

457 if ( 

458 axis == 1 

459 and values.ndim == 2 

460 and values.flags["C_CONTIGUOUS"] 

461 # only takes this path for wide arrays (long dataframes), for threshold see 

462 # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737 

463 and (values.shape[1] / 1000) > values.shape[0] 

464 and values.dtype != object 

465 and values.dtype != bool 

466 ): 

467 arrs = list(values) 

468 if kwargs.get("mask") is not None: 

469 mask = kwargs.pop("mask") 

470 results = [ 

471 func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs)) 

472 ] 

473 else: 

474 results = [func(x, **kwargs) for x in arrs] 

475 return np.array(results) 

476 

477 return func(values, axis=axis, **kwargs) 

478 

479 return cast(F, newfunc) 

480 

481 

482def nanany( 

483 values: np.ndarray, 

484 *, 

485 axis: AxisInt | None = None, 

486 skipna: bool = True, 

487 mask: npt.NDArray[np.bool_] | None = None, 

488) -> bool: 

489 """ 

490 Check if any elements along an axis evaluate to True. 

491 

492 Parameters 

493 ---------- 

494 values : ndarray 

495 axis : int, optional 

496 skipna : bool, default True 

497 mask : ndarray[bool], optional 

498 nan-mask if known 

499 

500 Returns 

501 ------- 

502 result : bool 

503 

504 Examples 

505 -------- 

506 >>> from pandas.core import nanops 

507 >>> s = pd.Series([1, 2]) 

508 >>> nanops.nanany(s.values) 

509 True 

510 

511 >>> from pandas.core import nanops 

512 >>> s = pd.Series([np.nan]) 

513 >>> nanops.nanany(s.values) 

514 False 

515 """ 

516 if values.dtype.kind in "iub" and mask is None: 

517 # GH#26032 fastpath 

518 # error: Incompatible return value type (got "Union[bool_, ndarray]", 

519 # expected "bool") 

520 return values.any(axis) # type: ignore[return-value] 

521 

522 if values.dtype.kind == "M": 

523 # GH#34479 

524 warnings.warn( 

525 "'any' with datetime64 dtypes is deprecated and will raise in a " 

526 "future version. Use (obj != pd.Timestamp(0)).any() instead.", 

527 FutureWarning, 

528 stacklevel=find_stack_level(), 

529 ) 

530 

531 values, _ = _get_values(values, skipna, fill_value=False, mask=mask) 

532 

533 # For object type, any won't necessarily return 

534 # boolean values (numpy/numpy#4352) 

535 if values.dtype == object: 

536 values = values.astype(bool) 

537 

538 # error: Incompatible return value type (got "Union[bool_, ndarray]", expected 

539 # "bool") 

540 return values.any(axis) # type: ignore[return-value] 

541 

542 

543def nanall( 

544 values: np.ndarray, 

545 *, 

546 axis: AxisInt | None = None, 

547 skipna: bool = True, 

548 mask: npt.NDArray[np.bool_] | None = None, 

549) -> bool: 

550 """ 

551 Check if all elements along an axis evaluate to True. 

552 

553 Parameters 

554 ---------- 

555 values : ndarray 

556 axis : int, optional 

557 skipna : bool, default True 

558 mask : ndarray[bool], optional 

559 nan-mask if known 

560 

561 Returns 

562 ------- 

563 result : bool 

564 

565 Examples 

566 -------- 

567 >>> from pandas.core import nanops 

568 >>> s = pd.Series([1, 2, np.nan]) 

569 >>> nanops.nanall(s.values) 

570 True 

571 

572 >>> from pandas.core import nanops 

573 >>> s = pd.Series([1, 0]) 

574 >>> nanops.nanall(s.values) 

575 False 

576 """ 

577 if values.dtype.kind in "iub" and mask is None: 

578 # GH#26032 fastpath 

579 # error: Incompatible return value type (got "Union[bool_, ndarray]", 

580 # expected "bool") 

581 return values.all(axis) # type: ignore[return-value] 

582 

583 if values.dtype.kind == "M": 

584 # GH#34479 

585 warnings.warn( 

586 "'all' with datetime64 dtypes is deprecated and will raise in a " 

587 "future version. Use (obj != pd.Timestamp(0)).all() instead.", 

588 FutureWarning, 

589 stacklevel=find_stack_level(), 

590 ) 

591 

592 values, _ = _get_values(values, skipna, fill_value=True, mask=mask) 

593 

594 # For object type, all won't necessarily return 

595 # boolean values (numpy/numpy#4352) 

596 if values.dtype == object: 

597 values = values.astype(bool) 

598 

599 # error: Incompatible return value type (got "Union[bool_, ndarray]", expected 

600 # "bool") 

601 return values.all(axis) # type: ignore[return-value] 

602 

603 

604@disallow("M8") 

605@_datetimelike_compat 

606@maybe_operate_rowwise 

607def nansum( 

608 values: np.ndarray, 

609 *, 

610 axis: AxisInt | None = None, 

611 skipna: bool = True, 

612 min_count: int = 0, 

613 mask: npt.NDArray[np.bool_] | None = None, 

614) -> float: 

615 """ 

616 Sum the elements along an axis ignoring NaNs 

617 

618 Parameters 

619 ---------- 

620 values : ndarray[dtype] 

621 axis : int, optional 

622 skipna : bool, default True 

623 min_count: int, default 0 

624 mask : ndarray[bool], optional 

625 nan-mask if known 

626 

627 Returns 

628 ------- 

629 result : dtype 

630 

631 Examples 

632 -------- 

633 >>> from pandas.core import nanops 

634 >>> s = pd.Series([1, 2, np.nan]) 

635 >>> nanops.nansum(s.values) 

636 3.0 

637 """ 

638 dtype = values.dtype 

639 values, mask = _get_values(values, skipna, fill_value=0, mask=mask) 

640 dtype_sum = _get_dtype_max(dtype) 

641 if dtype.kind == "f": 

642 dtype_sum = dtype 

643 elif dtype.kind == "m": 

644 dtype_sum = np.dtype(np.float64) 

645 

646 the_sum = values.sum(axis, dtype=dtype_sum) 

647 the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count) 

648 

649 return the_sum 

650 

651 

652def _mask_datetimelike_result( 

653 result: np.ndarray | np.datetime64 | np.timedelta64, 

654 axis: AxisInt | None, 

655 mask: npt.NDArray[np.bool_], 

656 orig_values: np.ndarray, 

657) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType: 

658 if isinstance(result, np.ndarray): 

659 # we need to apply the mask 

660 result = result.astype("i8").view(orig_values.dtype) 

661 axis_mask = mask.any(axis=axis) 

662 # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any], 

663 # datetime64, timedelta64]") 

664 result[axis_mask] = iNaT # type: ignore[index] 

665 else: 

666 if mask.any(): 

667 return np.int64(iNaT).view(orig_values.dtype) 

668 return result 

669 

670 

671@bottleneck_switch() 

672@_datetimelike_compat 

673def nanmean( 

674 values: np.ndarray, 

675 *, 

676 axis: AxisInt | None = None, 

677 skipna: bool = True, 

678 mask: npt.NDArray[np.bool_] | None = None, 

679) -> float: 

680 """ 

681 Compute the mean of the element along an axis ignoring NaNs 

682 

683 Parameters 

684 ---------- 

685 values : ndarray 

686 axis : int, optional 

687 skipna : bool, default True 

688 mask : ndarray[bool], optional 

689 nan-mask if known 

690 

691 Returns 

692 ------- 

693 float 

694 Unless input is a float array, in which case use the same 

695 precision as the input array. 

696 

697 Examples 

698 -------- 

699 >>> from pandas.core import nanops 

700 >>> s = pd.Series([1, 2, np.nan]) 

701 >>> nanops.nanmean(s.values) 

702 1.5 

703 """ 

704 dtype = values.dtype 

705 values, mask = _get_values(values, skipna, fill_value=0, mask=mask) 

706 dtype_sum = _get_dtype_max(dtype) 

707 dtype_count = np.dtype(np.float64) 

708 

709 # not using needs_i8_conversion because that includes period 

710 if dtype.kind in "mM": 

711 dtype_sum = np.dtype(np.float64) 

712 elif dtype.kind in "iu": 

713 dtype_sum = np.dtype(np.float64) 

714 elif dtype.kind == "f": 

715 dtype_sum = dtype 

716 dtype_count = dtype 

717 

718 count = _get_counts(values.shape, mask, axis, dtype=dtype_count) 

719 the_sum = values.sum(axis, dtype=dtype_sum) 

720 the_sum = _ensure_numeric(the_sum) 

721 

722 if axis is not None and getattr(the_sum, "ndim", False): 

723 count = cast(np.ndarray, count) 

724 with np.errstate(all="ignore"): 

725 # suppress division by zero warnings 

726 the_mean = the_sum / count 

727 ct_mask = count == 0 

728 if ct_mask.any(): 

729 the_mean[ct_mask] = np.nan 

730 else: 

731 the_mean = the_sum / count if count > 0 else np.nan 

732 

733 return the_mean 

734 

735 

736@bottleneck_switch() 

737def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None): 

738 """ 

739 Parameters 

740 ---------- 

741 values : ndarray 

742 axis : int, optional 

743 skipna : bool, default True 

744 mask : ndarray[bool], optional 

745 nan-mask if known 

746 

747 Returns 

748 ------- 

749 result : float 

750 Unless input is a float array, in which case use the same 

751 precision as the input array. 

752 

753 Examples 

754 -------- 

755 >>> from pandas.core import nanops 

756 >>> s = pd.Series([1, np.nan, 2, 2]) 

757 >>> nanops.nanmedian(s.values) 

758 2.0 

759 """ 

760 # for floats without mask, the data already uses NaN as missing value 

761 # indicator, and `mask` will be calculated from that below -> in those 

762 # cases we never need to set NaN to the masked values 

763 using_nan_sentinel = values.dtype.kind == "f" and mask is None 

764 

765 def get_median(x, _mask=None): 

766 if _mask is None: 

767 _mask = notna(x) 

768 else: 

769 _mask = ~_mask 

770 if not skipna and not _mask.all(): 

771 return np.nan 

772 with warnings.catch_warnings(): 

773 # Suppress RuntimeWarning about All-NaN slice 

774 warnings.filterwarnings( 

775 "ignore", "All-NaN slice encountered", RuntimeWarning 

776 ) 

777 res = np.nanmedian(x[_mask]) 

778 return res 

779 

780 dtype = values.dtype 

781 values, mask = _get_values(values, skipna, mask=mask, fill_value=None) 

782 if values.dtype.kind != "f": 

783 if values.dtype == object: 

784 # GH#34671 avoid casting strings to numeric 

785 inferred = lib.infer_dtype(values) 

786 if inferred in ["string", "mixed"]: 

787 raise TypeError(f"Cannot convert {values} to numeric") 

788 try: 

789 values = values.astype("f8") 

790 except ValueError as err: 

791 # e.g. "could not convert string to float: 'a'" 

792 raise TypeError(str(err)) from err 

793 if not using_nan_sentinel and mask is not None: 

794 if not values.flags.writeable: 

795 values = values.copy() 

796 values[mask] = np.nan 

797 

798 notempty = values.size 

799 

800 # an array from a frame 

801 if values.ndim > 1 and axis is not None: 

802 # there's a non-empty array to apply over otherwise numpy raises 

803 if notempty: 

804 if not skipna: 

805 res = np.apply_along_axis(get_median, axis, values) 

806 

807 else: 

808 # fastpath for the skipna case 

809 with warnings.catch_warnings(): 

810 # Suppress RuntimeWarning about All-NaN slice 

811 warnings.filterwarnings( 

812 "ignore", "All-NaN slice encountered", RuntimeWarning 

813 ) 

814 if (values.shape[1] == 1 and axis == 0) or ( 

815 values.shape[0] == 1 and axis == 1 

816 ): 

817 # GH52788: fastpath when squeezable, nanmedian for 2D array slow 

818 res = np.nanmedian(np.squeeze(values), keepdims=True) 

819 else: 

820 res = np.nanmedian(values, axis=axis) 

821 

822 else: 

823 # must return the correct shape, but median is not defined for the 

824 # empty set so return nans of shape "everything but the passed axis" 

825 # since "axis" is where the reduction would occur if we had a nonempty 

826 # array 

827 res = _get_empty_reduction_result(values.shape, axis) 

828 

829 else: 

830 # otherwise return a scalar value 

831 res = get_median(values, mask) if notempty else np.nan 

832 return _wrap_results(res, dtype) 

833 

834 

835def _get_empty_reduction_result( 

836 shape: Shape, 

837 axis: AxisInt, 

838) -> np.ndarray: 

839 """ 

840 The result from a reduction on an empty ndarray. 

841 

842 Parameters 

843 ---------- 

844 shape : Tuple[int, ...] 

845 axis : int 

846 

847 Returns 

848 ------- 

849 np.ndarray 

850 """ 

851 shp = np.array(shape) 

852 dims = np.arange(len(shape)) 

853 ret = np.empty(shp[dims != axis], dtype=np.float64) 

854 ret.fill(np.nan) 

855 return ret 

856 

857 

858def _get_counts_nanvar( 

859 values_shape: Shape, 

860 mask: npt.NDArray[np.bool_] | None, 

861 axis: AxisInt | None, 

862 ddof: int, 

863 dtype: np.dtype = np.dtype(np.float64), 

864) -> tuple[float | np.ndarray, float | np.ndarray]: 

865 """ 

866 Get the count of non-null values along an axis, accounting 

867 for degrees of freedom. 

868 

869 Parameters 

870 ---------- 

871 values_shape : Tuple[int, ...] 

872 shape tuple from values ndarray, used if mask is None 

873 mask : Optional[ndarray[bool]] 

874 locations in values that should be considered missing 

875 axis : Optional[int] 

876 axis to count along 

877 ddof : int 

878 degrees of freedom 

879 dtype : type, optional 

880 type to use for count 

881 

882 Returns 

883 ------- 

884 count : int, np.nan or np.ndarray 

885 d : int, np.nan or np.ndarray 

886 """ 

887 count = _get_counts(values_shape, mask, axis, dtype=dtype) 

888 d = count - dtype.type(ddof) 

889 

890 # always return NaN, never inf 

891 if is_float(count): 

892 if count <= ddof: 

893 # error: Incompatible types in assignment (expression has type 

894 # "float", variable has type "Union[floating[Any], ndarray[Any, 

895 # dtype[floating[Any]]]]") 

896 count = np.nan # type: ignore[assignment] 

897 d = np.nan 

898 else: 

899 # count is not narrowed by is_float check 

900 count = cast(np.ndarray, count) 

901 mask = count <= ddof 

902 if mask.any(): 

903 np.putmask(d, mask, np.nan) 

904 np.putmask(count, mask, np.nan) 

905 return count, d 

906 

907 

908@bottleneck_switch(ddof=1) 

909def nanstd( 

910 values, 

911 *, 

912 axis: AxisInt | None = None, 

913 skipna: bool = True, 

914 ddof: int = 1, 

915 mask=None, 

916): 

917 """ 

918 Compute the standard deviation along given axis while ignoring NaNs 

919 

920 Parameters 

921 ---------- 

922 values : ndarray 

923 axis : int, optional 

924 skipna : bool, default True 

925 ddof : int, default 1 

926 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

927 where N represents the number of elements. 

928 mask : ndarray[bool], optional 

929 nan-mask if known 

930 

931 Returns 

932 ------- 

933 result : float 

934 Unless input is a float array, in which case use the same 

935 precision as the input array. 

936 

937 Examples 

938 -------- 

939 >>> from pandas.core import nanops 

940 >>> s = pd.Series([1, np.nan, 2, 3]) 

941 >>> nanops.nanstd(s.values) 

942 1.0 

943 """ 

944 if values.dtype == "M8[ns]": 

945 values = values.view("m8[ns]") 

946 

947 orig_dtype = values.dtype 

948 values, mask = _get_values(values, skipna, mask=mask) 

949 

950 result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)) 

951 return _wrap_results(result, orig_dtype) 

952 

953 

954@disallow("M8", "m8") 

955@bottleneck_switch(ddof=1) 

956def nanvar( 

957 values: np.ndarray, 

958 *, 

959 axis: AxisInt | None = None, 

960 skipna: bool = True, 

961 ddof: int = 1, 

962 mask=None, 

963): 

964 """ 

965 Compute the variance along given axis while ignoring NaNs 

966 

967 Parameters 

968 ---------- 

969 values : ndarray 

970 axis : int, optional 

971 skipna : bool, default True 

972 ddof : int, default 1 

973 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

974 where N represents the number of elements. 

975 mask : ndarray[bool], optional 

976 nan-mask if known 

977 

978 Returns 

979 ------- 

980 result : float 

981 Unless input is a float array, in which case use the same 

982 precision as the input array. 

983 

984 Examples 

985 -------- 

986 >>> from pandas.core import nanops 

987 >>> s = pd.Series([1, np.nan, 2, 3]) 

988 >>> nanops.nanvar(s.values) 

989 1.0 

990 """ 

991 dtype = values.dtype 

992 mask = _maybe_get_mask(values, skipna, mask) 

993 if dtype.kind in "iu": 

994 values = values.astype("f8") 

995 if mask is not None: 

996 values[mask] = np.nan 

997 

998 if values.dtype.kind == "f": 

999 count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) 

1000 else: 

1001 count, d = _get_counts_nanvar(values.shape, mask, axis, ddof) 

1002 

1003 if skipna and mask is not None: 

1004 values = values.copy() 

1005 np.putmask(values, mask, 0) 

1006 

1007 # xref GH10242 

1008 # Compute variance via two-pass algorithm, which is stable against 

1009 # cancellation errors and relatively accurate for small numbers of 

1010 # observations. 

1011 # 

1012 # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 

1013 avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count 

1014 if axis is not None: 

1015 avg = np.expand_dims(avg, axis) 

1016 sqr = _ensure_numeric((avg - values) ** 2) 

1017 if mask is not None: 

1018 np.putmask(sqr, mask, 0) 

1019 result = sqr.sum(axis=axis, dtype=np.float64) / d 

1020 

1021 # Return variance as np.float64 (the datatype used in the accumulator), 

1022 # unless we were dealing with a float array, in which case use the same 

1023 # precision as the original values array. 

1024 if dtype.kind == "f": 

1025 result = result.astype(dtype, copy=False) 

1026 return result 

1027 

1028 

1029@disallow("M8", "m8") 

1030def nansem( 

1031 values: np.ndarray, 

1032 *, 

1033 axis: AxisInt | None = None, 

1034 skipna: bool = True, 

1035 ddof: int = 1, 

1036 mask: npt.NDArray[np.bool_] | None = None, 

1037) -> float: 

1038 """ 

1039 Compute the standard error in the mean along given axis while ignoring NaNs 

1040 

1041 Parameters 

1042 ---------- 

1043 values : ndarray 

1044 axis : int, optional 

1045 skipna : bool, default True 

1046 ddof : int, default 1 

1047 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

1048 where N represents the number of elements. 

1049 mask : ndarray[bool], optional 

1050 nan-mask if known 

1051 

1052 Returns 

1053 ------- 

1054 result : float64 

1055 Unless input is a float array, in which case use the same 

1056 precision as the input array. 

1057 

1058 Examples 

1059 -------- 

1060 >>> from pandas.core import nanops 

1061 >>> s = pd.Series([1, np.nan, 2, 3]) 

1062 >>> nanops.nansem(s.values) 

1063 0.5773502691896258 

1064 """ 

1065 # This checks if non-numeric-like data is passed with numeric_only=False 

1066 # and raises a TypeError otherwise 

1067 nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask) 

1068 

1069 mask = _maybe_get_mask(values, skipna, mask) 

1070 if values.dtype.kind != "f": 

1071 values = values.astype("f8") 

1072 

1073 if not skipna and mask is not None and mask.any(): 

1074 return np.nan 

1075 

1076 count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) 

1077 var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask) 

1078 

1079 return np.sqrt(var) / np.sqrt(count) 

1080 

1081 

1082def _nanminmax(meth, fill_value_typ): 

1083 @bottleneck_switch(name=f"nan{meth}") 

1084 @_datetimelike_compat 

1085 def reduction( 

1086 values: np.ndarray, 

1087 *, 

1088 axis: AxisInt | None = None, 

1089 skipna: bool = True, 

1090 mask: npt.NDArray[np.bool_] | None = None, 

1091 ): 

1092 if values.size == 0: 

1093 return _na_for_min_count(values, axis) 

1094 

1095 values, mask = _get_values( 

1096 values, skipna, fill_value_typ=fill_value_typ, mask=mask 

1097 ) 

1098 result = getattr(values, meth)(axis) 

1099 result = _maybe_null_out(result, axis, mask, values.shape) 

1100 return result 

1101 

1102 return reduction 

1103 

1104 

1105nanmin = _nanminmax("min", fill_value_typ="+inf") 

1106nanmax = _nanminmax("max", fill_value_typ="-inf") 

1107 

1108 

1109def nanargmax( 

1110 values: np.ndarray, 

1111 *, 

1112 axis: AxisInt | None = None, 

1113 skipna: bool = True, 

1114 mask: npt.NDArray[np.bool_] | None = None, 

1115) -> int | np.ndarray: 

1116 """ 

1117 Parameters 

1118 ---------- 

1119 values : ndarray 

1120 axis : int, optional 

1121 skipna : bool, default True 

1122 mask : ndarray[bool], optional 

1123 nan-mask if known 

1124 

1125 Returns 

1126 ------- 

1127 result : int or ndarray[int] 

1128 The index/indices of max value in specified axis or -1 in the NA case 

1129 

1130 Examples 

1131 -------- 

1132 >>> from pandas.core import nanops 

1133 >>> arr = np.array([1, 2, 3, np.nan, 4]) 

1134 >>> nanops.nanargmax(arr) 

1135 4 

1136 

1137 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) 

1138 >>> arr[2:, 2] = np.nan 

1139 >>> arr 

1140 array([[ 0., 1., 2.], 

1141 [ 3., 4., 5.], 

1142 [ 6., 7., nan], 

1143 [ 9., 10., nan]]) 

1144 >>> nanops.nanargmax(arr, axis=1) 

1145 array([2, 2, 1, 1]) 

1146 """ 

1147 values, mask = _get_values(values, True, fill_value_typ="-inf", mask=mask) 

1148 result = values.argmax(axis) 

1149 # error: Argument 1 to "_maybe_arg_null_out" has incompatible type "Any | 

1150 # signedinteger[Any]"; expected "ndarray[Any, Any]" 

1151 result = _maybe_arg_null_out(result, axis, mask, skipna) # type: ignore[arg-type] 

1152 return result 

1153 

1154 

1155def nanargmin( 

1156 values: np.ndarray, 

1157 *, 

1158 axis: AxisInt | None = None, 

1159 skipna: bool = True, 

1160 mask: npt.NDArray[np.bool_] | None = None, 

1161) -> int | np.ndarray: 

1162 """ 

1163 Parameters 

1164 ---------- 

1165 values : ndarray 

1166 axis : int, optional 

1167 skipna : bool, default True 

1168 mask : ndarray[bool], optional 

1169 nan-mask if known 

1170 

1171 Returns 

1172 ------- 

1173 result : int or ndarray[int] 

1174 The index/indices of min value in specified axis or -1 in the NA case 

1175 

1176 Examples 

1177 -------- 

1178 >>> from pandas.core import nanops 

1179 >>> arr = np.array([1, 2, 3, np.nan, 4]) 

1180 >>> nanops.nanargmin(arr) 

1181 0 

1182 

1183 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) 

1184 >>> arr[2:, 0] = np.nan 

1185 >>> arr 

1186 array([[ 0., 1., 2.], 

1187 [ 3., 4., 5.], 

1188 [nan, 7., 8.], 

1189 [nan, 10., 11.]]) 

1190 >>> nanops.nanargmin(arr, axis=1) 

1191 array([0, 0, 1, 1]) 

1192 """ 

1193 values, mask = _get_values(values, True, fill_value_typ="+inf", mask=mask) 

1194 result = values.argmin(axis) 

1195 # error: Argument 1 to "_maybe_arg_null_out" has incompatible type "Any | 

1196 # signedinteger[Any]"; expected "ndarray[Any, Any]" 

1197 result = _maybe_arg_null_out(result, axis, mask, skipna) # type: ignore[arg-type] 

1198 return result 

1199 

1200 

1201@disallow("M8", "m8") 

1202@maybe_operate_rowwise 

1203def nanskew( 

1204 values: np.ndarray, 

1205 *, 

1206 axis: AxisInt | None = None, 

1207 skipna: bool = True, 

1208 mask: npt.NDArray[np.bool_] | None = None, 

1209) -> float: 

1210 """ 

1211 Compute the sample skewness. 

1212 

1213 The statistic computed here is the adjusted Fisher-Pearson standardized 

1214 moment coefficient G1. The algorithm computes this coefficient directly 

1215 from the second and third central moment. 

1216 

1217 Parameters 

1218 ---------- 

1219 values : ndarray 

1220 axis : int, optional 

1221 skipna : bool, default True 

1222 mask : ndarray[bool], optional 

1223 nan-mask if known 

1224 

1225 Returns 

1226 ------- 

1227 result : float64 

1228 Unless input is a float array, in which case use the same 

1229 precision as the input array. 

1230 

1231 Examples 

1232 -------- 

1233 >>> from pandas.core import nanops 

1234 >>> s = pd.Series([1, np.nan, 1, 2]) 

1235 >>> nanops.nanskew(s.values) 

1236 1.7320508075688787 

1237 """ 

1238 mask = _maybe_get_mask(values, skipna, mask) 

1239 if values.dtype.kind != "f": 

1240 values = values.astype("f8") 

1241 count = _get_counts(values.shape, mask, axis) 

1242 else: 

1243 count = _get_counts(values.shape, mask, axis, dtype=values.dtype) 

1244 

1245 if skipna and mask is not None: 

1246 values = values.copy() 

1247 np.putmask(values, mask, 0) 

1248 elif not skipna and mask is not None and mask.any(): 

1249 return np.nan 

1250 

1251 with np.errstate(invalid="ignore", divide="ignore"): 

1252 mean = values.sum(axis, dtype=np.float64) / count 

1253 if axis is not None: 

1254 mean = np.expand_dims(mean, axis) 

1255 

1256 adjusted = values - mean 

1257 if skipna and mask is not None: 

1258 np.putmask(adjusted, mask, 0) 

1259 adjusted2 = adjusted**2 

1260 adjusted3 = adjusted2 * adjusted 

1261 m2 = adjusted2.sum(axis, dtype=np.float64) 

1262 m3 = adjusted3.sum(axis, dtype=np.float64) 

1263 

1264 # floating point error 

1265 # 

1266 # #18044 in _libs/windows.pyx calc_skew follow this behavior 

1267 # to fix the fperr to treat m2 <1e-14 as zero 

1268 m2 = _zero_out_fperr(m2) 

1269 m3 = _zero_out_fperr(m3) 

1270 

1271 with np.errstate(invalid="ignore", divide="ignore"): 

1272 result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5) 

1273 

1274 dtype = values.dtype 

1275 if dtype.kind == "f": 

1276 result = result.astype(dtype, copy=False) 

1277 

1278 if isinstance(result, np.ndarray): 

1279 result = np.where(m2 == 0, 0, result) 

1280 result[count < 3] = np.nan 

1281 else: 

1282 result = dtype.type(0) if m2 == 0 else result 

1283 if count < 3: 

1284 return np.nan 

1285 

1286 return result 

1287 

1288 

1289@disallow("M8", "m8") 

1290@maybe_operate_rowwise 

1291def nankurt( 

1292 values: np.ndarray, 

1293 *, 

1294 axis: AxisInt | None = None, 

1295 skipna: bool = True, 

1296 mask: npt.NDArray[np.bool_] | None = None, 

1297) -> float: 

1298 """ 

1299 Compute the sample excess kurtosis 

1300 

1301 The statistic computed here is the adjusted Fisher-Pearson standardized 

1302 moment coefficient G2, computed directly from the second and fourth 

1303 central moment. 

1304 

1305 Parameters 

1306 ---------- 

1307 values : ndarray 

1308 axis : int, optional 

1309 skipna : bool, default True 

1310 mask : ndarray[bool], optional 

1311 nan-mask if known 

1312 

1313 Returns 

1314 ------- 

1315 result : float64 

1316 Unless input is a float array, in which case use the same 

1317 precision as the input array. 

1318 

1319 Examples 

1320 -------- 

1321 >>> from pandas.core import nanops 

1322 >>> s = pd.Series([1, np.nan, 1, 3, 2]) 

1323 >>> nanops.nankurt(s.values) 

1324 -1.2892561983471076 

1325 """ 

1326 mask = _maybe_get_mask(values, skipna, mask) 

1327 if values.dtype.kind != "f": 

1328 values = values.astype("f8") 

1329 count = _get_counts(values.shape, mask, axis) 

1330 else: 

1331 count = _get_counts(values.shape, mask, axis, dtype=values.dtype) 

1332 

1333 if skipna and mask is not None: 

1334 values = values.copy() 

1335 np.putmask(values, mask, 0) 

1336 elif not skipna and mask is not None and mask.any(): 

1337 return np.nan 

1338 

1339 with np.errstate(invalid="ignore", divide="ignore"): 

1340 mean = values.sum(axis, dtype=np.float64) / count 

1341 if axis is not None: 

1342 mean = np.expand_dims(mean, axis) 

1343 

1344 adjusted = values - mean 

1345 if skipna and mask is not None: 

1346 np.putmask(adjusted, mask, 0) 

1347 adjusted2 = adjusted**2 

1348 adjusted4 = adjusted2**2 

1349 m2 = adjusted2.sum(axis, dtype=np.float64) 

1350 m4 = adjusted4.sum(axis, dtype=np.float64) 

1351 

1352 with np.errstate(invalid="ignore", divide="ignore"): 

1353 adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) 

1354 numerator = count * (count + 1) * (count - 1) * m4 

1355 denominator = (count - 2) * (count - 3) * m2**2 

1356 

1357 # floating point error 

1358 # 

1359 # #18044 in _libs/windows.pyx calc_kurt follow this behavior 

1360 # to fix the fperr to treat denom <1e-14 as zero 

1361 numerator = _zero_out_fperr(numerator) 

1362 denominator = _zero_out_fperr(denominator) 

1363 

1364 if not isinstance(denominator, np.ndarray): 

1365 # if ``denom`` is a scalar, check these corner cases first before 

1366 # doing division 

1367 if count < 4: 

1368 return np.nan 

1369 if denominator == 0: 

1370 return values.dtype.type(0) 

1371 

1372 with np.errstate(invalid="ignore", divide="ignore"): 

1373 result = numerator / denominator - adj 

1374 

1375 dtype = values.dtype 

1376 if dtype.kind == "f": 

1377 result = result.astype(dtype, copy=False) 

1378 

1379 if isinstance(result, np.ndarray): 

1380 result = np.where(denominator == 0, 0, result) 

1381 result[count < 4] = np.nan 

1382 

1383 return result 

1384 

1385 

1386@disallow("M8", "m8") 

1387@maybe_operate_rowwise 

1388def nanprod( 

1389 values: np.ndarray, 

1390 *, 

1391 axis: AxisInt | None = None, 

1392 skipna: bool = True, 

1393 min_count: int = 0, 

1394 mask: npt.NDArray[np.bool_] | None = None, 

1395) -> float: 

1396 """ 

1397 Parameters 

1398 ---------- 

1399 values : ndarray[dtype] 

1400 axis : int, optional 

1401 skipna : bool, default True 

1402 min_count: int, default 0 

1403 mask : ndarray[bool], optional 

1404 nan-mask if known 

1405 

1406 Returns 

1407 ------- 

1408 Dtype 

1409 The product of all elements on a given axis. ( NaNs are treated as 1) 

1410 

1411 Examples 

1412 -------- 

1413 >>> from pandas.core import nanops 

1414 >>> s = pd.Series([1, 2, 3, np.nan]) 

1415 >>> nanops.nanprod(s.values) 

1416 6.0 

1417 """ 

1418 mask = _maybe_get_mask(values, skipna, mask) 

1419 

1420 if skipna and mask is not None: 

1421 values = values.copy() 

1422 values[mask] = 1 

1423 result = values.prod(axis) 

1424 # error: Incompatible return value type (got "Union[ndarray, float]", expected 

1425 # "float") 

1426 return _maybe_null_out( # type: ignore[return-value] 

1427 result, axis, mask, values.shape, min_count=min_count 

1428 ) 

1429 

1430 

1431def _maybe_arg_null_out( 

1432 result: np.ndarray, 

1433 axis: AxisInt | None, 

1434 mask: npt.NDArray[np.bool_] | None, 

1435 skipna: bool, 

1436) -> np.ndarray | int: 

1437 # helper function for nanargmin/nanargmax 

1438 if mask is None: 

1439 return result 

1440 

1441 if axis is None or not getattr(result, "ndim", False): 

1442 if skipna: 

1443 if mask.all(): 

1444 return -1 

1445 else: 

1446 if mask.any(): 

1447 return -1 

1448 else: 

1449 if skipna: 

1450 na_mask = mask.all(axis) 

1451 else: 

1452 na_mask = mask.any(axis) 

1453 if na_mask.any(): 

1454 result[na_mask] = -1 

1455 return result 

1456 

1457 

1458def _get_counts( 

1459 values_shape: Shape, 

1460 mask: npt.NDArray[np.bool_] | None, 

1461 axis: AxisInt | None, 

1462 dtype: np.dtype[np.floating] = np.dtype(np.float64), 

1463) -> np.floating | npt.NDArray[np.floating]: 

1464 """ 

1465 Get the count of non-null values along an axis 

1466 

1467 Parameters 

1468 ---------- 

1469 values_shape : tuple of int 

1470 shape tuple from values ndarray, used if mask is None 

1471 mask : Optional[ndarray[bool]] 

1472 locations in values that should be considered missing 

1473 axis : Optional[int] 

1474 axis to count along 

1475 dtype : type, optional 

1476 type to use for count 

1477 

1478 Returns 

1479 ------- 

1480 count : scalar or array 

1481 """ 

1482 if axis is None: 

1483 if mask is not None: 

1484 n = mask.size - mask.sum() 

1485 else: 

1486 n = np.prod(values_shape) 

1487 return dtype.type(n) 

1488 

1489 if mask is not None: 

1490 count = mask.shape[axis] - mask.sum(axis) 

1491 else: 

1492 count = values_shape[axis] 

1493 

1494 if is_integer(count): 

1495 return dtype.type(count) 

1496 return count.astype(dtype, copy=False) 

1497 

1498 

1499def _maybe_null_out( 

1500 result: np.ndarray | float | NaTType, 

1501 axis: AxisInt | None, 

1502 mask: npt.NDArray[np.bool_] | None, 

1503 shape: tuple[int, ...], 

1504 min_count: int = 1, 

1505) -> np.ndarray | float | NaTType: 

1506 """ 

1507 Returns 

1508 ------- 

1509 Dtype 

1510 The product of all elements on a given axis. ( NaNs are treated as 1) 

1511 """ 

1512 if mask is None and min_count == 0: 

1513 # nothing to check; short-circuit 

1514 return result 

1515 

1516 if axis is not None and isinstance(result, np.ndarray): 

1517 if mask is not None: 

1518 null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 

1519 else: 

1520 # we have no nulls, kept mask=None in _maybe_get_mask 

1521 below_count = shape[axis] - min_count < 0 

1522 new_shape = shape[:axis] + shape[axis + 1 :] 

1523 null_mask = np.broadcast_to(below_count, new_shape) 

1524 

1525 if np.any(null_mask): 

1526 if is_numeric_dtype(result): 

1527 if np.iscomplexobj(result): 

1528 result = result.astype("c16") 

1529 elif not is_float_dtype(result): 

1530 result = result.astype("f8", copy=False) 

1531 result[null_mask] = np.nan 

1532 else: 

1533 # GH12941, use None to auto cast null 

1534 result[null_mask] = None 

1535 elif result is not NaT: 

1536 if check_below_min_count(shape, mask, min_count): 

1537 result_dtype = getattr(result, "dtype", None) 

1538 if is_float_dtype(result_dtype): 

1539 # error: Item "None" of "Optional[Any]" has no attribute "type" 

1540 result = result_dtype.type("nan") # type: ignore[union-attr] 

1541 else: 

1542 result = np.nan 

1543 

1544 return result 

1545 

1546 

1547def check_below_min_count( 

1548 shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int 

1549) -> bool: 

1550 """ 

1551 Check for the `min_count` keyword. Returns True if below `min_count` (when 

1552 missing value should be returned from the reduction). 

1553 

1554 Parameters 

1555 ---------- 

1556 shape : tuple 

1557 The shape of the values (`values.shape`). 

1558 mask : ndarray[bool] or None 

1559 Boolean numpy array (typically of same shape as `shape`) or None. 

1560 min_count : int 

1561 Keyword passed through from sum/prod call. 

1562 

1563 Returns 

1564 ------- 

1565 bool 

1566 """ 

1567 if min_count > 0: 

1568 if mask is None: 

1569 # no missing values, only check size 

1570 non_nulls = np.prod(shape) 

1571 else: 

1572 non_nulls = mask.size - mask.sum() 

1573 if non_nulls < min_count: 

1574 return True 

1575 return False 

1576 

1577 

1578def _zero_out_fperr(arg): 

1579 # #18044 reference this behavior to fix rolling skew/kurt issue 

1580 if isinstance(arg, np.ndarray): 

1581 return np.where(np.abs(arg) < 1e-14, 0, arg) 

1582 else: 

1583 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg 

1584 

1585 

1586@disallow("M8", "m8") 

1587def nancorr( 

1588 a: np.ndarray, 

1589 b: np.ndarray, 

1590 *, 

1591 method: CorrelationMethod = "pearson", 

1592 min_periods: int | None = None, 

1593) -> float: 

1594 """ 

1595 a, b: ndarrays 

1596 """ 

1597 if len(a) != len(b): 

1598 raise AssertionError("Operands to nancorr must have same size") 

1599 

1600 if min_periods is None: 

1601 min_periods = 1 

1602 

1603 valid = notna(a) & notna(b) 

1604 if not valid.all(): 

1605 a = a[valid] 

1606 b = b[valid] 

1607 

1608 if len(a) < min_periods: 

1609 return np.nan 

1610 

1611 a = _ensure_numeric(a) 

1612 b = _ensure_numeric(b) 

1613 

1614 f = get_corr_func(method) 

1615 return f(a, b) 

1616 

1617 

1618def get_corr_func( 

1619 method: CorrelationMethod, 

1620) -> Callable[[np.ndarray, np.ndarray], float]: 

1621 if method == "kendall": 

1622 from scipy.stats import kendalltau 

1623 

1624 def func(a, b): 

1625 return kendalltau(a, b)[0] 

1626 

1627 return func 

1628 elif method == "spearman": 

1629 from scipy.stats import spearmanr 

1630 

1631 def func(a, b): 

1632 return spearmanr(a, b)[0] 

1633 

1634 return func 

1635 elif method == "pearson": 

1636 

1637 def func(a, b): 

1638 return np.corrcoef(a, b)[0, 1] 

1639 

1640 return func 

1641 elif callable(method): 

1642 return method 

1643 

1644 raise ValueError( 

1645 f"Unknown method '{method}', expected one of " 

1646 "'kendall', 'spearman', 'pearson', or callable" 

1647 ) 

1648 

1649 

1650@disallow("M8", "m8") 

1651def nancov( 

1652 a: np.ndarray, 

1653 b: np.ndarray, 

1654 *, 

1655 min_periods: int | None = None, 

1656 ddof: int | None = 1, 

1657) -> float: 

1658 if len(a) != len(b): 

1659 raise AssertionError("Operands to nancov must have same size") 

1660 

1661 if min_periods is None: 

1662 min_periods = 1 

1663 

1664 valid = notna(a) & notna(b) 

1665 if not valid.all(): 

1666 a = a[valid] 

1667 b = b[valid] 

1668 

1669 if len(a) < min_periods: 

1670 return np.nan 

1671 

1672 a = _ensure_numeric(a) 

1673 b = _ensure_numeric(b) 

1674 

1675 return np.cov(a, b, ddof=ddof)[0, 1] 

1676 

1677 

1678def _ensure_numeric(x): 

1679 if isinstance(x, np.ndarray): 

1680 if x.dtype.kind in "biu": 

1681 x = x.astype(np.float64) 

1682 elif x.dtype == object: 

1683 inferred = lib.infer_dtype(x) 

1684 if inferred in ["string", "mixed"]: 

1685 # GH#44008, GH#36703 avoid casting e.g. strings to numeric 

1686 raise TypeError(f"Could not convert {x} to numeric") 

1687 try: 

1688 x = x.astype(np.complex128) 

1689 except (TypeError, ValueError): 

1690 try: 

1691 x = x.astype(np.float64) 

1692 except ValueError as err: 

1693 # GH#29941 we get here with object arrays containing strs 

1694 raise TypeError(f"Could not convert {x} to numeric") from err 

1695 else: 

1696 if not np.any(np.imag(x)): 

1697 x = x.real 

1698 elif not (is_float(x) or is_integer(x) or is_complex(x)): 

1699 if isinstance(x, str): 

1700 # GH#44008, GH#36703 avoid casting e.g. strings to numeric 

1701 raise TypeError(f"Could not convert string '{x}' to numeric") 

1702 try: 

1703 x = float(x) 

1704 except (TypeError, ValueError): 

1705 # e.g. "1+1j" or "foo" 

1706 try: 

1707 x = complex(x) 

1708 except ValueError as err: 

1709 # e.g. "foo" 

1710 raise TypeError(f"Could not convert {x} to numeric") from err 

1711 return x 

1712 

1713 

1714def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: 

1715 """ 

1716 Cumulative function with skipna support. 

1717 

1718 Parameters 

1719 ---------- 

1720 values : np.ndarray or ExtensionArray 

1721 accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate} 

1722 skipna : bool 

1723 

1724 Returns 

1725 ------- 

1726 np.ndarray or ExtensionArray 

1727 """ 

1728 mask_a, mask_b = { 

1729 np.cumprod: (1.0, np.nan), 

1730 np.maximum.accumulate: (-np.inf, np.nan), 

1731 np.cumsum: (0.0, np.nan), 

1732 np.minimum.accumulate: (np.inf, np.nan), 

1733 }[accum_func] 

1734 

1735 # This should go through ea interface 

1736 assert values.dtype.kind not in "mM" 

1737 

1738 # We will be applying this function to block values 

1739 if skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): 

1740 vals = values.copy() 

1741 mask = isna(vals) 

1742 vals[mask] = mask_a 

1743 result = accum_func(vals, axis=0) 

1744 result[mask] = mask_b 

1745 else: 

1746 result = accum_func(values, axis=0) 

1747 

1748 return result