Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/nanops.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

626 statements  

1from __future__ import annotations 

2 

3import functools 

4import itertools 

5import operator 

6from typing import ( 

7 Any, 

8 Callable, 

9 cast, 

10) 

11import warnings 

12 

13import numpy as np 

14 

15from pandas._config import get_option 

16 

17from pandas._libs import ( 

18 NaT, 

19 NaTType, 

20 iNaT, 

21 lib, 

22) 

23from pandas._typing import ( 

24 ArrayLike, 

25 AxisInt, 

26 CorrelationMethod, 

27 Dtype, 

28 DtypeObj, 

29 F, 

30 Scalar, 

31 Shape, 

32 npt, 

33) 

34from pandas.compat._optional import import_optional_dependency 

35from pandas.util._exceptions import find_stack_level 

36 

37from pandas.core.dtypes.common import ( 

38 is_any_int_dtype, 

39 is_bool_dtype, 

40 is_complex, 

41 is_datetime64_any_dtype, 

42 is_float, 

43 is_float_dtype, 

44 is_integer, 

45 is_integer_dtype, 

46 is_numeric_dtype, 

47 is_object_dtype, 

48 is_scalar, 

49 is_timedelta64_dtype, 

50 needs_i8_conversion, 

51 pandas_dtype, 

52) 

53from pandas.core.dtypes.dtypes import PeriodDtype 

54from pandas.core.dtypes.missing import ( 

55 isna, 

56 na_value_for_dtype, 

57 notna, 

58) 

59 

60from pandas.core.construction import extract_array 

61 

# Optional bottleneck acceleration: warn (don't fail) when it is unavailable.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
# Runtime switch, toggled via set_use_bottleneck() below (initialized from the
# "compute.use_bottleneck" option at import time).
_USE_BOTTLENECK = False

65 

66 

def set_use_bottleneck(v: bool = True) -> None:
    """Enable or disable bottleneck-accelerated reductions.

    A no-op when bottleneck is not installed, so the module-level flag can
    only be True if the library is actually importable.
    """
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v

72 

73 

# Initialize the module-level bottleneck flag from the pandas option at import time.
set_use_bottleneck(get_option("compute.use_bottleneck"))

75 

76 

class disallow:
    """Decorator rejecting arguments of the given dtypes.

    If any positional or keyword argument has a ``.dtype`` whose type is a
    subclass of one of *dtypes*, the wrapped reduction raises ``TypeError``
    instead of being called.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        self.dtypes = tuple(pandas_dtype(d).type for d in dtypes)

    def check(self, obj) -> bool:
        """Return True when ``obj`` carries one of the disallowed dtypes."""
        if not hasattr(obj, "dtype"):
            return False
        return issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            for candidate in itertools.chain(args, kwargs.values()):
                if self.check(candidate):
                    f_name = f.__name__.replace("nan", "")
                    raise TypeError(
                        f"reduction operation '{f_name}' not allowed for this dtype"
                    )
            try:
                with np.errstate(invalid="ignore"):
                    return f(*args, **kwargs)
            except ValueError as e:
                # Disallowed operations on object arrays (e.g. of strings)
                # surface as ValueError from numpy; re-raise as the more
                # conventional TypeError.
                if is_object_dtype(args[0]):
                    raise TypeError(e) from e
                raise

        return cast(F, _f)

107 

108 

class bottleneck_switch:
    """Decorator dispatching to a same-named bottleneck function when safe.

    Falls back to the decorated (``alt``) implementation when bottleneck is
    disabled/unavailable, when ``skipna=False``, when a ``mask`` is supplied,
    or when the dtype is unsuitable (see ``_bn_ok_dtype``).

    Parameters
    ----------
    name : str, optional
        Bottleneck function name; defaults to the decorated function's name.
    **kwargs
        Default keyword arguments injected into every call (e.g. ``ddof=1``).
    """

    def __init__(self, name=None, **kwargs) -> None:
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bottleneck is missing entirely, or has no function of this name
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: AxisInt | None = None,
            skipna: bool = True,
            **kwds,
        ):
            if len(self.kwargs) > 0:
                # Apply decorator-supplied defaults without overriding
                # caller-provided keyword arguments.
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)

163 

164 

165def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: 

166 # Bottleneck chokes on datetime64, PeriodDtype (or and EA) 

167 if not is_object_dtype(dtype) and not needs_i8_conversion(dtype): 

168 # GH 42878 

169 # Bottleneck uses naive summation leading to O(n) loss of precision 

170 # unlike numpy which implements pairwise summation, which has O(log(n)) loss 

171 # crossref: https://github.com/pydata/bottleneck/issues/379 

172 

173 # GH 15507 

174 # bottleneck does not properly upcast during the sum 

175 # so can overflow 

176 

177 # GH 9422 

178 # further we also want to preserve NaN when all elements 

179 # are NaN, unlike bottleneck/numpy which consider this 

180 # to be 0 

181 return name not in ["nansum", "nanprod", "nanmean"] 

182 return False 

183 

184 

185def _has_infs(result) -> bool: 

186 if isinstance(result, np.ndarray): 

187 if result.dtype in ("f8", "f4"): 

188 # Note: outside of an nanops-specific test, we always have 

189 # result.ndim == 1, so there is no risk of this ravel making a copy. 

190 return lib.has_infs(result.ravel("K")) 

191 try: 

192 return np.isinf(result).any() 

193 except (TypeError, NotImplementedError): 

194 # if it doesn't support infs, then it can't have infs 

195 return False 

196 

197 

198def _get_fill_value( 

199 dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None 

200): 

201 """return the correct fill value for the dtype of the values""" 

202 if fill_value is not None: 

203 return fill_value 

204 if _na_ok_dtype(dtype): 

205 if fill_value_typ is None: 

206 return np.nan 

207 else: 

208 if fill_value_typ == "+inf": 

209 return np.inf 

210 else: 

211 return -np.inf 

212 else: 

213 if fill_value_typ == "+inf": 

214 # need the max int here 

215 return lib.i8max 

216 else: 

217 return iNaT 

218 

219 

220def _maybe_get_mask( 

221 values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None 

222) -> npt.NDArray[np.bool_] | None: 

223 """ 

224 Compute a mask if and only if necessary. 

225 

226 This function will compute a mask iff it is necessary. Otherwise, 

227 return the provided mask (potentially None) when a mask does not need to be 

228 computed. 

229 

230 A mask is never necessary if the values array is of boolean or integer 

231 dtypes, as these are incapable of storing NaNs. If passing a NaN-capable 

232 dtype that is interpretable as either boolean or integer data (eg, 

233 timedelta64), a mask must be provided. 

234 

235 If the skipna parameter is False, a new mask will not be computed. 

236 

237 The mask is computed using isna() by default. Setting invert=True selects 

238 notna() as the masking function. 

239 

240 Parameters 

241 ---------- 

242 values : ndarray 

243 input array to potentially compute mask for 

244 skipna : bool 

245 boolean for whether NaNs should be skipped 

246 mask : Optional[ndarray] 

247 nan-mask if known 

248 

249 Returns 

250 ------- 

251 Optional[np.ndarray[bool]] 

252 """ 

253 if mask is None: 

254 if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): 

255 # Boolean data cannot contain nulls, so signal via mask being None 

256 return None 

257 

258 if skipna or needs_i8_conversion(values.dtype): 

259 mask = isna(values) 

260 

261 return mask 

262 

263 

def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # _get_values is only called from within nanops, and in all cases
    # with scalar fill_value.  This guarantee is important for the
    # np.where call below
    assert is_scalar(fill_value)
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]

    mask = _maybe_get_mask(values, skipna, mask)

    # `dtype` is the *original* dtype; callers use it to wrap raw results
    # back to datetimelike types after the i8 view below.
    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                # dtype can hold the fill value directly: fill in place on
                # a copy (datetimelike values are already viewed as i8 here)
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value

351 

352 

353def _na_ok_dtype(dtype: DtypeObj) -> bool: 

354 if needs_i8_conversion(dtype): 

355 return False 

356 return not issubclass(dtype.type, np.integer) 

357 

358 

def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """wrap our results if needed

    Cast a raw (int/float) reduction result back to the datetimelike
    ``dtype`` it came from, mapping the ``fill_value`` sentinel and NaN back
    to NaT.  Results for non-datetimelike dtypes pass through unchanged.
    """
    if result is NaT:
        # already the canonical missing value; nothing to do
        pass

    elif is_datetime64_any_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            # scalar result: translate the sentinel back to NaT
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns").astype(dtype)
            else:
                result = np.int64(result).view(dtype)
                # retain original unit
                result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result

398 

399 

def _datetimelike_compat(func: F) -> F:
    """
    If we have datetime64 or timedelta64 values, ensure we have a correct
    mask before calling the wrapped function, then cast back afterwards.
    """

    @functools.wraps(func)
    def new_func(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
        **kwargs,
    ):
        # keep a handle on the original so we can restore its dtype below
        orig_values = values

        datetimelike = values.dtype.kind in ["m", "M"]
        if datetimelike and mask is None:
            # compute the NaT mask up front so `func` can skip those entries
            mask = isna(values)

        result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if datetimelike:
            # cast the raw result back to the original datetimelike dtype
            result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)
            if not skipna:
                # with skipna=False, any missing input makes the output NaT
                assert mask is not None  # checked above
                result = _mask_datetimelike_result(result, axis, mask, orig_values)

        return result

    return cast(F, new_func)

432 

433 

434def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray: 

435 """ 

436 Return the missing value for `values`. 

437 

438 Parameters 

439 ---------- 

440 values : ndarray 

441 axis : int or None 

442 axis for the reduction, required if values.ndim > 1. 

443 

444 Returns 

445 ------- 

446 result : scalar or ndarray 

447 For 1-D values, returns a scalar of the correct missing type. 

448 For 2-D values, returns a 1-D array where each element is missing. 

449 """ 

450 # we either return np.nan or pd.NaT 

451 if is_numeric_dtype(values): 

452 values = values.astype("float64") 

453 fill_value = na_value_for_dtype(values.dtype) 

454 

455 if values.ndim == 1: 

456 return fill_value 

457 elif axis is None: 

458 return fill_value 

459 else: 

460 result_shape = values.shape[:axis] + values.shape[axis + 1 :] 

461 

462 return np.full(result_shape, fill_value, dtype=values.dtype) 

463 

464 

def maybe_operate_rowwise(func: F) -> F:
    """
    NumPy operations on C-contiguous ndarrays with axis=1 can be
    very slow if axis 1 >> axis 0.
    Operate row-by-row and concatenate the results.
    """

    @functools.wraps(func)
    def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs):
        # Only take the row-by-row path for very wide 2-D C-contiguous
        # non-object, non-bool arrays; for the threshold see
        # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
        take_rowwise_path = (
            axis == 1
            and values.ndim == 2
            and values.flags["C_CONTIGUOUS"]
            and (values.shape[1] / 1000) > values.shape[0]
            and values.dtype != object
            and values.dtype != bool
        )
        if not take_rowwise_path:
            return func(values, axis=axis, **kwargs)

        rows = list(values)
        if kwargs.get("mask") is not None:
            mask = kwargs.pop("mask")
            row_results = [
                func(rows[i], mask=mask[i], **kwargs) for i in range(len(rows))
            ]
        else:
            row_results = [func(row, **kwargs) for row in rows]
        return np.array(row_results)

    return cast(F, newfunc)

497 

498 

def nanany(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2])
    >>> nanops.nanany(s)
    True

    >>> from pandas.core import nanops
    >>> s = pd.Series([np.nan])
    >>> nanops.nanany(s)
    False
    """
    if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
        # GH#34479
        warnings.warn(
            "'any' with datetime64 dtypes is deprecated and will raise in a "
            "future version. Use (obj != pd.Timestamp(0)).any() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # Filling with False means missing entries can never flip the result.
    values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask)

    if values.dtype == object:
        # For object type, any won't necessarily return
        # boolean values (numpy/numpy#4352)
        values = values.astype(bool)

    res = values.any(axis)
    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return res  # type: ignore[return-value]

552 

553 

def nanall(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if all elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanall(s)
    True

    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 0])
    >>> nanops.nanall(s)
    False
    """
    if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
        # GH#34479
        warnings.warn(
            "'all' with datetime64 dtypes is deprecated and will raise in a "
            "future version. Use (obj != pd.Timestamp(0)).all() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # Filling with True means missing entries can never flip the result.
    values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask)

    if values.dtype == object:
        # For object type, all won't necessarily return
        # boolean values (numpy/numpy#4352)
        values = values.astype(bool)

    res = values.all(axis)
    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return res  # type: ignore[return-value]

607 

608 

@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s)
    3.0
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    # Pick the accumulator dtype: floats keep their own precision, timedeltas
    # accumulate as float64, everything else uses the platform-independent max.
    if is_float_dtype(dtype):
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
    else:
        dtype_sum = dtype_max

    the_sum = values.sum(axis, dtype=dtype_sum)
    # Honor min_count: results with too few non-NA observations become NA.
    return _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)

656 

657 

def _mask_datetimelike_result(
    result: np.ndarray | np.datetime64 | np.timedelta64,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_],
    orig_values: np.ndarray,
) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
    """Set entries of ``result`` to NaT wherever ``mask`` is True along ``axis``.

    Used by skipna=False datetimelike reductions: any missing input makes the
    corresponding output missing.
    """
    if isinstance(result, np.ndarray):
        # we need to apply the mask; reinterpret as int64 so the iNaT sentinel
        # can be written, then view back as the original datetimelike dtype
        result = result.astype("i8").view(orig_values.dtype)
        axis_mask = mask.any(axis=axis)
        # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
        # datetime64, timedelta64]")
        result[axis_mask] = iNaT  # type: ignore[index]
    else:
        # scalar result: any missing input at all forces NaT
        if mask.any():
            return np.int64(iNaT).view(orig_values.dtype)
    return result

675 

676 

@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_count = np.dtype(np.float64)

    # not using needs_i8_conversion because that includes period
    if dtype.kind in ["m", "M"] or is_integer_dtype(dtype):
        # datetimelike and integer data accumulate as float64
        dtype_sum = np.dtype(np.float64)
    elif is_float_dtype(dtype):
        # float data keeps its own precision for both sum and count
        dtype_sum = dtype
        dtype_count = dtype
    else:
        dtype_sum = dtype_max

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        # slices with zero observations have no mean
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
    else:
        the_mean = the_sum / count if count > 0 else np.nan

    return the_mean

741 

742 

@bottleneck_switch()
def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None):
    """
    Compute the median along the given axis while ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s)
    2.0
    """

    def get_median(x, _mask=None):
        # Median of a single (1-D) slice; with skipna=False any NaN in the
        # slice makes the result NaN.
        if _mask is None:
            _mask = notna(x)
        else:
            _mask = ~_mask
        if not skipna and not _mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings(
                "ignore", "All-NaN slice encountered", RuntimeWarning
            )
            res = np.nanmedian(x[_mask])
        return res

    values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask, fill_value=0)
    if not is_float_dtype(values.dtype):
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
        if mask is not None:
            # re-inject NaNs at masked positions after the cast to float
            values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:
        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings(
                        "ignore", "All-NaN slice encountered", RuntimeWarning
                    )
                    res = np.nanmedian(values, axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            # np.float64 instead of np.float_: the latter alias was removed in
            # NumPy 2.0; they name the same type.
            res = get_empty_reduction_result(values.shape, axis, np.float64, np.nan)

    else:
        # otherwise return a scalar value
        res = get_median(values, mask) if notempty else np.nan
    return _wrap_results(res, dtype)

822 

823 

def get_empty_reduction_result(
    shape: tuple[int, ...],
    axis: AxisInt,
    dtype: np.dtype | type[np.floating],
    fill_value: Any,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray.

    Parameters
    ----------
    shape : Tuple[int]
    axis : int
    dtype : np.dtype
    fill_value : Any

    Returns
    -------
    np.ndarray
    """
    # Drop the reduced axis from the shape and fill every entry with the
    # NA/fill value.
    remaining = np.array(shape)[np.arange(len(shape)) != axis]
    return np.full(remaining, fill_value, dtype=dtype)

849 

850 

851def _get_counts_nanvar( 

852 values_shape: Shape, 

853 mask: npt.NDArray[np.bool_] | None, 

854 axis: AxisInt | None, 

855 ddof: int, 

856 dtype: np.dtype = np.dtype(np.float64), 

857) -> tuple[float | np.ndarray, float | np.ndarray]: 

858 """ 

859 Get the count of non-null values along an axis, accounting 

860 for degrees of freedom. 

861 

862 Parameters 

863 ---------- 

864 values_shape : Tuple[int, ...] 

865 shape tuple from values ndarray, used if mask is None 

866 mask : Optional[ndarray[bool]] 

867 locations in values that should be considered missing 

868 axis : Optional[int] 

869 axis to count along 

870 ddof : int 

871 degrees of freedom 

872 dtype : type, optional 

873 type to use for count 

874 

875 Returns 

876 ------- 

877 count : int, np.nan or np.ndarray 

878 d : int, np.nan or np.ndarray 

879 """ 

880 count = _get_counts(values_shape, mask, axis, dtype=dtype) 

881 d = count - dtype.type(ddof) 

882 

883 # always return NaN, never inf 

884 if is_scalar(count): 

885 if count <= ddof: 

886 count = np.nan 

887 d = np.nan 

888 else: 

889 # count is not narrowed by is_scalar check 

890 count = cast(np.ndarray, count) 

891 mask = count <= ddof 

892 if mask.any(): 

893 np.putmask(d, mask, np.nan) 

894 np.putmask(count, mask, np.nan) 

895 return count, d 

896 

897 

@bottleneck_switch(ddof=1)
def nanstd(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s)
    1.0
    """
    if values.dtype == "M8[ns]":
        # the std of datetimes is expressed as a timedelta
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask, _, _, _ = _get_values(values, skipna, mask=mask)

    variance = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)
    return _wrap_results(np.sqrt(variance), orig_dtype)

942 

943 

@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = extract_array(values, extract_numpy=True)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(dtype):
        # ints can't hold NaN, so cast to float64 and NaN-out masked entries
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values.dtype):
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries so they contribute nothing to the sums below
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries were zero-filled above; zero their squared deviations
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)
    return result

1018 

1019 

@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")

    if not skipna and mask is not None and mask.any():
        # skipna=False with any missing value: no defined standard error
        return np.nan

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    variance = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    # SEM = std / sqrt(n) = sqrt(var) / sqrt(n)
    return np.sqrt(variance) / np.sqrt(count)

1071 

1072 

def _nanminmax(meth, fill_value_typ):
    """
    Factory building a NaN-aware reduction; shared by nanmin and nanmax.

    ``meth`` is the ndarray method name ("min"/"max") and ``fill_value_typ``
    the sentinel ("+inf"/"-inf") that can never win the reduction.
    """

    @bottleneck_switch(name=f"nan{meth}")
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> Dtype:
        values, mask, dtype, dtype_max, fill_value = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )

        empty = values.size == 0 or (axis is not None and values.shape[axis] == 0)
        if empty:
            # An empty reduction has no defined min/max: emit an all-NaN
            # result of the right shape when possible, else a scalar NaN.
            try:
                result = getattr(values, meth)(axis, dtype=dtype_max)
                result.fill(np.nan)
            except (AttributeError, TypeError, ValueError):
                result = np.nan
        else:
            result = getattr(values, meth)(axis)

        return _maybe_null_out(result, axis, mask, values.shape)

    return reduction

1100 

1101 

# NaN-aware reductions built from the _nanminmax factory; the fill value is
# the sentinel that can never win the respective reduction (+inf for min,
# -inf for max).
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")

1104 

1105 

1106@disallow("O") 

1107def nanargmax( 

1108 values: np.ndarray, 

1109 *, 

1110 axis: AxisInt | None = None, 

1111 skipna: bool = True, 

1112 mask: npt.NDArray[np.bool_] | None = None, 

1113) -> int | np.ndarray: 

1114 """ 

1115 Parameters 

1116 ---------- 

1117 values : ndarray 

1118 axis : int, optional 

1119 skipna : bool, default True 

1120 mask : ndarray[bool], optional 

1121 nan-mask if known 

1122 

1123 Returns 

1124 ------- 

1125 result : int or ndarray[int] 

1126 The index/indices of max value in specified axis or -1 in the NA case 

1127 

1128 Examples 

1129 -------- 

1130 >>> from pandas.core import nanops 

1131 >>> arr = np.array([1, 2, 3, np.nan, 4]) 

1132 >>> nanops.nanargmax(arr) 

1133 4 

1134 

1135 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) 

1136 >>> arr[2:, 2] = np.nan 

1137 >>> arr 

1138 array([[ 0., 1., 2.], 

1139 [ 3., 4., 5.], 

1140 [ 6., 7., nan], 

1141 [ 9., 10., nan]]) 

1142 >>> nanops.nanargmax(arr, axis=1) 

1143 array([2, 2, 1, 1]) 

1144 """ 

1145 values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask) 

1146 # error: Need type annotation for 'result' 

1147 result = values.argmax(axis) # type: ignore[var-annotated] 

1148 result = _maybe_arg_null_out(result, axis, mask, skipna) 

1149 return result 

1150 

1151 

1152@disallow("O") 

1153def nanargmin( 

1154 values: np.ndarray, 

1155 *, 

1156 axis: AxisInt | None = None, 

1157 skipna: bool = True, 

1158 mask: npt.NDArray[np.bool_] | None = None, 

1159) -> int | np.ndarray: 

1160 """ 

1161 Parameters 

1162 ---------- 

1163 values : ndarray 

1164 axis : int, optional 

1165 skipna : bool, default True 

1166 mask : ndarray[bool], optional 

1167 nan-mask if known 

1168 

1169 Returns 

1170 ------- 

1171 result : int or ndarray[int] 

1172 The index/indices of min value in specified axis or -1 in the NA case 

1173 

1174 Examples 

1175 -------- 

1176 >>> from pandas.core import nanops 

1177 >>> arr = np.array([1, 2, 3, np.nan, 4]) 

1178 >>> nanops.nanargmin(arr) 

1179 0 

1180 

1181 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) 

1182 >>> arr[2:, 0] = np.nan 

1183 >>> arr 

1184 array([[ 0., 1., 2.], 

1185 [ 3., 4., 5.], 

1186 [nan, 7., 8.], 

1187 [nan, 10., 11.]]) 

1188 >>> nanops.nanargmin(arr, axis=1) 

1189 array([0, 0, 1, 1]) 

1190 """ 

1191 values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask) 

1192 # error: Need type annotation for 'result' 

1193 result = values.argmin(axis) # type: ignore[var-annotated] 

1194 result = _maybe_arg_null_out(result, axis, mask, skipna) 

1195 return result 

1196 

1197 

1198@disallow("M8", "m8") 

1199@maybe_operate_rowwise 

1200def nanskew( 

1201 values: np.ndarray, 

1202 *, 

1203 axis: AxisInt | None = None, 

1204 skipna: bool = True, 

1205 mask: npt.NDArray[np.bool_] | None = None, 

1206) -> float: 

1207 """ 

1208 Compute the sample skewness. 

1209 

1210 The statistic computed here is the adjusted Fisher-Pearson standardized 

1211 moment coefficient G1. The algorithm computes this coefficient directly 

1212 from the second and third central moment. 

1213 

1214 Parameters 

1215 ---------- 

1216 values : ndarray 

1217 axis : int, optional 

1218 skipna : bool, default True 

1219 mask : ndarray[bool], optional 

1220 nan-mask if known 

1221 

1222 Returns 

1223 ------- 

1224 result : float64 

1225 Unless input is a float array, in which case use the same 

1226 precision as the input array. 

1227 

1228 Examples 

1229 -------- 

1230 >>> from pandas.core import nanops 

1231 >>> s = pd.Series([1, np.nan, 1, 2]) 

1232 >>> nanops.nanskew(s) 

1233 1.7320508075688787 

1234 """ 

1235 # error: Incompatible types in assignment (expression has type "Union[Any, 

1236 # Union[ExtensionArray, ndarray]]", variable has type "ndarray") 

1237 values = extract_array(values, extract_numpy=True) # type: ignore[assignment] 

1238 mask = _maybe_get_mask(values, skipna, mask) 

1239 if not is_float_dtype(values.dtype): 

1240 values = values.astype("f8") 

1241 count = _get_counts(values.shape, mask, axis) 

1242 else: 

1243 count = _get_counts(values.shape, mask, axis, dtype=values.dtype) 

1244 

1245 if skipna and mask is not None: 

1246 values = values.copy() 

1247 np.putmask(values, mask, 0) 

1248 elif not skipna and mask is not None and mask.any(): 

1249 return np.nan 

1250 

1251 mean = values.sum(axis, dtype=np.float64) / count 

1252 if axis is not None: 

1253 mean = np.expand_dims(mean, axis) 

1254 

1255 adjusted = values - mean 

1256 if skipna and mask is not None: 

1257 np.putmask(adjusted, mask, 0) 

1258 adjusted2 = adjusted**2 

1259 adjusted3 = adjusted2 * adjusted 

1260 m2 = adjusted2.sum(axis, dtype=np.float64) 

1261 m3 = adjusted3.sum(axis, dtype=np.float64) 

1262 

1263 # floating point error 

1264 # 

1265 # #18044 in _libs/windows.pyx calc_skew follow this behavior 

1266 # to fix the fperr to treat m2 <1e-14 as zero 

1267 m2 = _zero_out_fperr(m2) 

1268 m3 = _zero_out_fperr(m3) 

1269 

1270 with np.errstate(invalid="ignore", divide="ignore"): 

1271 result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5) 

1272 

1273 dtype = values.dtype 

1274 if is_float_dtype(dtype): 

1275 result = result.astype(dtype, copy=False) 

1276 

1277 if isinstance(result, np.ndarray): 

1278 result = np.where(m2 == 0, 0, result) 

1279 result[count < 3] = np.nan 

1280 else: 

1281 result = 0 if m2 == 0 else result 

1282 if count < 3: 

1283 return np.nan 

1284 

1285 return result 

1286 

1287 

1288@disallow("M8", "m8") 

1289@maybe_operate_rowwise 

1290def nankurt( 

1291 values: np.ndarray, 

1292 *, 

1293 axis: AxisInt | None = None, 

1294 skipna: bool = True, 

1295 mask: npt.NDArray[np.bool_] | None = None, 

1296) -> float: 

1297 """ 

1298 Compute the sample excess kurtosis 

1299 

1300 The statistic computed here is the adjusted Fisher-Pearson standardized 

1301 moment coefficient G2, computed directly from the second and fourth 

1302 central moment. 

1303 

1304 Parameters 

1305 ---------- 

1306 values : ndarray 

1307 axis : int, optional 

1308 skipna : bool, default True 

1309 mask : ndarray[bool], optional 

1310 nan-mask if known 

1311 

1312 Returns 

1313 ------- 

1314 result : float64 

1315 Unless input is a float array, in which case use the same 

1316 precision as the input array. 

1317 

1318 Examples 

1319 -------- 

1320 >>> from pandas.core import nanops 

1321 >>> s = pd.Series([1, np.nan, 1, 3, 2]) 

1322 >>> nanops.nankurt(s) 

1323 -1.2892561983471076 

1324 """ 

1325 # error: Incompatible types in assignment (expression has type "Union[Any, 

1326 # Union[ExtensionArray, ndarray]]", variable has type "ndarray") 

1327 values = extract_array(values, extract_numpy=True) # type: ignore[assignment] 

1328 mask = _maybe_get_mask(values, skipna, mask) 

1329 if not is_float_dtype(values.dtype): 

1330 values = values.astype("f8") 

1331 count = _get_counts(values.shape, mask, axis) 

1332 else: 

1333 count = _get_counts(values.shape, mask, axis, dtype=values.dtype) 

1334 

1335 if skipna and mask is not None: 

1336 values = values.copy() 

1337 np.putmask(values, mask, 0) 

1338 elif not skipna and mask is not None and mask.any(): 

1339 return np.nan 

1340 

1341 mean = values.sum(axis, dtype=np.float64) / count 

1342 if axis is not None: 

1343 mean = np.expand_dims(mean, axis) 

1344 

1345 adjusted = values - mean 

1346 if skipna and mask is not None: 

1347 np.putmask(adjusted, mask, 0) 

1348 adjusted2 = adjusted**2 

1349 adjusted4 = adjusted2**2 

1350 m2 = adjusted2.sum(axis, dtype=np.float64) 

1351 m4 = adjusted4.sum(axis, dtype=np.float64) 

1352 

1353 with np.errstate(invalid="ignore", divide="ignore"): 

1354 adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) 

1355 numerator = count * (count + 1) * (count - 1) * m4 

1356 denominator = (count - 2) * (count - 3) * m2**2 

1357 

1358 # floating point error 

1359 # 

1360 # #18044 in _libs/windows.pyx calc_kurt follow this behavior 

1361 # to fix the fperr to treat denom <1e-14 as zero 

1362 numerator = _zero_out_fperr(numerator) 

1363 denominator = _zero_out_fperr(denominator) 

1364 

1365 if not isinstance(denominator, np.ndarray): 

1366 # if ``denom`` is a scalar, check these corner cases first before 

1367 # doing division 

1368 if count < 4: 

1369 return np.nan 

1370 if denominator == 0: 

1371 return 0 

1372 

1373 with np.errstate(invalid="ignore", divide="ignore"): 

1374 result = numerator / denominator - adj 

1375 

1376 dtype = values.dtype 

1377 if is_float_dtype(dtype): 

1378 result = result.astype(dtype, copy=False) 

1379 

1380 if isinstance(result, np.ndarray): 

1381 result = np.where(denominator == 0, 0, result) 

1382 result[count < 4] = np.nan 

1383 

1384 return result 

1385 

1386 

1387@disallow("M8", "m8") 

1388@maybe_operate_rowwise 

1389def nanprod( 

1390 values: np.ndarray, 

1391 *, 

1392 axis: AxisInt | None = None, 

1393 skipna: bool = True, 

1394 min_count: int = 0, 

1395 mask: npt.NDArray[np.bool_] | None = None, 

1396) -> float: 

1397 """ 

1398 Parameters 

1399 ---------- 

1400 values : ndarray[dtype] 

1401 axis : int, optional 

1402 skipna : bool, default True 

1403 min_count: int, default 0 

1404 mask : ndarray[bool], optional 

1405 nan-mask if known 

1406 

1407 Returns 

1408 ------- 

1409 Dtype 

1410 The product of all elements on a given axis. ( NaNs are treated as 1) 

1411 

1412 Examples 

1413 -------- 

1414 >>> from pandas.core import nanops 

1415 >>> s = pd.Series([1, 2, 3, np.nan]) 

1416 >>> nanops.nanprod(s) 

1417 6.0 

1418 """ 

1419 mask = _maybe_get_mask(values, skipna, mask) 

1420 

1421 if skipna and mask is not None: 

1422 values = values.copy() 

1423 values[mask] = 1 

1424 result = values.prod(axis) 

1425 # error: Incompatible return value type (got "Union[ndarray, float]", expected 

1426 # "float") 

1427 return _maybe_null_out( # type: ignore[return-value] 

1428 result, axis, mask, values.shape, min_count=min_count 

1429 ) 

1430 

1431 

def _maybe_arg_null_out(
    result: np.ndarray,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_] | None,
    skipna: bool,
) -> np.ndarray | int:
    """
    Helper for nanargmin/nanargmax: substitute -1 wherever the reduction is
    undefined because of missing values.
    """
    if mask is None:
        # No missing values at all -> nothing to patch.
        return result

    if axis is None or not getattr(result, "ndim", False):
        # Scalar result. With skipna the answer is invalid only when *every*
        # value is missing; without skipna, any missing value poisons it.
        invalid = mask.all() if skipna else mask.any()
        if invalid:
            return -1
        return result

    # Array result: mark slices that are entirely (skipna) or partially
    # (not skipna) missing.
    na_mask = mask.all(axis) if skipna else mask.any(axis)
    if na_mask.any():
        result[na_mask] = -1
    return result

1457 

1458 

def _get_counts(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: AxisInt | None,
    dtype: np.dtype = np.dtype(np.float64),
) -> float | np.ndarray:
    """
    Get the count of non-null values along an axis.

    Parameters
    ----------
    values_shape : tuple of int
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : scalar or array
    """
    if axis is None:
        # Grand total over the whole array.
        total = np.prod(values_shape) if mask is None else mask.size - mask.sum()
        return dtype.type(total)

    if mask is None:
        # No mask means no nulls: the axis length is the count.
        count = values_shape[axis]
    else:
        count = mask.shape[axis] - mask.sum(axis)

    if is_scalar(count):
        return dtype.type(count)
    return count.astype(dtype, copy=False)

1498 

1499 

def _maybe_null_out(
    result: np.ndarray | float | NaTType,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_] | None,
    shape: tuple[int, ...],
    min_count: int = 1,
) -> np.ndarray | float | NaTType:
    """
    Null out result entries backed by fewer than ``min_count`` non-null
    values; returns the (possibly upcast) result.
    """
    if mask is None and min_count == 0:
        # No nulls and no minimum requirement: nothing can be nulled out.
        return result

    if axis is not None and isinstance(result, np.ndarray):
        if mask is not None:
            null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
        else:
            # No nulls present (mask stayed None in _maybe_get_mask), so only
            # the axis length itself can fall short of min_count.
            below_count = shape[axis] - min_count < 0
            new_shape = shape[:axis] + shape[axis + 1 :]
            null_mask = np.broadcast_to(below_count, new_shape)

        if np.any(null_mask):
            if is_numeric_dtype(result):
                # Upcast so the array can actually hold NaN.
                if np.iscomplexobj(result):
                    result = result.astype("c16")
                elif not is_float_dtype(result):
                    result = result.astype("f8", copy=False)
                result[null_mask] = np.nan
            else:
                # GH12941, use None to auto cast null
                result[null_mask] = None
    elif result is not NaT:
        if check_below_min_count(shape, mask, min_count):
            result_dtype = getattr(result, "dtype", None)
            if is_float_dtype(result_dtype):
                # Preserve the float precision of the input when nulling out.
                # error: Item "None" of "Optional[Any]" has no attribute "type"
                result = result_dtype.type("nan")  # type: ignore[union-attr]
            else:
                result = np.nan

    return result

1546 

1547 

def check_below_min_count(
    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
    """
    Check for the `min_count` keyword. Returns True if below `min_count` (when
    missing value should be returned from the reduction).

    Parameters
    ----------
    shape : tuple
        The shape of the values (`values.shape`).
    mask : ndarray[bool] or None
        Boolean numpy array (typically of same shape as `shape`) or None.
    min_count : int
        Keyword passed through from sum/prod call.

    Returns
    -------
    bool
    """
    if min_count <= 0:
        # A non-positive min_count can never fail the check.
        return False
    # With no mask there are no missing values, so the non-null count is
    # just the total number of elements.
    non_nulls = np.prod(shape) if mask is None else mask.size - mask.sum()
    return bool(non_nulls < min_count)

1577 

1578 

def _zero_out_fperr(arg):
    """
    Clamp magnitudes below 1e-14 to exactly zero to hide floating-point
    error; mirrors the rolling skew/kurt behavior referenced in GH#18044.
    """
    if not isinstance(arg, np.ndarray):
        # Scalar (numpy scalar): preserve its dtype in the zero.
        return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
    with np.errstate(invalid="ignore"):
        return np.where(np.abs(arg) < 1e-14, 0, arg)

1586 

1587 

1588@disallow("M8", "m8") 

1589def nancorr( 

1590 a: np.ndarray, 

1591 b: np.ndarray, 

1592 *, 

1593 method: CorrelationMethod = "pearson", 

1594 min_periods: int | None = None, 

1595) -> float: 

1596 """ 

1597 a, b: ndarrays 

1598 """ 

1599 if len(a) != len(b): 

1600 raise AssertionError("Operands to nancorr must have same size") 

1601 

1602 if min_periods is None: 

1603 min_periods = 1 

1604 

1605 valid = notna(a) & notna(b) 

1606 if not valid.all(): 

1607 a = a[valid] 

1608 b = b[valid] 

1609 

1610 if len(a) < min_periods: 

1611 return np.nan 

1612 

1613 f = get_corr_func(method) 

1614 return f(a, b) 

1615 

1616 

def get_corr_func(
    method: CorrelationMethod,
) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Map a correlation ``method`` name (or a user-supplied callable) to a
    function of two arrays returning a float.

    Raises
    ------
    ValueError
        If ``method`` is neither a recognized name nor callable.
    """
    if callable(method):
        # User-provided correlation function; use it as-is.
        return method

    if method == "kendall":
        # scipy is imported lazily so it is only required when used.
        from scipy.stats import kendalltau

        def func(a, b):
            return kendalltau(a, b)[0]

        return func
    if method == "spearman":
        from scipy.stats import spearmanr

        def func(a, b):
            return spearmanr(a, b)[0]

        return func
    if method == "pearson":

        def func(a, b):
            return np.corrcoef(a, b)[0, 1]

        return func

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )

1647 

1648 

1649@disallow("M8", "m8") 

1650def nancov( 

1651 a: np.ndarray, 

1652 b: np.ndarray, 

1653 *, 

1654 min_periods: int | None = None, 

1655 ddof: int | None = 1, 

1656) -> float: 

1657 if len(a) != len(b): 

1658 raise AssertionError("Operands to nancov must have same size") 

1659 

1660 if min_periods is None: 

1661 min_periods = 1 

1662 

1663 valid = notna(a) & notna(b) 

1664 if not valid.all(): 

1665 a = a[valid] 

1666 b = b[valid] 

1667 

1668 if len(a) < min_periods: 

1669 return np.nan 

1670 

1671 return np.cov(a, b, ddof=ddof)[0, 1] 

1672 

1673 

def _ensure_numeric(x):
    """
    Coerce ``x`` (ndarray or scalar) to a numeric form, raising TypeError
    when it cannot be interpreted numerically (GH#29941).
    """
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            # Try complex first so strings like "1+1j" survive; fall back to
            # float, and raise TypeError for non-numeric contents.
            try:
                x = x.astype(np.complex128)
            except (TypeError, ValueError):
                try:
                    x = x.astype(np.float64)
                except ValueError as err:
                    # GH#29941 we get here with object arrays containing strs
                    raise TypeError(f"Could not convert {x} to numeric") from err
            else:
                if not np.any(np.imag(x)):
                    # No imaginary part anywhere: drop down to float.
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except (TypeError, ValueError):
            # e.g. "1+1j" is complex-parseable even though float() fails
            try:
                x = complex(x)
            except ValueError as err:
                # e.g. "foo"
                raise TypeError(f"Could not convert {x} to numeric") from err
    return x

1701 

1702 

1703# NA-friendly array comparisons 

1704 

1705 

def make_nancomp(op):
    """
    Wrap comparison operator ``op`` so that positions that are NA in either
    operand come out as NaN (upcasting a boolean result to object first).
    """

    def f(x, y):
        na = isna(x) | isna(y)

        with np.errstate(all="ignore"):
            result = op(x, y)

        if na.any():
            # Boolean arrays cannot hold NaN; upcast to object before masking.
            if is_bool_dtype(result):
                result = result.astype("O")
            np.putmask(result, na, np.nan)

        return result

    return f

1723 

1724 

# NA-propagating element-wise comparison operators built via make_nancomp:
# positions where either operand is NA compare to NaN instead of True/False.
nangt = make_nancomp(operator.gt)
nange = make_nancomp(operator.ge)
nanlt = make_nancomp(operator.lt)
nanle = make_nancomp(operator.le)
naneq = make_nancomp(operator.eq)
nanne = make_nancomp(operator.ne)

1731 

1732 

def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # Identity element substituted at NA positions before accumulating, and
    # the value restored at those positions afterwards.
    fill_value, restore_value = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # Datetimelike values should go through the EA interface instead.
    assert values.dtype.kind not in ["m", "M"]

    # We will be applying this function to block values
    if skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        vals = values.copy()
        na_positions = isna(vals)
        vals[na_positions] = fill_value
        result = accum_func(vals, axis=0)
        result[na_positions] = restore_value
    else:
        # Integer/bool arrays cannot hold NA; with skipna=False the NAs
        # should propagate through the accumulation naturally.
        result = accum_func(values, axis=0)

    return result