1from __future__ import annotations
2
3import functools
4import itertools
5from typing import (
6 Any,
7 Callable,
8 cast,
9)
10import warnings
11
12import numpy as np
13
14from pandas._config import get_option
15
16from pandas._libs import (
17 NaT,
18 NaTType,
19 iNaT,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 AxisInt,
25 CorrelationMethod,
26 Dtype,
27 DtypeObj,
28 F,
29 Scalar,
30 Shape,
31 npt,
32)
33from pandas.compat._optional import import_optional_dependency
34from pandas.util._exceptions import find_stack_level
35
36from pandas.core.dtypes.common import (
37 is_complex,
38 is_float,
39 is_float_dtype,
40 is_integer,
41 is_numeric_dtype,
42 is_object_dtype,
43 needs_i8_conversion,
44 pandas_dtype,
45)
46from pandas.core.dtypes.missing import (
47 isna,
48 na_value_for_dtype,
49 notna,
50)
51
# bottleneck is an optional accelerator for nan-reductions; warn (don't
# fail) when it is missing so pandas still works without it.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
# Runtime on/off switch; flipped by set_use_bottleneck() from config.
_USE_BOTTLENECK = False
55
56
def set_use_bottleneck(v: bool = True) -> None:
    """Enable or disable bottleneck acceleration (no-op when not installed)."""
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        return
    _USE_BOTTLENECK = v
62
63
64set_use_bottleneck(get_option("compute.use_bottleneck"))
65
66
class disallow:
    """
    Decorator rejecting reductions over the given dtypes.

    Any positional or keyword argument whose ``dtype`` matches one of the
    disallowed dtypes raises TypeError before the wrapped function runs.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        """Return True if ``obj`` has a dtype in the disallowed set."""
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            for obj in itertools.chain(args, kwargs.values()):
                if self.check(obj):
                    f_name = f.__name__.replace("nan", "")
                    raise TypeError(
                        f"reduction operation '{f_name}' not allowed for this dtype"
                    )
            try:
                return f(*args, **kwargs)
            except ValueError as err:
                # we want to transform an object array
                # ValueError message to the more typical TypeError
                # e.g. this is normally a disallowed function on
                # object arrays that contain strings
                if is_object_dtype(args[0]):
                    raise TypeError(err) from err
                raise

        return cast(F, _f)
96
97
class bottleneck_switch:
    """
    Decorator factory dispatching to a bottleneck kernel when possible.

    Wraps a pandas ``alt`` implementation; at call time the bottleneck
    function of the same name (or ``name`` if given) is used instead when
    bottleneck is enabled, ``skipna`` is True and the dtype is supported
    (see _bn_ok_dtype).  Extra ``kwargs`` are merged into each call's
    keywords as defaults.
    """

    def __init__(self, name=None, **kwargs) -> None:
        # name: bottleneck function name override; defaults to alt.__name__
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bottleneck not installed, or it lacks this reduction
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: AxisInt | None = None,
            skipna: bool = True,
            **kwds,
        ):
            if len(self.kwargs) > 0:
                # fill in decorator-level defaults without clobbering
                # explicitly passed keywords
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)
152
153
def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
    """Return True if bottleneck may be used for reduction ``name`` on ``dtype``."""
    # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
    if dtype == object or needs_i8_conversion(dtype):
        return False

    # GH 42878
    # Bottleneck uses naive summation leading to O(n) loss of precision
    # unlike numpy which implements pairwise summation, which has O(log(n)) loss
    # crossref: https://github.com/pydata/bottleneck/issues/379

    # GH 15507
    # bottleneck does not properly upcast during the sum
    # so can overflow

    # GH 9422
    # further we also want to preserve NaN when all elements
    # are NaN, unlike bottleneck/numpy which consider this
    # to be 0
    return name not in ["nansum", "nanprod", "nanmean"]
172
173
def _has_infs(result) -> bool:
    """Return True if ``result`` (scalar or ndarray) contains +/-inf."""
    if isinstance(result, np.ndarray) and result.dtype in ("f8", "f4"):
        # Note: outside of an nanops-specific test, we always have
        # result.ndim == 1, so there is no risk of this ravel making a copy.
        return lib.has_infs(result.ravel("K"))
    try:
        return np.isinf(result).any()
    except (TypeError, NotImplementedError):
        # if it doesn't support infs, then it can't have infs
        return False
185
186
187def _get_fill_value(
188 dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
189):
190 """return the correct fill value for the dtype of the values"""
191 if fill_value is not None:
192 return fill_value
193 if _na_ok_dtype(dtype):
194 if fill_value_typ is None:
195 return np.nan
196 else:
197 if fill_value_typ == "+inf":
198 return np.inf
199 else:
200 return -np.inf
201 else:
202 if fill_value_typ == "+inf":
203 # need the max int here
204 return lib.i8max
205 else:
206 return iNaT
207
208
209def _maybe_get_mask(
210 values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
211) -> npt.NDArray[np.bool_] | None:
212 """
213 Compute a mask if and only if necessary.
214
215 This function will compute a mask iff it is necessary. Otherwise,
216 return the provided mask (potentially None) when a mask does not need to be
217 computed.
218
219 A mask is never necessary if the values array is of boolean or integer
220 dtypes, as these are incapable of storing NaNs. If passing a NaN-capable
221 dtype that is interpretable as either boolean or integer data (eg,
222 timedelta64), a mask must be provided.
223
224 If the skipna parameter is False, a new mask will not be computed.
225
226 The mask is computed using isna() by default. Setting invert=True selects
227 notna() as the masking function.
228
229 Parameters
230 ----------
231 values : ndarray
232 input array to potentially compute mask for
233 skipna : bool
234 boolean for whether NaNs should be skipped
235 mask : Optional[ndarray]
236 nan-mask if known
237
238 Returns
239 -------
240 Optional[np.ndarray[bool]]
241 """
242 if mask is None:
243 if values.dtype.kind in "biu":
244 # Boolean data cannot contain nulls, so signal via mask being None
245 return None
246
247 if skipna or values.dtype.kind in "mM":
248 mask = isna(values)
249
250 return mask
251
252
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None]:
    """
    Utility to get the values view and NaN-mask, filling NAs when requested.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if values.dtype.kind in "mM":
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    if skipna and (mask is not None):
        # get our fill value (in case we need to provide an alternative
        # dtype for it)
        fill_value = _get_fill_value(
            dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
        )

        if fill_value is not None:
            if mask.any():
                if datetimelike or _na_ok_dtype(dtype):
                    # fill_value fits the dtype; copy, then fill in place
                    values = values.copy()
                    np.putmask(values, mask, fill_value)
                else:
                    # np.where will promote if needed
                    values = np.where(~mask, values, fill_value)

    return values, mask
322
323
def _get_dtype_max(dtype: np.dtype) -> np.dtype:
    """Map ``dtype`` to a platform-independent accumulator dtype."""
    kind = dtype.kind
    if kind in "bi":
        return np.dtype(np.int64)
    if kind == "u":
        return np.dtype(np.uint64)
    if kind == "f":
        return np.dtype(np.float64)
    # other kinds (datetimelike, object, ...) are kept unchanged
    return dtype
334
335
def _na_ok_dtype(dtype: DtypeObj) -> bool:
    """Return True if ``dtype`` can natively hold NaN (datetimelike uses iNaT)."""
    return not (needs_i8_conversion(dtype) or issubclass(dtype.type, np.integer))
340
341
def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    Cast a reduction result back to the original datetimelike dtype.

    ``fill_value`` is the sentinel the reduction used for missing values
    (e.g. iNaT); a scalar result equal to it is converted to NaT.
    Non-datetimelike dtypes pass through unchanged.
    """
    if result is NaT:
        pass

    elif dtype.kind == "M":
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns").astype(dtype)
            else:
                result = np.int64(result).view(dtype)
            # retain original unit
            result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif dtype.kind == "m":
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result
381
382
383def _datetimelike_compat(func: F) -> F:
384 """
385 If we have datetime64 or timedelta64 values, ensure we have a correct
386 mask before calling the wrapped function, then cast back afterwards.
387 """
388
389 @functools.wraps(func)
390 def new_func(
391 values: np.ndarray,
392 *,
393 axis: AxisInt | None = None,
394 skipna: bool = True,
395 mask: npt.NDArray[np.bool_] | None = None,
396 **kwargs,
397 ):
398 orig_values = values
399
400 datetimelike = values.dtype.kind in "mM"
401 if datetimelike and mask is None:
402 mask = isna(values)
403
404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
405
406 if datetimelike:
407 result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)
408 if not skipna:
409 assert mask is not None # checked above
410 result = _mask_datetimelike_result(result, axis, mask, orig_values)
411
412 return result
413
414 return cast(F, new_func)
415
416
417def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray:
418 """
419 Return the missing value for `values`.
420
421 Parameters
422 ----------
423 values : ndarray
424 axis : int or None
425 axis for the reduction, required if values.ndim > 1.
426
427 Returns
428 -------
429 result : scalar or ndarray
430 For 1-D values, returns a scalar of the correct missing type.
431 For 2-D values, returns a 1-D array where each element is missing.
432 """
433 # we either return np.nan or pd.NaT
434 if values.dtype.kind in "iufcb":
435 values = values.astype("float64")
436 fill_value = na_value_for_dtype(values.dtype)
437
438 if values.ndim == 1:
439 return fill_value
440 elif axis is None:
441 return fill_value
442 else:
443 result_shape = values.shape[:axis] + values.shape[axis + 1 :]
444
445 return np.full(result_shape, fill_value, dtype=values.dtype)
446
447
448def maybe_operate_rowwise(func: F) -> F:
449 """
450 NumPy operations on C-contiguous ndarrays with axis=1 can be
451 very slow if axis 1 >> axis 0.
452 Operate row-by-row and concatenate the results.
453 """
454
455 @functools.wraps(func)
456 def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs):
457 if (
458 axis == 1
459 and values.ndim == 2
460 and values.flags["C_CONTIGUOUS"]
461 # only takes this path for wide arrays (long dataframes), for threshold see
462 # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
463 and (values.shape[1] / 1000) > values.shape[0]
464 and values.dtype != object
465 and values.dtype != bool
466 ):
467 arrs = list(values)
468 if kwargs.get("mask") is not None:
469 mask = kwargs.pop("mask")
470 results = [
471 func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs))
472 ]
473 else:
474 results = [func(x, **kwargs) for x in arrs]
475 return np.array(results)
476
477 return func(values, axis=axis, **kwargs)
478
479 return cast(F, newfunc)
480
481
482def nanany(
483 values: np.ndarray,
484 *,
485 axis: AxisInt | None = None,
486 skipna: bool = True,
487 mask: npt.NDArray[np.bool_] | None = None,
488) -> bool:
489 """
490 Check if any elements along an axis evaluate to True.
491
492 Parameters
493 ----------
494 values : ndarray
495 axis : int, optional
496 skipna : bool, default True
497 mask : ndarray[bool], optional
498 nan-mask if known
499
500 Returns
501 -------
502 result : bool
503
504 Examples
505 --------
506 >>> from pandas.core import nanops
507 >>> s = pd.Series([1, 2])
508 >>> nanops.nanany(s.values)
509 True
510
511 >>> from pandas.core import nanops
512 >>> s = pd.Series([np.nan])
513 >>> nanops.nanany(s.values)
514 False
515 """
516 if values.dtype.kind in "iub" and mask is None:
517 # GH#26032 fastpath
518 # error: Incompatible return value type (got "Union[bool_, ndarray]",
519 # expected "bool")
520 return values.any(axis) # type: ignore[return-value]
521
522 if values.dtype.kind == "M":
523 # GH#34479
524 warnings.warn(
525 "'any' with datetime64 dtypes is deprecated and will raise in a "
526 "future version. Use (obj != pd.Timestamp(0)).any() instead.",
527 FutureWarning,
528 stacklevel=find_stack_level(),
529 )
530
531 values, _ = _get_values(values, skipna, fill_value=False, mask=mask)
532
533 # For object type, any won't necessarily return
534 # boolean values (numpy/numpy#4352)
535 if values.dtype == object:
536 values = values.astype(bool)
537
538 # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
539 # "bool")
540 return values.any(axis) # type: ignore[return-value]
541
542
543def nanall(
544 values: np.ndarray,
545 *,
546 axis: AxisInt | None = None,
547 skipna: bool = True,
548 mask: npt.NDArray[np.bool_] | None = None,
549) -> bool:
550 """
551 Check if all elements along an axis evaluate to True.
552
553 Parameters
554 ----------
555 values : ndarray
556 axis : int, optional
557 skipna : bool, default True
558 mask : ndarray[bool], optional
559 nan-mask if known
560
561 Returns
562 -------
563 result : bool
564
565 Examples
566 --------
567 >>> from pandas.core import nanops
568 >>> s = pd.Series([1, 2, np.nan])
569 >>> nanops.nanall(s.values)
570 True
571
572 >>> from pandas.core import nanops
573 >>> s = pd.Series([1, 0])
574 >>> nanops.nanall(s.values)
575 False
576 """
577 if values.dtype.kind in "iub" and mask is None:
578 # GH#26032 fastpath
579 # error: Incompatible return value type (got "Union[bool_, ndarray]",
580 # expected "bool")
581 return values.all(axis) # type: ignore[return-value]
582
583 if values.dtype.kind == "M":
584 # GH#34479
585 warnings.warn(
586 "'all' with datetime64 dtypes is deprecated and will raise in a "
587 "future version. Use (obj != pd.Timestamp(0)).all() instead.",
588 FutureWarning,
589 stacklevel=find_stack_level(),
590 )
591
592 values, _ = _get_values(values, skipna, fill_value=True, mask=mask)
593
594 # For object type, all won't necessarily return
595 # boolean values (numpy/numpy#4352)
596 if values.dtype == object:
597 values = values.astype(bool)
598
599 # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
600 # "bool")
601 return values.all(axis) # type: ignore[return-value]
602
603
@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        minimum number of non-NA values required for a non-NA result
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s.values)
    3.0
    """
    orig_dtype = values.dtype
    values, mask = _get_values(values, skipna, fill_value=0, mask=mask)

    # accumulator dtype: floats keep their precision, timedeltas sum in
    # float64, ints/bools upcast via _get_dtype_max to avoid overflow
    if orig_dtype.kind == "f":
        dtype_sum = orig_dtype
    elif orig_dtype.kind == "m":
        dtype_sum = np.dtype(np.float64)
    else:
        dtype_sum = _get_dtype_max(orig_dtype)

    the_sum = values.sum(axis, dtype=dtype_sum)
    return _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
650
651
652def _mask_datetimelike_result(
653 result: np.ndarray | np.datetime64 | np.timedelta64,
654 axis: AxisInt | None,
655 mask: npt.NDArray[np.bool_],
656 orig_values: np.ndarray,
657) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
658 if isinstance(result, np.ndarray):
659 # we need to apply the mask
660 result = result.astype("i8").view(orig_values.dtype)
661 axis_mask = mask.any(axis=axis)
662 # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
663 # datetime64, timedelta64]")
664 result[axis_mask] = iNaT # type: ignore[index]
665 else:
666 if mask.any():
667 return np.int64(iNaT).view(orig_values.dtype)
668 return result
669
670
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s.values)
    1.5
    """
    dtype = values.dtype
    values, mask = _get_values(values, skipna, fill_value=0, mask=mask)

    # accumulator dtypes (not using needs_i8_conversion because that
    # includes period): datetimelike and integer sums go through float64;
    # floats keep their own precision for both sum and count
    if dtype.kind in "mMiu":
        dtype_sum = np.dtype(np.float64)
        dtype_count = np.dtype(np.float64)
    elif dtype.kind == "f":
        dtype_sum = dtype
        dtype_count = dtype
    else:
        dtype_sum = _get_dtype_max(dtype)
        dtype_count = np.dtype(np.float64)

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
        return the_mean

    return the_sum / count if count > 0 else np.nan
734
735
@bottleneck_switch()
def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None):
    """
    Compute the median along the given axis while ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s.values)
    2.0
    """
    # for floats without mask, the data already uses NaN as missing value
    # indicator, and `mask` will be calculated from that below -> in those
    # cases we never need to set NaN to the masked values
    using_nan_sentinel = values.dtype.kind == "f" and mask is None

    def get_median(x, _mask=None):
        # median of a 1-D slice; _mask here marks VALID entries
        if _mask is None:
            _mask = notna(x)
        else:
            _mask = ~_mask
        if not skipna and not _mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings(
                "ignore", "All-NaN slice encountered", RuntimeWarning
            )
            res = np.nanmedian(x[_mask])
        return res

    dtype = values.dtype
    values, mask = _get_values(values, skipna, mask=mask, fill_value=None)
    if values.dtype.kind != "f":
        if values.dtype == object:
            # GH#34671 avoid casting strings to numeric
            inferred = lib.infer_dtype(values)
            if inferred in ["string", "mixed"]:
                raise TypeError(f"Cannot convert {values} to numeric")
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
    if not using_nan_sentinel and mask is not None:
        # masked entries must become NaN so np.nanmedian skips them
        if not values.flags.writeable:
            values = values.copy()
        values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:
        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings(
                        "ignore", "All-NaN slice encountered", RuntimeWarning
                    )
                    if (values.shape[1] == 1 and axis == 0) or (
                        values.shape[0] == 1 and axis == 1
                    ):
                        # GH52788: fastpath when squeezable, nanmedian for 2D array slow
                        res = np.nanmedian(np.squeeze(values), keepdims=True)
                    else:
                        res = np.nanmedian(values, axis=axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            res = _get_empty_reduction_result(values.shape, axis)

    else:
        # otherwise return a scalar value
        res = get_median(values, mask) if notempty else np.nan
    return _wrap_results(res, dtype)
833
834
def _get_empty_reduction_result(
    shape: Shape,
    axis: AxisInt,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray: an all-NaN float64
    array shaped like ``shape`` with ``axis`` removed.

    Parameters
    ----------
    shape : Tuple[int, ...]
    axis : int

    Returns
    -------
    np.ndarray
    """
    out_shape = tuple(dim for i, dim in enumerate(shape) if i != axis)
    return np.full(out_shape, np.nan, dtype=np.float64)
856
857
def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: AxisInt | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    d = count - dtype.type(ddof)

    # degrees of freedom <= 0 must yield NaN, never inf
    if is_float(count):
        if count <= ddof:
            # error: Incompatible types in assignment (expression has type
            # "float", variable has type "Union[floating[Any], ndarray[Any,
            # dtype[floating[Any]]]]")
            count = np.nan  # type: ignore[assignment]
            d = np.nan
        return count, d

    # count is not narrowed by is_float check
    count = cast(np.ndarray, count)
    too_few = count <= ddof
    if too_few.any():
        np.putmask(d, too_few, np.nan)
        np.putmask(count, too_few, np.nan)
    return count, d
906
907
@bottleneck_switch(ddof=1)
def nanstd(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s.values)
    1.0
    """
    # std of datetimes is a timedelta: compute on the m8 view
    if values.dtype == "M8[ns]":
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask = _get_values(values, skipna, mask=mask)

    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)
    return _wrap_results(np.sqrt(var), orig_dtype)
952
953
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s.values)
    1.0
    """
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if dtype.kind in "iu":
        # integers cannot hold NaN; work in float64 so masking applies
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if values.dtype.kind == "f":
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries so they contribute nothing to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries must not contribute to the squared deviations
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if dtype.kind == "f":
        result = result.astype(dtype, copy=False)
    return result
1027
1028
@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s.values)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if values.dtype.kind != "f":
        values = values.astype("f8")

    if not skipna and mask is not None and mask.any():
        # any NA with skipna=False poisons the result
        return np.nan

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    sd = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
    return sd / np.sqrt(count)
1080
1081
def _nanminmax(meth, fill_value_typ):
    """
    Build nanmin/nanmax: reduce with the ``meth`` ndarray method after
    filling NAs with the +/-inf sentinel given by ``fill_value_typ``.
    """

    @bottleneck_switch(name=f"nan{meth}")
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ):
        if values.size == 0:
            # empty reduction: NA of the appropriate type/shape
            return _na_for_min_count(values, axis)

        values, mask = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )
        result = getattr(values, meth)(axis)
        return _maybe_null_out(result, axis, mask, values.shape)

    return reduction
1103
1104
# nanmin fills NAs with +inf so they never win the reduction; nanmax uses -inf.
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")
1107
1108
def nanargmax(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index (or indices along ``axis``) of the maximum value, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> from pandas.core import nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7., nan],
           [ 9., 10., nan]])
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    # fill NAs with -inf so they can never be the argmax
    values, mask = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    result = values.argmax(axis)
    # error: Argument 1 to "_maybe_arg_null_out" has incompatible type "Any |
    # signedinteger[Any]"; expected "ndarray[Any, Any]"
    return _maybe_arg_null_out(result, axis, mask, skipna)  # type: ignore[arg-type]
1153
1154
def nanargmin(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index of the minimum value along an axis, NA-aware.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of min value in specified axis or -1 in the NA case

    Examples
    --------
    >>> from pandas.core import nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmin(arr)
    0

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 0] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [nan,  7.,  8.],
           [nan, 10., 11.]])
    >>> nanops.nanargmin(arr, axis=1)
    array([0, 0, 1, 1])
    """
    # NA slots are filled with +inf so they can never be selected by argmin.
    filled, mask = _get_values(values, True, fill_value_typ="+inf", mask=mask)
    idx = filled.argmin(axis)
    # error: Argument 1 to "_maybe_arg_null_out" has incompatible type "Any |
    # signedinteger[Any]"; expected "ndarray[Any, Any]"
    return _maybe_arg_null_out(idx, axis, mask, skipna)  # type: ignore[arg-type]
1199
1200
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanskew(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s.values)
    1.7320508075688787
    """
    mask = _maybe_get_mask(values, skipna, mask)
    # Non-float input is upcast to float64; counts then use the matching dtype.
    if values.dtype.kind != "f":
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NA positions on a copy so they do not contribute to sums.
        values = values.copy()
        np.putmask(values, mask, 0)
    elif not skipna and mask is not None and mask.any():
        # With skipna=False, any NA makes the whole result NA.
        return np.nan

    with np.errstate(invalid="ignore", divide="ignore"):
        mean = values.sum(axis, dtype=np.float64) / count
        if axis is not None:
            mean = np.expand_dims(mean, axis)

        adjusted = values - mean
        if skipna and mask is not None:
            # Re-zero NA slots: subtracting the mean made them -mean.
            np.putmask(adjusted, mask, 0)
        adjusted2 = adjusted**2
        adjusted3 = adjusted2 * adjusted
        # Second and third central moments.
        m2 = adjusted2.sum(axis, dtype=np.float64)
        m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if dtype.kind == "f":
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Array case: zero variance -> 0 skew; fewer than 3 obs -> NaN.
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
    else:
        # Scalar case mirrors the array handling above.
        result = dtype.type(0) if m2 == 0 else result
        if count < 3:
            return np.nan

    return result
1287
1288
@disallow("M8", "m8")
@maybe_operate_rowwise
def nankurt(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s.values)
    -1.2892561983471076
    """
    mask = _maybe_get_mask(values, skipna, mask)
    # Non-float input is upcast to float64; counts then use the matching dtype.
    if values.dtype.kind != "f":
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NA positions on a copy so they do not contribute to sums.
        values = values.copy()
        np.putmask(values, mask, 0)
    elif not skipna and mask is not None and mask.any():
        # With skipna=False, any NA makes the whole result NA.
        return np.nan

    with np.errstate(invalid="ignore", divide="ignore"):
        mean = values.sum(axis, dtype=np.float64) / count
        if axis is not None:
            mean = np.expand_dims(mean, axis)

        adjusted = values - mean
        if skipna and mask is not None:
            # Re-zero NA slots: subtracting the mean made them -mean.
            np.putmask(adjusted, mask, 0)
        adjusted2 = adjusted**2
        adjusted4 = adjusted2**2
        # Second and fourth central moments.
        m2 = adjusted2.sum(axis, dtype=np.float64)
        m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numerator = count * (count + 1) * (count - 1) * m4
        denominator = (count - 2) * (count - 3) * m2**2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follow this behavior
    # to fix the fperr to treat denom <1e-14 as zero
    numerator = _zero_out_fperr(numerator)
    denominator = _zero_out_fperr(denominator)

    if not isinstance(denominator, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            # G2 needs at least 4 observations.
            return np.nan
        if denominator == 0:
            # Zero variance -> zero excess kurtosis, preserving dtype.
            return values.dtype.type(0)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numerator / denominator - adj

    dtype = values.dtype
    if dtype.kind == "f":
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Array case: zero-variance slots -> 0, fewer than 4 obs -> NaN.
        result = np.where(denominator == 0, 0, result)
        result[count < 4] = np.nan

    return result
1384
1385
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanprod(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Product of array values along an axis, with NAs treated as 1.

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    Dtype
        The product of all elements on a given axis. ( NaNs are treated as 1)

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, 3, np.nan])
    >>> nanops.nanprod(s.values)
    6.0
    """
    mask = _maybe_get_mask(values, skipna, mask)

    if skipna and mask is not None:
        # Fill NA slots with the multiplicative identity on a copy.
        values = values.copy()
        values[mask] = 1

    product = values.prod(axis)
    # error: Incompatible return value type (got "Union[ndarray, float]", expected
    # "float")
    return _maybe_null_out(  # type: ignore[return-value]
        product, axis, mask, values.shape, min_count=min_count
    )
1429
1430
1431def _maybe_arg_null_out(
1432 result: np.ndarray,
1433 axis: AxisInt | None,
1434 mask: npt.NDArray[np.bool_] | None,
1435 skipna: bool,
1436) -> np.ndarray | int:
1437 # helper function for nanargmin/nanargmax
1438 if mask is None:
1439 return result
1440
1441 if axis is None or not getattr(result, "ndim", False):
1442 if skipna:
1443 if mask.all():
1444 return -1
1445 else:
1446 if mask.any():
1447 return -1
1448 else:
1449 if skipna:
1450 na_mask = mask.all(axis)
1451 else:
1452 na_mask = mask.any(axis)
1453 if na_mask.any():
1454 result[na_mask] = -1
1455 return result
1456
1457
1458def _get_counts(
1459 values_shape: Shape,
1460 mask: npt.NDArray[np.bool_] | None,
1461 axis: AxisInt | None,
1462 dtype: np.dtype[np.floating] = np.dtype(np.float64),
1463) -> np.floating | npt.NDArray[np.floating]:
1464 """
1465 Get the count of non-null values along an axis
1466
1467 Parameters
1468 ----------
1469 values_shape : tuple of int
1470 shape tuple from values ndarray, used if mask is None
1471 mask : Optional[ndarray[bool]]
1472 locations in values that should be considered missing
1473 axis : Optional[int]
1474 axis to count along
1475 dtype : type, optional
1476 type to use for count
1477
1478 Returns
1479 -------
1480 count : scalar or array
1481 """
1482 if axis is None:
1483 if mask is not None:
1484 n = mask.size - mask.sum()
1485 else:
1486 n = np.prod(values_shape)
1487 return dtype.type(n)
1488
1489 if mask is not None:
1490 count = mask.shape[axis] - mask.sum(axis)
1491 else:
1492 count = values_shape[axis]
1493
1494 if is_integer(count):
1495 return dtype.type(count)
1496 return count.astype(dtype, copy=False)
1497
1498
def _maybe_null_out(
    result: np.ndarray | float | NaTType,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_] | None,
    shape: tuple[int, ...],
    min_count: int = 1,
) -> np.ndarray | float | NaTType:
    """
    Set result entries to NA where fewer than ``min_count`` non-null values
    contributed along ``axis``.
    """
    if mask is None and min_count == 0:
        # No NAs and no minimum requirement; nothing can be nulled out.
        return result

    if axis is not None and isinstance(result, np.ndarray):
        if mask is None:
            # mask=None means no nulls; every slot shares the same count.
            short = shape[axis] - min_count < 0
            reduced_shape = shape[:axis] + shape[axis + 1 :]
            null_mask = np.broadcast_to(short, reduced_shape)
        else:
            null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0

        if np.any(null_mask):
            if not is_numeric_dtype(result):
                # GH12941, use None to auto cast null
                result[null_mask] = None
            else:
                # Cast so the result dtype can actually hold NaN.
                if np.iscomplexobj(result):
                    result = result.astype("c16")
                elif not is_float_dtype(result):
                    result = result.astype("f8", copy=False)
                result[null_mask] = np.nan
    elif result is not NaT:
        if check_below_min_count(shape, mask, min_count):
            result_dtype = getattr(result, "dtype", None)
            if is_float_dtype(result_dtype):
                # Preserve float precision when producing the NA scalar.
                # error: Item "None" of "Optional[Any]" has no attribute "type"
                result = result_dtype.type("nan")  # type: ignore[union-attr]
            else:
                result = np.nan

    return result
1545
1546
def check_below_min_count(
    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
    """
    Check for the `min_count` keyword. Returns True if below `min_count` (when
    missing value should be returned from the reduction).

    Parameters
    ----------
    shape : tuple
        The shape of the values (`values.shape`).
    mask : ndarray[bool] or None
        Boolean numpy array (typically of same shape as `shape`) or None.
    min_count : int
        Keyword passed through from sum/prod call.

    Returns
    -------
    bool
    """
    if min_count <= 0:
        # No minimum requested -> never below it.
        return False
    # mask=None means there are no missing values; only the size matters.
    non_nulls = np.prod(shape) if mask is None else mask.size - mask.sum()
    return bool(non_nulls < min_count)
1576
1577
1578def _zero_out_fperr(arg):
1579 # #18044 reference this behavior to fix rolling skew/kurt issue
1580 if isinstance(arg, np.ndarray):
1581 return np.where(np.abs(arg) < 1e-14, 0, arg)
1582 else:
1583 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
1584
1585
@disallow("M8", "m8")
def nancorr(
    a: np.ndarray,
    b: np.ndarray,
    *,
    method: CorrelationMethod = "pearson",
    min_periods: int | None = None,
) -> float:
    """
    Correlation of two 1-D ndarrays, dropping pairwise-missing entries.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancorr must have same size")

    min_periods = 1 if min_periods is None else min_periods

    # Keep only positions where both operands are non-null.
    valid = notna(a) & notna(b)
    if not valid.all():
        a = a[valid]
        b = b[valid]

    if len(a) < min_periods:
        return np.nan

    corr_func = get_corr_func(method)
    return corr_func(_ensure_numeric(a), _ensure_numeric(b))
1616
1617
def get_corr_func(
    method: CorrelationMethod,
) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Map a correlation ``method`` name (or a user callable) to a two-argument
    function returning the correlation coefficient.
    """
    if method == "kendall":
        from scipy.stats import kendalltau

        # kendalltau returns (statistic, pvalue); keep the statistic.
        return lambda a, b: kendalltau(a, b)[0]
    if method == "spearman":
        from scipy.stats import spearmanr

        return lambda a, b: spearmanr(a, b)[0]
    if method == "pearson":
        return lambda a, b: np.corrcoef(a, b)[0, 1]
    if callable(method):
        return method

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )
1648
1649
@disallow("M8", "m8")
def nancov(
    a: np.ndarray,
    b: np.ndarray,
    *,
    min_periods: int | None = None,
    ddof: int | None = 1,
) -> float:
    """
    Covariance of two 1-D ndarrays, dropping pairwise-missing entries.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancov must have same size")

    if min_periods is None:
        min_periods = 1

    # Keep only positions where both operands are non-null.
    valid = notna(a) & notna(b)
    if not valid.all():
        a, b = a[valid], b[valid]

    if len(a) < min_periods:
        return np.nan

    return np.cov(_ensure_numeric(a), _ensure_numeric(b), ddof=ddof)[0, 1]
1676
1677
1678def _ensure_numeric(x):
1679 if isinstance(x, np.ndarray):
1680 if x.dtype.kind in "biu":
1681 x = x.astype(np.float64)
1682 elif x.dtype == object:
1683 inferred = lib.infer_dtype(x)
1684 if inferred in ["string", "mixed"]:
1685 # GH#44008, GH#36703 avoid casting e.g. strings to numeric
1686 raise TypeError(f"Could not convert {x} to numeric")
1687 try:
1688 x = x.astype(np.complex128)
1689 except (TypeError, ValueError):
1690 try:
1691 x = x.astype(np.float64)
1692 except ValueError as err:
1693 # GH#29941 we get here with object arrays containing strs
1694 raise TypeError(f"Could not convert {x} to numeric") from err
1695 else:
1696 if not np.any(np.imag(x)):
1697 x = x.real
1698 elif not (is_float(x) or is_integer(x) or is_complex(x)):
1699 if isinstance(x, str):
1700 # GH#44008, GH#36703 avoid casting e.g. strings to numeric
1701 raise TypeError(f"Could not convert string '{x}' to numeric")
1702 try:
1703 x = float(x)
1704 except (TypeError, ValueError):
1705 # e.g. "1+1j" or "foo"
1706 try:
1707 x = complex(x)
1708 except ValueError as err:
1709 # e.g. "foo"
1710 raise TypeError(f"Could not convert {x} to numeric") from err
1711 return x
1712
1713
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # Identity element for the accumulation, and the value restored at NA slots.
    fill_value, na_value = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # This should go through ea interface
    assert values.dtype.kind not in "mM"

    # Integer/bool dtypes cannot hold NaN, so there is nothing to skip.
    if not skipna or issubclass(values.dtype.type, (np.integer, np.bool_)):
        return accum_func(values, axis=0)

    vals = values.copy()
    mask = isna(vals)
    vals[mask] = fill_value
    result = accum_func(vals, axis=0)
    # Restore NA markers at the originally-missing positions.
    result[mask] = na_value
    return result