1from __future__ import annotations
2
3import functools
4import itertools
5import operator
6from typing import (
7 Any,
8 Callable,
9 cast,
10)
11import warnings
12
13import numpy as np
14
15from pandas._config import get_option
16
17from pandas._libs import (
18 NaT,
19 NaTType,
20 iNaT,
21 lib,
22)
23from pandas._typing import (
24 ArrayLike,
25 AxisInt,
26 CorrelationMethod,
27 Dtype,
28 DtypeObj,
29 F,
30 Scalar,
31 Shape,
32 npt,
33)
34from pandas.compat._optional import import_optional_dependency
35from pandas.util._exceptions import find_stack_level
36
37from pandas.core.dtypes.common import (
38 is_any_int_dtype,
39 is_bool_dtype,
40 is_complex,
41 is_datetime64_any_dtype,
42 is_float,
43 is_float_dtype,
44 is_integer,
45 is_integer_dtype,
46 is_numeric_dtype,
47 is_object_dtype,
48 is_scalar,
49 is_timedelta64_dtype,
50 needs_i8_conversion,
51 pandas_dtype,
52)
53from pandas.core.dtypes.dtypes import PeriodDtype
54from pandas.core.dtypes.missing import (
55 isna,
56 na_value_for_dtype,
57 notna,
58)
59
60from pandas.core.construction import extract_array
61
# Optional bottleneck dependency: warn (don't raise) when it is missing,
# since every nanop below has a pure-numpy fallback implementation.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
_USE_BOTTLENECK = False
65
66
def set_use_bottleneck(v: bool = True) -> None:
    """Globally enable/disable bottleneck acceleration.

    This is a no-op when bottleneck is not installed.
    """
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        return
    _USE_BOTTLENECK = v
72
73
# Honor the user's "compute.use_bottleneck" option at import time.
set_use_bottleneck(get_option("compute.use_bottleneck"))
75
76
class disallow:
    """
    Decorator that raises TypeError when the wrapped reduction is called
    with any argument whose dtype is one of the disallowed dtypes.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        # normalize to numpy scalar types so `issubclass` checks work below
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        # True if obj carries a dtype that matches any disallowed type
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            # scan positional and keyword arguments alike
            obj_iter = itertools.chain(args, kwargs.values())
            if any(self.check(obj) for obj in obj_iter):
                # strip the "nan" prefix for a friendlier error message
                f_name = f.__name__.replace("nan", "")
                raise TypeError(
                    f"reduction operation '{f_name}' not allowed for this dtype"
                )
            try:
                with np.errstate(invalid="ignore"):
                    return f(*args, **kwargs)
            except ValueError as e:
                # we want to transform an object array
                # ValueError message to the more typical TypeError
                # e.g. this is normally a disallowed function on
                # object arrays that contain strings
                if is_object_dtype(args[0]):
                    raise TypeError(e) from e
                raise

        return cast(F, _f)
107
108
class bottleneck_switch:
    """
    Decorator that dispatches to the same-named bottleneck function when
    bottleneck is installed, enabled, and safe for the values' dtype;
    otherwise it calls the decorated (numpy-based) implementation.
    """

    def __init__(self, name=None, **kwargs) -> None:
        # name: bottleneck function name; defaults to the wrapped function's
        # __name__.  kwargs: default keyword arguments injected into calls.
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bottleneck not installed, or it lacks this function
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: AxisInt | None = None,
            skipna: bool = True,
            **kwds,
        ):
            if len(self.kwargs) > 0:
                # inject defaults without overriding caller-provided kwargs
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    # a mask was supplied: bottleneck cannot use it
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)
163
164
def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
    """Return True if bottleneck may safely run `name` on this dtype."""
    # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
    if is_object_dtype(dtype) or needs_i8_conversion(dtype):
        return False

    # GH 42878
    # Bottleneck uses naive summation leading to O(n) loss of precision
    # unlike numpy which implements pairwise summation, which has O(log(n)) loss
    # crossref: https://github.com/pydata/bottleneck/issues/379

    # GH 15507
    # bottleneck does not properly upcast during the sum
    # so can overflow

    # GH 9422
    # further we also want to preserve NaN when all elements
    # are NaN, unlike bottleneck/numpy which consider this
    # to be 0
    return name not in ["nansum", "nanprod", "nanmean"]
183
184
185def _has_infs(result) -> bool:
186 if isinstance(result, np.ndarray):
187 if result.dtype in ("f8", "f4"):
188 # Note: outside of an nanops-specific test, we always have
189 # result.ndim == 1, so there is no risk of this ravel making a copy.
190 return lib.has_infs(result.ravel("K"))
191 try:
192 return np.isinf(result).any()
193 except (TypeError, NotImplementedError):
194 # if it doesn't support infs, then it can't have infs
195 return False
196
197
def _get_fill_value(
    dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
):
    """
    Return the fill value to use for `dtype`.

    An explicitly passed `fill_value` wins; otherwise NaN/inf is used for
    NaN-capable dtypes and integer sentinels (i8max/iNaT) for the rest.
    """
    if fill_value is not None:
        return fill_value

    if not _na_ok_dtype(dtype):
        # i8-backed dtype: need integer sentinels rather than NaN/inf
        if fill_value_typ == "+inf":
            # need the max int here
            return lib.i8max
        return iNaT

    # NaN-capable dtype
    if fill_value_typ is None:
        return np.nan
    return np.inf if fill_value_typ == "+inf" else -np.inf
218
219
def _maybe_get_mask(
    values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
) -> npt.NDArray[np.bool_] | None:
    """
    Compute a NA-mask only when one is actually needed.

    A precomputed mask is returned unchanged.  Boolean and integer arrays
    cannot hold NaNs, so no mask is ever required for them and None is
    returned.  Otherwise a fresh isna() mask is computed when skipna is
    True, or when the dtype is i8-backed (datetime64/timedelta64), where a
    mask must exist to locate NaT values.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    mask : Optional[ndarray]
        nan-mask if known

    Returns
    -------
    Optional[np.ndarray[bool]]
    """
    if mask is not None:
        return mask

    if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
        # Boolean data cannot contain nulls, so signal via mask being None
        return None

    if skipna or needs_i8_conversion(values.dtype):
        return isna(values)

    return None
262
263
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below
    assert is_scalar(fill_value)
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]

    # compute the mask BEFORE the i8 view below, so NaT positions are found
    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                # dtype can hold fill_value directly: fill a copy in place
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value
351
352
def _na_ok_dtype(dtype: DtypeObj) -> bool:
    """Return True if `dtype` can directly hold NaN."""
    # i8-backed (datetime64/timedelta64) and integer dtypes cannot hold NaN
    return not (needs_i8_conversion(dtype) or issubclass(dtype.type, np.integer))
357
358
def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    Wrap our results if needed: cast i8/float reduction results back to
    the original datetime64/timedelta64 dtype (or NaT) when `dtype` is
    datetimelike; other dtypes pass through unchanged.
    """
    if result is NaT:
        pass

    elif is_datetime64_any_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            # a scalar equal to the fill sentinel means "all-NA input"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns").astype(dtype)
            else:
                result = np.int64(result).view(dtype)
                # retain original unit
                result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result
398
399
def _datetimelike_compat(func: F) -> F:
    """
    If we have datetime64 or timedelta64 values, ensure we have a correct
    mask before calling the wrapped function, then cast back afterwards.
    """

    @functools.wraps(func)
    def new_func(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
        **kwargs,
    ):
        orig_values = values

        datetimelike = values.dtype.kind in ["m", "M"]
        if datetimelike and mask is None:
            # compute the NaT mask up front, before any i8 view inside func
            mask = isna(values)

        result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if datetimelike:
            # cast the numeric result back to the original datetimelike dtype
            result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)
            if not skipna:
                assert mask is not None  # checked above
                # skipna=False: any NaT in the reduced axis poisons the result
                result = _mask_datetimelike_result(result, axis, mask, orig_values)

        return result

    return cast(F, new_func)
432
433
def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray:
    """
    Return the missing value for `values`.

    Parameters
    ----------
    values : ndarray
    axis : int or None
        axis for the reduction, required if values.ndim > 1.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, returns a scalar of the correct missing type.
        For 2-D values, returns a 1-D array where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype("float64")
    fill_value = na_value_for_dtype(values.dtype)

    # 1-D input or full reduction yields a single missing scalar
    if values.ndim == 1 or axis is None:
        return fill_value

    # otherwise an array of missing values with the reduced axis dropped
    result_shape = values.shape[:axis] + values.shape[axis + 1 :]
    return np.full(result_shape, fill_value, dtype=values.dtype)
463
464
def maybe_operate_rowwise(func: F) -> F:
    """
    NumPy operations on C-contiguous ndarrays with axis=1 can be
    very slow if axis 1 >> axis 0.
    Operate row-by-row and concatenate the results.
    """

    @functools.wraps(func)
    def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs):
        if (
            axis == 1
            and values.ndim == 2
            and values.flags["C_CONTIGUOUS"]
            # only takes this path for wide arrays (long dataframes), for threshold see
            # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
            and (values.shape[1] / 1000) > values.shape[0]
            and values.dtype != object
            and values.dtype != bool
        ):
            # iterate the 2-D array as a list of 1-D rows
            arrs = list(values)
            if kwargs.get("mask") is not None:
                # pass each row of the mask along with its row of values
                mask = kwargs.pop("mask")
                results = [
                    func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs))
                ]
            else:
                results = [func(x, **kwargs) for x in arrs]
            return np.array(results)

        # fall through to the ordinary whole-array reduction
        return func(values, axis=axis, **kwargs)

    return cast(F, newfunc)
497
498
def nanany(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2])
    >>> nanops.nanany(s)
    True

    >>> from pandas.core import nanops
    >>> s = pd.Series([np.nan])
    >>> nanops.nanany(s)
    False
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype) and dtype.kind != "m":
        # GH#34479: truthiness of datetime64 values is being deprecated
        warnings.warn(
            "'any' with datetime64 dtypes is deprecated and will raise in a "
            "future version. Use (obj != pd.Timestamp(0)).any() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # NA entries are replaced with False so they can never make this True
    values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask)

    if is_object_dtype(values):
        # np.any on object arrays need not return booleans (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.any(axis)  # type: ignore[return-value]
552
553
def nanall(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if all elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanall(s)
    True

    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 0])
    >>> nanops.nanall(s)
    False
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype) and dtype.kind != "m":
        # GH#34479: truthiness of datetime64 values is being deprecated
        warnings.warn(
            "'all' with datetime64 dtypes is deprecated and will raise in a "
            "future version. Use (obj != pd.Timestamp(0)).all() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # NA entries are replaced with True so they can never make this False
    values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask)

    if is_object_dtype(values):
        # np.all on object arrays need not return booleans (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.all(axis)  # type: ignore[return-value]
607
608
@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        Minimum number of non-NA values required for a non-NA result.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s)
    3.0
    """
    # NA entries are filled with 0 so they do not affect the sum
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_sum = dtype_max
    if is_float_dtype(dtype):
        # keep the input's float precision
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        # accumulate the i8 view in float64; the _datetimelike_compat
        # decorator wraps the result back to timedelta64
        dtype_sum = np.dtype(np.float64)

    the_sum = values.sum(axis, dtype=dtype_sum)
    # enforce min_count: entries with too few observations become NA
    the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)

    return the_sum
656
657
def _mask_datetimelike_result(
    result: np.ndarray | np.datetime64 | np.timedelta64,
    axis: AxisInt | None,
    mask: npt.NDArray[np.bool_],
    orig_values: np.ndarray,
) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
    """
    For skipna=False reductions: set the result to NaT wherever the
    reduced-over data contained any NaT.
    """
    if isinstance(result, np.ndarray):
        # we need to apply the mask
        result = result.astype("i8").view(orig_values.dtype)
        axis_mask = mask.any(axis=axis)
        # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
        # datetime64, timedelta64]")
        result[axis_mask] = iNaT  # type: ignore[index]
    else:
        # scalar result: any missing input makes the whole reduction NaT
        if mask.any():
            return np.int64(iNaT).view(orig_values.dtype)
    return result
675
676
@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    # NA entries are filled with 0 so they do not affect the sum
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_sum = dtype_max
    dtype_count = np.dtype(np.float64)

    # not using needs_i8_conversion because that includes period
    if dtype.kind in ["m", "M"]:
        # datetimelike: sum the i8 view in float64
        dtype_sum = np.dtype(np.float64)
    elif is_integer_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
    elif is_float_dtype(dtype):
        # keep the input's float precision for both sum and count
        dtype_sum = dtype
        dtype_count = dtype

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        # array result: entries with zero observations become NaN
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
    else:
        # scalar result
        the_mean = the_sum / count if count > 0 else np.nan

    return the_mean
741
742
@bottleneck_switch()
def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None):
    """
    Compute the median along an axis ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s)
    2.0
    """

    def get_median(x, _mask=None):
        # median of a 1-D slice; NaN when skipna=False and any NA is present
        if _mask is None:
            _mask = notna(x)
        else:
            _mask = ~_mask
        if not skipna and not _mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings(
                "ignore", "All-NaN slice encountered", RuntimeWarning
            )
            res = np.nanmedian(x[_mask])
        return res

    values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask, fill_value=0)
    if not is_float_dtype(values.dtype):
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
        if mask is not None:
            # astype produced a fresh float array, safe to write NaNs in place
            values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:
        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings(
                        "ignore", "All-NaN slice encountered", RuntimeWarning
                    )
                    res = np.nanmedian(values, axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            # np.float64 (not the np.float_ alias, removed in NumPy 2.0)
            res = get_empty_reduction_result(values.shape, axis, np.float64, np.nan)

    else:
        # otherwise return a scalar value
        res = get_median(values, mask) if notempty else np.nan
    return _wrap_results(res, dtype)
822
823
def get_empty_reduction_result(
    shape: tuple[int, ...],
    axis: AxisInt,
    dtype: np.dtype | type[np.floating],
    fill_value: Any,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray.

    Parameters
    ----------
    shape : Tuple[int]
    axis : int
    dtype : np.dtype
    fill_value : Any

    Returns
    -------
    np.ndarray
    """
    shp = np.array(shape)
    dims = np.arange(len(shape))
    # drop the reduced axis from the shape, then fill the remainder
    result_shape = shp[dims != axis]
    return np.full(result_shape, fill_value, dtype=dtype)
849
850
def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: AxisInt | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
        count minus ddof; NaN where there are too few observations.
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    # denominator for the variance: observations minus degrees of freedom
    d = count - dtype.type(ddof)

    # always return NaN, never inf
    if is_scalar(count):
        if count <= ddof:
            # too few observations for this ddof
            count = np.nan
            d = np.nan
    else:
        # count is not narrowed by is_scalar check
        count = cast(np.ndarray, count)
        # NOTE: deliberately shadows the `mask` parameter with the
        # "insufficient observations" mask from here on
        mask = count <= ddof
        if mask.any():
            np.putmask(d, mask, np.nan)
            np.putmask(count, mask, np.nan)
    return count, d
896
897
@bottleneck_switch(ddof=1)
def nanstd(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s)
    1.0
    """
    if values.dtype == "M8[ns]":
        # std of datetimes is a timedelta, so compute on the m8 view
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask, _, _, _ = _get_values(values, skipna, mask=mask)

    result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
    # re-wrap the float result as timedelta64 when the input was datetimelike
    return _wrap_results(result, orig_dtype)
942
943
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(
    values,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask=None,
):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = extract_array(values, extract_numpy=True)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(dtype):
        # integers cannot hold NaN; astype copies, so the NaN assignment
        # below does not mutate the caller's array
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values.dtype):
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        # make the mean broadcastable against values for the second pass
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries must not contribute squared deviations either
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)
    return result
1018
1019
@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> from pandas.core import nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")

    if not skipna and mask is not None and mask.any():
        # with skipna disabled, any missing value poisons the result
        return np.nan

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    # sem = sqrt(var) / sqrt(n)
    return np.sqrt(var) / np.sqrt(count)
1071
1072
def _nanminmax(meth, fill_value_typ):
    """Build a NaN-aware min/max reduction; meth is "min" or "max"."""

    @bottleneck_switch(name=f"nan{meth}")
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> Dtype:
        # NA entries are filled with +/-inf so they never win the comparison
        values, mask, dtype, dtype_max, fill_value = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )

        if (axis is not None and values.shape[axis] == 0) or values.size == 0:
            # empty reduction: numpy would raise, so produce NaN(s) directly
            try:
                result = getattr(values, meth)(axis, dtype=dtype_max)
                result.fill(np.nan)
            except (AttributeError, TypeError, ValueError):
                result = np.nan
        else:
            result = getattr(values, meth)(axis)

        result = _maybe_null_out(result, axis, mask, values.shape)
        return result

    return reduction
1100
1101
# NaN-skipping min/max built from the shared reduction factory above;
# the fill value is the identity that can never win the comparison.
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")
1104
1105
@disallow("O")
def nanargmax(
    values: np.ndarray,
    *,
    axis: AxisInt | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> from pandas.core import nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7., nan],
           [ 9., 10., nan]])
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    # always fill NAs (with -inf) so they can never be the argmax;
    # the skipna flag is honored by _maybe_arg_null_out below
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    # error: Need type annotation for 'result'
    result = values.argmax(axis)  # type: ignore[var-annotated]
    result = _maybe_arg_null_out(result, axis, mask, skipna)
    return result
1150
1151
1152@disallow("O")
1153def nanargmin(
1154 values: np.ndarray,
1155 *,
1156 axis: AxisInt | None = None,
1157 skipna: bool = True,
1158 mask: npt.NDArray[np.bool_] | None = None,
1159) -> int | np.ndarray:
1160 """
1161 Parameters
1162 ----------
1163 values : ndarray
1164 axis : int, optional
1165 skipna : bool, default True
1166 mask : ndarray[bool], optional
1167 nan-mask if known
1168
1169 Returns
1170 -------
1171 result : int or ndarray[int]
1172 The index/indices of min value in specified axis or -1 in the NA case
1173
1174 Examples
1175 --------
1176 >>> from pandas.core import nanops
1177 >>> arr = np.array([1, 2, 3, np.nan, 4])
1178 >>> nanops.nanargmin(arr)
1179 0
1180
1181 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
1182 >>> arr[2:, 0] = np.nan
1183 >>> arr
1184 array([[ 0., 1., 2.],
1185 [ 3., 4., 5.],
1186 [nan, 7., 8.],
1187 [nan, 10., 11.]])
1188 >>> nanops.nanargmin(arr, axis=1)
1189 array([0, 0, 1, 1])
1190 """
1191 values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask)
1192 # error: Need type annotation for 'result'
1193 result = values.argmin(axis) # type: ignore[var-annotated]
1194 result = _maybe_arg_null_out(result, axis, mask, skipna)
1195 return result
1196
1197
1198@disallow("M8", "m8")
1199@maybe_operate_rowwise
1200def nanskew(
1201 values: np.ndarray,
1202 *,
1203 axis: AxisInt | None = None,
1204 skipna: bool = True,
1205 mask: npt.NDArray[np.bool_] | None = None,
1206) -> float:
1207 """
1208 Compute the sample skewness.
1209
1210 The statistic computed here is the adjusted Fisher-Pearson standardized
1211 moment coefficient G1. The algorithm computes this coefficient directly
1212 from the second and third central moment.
1213
1214 Parameters
1215 ----------
1216 values : ndarray
1217 axis : int, optional
1218 skipna : bool, default True
1219 mask : ndarray[bool], optional
1220 nan-mask if known
1221
1222 Returns
1223 -------
1224 result : float64
1225 Unless input is a float array, in which case use the same
1226 precision as the input array.
1227
1228 Examples
1229 --------
1230 >>> from pandas.core import nanops
1231 >>> s = pd.Series([1, np.nan, 1, 2])
1232 >>> nanops.nanskew(s)
1233 1.7320508075688787
1234 """
1235 # error: Incompatible types in assignment (expression has type "Union[Any,
1236 # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
1237 values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
1238 mask = _maybe_get_mask(values, skipna, mask)
1239 if not is_float_dtype(values.dtype):
1240 values = values.astype("f8")
1241 count = _get_counts(values.shape, mask, axis)
1242 else:
1243 count = _get_counts(values.shape, mask, axis, dtype=values.dtype)
1244
1245 if skipna and mask is not None:
1246 values = values.copy()
1247 np.putmask(values, mask, 0)
1248 elif not skipna and mask is not None and mask.any():
1249 return np.nan
1250
1251 mean = values.sum(axis, dtype=np.float64) / count
1252 if axis is not None:
1253 mean = np.expand_dims(mean, axis)
1254
1255 adjusted = values - mean
1256 if skipna and mask is not None:
1257 np.putmask(adjusted, mask, 0)
1258 adjusted2 = adjusted**2
1259 adjusted3 = adjusted2 * adjusted
1260 m2 = adjusted2.sum(axis, dtype=np.float64)
1261 m3 = adjusted3.sum(axis, dtype=np.float64)
1262
1263 # floating point error
1264 #
1265 # #18044 in _libs/windows.pyx calc_skew follow this behavior
1266 # to fix the fperr to treat m2 <1e-14 as zero
1267 m2 = _zero_out_fperr(m2)
1268 m3 = _zero_out_fperr(m3)
1269
1270 with np.errstate(invalid="ignore", divide="ignore"):
1271 result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)
1272
1273 dtype = values.dtype
1274 if is_float_dtype(dtype):
1275 result = result.astype(dtype, copy=False)
1276
1277 if isinstance(result, np.ndarray):
1278 result = np.where(m2 == 0, 0, result)
1279 result[count < 3] = np.nan
1280 else:
1281 result = 0 if m2 == 0 else result
1282 if count < 3:
1283 return np.nan
1284
1285 return result
1286
1287
1288@disallow("M8", "m8")
1289@maybe_operate_rowwise
1290def nankurt(
1291 values: np.ndarray,
1292 *,
1293 axis: AxisInt | None = None,
1294 skipna: bool = True,
1295 mask: npt.NDArray[np.bool_] | None = None,
1296) -> float:
1297 """
1298 Compute the sample excess kurtosis
1299
1300 The statistic computed here is the adjusted Fisher-Pearson standardized
1301 moment coefficient G2, computed directly from the second and fourth
1302 central moment.
1303
1304 Parameters
1305 ----------
1306 values : ndarray
1307 axis : int, optional
1308 skipna : bool, default True
1309 mask : ndarray[bool], optional
1310 nan-mask if known
1311
1312 Returns
1313 -------
1314 result : float64
1315 Unless input is a float array, in which case use the same
1316 precision as the input array.
1317
1318 Examples
1319 --------
1320 >>> from pandas.core import nanops
1321 >>> s = pd.Series([1, np.nan, 1, 3, 2])
1322 >>> nanops.nankurt(s)
1323 -1.2892561983471076
1324 """
1325 # error: Incompatible types in assignment (expression has type "Union[Any,
1326 # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
1327 values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
1328 mask = _maybe_get_mask(values, skipna, mask)
1329 if not is_float_dtype(values.dtype):
1330 values = values.astype("f8")
1331 count = _get_counts(values.shape, mask, axis)
1332 else:
1333 count = _get_counts(values.shape, mask, axis, dtype=values.dtype)
1334
1335 if skipna and mask is not None:
1336 values = values.copy()
1337 np.putmask(values, mask, 0)
1338 elif not skipna and mask is not None and mask.any():
1339 return np.nan
1340
1341 mean = values.sum(axis, dtype=np.float64) / count
1342 if axis is not None:
1343 mean = np.expand_dims(mean, axis)
1344
1345 adjusted = values - mean
1346 if skipna and mask is not None:
1347 np.putmask(adjusted, mask, 0)
1348 adjusted2 = adjusted**2
1349 adjusted4 = adjusted2**2
1350 m2 = adjusted2.sum(axis, dtype=np.float64)
1351 m4 = adjusted4.sum(axis, dtype=np.float64)
1352
1353 with np.errstate(invalid="ignore", divide="ignore"):
1354 adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
1355 numerator = count * (count + 1) * (count - 1) * m4
1356 denominator = (count - 2) * (count - 3) * m2**2
1357
1358 # floating point error
1359 #
1360 # #18044 in _libs/windows.pyx calc_kurt follow this behavior
1361 # to fix the fperr to treat denom <1e-14 as zero
1362 numerator = _zero_out_fperr(numerator)
1363 denominator = _zero_out_fperr(denominator)
1364
1365 if not isinstance(denominator, np.ndarray):
1366 # if ``denom`` is a scalar, check these corner cases first before
1367 # doing division
1368 if count < 4:
1369 return np.nan
1370 if denominator == 0:
1371 return 0
1372
1373 with np.errstate(invalid="ignore", divide="ignore"):
1374 result = numerator / denominator - adj
1375
1376 dtype = values.dtype
1377 if is_float_dtype(dtype):
1378 result = result.astype(dtype, copy=False)
1379
1380 if isinstance(result, np.ndarray):
1381 result = np.where(denominator == 0, 0, result)
1382 result[count < 4] = np.nan
1383
1384 return result
1385
1386
1387@disallow("M8", "m8")
1388@maybe_operate_rowwise
1389def nanprod(
1390 values: np.ndarray,
1391 *,
1392 axis: AxisInt | None = None,
1393 skipna: bool = True,
1394 min_count: int = 0,
1395 mask: npt.NDArray[np.bool_] | None = None,
1396) -> float:
1397 """
1398 Parameters
1399 ----------
1400 values : ndarray[dtype]
1401 axis : int, optional
1402 skipna : bool, default True
1403 min_count: int, default 0
1404 mask : ndarray[bool], optional
1405 nan-mask if known
1406
1407 Returns
1408 -------
1409 Dtype
1410 The product of all elements on a given axis. ( NaNs are treated as 1)
1411
1412 Examples
1413 --------
1414 >>> from pandas.core import nanops
1415 >>> s = pd.Series([1, 2, 3, np.nan])
1416 >>> nanops.nanprod(s)
1417 6.0
1418 """
1419 mask = _maybe_get_mask(values, skipna, mask)
1420
1421 if skipna and mask is not None:
1422 values = values.copy()
1423 values[mask] = 1
1424 result = values.prod(axis)
1425 # error: Incompatible return value type (got "Union[ndarray, float]", expected
1426 # "float")
1427 return _maybe_null_out( # type: ignore[return-value]
1428 result, axis, mask, values.shape, min_count=min_count
1429 )
1430
1431
1432def _maybe_arg_null_out(
1433 result: np.ndarray,
1434 axis: AxisInt | None,
1435 mask: npt.NDArray[np.bool_] | None,
1436 skipna: bool,
1437) -> np.ndarray | int:
1438 # helper function for nanargmin/nanargmax
1439 if mask is None:
1440 return result
1441
1442 if axis is None or not getattr(result, "ndim", False):
1443 if skipna:
1444 if mask.all():
1445 return -1
1446 else:
1447 if mask.any():
1448 return -1
1449 else:
1450 if skipna:
1451 na_mask = mask.all(axis)
1452 else:
1453 na_mask = mask.any(axis)
1454 if na_mask.any():
1455 result[na_mask] = -1
1456 return result
1457
1458
1459def _get_counts(
1460 values_shape: Shape,
1461 mask: npt.NDArray[np.bool_] | None,
1462 axis: AxisInt | None,
1463 dtype: np.dtype = np.dtype(np.float64),
1464) -> float | np.ndarray:
1465 """
1466 Get the count of non-null values along an axis
1467
1468 Parameters
1469 ----------
1470 values_shape : tuple of int
1471 shape tuple from values ndarray, used if mask is None
1472 mask : Optional[ndarray[bool]]
1473 locations in values that should be considered missing
1474 axis : Optional[int]
1475 axis to count along
1476 dtype : type, optional
1477 type to use for count
1478
1479 Returns
1480 -------
1481 count : scalar or array
1482 """
1483 if axis is None:
1484 if mask is not None:
1485 n = mask.size - mask.sum()
1486 else:
1487 n = np.prod(values_shape)
1488 return dtype.type(n)
1489
1490 if mask is not None:
1491 count = mask.shape[axis] - mask.sum(axis)
1492 else:
1493 count = values_shape[axis]
1494
1495 if is_scalar(count):
1496 return dtype.type(count)
1497 return count.astype(dtype, copy=False)
1498
1499
1500def _maybe_null_out(
1501 result: np.ndarray | float | NaTType,
1502 axis: AxisInt | None,
1503 mask: npt.NDArray[np.bool_] | None,
1504 shape: tuple[int, ...],
1505 min_count: int = 1,
1506) -> np.ndarray | float | NaTType:
1507 """
1508 Returns
1509 -------
1510 Dtype
1511 The product of all elements on a given axis. ( NaNs are treated as 1)
1512 """
1513 if mask is None and min_count == 0:
1514 # nothing to check; short-circuit
1515 return result
1516
1517 if axis is not None and isinstance(result, np.ndarray):
1518 if mask is not None:
1519 null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
1520 else:
1521 # we have no nulls, kept mask=None in _maybe_get_mask
1522 below_count = shape[axis] - min_count < 0
1523 new_shape = shape[:axis] + shape[axis + 1 :]
1524 null_mask = np.broadcast_to(below_count, new_shape)
1525
1526 if np.any(null_mask):
1527 if is_numeric_dtype(result):
1528 if np.iscomplexobj(result):
1529 result = result.astype("c16")
1530 elif not is_float_dtype(result):
1531 result = result.astype("f8", copy=False)
1532 result[null_mask] = np.nan
1533 else:
1534 # GH12941, use None to auto cast null
1535 result[null_mask] = None
1536 elif result is not NaT:
1537 if check_below_min_count(shape, mask, min_count):
1538 result_dtype = getattr(result, "dtype", None)
1539 if is_float_dtype(result_dtype):
1540 # error: Item "None" of "Optional[Any]" has no attribute "type"
1541 result = result_dtype.type("nan") # type: ignore[union-attr]
1542 else:
1543 result = np.nan
1544
1545 return result
1546
1547
1548def check_below_min_count(
1549 shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
1550) -> bool:
1551 """
1552 Check for the `min_count` keyword. Returns True if below `min_count` (when
1553 missing value should be returned from the reduction).
1554
1555 Parameters
1556 ----------
1557 shape : tuple
1558 The shape of the values (`values.shape`).
1559 mask : ndarray[bool] or None
1560 Boolean numpy array (typically of same shape as `shape`) or None.
1561 min_count : int
1562 Keyword passed through from sum/prod call.
1563
1564 Returns
1565 -------
1566 bool
1567 """
1568 if min_count > 0:
1569 if mask is None:
1570 # no missing values, only check size
1571 non_nulls = np.prod(shape)
1572 else:
1573 non_nulls = mask.size - mask.sum()
1574 if non_nulls < min_count:
1575 return True
1576 return False
1577
1578
1579def _zero_out_fperr(arg):
1580 # #18044 reference this behavior to fix rolling skew/kurt issue
1581 if isinstance(arg, np.ndarray):
1582 with np.errstate(invalid="ignore"):
1583 return np.where(np.abs(arg) < 1e-14, 0, arg)
1584 else:
1585 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
1586
1587
1588@disallow("M8", "m8")
1589def nancorr(
1590 a: np.ndarray,
1591 b: np.ndarray,
1592 *,
1593 method: CorrelationMethod = "pearson",
1594 min_periods: int | None = None,
1595) -> float:
1596 """
1597 a, b: ndarrays
1598 """
1599 if len(a) != len(b):
1600 raise AssertionError("Operands to nancorr must have same size")
1601
1602 if min_periods is None:
1603 min_periods = 1
1604
1605 valid = notna(a) & notna(b)
1606 if not valid.all():
1607 a = a[valid]
1608 b = b[valid]
1609
1610 if len(a) < min_periods:
1611 return np.nan
1612
1613 f = get_corr_func(method)
1614 return f(a, b)
1615
1616
def get_corr_func(
    method: CorrelationMethod,
) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Resolve a correlation ``method`` name (or user callable) to a function
    of two ndarrays returning a float.

    Raises ValueError for unrecognized method names.
    """
    if callable(method):
        # User-supplied correlation function: use as-is.
        return method

    if method == "kendall":
        # Lazy import: scipy is an optional dependency.
        from scipy.stats import kendalltau

        def func(a, b):
            return kendalltau(a, b)[0]

        return func
    if method == "spearman":
        from scipy.stats import spearmanr

        def func(a, b):
            return spearmanr(a, b)[0]

        return func
    if method == "pearson":

        def func(a, b):
            return np.corrcoef(a, b)[0, 1]

        return func

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )
1647
1648
1649@disallow("M8", "m8")
1650def nancov(
1651 a: np.ndarray,
1652 b: np.ndarray,
1653 *,
1654 min_periods: int | None = None,
1655 ddof: int | None = 1,
1656) -> float:
1657 if len(a) != len(b):
1658 raise AssertionError("Operands to nancov must have same size")
1659
1660 if min_periods is None:
1661 min_periods = 1
1662
1663 valid = notna(a) & notna(b)
1664 if not valid.all():
1665 a = a[valid]
1666 b = b[valid]
1667
1668 if len(a) < min_periods:
1669 return np.nan
1670
1671 return np.cov(a, b, ddof=ddof)[0, 1]
1672
1673
1674def _ensure_numeric(x):
1675 if isinstance(x, np.ndarray):
1676 if is_integer_dtype(x) or is_bool_dtype(x):
1677 x = x.astype(np.float64)
1678 elif is_object_dtype(x):
1679 try:
1680 x = x.astype(np.complex128)
1681 except (TypeError, ValueError):
1682 try:
1683 x = x.astype(np.float64)
1684 except ValueError as err:
1685 # GH#29941 we get here with object arrays containing strs
1686 raise TypeError(f"Could not convert {x} to numeric") from err
1687 else:
1688 if not np.any(np.imag(x)):
1689 x = x.real
1690 elif not (is_float(x) or is_integer(x) or is_complex(x)):
1691 try:
1692 x = float(x)
1693 except (TypeError, ValueError):
1694 # e.g. "1+1j" or "foo"
1695 try:
1696 x = complex(x)
1697 except ValueError as err:
1698 # e.g. "foo"
1699 raise TypeError(f"Could not convert {x} to numeric") from err
1700 return x
1701
1702
1703# NA-friendly array comparisons
1704
1705
def make_nancomp(op):
    """
    Wrap a binary comparison ``op`` so that positions where either operand
    is NA yield NaN instead of a plain boolean (the boolean result is
    upcast to object dtype to hold the NaN).
    """

    def f(x, y):
        combined_mask = isna(x) | isna(y)

        with np.errstate(all="ignore"):
            result = op(x, y)

        if combined_mask.any():
            if is_bool_dtype(result):
                # Object dtype is needed to store NaN alongside booleans.
                result = result.astype("O")
            np.putmask(result, combined_mask, np.nan)

        return result

    return f
1723
1724
# NA-propagating versions of the standard comparisons: where either operand
# is NA, the result holds NaN (object dtype) rather than a plain boolean.
nangt = make_nancomp(operator.gt)
nange = make_nancomp(operator.ge)
nanlt = make_nancomp(operator.lt)
nanle = make_nancomp(operator.le)
naneq = make_nancomp(operator.eq)
nanne = make_nancomp(operator.ne)
1731
1732
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # Neutral fill used during accumulation, and the NA marker restored after.
    fill_for_accum, fill_for_na = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # This should go through ea interface
    assert values.dtype.kind not in ["m", "M"]

    # We will be applying this function to block values
    if not skipna or issubclass(values.dtype.type, (np.integer, np.bool_)):
        # Integer/bool arrays cannot hold NA, and with skipna=False any NA
        # propagates naturally through the accumulation.
        return accum_func(values, axis=0)

    vals = values.copy()
    na_positions = isna(vals)
    vals[na_positions] = fill_for_accum
    result = accum_func(vals, axis=0)
    # Restore NA at the positions that were masked out.
    result[na_positions] = fill_for_na
    return result