Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_nanfunctions_impl.py: 19%
332 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
1"""
2Functions that ignore NaN.
4Functions
5---------
7- `nanmin` -- minimum non-NaN value
8- `nanmax` -- maximum non-NaN value
9- `nanargmin` -- index of minimum non-NaN value
10- `nanargmax` -- index of maximum non-NaN value
11- `nansum` -- sum of non-NaN values
12- `nanprod` -- product of non-NaN values
13- `nancumsum` -- cumulative sum of non-NaN values
14- `nancumprod` -- cumulative product of non-NaN values
15- `nanmean` -- mean of non-NaN values
16- `nanvar` -- variance of non-NaN values
17- `nanstd` -- standard deviation of non-NaN values
18- `nanmedian` -- median of non-NaN values
19- `nanquantile` -- qth quantile of non-NaN values
20- `nanpercentile` -- qth percentile of non-NaN values
22"""
23import functools
24import warnings
25import numpy as np
26import numpy._core.numeric as _nx
27from numpy.lib import _function_base_impl as fnb
28from numpy.lib._function_base_impl import _weights_are_valid
29from numpy._core import overrides
# Dispatch decorator used by every public function below; the
# ``module='numpy'`` keyword is pre-bound via functools.partial.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy',
)


# Names exported by this module.
__all__ = [
    'nansum',
    'nanmax',
    'nanmin',
    'nanargmax',
    'nanargmin',
    'nanmean',
    'nanmedian',
    'nanpercentile',
    'nanvar',
    'nanstd',
    'nanprod',
    'nancumsum',
    'nancumprod',
    'nanquantile',
]
def _nan_mask(a, out=None):
    """
    Build a boolean mask that is ``False`` wherever `a` holds NaN.

    Parameters
    ----------
    a : array-like
        Input array with at least 1 dimension. This private helper
        assumes `a` already is an array and performs no conversion.
    out : ndarray, optional
        Array in which to place the result instead of allocating a new
        one. Defaults to ``None``; when given, it must have the same
        shape as the expected output.

    Returns
    -------
    y : bool ndarray or True
        Boolean array with ``False`` at ``np.nan`` positions and ``True``
        everywhere else. When the dtype of `a` is neither floating nor
        complex it cannot contain ``np.nan`` and plain ``True`` is
        returned.
    """
    if a.dtype.kind in 'fc':
        # Only float ('f') and complex ('c') kinds can carry NaN.
        not_nan = np.isnan(a, out=out)
        # Flip in place so that no second array is allocated.
        return np.invert(not_nan, out=not_nan)

    return True
def _replace_nan(a, val):
    """
    Return a NaN-free copy of `a` plus a mask of where the NaNs were.

    For inexact (and object) input a copy of `a` is made, the NaNs in
    the copy are overwritten with `val`, and the copy is returned with a
    boolean mask marking the former NaN locations. For any other dtype
    nothing is copied: `a` itself is returned with a mask of None.

    Scalars come back as array scalars, which matters when the result
    is later used as the ``out`` argument of some operations.

    Parameters
    ----------
    a : array-like
        Input array.
    val : float
        Fill value written over the NaN entries before the actual
        operation is carried out.

    Returns
    -------
    y : ndarray
        Copy of `a` with NaNs replaced by `val` if `a` is of inexact
        type, otherwise `a` (as converted by ``np.asanyarray``).
    mask: {bool, None}
        Boolean mask of the NaN locations if `a` is of inexact type,
        otherwise None.

    """
    arr = np.asanyarray(a)

    if arr.dtype == np.object_:
        # object arrays do not support `isnan` (gh-9009), so make a guess:
        # an element that differs from itself is taken to be NaN
        nan_mask = np.not_equal(arr, arr, dtype=bool)
    elif issubclass(arr.dtype.type, np.inexact):
        nan_mask = np.isnan(arr)
    else:
        # Exact dtypes cannot hold NaN; hand the array back untouched.
        return arr, None

    # Work on a copy (keeping subclasses) so the caller's data is intact.
    filled = np.array(arr, subok=True, copy=True)
    np.copyto(filled, val, where=nan_mask)
    return filled, nan_mask
def _copyto(a, val, mask):
    """
    Write `val` into `a` wherever `mask` is True.

    Unlike ``np.copyto`` this also copes with `a` being a numpy scalar,
    in which case a new scalar of the same type is built from `val`.

    Parameters
    ----------
    a : ndarray or numpy scalar
        Array or numpy scalar some of whose values are to be replaced
        by `val`.
    val : numpy scalar
        Value used as a replacement.
    mask : ndarray, scalar
        Boolean array. Where True, the corresponding element of `a` is
        replaced by `val`. Broadcasts. Not consulted when `a` is a
        scalar.

    Returns
    -------
    res : ndarray, scalar
        `a` itself, modified in place, when it is an ndarray; otherwise
        the scalar `val` converted to the type of `a`.

    """
    if not isinstance(a, np.ndarray):
        # Scalars cannot be modified in place; build a replacement.
        return a.dtype.type(val)

    np.copyto(a, val, where=mask, casting='unsafe')
    return a
def _remove_nan_1d(arr1d, overwrite_input=False):
    """
    Drop the NaN entries of a 1-d array.

    The result holds the same values as ``arr1d[~np.isnan(arr1d)]`` but
    possibly in a different order: rather than compacting the array,
    NaNs near the front are overwritten with non-NaN values taken from
    the tail and the tail is then cut off. Presumably faster as it
    incurs fewer copies.

    Parameters
    ----------
    arr1d : ndarray
        Array to remove nans from
    overwrite_input : bool
        True if `arr1d` can be modified in place

    Returns
    -------
    res : ndarray
        Array with nan elements removed
    overwrite_input : bool
        True if `res` can be modified in place, given the constraint on
        the input

    Warns
    -----
    RuntimeWarning
        If the number of NaN entries equals ``arr1d.size``; an empty
        slice of `arr1d` is returned in that case.
    """
    if arr1d.dtype == object:
        # object arrays do not support `isnan` (gh-9009), so make a guess
        is_nan = np.not_equal(arr1d, arr1d, dtype=bool)
    else:
        is_nan = np.isnan(arr1d)

    nan_idx = np.nonzero(is_nan)[0]
    n_nan = nan_idx.size

    if n_nan == arr1d.size:
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=6)
        return arr1d[:0], True

    if n_nan == 0:
        # Nothing to remove: the input is returned as-is, so whether it
        # may be overwritten is still the caller's original answer.
        return arr1d, overwrite_input

    work = arr1d if overwrite_input else arr1d.copy()
    # non-NaN values sitting in the last `n_nan` slots
    tail_values = work[-n_nan:][~is_nan[-n_nan:]]
    # move them over the first NaN positions, then drop the tail
    work[nan_idx[:tail_values.size]] = tail_values
    return work[:-n_nan], True
def _divide_by_count(a, b, out=None):
    """
    Compute ``a / b`` with invalid-value and divide warnings silenced.

    When `out` is given the result is written there. Otherwise an array
    `a` is divided in place, while for a scalar `a` the quotient is
    converted back to the type of `a`. Note that this is only called
    with `a` an inexact type.

    Parameters
    ----------
    a : {ndarray, numpy scalar}
        Numerator. Expected to be of inexact type but not checked.
    b : {ndarray, numpy scalar}
        Denominator.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.

    Returns
    -------
    ret : {ndarray, numpy scalar}
        The value of a/b. If `a` was an ndarray the division is done
        in place (or into `out`). If `a` is a numpy scalar, the division
        preserves its type.

    """
    with np.errstate(invalid='ignore', divide='ignore'):
        if out is not None:
            # Same call for array and scalar numerators. The scalar case
            # is questionable, but currently a numpy scalar can be
            # output to a zero dimensional array.
            return np.divide(a, b, out=out, casting='unsafe')

        if isinstance(a, np.ndarray):
            return np.divide(a, b, out=a, casting='unsafe')

        # Precaution against reduced object arrays, whose elements need
        # not have a ``dtype`` attribute.
        try:
            return a.dtype.type(a / b)
        except AttributeError:
            return a / b
def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Return the `nanmin` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Minimum of an array, or minimum along an axis, skipping any NaNs.

    A slice made up entirely of NaNs yields NaN for that slice and
    raises a ``RuntimeWarning``.

    Parameters
    ----------
    a : array_like
        Array containing the numbers whose minimum is wanted. Input that
        is not an array is converted if possible.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the minimum is taken. By default the
        minimum of the flattened array is computed.
    out : ndarray, optional
        Array in which to place the result instead of a newly allocated
        one. Defaults to ``None``; when given, it must have the shape of
        the expected output, while the type is cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        When True, the reduced axes stay in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        original `a`.

        Any value other than the default is passed through to the `min`
        method of sub-classes of `ndarray`; if such a method does not
        implement `keepdims`, an exception is raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        The maximum value of an output element. Must be present to allow
        computation on an empty slice. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the minimum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmin : ndarray
        Array with the shape of `a` minus the specified axis. An ndarray
        scalar is returned when `a` is 0-d or `axis` is None. The dtype
        is the same as that of `a`.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any NaNs.
    amin :
        The minimum value of an array along a given axis, propagating any NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amax, fmax, maximum

    Notes
    -----
    NumPy follows the IEEE Standard for Binary Floating-Point for
    Arithmetic (IEEE 754), so Not a Number is not equivalent to
    infinity. Positive infinity is treated as a very large number and
    negative infinity as a very small (i.e. negative) number.

    For input of integer type the function is equivalent to np.min.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([1.,  2.])
    >>> np.nanmin(a, axis=1)
    array([1.,  3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, -np.inf])
    -inf

    """
    # Forward only those optional arguments the caller actually supplied.
    supplied = (('keepdims', keepdims), ('initial', initial),
                ('where', where))
    kwargs = {key: value for key, value in supplied
              if value is not np._NoValue}

    if type(a) is not np.ndarray or a.dtype == np.object_:
        # Slow, but safe for subclasses of ndarray and for object arrays:
        # NaNs are replaced by +inf so that they never win the minimum.
        a, mask = _replace_nan(a, np.inf)
        res = np.amin(a, axis=axis, out=out, **kwargs)
        if mask is None:
            return res

        # Check for all-NaN axis; `initial` is not passed on to np.all.
        kwargs.pop("initial", None)
        all_nan = np.all(mask, axis=axis, **kwargs)
        if np.any(all_nan):
            res = _copyto(res, np.nan, all_nan)
            warnings.warn("All-NaN axis encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Fast, but not safe for subclasses of ndarray, or object arrays,
    # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
    res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
    if np.isnan(res).any():
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=2)
    return res
def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Return the `nanmax` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Maximum of an array, or maximum along an axis, skipping any NaNs.

    A slice made up entirely of NaNs yields NaN for that slice and
    raises a ``RuntimeWarning``.

    Parameters
    ----------
    a : array_like
        Array containing the numbers whose maximum is wanted. Input that
        is not an array is converted if possible.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the maximum is taken. By default the
        maximum of the flattened array is computed.
    out : ndarray, optional
        Array in which to place the result instead of a newly allocated
        one. Defaults to ``None``; when given, it must have the shape of
        the expected output, while the type is cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        When True, the reduced axes stay in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        original `a`.

        Any value other than the default is passed through to the `max`
        method of sub-classes of `ndarray`; if such a method does not
        implement `keepdims`, an exception is raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        The minimum value of an output element. Must be present to allow
        computation on an empty slice. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the maximum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmax : ndarray
        Array with the shape of `a` minus the specified axis. An ndarray
        scalar is returned when `a` is 0-d or `axis` is None. The dtype
        is the same as that of `a`.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any NaNs.
    amax :
        The maximum value of an array along a given axis, propagating any NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amin, fmin, minimum

    Notes
    -----
    NumPy follows the IEEE Standard for Binary Floating-Point for
    Arithmetic (IEEE 754), so Not a Number is not equivalent to
    infinity. Positive infinity is treated as a very large number and
    negative infinity as a very small (i.e. negative) number.

    For input of integer type the function is equivalent to np.max.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmax(a)
    3.0
    >>> np.nanmax(a, axis=0)
    array([3.,  2.])
    >>> np.nanmax(a, axis=1)
    array([2.,  3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmax([1, 2, np.nan, -np.inf])
    2.0
    >>> np.nanmax([1, 2, np.nan, np.inf])
    inf

    """
    # Forward only those optional arguments the caller actually supplied.
    supplied = (('keepdims', keepdims), ('initial', initial),
                ('where', where))
    kwargs = {key: value for key, value in supplied
              if value is not np._NoValue}

    if type(a) is not np.ndarray or a.dtype == np.object_:
        # Slow, but safe for subclasses of ndarray and for object arrays:
        # NaNs are replaced by -inf so that they never win the maximum.
        a, mask = _replace_nan(a, -np.inf)
        res = np.amax(a, axis=axis, out=out, **kwargs)
        if mask is None:
            return res

        # Check for all-NaN axis; `initial` is not passed on to np.all.
        kwargs.pop("initial", None)
        all_nan = np.all(mask, axis=axis, **kwargs)
        if np.any(all_nan):
            res = _copyto(res, np.nan, all_nan)
            warnings.warn("All-NaN axis encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Fast, but not safe for subclasses of ndarray, or object arrays,
    # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
    res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
    if np.isnan(res).any():
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=2)
    return res
def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """Return the `nanargmin` argument used for dispatch: only `a`."""
    relevant_args = (a,)
    return relevant_args
@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Indices of the minimum values along the specified axis, skipping NaNs.

    ``ValueError`` is raised for all-NaN slices. Warning: the results
    cannot be trusted if a slice contains only NaNs and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. The flattened input is used by
        default.
    out : array, optional
        If given, the result is inserted into this array, which should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        When True, the reduced axes stay in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If a slice along `axis` contains nothing but NaNs.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])

    """
    # NaNs become +inf so that argmin does not select them.
    filled, nan_mask = _replace_nan(a, np.inf)

    if nan_mask is not None and nan_mask.size:
        all_nan = np.all(nan_mask, axis=axis)
        if np.any(all_nan):
            raise ValueError("All-NaN slice encountered")

    return np.argmin(filled, axis=axis, out=out, keepdims=keepdims)
def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """Return the `nanargmax` argument used for dispatch: only `a`."""
    relevant_args = (a,)
    return relevant_args
@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Indices of the maximum values along the specified axis, skipping NaNs.

    ``ValueError`` is raised for all-NaN slices. Warning: the results
    cannot be trusted if a slice contains only NaNs and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. The flattened input is used by
        default.
    out : array, optional
        If given, the result is inserted into this array, which should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        When True, the reduced axes stay in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If a slice along `axis` contains nothing but NaNs.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])

    """
    # NaNs become -inf so that argmax does not select them.
    filled, nan_mask = _replace_nan(a, -np.inf)

    if nan_mask is not None and nan_mask.size:
        all_nan = np.all(nan_mask, axis=axis)
        if np.any(all_nan):
            raise ValueError("All-NaN slice encountered")

    return np.argmax(filled, axis=axis, out=out, keepdims=keepdims)
def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Return the `nansum` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
           initial=np._NoValue, where=np._NoValue):
    """
    Sum of array elements over a given axis, treating Not a Numbers
    (NaNs) as zero.

    In NumPy versions <= 1.9.0 Nan is returned for slices that are
    all-NaN or empty. In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing the numbers whose sum is wanted. Input that is
        not an array is converted if possible.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the sum is computed. By default the sum
        of the flattened array is computed.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which
        the elements are summed. By default, the dtype of `a` is used.
        An exception is when `a` has an integer type with less precision
        than the platform (u)intp. In that case, the default will be
        either (u)int32 or (u)int64 depending on whether the platform is
        32 or 64 bits. For inexact inputs, dtype must be inexact.

        .. versionadded:: 1.8.0
    out : ndarray, optional
        Array in which to place the result instead of a newly allocated
        one. Defaults to ``None``. When given, it must have the shape of
        the expected output, while the type is cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN
        to integer can yield unexpected results.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        When True, the reduced axes stay in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        original `a`.

        Any value other than the default is passed through to the `mean`
        or `sum` methods of sub-classes of `ndarray`; if such a method
        does not implement `keepdims`, an exception is raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        Starting value for the sum. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the sum. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nansum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.
    isfinite : Show which elements are not NaN or +/-inf.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be
    Not A Number (NaN).

    Examples
    --------
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([2.,  1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, -np.inf])
    -inf
    >>> with np.errstate(invalid="ignore"):
    ...     np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
    np.float64(nan)

    """
    # Zero is the additive identity, so zero-filled NaNs drop out of the
    # sum; the NaN mask itself is not needed here.
    zero_filled, _ = _replace_nan(a, 0)
    return np.sum(zero_filled, axis=axis, dtype=dtype, out=out,
                  keepdims=keepdims, initial=initial, where=where)
def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        initial=None, where=None):
    """Return the `nanprod` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            initial=np._NoValue, where=np._NoValue):
    """
    Product of array elements over a given axis, treating Not a Numbers
    (NaNs) as ones.

    One is returned for slices that are all-NaN or empty.

    .. versionadded:: 1.10.0

    Parameters
    ----------
    a : array_like
        Array containing the numbers whose product is wanted. Input that
        is not an array is converted if possible.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the product is computed. By default the
        product of the flattened array is computed.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which
        the elements are multiplied. By default, the dtype of `a` is
        used. An exception is when `a` has an integer type with less
        precision than the platform (u)intp. In that case, the default
        will be either (u)int32 or (u)int64 depending on whether the
        platform is 32 or 64 bits. For inexact inputs, dtype must be
        inexact.
    out : ndarray, optional
        Array in which to place the result instead of a newly allocated
        one. Defaults to ``None``. When given, it must have the shape of
        the expected output, while the type is cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN
        to integer can yield unexpected results.
    keepdims : bool, optional
        If True, the axes which are reduced are left in the result as
        dimensions with size one. With this option, the result will
        broadcast correctly against the original `arr`.
    initial : scalar, optional
        The starting value for this product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.prod : Product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nanprod(1)
    1
    >>> np.nanprod([1])
    1
    >>> np.nanprod([1, np.nan])
    1.0
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanprod(a)
    6.0
    >>> np.nanprod(a, axis=0)
    array([3., 2.])

    """
    # One is the multiplicative identity, so one-filled NaNs drop out of
    # the product; the NaN mask itself is not needed here.
    one_filled, _ = _replace_nan(a, 1)
    return np.prod(one_filled, axis=axis, dtype=dtype, out=out,
                   keepdims=keepdims, initial=initial, where=where)
def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
    """Return the `nancumsum` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
    """
    Cumulative sum of array elements over a given axis, treating Not a
    Numbers (NaNs) as zero.

    The cumulative sum does not change when NaNs are encountered, and
    leading NaNs are replaced by zeros.

    Zeros are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed. The default
        (None) is to compute the cumsum over the flattened array.
    dtype : dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed. If `dtype` is not specified, it defaults
        to the dtype of `a`, unless `a` has an integer dtype with a
        precision less than that of the default platform integer. In
        that case, the default platform integer is used.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output
        but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

    Returns
    -------
    nancumsum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned. The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumsum(1)
    array([1])
    >>> np.nancumsum([1])
    array([1])
    >>> np.nancumsum([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumsum(a)
    array([1.,  3.,  6.,  6.])
    >>> np.nancumsum(a, axis=0)
    array([[1.,  2.],
           [4.,  2.]])
    >>> np.nancumsum(a, axis=1)
    array([[1.,  3.],
           [3.,  3.]])

    """
    # Zero-filled NaNs leave the running sum unchanged; the NaN mask
    # itself is not needed here.
    zero_filled, _ = _replace_nan(a, 0)
    return np.cumsum(zero_filled, axis=axis, dtype=dtype, out=out)
def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
    """Return the `nancumprod` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
    """
    Cumulative product of array elements over a given axis, treating Not
    a Numbers (NaNs) as one.

    The cumulative product does not change when NaNs are encountered,
    and leading NaNs are replaced by ones.

    Ones are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative product is computed. By default
        the input is flattened.
    dtype : dtype, optional
        Type of the returned array, as well as of the accumulator in
        which the elements are multiplied. If *dtype* is not specified,
        it defaults to the dtype of `a`, unless `a` has an integer dtype
        with a precision less than that of the default platform integer.
        In that case, the default platform integer is used instead.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output
        but the type of the resulting values will be cast if necessary.

    Returns
    -------
    nancumprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.cumprod : Cumulative product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumprod(1)
    array([1])
    >>> np.nancumprod([1])
    array([1])
    >>> np.nancumprod([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumprod(a)
    array([1.,  2.,  6.,  6.])
    >>> np.nancumprod(a, axis=0)
    array([[1.,  2.],
           [3.,  2.]])
    >>> np.nancumprod(a, axis=1)
    array([[1.,  2.],
           [3.,  3.]])

    """
    # One-filled NaNs leave the running product unchanged; the NaN mask
    # itself is not needed here.
    one_filled, _ = _replace_nan(a, 1)
    return np.cumprod(one_filled, axis=axis, dtype=dtype, out=out)
def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        *, where=None):
    """Return the `nanmean` arguments used for dispatch: `a` and `out`."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            *, where=np._NoValue):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    The average is taken over the flattened array unless `axis` is given,
    in which case it is taken along that axis (or those axes). Integer
    inputs use `float64` for intermediate and returned values.

    A slice made up entirely of NaNs produces NaN and a `RuntimeWarning`.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Numbers whose mean is wanted. Anything that is not already an
        array is converted if possible.
    axis : {int, tuple of int, None}, optional
        Axis or axes to average over. By default the flattened array is
        averaged.
    dtype : data-type, optional
        Type used for the computation. Defaults to `float64` for integer
        input and to the input dtype for inexact input.
    out : ndarray, optional
        Array receiving the result. The default is ``None``; when given
        it must have the shape of the expected output, and values are
        cast to its type if necessary.
        See :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If True, the reduced axes are kept with length one so that the
        result broadcasts correctly against the original `a`.

        Any non-default value is forwarded to the `mean` or `sum` methods
        of `ndarray` sub-classes. If a sub-class method does not
        implement `keepdims`, the resulting exception is raised.
    where : array_like of bool, optional
        Elements to include in the mean. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    m : ndarray, see dtype parameter above
        A new array holding the means if `out=None`, otherwise a
        reference to `out`. Slices that contain only NaNs give NaN.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    For floating-point input the mean is accumulated in the precision of
    the input, which can make the result inaccurate, especially for
    `float32`. Passing a higher-precision `dtype` alleviates this.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanmean(a)
    2.6666666666666665
    >>> np.nanmean(a, axis=0)
    array([2.,  4.])
    >>> np.nanmean(a, axis=1)
    array([1.,  3.5]) # may vary

    """
    filled, nan_mask = _replace_nan(a, 0)
    if nan_mask is None:
        # No mask came back from `_replace_nan`, so there is nothing to
        # ignore: defer entirely to the ordinary mean.
        return np.mean(filled, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims, where=where)

    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Both reductions share the same axis / keepdims / where settings.
    reduce_kwargs = dict(axis=axis, keepdims=keepdims, where=where)
    count = np.sum(~nan_mask, dtype=np.intp, **reduce_kwargs)
    total = np.sum(filled, dtype=dtype, out=out, **reduce_kwargs)
    mean = _divide_by_count(total, count, out=out)

    if (count == 0).any():
        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
        # NaN is the only possible bad value, so the result needs no
        # further patching.
    return mean
def _nanmedian1d(arr1d, overwrite_input=False):
    """
    Median of a rank 1 array with NaNs ignored (private helper).
    See nanmedian for parameter usage
    """
    cleaned, may_overwrite = _remove_nan_1d(
        arr1d, overwrite_input=overwrite_input,
    )

    if cleaned.size != 0:
        return np.median(cleaned, overwrite_input=may_overwrite)

    # Nothing survived the NaN removal. Hand back an element of the
    # original array so that a nan-esque scalar of the appropriate type
    # (and unit) is returned for `timedelta64` and `complexfloating`.
    return arr1d[-1]
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanmedian for parameter usage

    When `out` is given it is filled and returned on every code path.
    """
    if axis is None or a.ndim == 1:
        part = a.ravel()
        if out is None:
            return _nanmedian1d(part, overwrite_input)
        out[...] = _nanmedian1d(part, overwrite_input)
        return out

    # for small medians use sort + indexing which is still faster than
    # apply_along_axis
    # benchmarked with shuffled (50, 50, x) containing a few NaN
    if a.shape[axis] < 600:
        return _nanmedian_small(a, axis, out, overwrite_input)

    result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
    if out is None:
        return result
    # Return `out` itself (not the temporary) so this branch agrees with
    # the 1-D branch and the small-axis path.
    out[...] = result
    return out
def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
    """
    Median via masked-array sort + indexing; faster for small medians along
    multiple dimensions due to the high overhead of apply_along_axis

    see nanmedian for parameter usage
    """
    masked = np.ma.masked_array(a, np.isnan(a))
    med = np.ma.median(masked, axis=axis, overwrite_input=overwrite_input)

    # One warning per slice whose median is still masked.
    n_all_nan = np.count_nonzero(med.mask.ravel())
    for _ in range(n_all_nan):
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=5)

    # Masked results become NaT for timedelta data and NaN otherwise.
    if med.dtype.kind == "m":
        fill_value = np.timedelta64("NaT")
    else:
        fill_value = np.nan
    filled = med.filled(fill_value)

    if out is None:
        return filled
    out[...] = filled
    return out
def _nanmedian_dispatcher(
        a, axis=None, out=None, overwrite_input=None, keepdims=None):
    """Dispatcher for `nanmedian`: hand back the `a` and `out` arguments."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes along which the medians are computed. By default
        the median of the flattened array is computed.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Array receiving the result. It must have the same shape and
        buffer length as the expected output; values are cast to its
        type if necessary.
    overwrite_input : bool, optional
        If True, the memory of the input array `a` may be used for the
        calculation, which saves memory when the contents of `a` need
        not be preserved. The input is modified by the call: treat it as
        undefined afterwards, although it will probably be fully or
        partially sorted. Default is False. If `overwrite_input` is
        ``True`` and `a` is not already an `ndarray`, an error will be
        raised.
    keepdims : bool, optional
        If True, the reduced axes are kept with length one so that the
        result broadcasts correctly against the original `a`.

        Any non-default value is passed through (in the special case of
        an empty array) to the `mean` function of the underlying array.
        If the array is a sub-class and `mean` does not have the kwarg
        `keepdims` this will raise a RuntimeError.

    Returns
    -------
    median : ndarray
        A new array holding the result. If the input contains integers
        or floats smaller than ``float64``, the output data-type is
        ``np.float64``; otherwise it is the data-type of the input.
        If `out` is specified, that array is returned instead.

    See Also
    --------
    mean, median, percentile

    Notes
    -----
    Given a vector ``V`` of length ``N``, the median of ``V`` is the
    middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
    ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two
    middle values of ``V_sorted`` when ``N`` is even.

    Examples
    --------
    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
    >>> a[0, 1] = np.nan
    >>> a
    array([[10., nan,  4.],
           [ 3.,  2.,  1.]])
    >>> np.median(a)
    np.float64(nan)
    >>> np.nanmedian(a)
    3.0
    >>> np.nanmedian(a, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.median(a, axis=1)
    array([nan,  2.])
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=1, overwrite_input=True)
    array([7.,  2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=None, overwrite_input=True)
    3.0
    >>> assert not np.all(a==b)

    """
    arr = np.asanyarray(a)
    if arr.size != 0:
        return fnb._ureduce(arr, func=_nanmedian, keepdims=keepdims,
                            axis=axis, out=out,
                            overwrite_input=overwrite_input)

    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
    # so empty input is delegated to nanmean instead.
    return np.nanmean(arr, axis, out=out, keepdims=keepdims)
def _nanpercentile_dispatcher(
        a, q, axis=None, out=None, overwrite_input=None,
        method=None, keepdims=None, *, weights=None, interpolation=None):
    """Dispatcher for `nanpercentile`: hand back `a`, `q`, `out`, `weights`."""
    relevant_args = (a, q, out, weights)
    return relevant_args
@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        weights=None,
        interpolation=None,
):
    """
    Compute the qth percentile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth percentile(s) of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array,
        containing nan values to be ignored.
    q : array_like of float
        Percentile or sequence of percentiles to compute, each between
        0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. By
        default the percentile(s) of the flattened array are computed.
    out : ndarray, optional
        Array receiving the result. It must have the same shape and
        buffer length as the expected output; values are cast to its
        type if necessary.
    overwrite_input : bool, optional
        If True, the input array `a` may be modified by intermediate
        calculations to save memory. The contents of `a` are undefined
        once the function has completed.
    method : str, optional
        Method used to estimate the percentile. There are many
        different methods, some unique to NumPy. See the notes for
        explanation. The options sorted by their R type as summarized
        in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous. NumPy further defines
        the following discontinuous variations of the default 'linear'
        (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If True, the reduced axes are kept with length one so that the
        result broadcasts correctly against the original array `a`.

        Any non-default value is passed through (in the special case of
        an empty array) to the `mean` function of the underlying array.
        If the array is a sub-class and `mean` does not have the kwarg
        `keepdims` this will raise a RuntimeError.

    weights : array_like, optional
        Weights associated with the values in `a`; each value
        contributes to the percentile according to its weight. The
        weights can be 1-D (with length equal to the size of `a` along
        the given axis) or of the same shape as `a`. With
        `weights=None` every value has weight one.
        Only `method="inverted_cdf"` supports weights.

        .. versionadded:: 2.0.0

    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    percentile : scalar or ndarray
        A scalar if `q` is a single percentile and `axis=None`. If
        multiple percentiles are given, the first axis of the result
        corresponds to the percentiles and the other axes are those
        remaining after the reduction of `a`. If the input contains
        integers or floats smaller than ``float64``, the output
        data-type is ``float64``; otherwise it is the data-type of the
        input. If `out` is specified, that array is returned instead.

    See Also
    --------
    nanmean
    nanmedian : equivalent to ``nanpercentile(..., 50)``
    percentile, median, mean
    nanquantile : equivalent to nanpercentile, except q in range [0, 1].

    Notes
    -----
    For more information please see `numpy.percentile`

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.percentile(a, 50)
    np.float64(nan)
    >>> np.nanpercentile(a, 50)
    3.0
    >>> np.nanpercentile(a, 50, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanpercentile(a, 50, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanpercentile(a, 50, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])

    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    if interpolation is not None:
        method = fnb._check_interpolation_as_method(
            method, interpolation, "nanpercentile")

    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # Express the divisor in the dtype of `a` when `a` is floating point.
    hundred = a.dtype.type(100) if a.dtype.kind == "f" else 100
    # asanyarray undoes any decay that the ufunc performed (see gh-13105)
    q = np.asanyarray(np.true_divide(q, hundred))
    if not fnb._quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")

    if weights is not None:
        if method != "inverted_cdf":
            msg = ("Only method 'inverted_cdf' supports weights. "
                   f"Got: {method}.")
            raise ValueError(msg)
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")

    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, method, keepdims, weights)
def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
                            method=None, keepdims=None, *, weights=None,
                            interpolation=None):
    """Dispatcher for `nanquantile`: hand back `a`, `q`, `out`, `weights`."""
    relevant_args = (a, q, out, weights)
    return relevant_args
@array_function_dispatch(_nanquantile_dispatcher)
def nanquantile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        weights=None,
        interpolation=None,
):
    """
    Compute the qth quantile of the data along the specified axis,
    while ignoring nan values.
    Returns the qth quantile(s) of the array elements.

    .. versionadded:: 1.15.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array,
        containing nan values to be ignored
    q : array_like of float
        Probability or sequence of probabilities for the quantiles to
        compute. Values must be between 0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the quantiles are computed. By default
        the quantile(s) of the flattened array are computed.
    out : ndarray, optional
        Array receiving the result. It must have the same shape and
        buffer length as the expected output; values are cast to its
        type if necessary.
    overwrite_input : bool, optional
        If True, the input array `a` may be modified by intermediate
        calculations to save memory. The contents of `a` are undefined
        once the function has completed.
    method : str, optional
        Method used to estimate the quantile. There are many different
        methods, some unique to NumPy. See the notes for explanation.
        The options sorted by their R type as summarized in the H&F
        paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous. NumPy further defines
        the following discontinuous variations of the default 'linear'
        (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If True, the reduced axes are kept with length one so that the
        result broadcasts correctly against the original array `a`.

        Any non-default value is passed through (in the special case of
        an empty array) to the `mean` function of the underlying array.
        If the array is a sub-class and `mean` does not have the kwarg
        `keepdims` this will raise a RuntimeError.

    weights : array_like, optional
        Weights associated with the values in `a`; each value
        contributes to the quantile according to its weight. The
        weights can be 1-D (with length equal to the size of `a` along
        the given axis) or of the same shape as `a`. With
        `weights=None` every value has weight one.
        Only `method="inverted_cdf"` supports weights.

        .. versionadded:: 2.0.0

    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    quantile : scalar or ndarray
        A scalar if `q` is a single probability and `axis=None`. If
        multiple probability levels are given, the first axis of the
        result corresponds to the quantiles and the other axes are
        those remaining after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``; otherwise it is the data-type of the
        input. If `out` is specified, that array is returned instead.

    See Also
    --------
    quantile
    nanmean
    nanmedian : equivalent to ``nanquantile(..., 0.5)``
    nanpercentile : same as nanquantile, but with q in the range [0, 100].

    Notes
    -----
    For more information please see `numpy.quantile`

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.quantile(a, 0.5)
    np.float64(nan)
    >>> np.nanquantile(a, 0.5)
    3.0
    >>> np.nanquantile(a, 0.5, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanquantile(a, 0.5, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanquantile(a, 0.5, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])
    >>> b = a.copy()
    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    if interpolation is not None:
        method = fnb._check_interpolation_as_method(
            method, interpolation, "nanquantile")

    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # Use dtype of array if possible (e.g., if q is a python int or float).
    python_scalar_q = isinstance(q, (int, float))
    q_dtype = a.dtype if (python_scalar_q and a.dtype.kind == "f") else None
    q = np.asanyarray(q, dtype=q_dtype)

    if not fnb._quantile_is_valid(q):
        raise ValueError("Quantiles must be in the range [0, 1]")

    if weights is not None:
        if method != "inverted_cdf":
            msg = ("Only method 'inverted_cdf' supports weights. "
                   f"Got: {method}.")
            raise ValueError(msg)
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")

    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, method, keepdims, weights)
def _nanquantile_unchecked(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        weights=None,
):
    """Assumes that q is in [0, 1], and is an ndarray"""
    if a.size != 0:
        reduce_kwargs = dict(
            q=q,
            weights=weights,
            keepdims=keepdims,
            axis=axis,
            out=out,
            overwrite_input=overwrite_input,
            method=method,
        )
        return fnb._ureduce(a, func=_nanquantile_ureduce_func,
                            **reduce_kwargs)

    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
    # so empty input is delegated to nanmean instead.
    return np.nanmean(a, axis, out=out, keepdims=keepdims)
def _nanquantile_ureduce_func(
    a: np.ndarray,
    q: np.ndarray,
    weights: np.ndarray,
    axis: int = None,
    out=None,
    overwrite_input: bool = False,
    method="linear",
):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanpercentile for parameter usage

    `axis` is a single int or None. When `weights` is given and the
    reduction runs along one axis of a multi-dimensional `a`, each 1-D
    lane of `a` is evaluated together with the matching lane of `weights`
    (after broadcasting `weights` to the shape of `a`). The quantile axes
    (one per dimension of `q`) always lead the result. If `out` is given
    it is filled with the result.
    """
    if axis is None or a.ndim == 1:
        part = a.ravel()
        wgt = None if weights is None else weights.ravel()
        result = _nanquantile_1d(part, q, overwrite_input, method, weights=wgt)
    elif weights is None:
        result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
                                     overwrite_input, method)
        # apply_along_axis puts the q.ndim result axes where `axis` used
        # to be. Move all of them (not just the first) to the front to
        # match percentile's convention.
        if q.ndim != 0:
            first = axis % a.ndim  # `axis` may be negative
            source = [first + i for i in range(q.ndim)]
            result = np.moveaxis(result, source, list(range(q.ndim)))
    else:
        # apply_along_axis can only slice one array, so iterate over the
        # lanes of `a` and `weights` in lockstep instead.
        lanes = np.moveaxis(a, axis, -1)
        wgt = np.moveaxis(np.broadcast_to(weights, a.shape), axis, -1)
        outer_shape = lanes.shape[:-1]
        per_lane = [
            _nanquantile_1d(lanes[idx], q, overwrite_input, method,
                            weights=wgt[idx])
            for idx in np.ndindex(outer_shape)
        ]
        # ndindex runs in C order, so stacking on a trailing axis and
        # reshaping restores the outer layout with the q axes leading.
        result = np.stack(per_lane, axis=-1).reshape(q.shape + outer_shape)

    if out is not None:
        out[...] = result
    return result


def _nanquantile_1d(
    arr1d, q, overwrite_input=False, method="linear", weights=None,
):
    """
    Private function for rank 1 arrays. Compute quantile ignoring NaNs.
    See nanpercentile for parameter usage

    If `weights` is given it must have the same length as `arr1d`; the
    weights at NaN positions are dropped together with the NaNs.
    """
    if weights is not None:
        weights = np.asanyarray(weights)
        if arr1d.dtype == object:
            # isnan is not defined for object arrays; NaN is the only
            # value that compares unequal to itself.
            is_nan = np.not_equal(arr1d, arr1d, dtype=bool)
        else:
            is_nan = np.isnan(arr1d)
        if is_nan.any():
            keep = ~is_nan
            weights = weights[keep]
            if keep.any():
                # Filter the data here, in its original order, so values
                # and weights stay aligned; the helper below is then
                # expected to have nothing left to remove.
                arr1d = arr1d[keep]
                # Boolean indexing made a fresh copy, free to overwrite.
                overwrite_input = True
            # else: leave the all-NaN data untouched so that the helper
            # below handles (and reports) the all-NaN slice as usual.

    arr1d, overwrite_input = _remove_nan_1d(arr1d,
        overwrite_input=overwrite_input)
    if arr1d.size == 0:
        # convert to scalar
        return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()]

    return fnb._quantile_unchecked(
        arr1d,
        q,
        overwrite_input=overwrite_input,
        method=method,
        weights=weights,
    )
def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
                       keepdims=None, *, where=None, mean=None,
                       correction=None):
    """Dispatcher for `nanvar`: hand back the `a` and `out` arguments."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution.  The variance is computed for the flattened array by
    default, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose variance is desired.  If `a` is not
        an array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the variance is computed.  The default is
        to compute the variance of the flattened array.
    dtype : data-type, optional
        Type to use in computing the variance.  For arrays of integer type
        the default is `float64`; for arrays of float types it is the same
        as the array type.
    out : ndarray, optional
        Alternate output array in which to place the result.  It must have
        the same shape as the expected output, but the type is cast if
        necessary.
    ddof : {int, float}, optional
        "Delta Degrees of Freedom": the divisor used in the calculation is
        ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.
    where : array_like of bool, optional
        Elements to include in the variance. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    mean : array_like, optional
        Provide the mean to prevent its recalculation. The mean should have
        a shape as if it was calculated with ``keepdims=True``.
        The axis for the calculation of the mean should be the same as used
        in the call to this var function.

        .. versionadded:: 1.26.0

    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of
        them can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    variance : ndarray, see dtype parameter above
        If `out` is None, return a new array containing the variance,
        otherwise return a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    Raises
    ------
    TypeError
        If `a` is inexact and `dtype` or the dtype of `out` is not.
    ValueError
        If both a non-zero `ddof` and `correction` are provided.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e.,  ``var = mean(abs(x - x.mean())**2)``.

    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
    instead.  In standard statistical practice, ``ddof=1`` provides an
    unbiased estimator of the variance of a hypothetical infinite
    population.  ``ddof=0`` provides a maximum likelihood estimate of the
    variance for normally distributed variables.

    Note that for complex numbers, the absolute value is taken before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the variance is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32` (see example
    below).  Specifying a higher-accuracy accumulator using the ``dtype``
    keyword can alleviate this issue.

    For this function to work on sub-classes of ndarray, they must define
    `sum` with the kwarg `keepdims`

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([1.,  0.])
    >>> np.nanvar(a, axis=1)
    array([0.,  0.25])  # may vary

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No mask came back from `_replace_nan`: defer to the plain var.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # `_NoValue` is a sentinel: test it by identity so that any supplied
    # value (including array-likes) is recognised without comparing it.
    if correction is not np._NoValue:
        if ddof != 0:
            raise ValueError(
                "ddof and correction can't be provided simultaneously."
            )
        ddof = correction

    # Compute mean
    if type(arr) is np.matrix:
        _keepdims = np._NoValue
    else:
        _keepdims = True

    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims,
                 where=where)

    if mean is not np._NoValue:
        avg = mean
    else:
        # we need to special case matrix for reverse compatibility
        # in order for this to work, these sums need to be called with
        # keepdims=True, however matrix now raises an error in this case, but
        # the reason that it drops the keepdims kwarg is to force keepdims=True
        # so this used to work by serendipity.
        avg = np.sum(arr, axis=axis, dtype=dtype,
                     keepdims=_keepdims, where=where)
        avg = _divide_by_count(avg, cnt)

    # Compute squared deviation from mean (in place, reusing `arr`).
    np.subtract(arr, avg, out=arr, casting='unsafe', where=where)
    # Reset the masked (NaN) positions to zero after the subtraction.
    arr = _copyto(arr, 0, mask)
    if issubclass(arr.dtype.type, np.complexfloating):
        sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real
    else:
        sqr = np.multiply(arr, arr, out=arr, where=where)

    # Compute variance.
    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                 where=where)

    # Precaution against reduced object arrays
    try:
        var_ndim = var.ndim
    except AttributeError:
        var_ndim = np.ndim(var)
    if var_ndim < cnt.ndim:
        # Subclasses of ndarray may ignore keepdims, so check here.
        cnt = cnt.squeeze(axis)
    dof = cnt - ddof
    var = _divide_by_count(var, dof)

    isbad = (dof <= 0)
    if np.any(isbad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
                      stacklevel=2)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        var = _copyto(var, np.nan, isbad)
    return var
def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
                       keepdims=None, *, where=None, mean=None,
                       correction=None):
    """Dispatcher for `nanstd`: hand back the `a` and `out` arguments."""
    relevant_args = (a, out)
    return relevant_args
@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Standard deviation of the non-NaN elements along the given axis.

    The standard deviation is a measure of the spread of a distribution.
    It is taken over the flattened array unless an axis is specified, and
    NaN entries are left out of the calculation.

    A slice that is all-NaN, or that has zero degrees of freedom, yields
    NaN and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Input whose non-NaN values the standard deviation is computed
        from.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. By default the standard deviation of
        the flattened array is computed.
    dtype : dtype, optional
        Type used for the computation. Integer arrays default to float64;
        float arrays default to their own type.
    out : ndarray, optional
        Array in which to store the result. Its shape must match the
        expected output; the calculated values are cast to its type if
        necessary.
    ddof : {int, float}, optional
        "Delta Degrees of Freedom": the divisor used is ``N - ddof``,
        with ``N`` the number of non-NaN elements. Defaults to zero.

    keepdims : bool, optional
        When True, the reduced axes are kept in the result with size one,
        so that the result broadcasts correctly against the original `a`.

        Any value other than the default is forwarded as-is to the
        relevant functions of the sub-classes. If those functions do not
        have a `keepdims` kwarg, a RuntimeError will be raised.
    where : array_like of bool, optional
        Elements to include in the standard deviation.
        See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0

    mean : array_like, optional
        Precomputed mean, supplied to prevent its recalculation. It
        should be shaped as if it was calculated with ``keepdims=True``,
        over the same axis as the one used in the call to this function.

        .. versionadded:: 1.26.0

    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of
        them can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        A new array holding the standard deviation when `out` is None;
        otherwise a reference to the output array. For any slice where
        ddof is >= the number of non-NaN elements, or which contains only
        NaNs, the result is NaN.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The standard deviation is the square root of the average of the
    squared deviations from the mean:
    ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally calculated as
    ``x.sum() / N``, where ``N = len(x)``. When `ddof` is specified, the
    divisor ``N - ddof`` is used instead. In standard statistical
    practice, ``ddof=1`` provides an unbiased estimator of the variance
    of the infinite population, while ``ddof=0`` provides a maximum
    likelihood estimate of the variance for normally distributed
    variables. Because this function returns the square root of the
    estimated variance, even ``ddof=1`` does not make it an unbiased
    estimate of the standard deviation per se.

    For complex numbers, `std` takes the absolute value before squaring,
    so the result is always real and nonnegative.

    For floating-point input, the *std* is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for float32 (see example
    below). Specifying a higher-accuracy accumulator using the `dtype`
    keyword can alleviate this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([1., 0.])
    >>> np.nanstd(a, axis=1)
    array([0.,  0.5]) # may vary

    """
    # All arguments are forwarded untouched; nanvar does the NaN handling.
    variance = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    if isinstance(variance, np.ndarray):
        # Write the root back into the variance array itself rather than
        # allocating a second array for the result.
        return np.sqrt(variance, out=variance)

    if hasattr(variance, 'dtype'):
        # Non-array result that still carries a dtype: convert the root
        # back to that dtype's scalar type.
        return variance.dtype.type(np.sqrt(variance))

    # Anything else: return whatever np.sqrt produces.
    return np.sqrt(variance)