1"""
2Functions that ignore NaN.
3
4Functions
5---------
6
7- `nanmin` -- minimum non-NaN value
8- `nanmax` -- maximum non-NaN value
9- `nanargmin` -- index of minimum non-NaN value
10- `nanargmax` -- index of maximum non-NaN value
11- `nansum` -- sum of non-NaN values
12- `nanprod` -- product of non-NaN values
13- `nancumsum` -- cumulative sum of non-NaN values
14- `nancumprod` -- cumulative product of non-NaN values
15- `nanmean` -- mean of non-NaN values
16- `nanvar` -- variance of non-NaN values
17- `nanstd` -- standard deviation of non-NaN values
18- `nanmedian` -- median of non-NaN values
19- `nanquantile` -- qth quantile of non-NaN values
20- `nanpercentile` -- qth percentile of non-NaN values
21
22"""
23import functools
24import warnings
25import numpy as np
26from numpy.lib import function_base
27from numpy.core import overrides
28
29
# Pre-bind ``module='numpy'`` so that the decorators used throughout this
# file only have to pass their dispatcher function.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy',
)


# Names exported by ``from <this module> import *``.
__all__ = [
    'nansum',
    'nanmax',
    'nanmin',
    'nanargmax',
    'nanargmin',
    'nanmean',
    'nanmedian',
    'nanpercentile',
    'nanvar',
    'nanstd',
    'nanprod',
    'nancumsum',
    'nancumprod',
    'nanquantile',
]
39
40
def _nan_mask(a, out=None):
    """
    Build a boolean mask that is ``True`` wherever `a` is not NaN.

    Parameters
    ----------
    a : array-like
        Input array with at least 1 dimension.
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output and will prevent the allocation of a new array.

    Returns
    -------
    y : bool ndarray or True
        A bool array where ``np.nan`` positions are marked with ``False``
        and other positions are marked with ``True``.  If the type of ``a``
        is such that it can't possibly contain ``np.nan``, returns ``True``.
    """
    # `a` is assumed to already be an array in this private function.
    if a.dtype.kind in 'fc':
        # Only float ('f') and complex ('c') kinds are checked for NaN.
        # The NaN flags are inverted in place, so no second array is needed.
        flags = np.isnan(a, out=out)
        return np.invert(flags, out=flags)

    return True
67
def _replace_nan(a, val):
    """
    Return a NaN-free copy of `a` together with the mask of replaced slots.

    If `a` is of inexact (or object) type, a copy of `a` is made, its NaNs
    are overwritten with `val`, and the copy is returned together with a
    boolean mask marking where the NaNs were.  For any other dtype `a` is
    returned unchanged (as an array) together with a mask of ``None``.

    Note that scalars will end up as array scalars, which is important
    for using the result as the value of the out argument in some
    operations.

    Parameters
    ----------
    a : array-like
        Input array.
    val : float
        NaN values are set to val before doing the operation.

    Returns
    -------
    y : ndarray
        If `a` is of inexact type, return a copy of `a` with the NaNs
        replaced by the fill value, otherwise return `a`.
    mask: {bool, None}
        If `a` is of inexact type, return a boolean mask marking locations
        of NaNs, otherwise return None.

    """
    arr = np.asanyarray(a)

    if arr.dtype == np.object_:
        # object arrays do not support `isnan` (gh-9009), so make a guess:
        # treat every element that is not equal to itself as NaN.
        nan_mask = np.not_equal(arr, arr, dtype=bool)
    elif issubclass(arr.dtype.type, np.inexact):
        nan_mask = np.isnan(arr)
    else:
        # This dtype is not checked for NaN: nothing to copy or replace.
        return arr, None

    # Work on a copy (keeping subclasses) so the caller's data is untouched.
    filled = np.array(arr, subok=True, copy=True)
    np.copyto(filled, val, where=nan_mask)
    return filled, nan_mask
111
112
def _copyto(a, val, mask):
    """
    Replace values in `a` with `val` where `mask` is True.  This differs
    from copyto in that it will deal with the case where `a` is a numpy
    scalar.

    Parameters
    ----------
    a : ndarray or numpy scalar
        Array or numpy scalar some of whose values are to be replaced
        by val.
    val : numpy scalar
        Value used as a replacement.
    mask : ndarray, scalar
        Boolean array.  Where True the corresponding element of `a` is
        replaced by `val`.  Broadcasts.

    Returns
    -------
    res : ndarray, scalar
        Array with elements replaced or scalar `val`.

    """
    if not isinstance(a, np.ndarray):
        # Scalars cannot be written in place; build a new scalar of the
        # same type from `val` (the mask is not consulted in this case).
        return a.dtype.type(val)

    np.copyto(a, val, where=mask, casting='unsafe')
    return a
140
141
def _remove_nan_1d(arr1d, overwrite_input=False):
    """
    Equivalent to ``arr1d[~np.isnan(arr1d)]``, but in a different order

    Presumably faster as it incurs fewer copies

    Parameters
    ----------
    arr1d : ndarray
        Array to remove nans from
    overwrite_input : bool
        True if `arr1d` can be modified in place

    Returns
    -------
    res : ndarray
        Array with nan elements removed
    overwrite_input : bool
        True if `res` can be modified in place, given the constraint on the
        input
    """
    if arr1d.dtype == object:
        # object arrays do not support `isnan` (gh-9009), so make a guess
        nan_flags = np.not_equal(arr1d, arr1d, dtype=bool)
    else:
        nan_flags = np.isnan(arr1d)

    nan_idx = np.nonzero(nan_flags)[0]
    n_nan = nan_idx.size

    if n_nan == arr1d.size:
        # Nothing but NaNs (this also matches a zero-length input).
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=6)
        return arr1d[:0], True

    if n_nan == 0:
        # No NaNs: hand the input back untouched.
        return arr1d, overwrite_input

    if not overwrite_input:
        arr1d = arr1d.copy()

    # Take the non-NaN values found in the last `n_nan` slots ...
    tail_values = arr1d[-n_nan:][~nan_flags[-n_nan:]]
    # ... and move them into the earliest NaN slots, so that dropping the
    # last `n_nan` entries leaves only non-NaN values.
    arr1d[nan_idx[:tail_values.size]] = tail_values

    return arr1d[:-n_nan], True
185
186
def _divide_by_count(a, b, out=None):
    """
    Compute a/b ignoring invalid results.  If `a` is an array the division
    is done in place.  If `a` is a scalar, then its type is preserved in the
    output.  If out is None, then a is used instead so that the division
    is in place.  Note that this is only called with `a` an inexact type.

    Parameters
    ----------
    a : {ndarray, numpy scalar}
        Numerator.  Expected to be of inexact type but not checked.
    b : {ndarray, numpy scalar}
        Denominator.
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.

    Returns
    -------
    ret : {ndarray, numpy scalar}
        The return value is a/b.  If `a` was an ndarray the division is done
        in place.  If `a` is a numpy scalar, the division preserves its type.

    """
    with np.errstate(invalid='ignore', divide='ignore'):
        if out is not None:
            # Same call for arrays and scalars.  The scalar case is
            # questionable, but currently a numpy scalar can be output to
            # a zero dimensional array.
            return np.divide(a, b, out=out, casting='unsafe')

        if isinstance(a, np.ndarray):
            # No explicit output: reuse the numerator's buffer.
            return np.divide(a, b, out=a, casting='unsafe')

        # Precaution against reduced object arrays, whose result may not
        # carry a ``dtype`` attribute.
        try:
            return a.dtype.type(a / b)
        except AttributeError:
            return a / b
229
230
def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Pick out the `nanmin` arguments (`a`, `out`) passed on for dispatch."""
    relevant_args = (a, out)
    return relevant_args
234
235
@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return minimum of an array or minimum along an axis, ignoring any NaNs.
    When all-NaN slices are encountered a ``RuntimeWarning`` is raised and
    NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose minimum is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the minimum is computed.  The default is to
        compute the minimum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.  See
        :ref:`ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one.  With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `min` method
        of sub-classes of `ndarray`.  If the sub-class' method
        does not implement `keepdims` any exceptions will be raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        The maximum value of an output element.  Must be present to allow
        computation on empty slice.  See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the minimum.  See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmin : ndarray
        An array with the same shape as `a`, with the specified axis
        removed.  If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned.  The same dtype as `a` is returned.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any NaNs.
    amin :
        The minimum value of an array along a given axis, propagating any
        NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amax, fmax, maximum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754).  This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.min.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([1.,  2.])
    >>> np.nanmin(a, axis=1)
    array([1.,  3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, -np.inf])
    -inf

    """
    # Forward only the options the caller actually supplied.
    supplied = (('keepdims', keepdims), ('initial', initial),
                ('where', where))
    kwargs = {key: value for key, value in supplied
              if value is not np._NoValue}

    exact_ndarray = type(a) is np.ndarray and a.dtype != np.object_
    if exact_ndarray:
        # Fast, but not safe for subclasses of ndarray, or object arrays,
        # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
        res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Slow, but safe for subclasses of ndarray: NaNs become +inf so they
    # can only win the minimum when every compared element was NaN.
    filled, nan_mask = _replace_nan(a, +np.inf)
    res = np.amin(filled, axis=axis, out=out, **kwargs)
    if nan_mask is None:
        return res

    # Check for all-NaN axis; ``initial`` is dropped before calling np.all.
    kwargs.pop("initial", None)
    all_nan = np.all(nan_mask, axis=axis, **kwargs)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=2)
    return res
362
363
def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Pick out the `nanmax` arguments (`a`, `out`) passed on for dispatch."""
    relevant_args = (a, out)
    return relevant_args
367
368
@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return the maximum of an array or maximum along an axis, ignoring any
    NaNs.  When all-NaN slices are encountered a ``RuntimeWarning`` is
    raised and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose maximum is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the maximum is computed.  The default is to
        compute the maximum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.  See
        :ref:`ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one.  With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `max` method
        of sub-classes of `ndarray`.  If the sub-class' method
        does not implement `keepdims` any exceptions will be raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        The minimum value of an output element.  Must be present to allow
        computation on empty slice.  See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the maximum.  See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmax : ndarray
        An array with the same shape as `a`, with the specified axis removed.
        If `a` is a 0-d array, or if axis is None, an ndarray scalar is
        returned.  The same dtype as `a` is returned.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any NaNs.
    amax :
        The maximum value of an array along a given axis, propagating any
        NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amin, fmin, minimum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754).  This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.max.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmax(a)
    3.0
    >>> np.nanmax(a, axis=0)
    array([3.,  2.])
    >>> np.nanmax(a, axis=1)
    array([2.,  3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmax([1, 2, np.nan, -np.inf])
    2.0
    >>> np.nanmax([1, 2, np.nan, np.inf])
    inf

    """
    # Forward only the options the caller actually supplied.
    supplied = (('keepdims', keepdims), ('initial', initial),
                ('where', where))
    kwargs = {key: value for key, value in supplied
              if value is not np._NoValue}

    exact_ndarray = type(a) is np.ndarray and a.dtype != np.object_
    if exact_ndarray:
        # Fast, but not safe for subclasses of ndarray, or object arrays,
        # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
        res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Slow, but safe for subclasses of ndarray: NaNs become -inf so they
    # can only win the maximum when every compared element was NaN.
    filled, nan_mask = _replace_nan(a, -np.inf)
    res = np.amax(filled, axis=axis, out=out, **kwargs)
    if nan_mask is None:
        return res

    # Check for all-NaN axis; ``initial`` is dropped before calling np.all.
    kwargs.pop("initial", None)
    all_nan = np.all(nan_mask, axis=axis, **kwargs)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=2)
    return res
495
496
def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """Pick out the only `nanargmin` argument (`a`) passed on for dispatch."""
    relevant_args = (a,)
    return relevant_args
499
500
@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the minimum values in the specified axis ignoring
    NaNs.  For all-NaN slices ``ValueError`` is raised.  Warning: the results
    cannot be trusted if a slice contains only NaNs and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate.  By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array.  It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one.  With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists solely of NaNs.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])

    """
    # NaNs are replaced by +inf so that they are never preferred over a
    # finite value when searching for the minimum.
    filled, nan_mask = _replace_nan(a, np.inf)

    # A slice made only of NaNs has no meaningful index: refuse to answer.
    if nan_mask is not None and np.any(np.all(nan_mask, axis=axis)):
        raise ValueError("All-NaN slice encountered")

    return np.argmin(filled, axis=axis, out=out, keepdims=keepdims)
555
556
def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """Pick out the only `nanargmax` argument (`a`) passed on for dispatch."""
    relevant_args = (a,)
    return relevant_args
559
560
@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the maximum values in the specified axis ignoring
    NaNs.  For all-NaN slices ``ValueError`` is raised.  Warning: the
    results cannot be trusted if a slice contains only NaNs and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate.  By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array.  It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one.  With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists solely of NaNs.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])

    """
    # NaNs are replaced by -inf so that they are never preferred over a
    # finite value when searching for the maximum.
    filled, nan_mask = _replace_nan(a, -np.inf)

    # A slice made only of NaNs has no meaningful index: refuse to answer.
    if nan_mask is not None and np.any(np.all(nan_mask, axis=axis)):
        raise ValueError("All-NaN slice encountered")

    return np.argmax(filled, axis=axis, out=out, keepdims=keepdims)
616
617
def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Pick out the `nansum` arguments (`a`, `out`) passed on for dispatch."""
    relevant_args = (a, out)
    return relevant_args
621
622
@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
           initial=np._NoValue, where=np._NoValue):
    """
    Return the sum of array elements over a given axis treating Not a
    Numbers (NaNs) as zero.

    In NumPy versions <= 1.9.0 NaN is returned for slices that are all-NaN
    or empty.  In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose sum is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the sum is computed.  The default is to
        compute the sum of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are summed.  By default, the dtype of `a` is used.  An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp.  In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits.  For inexact inputs, dtype must be inexact.

        .. versionadded:: 1.8.0
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``.  If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.  See
        :ref:`ufuncs-output-type` for more details.  The casting of NaN to
        integer can yield unexpected results.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one.  With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `mean` or `sum` methods
        of sub-classes of `ndarray`.  If the sub-class' method
        does not implement `keepdims` any exceptions will be raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        Starting value for the sum.  See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the sum.  See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nansum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.  The result has the shape
        of `a` with the summed axes removed (or kept with size one when
        `keepdims` is True).

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.
    isfinite : Show which elements are not NaN or +/-inf.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be Not
    A Number (NaN).

    Examples
    --------
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([2.,  1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, -np.inf])
    -inf
    >>> from numpy.testing import suppress_warnings
    >>> with suppress_warnings() as sup:
    ...     sup.filter(RuntimeWarning)
    ...     np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
    nan

    """
    # Zero is the additive identity, so zero-filled NaNs do not affect the
    # total.  The NaN mask itself is not needed here.
    filled, _ = _replace_nan(a, 0)
    return np.sum(filled, axis=axis, dtype=dtype, out=out,
                  keepdims=keepdims, initial=initial, where=where)
725
726
def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        initial=None, where=None):
    """Pick out the `nanprod` arguments (`a`, `out`) passed on for dispatch."""
    relevant_args = (a, out)
    return relevant_args
730
731
@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            initial=np._NoValue, where=np._NoValue):
    """
    Return the product of array elements over a given axis treating Not a
    Numbers (NaNs) as ones.

    One is returned for slices that are all-NaN or empty.

    .. versionadded:: 1.10.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose product is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the product is computed.  The default is to
        compute the product of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are multiplied.  By default, the dtype of `a` is used.  An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp.  In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits.  For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``.  If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.  See
        :ref:`ufuncs-output-type` for more details.  The casting of NaN to
        integer can yield unexpected results.
    keepdims : bool, optional
        If True, the axes which are reduced are left in the result as
        dimensions with size one.  With this option, the result will
        broadcast correctly against the original `a`.
    initial : scalar, optional
        The starting value for this product.  See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the product.  See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.prod : Product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nanprod(1)
    1
    >>> np.nanprod([1])
    1
    >>> np.nanprod([1, np.nan])
    1.0
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanprod(a)
    6.0
    >>> np.nanprod(a, axis=0)
    array([3., 2.])

    """
    # One is the multiplicative identity, so one-filled NaNs do not affect
    # the product.  The NaN mask itself is not needed here.
    filled, _ = _replace_nan(a, 1)
    return np.prod(filled, axis=axis, dtype=dtype, out=out,
                   keepdims=keepdims, initial=initial, where=where)
808
809
def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
    """Pick out the `nancumsum` arguments (`a`, `out`) used for dispatch."""
    relevant_args = (a, out)
    return relevant_args
812
813
@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative sum of array elements over a given axis treating
    Not a Numbers (NaNs) as zero.  The cumulative sum does not change when
    NaNs are encountered and leading NaNs are replaced by zeros.

    Zeros are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed.  The default
        (None) is to compute the cumsum over the flattened array.
    dtype : dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed.  If `dtype` is not specified, it defaults
        to the dtype of `a`, unless `a` has an integer dtype with a
        precision less than that of the default platform integer.  In
        that case, the default platform integer is used.
    out : ndarray, optional
        Alternative output array in which to place the result.  It must
        have the same shape and buffer length as the expected output
        but the type will be cast if necessary.  See
        :ref:`ufuncs-output-type` for more details.

    Returns
    -------
    nancumsum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.  The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumsum(1)
    array([1])
    >>> np.nancumsum([1])
    array([1])
    >>> np.nancumsum([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumsum(a)
    array([1.,  3.,  6.,  6.])
    >>> np.nancumsum(a, axis=0)
    array([[1.,  2.],
           [4.,  2.]])
    >>> np.nancumsum(a, axis=1)
    array([[1.,  3.],
           [3.,  3.]])

    """
    # Zero-filled NaNs leave the running total unchanged.  The NaN mask
    # itself is not needed here.
    filled, _ = _replace_nan(a, 0)
    return np.cumsum(filled, axis=axis, dtype=dtype, out=out)
878
879
def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
    """Pick out the `nancumprod` arguments (`a`, `out`) used for dispatch."""
    relevant_args = (a, out)
    return relevant_args
882
883
@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative product of array elements over a given axis
    treating Not a Numbers (NaNs) as one.  The cumulative product does not
    change when NaNs are encountered and leading NaNs are replaced by ones.

    Ones are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative product is computed.  By default
        the input is flattened.
    dtype : dtype, optional
        Type of the returned array, as well as of the accumulator in which
        the elements are multiplied.  If *dtype* is not specified, it
        defaults to the dtype of `a`, unless `a` has an integer dtype with
        a precision less than that of the default platform integer.  In
        that case, the default platform integer is used instead.
    out : ndarray, optional
        Alternative output array in which to place the result.  It must
        have the same shape and buffer length as the expected output
        but the type of the resulting values will be cast if necessary.

    Returns
    -------
    nancumprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.cumprod : Cumulative product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumprod(1)
    array([1])
    >>> np.nancumprod([1])
    array([1])
    >>> np.nancumprod([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumprod(a)
    array([1.,  2.,  6.,  6.])
    >>> np.nancumprod(a, axis=0)
    array([[1.,  2.],
           [3.,  2.]])
    >>> np.nancumprod(a, axis=1)
    array([[1.,  2.],
           [3.,  3.]])

    """
    # One-filled NaNs leave the running product unchanged.  The NaN mask
    # itself is not needed here.
    filled, _ = _replace_nan(a, 1)
    return np.cumprod(filled, axis=axis, dtype=dtype, out=out)
945
946
def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        *, where=None):
    """Pick out the `nanmean` arguments (`a`, `out`) passed on for dispatch."""
    relevant_args = (a, out)
    return relevant_args
950
951
@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            *, where=np._NoValue):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the array elements.  The average is taken over
    the flattened array by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is issued.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the means are computed. The default is to
        compute the mean of the flattened array.
    dtype : data-type, optional
        Type to use in computing the mean.  For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : ndarray, optional
        Alternate output array in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `mean` or `sum` methods
        of sub-classes of `ndarray`.  If a sub-class' method does not
        implement `keepdims`, any exceptions will be raised.
    where : array_like of bool, optional
        Elements to include in the mean. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    m : ndarray, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. NaN is
        returned for slices that contain only NaNs.

    Raises
    ------
    TypeError
        If the NaN-aware path is taken and `dtype` or the dtype of `out`
        is not an inexact type.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`.  Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanmean(a)
    2.6666666666666665
    >>> np.nanmean(a, axis=0)
    array([2.,  4.])
    >>> np.nanmean(a, axis=1)
    array([1.,  3.5]) # may vary

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No NaN mask was produced, so the plain mean can be used directly.
        return np.mean(arr, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims, where=where)

    # Validate the requested accumulator / output types before reducing.
    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Both reductions share the same axis / keepdims / where arguments.
    reduce_args = dict(axis=axis, keepdims=keepdims, where=where)
    count = np.sum(~mask, dtype=np.intp, **reduce_args)
    total = np.sum(arr, dtype=dtype, out=out, **reduce_args)
    mean = _divide_by_count(total, count, out=out)

    if (count == 0).any():
        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
        # NaN is the only possible bad value, so no further
        # action is needed to handle bad results.
    return mean
1056
1057
def _nanmedian1d(arr1d, overwrite_input=False):
    """
    Private function for rank 1 arrays. Compute the median ignoring NaNs.

    The NaNs are stripped with `_remove_nan_1d` and the median of what is
    left is returned.  See nanmedian for parameter usage.
    """
    cleaned, may_overwrite = _remove_nan_1d(
        arr1d, overwrite_input=overwrite_input,
    )

    if cleaned.size != 0:
        return np.median(cleaned, overwrite_input=may_overwrite)

    # Nothing is left after removing the NaNs.  Ensure that a nan-esque
    # scalar of the appropriate type (and unit) is returned for
    # `timedelta64` and `complexfloating`.
    return arr1d[-1]
1073
1074
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanmedian for parameter usage

    Parameters
    ----------
    a : ndarray
        Input array.
    axis : int or None, optional
        Single axis to reduce over; ``None`` reduces the flattened array.
    out : ndarray, optional
        If given, the result is written into it and `out` itself is
        returned on every code path.
    overwrite_input : bool, optional
        Forwarded to the 1-D / small-median helpers.

    Returns
    -------
    median : scalar or ndarray
        `out` if it was supplied, otherwise the newly computed result.
    """
    if axis is None or a.ndim == 1:
        result = _nanmedian1d(a.ravel(), overwrite_input)
    elif a.shape[axis] < 600:
        # for small medians use sort + indexing which is still faster than
        # apply_along_axis
        # benchmarked with shuffled (50, 50, x) containing a few NaN
        return _nanmedian_small(a, axis, out, overwrite_input)
    else:
        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)

    if out is None:
        return result
    # Return `out` (not the temporary) so every branch honours the same
    # contract as the 1-D and small-median paths.
    out[...] = result
    return out
1099
1100
def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
    """
    sort + indexing median, faster for small medians along multiple
    dimensions due to the high overhead of apply_along_axis

    The NaN positions of `a` are masked and the median is computed with
    `np.ma.median`; masked entries of the result are filled with NaN
    (``NaT`` for a timedelta result).  One "All-NaN slice encountered"
    warning is emitted per masked entry of the result.

    see nanmedian for parameter usage
    """
    masked = np.ma.masked_array(a, np.isnan(a))
    med = np.ma.median(masked, axis=axis, overwrite_input=overwrite_input)

    # One warning per masked entry of the median result.
    n_masked = np.count_nonzero(med.mask.ravel())
    for _ in range(n_masked):
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=5)

    if med.dtype.kind == "m":
        fill_value = np.timedelta64("NaT")
    else:
        fill_value = np.nan
    filled = med.filled(fill_value)

    if out is None:
        return filled
    out[...] = filled
    return out
1119
1120
def _nanmedian_dispatcher(
        a, axis=None, out=None, overwrite_input=None, keepdims=None):
    """
    Dispatcher handed to `array_function_dispatch` for `nanmedian`.

    Mirrors the signature of `nanmedian` and returns only the ``a`` and
    ``out`` arguments; every other argument is ignored.
    """
    relevant_args = (a, out)
    return relevant_args
1124
1125
@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes along which the medians are computed. The default
        is to compute the median along a flattened version of the array.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array `a` for
        calculations. The input array will be modified by the call to
        `median`. This will save memory when you do not need to preserve
        the contents of the input array. Treat the input as undefined,
        but it will probably be fully or partially sorted. Default is
        False. If `overwrite_input` is ``True`` and `a` is not already an
        `ndarray`, an error will be raised.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    Returns
    -------
    median : ndarray
        A new array holding the result. If the input contains integers
        or floats smaller than ``float64``, then the output data-type is
        ``np.float64``.  Otherwise, the data-type of the output is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    See Also
    --------
    mean, median, percentile

    Notes
    -----
    Given a vector ``V`` of length ``N``, the median of ``V`` is the
    middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
    ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two
    middle values of ``V_sorted`` when ``N`` is even.

    Examples
    --------
    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
    >>> a[0, 1] = np.nan
    >>> a
    array([[10., nan,  4.],
           [ 3.,  2.,  1.]])
    >>> np.median(a)
    nan
    >>> np.nanmedian(a)
    3.0
    >>> np.nanmedian(a, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.median(a, axis=1)
    array([nan,  2.])
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=1, overwrite_input=True)
    array([7.,  2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=None, overwrite_input=True)
    3.0
    >>> assert not np.all(a==b)

    """
    arr = np.asanyarray(a)

    if arr.size != 0:
        return function_base._ureduce(arr, func=_nanmedian,
                                      keepdims=keepdims, axis=axis, out=out,
                                      overwrite_input=overwrite_input)

    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
    # so empty input is delegated to nanmean instead.
    return np.nanmean(arr, axis, out=out, keepdims=keepdims)
1220
1221
def _nanpercentile_dispatcher(
        a, q, axis=None, out=None, overwrite_input=None,
        method=None, keepdims=None, *, interpolation=None):
    """
    Dispatcher handed to `array_function_dispatch` for `nanpercentile`.

    Mirrors the signature of `nanpercentile` and returns only the ``a``,
    ``q`` and ``out`` arguments; every other argument is ignored.
    """
    relevant_args = (a, q, out)
    return relevant_args
1226
1227
@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        interpolation=None,
):
    """
    Compute the qth percentile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth percentile(s) of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be
        between 0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. The default
        is to compute the percentile(s) along a flattened version of the
        array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape and buffer length as the expected output, but the
        type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by
        intermediate calculations, to save memory. In this case, the
        contents of the input `a` after this function completes is
        undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        percentile.  There are many different methods, some unique to NumPy.
        See the notes for explanation.  The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    percentile : scalar or ndarray
        If `q` is a single percentile and `axis=None`, then the result
        is a scalar. If multiple percentiles are given, first axis of
        the result corresponds to the percentiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If `a` has a complex dtype.
    ValueError
        If `q` fails the range validation for percentiles.

    See Also
    --------
    nanmean
    nanmedian : equivalent to ``nanpercentile(..., 50)``
    percentile, median, mean
    nanquantile : equivalent to nanpercentile, except q in range [0, 1].

    Notes
    -----
    For more information please see `numpy.percentile`

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.percentile(a, 50)
    nan
    >>> np.nanpercentile(a, 50)
    3.0
    >>> np.nanpercentile(a, 50, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanpercentile(a, 50, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanpercentile(a, 50, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])

    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    # Resolve the deprecated ``interpolation`` keyword into ``method``.
    if interpolation is not None:
        method = function_base._check_interpolation_as_method(
            method, interpolation, "nanpercentile")

    data = np.asanyarray(a)
    if data.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # Rescale percentiles to fractions; the asanyarray call undoes any
    # decay that the ufunc performed (see gh-13105).
    fractions = np.asanyarray(np.true_divide(q, 100.0))
    if function_base._quantile_is_valid(fractions):
        return _nanquantile_unchecked(
            data, fractions, axis, out, overwrite_input, method, keepdims)
    raise ValueError("Percentiles must be in the range [0, 100]")
1386
1387
def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
                            method=None, keepdims=None, *, interpolation=None):
    """
    Dispatcher handed to `array_function_dispatch` for `nanquantile`.

    Mirrors the signature of `nanquantile` and returns only the ``a``,
    ``q`` and ``out`` arguments; every other argument is ignored.
    """
    relevant_args = (a, q, out)
    return relevant_args
1391
1392
@array_function_dispatch(_nanquantile_dispatcher)
def nanquantile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        interpolation=None,
):
    """
    Compute the qth quantile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth quantile(s) of the array elements.

    .. versionadded:: 1.15.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Probability or sequence of probabilities for the quantiles to compute.
        Values must be between 0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the quantiles are computed. The
        default is to compute the quantile(s) along a flattened
        version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by intermediate
        calculations, to save memory. In this case, the contents of the input
        `a` after this function completes is undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        quantile.  There are many different methods, some unique to NumPy.
        See the notes for explanation.  The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    quantile : scalar or ndarray
        If `q` is a single probability and `axis=None`, then the result
        is a scalar. If multiple probability levels are given, first axis of
        the result corresponds to the quantiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If `a` has a complex dtype.
    ValueError
        If `q` fails the range validation for quantiles.

    See Also
    --------
    quantile
    nanmean
    nanmedian : equivalent to ``nanquantile(..., 0.5)``
    nanpercentile : same as nanquantile, but with q in the range [0, 100].

    Notes
    -----
    For more information please see `numpy.quantile`

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.quantile(a, 0.5)
    nan
    >>> np.nanquantile(a, 0.5)
    3.0
    >>> np.nanquantile(a, 0.5, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanquantile(a, 0.5, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanquantile(a, 0.5, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])
    >>> b = a.copy()
    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    # Resolve the deprecated ``interpolation`` keyword into ``method``.
    if interpolation is not None:
        method = function_base._check_interpolation_as_method(
            method, interpolation, "nanquantile")

    data = np.asanyarray(a)
    if data.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    probabilities = np.asanyarray(q)
    if function_base._quantile_is_valid(probabilities):
        return _nanquantile_unchecked(
            data, probabilities, axis, out, overwrite_input, method,
            keepdims)
    raise ValueError("Quantiles must be in the range [0, 1]")
1547
1548
def _nanquantile_unchecked(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
):
    """
    Compute NaN-ignoring quantiles without validating `q`.

    Assumes that q is in [0, 1], and is an ndarray.  Empty input is
    handed to `np.nanmean`; everything else is reduced through
    `function_base._ureduce` with `_nanquantile_ureduce_func`.
    """
    if a.size != 0:
        return function_base._ureduce(a,
                                      func=_nanquantile_ureduce_func,
                                      q=q,
                                      keepdims=keepdims,
                                      axis=axis,
                                      out=out,
                                      overwrite_input=overwrite_input,
                                      method=method)

    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
    # so empty input is dealt with here, up front.
    return np.nanmean(a, axis, out=out, keepdims=keepdims)
1571
1572
def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
                              method="linear"):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanpercentile for parameter usage

    Parameters
    ----------
    a : ndarray
        Input array.
    q : ndarray
        Quantile(s) to compute; its ``ndim`` decides whether a leading
        quantile axis is present in the result.
    axis : int or None, optional
        Single axis to reduce over; ``None`` reduces the flattened array.
    out : ndarray, optional
        If given, the result is written into it and `out` itself is
        returned.
    overwrite_input : bool, optional
        Forwarded to `_nanquantile_1d`.
    method : str, optional
        Forwarded to `_nanquantile_1d`.

    Returns
    -------
    quantile : scalar or ndarray
        `out` if it was supplied, otherwise the newly computed result.
    """
    if axis is None or a.ndim == 1:
        result = _nanquantile_1d(a.ravel(), q, overwrite_input, method)
    else:
        result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
                                     overwrite_input, method)
        # apply_along_axis fills in collapsed axis with results.
        # Move that axis to the beginning to match percentile's
        # convention.
        if q.ndim != 0:
            result = np.moveaxis(result, axis, 0)

    if out is None:
        return result
    # Hand back `out` itself rather than the temporary, matching the
    # documented "if `out` is specified, that array is returned" contract.
    out[...] = result
    return out
1595
1596
def _nanquantile_1d(arr1d, q, overwrite_input=False, method="linear"):
    """
    Private function for rank 1 arrays. Compute quantile ignoring NaNs.

    The NaNs are stripped with `_remove_nan_1d`; if nothing is left, a
    NaN-filled result shaped like `q` is returned instead.
    See nanpercentile for parameter usage
    """
    values, may_overwrite = _remove_nan_1d(arr1d,
                                           overwrite_input=overwrite_input)
    if values.size != 0:
        return function_base._quantile_unchecked(
            values, q, overwrite_input=may_overwrite, method=method)

    nan_result = np.full(q.shape, np.nan, dtype=values.dtype)
    # Indexing with an empty tuple converts a 0-d result to a scalar.
    return nan_result[()]
1610
1611
def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
                       keepdims=None, *, where=None):
    """
    Dispatcher handed to `array_function_dispatch` for `nanvar`.

    Mirrors the signature of `nanvar` and returns only the ``a`` and
    ``out`` arguments; every other argument is ignored.
    """
    relevant_args = (a, out)
    return relevant_args
1615
1616
@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution.  The variance is computed for the flattened array by
    default, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is issued.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose variance is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the variance is computed.  The default is to
        compute the variance of the flattened array.
    dtype : data-type, optional
        Type to use in computing the variance.  For arrays of integer type
        the default is `float64`; for arrays of float types it is the same as
        the array type.
    out : ndarray, optional
        Alternate output array in which to place the result.  It must have
        the same shape as the expected output, but the type is cast if
        necessary.
    ddof : int, optional
        "Delta Degrees of Freedom": the divisor used in the calculation is
        ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.
    where : array_like of bool, optional
        Elements to include in the variance. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    variance : ndarray, see dtype parameter above
        If `out` is None, return a new array containing the variance,
        otherwise return a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    Raises
    ------
    TypeError
        If the NaN-aware path is taken and `dtype` or the dtype of `out`
        is not an inexact type.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e.,  ``var = mean(abs(x - x.mean())**2)``.

    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
    instead.  In standard statistical practice, ``ddof=1`` provides an
    unbiased estimator of the variance of a hypothetical infinite
    population.  ``ddof=0`` provides a maximum likelihood estimate of the
    variance for normally distributed variables.

    Note that for complex numbers, the absolute value is taken before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the variance is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`.  Specifying a
    higher-accuracy accumulator using the ``dtype`` keyword can alleviate
    this issue.

    For this function to work on sub-classes of ndarray, they must define
    `sum` with the kwarg `keepdims`

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([1.,  0.])
    >>> np.nanvar(a, axis=1)
    array([0.,  0.25])  # may vary

    """
    # NOTE(review): `_replace_nan` is defined outside this view; presumably
    # `arr` is `a` with NaNs replaced by 0 and `mask` marks the NaN
    # positions (or is None when no NaN handling is needed) - confirm.
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No mask was produced: defer entirely to the regular variance.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where)

    # Validate the requested accumulator / output types before reducing.
    if dtype is not None:
        dtype = np.dtype(dtype)
    if dtype is not None and not issubclass(dtype.type, np.inexact):
        raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Compute mean
    if type(arr) is np.matrix:
        _keepdims = np._NoValue
    else:
        _keepdims = True
    # we need to special case matrix for reverse compatibility
    # in order for this to work, these sums need to be called with
    # keepdims=True, however matrix now raises an error in this case, but
    # the reason that it drops the keepdims kwarg is to force keepdims=True
    # so this used to work by serendipity.
    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims,
                 where=where)
    avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims, where=where)
    avg = _divide_by_count(avg, cnt)

    # Compute squared deviation from mean.
    # `arr` is overwritten in place by the subtraction and the squaring;
    # the masked positions are reset to 0 in between via `_copyto`.
    np.subtract(arr, avg, out=arr, casting='unsafe', where=where)
    arr = _copyto(arr, 0, mask)
    if issubclass(arr.dtype.type, np.complexfloating):
        # Multiply by the conjugate and keep the real part so the squared
        # magnitude is real for complex input.
        sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real
    else:
        sqr = np.multiply(arr, arr, out=arr, where=where)

    # Compute variance.
    # This reduction uses the caller's `keepdims` (not `_keepdims`), so its
    # dimensionality may differ from that of `cnt`.
    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                 where=where)

    # Precaution against reduced object arrays
    try:
        var_ndim = var.ndim
    except AttributeError:
        var_ndim = np.ndim(var)
    if var_ndim < cnt.ndim:
        # Subclasses of ndarray may ignore keepdims, so check here.
        cnt = cnt.squeeze(axis)
    dof = cnt - ddof
    var = _divide_by_count(var, dof)

    isbad = (dof <= 0)
    if np.any(isbad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
                      stacklevel=2)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        var = _copyto(var, np.nan, isbad)
    return var
1771
1772
def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
                       keepdims=None, *, where=None):
    """
    Dispatcher handed to `array_function_dispatch` for `nanstd`.

    Mirrors the signature of `nanstd` and returns only the ``a`` and
    ``out`` arguments; every other argument is ignored.
    """
    relevant_args = (a, out)
    return relevant_args
1776
1777
@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue):
    """
    Compute the standard deviation along the specified axis, while
    ignoring NaNs.

    Returns the standard deviation, a measure of the spread of a
    distribution, of the non-NaN array elements. The standard deviation is
    computed for the flattened array by default, otherwise over the
    specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is issued.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Calculate the standard deviation of the non-NaN values.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the standard deviation is computed. The
        default is to compute the standard deviation of the flattened array.
    dtype : dtype, optional
        Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it
        is the same as the array type.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape as the expected output but the type (of the
        calculated values) will be cast if necessary.
    ddof : int, optional
        Means Delta Degrees of Freedom.  The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of non-NaN
        elements.  By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If this value is anything but the default it is passed through
        as-is to the relevant functions of the sub-classes.  If these
        functions do not have a `keepdims` kwarg, a RuntimeError will
        be raised.
    where : array_like of bool, optional
        Elements to include in the standard deviation.
        See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        If `out` is None, return a new array containing the standard
        deviation, otherwise return a reference to the output array. If
        ddof is >= the number of non-NaN elements in a slice or the slice
        contains only NaNs, then the result for that slice is NaN.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The standard deviation is the square root of the average of the squared
    deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally calculated as
    ``x.sum() / N``, where ``N = len(x)``.  If, however, `ddof` is
    specified, the divisor ``N - ddof`` is used instead. In standard
    statistical practice, ``ddof=1`` provides an unbiased estimator of the
    variance of the infinite population. ``ddof=0`` provides a maximum
    likelihood estimate of the variance for normally distributed variables.
    The standard deviation computed in this function is the square root of
    the estimated variance, so even with ``ddof=1``, it will not be an
    unbiased estimate of the standard deviation per se.

    Note that, for complex numbers, `std` takes the absolute value before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the *std* is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for float32. Specifying a
    higher-accuracy accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([1., 0.])
    >>> np.nanstd(a, axis=1)
    array([0.,  0.5]) # may vary

    """
    variance = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where)

    if isinstance(variance, np.ndarray):
        # Take the root in place, reusing the array returned by nanvar.
        return np.sqrt(variance, out=variance)
    if hasattr(variance, 'dtype'):
        # Re-wrap the root in the scalar type of the variance.
        return variance.dtype.type(np.sqrt(variance))
    return np.sqrt(variance)