1"""
2Functions that ignore NaN.
3
4Functions
5---------
6
7- `nanmin` -- minimum non-NaN value
8- `nanmax` -- maximum non-NaN value
9- `nanargmin` -- index of minimum non-NaN value
10- `nanargmax` -- index of maximum non-NaN value
11- `nansum` -- sum of non-NaN values
12- `nanprod` -- product of non-NaN values
13- `nancumsum` -- cumulative sum of non-NaN values
14- `nancumprod` -- cumulative product of non-NaN values
15- `nanmean` -- mean of non-NaN values
16- `nanvar` -- variance of non-NaN values
17- `nanstd` -- standard deviation of non-NaN values
18- `nanmedian` -- median of non-NaN values
19- `nanquantile` -- qth quantile of non-NaN values
20- `nanpercentile` -- qth percentile of non-NaN values
21
22"""
23import functools
24import warnings
25
26import numpy as np
27import numpy._core.numeric as _nx
28from numpy._core import overrides
29from numpy.lib import _function_base_impl as fnb
30from numpy.lib._function_base_impl import _weights_are_valid
31
32array_function_dispatch = functools.partial(
33 overrides.array_function_dispatch, module='numpy')
34
35
36__all__ = [
37 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
38 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
39 'nancumsum', 'nancumprod', 'nanquantile'
40 ]
41
42
def _nan_mask(a, out=None):
    """
    Build a boolean mask that is ``True`` wherever `a` is not NaN.

    Parameters
    ----------
    a : ndarray
        Input array with at least 1 dimension. No conversion is done
        here, so `a` must already expose a ``dtype`` attribute.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output and will prevent the allocation of a new array.

    Returns
    -------
    y : bool ndarray or True
        A bool array where ``np.nan`` positions are marked with ``False``
        and other positions are marked with ``True``. If the dtype kind
        of `a` is neither float nor complex, so that it cannot contain
        ``np.nan``, the plain value ``True`` is returned instead.
    """
    # Only the float ('f') and complex ('c') dtype kinds can hold NaN.
    if a.dtype.kind in 'fc':
        is_nan = np.isnan(a, out=out)
        # Invert in place so that no second array is allocated.
        return np.invert(is_nan, out=is_nan)

    return True
69
def _replace_nan(a, val):
    """
    Return a NaN-free copy of `a` together with the mask of replaced slots.

    If `a` has object or inexact dtype, a copy is made, every NaN in the
    copy is overwritten with `val`, and the copy is returned along with a
    boolean mask marking where the NaNs were. For any other dtype nothing
    is copied: the (array-converted) input is returned with a mask of
    ``None``.

    Note that scalars will end up as 0-d arrays, which is important for
    using the result as the value of the out argument in some operations.

    Parameters
    ----------
    a : array-like
        Input array.
    val : float
        Value written over the NaN entries before the caller performs
        its operation.

    Returns
    -------
    y : ndarray
        A copy of `a` with the NaNs replaced by `val` when a mask could
        be computed, otherwise `a` itself after ``np.asanyarray``.
    mask : {bool ndarray, None}
        Boolean mask marking the locations of the NaNs, or ``None`` when
        the dtype of `a` is neither object nor inexact.

    """
    arr = np.asanyarray(a)

    if arr.dtype == np.object_:
        # object arrays do not support `isnan` (gh-9009), so make a guess:
        # NaN is the usual value that compares unequal to itself.
        nan_mask = np.not_equal(arr, arr, dtype=bool)
    elif issubclass(arr.dtype.type, np.inexact):
        nan_mask = np.isnan(arr)
    else:
        # This dtype cannot hold NaN; hand the input back uncopied.
        return arr, None

    # Copy (keeping subclasses) so the caller's data is never modified.
    filled = np.array(arr, subok=True, copy=True)
    np.copyto(filled, val, where=nan_mask)
    return filled, nan_mask
113
114
def _copyto(a, val, mask):
    """
    Replace values in `a` with `val` where `mask` is True. This differs
    from `numpy.copyto` in that it also copes with `a` being a numpy
    scalar.

    Parameters
    ----------
    a : ndarray or numpy scalar
        Array or numpy scalar some of whose values are to be replaced
        by `val`.
    val : numpy scalar
        Value used as a replacement.
    mask : ndarray, scalar
        Boolean array. Where True the corresponding element of `a` is
        replaced by `val`. Broadcasts. It is not consulted when `a` is
        not an ndarray.

    Returns
    -------
    res : ndarray, scalar
        `a` itself, modified in place, when it is an ndarray; otherwise
        `val` converted to the scalar type of `a`.

    """
    if not isinstance(a, np.ndarray):
        # A scalar cannot be written into; build a new one of its type.
        return a.dtype.type(val)

    np.copyto(a, val, where=mask, casting='unsafe')
    return a
142
143
def _remove_nan_1d(arr1d, second_arr1d=None, overwrite_input=False):
    """
    Drop the NaN entries of a 1-d array.

    Equivalent to ``arr1d[~np.isnan(arr1d)]`` except that the surviving
    elements may come out in a different order: NaN slots near the front
    are filled with non-NaN values taken from the tail, and the tail is
    then sliced off.

    Presumably faster as it incurs fewer copies.

    Parameters
    ----------
    arr1d : ndarray
        Array to remove nans from
    second_arr1d : ndarray or None
        A second array which will have the same positions removed as
        `arr1d`.
    overwrite_input : bool
        True if `arr1d` (and `second_arr1d`) can be modified in place

    Returns
    -------
    res : ndarray
        Array with nan elements removed
    second_res : ndarray or None
        Second array with nan element positions of first array removed.
    overwrite_input : bool
        True if `res` can be modified in place, given the constraint on
        the input

    Warns
    -----
    RuntimeWarning
        If the number of NaNs equals the size of `arr1d`.
    """
    if arr1d.dtype == object:
        # object arrays do not support `isnan` (gh-9009), so make a guess
        is_nan = np.not_equal(arr1d, arr1d, dtype=bool)
    else:
        is_nan = np.isnan(arr1d)

    nan_idx = np.nonzero(is_nan)[0]
    n_nan = nan_idx.size
    has_second = second_arr1d is not None

    if n_nan == arr1d.size:
        # Nothing survives: return empty views.
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=6)
        return arr1d[:0], (second_arr1d[:0] if has_second else None), True

    if n_nan == 0:
        # Nothing to remove: pass the inputs straight through.
        return arr1d, second_arr1d, overwrite_input

    # Which of the last `n_nan` slots hold real values.
    tail_keep = ~is_nan[-n_nan:]

    if not overwrite_input:
        arr1d = arr1d.copy()
    # select non-nans at end of array
    tail_vals = arr1d[-n_nan:][tail_keep]
    # fill nans in beginning of array with non-nans of end
    arr1d[nan_idx[:tail_vals.size]] = tail_vals

    if not has_second:
        return arr1d[:-n_nan], None, True

    # Apply the identical relocation to the companion array.
    if not overwrite_input:
        second_arr1d = second_arr1d.copy()
    tail_vals = second_arr1d[-n_nan:][tail_keep]
    second_arr1d[nan_idx[:tail_vals.size]] = tail_vals

    return arr1d[:-n_nan], second_arr1d[:-n_nan], True
202
203
def _divide_by_count(a, b, out=None):
    """
    Compute ``a / b`` with invalid-value and divide-by-zero floating
    point errors ignored.

    If `a` is an array and `out` is None, the division is done in place
    in `a`. If `a` is a numpy scalar and `out` is None, the result is
    converted back to the scalar type of `a`. Whenever `out` is given the
    result is written there. Note that this is only called with `a` an
    inexact type.

    Parameters
    ----------
    a : {ndarray, numpy scalar}
        Numerator. Expected to be of inexact type but not checked.
    b : {ndarray, numpy scalar}
        Denominator.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.

    Returns
    -------
    ret : {ndarray, numpy scalar}
        The return value is a/b. If `a` was an ndarray the division is
        done in place. If `a` is a numpy scalar, the division preserves
        its type.

    """
    with np.errstate(invalid='ignore', divide='ignore'):
        if out is not None or isinstance(a, np.ndarray):
            # Array numerator and/or explicit output: let the ufunc write
            # the result. Writing a scalar numerator into `out` is
            # questionable, but currently a numpy scalar can be output to
            # a zero dimensional array.
            target = a if out is None else out
            return np.divide(a, b, out=target, casting='unsafe')

        # Scalar numerator without `out`.
        # Precaution against reduced object arrays: such a value may have
        # no ``dtype`` attribute, in which case plain division is used.
        try:
            return a.dtype.type(a / b)
        except AttributeError:
            return a / b
245
246
def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """
    Dispatcher registered for `nanmin` via ``array_function_dispatch``.

    Takes the same parameter names as `nanmin` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
250
251
@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return the minimum of an array, or the minimum along an axis, ignoring
    any NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is
    issued and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose minimum is desired. If `a` is not
        an array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the minimum is computed. The default is
        to compute the minimum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then `keepdims` will
        be passed through to the `min` method of sub-classes of
        `ndarray`. If the sub-class method does not implement
        `keepdims`, an exception will be raised.
    initial : scalar, optional
        The maximum value of an output element. Must be present to allow
        computation on empty slice. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the minimum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmin : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any NaNs.
    amin :
        The minimum value of an array along a given axis, propagating any NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amax, fmax, maximum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.min.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([1., 2.])
    >>> np.nanmin(a, axis=1)
    array([1., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, -np.inf])
    -inf

    """
    # Forward only the optional arguments that were given explicitly.
    kwargs = {
        name: value
        for name, value in (('keepdims', keepdims),
                            ('initial', initial),
                            ('where', where))
        if value is not np._NoValue
    }

    exact_type = type(a)
    is_plain_array = exact_type is np.ndarray or exact_type is np.memmap

    if is_plain_array and a.dtype != np.object_:
        # Fast, but not safe for subclasses of ndarray, or object arrays,
        # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
        res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
        # fmin skips NaN, so a NaN result means the slice held only NaNs.
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Slow, but safe for subclasses of ndarray: NaNs become +inf so that
    # they can never win the minimum.
    filled, nan_mask = _replace_nan(a, +np.inf)
    res = np.amin(filled, axis=axis, out=out, **kwargs)
    if nan_mask is None:
        return res

    # Check for all-NaN axis; `initial` is dropped before reducing the mask
    # with `np.all`.
    kwargs.pop("initial", None)
    all_nan = np.all(nan_mask, axis=axis, **kwargs)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=2)
    return res
375
376
def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """
    Dispatcher registered for `nanmax` via ``array_function_dispatch``.

    Takes the same parameter names as `nanmax` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
380
381
@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return the maximum of an array, or the maximum along an axis, ignoring
    any NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is
    issued and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose maximum is desired. If `a` is not
        an array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the maximum is computed. The default is
        to compute the maximum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then `keepdims` will
        be passed through to the `max` method of sub-classes of
        `ndarray`. If the sub-class method does not implement
        `keepdims`, an exception will be raised.
    initial : scalar, optional
        The minimum value of an output element. Must be present to allow
        computation on empty slice. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the maximum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmax : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any NaNs.
    amax :
        The maximum value of an array along a given axis, propagating any NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amin, fmin, minimum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.max.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmax(a)
    3.0
    >>> np.nanmax(a, axis=0)
    array([3., 2.])
    >>> np.nanmax(a, axis=1)
    array([2., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmax([1, 2, np.nan, -np.inf])
    2.0
    >>> np.nanmax([1, 2, np.nan, np.inf])
    inf

    """
    # Forward only the optional arguments that were given explicitly.
    kwargs = {
        name: value
        for name, value in (('keepdims', keepdims),
                            ('initial', initial),
                            ('where', where))
        if value is not np._NoValue
    }

    exact_type = type(a)
    is_plain_array = exact_type is np.ndarray or exact_type is np.memmap

    if is_plain_array and a.dtype != np.object_:
        # Fast, but not safe for subclasses of ndarray, or object arrays,
        # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
        res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
        # fmax skips NaN, so a NaN result means the slice held only NaNs.
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Slow, but safe for subclasses of ndarray: NaNs become -inf so that
    # they can never win the maximum.
    filled, nan_mask = _replace_nan(a, -np.inf)
    res = np.amax(filled, axis=axis, out=out, **kwargs)
    if nan_mask is None:
        return res

    # Check for all-NaN axis; `initial` is dropped before reducing the mask
    # with `np.all`.
    kwargs.pop("initial", None)
    all_nan = np.all(nan_mask, axis=axis, **kwargs)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=2)
    return res
504
505
def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """
    Dispatcher registered for `nanargmin` via ``array_function_dispatch``.

    Takes the same parameter names as `nanargmin` and returns
    ``(a, out)``. `out` is included so that an output array implementing
    ``__array_function__`` takes part in dispatch, as it does for the
    other dispatchers in this module that accept `out`.
    """
    return (a, out)
508
509
@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the minimum values in the specified axis ignoring
    NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results
    cannot be trusted if a slice contains only NaNs and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array. It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists solely of NaNs.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])

    """
    # NaNs become +inf so that they lose against every other value.
    filled, nan_mask = _replace_nan(a, np.inf)

    # A slice made up only of NaNs has no meaningful answer. The mask is
    # None for dtypes without NaN, and an empty mask is skipped.
    if (nan_mask is not None and nan_mask.size
            and np.any(np.all(nan_mask, axis=axis))):
        raise ValueError("All-NaN slice encountered")

    return np.argmin(filled, axis=axis, out=out, keepdims=keepdims)
565
566
def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """
    Dispatcher registered for `nanargmax` via ``array_function_dispatch``.

    Takes the same parameter names as `nanargmax` and returns
    ``(a, out)``. `out` is included so that an output array implementing
    ``__array_function__`` takes part in dispatch, as it does for the
    other dispatchers in this module that accept `out`.
    """
    return (a, out)
569
570
@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the maximum values in the specified axis ignoring
    NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the
    results cannot be trusted if a slice contains only NaNs and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array. It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists solely of NaNs.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])

    """
    # NaNs become -inf so that they lose against every other value.
    filled, nan_mask = _replace_nan(a, -np.inf)

    # A slice made up only of NaNs has no meaningful answer. The mask is
    # None for dtypes without NaN, and an empty mask is skipped.
    if (nan_mask is not None and nan_mask.size
            and np.any(np.all(nan_mask, axis=axis))):
        raise ValueError("All-NaN slice encountered")

    return np.argmax(filled, axis=axis, out=out, keepdims=keepdims)
627
628
def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                       initial=None, where=None):
    """
    Dispatcher registered for `nansum` via ``array_function_dispatch``.

    Takes the same parameter names as `nansum` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
632
633
@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
           initial=np._NoValue, where=np._NoValue):
    """
    Return the sum of array elements over a given axis treating Not a
    Numbers (NaNs) as zero.

    In NumPy versions <= 1.9.0 NaN is returned for slices that are all-NaN
    or empty. In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose sum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the sum is computed. The default is to
        compute the sum of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are summed. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN to
        integer can yield unexpected results.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then `keepdims` will
        be passed through to the `mean` or `sum` methods of sub-classes
        of `ndarray`. If the sub-class method does not implement
        `keepdims`, an exception will be raised.
    initial : scalar, optional
        Starting value for the sum. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the sum. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nansum : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned. The result has the shape
        of `a` with the summed axes removed, or kept with size one when
        `keepdims` is True.

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.
    isfinite : Show which elements are not NaN or +/-inf.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be Not
    A Number (NaN).

    Examples
    --------
    >>> import numpy as np
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([2., 1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, -np.inf])
    -inf
    >>> with np.errstate(invalid="ignore"):
    ...     np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
    np.float64(nan)

    """
    # Zero is the additive identity, so zeroed NaNs do not affect the sum.
    # The NaN mask is not needed here.
    filled, _ = _replace_nan(a, 0)
    reduce_kwargs = {
        'axis': axis,
        'dtype': dtype,
        'out': out,
        'keepdims': keepdims,
        'initial': initial,
        'where': where,
    }
    return np.sum(filled, **reduce_kwargs)
728
729
def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        initial=None, where=None):
    """
    Dispatcher registered for `nanprod` via ``array_function_dispatch``.

    Takes the same parameter names as `nanprod` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
733
734
@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            initial=np._NoValue, where=np._NoValue):
    """
    Return the product of array elements over a given axis treating Not a
    Numbers (NaNs) as ones.

    One is returned for slices that are all-NaN or empty.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose product is desired. If `a` is not
        an array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the product is computed. The default is
        to compute the product of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are multiplied. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN to
        integer can yield unexpected results.
    keepdims : bool, optional
        If True, the axes which are reduced are left in the result as
        dimensions with size one. With this option, the result will
        broadcast correctly against the original `a`.
    initial : scalar, optional
        The starting value for this product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.prod : Product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> import numpy as np
    >>> np.nanprod(1)
    1
    >>> np.nanprod([1])
    1
    >>> np.nanprod([1, np.nan])
    1.0
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanprod(a)
    6.0
    >>> np.nanprod(a, axis=0)
    array([3., 2.])

    """
    # One is the multiplicative identity, so NaNs replaced by one do not
    # affect the product. The NaN mask is not needed here.
    filled, _ = _replace_nan(a, 1)
    reduce_kwargs = {
        'axis': axis,
        'dtype': dtype,
        'out': out,
        'keepdims': keepdims,
        'initial': initial,
        'where': where,
    }
    return np.prod(filled, **reduce_kwargs)
810
811
def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
    """
    Dispatcher registered for `nancumsum` via ``array_function_dispatch``.

    Takes the same parameters as `nancumsum` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
814
815
@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative sum of array elements over a given axis treating
    Not a Numbers (NaNs) as zero. The cumulative sum does not change when
    NaNs are encountered and leading NaNs are replaced by zeros.

    Zeros are returned for slices that are all-NaN or empty.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed. The default
        (None) is to compute the cumsum over the flattened array.
    dtype : dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed. If `dtype` is not specified, it defaults
        to the dtype of `a`, unless `a` has an integer dtype with a
        precision less than that of the default platform integer. In
        that case, the default platform integer is used.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output
        but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

    Returns
    -------
    nancumsum : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned. The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> import numpy as np
    >>> np.nancumsum(1)
    array([1])
    >>> np.nancumsum([1])
    array([1])
    >>> np.nancumsum([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumsum(a)
    array([1.,  3.,  6.,  6.])
    >>> np.nancumsum(a, axis=0)
    array([[1.,  2.],
           [4.,  2.]])
    >>> np.nancumsum(a, axis=1)
    array([[1.,  3.],
           [3.,  3.]])

    """
    # Zeroed NaNs leave the running total unchanged. The NaN mask is not
    # needed here.
    filled, _ = _replace_nan(a, 0)
    running_total = np.cumsum(filled, axis=axis, dtype=dtype, out=out)
    return running_total
879
880
def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
    """
    Dispatcher registered for `nancumprod` via ``array_function_dispatch``.

    Takes the same parameters as `nancumprod` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
883
884
@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative product of array elements over a given axis
    treating Not a Numbers (NaNs) as one. The cumulative product does not
    change when NaNs are encountered and leading NaNs are replaced by ones.

    Ones are returned for slices that are all-NaN or empty.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative product is computed. By default
        the input is flattened.
    dtype : dtype, optional
        Type of the returned array, as well as of the accumulator in which
        the elements are multiplied. If *dtype* is not specified, it
        defaults to the dtype of `a`, unless `a` has an integer dtype with
        a precision less than that of the default platform integer. In
        that case, the default platform integer is used instead.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output
        but the type of the resulting values will be cast if necessary.

    Returns
    -------
    nancumprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.cumprod : Cumulative product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> import numpy as np
    >>> np.nancumprod(1)
    array([1])
    >>> np.nancumprod([1])
    array([1])
    >>> np.nancumprod([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumprod(a)
    array([1.,  2.,  6.,  6.])
    >>> np.nancumprod(a, axis=0)
    array([[1.,  2.],
           [3.,  2.]])
    >>> np.nancumprod(a, axis=1)
    array([[1.,  2.],
           [3.,  3.]])

    """
    # NaNs replaced by one leave the running product unchanged. The NaN
    # mask is not needed here.
    filled, _ = _replace_nan(a, 1)
    running_product = np.cumprod(filled, axis=axis, dtype=dtype, out=out)
    return running_product
945
946
def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        *, where=None):
    """
    Dispatcher registered for `nanmean` via ``array_function_dispatch``.

    Takes the same parameter names as `nanmean` and returns ``(a, out)``,
    the arguments it nominates as relevant for dispatch.
    """
    relevant_args = (a, out)
    return relevant_args
950
951
@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            *, where=np._NoValue):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the non-NaN array elements. By default the
    average is taken over the flattened array, otherwise over the
    specified axis. Integer inputs use `float64` intermediate and return
    values.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the means are computed. The default is
        to compute the mean of the flattened array.
    dtype : data-type, optional
        Type to use in computing the mean. For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.
        See :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then `keepdims` will
        be passed through to the `mean` or `sum` methods of sub-classes
        of `ndarray`. If a sub-class' method does not implement
        `keepdims`, an exception will be raised.
    where : array_like of bool, optional
        Elements to include in the mean. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    m : ndarray, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. NaN is
        returned for slices that contain only NaNs.

    Raises
    ------
    TypeError
        If `a` is inexact and `dtype` or the dtype of `out` is not.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`. Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanmean(a)
    2.6666666666666665
    >>> np.nanmean(a, axis=0)
    array([2.,  4.])
    >>> np.nanmean(a, axis=1)
    array([1.,  3.5]) # may vary

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # `_replace_nan` produced no mask, so defer to the regular mean.
        return np.mean(arr, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims, where=where)

    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Both reductions below share the same axis/keepdims/where settings.
    reduce_kwargs = dict(axis=axis, keepdims=keepdims, where=where)
    valid_count = np.sum(~mask, dtype=np.intp, **reduce_kwargs)
    total = np.sum(arr, dtype=dtype, out=out, **reduce_kwargs)
    avg = _divide_by_count(total, valid_count, out=out)

    if (valid_count == 0).any():
        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
        # NaN is the only possible bad value, so no further
        # action is needed to handle bad results.
    return avg
1055
1056
def _nanmedian1d(arr1d, overwrite_input=False):
    """
    Private function for rank 1 arrays. Compute the median ignoring NaNs.
    See nanmedian for parameter usage
    """
    cleaned, _, may_overwrite = _remove_nan_1d(
        arr1d, overwrite_input=overwrite_input,
    )

    if cleaned.size != 0:
        return np.median(cleaned, overwrite_input=may_overwrite)

    # Nothing is left after NaN removal: hand back the last input element so
    # that a nan-esque scalar of the appropriate type (and unit) is returned
    # for `timedelta64` and `complexfloating`
    return arr1d[-1]
1072
1073
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanmedian for parameter usage

    When `out` is given, the result is written into it and `out` itself is
    returned on every code path.
    """
    if axis is None or a.ndim == 1:
        result = _nanmedian1d(a.ravel(), overwrite_input)
    elif a.shape[axis] < 600:
        # for small medians use sort + indexing which is still faster than
        # apply_along_axis
        # benchmarked with shuffled (50, 50, x) containing a few NaN
        # (`_nanmedian_small` fills and returns `out` itself.)
        return _nanmedian_small(a, axis, out, overwrite_input)
    else:
        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)

    if out is None:
        return result
    out[...] = result
    # Return `out` rather than the temporary so that all branches agree.
    return out
1098
1099
def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
    """
    sort + indexing median, faster for small medians along multiple
    dimensions due to the high overhead of apply_along_axis

    NaNs are masked out and the median is taken with `np.ma.median`;
    entries of the result that stay masked are filled with NaN (NaT for
    ``timedelta64`` results), one warning being emitted per such entry.

    see nanmedian for parameter usage
    """
    masked_input = np.ma.masked_array(a, np.isnan(a))
    med = np.ma.median(masked_input, axis=axis,
                       overwrite_input=overwrite_input)

    # One warning per masked entry of the result; the warn call must stay
    # in this frame because of the fixed stacklevel.
    n_masked = np.count_nonzero(med.mask.ravel())
    for _ in range(n_masked):
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=5)

    if med.dtype.kind == "m":
        fill_value = np.timedelta64("NaT")
    else:
        fill_value = np.nan
    filled = med.filled(fill_value)

    if out is None:
        return filled
    out[...] = filled
    return out
1118
1119
def _nanmedian_dispatcher(
        a, axis=None, out=None, overwrite_input=None, keepdims=None):
    """Dispatcher registered for `nanmedian`; returns ``(a, out)``."""
    # The remaining parameters only mirror the signature of `nanmedian`.
    relevant_args = (a, out)
    return relevant_args
1123
1124
@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False,
              keepdims=np._NoValue):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes along which the medians are computed. The default
        is to compute the median along a flattened version of the array.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array `a` for
        calculations. The input array will be modified by the call to
        `median`. This will save memory when you do not need to preserve
        the contents of the input array. Treat the input as undefined,
        but it will probably be fully or partially sorted. Default is
        False. If `overwrite_input` is ``True`` and `a` is not already an
        `ndarray`, an error will be raised.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    Returns
    -------
    median : ndarray
        A new array holding the result. If the input contains integers
        or floats smaller than ``float64``, then the output data-type is
        ``np.float64``.  Otherwise, the data-type of the output is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    See Also
    --------
    mean, median, percentile

    Notes
    -----
    Given a vector ``V`` of length ``N``, the median of ``V`` is the
    middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
    ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two
    middle values of ``V_sorted`` when ``N`` is even.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
    >>> a[0, 1] = np.nan
    >>> a
    array([[10., nan,  4.],
           [ 3.,  2.,  1.]])
    >>> np.median(a)
    np.float64(nan)
    >>> np.nanmedian(a)
    3.0
    >>> np.nanmedian(a, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.median(a, axis=1)
    array([nan,  2.])
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=1, overwrite_input=True)
    array([7.,  2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=None, overwrite_input=True)
    3.0
    >>> assert not np.all(a==b)

    """
    a = np.asanyarray(a)
    if a.size != 0:
        return fnb._ureduce(a, func=_nanmedian, keepdims=keepdims,
                            axis=axis, out=out,
                            overwrite_input=overwrite_input)

    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
    # so empty input is delegated to nanmean instead
    return np.nanmean(a, axis, out=out, keepdims=keepdims)
1218
1219
def _nanpercentile_dispatcher(
        a, q, axis=None, out=None, overwrite_input=None,
        method=None, keepdims=None, *, weights=None):
    """Dispatcher registered for `nanpercentile`.

    Returns ``(a, q, out, weights)``.
    """
    # The remaining parameters only mirror the signature of `nanpercentile`.
    relevant_args = (a, q, out, weights)
    return relevant_args
1224
1225
@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        weights=None,
):
    """
    Compute the qth percentile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth percentile(s) of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be
        between 0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. The default
        is to compute the percentile(s) along a flattened version of the
        array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape and buffer length as the expected output, but the
        type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by
        intermediate calculations, to save memory. In this case, the
        contents of the input `a` after this function completes is
        undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        percentile.  There are many different methods, some unique to NumPy.
        See the notes for explanation.  The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    weights : array_like, optional
        An array of weights associated with the values in `a`. Each value in
        `a` contributes to the percentile according to its associated weight.
        The weights array can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.
        Only `method="inverted_cdf"` supports weights, and the weights
        must be non-negative.

        .. versionadded:: 2.0.0

    Returns
    -------
    percentile : scalar or ndarray
        If `q` is a single percentile and `axis=None`, then the result
        is a scalar. If multiple percentiles are given, first axis of
        the result corresponds to the percentiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If `a` has a complex dtype.
    ValueError
        If `q` is outside ``[0, 100]``, if `weights` is combined with a
        method other than ``"inverted_cdf"``, or if any weight is negative.

    See Also
    --------
    nanmean
    nanmedian : equivalent to ``nanpercentile(..., 50)``
    percentile, median, mean
    nanquantile : equivalent to nanpercentile, except q in range [0, 1].

    Notes
    -----
    The behavior of `numpy.nanpercentile` with percentage `q` is that of
    `numpy.quantile` with argument ``q/100`` (ignoring nan values).
    For more information, please see `numpy.quantile`.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.percentile(a, 50)
    np.float64(nan)
    >>> np.nanpercentile(a, 50)
    3.0
    >>> np.nanpercentile(a, 50, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanpercentile(a, 50, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanpercentile(a, 50, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])

    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # A plain Python int/float `q` requests weak promotion for the final
    # result type; this has to be recorded before `q` becomes an array.
    weak_q = type(q) in (int, float)
    # Rescale percentages to probabilities.
    q = np.true_divide(q, 100, out=...)
    if not fnb._quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")

    if weights is not None:
        if method != "inverted_cdf":
            raise ValueError(
                "Only method 'inverted_cdf' supports weights. "
                f"Got: {method}."
            )
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")

    return _nanquantile_unchecked(
        a, q, axis=axis, out=out, overwrite_input=overwrite_input,
        method=method, keepdims=keepdims, weights=weights, weak_q=weak_q)
1398
1399
def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
                            method=None, keepdims=None, *, weights=None):
    """Dispatcher registered for `nanquantile`.

    Returns ``(a, q, out, weights)``.
    """
    # The remaining parameters only mirror the signature of `nanquantile`.
    relevant_args = (a, q, out, weights)
    return relevant_args
1403
1404
@array_function_dispatch(_nanquantile_dispatcher)
def nanquantile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        weights=None,
):
    """
    Compute the qth quantile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth quantile(s) of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Probability or sequence of probabilities for the quantiles to compute.
        Values must be between 0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the quantiles are computed. The
        default is to compute the quantile(s) along a flattened
        version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by intermediate
        calculations, to save memory. In this case, the contents of the input
        `a` after this function completes is undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        quantile.  There are many different methods, some unique to NumPy.
        See the notes for explanation.  The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    weights : array_like, optional
        An array of weights associated with the values in `a`. Each value in
        `a` contributes to the quantile according to its associated weight.
        The weights array can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.
        Only `method="inverted_cdf"` supports weights, and the weights
        must be non-negative.

        .. versionadded:: 2.0.0

    Returns
    -------
    quantile : scalar or ndarray
        If `q` is a single probability and `axis=None`, then the result
        is a scalar. If multiple probability levels are given, first axis of
        the result corresponds to the quantiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If `a` has a complex dtype.
    ValueError
        If `q` is outside ``[0, 1]``, if `weights` is combined with a
        method other than ``"inverted_cdf"``, or if any weight is negative.

    See Also
    --------
    quantile
    nanmean
    nanmedian : equivalent to ``nanquantile(..., 0.5)``
    nanpercentile : same as nanquantile, but with q in the range [0, 100].

    Notes
    -----
    The behavior of `numpy.nanquantile` is the same as that of
    `numpy.quantile` (ignoring nan values).
    For more information, please see `numpy.quantile`.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.quantile(a, 0.5)
    np.float64(nan)
    >>> np.nanquantile(a, 0.5)
    3.0
    >>> np.nanquantile(a, 0.5, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanquantile(a, 0.5, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanquantile(a, 0.5, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])
    >>> b = a.copy()
    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # A plain Python int/float `q` requests weak promotion for the final
    # result type; this has to be recorded before `q` becomes an array.
    weak_q = type(q) in (int, float)
    q = np.asanyarray(q)
    if not fnb._quantile_is_valid(q):
        raise ValueError("Quantiles must be in the range [0, 1]")

    if weights is not None:
        if method != "inverted_cdf":
            raise ValueError(
                "Only method 'inverted_cdf' supports weights. "
                f"Got: {method}."
            )
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")

    return _nanquantile_unchecked(
        a, q, axis=axis, out=out, overwrite_input=overwrite_input,
        method=method, keepdims=keepdims, weights=weights, weak_q=weak_q)
1575
1576
def _nanquantile_unchecked(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        weights=None,
        weak_q=False,
):
    """Assumes that q is in [0, 1], and is an ndarray"""
    if a.size != 0:
        # Everything except `a` and the reduction function itself is
        # forwarded by keyword.
        reduce_kwargs = dict(
            q=q,
            weights=weights,
            keepdims=keepdims,
            axis=axis,
            out=out,
            overwrite_input=overwrite_input,
            method=method,
            weak_q=weak_q,
        )
        return fnb._ureduce(a, func=_nanquantile_ureduce_func,
                            **reduce_kwargs)

    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
    # so empty input is delegated to nanmean instead
    return np.nanmean(a, axis, out=out, keepdims=keepdims)
1603
1604
1605def _nanquantile_ureduce_func(
1606 a: np.array,
1607 q: np.array,
1608 weights: np.array,
1609 axis: int | None = None,
1610 out=None,
1611 overwrite_input: bool = False,
1612 method="linear",
1613 weak_q=False,
1614):
1615 """
1616 Private function that doesn't support extended axis or keepdims.
1617 These methods are extended to this function using _ureduce
1618 See nanpercentile for parameter usage
1619 """
1620 if axis is None or a.ndim == 1:
1621 part = a.ravel()
1622 wgt = None if weights is None else weights.ravel()
1623 result = _nanquantile_1d(part, q, overwrite_input, method,
1624 weights=wgt, weak_q=weak_q)
1625 # Note that this code could try to fill in `out` right away
1626 elif weights is None:
1627 result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
1628 overwrite_input, method, weights, weak_q)
1629 # apply_along_axis fills in collapsed axis with results.
1630 # Move those axes to the beginning to match percentile's
1631 # convention.
1632 if q.ndim != 0:
1633 from_ax = [axis + i for i in range(q.ndim)]
1634 result = np.moveaxis(result, from_ax, list(range(q.ndim)))
1635 else:
1636 # We need to apply along axis over 2 arrays, a and weights.
1637 # move operation axes to end for simplicity:
1638 a = np.moveaxis(a, axis, -1)
1639 if weights is not None:
1640 weights = np.moveaxis(weights, axis, -1)
1641 if out is not None:
1642 result = out
1643 else:
1644 # weights are limited to `inverted_cdf` so the result dtype
1645 # is known to be identical to that of `a` here:
1646 result = np.empty_like(a, shape=q.shape + a.shape[:-1])
1647
1648 for ii in np.ndindex(a.shape[:-1]):
1649 result[(...,) + ii] = _nanquantile_1d(
1650 a[ii], q, weights=weights[ii],
1651 overwrite_input=overwrite_input, method=method,
1652 weak_q=weak_q,
1653 )
1654 # This path dealt with `out` already...
1655 return result
1656
1657 if out is not None:
1658 out[...] = result
1659 return result
1660
1661
def _nanquantile_1d(
        arr1d, q, overwrite_input=False, method="linear", weights=None,
        weak_q=False,
):
    """
    Private function for rank 1 arrays. Compute quantile ignoring NaNs.
    See nanpercentile for parameter usage
    """
    # TODO: What to do when arr1d = [1, np.nan] and weights = [0, 1]?
    values, kept_weights, may_overwrite = _remove_nan_1d(
        arr1d, second_arr1d=weights, overwrite_input=overwrite_input)

    if values.size != 0:
        return fnb._quantile_unchecked(
            values,
            q,
            overwrite_input=may_overwrite,
            method=method,
            weights=kept_weights,
            weak_q=weak_q,
        )

    # Nothing left after NaN removal: an all-NaN result shaped like `q`,
    # with ``[()]`` converting a 0-d result to a scalar
    all_nan = np.full(q.shape, np.nan, dtype=values.dtype)
    return all_nan[()]
1685
1686
def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
                       keepdims=None, *, where=None, mean=None,
                       correction=None):
    """Dispatcher registered for `nanvar`; returns ``(a, out)``."""
    # The remaining parameters only mirror the signature of `nanvar`.
    relevant_args = (a, out)
    return relevant_args
1691
1692
@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution.  The variance is computed for the flattened array by
    default, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose variance is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the variance is computed.  The default is
        to compute the variance of the flattened array.
    dtype : data-type, optional
        Type to use in computing the variance.  For arrays of integer type
        the default is `float64`; for arrays of float types it is the same as
        the array type.
    out : ndarray, optional
        Alternate output array in which to place the result.  It must have
        the same shape as the expected output, but the type is cast if
        necessary.
    ddof : {int, float}, optional
        "Delta Degrees of Freedom": the divisor used in the calculation is
        ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.
    where : array_like of bool, optional
        Elements to include in the variance. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    mean : array_like, optional
        Provide the mean to prevent its recalculation. The mean should have
        a shape as if it was calculated with ``keepdims=True``.
        The axis for the calculation of the mean should be the same as used in
        the call to this var function.

        .. versionadded:: 2.0.0

    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of them
        can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    variance : ndarray, see dtype parameter above
        If `out` is None, return a new array containing the variance,
        otherwise return a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    Raises
    ------
    TypeError
        If `a` is inexact and `dtype` or the dtype of `out` is not.
    ValueError
        If both a non-zero `ddof` and `correction` are provided.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e.,  ``var = mean(abs(x - x.mean())**2)``.

    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
    instead.  In standard statistical practice, ``ddof=1`` provides an
    unbiased estimator of the variance of a hypothetical infinite
    population.  ``ddof=0`` provides a maximum likelihood estimate of the
    variance for normally distributed variables.

    Note that for complex numbers, the absolute value is taken before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the variance is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`.  Specifying a
    higher-accuracy accumulator using the ``dtype`` keyword can alleviate
    this issue.

    For this function to work on sub-classes of ndarray, they must define
    `sum` with the kwarg `keepdims`

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([1.,  0.])
    >>> np.nanvar(a, axis=1)
    array([0.,  0.25])  # may vary

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # `_replace_nan` produced no mask, so defer to the regular variance.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # The sentinel is tested by identity (as done for `mean` below): a value
    # comparison would be evaluated element-wise for array-like arguments.
    if correction is not np._NoValue:
        if ddof != 0:
            raise ValueError(
                "ddof and correction can't be provided simultaneously."
            )
        ddof = correction

    # Compute mean
    if type(arr) is np.matrix:
        _keepdims = np._NoValue
    else:
        _keepdims = True

    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims,
                 where=where)

    if mean is not np._NoValue:
        avg = mean
    else:
        # we need to special case matrix for reverse compatibility
        # in order for this to work, these sums need to be called with
        # keepdims=True, however matrix now raises an error in this case, but
        # the reason that it drops the keepdims kwarg is to force keepdims=True
        # so this used to work by serendipity.
        avg = np.sum(arr, axis=axis, dtype=dtype,
                     keepdims=_keepdims, where=where)
        avg = _divide_by_count(avg, cnt)

    # Compute squared deviation from mean. `arr` is reused as scratch space
    # for both the deviations and their squares.
    np.subtract(arr, avg, out=arr, casting='unsafe', where=where)
    arr = _copyto(arr, 0, mask)
    if issubclass(arr.dtype.type, np.complexfloating):
        sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real
    else:
        sqr = np.multiply(arr, arr, out=arr, where=where)

    # Compute variance.
    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                 where=where)

    # Precaution against reduced object arrays
    try:
        var_ndim = var.ndim
    except AttributeError:
        var_ndim = np.ndim(var)
    if var_ndim < cnt.ndim:
        # Subclasses of ndarray may ignore keepdims, so check here.
        cnt = cnt.squeeze(axis)
    dof = cnt - ddof
    var = _divide_by_count(var, dof)

    isbad = (dof <= 0)
    if np.any(isbad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
                      stacklevel=2)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        var = _copyto(var, np.nan, isbad)
    return var
1875
1876
1877def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
1878 keepdims=None, *, where=None, mean=None,
1879 correction=None):
1880 return (a, out)
1881
1882
@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Standard deviation of the non-NaN elements along the given axis.

    NaN entries are ignored and the result measures the spread of the
    remaining values. By default the reduction is carried out over the
    flattened array; otherwise it runs over the requested axis.

    A slice that contains only NaNs, or that has zero degrees of freedom,
    produces NaN and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Input whose non-NaN values enter the standard deviation.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. When omitted, the standard deviation
        of the flattened array is computed.
    dtype : dtype, optional
        Type used for the computation. Integer arrays default to float64;
        floating-point arrays default to their own type.
    out : ndarray, optional
        Array that receives the result. Its shape must match the expected
        output; the computed values are cast to its type if necessary.
    ddof : {int, float}, optional
        "Delta Degrees of Freedom". The divisor used in the calculation
        is ``N - ddof``, with ``N`` the number of non-NaN elements.
        `ddof` defaults to zero.

    keepdims : bool, optional
        When True, the reduced axes are kept in the result as dimensions
        of size one, so that the result broadcasts correctly against the
        original `a`.

        Any value other than the default is forwarded unchanged to the
        relevant functions of the sub-classes. If those functions lack a
        `keepdims` kwarg, a RuntimeError will be raised.
    where : array_like of bool, optional
        Elements to include in the standard deviation.
        See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0

    mean : array_like, optional
        A precomputed mean, supplied to avoid computing it again. Its
        shape should be the one obtained with ``keepdims=True``, and it
        should have been calculated over the same axis as the one used in
        the call to this std function.

        .. versionadded:: 2.0.0

    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. The two
        cannot be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        A new array holding the standard deviation when `out` is None,
        otherwise a reference to the output array. Slices for which
        ddof is >= the number of non-NaN elements, and slices made up
        entirely of NaNs, give NaN.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The standard deviation is the square root of the average of the squared
    deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally calculated as
    ``x.sum() / N``, where ``N = len(x)``. When `ddof` is given, the
    divisor ``N - ddof`` is used instead. In standard statistical
    practice, ``ddof=1`` provides an unbiased estimator of the variance of
    the infinite population, while ``ddof=0`` provides a maximum
    likelihood estimate of the variance for normally distributed
    variables. Because this function returns the square root of the
    estimated variance, the result is not an unbiased estimate of the
    standard deviation per se, even with ``ddof=1``.

    For complex numbers, `std` takes the absolute value before squaring,
    so the result is always real and nonnegative.

    For floating-point input, the *std* is computed in the precision of
    the input. Depending on the data this can make the results
    inaccurate, especially for float32 (see example below). Requesting a
    higher-accuracy accumulator through the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([1., 0.])
    >>> np.nanstd(a, axis=1)
    array([0.,  0.5]) # may vary

    """
    variance = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    if isinstance(variance, np.ndarray):
        # Write the root back into the array nanvar returned instead of
        # allocating a second one.
        return np.sqrt(variance, out=variance)

    if hasattr(variance, 'dtype'):
        # Not an ndarray but carries a dtype: convert the root back to the
        # scalar type of the variance.
        return variance.dtype.type(np.sqrt(variance))

    return np.sqrt(variance)