1"""
2Functions that ignore NaN.
3
4Functions
5---------
6
7- `nanmin` -- minimum non-NaN value
8- `nanmax` -- maximum non-NaN value
9- `nanargmin` -- index of minimum non-NaN value
10- `nanargmax` -- index of maximum non-NaN value
11- `nansum` -- sum of non-NaN values
12- `nanprod` -- product of non-NaN values
13- `nancumsum` -- cumulative sum of non-NaN values
14- `nancumprod` -- cumulative product of non-NaN values
15- `nanmean` -- mean of non-NaN values
16- `nanvar` -- variance of non-NaN values
17- `nanstd` -- standard deviation of non-NaN values
18- `nanmedian` -- median of non-NaN values
19- `nanquantile` -- qth quantile of non-NaN values
20- `nanpercentile` -- qth percentile of non-NaN values
21
22"""
23import functools
24import warnings
25import numpy as np
26import numpy._core.numeric as _nx
27from numpy.lib import _function_base_impl as fnb
28from numpy.lib._function_base_impl import _weights_are_valid
29from numpy._core import overrides
30
31
# Decorator factory applied to the public functions in this file: it is
# ``overrides.array_function_dispatch`` with ``module='numpy'`` pre-bound.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


# Names exported by a star-import of this module.
__all__ = [
    'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
    'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
    'nancumsum', 'nancumprod', 'nanquantile'
    ]
41
42
def _nan_mask(a, out=None):
    """
    Build a boolean mask that is ``True`` wherever `a` is not NaN.

    Parameters
    ----------
    a : ndarray
        Input array with at least 1 dimension. No conversion is done, so
        `a` must already be an array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output and will prevent the allocation of a new array.

    Returns
    -------
    y : bool ndarray or True
        ``False`` at the positions holding ``np.nan`` and ``True``
        everywhere else. The plain Python ``True`` is returned when the
        dtype kind of `a` is neither float (``'f'``) nor complex
        (``'c'``), i.e. when `a` cannot possibly contain ``np.nan``.
    """
    if a.dtype.kind in 'fc':
        # Compute the NaN flags, then flip them in place so that no
        # second boolean array has to be allocated.
        nan_flags = np.isnan(a, out=out)
        return np.invert(nan_flags, out=nan_flags)

    # Any other dtype kind is not inspected for NaN at all.
    return True
69
def _replace_nan(a, val):
    """
    Return a NaN-free copy of `a` together with the mask of replaced NaNs.

    When `a` has an inexact (floating or complex) or object dtype, a copy
    is made, every NaN in the copy is overwritten with `val`, and the copy
    is returned with a boolean mask marking where the NaNs were. For any
    other dtype nothing is copied: the converted input is returned with a
    mask of ``None``.

    Note that scalars will end up as array scalars, which is important
    for using the result as the value of the out argument in some
    operations.

    Parameters
    ----------
    a : array-like
        Input array.
    val : float
        Value written in place of each NaN.

    Returns
    -------
    y : ndarray
        A copy of `a` with the NaNs replaced by `val` when a mask could be
        computed, otherwise `a` itself (after ``np.asanyarray``).
    mask : {bool, None}
        Boolean mask marking the locations of the NaNs, or ``None`` when
        the dtype of `a` is neither inexact nor object.

    """
    arr = np.asanyarray(a)

    if arr.dtype == np.object_:
        # object arrays do not support `isnan` (gh-9009), so make a guess:
        # NaN is the value that compares unequal to itself.
        nan_locs = np.not_equal(arr, arr, dtype=bool)
    elif issubclass(arr.dtype.type, np.inexact):
        nan_locs = np.isnan(arr)
    else:
        # Nothing to replace; hand the array back without copying it.
        return arr, None

    # Work on a copy (keeping the subclass) so the caller's data survives.
    filled = np.array(arr, subok=True, copy=True)
    np.copyto(filled, val, where=nan_locs)
    return filled, nan_locs
113
114
def _copyto(a, val, mask):
    """
    Write `val` into `a` wherever `mask` is True.

    Unlike ``np.copyto`` this also copes with `a` being a numpy scalar:
    in that case a new scalar of the same type is built from `val` and
    `mask` is not consulted.

    Parameters
    ----------
    a : ndarray or numpy scalar
        Array or numpy scalar some of whose values are to be replaced
        by val.
    val : numpy scalar
        Value used as a replacement.
    mask : ndarray, scalar
        Boolean array. Where True the corresponding element of `a` is
        replaced by `val`. Broadcasts.

    Returns
    -------
    res : ndarray, scalar
        `a` itself, modified in place, when it is an array; otherwise a
        scalar of the type of `a` holding `val`.

    """
    if not isinstance(a, np.ndarray):
        # A scalar cannot be modified in place, so build a replacement.
        return a.dtype.type(val)

    np.copyto(a, val, where=mask, casting='unsafe')
    return a
142
143
def _remove_nan_1d(arr1d, second_arr1d=None, overwrite_input=False):
    """
    Equivalent to ``arr1d[~np.isnan(arr1d)]``, but in a different order.

    Instead of compacting the array, the NaNs are overwritten with the
    non-NaN values found in the tail of the array and the tail is then
    sliced off. Presumably faster as it incurs fewer copies.

    Parameters
    ----------
    arr1d : ndarray
        Array to remove nans from
    second_arr1d : ndarray or None
        A second array which will have the same positions removed as arr1d.
    overwrite_input : bool
        True if `arr1d` can be modified in place

    Returns
    -------
    res : ndarray
        Array with nan elements removed
    second_res : ndarray or None
        Second array with nan element positions of first array removed.
    overwrite_input : bool
        True if `res` can be modified in place, given the constraint on the
        input

    Warns
    -----
    RuntimeWarning
        If the number of NaNs equals the size of `arr1d`; empty arrays
        are returned in that case.
    """
    if arr1d.dtype == object:
        # object arrays do not support `isnan` (gh-9009), so make a guess
        is_nan = np.not_equal(arr1d, arr1d, dtype=bool)
    else:
        is_nan = np.isnan(arr1d)

    nan_idx = np.nonzero(is_nan)[0]
    n_nan = nan_idx.size

    # Nothing but NaNs: warn and return empty slices.
    if n_nan == arr1d.size:
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=6)
        empty_second = None if second_arr1d is None else second_arr1d[:0]
        return arr1d[:0], empty_second, True

    # No NaNs at all: pass the inputs through untouched.
    if n_nan == 0:
        return arr1d, second_arr1d, overwrite_input

    # Positions within the last `n_nan` entries that hold real values.
    tail_keep = ~is_nan[-n_nan:]

    if not overwrite_input:
        arr1d = arr1d.copy()
    # select non-nans at end of array
    tail_vals = arr1d[-n_nan:][tail_keep]
    # fill nans in beginning of array with non-nans of end
    arr1d[nan_idx[:tail_vals.size]] = tail_vals

    if second_arr1d is None:
        return arr1d[:-n_nan], None, True

    # Apply exactly the same moves to the companion array.
    if not overwrite_input:
        second_arr1d = second_arr1d.copy()
    tail_vals = second_arr1d[-n_nan:][tail_keep]
    second_arr1d[nan_idx[:tail_vals.size]] = tail_vals

    return arr1d[:-n_nan], second_arr1d[:-n_nan], True
202
203
def _divide_by_count(a, b, out=None):
    """
    Compute ``a / b`` with invalid-value and divide-by-zero warnings
    silenced.

    If `a` is an array and `out` is None the division is done in place in
    `a`. If `a` is a scalar and `out` is None, the type of `a` is
    preserved in the output. Note that this is only called with `a` an
    inexact type.

    Parameters
    ----------
    a : {ndarray, numpy scalar}
        Numerator. Expected to be of inexact type but not checked.
    b : {ndarray, numpy scalar}
        Denominator.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.

    Returns
    -------
    ret : {ndarray, numpy scalar}
        The return value is a/b. If `a` was an ndarray the division is done
        in place. If `a` is a numpy scalar, the division preserves its type.

    """
    with np.errstate(invalid='ignore', divide='ignore'):
        if isinstance(a, np.ndarray):
            # Without an explicit `out`, reuse `a` as the destination.
            destination = a if out is None else out
            return np.divide(a, b, out=destination, casting='unsafe')

        if out is not None:
            # This is questionable, but currently a numpy scalar can
            # be output to a zero dimensional array.
            return np.divide(a, b, out=out, casting='unsafe')

        # Precaution against reduced object arrays: such a result may be
        # an arbitrary Python object without a ``dtype`` attribute.
        try:
            return a.dtype.type(a / b)
        except AttributeError:
            return a / b
246
247
def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Dispatcher for `nanmin`: hand back the ``a`` and ``out`` arguments."""
    return a, out
251
252
@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return the minimum of an array or along an axis, ignoring any NaNs.

    When an all-NaN slice is encountered, a ``RuntimeWarning`` is issued
    and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing the numbers whose minimum is desired. If `a` is
        not an array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the minimum is computed. By default the
        minimum of the flattened array is computed.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If set to True, the reduced axes are kept in the result as
        dimensions of size one, so that the result broadcasts correctly
        against the original `a`.

        If the value is anything but the default, `keepdims` is passed
        through to the `min` method of sub-classes of `ndarray`. If the
        sub-class method does not implement `keepdims`, an exception
        will be raised.
    initial : scalar, optional
        The maximum value of an output element. Must be present to allow
        computation on an empty slice. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the minimum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmin : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any NaNs.
    amin :
        The minimum value of an array along a given axis, propagating any NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amax, fmax, maximum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.min.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([1., 2.])
    >>> np.nanmin(a, axis=1)
    array([1., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, -np.inf])
    -inf

    """
    # Forward only the optional arguments the caller actually supplied.
    supplied = (('keepdims', keepdims), ('initial', initial),
                ('where', where))
    kwargs = {key: value for key, value in supplied
              if value is not np._NoValue}

    is_base_ndarray = type(a) is np.ndarray
    if is_base_ndarray and a.dtype != np.object_:
        # Fast, but not safe for subclasses of ndarray, or object arrays,
        # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
        res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Slow, but safe for subclasses of ndarray: NaNs become +inf so that
    # they can never win the minimum.
    a, nan_locs = _replace_nan(a, +np.inf)
    res = np.amin(a, axis=axis, out=out, **kwargs)
    if nan_locs is None:
        return res

    # Check for all-NaN axis; `initial` is dropped before the remaining
    # keywords are forwarded to ``np.all``.
    kwargs.pop("initial", None)
    all_nan = np.all(nan_locs, axis=axis, **kwargs)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=2)
    return res
376
377
def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Dispatcher for `nanmax`: hand back the ``a`` and ``out`` arguments."""
    return a, out
381
382
@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return the maximum of an array or along an axis, ignoring any NaNs.

    When an all-NaN slice is encountered, a ``RuntimeWarning`` is issued
    and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing the numbers whose maximum is desired. If `a` is
        not an array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the maximum is computed. By default the
        maximum of the flattened array is computed.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If set to True, the reduced axes are kept in the result as
        dimensions of size one, so that the result broadcasts correctly
        against the original `a`.

        If the value is anything but the default, `keepdims` is passed
        through to the `max` method of sub-classes of `ndarray`. If the
        sub-class method does not implement `keepdims`, an exception
        will be raised.
    initial : scalar, optional
        The minimum value of an output element. Must be present to allow
        computation on an empty slice. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the maximum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmax : ndarray
        An array with the same shape as `a`, with the specified axis removed.
        If `a` is a 0-d array, or if axis is None, an ndarray scalar is
        returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any NaNs.
    amax :
        The maximum value of an array along a given axis, propagating any NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amin, fmin, minimum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.max.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmax(a)
    3.0
    >>> np.nanmax(a, axis=0)
    array([3., 2.])
    >>> np.nanmax(a, axis=1)
    array([2., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmax([1, 2, np.nan, -np.inf])
    2.0
    >>> np.nanmax([1, 2, np.nan, np.inf])
    inf

    """
    # Forward only the optional arguments the caller actually supplied.
    supplied = (('keepdims', keepdims), ('initial', initial),
                ('where', where))
    kwargs = {key: value for key, value in supplied
              if value is not np._NoValue}

    is_base_ndarray = type(a) is np.ndarray
    if is_base_ndarray and a.dtype != np.object_:
        # Fast, but not safe for subclasses of ndarray, or object arrays,
        # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
        res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
        return res

    # Slow, but safe for subclasses of ndarray: NaNs become -inf so that
    # they can never win the maximum.
    a, nan_locs = _replace_nan(a, -np.inf)
    res = np.amax(a, axis=axis, out=out, **kwargs)
    if nan_locs is None:
        return res

    # Check for all-NaN axis; `initial` is dropped before the remaining
    # keywords are forwarded to ``np.all``.
    kwargs.pop("initial", None)
    all_nan = np.all(nan_locs, axis=axis, **kwargs)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=2)
    return res
505
506
def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """Dispatcher for `nanargmin`: hand back ``a`` as a one-element tuple."""
    relevant = (a,)
    return relevant
509
510
@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the minimum values along an axis, ignoring NaNs.

    For all-NaN slices ``ValueError`` is raised. Warning: the results
    cannot be trusted if a slice contains only NaNs and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array. It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If a slice along `axis` contains only NaNs.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])

    """
    # NaNs are replaced by +inf so that ``argmin`` never selects them.
    filled, nan_locs = _replace_nan(a, np.inf)

    # The all-NaN check only runs when a non-empty mask was produced.
    if (nan_locs is not None and nan_locs.size
            and np.any(np.all(nan_locs, axis=axis))):
        raise ValueError("All-NaN slice encountered")

    return np.argmin(filled, axis=axis, out=out, keepdims=keepdims)
566
567
def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None):
    """Dispatcher for `nanargmax`: hand back ``a`` as a one-element tuple."""
    relevant = (a,)
    return relevant
570
571
@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the maximum values along an axis, ignoring NaNs.

    For all-NaN slices ``ValueError`` is raised. Warning: the results
    cannot be trusted if a slice contains only NaNs and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array. It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If a slice along `axis` contains only NaNs.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])

    """
    # NaNs are replaced by -inf so that ``argmax`` never selects them.
    filled, nan_locs = _replace_nan(a, -np.inf)

    # The all-NaN check only runs when a non-empty mask was produced.
    if (nan_locs is not None and nan_locs.size
            and np.any(np.all(nan_locs, axis=axis))):
        raise ValueError("All-NaN slice encountered")

    return np.argmax(filled, axis=axis, out=out, keepdims=keepdims)
628
629
def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                       initial=None, where=None):
    """Dispatcher for `nansum`: hand back the ``a`` and ``out`` arguments."""
    return a, out
633
634
@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
           initial=np._NoValue, where=np._NoValue):
    """
    Return the sum of array elements over a given axis treating Not a
    Numbers (NaNs) as zero.

    In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN or
    empty. In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose sum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the sum is computed. By default the sum
        of the flattened array is computed.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are summed. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN to
        integer can yield unexpected results.
    keepdims : bool, optional
        If set to True, the reduced axes are kept in the result as
        dimensions of size one, so that the result broadcasts correctly
        against the original `a`.

        If the value is anything but the default, `keepdims` is passed
        through to the `mean` or `sum` methods of sub-classes of
        `ndarray`. If the sub-class method does not implement `keepdims`,
        an exception will be raised.
    initial : scalar, optional
        Starting value for the sum. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the sum. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nansum : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.
    isfinite : Show which elements are not NaN or +/-inf.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be Not
    A Number (NaN).

    Examples
    --------
    >>> import numpy as np
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([2., 1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, -np.inf])
    -inf
    >>> with np.errstate(invalid="ignore"):
    ...     np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
    np.float64(nan)

    """
    # Zero is the additive identity, so zero-filled NaNs leave the sum
    # unchanged; the NaN mask itself is not needed here.
    zero_filled, _ = _replace_nan(a, 0)
    reduce_args = dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                       initial=initial, where=where)
    return np.sum(zero_filled, **reduce_args)
730
731
def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        initial=None, where=None):
    """Dispatcher for `nanprod`: hand back the ``a`` and ``out`` arguments."""
    return a, out
735
736
@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            initial=np._NoValue, where=np._NoValue):
    """
    Return the product of array elements over a given axis treating Not a
    Numbers (NaNs) as ones.

    One is returned for slices that are all-NaN or empty.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose product is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the product is computed. By default the
        product of the flattened array is computed.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are multiplied. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN to
        integer can yield unexpected results.
    keepdims : bool, optional
        If True, the axes which are reduced are left in the result as
        dimensions with size one. With this option, the result will
        broadcast correctly against the original `a`.
    initial : scalar, optional
        The starting value for this product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.prod : Product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> import numpy as np
    >>> np.nanprod(1)
    1
    >>> np.nanprod([1])
    1
    >>> np.nanprod([1, np.nan])
    1.0
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanprod(a)
    6.0
    >>> np.nanprod(a, axis=0)
    array([3., 2.])

    """
    # One is the multiplicative identity, so one-filled NaNs leave the
    # product unchanged; the NaN mask itself is not needed here.
    one_filled, _ = _replace_nan(a, 1)
    reduce_args = dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                       initial=initial, where=where)
    return np.prod(one_filled, **reduce_args)
812
813
def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
    """Dispatcher for `nancumsum`: hand back the ``a`` and ``out`` arguments."""
    return a, out
816
817
@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative sum of array elements over a given axis treating
    Not a Numbers (NaNs) as zero.

    The cumulative sum does not change when NaNs are encountered and
    leading NaNs are replaced by zeros.

    Zeros are returned for slices that are all-NaN or empty.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed. The default
        (None) is to compute the cumsum over the flattened array.
    dtype : dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed. If `dtype` is not specified, it defaults
        to the dtype of `a`, unless `a` has an integer dtype with a
        precision less than that of the default platform integer. In
        that case, the default platform integer is used.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output
        but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

    Returns
    -------
    nancumsum : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned. The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> import numpy as np
    >>> np.nancumsum(1)
    array([1])
    >>> np.nancumsum([1])
    array([1])
    >>> np.nancumsum([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumsum(a)
    array([1.,  3.,  6.,  6.])
    >>> np.nancumsum(a, axis=0)
    array([[1.,  2.],
           [4.,  2.]])
    >>> np.nancumsum(a, axis=1)
    array([[1.,  3.],
           [3.,  3.]])

    """
    # Zero-filled NaNs add nothing to the running total; the NaN mask
    # itself is not needed here.
    zero_filled, _ = _replace_nan(a, 0)
    return np.cumsum(zero_filled, axis=axis, dtype=dtype, out=out)
881
882
def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
    """Dispatcher for `nancumprod`: hand back the ``a`` and ``out`` arguments."""
    return a, out
885
886
@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative product of array elements over a given axis
    treating Not a Numbers (NaNs) as one.

    The cumulative product does not change when NaNs are encountered and
    leading NaNs are replaced by ones.

    Ones are returned for slices that are all-NaN or empty.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative product is computed. By default
        the input is flattened.
    dtype : dtype, optional
        Type of the returned array, as well as of the accumulator in which
        the elements are multiplied. If *dtype* is not specified, it
        defaults to the dtype of `a`, unless `a` has an integer dtype with
        a precision less than that of the default platform integer. In
        that case, the default platform integer is used instead.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output
        but the type of the resulting values will be cast if necessary.

    Returns
    -------
    nancumprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.cumprod : Cumulative product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> import numpy as np
    >>> np.nancumprod(1)
    array([1])
    >>> np.nancumprod([1])
    array([1])
    >>> np.nancumprod([1, np.nan])
    array([1.,  1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumprod(a)
    array([1.,  2.,  6.,  6.])
    >>> np.nancumprod(a, axis=0)
    array([[1.,  2.],
           [3.,  2.]])
    >>> np.nancumprod(a, axis=1)
    array([[1.,  2.],
           [3.,  3.]])

    """
    # One-filled NaNs leave the running product unchanged; the NaN mask
    # itself is not needed here.
    one_filled, _ = _replace_nan(a, 1)
    return np.cumprod(one_filled, axis=axis, dtype=dtype, out=out)
947
948
def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
                        *, where=None):
    """Dispatcher for `nanmean`: hand back the ``a`` and ``out`` arguments."""
    return a, out
952
953
@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            *, where=np._NoValue):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the array elements. By default the average is
    taken over the flattened array, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the means are computed. The default is to
        compute the mean of the flattened array.
    dtype : data-type, optional
        Type to use in computing the mean. For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.
        See :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then `keepdims` will be
        passed through to the `mean` or `sum` methods of sub-classes of
        `ndarray`. If a sub-class method does not implement `keepdims`,
        an exception will be raised.
    where : array_like of bool, optional
        Elements to include in the mean. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    m : ndarray, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. NaN is
        returned for slices that contain only NaNs.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`. Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanmean(a)
    2.6666666666666665
    >>> np.nanmean(a, axis=0)
    array([2., 4.])
    >>> np.nanmean(a, axis=1)
    array([1.,  3.5]) # may vary

    """
    filled, nan_mask = _replace_nan(a, 0)
    if nan_mask is None:
        # `_replace_nan` produced no mask (presumably because `a` cannot
        # hold NaN -- confirm against its definition): plain mean suffices.
        return np.mean(filled, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims, where=where)

    # From here on the result has to be able to represent NaN.
    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Keywords shared by the two reductions below.
    reduce_kwargs = dict(axis=axis, keepdims=keepdims, where=where)
    n_valid = np.sum(~nan_mask, dtype=np.intp, **reduce_kwargs)
    total = np.sum(filled, dtype=dtype, out=out, **reduce_kwargs)
    mean = _divide_by_count(total, n_valid, out=out)

    if (n_valid == 0).any():
        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
        # NaN is the only possible bad value, so no further
        # action is needed to handle bad results.
    return mean
1057
1058
def _nanmedian1d(arr1d, overwrite_input=False):
    """
    Median of a rank 1 array with NaNs ignored (private helper).
    See nanmedian for parameter usage
    """
    # `_remove_nan_1d` hands back a triple; the middle entry is unused here
    # and the last one replaces the `overwrite_input` flag for np.median.
    cleaned, _, may_overwrite = _remove_nan_1d(
        arr1d, overwrite_input=overwrite_input,
    )

    if cleaned.size != 0:
        return np.median(cleaned, overwrite_input=may_overwrite)

    # Nothing is left to take a median of.  Return the last element of the
    # *input* to ensure that a nan-esque scalar of the appropriate type
    # (and unit) is returned for `timedelta64` and `complexfloating`.
    return arr1d[-1]
1074
1075
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanmedian for parameter usage

    Whenever `out` is given, the result is written into it and `out`
    itself is returned, whichever of the three code paths is taken.
    """
    if axis is None or a.ndim == 1:
        # Flattened / rank 1 input: a single 1-d median.
        med = _nanmedian1d(a.ravel(), overwrite_input)
        if out is None:
            return med
        out[...] = med
        return out

    # for small medians use sort + indexing which is still faster than
    # apply_along_axis
    # benchmarked with shuffled (50, 50, x) containing a few NaN
    if a.shape[axis] < 600:
        return _nanmedian_small(a, axis, out, overwrite_input)

    result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
    if out is None:
        return result
    # Return `out` (not the temporary) so this path agrees with the two
    # paths above and with the documented `out` behaviour of nanmedian.
    out[...] = result
    return out
1100
1101
1102def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
1103 """
1104 sort + indexing median, faster for small medians along multiple
1105 dimensions due to the high overhead of apply_along_axis
1106
1107 see nanmedian for parameter usage
1108 """
1109 a = np.ma.masked_array(a, np.isnan(a))
1110 m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input)
1111 for i in range(np.count_nonzero(m.mask.ravel())):
1112 warnings.warn("All-NaN slice encountered", RuntimeWarning,
1113 stacklevel=5)
1114
1115 fill_value = np.timedelta64("NaT") if m.dtype.kind == "m" else np.nan
1116 if out is not None:
1117 out[...] = m.filled(fill_value)
1118 return out
1119 return m.filled(fill_value)
1120
1121
1122def _nanmedian_dispatcher(
1123 a, axis=None, out=None, overwrite_input=None, keepdims=None):
1124 return (a, out)
1125
1126
@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False,
              keepdims=np._NoValue):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes along which the medians are computed. The default
        is to compute the median along a flattened version of the array.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array `a` for
        calculations. The input array will be modified by the call to
        `median`. This will save memory when you do not need to preserve
        the contents of the input array. Treat the input as undefined,
        but it will probably be fully or partially sorted. Default is
        False. If `overwrite_input` is ``True`` and `a` is not already an
        `ndarray`, an error will be raised.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array. If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    Returns
    -------
    median : ndarray
        A new array holding the result. If the input contains integers
        or floats smaller than ``float64``, then the output data-type is
        ``np.float64``. Otherwise, the data-type of the output is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    See Also
    --------
    mean, median, percentile

    Notes
    -----
    Given a vector ``V`` of length ``N``, the median of ``V`` is the
    middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
    ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two
    middle values of ``V_sorted`` when ``N`` is even.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
    >>> a[0, 1] = np.nan
    >>> a
    array([[10., nan,  4.],
           [ 3.,  2.,  1.]])
    >>> np.median(a)
    np.float64(nan)
    >>> np.nanmedian(a)
    3.0
    >>> np.nanmedian(a, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.median(a, axis=1)
    array([nan,  2.])
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=1, overwrite_input=True)
    array([7.,  2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=None, overwrite_input=True)
    3.0
    >>> assert not np.all(a==b)

    """
    arr = np.asanyarray(a)
    if arr.size != 0:
        # Regular case: _ureduce extends _nanmedian to multiple axes and
        # keepdims.
        return fnb._ureduce(arr, func=_nanmedian, keepdims=keepdims,
                            axis=axis, out=out,
                            overwrite_input=overwrite_input)

    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
    # so empty input is delegated to nanmean instead.
    return np.nanmean(arr, axis, out=out, keepdims=keepdims)
1220
1221
1222def _nanpercentile_dispatcher(
1223 a, q, axis=None, out=None, overwrite_input=None,
1224 method=None, keepdims=None, *, weights=None, interpolation=None):
1225 return (a, q, out, weights)
1226
1227
@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        weights=None,
        interpolation=None,
):
    """
    Compute the qth percentile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth percentile(s) of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be
        between 0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. The default
        is to compute the percentile(s) along a flattened version of the
        array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape and buffer length as the expected output, but the
        type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by
        intermediate calculations, to save memory. In this case, the
        contents of the input `a` after this function completes is
        undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        percentile.  There are many different methods, some unique to NumPy.
        See the notes for explanation.  The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    weights : array_like, optional
        An array of weights associated with the values in `a`. Each value in
        `a` contributes to the percentile according to its associated weight.
        The weights array can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.
        Only `method="inverted_cdf"` supports weights, and weights must be
        non-negative.

        .. versionadded:: 2.0.0

    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    percentile : scalar or ndarray
        If `q` is a single percentile and `axis=None`, then the result
        is a scalar. If multiple percentiles are given, first axis of
        the result corresponds to the percentiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If `a` has a complex dtype.
    ValueError
        If `q` is outside ``[0, 100]``, if `weights` is combined with a
        method other than 'inverted_cdf', or if any weight is negative.

    See Also
    --------
    nanmean
    nanmedian : equivalent to ``nanpercentile(..., 50)``
    percentile, median, mean
    nanquantile : equivalent to nanpercentile, except q in range [0, 1].

    Notes
    -----
    The behavior of `numpy.nanpercentile` with percentage `q` is that of
    `numpy.quantile` with argument ``q/100`` (ignoring nan values).
    For more information, please see `numpy.quantile`.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.percentile(a, 50)
    np.float64(nan)
    >>> np.nanpercentile(a, 50)
    3.0
    >>> np.nanpercentile(a, 50, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanpercentile(a, 50, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanpercentile(a, 50, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])

    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    # The deprecated `interpolation` keyword, when given, determines `method`.
    if interpolation is not None:
        method = fnb._check_interpolation_as_method(
            method, interpolation, "nanpercentile")

    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # Turn percentages into fractions; for floating `a` the divisor is a
    # scalar of `a`'s own type.
    hundred = a.dtype.type(100) if a.dtype.kind == "f" else 100
    # undo any decay that the ufunc performed (see gh-13105)
    q = np.asanyarray(np.true_divide(q, hundred))
    if not fnb._quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")

    if weights is not None:
        if method != "inverted_cdf":
            raise ValueError(
                "Only method 'inverted_cdf' supports weights. "
                f"Got: {method}."
            )
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")

    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, method, keepdims, weights)
1411
1412
1413def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
1414 method=None, keepdims=None, *, weights=None,
1415 interpolation=None):
1416 return (a, q, out, weights)
1417
1418
@array_function_dispatch(_nanquantile_dispatcher)
def nanquantile(
        a,
        q,
        axis=None,
        out=None,
        overwrite_input=False,
        method="linear",
        keepdims=np._NoValue,
        *,
        weights=None,
        interpolation=None,
):
    """
    Compute the qth quantile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth quantile(s) of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Probability or sequence of probabilities for the quantiles to compute.
        Values must be between 0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the quantiles are computed. The
        default is to compute the quantile(s) along a flattened
        version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by intermediate
        calculations, to save memory. In this case, the contents of the input
        `a` after this function completes is undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        quantile.  There are many different methods, some unique to NumPy.
        See the notes for explanation.  The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:

        * 'lower'
        * 'higher'
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array.  If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    weights : array_like, optional
        An array of weights associated with the values in `a`. Each value in
        `a` contributes to the quantile according to its associated weight.
        The weights array can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.
        Only `method="inverted_cdf"` supports weights, and weights must be
        non-negative.

        .. versionadded:: 2.0.0

    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    quantile : scalar or ndarray
        If `q` is a single probability and `axis=None`, then the result
        is a scalar. If multiple probability levels are given, first axis of
        the result corresponds to the quantiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If `a` has a complex dtype.
    ValueError
        If `q` is outside ``[0, 1]``, if `weights` is combined with a
        method other than 'inverted_cdf', or if any weight is negative.

    See Also
    --------
    quantile
    nanmean
    nanmedian : equivalent to ``nanquantile(..., 0.5)``
    nanpercentile : same as nanquantile, but with q in the range [0, 100].

    Notes
    -----
    The behavior of `numpy.nanquantile` is the same as that of
    `numpy.quantile` (ignoring nan values).
    For more information, please see `numpy.quantile`.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.quantile(a, 0.5)
    np.float64(nan)
    >>> np.nanquantile(a, 0.5)
    3.0
    >>> np.nanquantile(a, 0.5, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanquantile(a, 0.5, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanquantile(a, 0.5, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5,  2. ,  2.5])
    >>> b = a.copy()
    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    # The deprecated `interpolation` keyword, when given, determines `method`.
    if interpolation is not None:
        method = fnb._check_interpolation_as_method(
            method, interpolation, "nanquantile")

    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")

    # Use dtype of array if possible (e.g., if q is a python int or float).
    python_scalar_q = isinstance(q, (int, float))
    q_dtype = a.dtype if python_scalar_q and a.dtype.kind == "f" else None
    q = np.asanyarray(q, dtype=q_dtype)

    if not fnb._quantile_is_valid(q):
        raise ValueError("Quantiles must be in the range [0, 1]")

    if weights is not None:
        if method != "inverted_cdf":
            raise ValueError(
                "Only method 'inverted_cdf' supports weights. "
                f"Got: {method}."
            )
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")

    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, method, keepdims, weights)
1603
1604
1605def _nanquantile_unchecked(
1606 a,
1607 q,
1608 axis=None,
1609 out=None,
1610 overwrite_input=False,
1611 method="linear",
1612 keepdims=np._NoValue,
1613 weights=None,
1614):
1615 """Assumes that q is in [0, 1], and is an ndarray"""
1616 # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
1617 # so deal them upfront
1618 if a.size == 0:
1619 return np.nanmean(a, axis, out=out, keepdims=keepdims)
1620 return fnb._ureduce(a,
1621 func=_nanquantile_ureduce_func,
1622 q=q,
1623 weights=weights,
1624 keepdims=keepdims,
1625 axis=axis,
1626 out=out,
1627 overwrite_input=overwrite_input,
1628 method=method)
1629
1630
1631def _nanquantile_ureduce_func(
1632 a: np.array,
1633 q: np.array,
1634 weights: np.array,
1635 axis: int | None = None,
1636 out=None,
1637 overwrite_input: bool = False,
1638 method="linear",
1639):
1640 """
1641 Private function that doesn't support extended axis or keepdims.
1642 These methods are extended to this function using _ureduce
1643 See nanpercentile for parameter usage
1644 """
1645 if axis is None or a.ndim == 1:
1646 part = a.ravel()
1647 wgt = None if weights is None else weights.ravel()
1648 result = _nanquantile_1d(part, q, overwrite_input, method, weights=wgt)
1649 else:
1650 # Note that this code could try to fill in `out` right away
1651 if weights is None:
1652 result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
1653 overwrite_input, method, weights)
1654 # apply_along_axis fills in collapsed axis with results.
1655 # Move those axes to the beginning to match percentile's
1656 # convention.
1657 if q.ndim != 0:
1658 from_ax = [axis + i for i in range(q.ndim)]
1659 result = np.moveaxis(result, from_ax, list(range(q.ndim)))
1660 else:
1661 # We need to apply along axis over 2 arrays, a and weights.
1662 # move operation axes to end for simplicity:
1663 a = np.moveaxis(a, axis, -1)
1664 if weights is not None:
1665 weights = np.moveaxis(weights, axis, -1)
1666 if out is not None:
1667 result = out
1668 else:
1669 # weights are limited to `inverted_cdf` so the result dtype
1670 # is known to be identical to that of `a` here:
1671 result = np.empty_like(a, shape=q.shape + a.shape[:-1])
1672
1673 for ii in np.ndindex(a.shape[:-1]):
1674 result[(...,) + ii] = _nanquantile_1d(
1675 a[ii], q, weights=weights[ii],
1676 overwrite_input=overwrite_input, method=method,
1677 )
1678 # This path dealt with `out` already...
1679 return result
1680
1681 if out is not None:
1682 out[...] = result
1683 return result
1684
1685
def _nanquantile_1d(
        arr1d, q, overwrite_input=False, method="linear", weights=None,
):
    """
    Quantile of a rank 1 array with NaNs ignored (private helper).
    See nanpercentile for parameter usage
    """
    # TODO: What to do when arr1d = [1, np.nan] and weights = [0, 1]?
    # `_remove_nan_1d` is handed the data together with the weights and
    # returns replacements for both plus a new `overwrite_input` flag.
    values, wts, may_overwrite = _remove_nan_1d(
        arr1d, second_arr1d=weights, overwrite_input=overwrite_input)

    if values.size != 0:
        return fnb._quantile_unchecked(
            values,
            q,
            overwrite_input=may_overwrite,
            method=method,
            weights=wts,
        )

    # Nothing left: NaN for every requested quantile; indexing with ``()``
    # converts a 0-d result to a scalar.
    all_nan = np.full(q.shape, np.nan, dtype=values.dtype)
    return all_nan[()]
1707
1708
1709def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
1710 keepdims=None, *, where=None, mean=None,
1711 correction=None):
1712 return (a, out)
1713
1714
@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution.  The variance is computed for the flattened array by
    default, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose variance is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the variance is computed.  The default is to
        compute the variance of the flattened array.
    dtype : data-type, optional
        Type to use in computing the variance.  For arrays of integer type
        the default is `float64`; for arrays of float types it is the same as
        the array type.
    out : ndarray, optional
        Alternate output array in which to place the result.  It must have
        the same shape as the expected output, but the type is cast if
        necessary.
    ddof : {int, float}, optional
        "Delta Degrees of Freedom": the divisor used in the calculation is
        ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.
    where : array_like of bool, optional
        Elements to include in the variance. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    mean : array_like, optional
        Provide the mean to prevent its recalculation. The mean should have
        a shape as if it was calculated with ``keepdims=True``.
        The axis for the calculation of the mean should be the same as used in
        the call to this var function.

        .. versionadded:: 2.0.0

    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of them
        can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    variance : ndarray, see dtype parameter above
        If `out` is None, return a new array containing the variance,
        otherwise return a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    Raises
    ------
    TypeError
        If `a` is inexact and `dtype` or the dtype of `out` is not.
    ValueError
        If both `correction` and a non-zero `ddof` are given.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e.,  ``var = mean(abs(x - x.mean())**2)``.

    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
    instead.  In standard statistical practice, ``ddof=1`` provides an
    unbiased estimator of the variance of a hypothetical infinite
    population.  ``ddof=0`` provides a maximum likelihood estimate of the
    variance for normally distributed variables.

    Note that for complex numbers, the absolute value is taken before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the variance is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32` (see example
    below).  Specifying a higher-accuracy accumulator using the ``dtype``
    keyword can alleviate this issue.

    For this function to work on sub-classes of ndarray, they must define
    `sum` with the kwarg `keepdims`

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([1.,  0.])
    >>> np.nanvar(a, axis=1)
    array([0.,  0.25])  # may vary

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # `_replace_nan` produced no mask (presumably because `a` cannot
        # hold NaN -- confirm against its definition): plain var suffices.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    if dtype is not None:
        dtype = np.dtype(dtype)
    if dtype is not None and not issubclass(dtype.type, np.inexact):
        raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # `correction` is an alias of `ddof`.  The "not given" sentinel is
    # tested by identity, exactly like `mean` further down.
    if correction is not np._NoValue:
        if ddof != 0:
            raise ValueError(
                "ddof and correction can't be provided simultaneously."
            )
        else:
            ddof = correction

    # Compute mean
    if type(arr) is np.matrix:
        _keepdims = np._NoValue
    else:
        _keepdims = True

    # Number of non-NaN entries per slice; kept with reduced axes (where
    # possible) so that it lines up with `avg` below.
    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims,
                 where=where)

    if mean is not np._NoValue:
        avg = mean
    else:
        # we need to special case matrix for reverse compatibility
        # in order for this to work, these sums need to be called with
        # keepdims=True, however matrix now raises an error in this case, but
        # the reason that it drops the keepdims kwarg is to force keepdims=True
        # so this used to work by serendipity.
        avg = np.sum(arr, axis=axis, dtype=dtype,
                     keepdims=_keepdims, where=where)
        avg = _divide_by_count(avg, cnt)

    # Compute squared deviation from mean.  `arr` is reused as the output
    # buffer of both ufunc calls, so the statement order matters here.
    np.subtract(arr, avg, out=arr, casting='unsafe', where=where)
    # Presumably resets the former NaN positions to 0 so that they do not
    # contribute to the sum of squares -- confirm against `_copyto`.
    arr = _copyto(arr, 0, mask)
    if issubclass(arr.dtype.type, np.complexfloating):
        sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real
    else:
        sqr = np.multiply(arr, arr, out=arr, where=where)

    # Compute variance.
    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                 where=where)

    # Precaution against reduced object arrays
    try:
        var_ndim = var.ndim
    except AttributeError:
        var_ndim = np.ndim(var)
    if var_ndim < cnt.ndim:
        # Subclasses of ndarray may ignore keepdims, so check here.
        cnt = cnt.squeeze(axis)
    dof = cnt - ddof
    var = _divide_by_count(var, dof)

    isbad = (dof <= 0)
    if np.any(isbad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
                      stacklevel=2)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        var = _copyto(var, np.nan, isbad)
    return var
1897
1898
1899def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
1900 keepdims=None, *, where=None, mean=None,
1901 correction=None):
1902 return (a, out)
1903
1904
@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Compute the standard deviation along the specified axis, while
    ignoring NaNs.

    The standard deviation measures the spread of a distribution. Here it
    is taken over the non-NaN array elements only: over the flattened
    array by default, otherwise along the specified axis.

    A slice that is all-NaN, or that has zero degrees of freedom, yields
    NaN and a `RuntimeWarning` is issued.

    Parameters
    ----------
    a : array_like
        Calculate the standard deviation of the non-NaN values.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the standard deviation is computed. The
        default is to compute the standard deviation of the flattened
        array.
    dtype : dtype, optional
        Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it
        is the same as the array type.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape as the expected output, but the type (of the
        calculated values) will be cast if necessary.
    ddof : {int, float}, optional
        Means Delta Degrees of Freedom. The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If this value is anything but the default it is passed through
        as-is to the relevant functions of the sub-classes. If these
        functions do not have a `keepdims` kwarg, a RuntimeError will
        be raised.
    where : array_like of bool, optional
        Elements to include in the standard deviation.
        See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0

    mean : array_like, optional
        Provide the mean to prevent its recalculation. The mean should
        have a shape as if it was calculated with ``keepdims=True``.
        The axis for the calculation of the mean should be the same as
        used in the call to this std function.

        .. versionadded:: 2.0.0

    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of
        them can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        If `out` is None, return a new array containing the standard
        deviation, otherwise return a reference to the output array. If
        ddof is >= the number of non-NaN elements in a slice or the slice
        contains only NaNs, then the result for that slice is NaN.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The standard deviation is the square root of the average of the squared
    deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally the sum of the squared
    deviations divided by ``N``, the number of non-NaN elements. If,
    however, `ddof` is specified, the divisor ``N - ddof`` is used
    instead. In standard statistical practice, ``ddof=1`` provides an
    unbiased estimator of the variance of the infinite population.
    ``ddof=0`` provides a maximum likelihood estimate of the variance for
    normally distributed variables. The standard deviation computed in
    this function is the square root of the estimated variance, so even
    with ``ddof=1``, it will not be an unbiased estimate of the standard
    deviation per se.

    Note that, for complex numbers, `std` takes the absolute value before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the *std* is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for float32. Specifying a
    higher-accuracy accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([1., 0.])
    >>> np.nanstd(a, axis=1)
    array([0.,  0.5]) # may vary

    """
    # All NaN handling, argument validation and warnings live in nanvar;
    # this function only takes the square root of its result.
    variance = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    # ndarray result: take the root in place, reusing the array that
    # nanvar returned instead of allocating a second one.
    if isinstance(variance, np.ndarray):
        return np.sqrt(variance, out=variance)

    # Non-array result that still carries a dtype: cast the root back to
    # that dtype's scalar type.
    if hasattr(variance, 'dtype'):
        scalar_type = variance.dtype.type
        return scalar_type(np.sqrt(variance))

    # Anything else: return whatever np.sqrt produces.
    return np.sqrt(variance)